diff --git a/.git-blame-ignore-revs b/.git-blame-ignore-revs index 718d914570..654afad97a 100644 --- a/.git-blame-ignore-revs +++ b/.git-blame-ignore-revs @@ -14,3 +14,6 @@ # Transition from clang-format 11 to clang-format 18 b37657e6ad9af16eaec2982d8e2397acd2af2881 + +# Add cython-lint to pre-commit config +0ce1aef210ffb88b7d2ea3a89e861486498f652f diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 241bed91e4..a313d8062c 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -42,3 +42,8 @@ repos: hooks: - id: pretty-format-toml args: [--autofix] +- repo: https://github.com/MarcoGorelli/cython-lint + rev: v0.16.6 + hooks: + - id: cython-lint + - id: double-quote-cython-strings diff --git a/dpctl/_backend.pxd b/dpctl/_backend.pxd index 27a00ef2c0..7464d311c6 100644 --- a/dpctl/_backend.pxd +++ b/dpctl/_backend.pxd @@ -34,93 +34,94 @@ cdef extern from "syclinterface/dpctl_utils.h": cdef extern from "syclinterface/dpctl_sycl_enum_types.h": - ctypedef enum _usm_type 'DPCTLSyclUSMType': - _USM_UNKNOWN 'DPCTL_USM_UNKNOWN' - _USM_DEVICE 'DPCTL_USM_DEVICE' - _USM_SHARED 'DPCTL_USM_SHARED' - _USM_HOST 'DPCTL_USM_HOST' - - ctypedef enum _backend_type 'DPCTLSyclBackendType': - _ALL_BACKENDS 'DPCTL_ALL_BACKENDS' - _CUDA 'DPCTL_CUDA' - _HIP 'DPCTL_HIP' - _LEVEL_ZERO 'DPCTL_LEVEL_ZERO' - _OPENCL 'DPCTL_OPENCL' - _UNKNOWN_BACKEND 'DPCTL_UNKNOWN_BACKEND' - - ctypedef enum _device_type 'DPCTLSyclDeviceType': - _ACCELERATOR 'DPCTL_ACCELERATOR' - _ALL_DEVICES 'DPCTL_ALL' - _AUTOMATIC 'DPCTL_AUTOMATIC' - _CPU 'DPCTL_CPU' - _CUSTOM 'DPCTL_CUSTOM' - _GPU 'DPCTL_GPU' - _UNKNOWN_DEVICE 'DPCTL_UNKNOWN_DEVICE' - - ctypedef enum _arg_data_type 'DPCTLKernelArgType': - _INT8_T 'DPCTL_INT8_T', - _UINT8_T 'DPCTL_UINT8_T', - _INT16_T 'DPCTL_INT16_T', - _UINT16_T 'DPCTL_UINT16_T', - _INT32_T 'DPCTL_INT32_T', - _UINT32_T 'DPCTL_UINT32_T', - _INT64_T 'DPCTL_INT64_T', - _UINT64_T 'DPCTL_UINT64_T', - _FLOAT 'DPCTL_FLOAT32_T', - _DOUBLE 'DPCTL_FLOAT64_T', - _VOID_PTR 'DPCTL_VOID_PTR', - _LOCAL_ACCESSOR 'DPCTL_LOCAL_ACCESSOR', - _WORK_GROUP_MEMORY 'DPCTL_WORK_GROUP_MEMORY' - - ctypedef enum _queue_property_type 'DPCTLQueuePropertyType': - _DEFAULT_PROPERTY 'DPCTL_DEFAULT_PROPERTY' - _ENABLE_PROFILING 'DPCTL_ENABLE_PROFILING' - _IN_ORDER 'DPCTL_IN_ORDER' - - ctypedef enum _aspect_type 'DPCTLSyclAspectType': - _host 'host', - _cpu 'cpu', - _gpu 'gpu', - _accelerator 'accelerator', - _custom 'custom', - _fp16 'fp16', - _fp64 'fp64', - _atomic64 'atomic64', - _image 'image', - _online_compiler 'online_compiler', - _online_linker 'online_linker', - _queue_profiling 'queue_profiling', - _usm_device_allocations 'usm_device_allocations', - _usm_host_allocations 'usm_host_allocations', - _usm_shared_allocations 'usm_shared_allocations', - _usm_system_allocations 'usm_system_allocations', - _usm_atomic_host_allocations 'usm_atomic_host_allocations', - _usm_atomic_shared_allocations 'usm_atomic_shared_allocations', - _host_debuggable 'host_debuggable', - _emulated 'emulated', - _is_component 'is_component', - _is_composite 'is_composite', - - ctypedef enum _partition_affinity_domain_type 'DPCTLPartitionAffinityDomainType': - _not_applicable 'not_applicable', - _numa 'numa', - _L4_cache 'L4_cache', - _L3_cache 'L3_cache', - _L2_cache 'L2_cache', - _L1_cache 'L1_cache', - _next_partitionable 'next_partitionable', - - ctypedef enum _event_status_type 'DPCTLSyclEventStatusType': - _UNKNOWN_STATUS 'DPCTL_UNKNOWN_STATUS' - _SUBMITTED 'DPCTL_SUBMITTED' - _RUNNING 'DPCTL_RUNNING' - _COMPLETE 'DPCTL_COMPLETE' - - ctypedef enum _global_mem_cache_type 'DPCTLGlobalMemCacheType': - _MEM_CACHE_TYPE_INDETERMINATE 'DPCTL_MEM_CACHE_TYPE_INDETERMINATE' - _MEM_CACHE_TYPE_NONE 'DPCTL_MEM_CACHE_TYPE_NONE' - _MEM_CACHE_TYPE_READ_ONLY 'DPCTL_MEM_CACHE_TYPE_READ_ONLY' - _MEM_CACHE_TYPE_READ_WRITE 'DPCTL_MEM_CACHE_TYPE_READ_WRITE' + ctypedef enum _usm_type "DPCTLSyclUSMType": + _USM_UNKNOWN "DPCTL_USM_UNKNOWN" + _USM_DEVICE "DPCTL_USM_DEVICE" + _USM_SHARED "DPCTL_USM_SHARED" + _USM_HOST "DPCTL_USM_HOST" + + ctypedef enum _backend_type "DPCTLSyclBackendType": + _ALL_BACKENDS "DPCTL_ALL_BACKENDS" + _CUDA "DPCTL_CUDA" + _HIP "DPCTL_HIP" + _LEVEL_ZERO "DPCTL_LEVEL_ZERO" + _OPENCL "DPCTL_OPENCL" + _UNKNOWN_BACKEND "DPCTL_UNKNOWN_BACKEND" + + ctypedef enum _device_type "DPCTLSyclDeviceType": + _ACCELERATOR "DPCTL_ACCELERATOR" + _ALL_DEVICES "DPCTL_ALL" + _AUTOMATIC "DPCTL_AUTOMATIC" + _CPU "DPCTL_CPU" + _CUSTOM "DPCTL_CUSTOM" + _GPU "DPCTL_GPU" + _UNKNOWN_DEVICE "DPCTL_UNKNOWN_DEVICE" + + ctypedef enum _arg_data_type "DPCTLKernelArgType": + _INT8_T "DPCTL_INT8_T", + _UINT8_T "DPCTL_UINT8_T", + _INT16_T "DPCTL_INT16_T", + _UINT16_T "DPCTL_UINT16_T", + _INT32_T "DPCTL_INT32_T", + _UINT32_T "DPCTL_UINT32_T", + _INT64_T "DPCTL_INT64_T", + _UINT64_T "DPCTL_UINT64_T", + _FLOAT "DPCTL_FLOAT32_T", + _DOUBLE "DPCTL_FLOAT64_T", + _VOID_PTR "DPCTL_VOID_PTR", + _LOCAL_ACCESSOR "DPCTL_LOCAL_ACCESSOR", + _WORK_GROUP_MEMORY "DPCTL_WORK_GROUP_MEMORY" + + ctypedef enum _queue_property_type "DPCTLQueuePropertyType": + _DEFAULT_PROPERTY "DPCTL_DEFAULT_PROPERTY" + _ENABLE_PROFILING "DPCTL_ENABLE_PROFILING" + _IN_ORDER "DPCTL_IN_ORDER" + + ctypedef enum _aspect_type "DPCTLSyclAspectType": + _host "host", + _cpu "cpu", + _gpu "gpu", + _accelerator "accelerator", + _custom "custom", + _fp16 "fp16", + _fp64 "fp64", + _atomic64 "atomic64", + _image "image", + _online_compiler "online_compiler", + _online_linker "online_linker", + _queue_profiling "queue_profiling", + _usm_device_allocations "usm_device_allocations", + _usm_host_allocations "usm_host_allocations", + _usm_shared_allocations "usm_shared_allocations", + _usm_system_allocations "usm_system_allocations", + _usm_atomic_host_allocations "usm_atomic_host_allocations", + _usm_atomic_shared_allocations "usm_atomic_shared_allocations", + _host_debuggable "host_debuggable", + _emulated "emulated", + _is_component "is_component", + _is_composite "is_composite", + + ctypedef enum _partition_affinity_domain_type \ + "DPCTLPartitionAffinityDomainType": + _not_applicable "not_applicable", + _numa "numa", + _L4_cache "L4_cache", + _L3_cache "L3_cache", + _L2_cache "L2_cache", + _L1_cache "L1_cache", + _next_partitionable "next_partitionable", + + ctypedef enum _event_status_type "DPCTLSyclEventStatusType": + _UNKNOWN_STATUS "DPCTL_UNKNOWN_STATUS" + _SUBMITTED "DPCTL_SUBMITTED" + _RUNNING "DPCTL_RUNNING" + _COMPLETE "DPCTL_COMPLETE" + + ctypedef enum _global_mem_cache_type "DPCTLGlobalMemCacheType": + _MEM_CACHE_TYPE_INDETERMINATE "DPCTL_MEM_CACHE_TYPE_INDETERMINATE" + _MEM_CACHE_TYPE_NONE "DPCTL_MEM_CACHE_TYPE_NONE" + _MEM_CACHE_TYPE_READ_ONLY "DPCTL_MEM_CACHE_TYPE_READ_ONLY" + _MEM_CACHE_TYPE_READ_WRITE "DPCTL_MEM_CACHE_TYPE_READ_WRITE" cdef extern from "syclinterface/dpctl_sycl_types.h": @@ -134,15 +135,15 @@ cdef extern from "syclinterface/dpctl_sycl_types.h": cdef struct DPCTLOpaqueSyclQueue cdef struct DPCTLOpaqueSyclUSM - ctypedef DPCTLOpaqueSyclContext *DPCTLSyclContextRef - ctypedef DPCTLOpaqueSyclDevice *DPCTLSyclDeviceRef + ctypedef DPCTLOpaqueSyclContext *DPCTLSyclContextRef + ctypedef DPCTLOpaqueSyclDevice *DPCTLSyclDeviceRef ctypedef DPCTLOpaqueSyclDeviceSelector *DPCTLSyclDeviceSelectorRef - ctypedef DPCTLOpaqueSyclEvent *DPCTLSyclEventRef - ctypedef DPCTLOpaqueSyclKernel *DPCTLSyclKernelRef - ctypedef DPCTLOpaqueSyclPlatform *DPCTLSyclPlatformRef - ctypedef DPCTLOpaqueSyclKernelBundle *DPCTLSyclKernelBundleRef - ctypedef DPCTLOpaqueSyclQueue *DPCTLSyclQueueRef - ctypedef DPCTLOpaqueSyclUSM *DPCTLSyclUSMRef + ctypedef DPCTLOpaqueSyclEvent *DPCTLSyclEventRef + ctypedef DPCTLOpaqueSyclKernel *DPCTLSyclKernelRef + ctypedef DPCTLOpaqueSyclPlatform *DPCTLSyclPlatformRef + ctypedef DPCTLOpaqueSyclKernelBundle *DPCTLSyclKernelBundleRef + ctypedef DPCTLOpaqueSyclQueue *DPCTLSyclQueueRef + ctypedef DPCTLOpaqueSyclUSM *DPCTLSyclUSMRef cdef extern from "syclinterface/dpctl_sycl_device_manager.h": @@ -167,9 +168,15 @@ cdef extern from "syclinterface/dpctl_sycl_device_interface.h": cdef uint32_t DPCTLDevice_GetMaxNumSubGroups(const DPCTLSyclDeviceRef DRef) cdef size_t DPCTLDevice_GetMaxWorkGroupSize(const DPCTLSyclDeviceRef DRef) cdef uint32_t DPCTLDevice_GetMaxWorkItemDims(const DPCTLSyclDeviceRef DRef) - cdef size_t *DPCTLDevice_GetMaxWorkItemSizes1d(const DPCTLSyclDeviceRef DRef) - cdef size_t *DPCTLDevice_GetMaxWorkItemSizes2d(const DPCTLSyclDeviceRef DRef) - cdef size_t *DPCTLDevice_GetMaxWorkItemSizes3d(const DPCTLSyclDeviceRef DRef) + cdef size_t *DPCTLDevice_GetMaxWorkItemSizes1d( + const DPCTLSyclDeviceRef DRef + ) + cdef size_t *DPCTLDevice_GetMaxWorkItemSizes2d( + const DPCTLSyclDeviceRef DRef + ) + cdef size_t *DPCTLDevice_GetMaxWorkItemSizes3d( + const DPCTLSyclDeviceRef DRef + ) cdef const char *DPCTLDevice_GetName(const DPCTLSyclDeviceRef DRef) cdef DPCTLSyclPlatformRef DPCTLDevice_GetPlatform( const DPCTLSyclDeviceRef DRef) @@ -178,24 +185,56 @@ cdef extern from "syclinterface/dpctl_sycl_device_interface.h": cdef bool DPCTLDevice_IsAccelerator(const DPCTLSyclDeviceRef DRef) cdef bool DPCTLDevice_IsCPU(const DPCTLSyclDeviceRef DRef) cdef bool DPCTLDevice_IsGPU(const DPCTLSyclDeviceRef DRef) - cdef bool DPCTLDevice_GetSubGroupIndependentForwardProgress(const DPCTLSyclDeviceRef DRef) - cdef uint32_t DPCTLDevice_GetPreferredVectorWidthChar(const DPCTLSyclDeviceRef DRef) - cdef uint32_t DPCTLDevice_GetPreferredVectorWidthShort(const DPCTLSyclDeviceRef DRef) - cdef uint32_t DPCTLDevice_GetPreferredVectorWidthInt(const DPCTLSyclDeviceRef DRef) - cdef uint32_t DPCTLDevice_GetPreferredVectorWidthLong(const DPCTLSyclDeviceRef DRef) - cdef uint32_t DPCTLDevice_GetPreferredVectorWidthFloat(const DPCTLSyclDeviceRef DRef) - cdef uint32_t DPCTLDevice_GetPreferredVectorWidthDouble(const DPCTLSyclDeviceRef DRef) - cdef uint32_t DPCTLDevice_GetPreferredVectorWidthHalf(const DPCTLSyclDeviceRef DRef) - cdef uint32_t DPCTLDevice_GetNativeVectorWidthChar(const DPCTLSyclDeviceRef DRef) - cdef uint32_t DPCTLDevice_GetNativeVectorWidthShort(const DPCTLSyclDeviceRef DRef) - cdef uint32_t DPCTLDevice_GetNativeVectorWidthInt(const DPCTLSyclDeviceRef DRef) - cdef uint32_t DPCTLDevice_GetNativeVectorWidthLong(const DPCTLSyclDeviceRef DRef) - cdef uint32_t DPCTLDevice_GetNativeVectorWidthFloat(const DPCTLSyclDeviceRef DRef) - cdef uint32_t DPCTLDevice_GetNativeVectorWidthDouble(const DPCTLSyclDeviceRef DRef) - cdef uint32_t DPCTLDevice_GetNativeVectorWidthHalf(const DPCTLSyclDeviceRef DRef) + cdef bool DPCTLDevice_GetSubGroupIndependentForwardProgress( + const DPCTLSyclDeviceRef DRef + ) + cdef uint32_t DPCTLDevice_GetPreferredVectorWidthChar( + const DPCTLSyclDeviceRef DRef + ) + cdef uint32_t DPCTLDevice_GetPreferredVectorWidthShort( + const DPCTLSyclDeviceRef DRef + ) + cdef uint32_t DPCTLDevice_GetPreferredVectorWidthInt( + const DPCTLSyclDeviceRef DRef + ) + cdef uint32_t DPCTLDevice_GetPreferredVectorWidthLong( + const DPCTLSyclDeviceRef DRef + ) + cdef uint32_t DPCTLDevice_GetPreferredVectorWidthFloat( + const DPCTLSyclDeviceRef DRef + ) + cdef uint32_t DPCTLDevice_GetPreferredVectorWidthDouble( + const DPCTLSyclDeviceRef DRef + ) + cdef uint32_t DPCTLDevice_GetPreferredVectorWidthHalf( + const DPCTLSyclDeviceRef DRef + ) + cdef uint32_t DPCTLDevice_GetNativeVectorWidthChar( + const DPCTLSyclDeviceRef DRef + ) + cdef uint32_t DPCTLDevice_GetNativeVectorWidthShort( + const DPCTLSyclDeviceRef DRef + ) + cdef uint32_t DPCTLDevice_GetNativeVectorWidthInt( + const DPCTLSyclDeviceRef DRef + ) + cdef uint32_t DPCTLDevice_GetNativeVectorWidthLong( + const DPCTLSyclDeviceRef DRef + ) + cdef uint32_t DPCTLDevice_GetNativeVectorWidthFloat( + const DPCTLSyclDeviceRef DRef + ) + cdef uint32_t DPCTLDevice_GetNativeVectorWidthDouble( + const DPCTLSyclDeviceRef DRef + ) + cdef uint32_t DPCTLDevice_GetNativeVectorWidthHalf( + const DPCTLSyclDeviceRef DRef + ) cdef bool DPCTLDevice_HasAspect(const DPCTLSyclDeviceRef, _aspect_type) cdef uint32_t DPCTLDevice_GetMaxReadImageArgs(const DPCTLSyclDeviceRef DRef) - cdef uint32_t DPCTLDevice_GetMaxWriteImageArgs(const DPCTLSyclDeviceRef DRef) + cdef uint32_t DPCTLDevice_GetMaxWriteImageArgs( + const DPCTLSyclDeviceRef DRef + ) cdef size_t DPCTLDevice_GetImage2dMaxWidth(const DPCTLSyclDeviceRef DRef) cdef size_t DPCTLDevice_GetImage2dMaxHeight(const DPCTLSyclDeviceRef DRef) cdef size_t DPCTLDevice_GetImage3dMaxWidth(const DPCTLSyclDeviceRef DRef) @@ -208,19 +247,36 @@ cdef extern from "syclinterface/dpctl_sycl_device_interface.h": cdef DPCTLDeviceVectorRef DPCTLDevice_CreateSubDevicesByAffinity( const DPCTLSyclDeviceRef DRef, _partition_affinity_domain_type PartitionAffinityDomainTy) - cdef DPCTLSyclDeviceRef DPCTLDevice_GetParentDevice(const DPCTLSyclDeviceRef DRef) - cdef size_t DPCTLDevice_GetProfilingTimerResolution(const DPCTLSyclDeviceRef DRef) - cdef uint32_t DPCTLDevice_GetGlobalMemCacheLineSize(const DPCTLSyclDeviceRef DRef) - cdef uint64_t DPCTLDevice_GetGlobalMemCacheSize(const DPCTLSyclDeviceRef DRef) + cdef DPCTLSyclDeviceRef DPCTLDevice_GetParentDevice( + const DPCTLSyclDeviceRef DRef + ) + cdef size_t DPCTLDevice_GetProfilingTimerResolution( + const DPCTLSyclDeviceRef DRef + ) + cdef uint32_t DPCTLDevice_GetGlobalMemCacheLineSize( + const DPCTLSyclDeviceRef DRef + ) + cdef uint64_t DPCTLDevice_GetGlobalMemCacheSize( + const DPCTLSyclDeviceRef DRef + ) cdef _global_mem_cache_type DPCTLDevice_GetGlobalMemCacheType( const DPCTLSyclDeviceRef DRef) - cdef size_t *DPCTLDevice_GetSubGroupSizes(const DPCTLSyclDeviceRef DRef, - size_t *res_len) - cdef uint32_t DPCTLDevice_GetPartitionMaxSubDevices(const DPCTLSyclDeviceRef DRef) - cdef uint32_t DPCTLDevice_GetMaxClockFrequency(const DPCTLSyclDeviceRef DRef) + cdef size_t *DPCTLDevice_GetSubGroupSizes( + const DPCTLSyclDeviceRef DRef, size_t *res_len + ) + cdef uint32_t DPCTLDevice_GetPartitionMaxSubDevices( + const DPCTLSyclDeviceRef DRef + ) + cdef uint32_t DPCTLDevice_GetMaxClockFrequency( + const DPCTLSyclDeviceRef DRef + ) cdef uint64_t DPCTLDevice_GetMaxMemAllocSize(const DPCTLSyclDeviceRef DRef) - cdef DPCTLSyclDeviceRef DPCTLDevice_GetCompositeDevice(const DPCTLSyclDeviceRef DRef) - cdef DPCTLDeviceVectorRef DPCTLDevice_GetComponentDevices(const DPCTLSyclDeviceRef DRef) + cdef DPCTLSyclDeviceRef DPCTLDevice_GetCompositeDevice( + const DPCTLSyclDeviceRef DRef + ) + cdef DPCTLDeviceVectorRef DPCTLDevice_GetComponentDevices( + const DPCTLSyclDeviceRef DRef + ) cdef extern from "syclinterface/dpctl_sycl_device_manager.h": @@ -238,7 +294,9 @@ cdef extern from "syclinterface/dpctl_sycl_device_manager.h": const DPCTLSyclDeviceRef DRef, int device_identifier) cdef size_t DPCTLDeviceMgr_GetNumDevices(int device_identifier) - cdef const char * DPCTLDeviceMgr_GetDeviceInfoStr(const DPCTLSyclDeviceRef DRef) + cdef const char * DPCTLDeviceMgr_GetDeviceInfoStr( + const DPCTLSyclDeviceRef DRef + ) cdef DPCTLSyclContextRef DPCTLDeviceMgr_GetCachedContext( const DPCTLSyclDeviceRef DRef) cdef int64_t DPCTLDeviceMgr_GetRelativeId(const DPCTLSyclDeviceRef DRef) @@ -252,7 +310,9 @@ cdef extern from "syclinterface/dpctl_sycl_device_selector_interface.h": DPCTLSyclDeviceSelectorRef DPCTLFilterSelector_Create(const char *) DPCTLSyclDeviceSelectorRef DPCTLGPUSelector_Create() void DPCTLDeviceSelector_Delete(DPCTLSyclDeviceSelectorRef) - int DPCTLDeviceSelector_Score(DPCTLSyclDeviceSelectorRef, DPCTLSyclDeviceRef) + int DPCTLDeviceSelector_Score( + DPCTLSyclDeviceSelectorRef, DPCTLSyclDeviceRef + ) cdef extern from "syclinterface/dpctl_sycl_event_interface.h": @@ -261,7 +321,9 @@ cdef extern from "syclinterface/dpctl_sycl_event_interface.h": cdef void DPCTLEvent_Wait(DPCTLSyclEventRef ERef) nogil cdef void DPCTLEvent_WaitAndThrow(DPCTLSyclEventRef ERef) nogil cdef void DPCTLEvent_Delete(DPCTLSyclEventRef ERef) - cdef _event_status_type DPCTLEvent_GetCommandExecutionStatus(DPCTLSyclEventRef ERef) + cdef _event_status_type DPCTLEvent_GetCommandExecutionStatus( + DPCTLSyclEventRef ERef + ) cdef _backend_type DPCTLEvent_GetBackend(DPCTLSyclEventRef ERef) cdef struct DPCTLEventVector ctypedef DPCTLEventVector *DPCTLEventVectorRef @@ -282,12 +344,18 @@ cdef extern from "syclinterface/dpctl_sycl_kernel_interface.h": cdef void DPCTLKernel_Delete(DPCTLSyclKernelRef KRef) cdef DPCTLSyclKernelRef DPCTLKernel_Copy(const DPCTLSyclKernelRef KRef) cdef size_t DPCTLKernel_GetWorkGroupSize(const DPCTLSyclKernelRef KRef) - cdef size_t DPCTLKernel_GetPreferredWorkGroupSizeMultiple(const DPCTLSyclKernelRef KRef) + cdef size_t DPCTLKernel_GetPreferredWorkGroupSizeMultiple( + const DPCTLSyclKernelRef KRef + ) cdef size_t DPCTLKernel_GetPrivateMemSize(const DPCTLSyclKernelRef KRef) cdef uint32_t DPCTLKernel_GetMaxNumSubGroups(const DPCTLSyclKernelRef KRef) cdef uint32_t DPCTLKernel_GetMaxSubGroupSize(const DPCTLSyclKernelRef KRef) - cdef uint32_t DPCTLKernel_GetCompileNumSubGroups(const DPCTLSyclKernelRef KRef) - cdef uint32_t DPCTLKernel_GetCompileSubGroupSize(const DPCTLSyclKernelRef KRef) + cdef uint32_t DPCTLKernel_GetCompileNumSubGroups( + const DPCTLSyclKernelRef KRef + ) + cdef uint32_t DPCTLKernel_GetCompileSubGroupSize( + const DPCTLSyclKernelRef KRef + ) cdef extern from "syclinterface/dpctl_sycl_platform_manager.h": @@ -301,11 +369,15 @@ cdef extern from "syclinterface/dpctl_sycl_platform_manager.h": DPCTLPlatformVectorRef, size_t index) cdef void DPCTLPlatformMgr_PrintInfo(const DPCTLSyclPlatformRef, size_t) - cdef const char *DPCTLPlatformMgr_GetInfo(const DPCTLSyclPlatformRef, size_t) + cdef const char *DPCTLPlatformMgr_GetInfo( + const DPCTLSyclPlatformRef, size_t + ) cdef extern from "syclinterface/dpctl_sycl_platform_interface.h": - cdef bool DPCTLPlatform_AreEq(const DPCTLSyclPlatformRef, const DPCTLSyclPlatformRef) + cdef bool DPCTLPlatform_AreEq( + const DPCTLSyclPlatformRef, const DPCTLSyclPlatformRef + ) cdef DPCTLSyclPlatformRef DPCTLPlatform_Copy(const DPCTLSyclPlatformRef) cdef DPCTLSyclPlatformRef DPCTLPlatform_Create() cdef DPCTLSyclPlatformRef DPCTLPlatform_CreateFromSelector( @@ -361,15 +433,16 @@ cdef extern from "syclinterface/dpctl_sycl_kernel_bundle_interface.h": cdef DPCTLSyclKernelRef DPCTLKernelBundle_GetKernel( DPCTLSyclKernelBundleRef KBRef, const char *KernelName) - cdef bool DPCTLKernelBundle_HasKernel(DPCTLSyclKernelBundleRef KBRef, - const char *KernelName) + cdef bool DPCTLKernelBundle_HasKernel( + DPCTLSyclKernelBundleRef KBRef, const char *KernelName + ) cdef void DPCTLKernelBundle_Delete(DPCTLSyclKernelBundleRef KBRef) cdef DPCTLSyclKernelBundleRef DPCTLKernelBundle_Copy( const DPCTLSyclKernelBundleRef KBRef) cdef extern from "syclinterface/dpctl_sycl_queue_interface.h": - ctypedef struct _md_local_accessor 'MDLocalAccessor': + ctypedef struct _md_local_accessor "MDLocalAccessor": size_t ndim _arg_data_type dpctl_type_id size_t dim0 @@ -490,11 +563,11 @@ cdef extern from "syclinterface/dpctl_sycl_extension_interface.h": ctypedef RawWorkGroupMemoryTy RawWorkGroupMemory cdef struct DPCTLOpaqueWorkGroupMemory - ctypedef DPCTLOpaqueWorkGroupMemory *DPCTLSyclWorkGroupMemoryRef; + ctypedef DPCTLOpaqueWorkGroupMemory *DPCTLSyclWorkGroupMemoryRef - cdef DPCTLSyclWorkGroupMemoryRef DPCTLWorkGroupMemory_Create(size_t nbytes); + cdef DPCTLSyclWorkGroupMemoryRef DPCTLWorkGroupMemory_Create(size_t nbytes) cdef void DPCTLWorkGroupMemory_Delete( - DPCTLSyclWorkGroupMemoryRef Ref); + DPCTLSyclWorkGroupMemoryRef Ref) - cdef bint DPCTLWorkGroupMemory_Available(); + cdef bint DPCTLWorkGroupMemory_Available() diff --git a/dpctl/_diagnostics.pyx b/dpctl/_diagnostics.pyx index 0b26130965..3723c23493 100644 --- a/dpctl/_diagnostics.pyx +++ b/dpctl/_diagnostics.pyx @@ -25,8 +25,8 @@ import os cdef extern from "syclinterface/dpctl_service.h": - cdef void DPCTLService_InitLogger(const char *, const char *) - cdef void DPCTLService_ShutdownLogger() + cdef void DPCTLService_InitLogger(const char *, const char *) + cdef void DPCTLService_ShutdownLogger() def _init_logger(log_dir=None): @@ -62,7 +62,7 @@ def syclinterface_diagnostics(verbosity="warning", log_dir=None): function calls. """ _allowed_verbosity = ["warning", "error"] - if not verbosity in _allowed_verbosity: + if verbosity not in _allowed_verbosity: raise ValueError( f"Verbosity argument not understood. " f"Permitted values are {_allowed_verbosity}" diff --git a/dpctl/_sycl_context.pyx b/dpctl/_sycl_context.pyx index 13a9b135e5..aefa6d301f 100644 --- a/dpctl/_sycl_context.pyx +++ b/dpctl/_sycl_context.pyx @@ -35,8 +35,6 @@ from ._backend cimport ( # noqa: E211 DPCTLContext_DeviceCount, DPCTLContext_GetDevices, DPCTLContext_Hash, - DPCTLDevice_Copy, - DPCTLDevice_Delete, DPCTLDeviceMgr_GetCachedContext, DPCTLDeviceVector_CreateFromArray, DPCTLDeviceVector_Delete, @@ -216,7 +214,6 @@ cdef class SyclContext(_SyclContext): cdef int _init_context_from_devices(self, object devices, int props): cdef int num_devices = len(devices) cdef int i = 0 - cdef int j = 0 cdef size_t num_bytes cdef DPCTLDeviceVectorRef DVRef = NULL cdef error_handler_callback * eh_callback = NULL @@ -289,7 +286,7 @@ cdef class SyclContext(_SyclContext): elif isinstance(arg, SyclDevice): ret = self._init_context_from_one_device( arg, 0) elif pycapsule.PyCapsule_IsValid(arg, "SyclContextRef"): - status = self._init_context_from_capsule(arg) + ret = self._init_context_from_capsule(arg) elif isinstance(arg, (list, tuple)) and all( [isinstance(argi, SyclDevice) for argi in arg] ): diff --git a/dpctl/_sycl_device.pxd b/dpctl/_sycl_device.pxd index 6cf8950445..190d981cd0 100644 --- a/dpctl/_sycl_device.pxd +++ b/dpctl/_sycl_device.pxd @@ -53,7 +53,9 @@ cdef public api class SyclDevice(_SyclDevice) [ cdef DPCTLSyclDeviceRef get_device_ref(self) cdef list create_sub_devices_equally(self, size_t count) cdef list create_sub_devices_by_counts(self, object counts) - cdef list create_sub_devices_by_affinity(self, _partition_affinity_domain_type domain) + cdef list create_sub_devices_by_affinity( + self, _partition_affinity_domain_type domain + ) cdef cpp_bool equals(self, SyclDevice q) cdef int get_device_type_ordinal(self) cdef int get_overall_ordinal(self) diff --git a/dpctl/_sycl_device.pyx b/dpctl/_sycl_device.pyx index fef70b5cfb..5b43ffed1a 100644 --- a/dpctl/_sycl_device.pyx +++ b/dpctl/_sycl_device.pyx @@ -85,7 +85,6 @@ from ._backend cimport ( # noqa: E211 DPCTLDevice_IsCPU, DPCTLDevice_IsGPU, DPCTLDeviceMgr_GetDeviceInfoStr, - DPCTLDeviceMgr_GetDevices, DPCTLDeviceMgr_GetPositionInDevices, DPCTLDeviceMgr_GetRelativeId, DPCTLDeviceSelector_Delete, @@ -348,7 +347,7 @@ cdef class SyclDevice(_SyclDevice): if ret == -1: raise SyclDeviceCreationError( "Could not create a SyclDevice with the selector string " - "'{selector_string}'".format(selector_string=arg) + "'{selector_string}'".format(selector_string=arg) ) elif isinstance(arg, _SyclDevice): ret = self._init_from__SyclDevice(arg) @@ -810,7 +809,6 @@ cdef class SyclDevice(_SyclDevice): cdef _aspect_type AT = _aspect_type._is_component return DPCTLDevice_HasAspect(self._device_ref, AT) - @property def has_aspect_is_composite(self): """ Returns ``True`` if this device is a composite device, ``False`` @@ -1113,8 +1111,8 @@ cdef class SyclDevice(_SyclDevice): @property def sub_group_independent_forward_progress(self): - """ Returns ``True`` if the device supports independent forward progress of - sub-groups with respect to other sub-groups in the same work-group. + """ Returns ``True`` if the device supports independent forward progress + of sub-groups with respect to other sub-groups in the same work-group. Returns: bool: @@ -1155,7 +1153,7 @@ cdef class SyclDevice(_SyclDevice): DPCTLSize_t_Array_Delete(sg_sizes) return res else: - return [] + return [] @property def sycl_platform(self): @@ -1667,19 +1665,20 @@ cdef class SyclDevice(_SyclDevice): - Specifying an int (``count``) The returned list contains as - many sub-devices as can be created such that each sub-device - contains ``count`` compute units. If the device’s total number - of compute units is not evenly divided by ``count``, then the - remaining compute units are not included in any of the - sub-devices. + many sub-devices as can be created such that each + sub-device contains ``count`` compute units. If the + device’s total number of compute units is not evenly + divided by ``count``, then the remaining compute units + are not included in any of the sub-devices. - Specifying an affinity domain as a string - The supported values are: ``"numa"``, ``"L4_cache"``, ``"L3_cache"``, - ``"L2_cache"``, ``"L1_cache"``, ``"next_partitionable"``. + The supported values are: ``"numa"``, ``"L4_cache"``, + ``"L3_cache"``, ``"L2_cache"``, ``"L1_cache"``, + ``"next_partitionable"``. - Specifying a collection of integral values - For each non-zero value ``M`` in the collection, a sub-device with ``M`` - compute units is created. + For each non-zero value ``M`` in the collection, a + sub-device with ``M`` compute units is created. Returns: List[:class:`dpctl.SyclDevice`]: @@ -1816,7 +1815,7 @@ cdef class SyclDevice(_SyclDevice): """ cdef uint32_t clock_fr = DPCTLDevice_GetMaxClockFrequency( self._device_ref - ) + ) return clock_fr @property @@ -1829,7 +1828,7 @@ cdef class SyclDevice(_SyclDevice): """ cdef uint64_t max_alloc_sz = DPCTLDevice_GetMaxMemAllocSize( self._device_ref - ) + ) return max_alloc_sz @property @@ -1875,7 +1874,7 @@ cdef class SyclDevice(_SyclDevice): """ cdef uint64_t cache_sz = DPCTLDevice_GetGlobalMemCacheSize( self._device_ref - ) + ) return cache_sz @property @@ -1888,7 +1887,7 @@ cdef class SyclDevice(_SyclDevice): """ cdef uint64_t cache_line_sz = DPCTLDevice_GetGlobalMemCacheLineSize( self._device_ref - ) + ) return cache_line_sz @property @@ -1957,9 +1956,6 @@ cdef class SyclDevice(_SyclDevice): assert level_zero_gpu == dev """ cdef DPCTLSyclDeviceRef pDRef = NULL - cdef _backend_type BTy - cdef _device_type DTy - cdef int64_t relId = -1 pDRef = DPCTLDevice_GetParentDevice(self._device_ref) if (pDRef is NULL): return _cached_filter_string(self) diff --git a/dpctl/_sycl_device_factory.pyx b/dpctl/_sycl_device_factory.pyx index ef2f7a4a1a..6fe7a9ad78 100644 --- a/dpctl/_sycl_device_factory.pyx +++ b/dpctl/_sycl_device_factory.pyx @@ -437,7 +437,7 @@ cdef class _DefaultDeviceCache: _global_default_device_cache = ContextVar( - 'global_default_device_cache', + "global_default_device_cache", default=_DefaultDeviceCache() ) @@ -452,5 +452,6 @@ cpdef SyclDevice _cached_default_device(): """ cdef _DefaultDeviceCache _cache = _global_default_device_cache.get() d_, changed_ = _cache.get_or_create() - if changed_: _global_default_device_cache.set(_cache) + if changed_: + _global_default_device_cache.set(_cache) return d_ diff --git a/dpctl/_sycl_event.pyx b/dpctl/_sycl_event.pyx index 1527af6023..8766408644 100644 --- a/dpctl/_sycl_event.pyx +++ b/dpctl/_sycl_event.pyx @@ -38,7 +38,6 @@ from ._backend cimport ( # noqa: E211 DPCTLEvent_GetProfilingInfoSubmit, DPCTLEvent_GetWaitList, DPCTLEvent_Wait, - DPCTLEvent_WaitAndThrow, DPCTLEventVector_Delete, DPCTLEventVector_GetAt, DPCTLEventVector_Size, @@ -220,7 +219,8 @@ cdef class SyclEvent(_SyclEvent): @staticmethod cdef void _wait(SyclEvent event): - with nogil: DPCTLEvent_Wait(event._event_ref) + with nogil: + DPCTLEvent_Wait(event._event_ref) @staticmethod def wait_for(event): @@ -417,4 +417,5 @@ cdef class SyclEvent(_SyclEvent): Returns: None """ - with nogil: DPCTLEvent_Wait(self._event_ref) + with nogil: + DPCTLEvent_Wait(self._event_ref) diff --git a/dpctl/_sycl_platform.pyx b/dpctl/_sycl_platform.pyx index 88dcb25dc3..7ceb725083 100644 --- a/dpctl/_sycl_platform.pyx +++ b/dpctl/_sycl_platform.pyx @@ -238,8 +238,8 @@ cdef class SyclPlatform(_SyclPlatform): Args: verbosity (Literal[0, 1, 2], optional):. The verbosity controls how much information is printed by the - function. Value ``0`` is the lowest level set by default and ``2`` - is the highest level to print the most verbose output. + function. Value ``0`` is the lowest level set by default and + ``2`` is the highest level to print the most verbose output. Default: ``0`` """ cdef size_t v = 0 @@ -367,7 +367,8 @@ cdef class SyclPlatform(_SyclPlatform): def __hash__(self): """ - Returns a hash value by hashing the underlying ``sycl::platform`` object. + Returns a hash value by hashing the underlying ``sycl::platform`` + object. Returns: int: @@ -579,11 +580,11 @@ def lsplatform(verbosity=0): if v != 0: print("Platform ", i, "::") PRef = DPCTLPlatformVector_GetAt(PVRef, i) - info_str = DPCTLPlatformMgr_GetInfo(PRef,v) + info_str = DPCTLPlatformMgr_GetInfo(PRef, v) py_info = info_str DPCTLCString_Delete(info_str) DPCTLPlatform_Delete(PRef) - print(py_info.decode("utf-8"),end='') + print(py_info.decode("utf-8"), end="") DPCTLPlatformVector_Delete(PVRef) diff --git a/dpctl/_sycl_queue.pxd b/dpctl/_sycl_queue.pxd index 4fde4af77a..5836bc95de 100644 --- a/dpctl/_sycl_queue.pxd +++ b/dpctl/_sycl_queue.pxd @@ -65,8 +65,10 @@ cdef public api class SyclQueue (_SyclQueue) [ _arg_data_type *kargty ) cdef int _populate_range(self, size_t Range[3], list gS, size_t nGS) + @staticmethod cdef SyclQueue _create(DPCTLSyclQueueRef qref) + @staticmethod cdef SyclQueue _create_from_context_and_device( SyclContext ctx, SyclDevice dev, int props=* diff --git a/dpctl/_sycl_queue.pyx b/dpctl/_sycl_queue.pyx index ad44e8faa2..e04658dc44 100644 --- a/dpctl/_sycl_queue.pyx +++ b/dpctl/_sycl_queue.pyx @@ -79,7 +79,7 @@ from cpython.buffer cimport ( PyObject_CheckBuffer, PyObject_GetBuffer, ) -from cpython.ref cimport Py_DECREF, Py_INCREF, PyObject +from cpython.ref cimport Py_INCREF, PyObject from libc.stdlib cimport free, malloc import collections.abc @@ -87,7 +87,10 @@ import logging cdef extern from "_host_task_util.hpp": - DPCTLSyclEventRef async_dec_ref(DPCTLSyclQueueRef, PyObject **, size_t, DPCTLSyclEventRef *, size_t, int *) nogil + DPCTLSyclEventRef async_dec_ref( + DPCTLSyclQueueRef, PyObject **, + size_t, DPCTLSyclEventRef *, size_t, int * + ) nogil __all__ = [ @@ -155,7 +158,8 @@ cdef class LocalAccessor: TypeError: If the given shape is not a tuple or list. ValueError: - If the given shape sequence is not between one and three elements long. + If the given shape sequence is not between one and + three elements long. TypeError: If the shape is not a sequence of integers. ValueError: @@ -166,43 +170,51 @@ cdef class LocalAccessor: cdef _md_local_accessor lacc def __cinit__(self, str dtype, shape): - if not isinstance(shape, (list, tuple)): - raise TypeError(f"`shape` must be a list or tuple, got {type(shape)}") - ndim = len(shape) - if ndim < 1 or ndim > 3: - raise ValueError("LocalAccessor must have dimension between one and three") - for s in shape: + if not isinstance(shape, (list, tuple)): + raise TypeError( + f"`shape` must be a list or tuple, got {type(shape)}" + ) + ndim = len(shape) + if ndim < 1 or ndim > 3: + raise ValueError( + "LocalAccessor must have dimension between one and three" + ) + for s in shape: if not isinstance(s, numbers.Integral): - raise TypeError("LocalAccessor shape must be a sequence of integers") + raise TypeError( + "LocalAccessor shape must be a sequence of integers" + ) if s < 0: - raise ValueError("LocalAccessor dimensions must be non-negative") - self.lacc.ndim = ndim - self.lacc.dim0 = shape[0] - self.lacc.dim1 = shape[1] if ndim > 1 else 1 - self.lacc.dim2 = shape[2] if ndim > 2 else 1 - - if dtype == 'i1': - self.lacc.dpctl_type_id = _arg_data_type._INT8_T - elif dtype == 'u1': - self.lacc.dpctl_type_id = _arg_data_type._UINT8_T - elif dtype == 'i2': - self.lacc.dpctl_type_id = _arg_data_type._INT16_T - elif dtype == 'u2': - self.lacc.dpctl_type_id = _arg_data_type._UINT16_T - elif dtype == 'i4': - self.lacc.dpctl_type_id = _arg_data_type._INT32_T - elif dtype == 'u4': - self.lacc.dpctl_type_id = _arg_data_type._UINT32_T - elif dtype == 'i8': - self.lacc.dpctl_type_id = _arg_data_type._INT64_T - elif dtype == 'u8': - self.lacc.dpctl_type_id = _arg_data_type._UINT64_T - elif dtype == 'f4': - self.lacc.dpctl_type_id = _arg_data_type._FLOAT - elif dtype == 'f8': - self.lacc.dpctl_type_id = _arg_data_type._DOUBLE - else: - raise ValueError(f"Unrecognized type value: '{dtype}'") + raise ValueError( + "LocalAccessor dimensions must be non-negative" + ) + self.lacc.ndim = ndim + self.lacc.dim0 = shape[0] + self.lacc.dim1 = shape[1] if ndim > 1 else 1 + self.lacc.dim2 = shape[2] if ndim > 2 else 1 + + if dtype == "i1": + self.lacc.dpctl_type_id = _arg_data_type._INT8_T + elif dtype == "u1": + self.lacc.dpctl_type_id = _arg_data_type._UINT8_T + elif dtype == "i2": + self.lacc.dpctl_type_id = _arg_data_type._INT16_T + elif dtype == "u2": + self.lacc.dpctl_type_id = _arg_data_type._UINT16_T + elif dtype == "i4": + self.lacc.dpctl_type_id = _arg_data_type._INT32_T + elif dtype == "u4": + self.lacc.dpctl_type_id = _arg_data_type._UINT32_T + elif dtype == "i8": + self.lacc.dpctl_type_id = _arg_data_type._INT64_T + elif dtype == "u8": + self.lacc.dpctl_type_id = _arg_data_type._UINT64_T + elif dtype == "f4": + self.lacc.dpctl_type_id = _arg_data_type._FLOAT + elif dtype == "f8": + self.lacc.dpctl_type_id = _arg_data_type._DOUBLE + else: + raise ValueError(f"Unrecognized type value: '{dtype}'") def __repr__(self): return f"LocalAccessor({self.lacc.ndim})" @@ -225,7 +237,6 @@ cdef class _kernel_arg_type: def __cinit__(self): self._name = "kernel_arg_type" - @property def __name__(self): return self._name @@ -458,8 +469,10 @@ cdef DPCTLSyclEventRef _memcpy_impl( if isinstance(src, _Memory): c_src_ptr = (<_Memory>src).get_data_ptr() elif _is_buffer(src): - ret_code = PyObject_GetBuffer(src, &src_buf_view, PyBUF_SIMPLE | PyBUF_ANY_CONTIGUOUS) - if ret_code != 0: # pragma: no cover + ret_code = PyObject_GetBuffer( + src, &src_buf_view, PyBUF_SIMPLE | PyBUF_ANY_CONTIGUOUS + ) + if ret_code != 0: # pragma: no cover raise RuntimeError("Could not access buffer") c_src_ptr = src_buf_view.buf src_is_buf = True @@ -468,13 +481,16 @@ cdef DPCTLSyclEventRef _memcpy_impl( "Parameter `src` should have either type " "`dpctl.memory._Memory` or a type that " "supports Python buffer protocol" - ) + ) if isinstance(dst, _Memory): c_dst_ptr = (<_Memory>dst).get_data_ptr() elif _is_buffer(dst): - ret_code = PyObject_GetBuffer(dst, &dst_buf_view, PyBUF_SIMPLE | PyBUF_ANY_CONTIGUOUS | PyBUF_WRITABLE) - if ret_code != 0: # pragma: no cover + ret_code = PyObject_GetBuffer( + dst, &dst_buf_view, + PyBUF_SIMPLE | PyBUF_ANY_CONTIGUOUS | PyBUF_WRITABLE + ) + if ret_code != 0: # pragma: no cover if src_is_buf: PyBuffer_Release(&src_buf_view) raise RuntimeError("Could not access buffer") @@ -485,7 +501,7 @@ cdef DPCTLSyclEventRef _memcpy_impl( "Parameter `dst` should have either type " "`dpctl.memory._Memory` or a type that " "supports Python buffer protocol" - ) + ) if dep_events_count == 0 or dep_events is NULL: ERef = DPCTLQueue_Memcpy(q._queue_ref, c_dst_ptr, c_src_ptr, byte_count) @@ -623,7 +639,7 @@ cdef class SyclQueue(_SyclQueue): "but {} were given.".format(len(args)) ) props = _parse_queue_properties( - kwargs.pop('property', _queue_property_type._DEFAULT_PROPERTY) + kwargs.pop("property", _queue_property_type._DEFAULT_PROPERTY) ) if (kwargs): raise TypeError( @@ -685,9 +701,8 @@ cdef class SyclQueue(_SyclQueue): elif status == -3 or status == -7: raise SyclQueueCreationError( "SYCL Context could not be created " + - ("by default constructor" if len_args == 0 else - "from '{}'.".format(arg) - ) + "by default constructor" if len_args == 0 else + "from '{}'.".format(arg) ) elif status == -4 or status == -6: if len_args == 2: @@ -1068,7 +1083,6 @@ cdef class SyclQueue(_SyclQueue): """ return self._queue_ref - cpdef SyclEvent _submit_keep_args_alive( self, object args, @@ -1138,13 +1152,13 @@ cdef class SyclQueue(_SyclQueue): free(depEvents) if (status != 0): - with nogil: DPCTLEvent_Wait(htERef) + with nogil: + DPCTLEvent_Wait(htERef) DPCTLEvent_Delete(htERef) raise RuntimeError("Could not submit keep_args_alive host_task") return SyclEvent._create(htERef) - cpdef SyclEvent submit_async( self, SyclKernel kernel, @@ -1189,15 +1203,12 @@ cdef class SyclQueue(_SyclQueue): cdef _arg_data_type *kargty = NULL cdef DPCTLSyclEventRef *depEvents = NULL cdef DPCTLSyclEventRef Eref = NULL - cdef DPCTLSyclEventRef htEref = NULL cdef int ret = 0 cdef size_t gRange[3] cdef size_t lRange[3] cdef size_t nGS = len(gS) cdef size_t nLS = len(lS) if lS is not None else 0 cdef size_t nDE = len(dEvents) if dEvents is not None else 0 - cdef PyObject *args_raw = NULL - cdef ssize_t i = 0 # Allocate the arrays to be sent to DPCTLQueue_Submit kargs = malloc(len(args) * sizeof(void*)) @@ -1348,7 +1359,8 @@ cdef class SyclQueue(_SyclQueue): return e cpdef void wait(self): - with nogil: DPCTLQueue_Wait(self._queue_ref) + with nogil: + DPCTLQueue_Wait(self._queue_ref) cpdef memcpy(self, dest, src, size_t count): """Copy memory from `src` to `dst`""" @@ -1359,10 +1371,13 @@ cdef class SyclQueue(_SyclQueue): raise RuntimeError( "SyclQueue.memcpy operation encountered an error" ) - with nogil: DPCTLEvent_Wait(ERef) + with nogil: + DPCTLEvent_Wait(ERef) DPCTLEvent_Delete(ERef) - cpdef SyclEvent memcpy_async(self, dest, src, size_t count, list dEvents=None): + cpdef SyclEvent memcpy_async( + self, dest, src, size_t count, list dEvents=None + ): """Copy memory from ``src`` to ``dst``""" cdef DPCTLSyclEventRef ERef = NULL cdef DPCTLSyclEventRef *depEvents = NULL @@ -1410,10 +1425,9 @@ cdef class SyclQueue(_SyclQueue): ERef = DPCTLQueue_Prefetch(self._queue_ref, ptr, count) if (ERef is NULL): - raise RuntimeError( - "SyclQueue.prefetch encountered an error" - ) - with nogil: DPCTLEvent_Wait(ERef) + raise RuntimeError("SyclQueue.prefetch encountered an error") + with nogil: + DPCTLEvent_Wait(ERef) DPCTLEvent_Delete(ERef) cpdef mem_advise(self, mem, size_t count, int advice): @@ -1433,7 +1447,8 @@ cdef class SyclQueue(_SyclQueue): raise RuntimeError( "SyclQueue.mem_advise operation encountered an error" ) - with nogil: DPCTLEvent_Wait(ERef) + with nogil: + DPCTLEvent_Wait(ERef) DPCTLEvent_Delete(ERef) @property @@ -1689,11 +1704,15 @@ cdef class WorkGroupMemory: nbytes = (args[0]) else: if not isinstance(args[0], str): - raise TypeError("WorkGroupMemory constructor expects first" - f"argument to be `str`, but got {type(args[0])}") + raise TypeError( + "WorkGroupMemory constructor expects first" + f"argument to be `str`, but got {type(args[0])}" + ) if not isinstance(args[1], numbers.Integral): - raise TypeError("WorkGroupMemory constructor expects second" - f"argument to be `int`, but got {type(args[1])}") + raise TypeError( + "WorkGroupMemory constructor expects second" + f"argument to be `int`, but got {type(args[1])}" + ) dtype = (args[0]) count = (args[1]) if not dtype[0] in ["i", "u", "f"]: @@ -1708,7 +1727,7 @@ cdef class WorkGroupMemory: self._mem_ref = DPCTLWorkGroupMemory_Create(nbytes) - """Check whether the work_group_memory extension is available""" + # Check whether the work_group_memory extension is available @staticmethod def is_available(): return DPCTLWorkGroupMemory_Available() diff --git a/dpctl/_sycl_queue_manager.pyx b/dpctl/_sycl_queue_manager.pyx index 80f7cc6eb6..bf720e9402 100644 --- a/dpctl/_sycl_queue_manager.pyx +++ b/dpctl/_sycl_queue_manager.pyx @@ -58,7 +58,8 @@ cdef class _DeviceDefaultQueueCache: raise TypeError if ctx_dev in self.__device_queue_map__: return self.__device_queue_map__[ctx_dev], False - if q is None: q = SyclQueue(*ctx_dev) + if q is None: + q = SyclQueue(*ctx_dev) self.__device_queue_map__[ctx_dev] = q return q, True @@ -67,13 +68,14 @@ cdef class _DeviceDefaultQueueCache: def __copy__(self): cdef _DeviceDefaultQueueCache _copy = _DeviceDefaultQueueCache.__new__( - _DeviceDefaultQueueCache) + _DeviceDefaultQueueCache + ) _copy._update_map(self.__device_queue_map__) return _copy _global_device_queue_cache = ContextVar( - 'global_device_queue_cache', + "global_device_queue_cache", default=_DeviceDefaultQueueCache() ) @@ -96,5 +98,6 @@ cpdef object get_device_cached_queue(object key): """ _cache = _global_device_queue_cache.get() q_, changed_ = _cache.get_or_create(key) - if changed_: _global_device_queue_cache.set(_cache) + if changed_: + _global_device_queue_cache.set(_cache) return q_ diff --git a/dpctl/memory/_memory.pxd b/dpctl/memory/_memory.pxd index f407347052..71c17b9c80 100644 --- a/dpctl/memory/_memory.pxd +++ b/dpctl/memory/_memory.pxd @@ -56,11 +56,15 @@ cdef public api class _Memory [object Py_MemoryObject, type Py_MemoryType]: @staticmethod cdef SyclDevice get_pointer_device( - DPCTLSyclUSMRef p, SyclContext ctx) + DPCTLSyclUSMRef p, SyclContext ctx + ) + @staticmethod cdef bytes get_pointer_type(DPCTLSyclUSMRef p, SyclContext ctx) + @staticmethod cdef _usm_type get_pointer_type_enum(DPCTLSyclUSMRef p, SyclContext ctx) + @staticmethod cdef object create_from_usm_pointer_size_qref( DPCTLSyclUSMRef USMRef, @@ -70,16 +74,19 @@ cdef public api class _Memory [object Py_MemoryObject, type Py_MemoryType]: ) -cdef public api class MemoryUSMShared(_Memory) [object PyMemoryUSMSharedObject, - type PyMemoryUSMSharedType]: +cdef public api class MemoryUSMShared(_Memory) [ + object PyMemoryUSMSharedObject, type PyMemoryUSMSharedType +]: pass -cdef public api class MemoryUSMHost(_Memory) [object PyMemoryUSMHostObject, - type PyMemoryUSMHostType]: +cdef public api class MemoryUSMHost(_Memory) [ + object PyMemoryUSMHostObject, type PyMemoryUSMHostType +]: pass -cdef public api class MemoryUSMDevice(_Memory) [object PyMemoryUSMDeviceObject, - type PyMemoryUSMDeviceType]: +cdef public api class MemoryUSMDevice(_Memory) [ + object PyMemoryUSMDeviceObject, type PyMemoryUSMDeviceType +]: pass diff --git a/dpctl/memory/_memory.pyx b/dpctl/memory/_memory.pyx index 6d63dd1688..2cf011bc2d 100644 --- a/dpctl/memory/_memory.pyx +++ b/dpctl/memory/_memory.pyx @@ -38,7 +38,6 @@ from dpctl._backend cimport ( # noqa: E211 DPCTLDevice_Copy, DPCTLEvent_Delete, DPCTLEvent_Wait, - DPCTLfree_with_queue, DPCTLmalloc_device, DPCTLmalloc_host, DPCTLmalloc_shared, @@ -84,6 +83,7 @@ cdef extern from "_opaque_smart_ptr.hpp": void OpaqueSmartPtr_Delete(void *) nogil void * OpaqueSmartPtr_Get(void *) nogil + class USMAllocationError(Exception): """ An exception raised when Universal Shared Memory (USM) allocation @@ -127,7 +127,8 @@ cdef void copy_via_host(void *dest_ptr, SyclQueue dest_queue, 1 ) DPCTLEvent_Delete(E1Ref) - with nogil: DPCTLEvent_Wait(E2Ref) + with nogil: + DPCTLEvent_Wait(E2Ref) DPCTLEvent_Delete(E2Ref) @@ -178,25 +179,31 @@ cdef class _Memory: QRef = queue.get_queue_ref() if (ptr_type == b"shared"): if alignment > 0: - with nogil: p = DPCTLaligned_alloc_shared( - alignment, nbytes, QRef - ) + with nogil: + p = DPCTLaligned_alloc_shared( + alignment, nbytes, QRef + ) else: - with nogil: p = DPCTLmalloc_shared(nbytes, QRef) + with nogil: + p = DPCTLmalloc_shared(nbytes, QRef) elif (ptr_type == b"host"): if alignment > 0: - with nogil: p = DPCTLaligned_alloc_host( - alignment, nbytes, QRef - ) + with nogil: + p = DPCTLaligned_alloc_host( + alignment, nbytes, QRef + ) else: - with nogil: p = DPCTLmalloc_host(nbytes, QRef) + with nogil: + p = DPCTLmalloc_host(nbytes, QRef) elif (ptr_type == b"device"): if (alignment > 0): - with nogil: p = DPCTLaligned_alloc_device( - alignment, nbytes, QRef - ) + with nogil: + p = DPCTLaligned_alloc_device( + alignment, nbytes, QRef + ) else: - with nogil: p = DPCTLmalloc_device(nbytes, QRef) + with nogil: + p = DPCTLmalloc_device(nbytes, QRef) else: raise RuntimeError( "Pointer type '{}' is not recognized".format( @@ -232,7 +239,7 @@ cdef class _Memory: self._memory_ptr = other_mem._memory_ptr self._opaque_ptr = OpaqueSmartPtr_Copy(other_mem._opaque_ptr) self.refobj = None - elif hasattr(other, '__sycl_usm_array_interface__'): + elif hasattr(other, "__sycl_usm_array_interface__"): other_iface = other.__sycl_usm_array_interface__ if isinstance(other_iface, dict): other_buf = _USMBufferData.from_sycl_usm_ary_iface(other_iface) @@ -272,7 +279,7 @@ cdef class _Memory: if UsmTy == _usm_type._USM_DEVICE: raise ValueError("USM Device memory is not host accessible") buffer.buf = self._memory_ptr - buffer.format = 'B' # byte + buffer.format = "B" # byte buffer.internal = NULL # see References buffer.itemsize = 1 buffer.len = self.nbytes @@ -410,7 +417,6 @@ cdef class _Memory: to query against ``self.sycl_context`` - the context used to create the allocation. """ - cdef const char* kind cdef SyclContext ctx cdef SyclQueue q if syclobj is None: @@ -443,7 +449,6 @@ cdef class _Memory: to query against ``self.sycl_context`` - the context used to create the allocation. """ - cdef const char* kind cdef SyclContext ctx cdef SyclQueue q if syclobj is None: @@ -493,7 +498,8 @@ cdef class _Memory: self._memory_ptr, # source self.nbytes ) - with nogil: DPCTLEvent_Wait(ERef) + with nogil: + DPCTLEvent_Wait(ERef) DPCTLEvent_Delete(ERef) return obj @@ -518,7 +524,8 @@ cdef class _Memory: &host_buf[0], # source buf_len ) - with nogil: DPCTLEvent_Wait(ERef) + with nogil: + DPCTLEvent_Wait(ERef) DPCTLEvent_Delete(ERef) cpdef copy_from_device(self, object sycl_usm_ary): @@ -532,7 +539,7 @@ cdef class _Memory: cdef SyclQueue this_queue = None cdef SyclQueue src_queue = None - if not hasattr(sycl_usm_ary, '__sycl_usm_array_interface__'): + if not hasattr(sycl_usm_ary, "__sycl_usm_array_interface__"): raise ValueError( "Object does not implement " "`__sycl_usm_array_interface__` protocol" @@ -560,12 +567,13 @@ cdef class _Memory: src_buf.p, src_buf.nbytes ) - with nogil: DPCTLEvent_Wait(ERef) + with nogil: + DPCTLEvent_Wait(ERef) DPCTLEvent_Delete(ERef) else: copy_via_host( - self._memory_ptr, this_queue, # dest - src_buf.p, src_queue, # src + self._memory_ptr, this_queue, # dest + src_buf.p, src_queue, # src src_buf.nbytes ) else: @@ -643,13 +651,13 @@ cdef class _Memory: p, ctx.get_context_ref() ) if usm_ty == _usm_type._USM_DEVICE: - return b'device' + return b"device" elif usm_ty == _usm_type._USM_SHARED: - return b'shared' + return b"shared" elif usm_ty == _usm_type._USM_HOST: - return b'host' + return b"host" else: - return b'unknown' + return b"unknown" @staticmethod cdef _usm_type get_pointer_type_enum(DPCTLSyclUSMRef p, SyclContext ctx): @@ -931,6 +939,7 @@ def as_usm_memory(obj): format(obj) ) + cdef api void * Memory_GetOpaquePointer(_Memory obj): "Opaque pointer value" return obj.get_opaque_ptr() diff --git a/dpctl/memory/_sycl_usm_array_interface_utils.pxi b/dpctl/memory/_sycl_usm_array_interface_utils.pxi index 5215713134..83be116541 100644 --- a/dpctl/memory/_sycl_usm_array_interface_utils.pxi +++ b/dpctl/memory/_sycl_usm_array_interface_utils.pxi @@ -20,7 +20,8 @@ cdef bint _valid_usm_ptr_and_context(DPCTLSyclUSMRef ptr, SyclContext ctx): cdef DPCTLSyclQueueRef _queue_ref_copy_from_SyclQueue( - DPCTLSyclUSMRef ptr, SyclQueue q): + DPCTLSyclUSMRef ptr, SyclQueue q +): """ Check that USM ptr is consistent with SYCL context in the queue, and return a copy of QueueRef if so, or NULL otherwise. """ @@ -32,7 +33,8 @@ cdef DPCTLSyclQueueRef _queue_ref_copy_from_SyclQueue( cdef DPCTLSyclQueueRef _queue_ref_copy_from_USMRef_and_SyclContext( - DPCTLSyclUSMRef ptr, SyclContext ctx): + DPCTLSyclUSMRef ptr, SyclContext ctx +): """ Obtain device from pointer and sycl context, use context and device to create a queue from which this memory can be accessible. @@ -44,7 +46,8 @@ cdef DPCTLSyclQueueRef _queue_ref_copy_from_USMRef_and_SyclContext( cdef DPCTLSyclQueueRef get_queue_ref_from_ptr_and_syclobj( - DPCTLSyclUSMRef ptr, object syclobj): + DPCTLSyclUSMRef ptr, object syclobj +): """ Constructs queue from pointer and syclobject from __sycl_usm_array_interface__ """ @@ -78,8 +81,9 @@ cdef DPCTLSyclQueueRef get_queue_ref_from_ptr_and_syclobj( cdef object _pointers_from_shape_and_stride( - int nd, object ary_shape, Py_ssize_t itemsize, Py_ssize_t ary_offset, - object ary_strides): + int nd, object ary_shape, Py_ssize_t itemsize, + Py_ssize_t ary_offset, object ary_strides +): """ Internal utility: for given array data about shape/layout/element compute left-most displacement when enumerating all elements of the array @@ -141,13 +145,13 @@ cdef class _USMBufferData: @staticmethod cdef _USMBufferData from_sycl_usm_ary_iface(dict ary_iface): - cdef object ary_data_tuple = ary_iface.get('data', None) - cdef object ary_typestr = ary_iface.get('typestr', None) - cdef object ary_shape = ary_iface.get('shape', None) - cdef object ary_strides = ary_iface.get('strides', None) - cdef object ary_syclobj = ary_iface.get('syclobj', None) - cdef Py_ssize_t ary_offset = ary_iface.get('offset', 0) - cdef int ary_version = ary_iface.get('version', 0) + cdef object ary_data_tuple = ary_iface.get("data", None) + cdef object ary_typestr = ary_iface.get("typestr", None) + cdef object ary_shape = ary_iface.get("shape", None) + cdef object ary_strides = ary_iface.get("strides", None) + cdef object ary_syclobj = ary_iface.get("syclobj", None) + cdef Py_ssize_t ary_offset = ary_iface.get("offset", 0) + cdef int ary_version = ary_iface.get("version", 0) cdef size_t arr_data_ptr = 0 cdef DPCTLSyclUSMRef memRef = NULL cdef Py_ssize_t itemsize = -1 diff --git a/dpctl/program/_program.pxd b/dpctl/program/_program.pxd index 2aebbe477c..dc4208a29b 100644 --- a/dpctl/program/_program.pxd +++ b/dpctl/program/_program.pxd @@ -41,13 +41,13 @@ cdef api class SyclKernel [object PySyclKernelObject, type PySyclKernelType]: cdef api class SyclProgram [object PySyclProgramObject, type PySyclProgramType]: - ''' Wraps a sycl::kernel_bundle object created by - using SYCL interoperability layer for OpenCL and Level-Zero backends. - - SyclProgram exposes the C API from dpctl_sycl_kernel_bundle_interface.h. A - SyclProgram can be created from either a source string or a SPIR-V - binary file. - ''' + """ + Wraps a sycl::kernel_bundle object created + by using SYCL interoperability layer for OpenCL and Level-Zero backends. + SyclProgram exposes the C API from dpctl_sycl_kernel_bundle_interface.h. + A SyclProgram can be created from either a source string or a SPIR-V + binary file. + """ cdef DPCTLSyclKernelBundleRef _program_ref @staticmethod diff --git a/dpctl/program/_program.pyx b/dpctl/program/_program.pyx index 67c75bcc7c..3859314505 100644 --- a/dpctl/program/_program.pyx +++ b/dpctl/program/_program.pyx @@ -28,7 +28,6 @@ a OpenCL source string or a SPIR-V binary file. from libc.stdint cimport uint32_t from dpctl._backend cimport ( # noqa: E211, E402; - DPCTLCString_Delete, DPCTLKernel_Copy, DPCTLKernel_Delete, DPCTLKernel_GetCompileNumSubGroups, @@ -125,7 +124,7 @@ cdef class SyclKernel: """ cdef size_t v = DPCTLKernel_GetPreferredWorkGroupSizeMultiple( self._kernel_ref - ) + ) return v @property @@ -209,14 +208,14 @@ cdef class SyclProgram: return self._program_ref cpdef SyclKernel get_sycl_kernel(self, str kernel_name): - name = kernel_name.encode('utf8') + name = kernel_name.encode("utf8") return SyclKernel._create( DPCTLKernelBundle_GetKernel(self._program_ref, name), kernel_name ) def has_sycl_kernel(self, str kernel_name): - name = kernel_name.encode('utf8') + name = kernel_name.encode("utf8") return DPCTLKernelBundle_HasKernel(self._program_ref, name) def addressof_ref(self): @@ -261,8 +260,8 @@ cpdef create_program_from_source(SyclQueue q, str src, str copts=""): """ cdef DPCTLSyclKernelBundleRef KBref - cdef bytes bSrc = src.encode('utf8') - cdef bytes bCOpts = copts.encode('utf8') + cdef bytes bSrc = src.encode("utf8") + cdef bytes bCOpts = copts.encode("utf8") cdef const char *Src = bSrc cdef const char *COpts = bCOpts cdef DPCTLSyclContextRef CRef = q.get_sycl_context().get_context_ref() @@ -280,9 +279,9 @@ cpdef create_program_from_spirv(SyclQueue q, const unsigned char[:] IL, """ Creates a Sycl interoperability program from an SPIR-V binary. - We use the :c:func:`DPCTLKernelBundle_CreateFromOCLSpirv` C API function to - create a ``sycl::kernel_bundle`` object - from an compiled SPIR-V binary file. + We use the :c:func:`DPCTLKernelBundle_CreateFromOCLSpirv` C API function + to create a ``sycl::kernel_bundle`` + object from an compiled SPIR-V binary file. Parameters: q (:class:`dpctl.SyclQueue`) @@ -310,7 +309,7 @@ cpdef create_program_from_spirv(SyclQueue q, const unsigned char[:] IL, cdef DPCTLSyclContextRef CRef = q.get_sycl_context().get_context_ref() cdef DPCTLSyclDeviceRef DRef = q.get_sycl_device().get_device_ref() cdef size_t length = IL.shape[0] - cdef bytes bCOpts = copts.encode('utf8') + cdef bytes bCOpts = copts.encode("utf8") cdef const char *COpts = bCOpts KBref = DPCTLKernelBundle_CreateFromSpirv( CRef, DRef, dIL, length, COpts @@ -321,7 +320,9 @@ cpdef create_program_from_spirv(SyclQueue q, const unsigned char[:] IL, return SyclProgram._create(KBref) -cdef api DPCTLSyclKernelBundleRef SyclProgram_GetKernelBundleRef(SyclProgram pro): +cdef api DPCTLSyclKernelBundleRef SyclProgram_GetKernelBundleRef( + SyclProgram pro +): """ C-API function to access opaque kernel bundle reference from Python object of type :class:`dpctl.program.SyclKernel`. """ diff --git a/dpctl/sycl.pxd b/dpctl/sycl.pxd index ce0c674512..f0b6f1eb2a 100644 --- a/dpctl/sycl.pxd +++ b/dpctl/sycl.pxd @@ -39,7 +39,7 @@ cdef extern from "sycl/sycl.hpp" namespace "sycl": pass cdef cppclass executable_kernel_bundle \ - "sycl::kernel_bundle": + "sycl::kernel_bundle": pass cdef extern from "syclinterface/dpctl_sycl_extension_interface.h": @@ -47,7 +47,7 @@ cdef extern from "syclinterface/dpctl_sycl_extension_interface.h": ctypedef RawWorkGroupMemoryTy RawWorkGroupMemory cdef extern from "syclinterface/dpctl_sycl_type_casters.hpp" \ - namespace "dpctl::syclinterface": + namespace "dpctl::syclinterface": # queue cdef dpctl_backend.DPCTLSyclQueueRef wrap_queue \ "dpctl::syclinterface::wrap" (const queue *) @@ -63,20 +63,25 @@ cdef extern from "syclinterface/dpctl_sycl_type_casters.hpp" \ # context cdef dpctl_backend.DPCTLSyclContextRef wrap_context \ "dpctl::syclinterface::wrap" (const context *) - cdef context * unwrap_context "dpctl::syclinterface::unwrap" ( - dpctl_backend.DPCTLSyclContextRef) + cdef context * unwrap_context \ + "dpctl::syclinterface::unwrap" ( + dpctl_backend.DPCTLSyclContextRef + ) # event cdef dpctl_backend.DPCTLSyclEventRef wrap_event \ "dpctl::syclinterface::wrap" (const event *) - cdef event * unwrap_event "dpctl::syclinterface::unwrap" ( - dpctl_backend.DPCTLSyclEventRef) + cdef event * unwrap_event \ + "dpctl::syclinterface::unwrap" ( + dpctl_backend.DPCTLSyclEventRef + ) # work group memory extension cdef dpctl_backend.DPCTLSyclWorkGroupMemoryRef wrap_work_group_memory \ - "dpctl::syclinterface::wrap" \ - (const RawWorkGroupMemory *) + "dpctl::syclinterface::wrap" \ + (const RawWorkGroupMemory *) cdef RawWorkGroupMemory * unwrap_work_group_memory \ - "dpctl::syclinterface::unwrap" ( - dpctl_backend.DPCTLSyclWorkGroupMemoryRef) + "dpctl::syclinterface::unwrap" ( + dpctl_backend.DPCTLSyclWorkGroupMemoryRef + ) diff --git a/dpctl/tensor/_dlpack.pxd b/dpctl/tensor/_dlpack.pxd index 028c2b10c7..1a10c79fc7 100644 --- a/dpctl/tensor/_dlpack.pxd +++ b/dpctl/tensor/_dlpack.pxd @@ -26,25 +26,29 @@ from .._sycl_device cimport SyclDevice from ._usmarray cimport usm_ndarray -cdef extern from 'dlpack/dlpack.h' nogil: - int device_CPU 'kDLCPU' - int device_CUDA 'kDLCUDA' - int device_CUDAHost 'kDLCUDAHost' - int device_CUDAManaged 'kDLCUDAManaged' - int device_DLROCM 'kDLROCM' - int device_ROCMHost 'kDLROCMHost' - int device_OpenCL 'kDLOpenCL' - int device_Vulkan 'kDLVulkan' - int device_Metal 'kDLMetal' - int device_VPI 'kDLVPI' - int device_OneAPI 'kDLOneAPI' - int device_WebGPU 'kDLWebGPU' - int device_Hexagon 'kDLHexagon' - int device_MAIA 'kDLMAIA' +cdef extern from "dlpack/dlpack.h" nogil: + int device_CPU "kDLCPU" + int device_CUDA "kDLCUDA" + int device_CUDAHost "kDLCUDAHost" + int device_CUDAManaged "kDLCUDAManaged" + int device_DLROCM "kDLROCM" + int device_ROCMHost "kDLROCMHost" + int device_OpenCL "kDLOpenCL" + int device_Vulkan "kDLVulkan" + int device_Metal "kDLMetal" + int device_VPI "kDLVPI" + int device_OneAPI "kDLOneAPI" + int device_WebGPU "kDLWebGPU" + int device_Hexagon "kDLHexagon" + int device_MAIA "kDLMAIA" cpdef object to_dlpack_capsule(usm_ndarray array) except + -cpdef object to_dlpack_versioned_capsule(usm_ndarray array, bint copied) except + -cpdef object numpy_to_dlpack_versioned_capsule(ndarray array, bint copied) except + +cpdef object to_dlpack_versioned_capsule( + usm_ndarray array, bint copied +) except + +cpdef object numpy_to_dlpack_versioned_capsule( + ndarray array, bint copied +) except + cpdef object from_dlpack_capsule(object dltensor) except + cdef class DLPackCreationError(Exception): diff --git a/dpctl/tensor/_dlpack.pyx b/dpctl/tensor/_dlpack.pyx index 611fe27d2a..29b3eccead 100644 --- a/dpctl/tensor/_dlpack.pyx +++ b/dpctl/tensor/_dlpack.pyx @@ -23,7 +23,7 @@ cdef extern from "numpy/npy_no_deprecated_api.h": cimport cpython from libc cimport stdlib -from libc.stdint cimport int32_t, int64_t, uint8_t, uint16_t, uint32_t, uint64_t +from libc.stdint cimport int64_t, uint8_t, uint16_t, uint32_t, uint64_t from numpy cimport ndarray cimport dpctl as c_dpctl @@ -48,7 +48,7 @@ import dpctl.memory as dpmem from ._device import Device -cdef extern from 'dlpack/dlpack.h' nogil: +cdef extern from "dlpack/dlpack.h" nogil: cdef int DLPACK_MAJOR_VERSION cdef int DLPACK_MINOR_VERSION @@ -134,13 +134,15 @@ def get_build_dlpack_version(): cdef void _pycapsule_deleter(object dlt_capsule) noexcept: cdef DLManagedTensor *dlm_tensor = NULL - if cpython.PyCapsule_IsValid(dlt_capsule, 'dltensor'): + if cpython.PyCapsule_IsValid(dlt_capsule, "dltensor"): dlm_tensor = cpython.PyCapsule_GetPointer( - dlt_capsule, 'dltensor') + dlt_capsule, "dltensor") dlm_tensor.deleter(dlm_tensor) -cdef void _managed_tensor_deleter(DLManagedTensor *dlm_tensor) noexcept with gil: +cdef void _managed_tensor_deleter( + DLManagedTensor *dlm_tensor +) noexcept with gil: if dlm_tensor is not NULL: # we only delete shape, because we make single allocation to # acommodate both shape and strides if strides are needed @@ -152,13 +154,15 @@ cdef void _managed_tensor_deleter(DLManagedTensor *dlm_tensor) noexcept with gil cdef void _pycapsule_versioned_deleter(object dlt_capsule) noexcept: cdef DLManagedTensorVersioned *dlmv_tensor = NULL - if cpython.PyCapsule_IsValid(dlt_capsule, 'dltensor_versioned'): + if cpython.PyCapsule_IsValid(dlt_capsule, "dltensor_versioned"): dlmv_tensor = cpython.PyCapsule_GetPointer( - dlt_capsule, 'dltensor_versioned') + dlt_capsule, "dltensor_versioned") dlmv_tensor.deleter(dlmv_tensor) -cdef void _managed_tensor_versioned_deleter(DLManagedTensorVersioned *dlmv_tensor) noexcept with gil: +cdef void _managed_tensor_versioned_deleter( + DLManagedTensorVersioned *dlmv_tensor +) noexcept with gil: if dlmv_tensor is not NULL: # we only delete shape, because we make single allocation to # acommodate both shape and strides if strides are needed @@ -251,7 +255,6 @@ cpdef to_dlpack_capsule(usm_ndarray usm_ary): cdef int i = 0 cdef int device_id = -1 cdef int flags = 0 - cdef char *base_ptr = NULL cdef Py_ssize_t element_offset = 0 cdef Py_ssize_t byte_offset = 0 cdef Py_ssize_t si = 1 @@ -325,7 +328,7 @@ cpdef to_dlpack_capsule(usm_ndarray usm_ary): cpython.Py_INCREF(ary_base) dlm_tensor.deleter = _managed_tensor_deleter - return cpython.PyCapsule_New(dlm_tensor, 'dltensor', _pycapsule_deleter) + return cpython.PyCapsule_New(dlm_tensor, "dltensor", _pycapsule_deleter) cpdef to_dlpack_versioned_capsule(usm_ndarray usm_ary, bint copied): @@ -365,7 +368,6 @@ cpdef to_dlpack_versioned_capsule(usm_ndarray usm_ary, bint copied): cdef int i = 0 cdef int device_id = -1 cdef int flags = 0 - cdef char *base_ptr = NULL cdef Py_ssize_t element_offset = 0 cdef Py_ssize_t byte_offset = 0 cdef Py_ssize_t si = 1 @@ -455,7 +457,9 @@ cpdef to_dlpack_versioned_capsule(usm_ndarray usm_ary, bint copied): cpython.Py_INCREF(ary_base) dlmv_tensor.deleter = _managed_tensor_versioned_deleter - return cpython.PyCapsule_New(dlmv_tensor, 'dltensor_versioned', _pycapsule_versioned_deleter) + return cpython.PyCapsule_New( + dlmv_tensor, "dltensor_versioned", _pycapsule_versioned_deleter + ) cpdef numpy_to_dlpack_versioned_capsule(ndarray npy_ary, bint copied): @@ -486,11 +490,8 @@ cpdef numpy_to_dlpack_versioned_capsule(ndarray npy_ary, bint copied): cdef DLTensor *dl_tensor = NULL cdef uint32_t dlmv_flags = 0 cdef int nd = npy_ary.ndim - cdef Py_ssize_t *shape_ptr = NULL - cdef Py_ssize_t *strides_ptr = NULL cdef int64_t *shape_strides_ptr = NULL cdef int i = 0 - cdef int device_id = -1 cdef Py_ssize_t byte_offset = 0 cdef int itemsize = npy_ary.itemsize @@ -511,8 +512,9 @@ cpdef numpy_to_dlpack_versioned_capsule(ndarray npy_ary, bint copied): if shape[i] != 1 and strides[i] % itemsize != 0: stdlib.free(dlmv_tensor) raise BufferError( - "numpy_to_dlpack_versioned_capsule: DLPack cannot encode " - "an array if strides are not a multiple of itemsize" + "numpy_to_dlpack_versioned_capsule: DLPack cannot " + "encode an array if strides are not a multiple of " + "itemsize" ) shape_strides_ptr = stdlib.malloc((sizeof(int64_t) * 2) * nd) else: @@ -576,7 +578,9 @@ cpdef numpy_to_dlpack_versioned_capsule(ndarray npy_ary, bint copied): cpython.Py_INCREF(npy_ary) dlmv_tensor.deleter = _managed_tensor_versioned_deleter - return cpython.PyCapsule_New(dlmv_tensor, 'dltensor_versioned', _pycapsule_versioned_deleter) + return cpython.PyCapsule_New( + dlmv_tensor, "dltensor_versioned", _pycapsule_versioned_deleter + ) cdef class _DLManagedTensorOwner: @@ -596,7 +600,8 @@ cdef class _DLManagedTensorOwner: @staticmethod cdef _DLManagedTensorOwner _create(DLManagedTensor *dlm_tensor_src): - cdef _DLManagedTensorOwner res = _DLManagedTensorOwner.__new__(_DLManagedTensorOwner) + cdef _DLManagedTensorOwner res + res = _DLManagedTensorOwner.__new__(_DLManagedTensorOwner) res.dlm_tensor = dlm_tensor_src return res @@ -617,15 +622,19 @@ cdef class _DLManagedTensorVersionedOwner: self.dlmv_tensor = NULL @staticmethod - cdef _DLManagedTensorVersionedOwner _create(DLManagedTensorVersioned *dlmv_tensor_src): - cdef _DLManagedTensorVersionedOwner res = _DLManagedTensorVersionedOwner.__new__(_DLManagedTensorVersionedOwner) + cdef _DLManagedTensorVersionedOwner _create( + DLManagedTensorVersioned *dlmv_tensor_src + ): + cdef _DLManagedTensorVersionedOwner res + res = _DLManagedTensorVersionedOwner.__new__( + _DLManagedTensorVersionedOwner + ) res.dlmv_tensor = dlmv_tensor_src return res cdef dict _numpy_array_interface_from_dl_tensor(DLTensor *dlt, bint ro_flag): """Constructs a NumPy `__array_interface__` dictionary from a DLTensor.""" - cdef int i = 0 cdef int itemsize = 0 if dlt.dtype.lanes != 1: @@ -745,11 +754,11 @@ cpdef object from_dlpack_capsule(object py_caps): cdef int64_t stride_i = -1 cdef int64_t shape_i = -1 - if cpython.PyCapsule_IsValid(py_caps, 'dltensor'): + if cpython.PyCapsule_IsValid(py_caps, "dltensor"): dlm_tensor = cpython.PyCapsule_GetPointer( py_caps, "dltensor") dl_tensor = &dlm_tensor.dl_tensor - elif cpython.PyCapsule_IsValid(py_caps, 'dltensor_versioned'): + elif cpython.PyCapsule_IsValid(py_caps, "dltensor_versioned"): dlmv_tensor = cpython.PyCapsule_GetPointer( py_caps, "dltensor_versioned") if dlmv_tensor.version.major > DLPACK_MAJOR_VERSION: @@ -760,7 +769,10 @@ cpdef object from_dlpack_capsule(object py_caps): versioned = 1 readonly = (dlmv_tensor.flags & DLPACK_FLAG_BITMASK_READ_ONLY) != 0 dl_tensor = &dlmv_tensor.dl_tensor - elif cpython.PyCapsule_IsValid(py_caps, 'used_dltensor') or cpython.PyCapsule_IsValid(py_caps, 'used_dltensor_versioned'): + elif ( + cpython.PyCapsule_IsValid(py_caps, "used_dltensor") + or cpython.PyCapsule_IsValid(py_caps, "used_dltensor_versioned") + ): raise ValueError( "A DLPack tensor object can not be consumed multiple times" ) @@ -831,10 +843,10 @@ cpdef object from_dlpack_capsule(object py_caps): # transfer ownership if not versioned: dlm_holder = _DLManagedTensorOwner._create(dlm_tensor) - cpython.PyCapsule_SetName(py_caps, 'used_dltensor') + cpython.PyCapsule_SetName(py_caps, "used_dltensor") else: dlmv_holder = _DLManagedTensorVersionedOwner._create(dlmv_tensor) - cpython.PyCapsule_SetName(py_caps, 'used_dltensor_versioned') + cpython.PyCapsule_SetName(py_caps, "used_dltensor_versioned") if dl_tensor.data is NULL: usm_mem = dpmem.MemoryUSMDevice(sz, q) @@ -857,7 +869,9 @@ cpdef object from_dlpack_capsule(object py_caps): alloc_sz = dl_tensor.byte_offset + ( (offset_max * element_bytesize + mem_ptr_delta)) usm_mem = c_dpmem._Memory.create_from_usm_pointer_size_qref( - (mem_ptr + (element_bytesize - mem_ptr_delta)), + ( + mem_ptr + (element_bytesize - mem_ptr_delta) + ), max(alloc_sz, element_bytesize), (q).get_queue_ref(), memory_owner=tmp @@ -901,12 +915,16 @@ cpdef object from_dlpack_capsule(object py_caps): ary_iface = _numpy_array_interface_from_dl_tensor(dl_tensor, readonly) if not versioned: dlm_holder = _DLManagedTensorOwner._create(dlm_tensor) - cpython.PyCapsule_SetName(py_caps, 'used_dltensor') - return np.ctypeslib.as_array(_numpy_array_interface_wrapper(ary_iface, dlm_holder)) + cpython.PyCapsule_SetName(py_caps, "used_dltensor") + return np.ctypeslib.as_array( + _numpy_array_interface_wrapper(ary_iface, dlm_holder) + ) else: dlmv_holder = _DLManagedTensorVersionedOwner._create(dlmv_tensor) - cpython.PyCapsule_SetName(py_caps, 'used_dltensor_versioned') - return np.ctypeslib.as_array(_numpy_array_interface_wrapper(ary_iface, dlmv_holder)) + cpython.PyCapsule_SetName(py_caps, "used_dltensor_versioned") + return np.ctypeslib.as_array( + _numpy_array_interface_wrapper(ary_iface, dlmv_holder) + ) else: raise BufferError( "The DLPack tensor resides on unsupported device." @@ -942,20 +960,27 @@ cdef object _create_device(object device, object dl_device): def from_dlpack(x, /, *, device=None, copy=None): """from_dlpack(x, /, *, device=None, copy=None) - Constructs :class:`dpctl.tensor.usm_ndarray` or :class:`numpy.ndarray` instance - from a Python object ``x`` that implements ``__dlpack__`` protocol. + Constructs :class:`dpctl.tensor.usm_ndarray` or :class:`numpy.ndarray` + instance from a Python object ``x`` that implements ``__dlpack__`` protocol. Args: x (object): A Python object representing an array that supports ``__dlpack__`` protocol. - device (Optional[str, :class:`dpctl.SyclDevice`, :class:`dpctl.SyclQueue`, :class:`dpctl.tensor.Device`, tuple([:class:`enum.IntEnum`, int])])): - Device where the output array is to be placed. ``device`` keyword values can be: + device ( + Optional[str, :class:`dpctl.SyclDevice`, + :class:`dpctl.SyclQueue`, + :class:`dpctl.tensor.Device`, + tuple([:class:`enum.IntEnum`, int])]) + ): + Device where the output array is to be placed. ``device`` keyword + values can be: * ``None`` The data remains on the same device. * oneAPI filter selector string - SYCL device selected by :ref:`filter selector string `. + SYCL device selected by :ref:`filter selector string + `. * :class:`dpctl.SyclDevice` explicit SYCL device that must correspond to a non-partitioned SYCL device. @@ -965,11 +990,11 @@ def from_dlpack(x, /, *, device=None, copy=None): implies SYCL device `device.sycl_queue`. The `Device` object is obtained via :attr:`dpctl.tensor.usm_ndarray.device`. * ``(device_type, device_id)`` - 2-tuple matching the format of the output of the ``__dlpack_device__`` - method: an integer enumerator representing the device type followed by - an integer representing the index of the device. - The only supported :class:`dpctl.tensor.DLDeviceType` device types - are ``"kDLCPU"`` and ``"kDLOneAPI"``. + 2-tuple matching the format of the output of the + ``__dlpack_device__`` method: an integer enumerator representing + the device type followed by an integer representing the index of + the device. The only supported :class:`dpctl.tensor.DLDeviceType` + device types are ``"kDLCPU"`` and ``"kDLOneAPI"``. Default: ``None``. @@ -995,17 +1020,18 @@ def from_dlpack(x, /, *, device=None, copy=None): The type of the returned object depends on where the data backing up input object ``x`` resides. If it resides in a USM allocation on a SYCL device, the - type :class:`dpctl.tensor.usm_ndarray` is returned, otherwise if it resides - on ``"kDLCPU"`` device the type is :class:`numpy.ndarray`, and otherwise - an exception is raised. + type :class:`dpctl.tensor.usm_ndarray` is returned, otherwise if it + resides on ``"kDLCPU"`` device the type is :class:`numpy.ndarray`, + and otherwise an exception is raised. .. note:: - If the return type is :class:`dpctl.tensor.usm_ndarray`, the associated - SYCL queue is derived from the ``device`` keyword. When ``device`` - keyword value has type :class:`dpctl.SyclQueue`, the explicit queue - instance is used, when ``device`` keyword value has type :class:`dpctl.tensor.Device`, - the ``device.sycl_queue`` is used. In all other cases, the cached + If the return type is :class:`dpctl.tensor.usm_ndarray`, the + associated SYCL queue is derived from the ``device`` keyword. + When ``device`` keyword value has type :class:`dpctl.SyclQueue`, + the explicit queue instance is used, when ``device`` keyword + value has type :class:`dpctl.tensor.Device`, the + ``device.sycl_queue`` is used. In all other cases, the cached SYCL queue corresponding to the implied SYCL device is used. Raises: @@ -1077,7 +1103,8 @@ def from_dlpack(x, /, *, device=None, copy=None): requested_ver = (1, 0) cpu_dev = (device_CPU, 0) try: - # setting max_version to minimal version that supports dl_device/copy keywords + # setting max_version to minimal version that supports + # dl_device/copy keywords dlpack_capsule = dlpack_attr( max_version=requested_ver, dl_device=dl_device, @@ -1098,7 +1125,8 @@ def from_dlpack(x, /, *, device=None, copy=None): return from_dlpack_capsule(dlpack_capsule) finally: if got_type_error: - # max_version/dl_device, copy keywords are not supported by __dlpack__ + # max_version/dl_device, copy keywords are not supported + # by __dlpack__ x_dldev = dlpack_dev_attr() if (dl_device is None) or (dl_device == x_dldev): dlpack_capsule = dlpack_attr() @@ -1106,7 +1134,8 @@ def from_dlpack(x, /, *, device=None, copy=None): # must copy via host if copy is False: raise BufferError( - "Importing data via DLPack requires copying, but copy=False was provided" + "Importing data via DLPack requires copying, but " + "copy=False was provided" ) # when max_version/dl_device/copy are not supported # we can only support importing to OneAPI devices @@ -1123,7 +1152,9 @@ def from_dlpack(x, /, *, device=None, copy=None): dlpack_capsule = dlpack_attr() blob = from_dlpack_capsule(dlpack_capsule) else: - raise BufferError(f"Can not import to requested device {dl_device}") + raise BufferError( + f"Can not import to requested device {dl_device}" + ) dev = _create_device(device, dl_device) if x_dldev == cpu_dev and dl_device == cpu_dev: # both source and destination are CPU @@ -1142,17 +1173,20 @@ def from_dlpack(x, /, *, device=None, copy=None): import dpctl.tensor as dpt return dpt.asarray(blob, device=dev) elif got_buffer_error: - # we are here, because dlpack_attr could not deal with requested dl_device, - # or copying was required + # we are here, because dlpack_attr could not deal with requested + # dl_device, or copying was required if copy is False: raise BufferError( - "Importing data via DLPack requires copying, but copy=False was provided" + "Importing data via DLPack requires copying, but " + "copy=False was provided" ) if dl_device is None: raise saved_exception # must copy via host if dl_device[0] != device_OneAPI: - raise BufferError(f"Can not import to requested device {dl_device}") + raise BufferError( + f"Can not import to requested device {dl_device}" + ) x_dldev = dlpack_dev_attr() if x_dldev == cpu_dev: dlpack_capsule = dlpack_attr() diff --git a/dpctl/tensor/_flags.pyx b/dpctl/tensor/_flags.pyx index e51fbb21bd..052687838c 100644 --- a/dpctl/tensor/_flags.pyx +++ b/dpctl/tensor/_flags.pyx @@ -150,14 +150,14 @@ cdef class Flags: out = [] for name in "C_CONTIGUOUS", "F_CONTIGUOUS", "WRITABLE": out.append(" {} : {}".format(name, self[name])) - return '\n'.join(out) + return "\n".join(out) def __eq__(self, other): cdef Flags other_ if isinstance(other, self.__class__): - other_ = other - return self.flags_ == other_.flags_ + other_ = other + return self.flags_ == other_.flags_ elif isinstance(other, int): - return self.flags_ == other + return self.flags_ == other else: - return False + return False diff --git a/dpctl/tensor/_slicing.pxi b/dpctl/tensor/_slicing.pxi index bc7b13f7c7..b94167e7e6 100644 --- a/dpctl/tensor/_slicing.pxi +++ b/dpctl/tensor/_slicing.pxi @@ -59,10 +59,10 @@ cdef bint _is_integral(object x) except *: if _is_buffer(x): mbuf = memoryview(x) if mbuf.ndim == 0: - f = mbuf.format - return f in "bBhHiIlLqQ" + f = mbuf.format + return f in "bBhHiIlLqQ" else: - return False + return False if callable(getattr(x, "__index__", None)): try: index(x) @@ -87,10 +87,10 @@ cdef bint _is_boolean(object x) except *: if _is_buffer(x): mbuf = memoryview(x) if mbuf.ndim == 0: - f = mbuf.format - return f in "?" + f = mbuf.format + return f in "?" else: - return False + return False if callable(getattr(x, "__bool__", None)): try: x.__bool__() @@ -115,12 +115,20 @@ def _basic_slice_meta(ind, shape : tuple, strides : tuple, offset : int): if ind is Ellipsis: return (shape, strides, offset, _no_advanced_ind, _no_advanced_pos) elif ind is None: - return ((1,) + shape, (0,) + strides, offset, _no_advanced_ind, _no_advanced_pos) + return ( + (1,) + shape, + (0,) + strides, + offset, + _no_advanced_ind, + _no_advanced_pos, + ) elif isinstance(ind, slice): sl_start, sl_stop, sl_step = ind.indices(shape[0]) sh0 = _slice_len(sl_start, sl_stop, sl_step) str0 = sl_step * strides[0] - new_strides = strides if (sl_step == 1 or sh0 == 0) else (str0,) + strides[1:] + new_strides = ( + strides if (sl_step == 1 or sh0 == 0) else (str0,) + strides[1:] + ) new_shape = (sh0, ) + shape[1:] is_empty = any(sh_i == 0 for sh_i in new_shape) new_offset = offset if is_empty else offset + sl_start * strides[0] @@ -129,13 +137,25 @@ def _basic_slice_meta(ind, shape : tuple, strides : tuple, offset : int): new_strides, new_offset, _no_advanced_ind, - _no_advanced_pos + _no_advanced_pos, ) elif _is_boolean(ind): if ind: - return ((1,) + shape, (0,) + strides, offset, _no_advanced_ind, _no_advanced_pos) + return ( + (1,) + shape, + (0,) + strides, + offset, + _no_advanced_ind, + _no_advanced_pos, + ) else: - return ((0,) + shape, (0,) + strides, offset, _no_advanced_ind, _no_advanced_pos) + return ( + (0,) + shape, + (0,) + strides, + offset, + _no_advanced_ind, + _no_advanced_pos, + ) elif _is_integral(ind): ind = index(ind) new_shape = shape[1:] @@ -143,10 +163,24 @@ def _basic_slice_meta(ind, shape : tuple, strides : tuple, offset : int): is_empty = any(sh_i == 0 for sh_i in new_shape) if 0 <= ind < shape[0]: new_offset = offset if is_empty else offset + ind * strides[0] - return (new_shape, new_strides, new_offset, _no_advanced_ind, _no_advanced_pos) + return ( + new_shape, + new_strides, + new_offset, + _no_advanced_ind, + _no_advanced_pos, + ) elif -shape[0] <= ind < 0: - new_offset = offset if is_empty else offset + (shape[0] + ind) * strides[0] - return (new_shape, new_strides, new_offset, _no_advanced_ind, _no_advanced_pos) + new_offset = ( + offset if is_empty else offset + (shape[0] + ind) * strides[0] + ) + return ( + new_shape, + new_strides, + new_offset, + _no_advanced_ind, + _no_advanced_pos, + ) else: raise IndexError( "Index {0} is out of range for axes 0 with " @@ -158,7 +192,6 @@ def _basic_slice_meta(ind, shape : tuple, strides : tuple, offset : int): ellipses_count = 0 newaxis_count = 0 explicit_index = 0 - array_count = 0 seen_arrays_yet = False array_streak_started = False array_streak_interrupted = False @@ -181,10 +214,7 @@ def _basic_slice_meta(ind, shape : tuple, strides : tuple, offset : int): array_streak_interrupted = True elif _is_integral(i): axes_referenced += 1 - if array_streak_started and not array_streak_interrupted: - # integers converted to arrays in this case - array_count += 1 - else: + if not array_streak_started and array_streak_interrupted: explicit_index += 1 elif isinstance(i, usm_ndarray): if not seen_arrays_yet: @@ -203,9 +233,9 @@ def _basic_slice_meta(ind, shape : tuple, strides : tuple, offset : int): axes_referenced += 1 else: raise IndexError( - "arrays used as indices must be of integer (or boolean) type" + "arrays used as indices must be of integer " + "(or boolean) type" ) - array_count += 1 else: raise IndexError( "Only integers, slices (`:`), ellipsis (`...`), " @@ -274,11 +304,15 @@ def _basic_slice_meta(ind, shape : tuple, strides : tuple, offset : int): if array_streak: if not isinstance(ind_i, usm_ndarray): ind_i = index(ind_i) - # integer will be converted to an array, still raise if OOB - if not (0 <= ind_i < shape[k] or -shape[k] <= ind_i < 0): + # integer will be converted to an array, + # still raise if OOB + if not ( + 0 <= ind_i < shape[k] or -shape[k] <= ind_i < 0 + ): raise IndexError( - ("Index {0} is out of range for " - "axes {1} with size {2}").format(ind_i, k, shape[k])) + "Index {0} is out of range for axes " + "{1} with size {2}".format(ind_i, k, shape[k]) + ) new_advanced_ind.append(ind_i) k_new = k + 1 new_shape.extend(shape[k:k_new]) @@ -294,12 +328,15 @@ def _basic_slice_meta(ind, shape : tuple, strides : tuple, offset : int): elif -shape[k] <= ind_i < 0: k_new = k + 1 if not is_empty: - new_offset = new_offset + (shape[k] + ind_i) * strides[k] + new_offset = ( + new_offset + (shape[k] + ind_i) * strides[k] + ) k = k_new else: raise IndexError( - ("Index {0} is out of range for " - "axes {1} with size {2}").format(ind_i, k, shape[k])) + "Index {0} is out of range for axes " + "{1} with size {2}".format(ind_i, k, shape[k]) + ) elif isinstance(ind_i, usm_ndarray): if not array_streak: array_streak = True @@ -318,10 +355,13 @@ def _basic_slice_meta(ind, shape : tuple, strides : tuple, offset : int): new_shape.extend(shape[k:]) new_strides.extend(strides[k:]) new_shape_len += len(shape) - k -# assert len(new_shape) == new_shape_len, f"{len(new_shape)} vs {new_shape_len}" -# assert len(new_strides) == new_shape_len, f"{len(new_strides)} vs {new_shape_len}" -# assert len(new_advanced_ind) == array_count - return (tuple(new_shape), tuple(new_strides), new_offset, tuple(new_advanced_ind), new_advanced_start_pos) + return ( + tuple(new_shape), + tuple(new_strides), + new_offset, + tuple(new_advanced_ind), + new_advanced_start_pos + ) else: raise IndexError( "Only integers, slices (`:`), ellipsis (`...`), " diff --git a/dpctl/tensor/_stride_utils.pxi b/dpctl/tensor/_stride_utils.pxi index 0595cd1741..adbeefe3a6 100644 --- a/dpctl/tensor/_stride_utils.pxi +++ b/dpctl/tensor/_stride_utils.pxi @@ -46,7 +46,8 @@ cdef int _from_input_shape_strides( int nd, object shape, object strides, int itemsize, char order, Py_ssize_t **shape_ptr, Py_ssize_t **strides_ptr, Py_ssize_t *nelems, Py_ssize_t *min_disp, Py_ssize_t *max_disp, - int *contig): + int *contig +): """ Arguments: nd, shape, strides, itemsize, order Modifies: @@ -72,7 +73,7 @@ cdef int _from_input_shape_strides( cdef Py_ssize_t* shape_arr cdef Py_ssize_t* strides_arr - if (int(order) not in [ord('C'), ord('F'), ord('c'), ord('f')]): + if (int(order) not in [ord("C"), ord("F"), ord("c"), ord("f")]): return ERROR_INCORRECT_ORDER # 0-d array @@ -102,7 +103,7 @@ cdef int _from_input_shape_strides( else: strides_arr = PyMem_Malloc(nd * sizeof(Py_ssize_t)) if (not strides_arr): - PyMem_Free(shape_ptr[0]); + PyMem_Free(shape_ptr[0]) shape_ptr[0] = (0) return ERROR_MALLOC strides_ptr[0] = strides_arr @@ -112,7 +113,7 @@ cdef int _from_input_shape_strides( nelems[0] = elem_count if (strides is None): # no need to allocate and populate strides - if order == ord('C') or order == ord('c'): + if order == ord("C") or order == ord("c"): contig[0] = USM_ARRAY_C_CONTIGUOUS else: contig[0] = USM_ARRAY_F_CONTIGUOUS @@ -129,11 +130,11 @@ cdef int _from_input_shape_strides( max_disp[0] = (elem_count - 1) strides_ptr[0] = (0) return 0 - elif ((isinstance(strides, (list, tuple)) or hasattr(strides, 'tolist')) + elif ((isinstance(strides, (list, tuple)) or hasattr(strides, "tolist")) and len(strides) == nd): strides_arr = PyMem_Malloc(nd * sizeof(Py_ssize_t)) if (not strides_arr): - PyMem_Free(shape_ptr[0]); + PyMem_Free(shape_ptr[0]) shape_ptr[0] = (0) return ERROR_MALLOC strides_ptr[0] = strides_arr @@ -205,7 +206,7 @@ cdef int _from_input_shape_strides( contig[0] = 0 # non-contiguous return 0 else: - PyMem_Free(shape_ptr[0]); + PyMem_Free(shape_ptr[0]) shape_ptr[0] = (0) return ERROR_UNEXPECTED_STRIDES # return ERROR_INTERNAL diff --git a/dpctl/tensor/_types.pxi b/dpctl/tensor/_types.pxi index df47fa28dc..c36147bb1c 100644 --- a/dpctl/tensor/_types.pxi +++ b/dpctl/tensor/_types.pxi @@ -16,23 +16,23 @@ # these typenum values are aligned to values in NumPy cdef: - int UAR_BOOL = 0 # pragma: no cover - int UAR_BYTE = 1 # pragma: no cover - int UAR_UBYTE = 2 # pragma: no cover - int UAR_SHORT = 3 # pragma: no cover - int UAR_USHORT = 4 # pragma: no cover - int UAR_INT = 5 # pragma: no cover - int UAR_UINT = 6 # pragma: no cover - int UAR_LONG = 7 # pragma: no cover - int UAR_ULONG = 8 # pragma: no cover - int UAR_LONGLONG = 9 # pragma: no cover - int UAR_ULONGLONG = 10 # pragma: no cover - int UAR_FLOAT = 11 # pragma: no cover + int UAR_BOOL = 0 # pragma: no cover + int UAR_BYTE = 1 # pragma: no cover + int UAR_UBYTE = 2 # pragma: no cover + int UAR_SHORT = 3 # pragma: no cover + int UAR_USHORT = 4 # pragma: no cover + int UAR_INT = 5 # pragma: no cover + int UAR_UINT = 6 # pragma: no cover + int UAR_LONG = 7 # pragma: no cover + int UAR_ULONG = 8 # pragma: no cover + int UAR_LONGLONG = 9 # pragma: no cover + int UAR_ULONGLONG = 10 # pragma: no cover + int UAR_FLOAT = 11 # pragma: no cover int UAR_DOUBLE = 12 # pragma: no cover int UAR_CFLOAT = 14 # pragma: no cover - int UAR_CDOUBLE = 15 # pragma: no cover - int UAR_TYPE_SENTINEL = 17 # pragma: no cover - int UAR_HALF = 23 # pragma: no cover + int UAR_CDOUBLE = 15 # pragma: no cover + int UAR_TYPE_SENTINEL = 17 # pragma: no cover + int UAR_HALF = 23 # pragma: no cover cdef int type_bytesize(int typenum): """ @@ -72,7 +72,7 @@ cdef int type_bytesize(int typenum): sizeof(float complex), sizeof(double complex), -1] - if typenum < 0: # pragma: no cover + if typenum < 0: # pragma: no cover return -1 if typenum > 16: if typenum == 23: @@ -90,7 +90,7 @@ cdef str _make_typestr(int typenum): "|i", "|u", "|i", "|u", "|i", "|u", "|f", "|f", "", "|c", "|c", ""] - if (typenum < 0): # pragma: no cover + if (typenum < 0): # pragma: no cover return "" if (typenum > 16): if (typenum == 23): @@ -119,15 +119,20 @@ cdef int typenum_from_format(str s): cdef int descr_to_typenum(object dtype): - "Returns typenum for argumentd dtype that has attribute descr, assumed numpy.dtype" - obj = getattr(dtype, 'descr') + """ + Returns typenum for argumentd dtype that has attribute descr, + assumed numpy.dtype + """ + obj = getattr(dtype, "descr") if (not isinstance(obj, list) or len(obj) != 1): return -1 # token for ValueError obj = obj[0] - if (not isinstance(obj, tuple) or len(obj) != 2 or obj[0]): # pragma: no cover + if ( + not isinstance(obj, tuple) or len(obj) != 2 or obj[0] + ): # pragma: no cover return -1 obj = obj[1] - if not isinstance(obj, str): # pragma: no cover + if not isinstance(obj, str): # pragma: no cover return -1 return typenum_from_format(obj) @@ -137,16 +142,16 @@ cdef int dtype_to_typenum(dtype): return typenum_from_format(dtype) elif isinstance(dtype, bytes): return typenum_from_format(dtype.decode("UTF-8")) - elif hasattr(dtype, 'descr'): + elif hasattr(dtype, "descr"): return descr_to_typenum(dtype) else: try: dt = np.dtype(dtype) except TypeError: return -3 - except Exception: # pragma: no cover + except Exception: # pragma: no cover return -1 - if hasattr(dt, 'descr'): + if hasattr(dt, "descr"): return descr_to_typenum(dt) - else: # pragma: no cover + else: # pragma: no cover return -3 # token for TypeError diff --git a/dpctl/tensor/_usmarray.pyx b/dpctl/tensor/_usmarray.pyx index a375bf93fe..b73edef82e 100644 --- a/dpctl/tensor/_usmarray.pyx +++ b/dpctl/tensor/_usmarray.pyx @@ -18,8 +18,6 @@ # cython: language_level=3 # cython: linetrace=True -import sys - import numpy as np import dpctl @@ -55,7 +53,8 @@ include "_slicing.pxi" class DLDeviceType(IntEnum): """ - An :class:`enum.IntEnum` for the types of DLDevices supported by the DLPack protocol. + An :class:`enum.IntEnum` for the types of DLDevices supported by the DLPack + protocol. ``kDLCPU``: CPU (host) device @@ -149,7 +148,9 @@ cdef bint _is_host_cpu(object dl_device): return (dl_type == DLDeviceType.kDLCPU) and (dl_id == 0) -cdef void _validate_and_use_stream(object stream, c_dpctl.SyclQueue self_queue) except *: +cdef void _validate_and_use_stream( + object stream, c_dpctl.SyclQueue self_queue +) except *: if (stream is None or stream == self_queue): pass else: @@ -258,8 +259,8 @@ cdef class usm_ndarray: PyMem_Free(self.strides_) self._reset() - def __cinit__(self, shape, dtype=None, strides=None, buffer='device', - Py_ssize_t offset=0, order='C', + def __cinit__(self, shape, dtype=None, strides=None, buffer="device", + Py_ssize_t offset=0, order="C", buffer_ctor_kwargs=dict(), array_namespace=None): """ @@ -285,8 +286,8 @@ cdef class usm_ndarray: self._reset() if not isinstance(shape, (list, tuple)): - if hasattr(shape, 'tolist'): - fn = getattr(shape, 'tolist') + if hasattr(shape, "tolist"): + fn = getattr(shape, "tolist") if callable(fn): shape = shape.tolist() if not isinstance(shape, (list, tuple)): @@ -312,13 +313,22 @@ cdef class usm_ndarray: typenum = dtype_to_typenum(dtype) if (typenum < 0): if typenum == -2: - raise ValueError("Data type '" + str(dtype) + "' can only have native byteorder.") + raise ValueError( + "Data type '" + str(dtype) + + "' can only have native byteorder." + ) elif typenum == -1: - raise ValueError("Data type '" + str(dtype) + "' is not understood.") - raise TypeError(f"Expected string or a dtype object, got {type(dtype)}") + raise ValueError( + "Data type '" + str(dtype) + "' is not understood." + ) + raise TypeError( + f"Expected string or a dtype object, got {type(dtype)}" + ) itemsize = type_bytesize(typenum) if (itemsize < 1): - raise TypeError("dtype=" + np.dtype(dtype).name + " is not supported.") + raise TypeError( + "dtype=" + np.dtype(dtype).name + " is not supported." + ) # allocate host C-arrays for shape, strides err = _from_input_shape_strides( nd, shape, strides, itemsize, ord(order), @@ -360,10 +370,11 @@ cdef class usm_ndarray: else: self._cleanup() raise ValueError( - ("buffer='{}' is not understood. " + "buffer='{}' is not understood. " "Recognized values are 'device', 'shared', 'host', " "an instance of `MemoryUSM*` object, or a usm_ndarray" - "").format(buffer)) + "".format(buffer) + ) elif isinstance(buffer, usm_ndarray): if not buffer.flags.writable: writable_flag = 0 @@ -379,7 +390,8 @@ cdef class usm_ndarray: is_fp64 = (typenum == UAR_DOUBLE or typenum == UAR_CDOUBLE) is_fp16 = (typenum == UAR_HALF) if (is_fp64 or is_fp16): - if ((is_fp64 and not _buffer.sycl_device.has_aspect_fp64) or + if ( + (is_fp64 and not _buffer.sycl_device.has_aspect_fp64) or (is_fp16 and not _buffer.sycl_device.has_aspect_fp16) ): raise ValueError( @@ -419,8 +431,8 @@ cdef class usm_ndarray: @property def _element_offset(self): - """Returns the offset of the zero-index element of the array, in elements, - relative to the start of memory allocation""" + """Returns the offset of the zero-index element of the array, in + elements, relative to the start of memory allocation""" return self.get_offset() @property @@ -452,27 +464,31 @@ cdef class usm_ndarray: cdef int it = 0 cdef Py_ssize_t _itemsize = self.get_itemsize() - if ((self.flags_ & USM_ARRAY_C_CONTIGUOUS) or (self.flags_ & USM_ARRAY_F_CONTIGUOUS)): + if ( + (self.flags_ & USM_ARRAY_C_CONTIGUOUS) + or (self.flags_ & USM_ARRAY_F_CONTIGUOUS) + ): return ( self._pointer, - self._pointer + shape_to_elem_count(self.nd_, self.shape_) * _itemsize + self._pointer + shape_to_elem_count( + self.nd_, self.shape_ + ) * _itemsize ) for it in range(self.nd_): - dim_ = self.shape[it] - if dim_ > 0: - step_ = self.strides[it] - if step_ > 0: - max_disp += step_ * (dim_ - 1) - else: - min_disp += step_ * (dim_ - 1) + dim_ = self.shape[it] + if dim_ > 0: + step_ = self.strides[it] + if step_ > 0: + max_disp += step_ * (dim_ - 1) + else: + min_disp += step_ * (dim_ - 1) return ( self._pointer + min_disp * _itemsize, self._pointer + (max_disp + 1) * _itemsize ) - cdef char* get_data(self): """Returns the USM pointer for this array.""" return self.data_ @@ -564,30 +580,30 @@ cdef class usm_ndarray: ) ) ary_iface = self.base_.__sycl_usm_array_interface__ - mem_ptr = ( ary_iface['data'][0]) + mem_ptr = ( ary_iface["data"][0]) ary_ptr = ( self.data_) ro_flag = False if (self.flags_ & USM_ARRAY_WRITABLE) else True - ary_iface['data'] = ( mem_ptr, ro_flag) - ary_iface['shape'] = self.shape + ary_iface["data"] = ( mem_ptr, ro_flag) + ary_iface["shape"] = self.shape if (self.strides_): - ary_iface['strides'] = _make_int_tuple(self.nd_, self.strides_) + ary_iface["strides"] = _make_int_tuple(self.nd_, self.strides_) else: if (self.flags_ & USM_ARRAY_C_CONTIGUOUS): - ary_iface['strides'] = None + ary_iface["strides"] = None elif (self.flags_ & USM_ARRAY_F_CONTIGUOUS): - ary_iface['strides'] = _f_contig_strides(self.nd_, self.shape_) + ary_iface["strides"] = _f_contig_strides(self.nd_, self.shape_) else: raise InternalUSMArrayError( "USM Array is not contiguous and has empty strides" ) - ary_iface['typestr'] = _make_typestr(self.typenum_) + ary_iface["typestr"] = _make_typestr(self.typenum_) byte_offset = ary_ptr - mem_ptr item_size = self.get_itemsize() if (byte_offset % item_size): raise InternalUSMArrayError( "byte_offset is not a multiple of item_size.") elem_offset = byte_offset // item_size - ary_iface['offset'] = elem_offset + ary_iface["offset"] = elem_offset # must wait for content of the memory to finalize self.sycl_queue.wait() return ary_iface @@ -818,7 +834,8 @@ cdef class usm_ndarray: @property def sycl_device(self): """ - Returns :class:`dpctl.SyclDevice` object on which USM data was allocated. + Returns :class:`dpctl.SyclDevice` object on which USM data + was allocated. """ q = self.sycl_queue return q.sycl_device @@ -975,12 +992,18 @@ cdef class usm_ndarray: if not key_shape[i] == arr_shape[i] and key_shape[i] > 0: matching = 0 if not matching: - raise IndexError("boolean index did not match indexed array in dimensions") + raise IndexError( + "boolean index did not match indexed array in dimensions" + ) res = _extract_impl(res, key_, axis=adv_ind_start_p) res.flags_ = _copy_writable(res.flags_, self.flags_) return res - if any((isinstance(ind, usm_ndarray) and ind.dtype == dpt_bool) for ind in adv_ind): + if any( + ( + isinstance(ind, usm_ndarray) and ind.dtype == dpt_bool + ) for ind in adv_ind + ): adv_ind_int = list() for ind in adv_ind: if isinstance(ind, usm_ndarray) and ind.dtype == dpt_bool: @@ -1097,7 +1120,11 @@ cdef class usm_ndarray: raise TypeError(f"Expected type str, got {type(api_version)}") if api_version != __array_api_version__: raise ValueError(f"Only {__array_api_version__} is supported") - return self.array_namespace_ if self.array_namespace_ is not None else dpctl.tensor + return ( + self.array_namespace_ + if self.array_namespace_ is not None + else dpctl.tensor + ) def __bool__(self): if self.size == 1: @@ -1160,7 +1187,9 @@ cdef class usm_ndarray: "Implementation for operator.and" return dpctl.tensor.bitwise_and(self, other) - def __dlpack__(self, *, stream=None, max_version=None, dl_device=None, copy=None): + def __dlpack__( + self, *, stream=None, max_version=None, dl_device=None, copy=None + ): """ Produces DLPack capsule. @@ -1232,27 +1261,32 @@ cdef class usm_ndarray: f"got {dl_device}" ) if dl_device != self.__dlpack_device__(): - if copy == False: + if copy is False: raise BufferError( - "array cannot be placed on the requested device without a copy" + "array cannot be placed on the requested " + "device without a copy" ) if _is_host_cpu(dl_device): if stream is not None: raise ValueError( - "`stream` must be `None` when `dl_device` is of type `kDLCPU`" + "`stream` must be `None` when `dl_device` " + "is of type `kDLCPU`" ) from ._copy_utils import _copy_to_numpy _arr = _copy_to_numpy(self) _arr.flags["W"] = self.flags["W"] - return c_dlpack.numpy_to_dlpack_versioned_capsule(_arr, True) + return c_dlpack.numpy_to_dlpack_versioned_capsule( + _arr, True + ) else: raise BufferError( - f"targeting `dl_device` {dl_device} with `__dlpack__` is not " - "yet implemented" + f"targeting `dl_device` {dl_device} with " + "`__dlpack__` is not yet implemented" ) if copy is None: copy = False - # TODO: strategy for handling stream on different device from dl_device + # TODO: strategy for handling stream on different device + # from dl_device if copy: _validate_and_use_stream(stream, self.sycl_queue) nbytes = self.usm_data.nbytes @@ -1268,7 +1302,9 @@ cdef class usm_ndarray: offset=self.get_offset() ) _copied_arr.flags_ = self.flags_ - _caps = c_dlpack.to_dlpack_versioned_capsule(_copied_arr, copy) + _caps = c_dlpack.to_dlpack_versioned_capsule( + _copied_arr, copy + ) else: _caps = c_dlpack.to_dlpack_versioned_capsule(self, copy) _validate_and_use_stream(stream, self.sycl_queue) @@ -1284,10 +1320,12 @@ cdef class usm_ndarray: Gives a tuple (``device_type``, ``device_id``) corresponding to ``DLDevice`` entry in ``DLTensor`` in DLPack protocol. - The tuple describes the non-partitioned device where the array has been allocated, - or the non-partitioned parent device of the allocation device. + The tuple describes the non-partitioned device where the array has been + allocated, or the non-partitioned parent device of the allocation + device. - See ``DLDeviceType`` for a list of devices supported by the DLPack protocol. + See ``DLDeviceType`` for a list of devices supported by the DLPack + protocol. Raises: DLPackCreationError: @@ -1297,7 +1335,8 @@ cdef class usm_ndarray: dev_id = self.sycl_device.get_device_id() except ValueError as e: raise c_dlpack.DLPackCreationError( - "Could not determine id of the device where array was allocated." + "Could not determine id of the device where array was " + "allocated." ) return ( DLDeviceType.kDLOneAPI, @@ -1434,7 +1473,11 @@ cdef class usm_ndarray: _place_impl(Xv, adv_ind[0], rhs, axis=adv_ind_start_p) return - if any((isinstance(ind, usm_ndarray) and ind.dtype == dpt_bool) for ind in adv_ind): + if any( + ( + isinstance(ind, usm_ndarray) and ind.dtype == dpt_bool + ) for ind in adv_ind + ): adv_ind_int = list() for ind in adv_ind: if isinstance(ind, usm_ndarray) and ind.dtype == dpt_bool: @@ -1447,7 +1490,6 @@ cdef class usm_ndarray: _put_multi_index(Xv, adv_ind, adv_ind_start_p, rhs) return - def __sub__(self, other): return dpctl.tensor.subtract(self, other) @@ -1581,7 +1623,7 @@ cdef usm_ndarray _real_view(usm_ndarray ary): strides=tuple(2 * si for si in ary.strides), buffer=ary.base_, offset=offset_elems, - order=('C' if (ary.flags_ & USM_ARRAY_C_CONTIGUOUS) else 'F') + order=("C" if (ary.flags_ & USM_ARRAY_C_CONTIGUOUS) else "F") ) r.flags_ = _copy_writable(r.flags_, ary.flags_) r.array_namespace_ = ary.array_namespace_ @@ -1613,7 +1655,7 @@ cdef usm_ndarray _imag_view(usm_ndarray ary): strides=tuple(2 * si for si in ary.strides), buffer=ary.base_, offset=offset_elems, - order=('C' if (ary.flags_ & USM_ARRAY_C_CONTIGUOUS) else 'F') + order=("C" if (ary.flags_ & USM_ARRAY_C_CONTIGUOUS) else "F") ) r.flags_ = _copy_writable(r.flags_, ary.flags_) r.array_namespace_ = ary.array_namespace_ @@ -1632,7 +1674,7 @@ cdef usm_ndarray _transpose(usm_ndarray ary): _make_reversed_int_tuple(ary.nd_, ary.strides_) if (ary.strides_) else None), buffer=ary.base_, - order=('F' if (ary.flags_ & USM_ARRAY_C_CONTIGUOUS) else 'C'), + order=("F" if (ary.flags_ & USM_ARRAY_C_CONTIGUOUS) else "C"), offset=ary.get_offset() ) r.flags_ = _copy_writable(r.flags_, ary.flags_) @@ -1649,7 +1691,7 @@ cdef usm_ndarray _m_transpose(usm_ndarray ary): dtype=_make_typestr(ary.typenum_), strides=_swap_last_two(ary.strides), buffer=ary.base_, - order=('F' if (ary.flags_ & USM_ARRAY_C_CONTIGUOUS) else 'C'), + order=("F" if (ary.flags_ & USM_ARRAY_C_CONTIGUOUS) else "C"), offset=ary.get_offset() ) r.flags_ = _copy_writable(r.flags_, ary.flags_) @@ -1780,9 +1822,12 @@ cdef api object UsmNDArray_MakeSimpleFromPtr( """ cdef int itemsize = type_bytesize(typenum) if (itemsize < 1): - raise ValueError("dtype with typenum=" + str(typenum) + " is not supported.") + raise ValueError( + "dtype with typenum=" + str(typenum) + " is not supported." + ) cdef size_t nbytes = ( itemsize) * nelems - cdef c_dpmem._Memory mobj = c_dpmem._Memory.create_from_usm_pointer_size_qref( + cdef c_dpmem._Memory mobj + mobj = c_dpmem._Memory.create_from_usm_pointer_size_qref( ptr, nbytes, QRef, memory_owner=owner ) cdef usm_ndarray arr = usm_ndarray( @@ -1821,7 +1866,6 @@ cdef api object UsmNDArray_MakeFromPtr( Created usm_ndarray instance """ cdef int itemsize = type_bytesize(typenum) - cdef int err = 0 cdef size_t nelems = 1 cdef Py_ssize_t min_disp = 0 cdef Py_ssize_t max_disp = 0 @@ -1834,7 +1878,9 @@ cdef api object UsmNDArray_MakeFromPtr( cdef object obj_strides if (itemsize < 1): - raise ValueError("dtype with typenum=" + str(typenum) + " is not supported.") + raise ValueError( + "dtype with typenum=" + str(typenum) + " is not supported." + ) if (nd < 0): raise ValueError("Dimensionality must be non-negative") if (ptr is NULL or QRef is NULL): @@ -1901,5 +1947,5 @@ cdef api object UsmNDArray_MakeFromPtr( def _is_object_with_buffer_protocol(o): - "Returns True if object supports Python buffer protocol" - return _is_buffer(o) + "Returns True if object supports Python buffer protocol" + return _is_buffer(o) diff --git a/dpctl/utils/_compute_follows_data.pyx b/dpctl/utils/_compute_follows_data.pyx index 81dca6075a..ce3823ffd5 100644 --- a/dpctl/utils/_compute_follows_data.pyx +++ b/dpctl/utils/_compute_follows_data.pyx @@ -28,7 +28,9 @@ import dpctl from .._sycl_queue cimport SyclQueue -__all__ = ["get_execution_queue", "get_coerced_usm_type", "ExecutionPlacementError"] +__all__ = [ + "get_execution_queue", "get_coerced_usm_type", "ExecutionPlacementError" +] class ExecutionPlacementError(Exception): @@ -77,7 +79,7 @@ def get_execution_queue(qs, /): elif not isinstance(q2, dpctl.SyclQueue): return None elif not queue_equiv( q1, q2): - return None + return None return qs[0] @@ -104,7 +106,7 @@ def get_coerced_usm_type(usm_types, /): if len(usm_types) == 0: return None _k = ["device", "shared", "host"] - _m = {k:i for i, k in enumerate(_k)} + _m = {k: i for i, k in enumerate(_k)} res = len(_k) for t in usm_types: if not isinstance(t, str): @@ -129,6 +131,7 @@ def _validate_usm_type_allow_none(usm_type): f"Expected usm_type to be a str or None, got {type(usm_type)}" ) + def _validate_usm_type_disallow_none(usm_type): "Validates usm_type argument" if isinstance(usm_type, str): diff --git a/examples/cython/sycl_buffer/syclbuffer/_syclbuffer.pyx b/examples/cython/sycl_buffer/syclbuffer/_syclbuffer.pyx index f79f878376..cdba89d1ef 100644 --- a/examples/cython/sycl_buffer/syclbuffer/_syclbuffer.pyx +++ b/examples/cython/sycl_buffer/syclbuffer/_syclbuffer.pyx @@ -71,10 +71,10 @@ def columnwise_total(cython.floating[:, ::1] mat, queue=None): res_memslice = np.empty(n_cols, dtype=np.double) else: raise TypeError( - "Use single or double precision floating point types are supported" - ) + "Use single or double precision floating point types are supported" + ) - if (queue is None): + if queue is None: # use default-constructed queue q = c_dpctl.SyclQueue() elif isinstance(queue, dpctl.SyclQueue): @@ -85,7 +85,7 @@ def columnwise_total(cython.floating[:, ::1] mat, queue=None): with nogil, cython.boundscheck(False): native_columnwise_total( - exec_queue_ptr[0], n_rows, n_cols, &mat[0,0], &res_memslice[0] + exec_queue_ptr[0], n_rows, n_cols, &mat[0, 0], &res_memslice[0] ) return np.asarray(res_memslice) diff --git a/examples/cython/use_dpctl_sycl/use_dpctl_sycl/_cython_api.pyx b/examples/cython/use_dpctl_sycl/use_dpctl_sycl/_cython_api.pyx index 7e984b6b4a..697ef539d5 100644 --- a/examples/cython/use_dpctl_sycl/use_dpctl_sycl/_cython_api.pyx +++ b/examples/cython/use_dpctl_sycl/use_dpctl_sycl/_cython_api.pyx @@ -48,6 +48,8 @@ cpdef dpctl.SyclDevice device_copy(dpctl.SyclDevice dev): cdef dpctl.DPCTLSyclDeviceRef d_ref = dev.get_device_ref() cdef const dpctl.sycl.device *dpcpp_device = dpctl.sycl.unwrap_device(d_ref) cdef dpctl.sycl.device *copied_device = copy_device(dpcpp_device[0]) - cdef dpctl.DPCTLSyclDeviceRef copied_d_ref = dpctl.sycl.wrap_device(copied_device) + cdef dpctl.DPCTLSyclDeviceRef copied_d_ref = dpctl.sycl.wrap_device( + copied_device + ) return dpctl.SyclDevice._create(copied_d_ref) diff --git a/examples/cython/usm_memory/blackscholes/_blackscholes_usm.pyx b/examples/cython/usm_memory/blackscholes/_blackscholes_usm.pyx index aaa86778a0..a88175982c 100644 --- a/examples/cython/usm_memory/blackscholes/_blackscholes_usm.pyx +++ b/examples/cython/usm_memory/blackscholes/_blackscholes_usm.pyx @@ -17,17 +17,11 @@ # cython: language_level=3 # distutils: language=c++ -cimport numpy as cnp -from cython cimport floating - cimport dpctl as c_dpctl cimport dpctl.tensor as c_dpt from dpctl.sycl cimport queue as dpcpp_queue from dpctl.sycl cimport unwrap_queue -import numpy as np - -import dpctl import dpctl.tensor as dpt @@ -56,7 +50,8 @@ cdef extern from "sycl_blackscholes.hpp": def black_scholes_price(c_dpt.usm_ndarray option_params_arr): """black_scholes_price(params) - Applies Black-Scholes-Merton formula to compute call and put European option prices. + Applies Black-Scholes-Merton formula to compute call and put European + option prices. Args: option_params_arr: usm_ndarray @@ -64,12 +59,11 @@ def black_scholes_price(c_dpt.usm_ndarray option_params_arr): (price, strike, maturity, rate, volatility) per each option. Returns: usm_ndarray - Floating point array with shape (n_opts, 2) containing (call_price, put_price) - per each option. + Floating point array with shape (n_opts, 2) containing + (call_price, put_price) per each option. """ cdef size_t n_opts = 0 cdef size_t n_params = 0 - cdef size_t n_bytes = 0 cdef c_dpctl.SyclQueue q cdef dpcpp_queue* exec_q_ptr = NULL cdef c_dpt.usm_ndarray call_put_prices @@ -103,14 +97,14 @@ def black_scholes_price(c_dpt.usm_ndarray option_params_arr): typenum_ = option_params_arr.get_typenum() if (typenum_ == c_dpt.UAR_DOUBLE): - call_put_prices = dpt.empty((n_opts, 2), dtype='d', sycl_queue=q) + call_put_prices = dpt.empty((n_opts, 2), dtype="d", sycl_queue=q) dp1 = option_params_arr.get_data() dp2 = call_put_prices.get_data() # ensure content of dp1 and dp2 is no longer worked on exec_q_ptr[0].wait() cpp_blackscholes[double](exec_q_ptr[0], n_opts, dp1, dp2) elif (typenum_ == c_dpt.UAR_FLOAT): - call_put_prices = dpt.empty((n_opts, 2), dtype='f', sycl_queue=q) + call_put_prices = dpt.empty((n_opts, 2), dtype="f", sycl_queue=q) fp1 = option_params_arr.get_data() fp2 = call_put_prices.get_data() # ensure content of fp1 and fp2 is no longer worked on @@ -168,7 +162,6 @@ def populate_params( """ cdef size_t n_opts = 0 cdef size_t n_params = 0 - cdef c_dpctl.SyclQueue sycl_queue cdef dpcpp_queue* exec_q_ptr = NULL cdef double* dp = NULL cdef float* fp = NULL @@ -202,13 +195,15 @@ def populate_params( dp = option_params_arr.get_data() exec_q_ptr[0].wait() cpp_populate_params[double]( - exec_q_ptr[0], n_opts, dp, pl, ph, sl, sh, tl, th, rl, rh, vl, vh, seed + exec_q_ptr[0], n_opts, dp, pl, ph, + sl, sh, tl, th, rl, rh, vl, vh, seed ) elif (typenum_ == c_dpt.UAR_FLOAT): fp = option_params_arr.get_data() exec_q_ptr[0].wait() cpp_populate_params[float]( - exec_q_ptr[0], n_opts, fp, pl, ph, sl, sh, tl, th, rl, rh, vl, vh, seed + exec_q_ptr[0], n_opts, fp, pl, ph, + sl, sh, tl, th, rl, rh, vl, vh, seed ) else: raise ValueError("Unsupported data-type") diff --git a/pyproject.toml b/pyproject.toml index cd4b31adbe..516b34549e 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -107,6 +107,9 @@ source = [ "dpctl" ] +[tool.cython-lint] +max-line-length = 80 + [tool.isort] ensure_newline_before_comments = true force_grid_wrap = 0