3294 lines
199 KiB
C
3294 lines
199 KiB
C
|
/*
|
||
|
* Copyright 2011-2014 Blender Foundation
|
||
|
*
|
||
|
* Licensed under the Apache License, Version 2.0 (the "License");
|
||
|
* you may not use this file except in compliance with the License.
|
||
|
* You may obtain a copy of the License at
|
||
|
*
|
||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||
|
*
|
||
|
* Unless required by applicable law or agreed to in writing, software
|
||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||
|
* See the License for the specific language governing permissions and
|
||
|
* limitations under the License
|
||
|
*/
|
||
|
|
||
|
#ifndef __CUEW_H__
|
||
|
#define __CUEW_H__
|
||
|
|
||
|
#ifdef __cplusplus
|
||
|
extern "C" {
|
||
|
#endif
|
||
|
|
||
|
#include <stdlib.h>
|
||
|
|
||
|
/* Defines. */
|
||
|
#define CUEW_VERSION_MAJOR 2
|
||
|
#define CUEW_VERSION_MINOR 0
|
||
|
|
||
|
#define cuTexRefSetAddress2D cuTexRefSetAddress2D_v3
|
||
|
#define CUDA_VERSION 11020
|
||
|
#define CU_UUID_HAS_BEEN_DEFINED
|
||
|
#define CU_IPC_HANDLE_SIZE 64
|
||
|
#define CU_STREAM_LEGACY ((CUstream)0x1)
|
||
|
#define CU_STREAM_PER_THREAD ((CUstream)0x2)
|
||
|
#define CU_MEMHOSTALLOC_PORTABLE 0x01
|
||
|
#define CU_MEMHOSTALLOC_DEVICEMAP 0x02
|
||
|
#define CU_MEMHOSTALLOC_WRITECOMBINED 0x04
|
||
|
#define CU_MEMHOSTREGISTER_PORTABLE 0x01
|
||
|
#define CU_MEMHOSTREGISTER_DEVICEMAP 0x02
|
||
|
#define CU_MEMHOSTREGISTER_IOMEMORY 0x04
|
||
|
#define CU_MEMHOSTREGISTER_READ_ONLY 0x08
|
||
|
#define CU_ARRAY_SPARSE_PROPERTIES_SINGLE_MIPTAIL 0x1
|
||
|
#define CUDA_EXTERNAL_MEMORY_DEDICATED 0x1
|
||
|
#define CUDA_EXTERNAL_SEMAPHORE_SIGNAL_SKIP_NVSCIBUF_MEMSYNC 0x01
|
||
|
#define CUDA_EXTERNAL_SEMAPHORE_WAIT_SKIP_NVSCIBUF_MEMSYNC 0x02
|
||
|
#define CUDA_NVSCISYNC_ATTR_SIGNAL 0x1
|
||
|
#define CUDA_NVSCISYNC_ATTR_WAIT 0x2
|
||
|
#define CU_MEM_CREATE_USAGE_TILE_POOL 0x1
|
||
|
#define CUDA_COOPERATIVE_LAUNCH_MULTI_DEVICE_NO_PRE_LAUNCH_SYNC 0x01
|
||
|
#define CUDA_COOPERATIVE_LAUNCH_MULTI_DEVICE_NO_POST_LAUNCH_SYNC 0x02
|
||
|
#define CUDA_ARRAY3D_LAYERED 0x01
|
||
|
#define CUDA_ARRAY3D_2DARRAY 0x01
|
||
|
#define CUDA_ARRAY3D_SURFACE_LDST 0x02
|
||
|
#define CUDA_ARRAY3D_CUBEMAP 0x04
|
||
|
#define CUDA_ARRAY3D_TEXTURE_GATHER 0x08
|
||
|
#define CUDA_ARRAY3D_DEPTH_TEXTURE 0x10
|
||
|
#define CUDA_ARRAY3D_COLOR_ATTACHMENT 0x20
|
||
|
#define CUDA_ARRAY3D_SPARSE 0x40
|
||
|
#define CU_TRSA_OVERRIDE_FORMAT 0x01
|
||
|
#define CU_TRSF_READ_AS_INTEGER 0x01
|
||
|
#define CU_TRSF_NORMALIZED_COORDINATES 0x02
|
||
|
#define CU_TRSF_SRGB 0x10
|
||
|
#define CU_TRSF_DISABLE_TRILINEAR_OPTIMIZATION 0x20
|
||
|
#define CU_LAUNCH_PARAM_END ((void*)0x00)
|
||
|
#define CU_LAUNCH_PARAM_BUFFER_POINTER ((void*)0x01)
|
||
|
#define CU_LAUNCH_PARAM_BUFFER_SIZE ((void*)0x02)
|
||
|
#define CU_PARAM_TR_DEFAULT -1
|
||
|
#define CU_DEVICE_CPU ((CUdevice)-1)
|
||
|
#define CU_DEVICE_INVALID ((CUdevice)-2)
|
||
|
|
||
|
/* Functions which changed 3.1 -> 3.2 for 64 bit stuff,
|
||
|
* the cuda library has both the old ones for compatibility and new
|
||
|
* ones with _v2 postfix,
|
||
|
*/
|
||
|
#define cuDeviceTotalMem cuDeviceTotalMem_v2
|
||
|
#define cuCtxCreate cuCtxCreate_v2
|
||
|
#define cuModuleGetGlobal cuModuleGetGlobal_v2
|
||
|
#define cuMemGetInfo cuMemGetInfo_v2
|
||
|
#define cuMemAlloc cuMemAlloc_v2
|
||
|
#define cuMemAllocPitch cuMemAllocPitch_v2
|
||
|
#define cuMemFree cuMemFree_v2
|
||
|
#define cuMemGetAddressRange cuMemGetAddressRange_v2
|
||
|
#define cuMemAllocHost cuMemAllocHost_v2
|
||
|
#define cuMemHostGetDevicePointer cuMemHostGetDevicePointer_v2
|
||
|
#define cuMemcpyHtoD cuMemcpyHtoD_v2
|
||
|
#define cuMemcpyDtoH cuMemcpyDtoH_v2
|
||
|
#define cuMemcpyDtoD cuMemcpyDtoD_v2
|
||
|
#define cuMemcpyDtoA cuMemcpyDtoA_v2
|
||
|
#define cuMemcpyAtoD cuMemcpyAtoD_v2
|
||
|
#define cuMemcpyHtoA cuMemcpyHtoA_v2
|
||
|
#define cuMemcpyAtoH cuMemcpyAtoH_v2
|
||
|
#define cuMemcpyAtoA cuMemcpyAtoA_v2
|
||
|
#define cuMemcpyHtoAAsync cuMemcpyHtoAAsync_v2
|
||
|
#define cuMemcpyAtoHAsync cuMemcpyAtoHAsync_v2
|
||
|
#define cuMemcpy2D cuMemcpy2D_v2
|
||
|
#define cuMemcpy2DUnaligned cuMemcpy2DUnaligned_v2
|
||
|
#define cuMemcpy3D cuMemcpy3D_v2
|
||
|
#define cuMemcpyHtoDAsync cuMemcpyHtoDAsync_v2
|
||
|
#define cuMemcpyDtoHAsync cuMemcpyDtoHAsync_v2
|
||
|
#define cuMemcpyDtoDAsync cuMemcpyDtoDAsync_v2
|
||
|
#define cuMemcpy2DAsync cuMemcpy2DAsync_v2
|
||
|
#define cuMemcpy3DAsync cuMemcpy3DAsync_v2
|
||
|
#define cuMemsetD8 cuMemsetD8_v2
|
||
|
#define cuMemsetD16 cuMemsetD16_v2
|
||
|
#define cuMemsetD32 cuMemsetD32_v2
|
||
|
#define cuMemsetD2D8 cuMemsetD2D8_v2
|
||
|
#define cuMemsetD2D16 cuMemsetD2D16_v2
|
||
|
#define cuMemsetD2D32 cuMemsetD2D32_v2
|
||
|
#define cuArrayCreate cuArrayCreate_v2
|
||
|
#define cuArrayGetDescriptor cuArrayGetDescriptor_v2
|
||
|
#define cuArray3DCreate cuArray3DCreate_v2
|
||
|
#define cuArray3DGetDescriptor cuArray3DGetDescriptor_v2
|
||
|
#define cuTexRefSetAddress cuTexRefSetAddress_v2
|
||
|
#define cuTexRefGetAddress cuTexRefGetAddress_v2
|
||
|
#define cuGraphicsResourceGetMappedPointer cuGraphicsResourceGetMappedPointer_v2
|
||
|
#define cuCtxDestroy cuCtxDestroy_v2
|
||
|
#define cuCtxPopCurrent cuCtxPopCurrent_v2
|
||
|
#define cuCtxPushCurrent cuCtxPushCurrent_v2
|
||
|
#define cuStreamDestroy cuStreamDestroy_v2
|
||
|
#define cuEventDestroy cuEventDestroy_v2
|
||
|
#define cuLinkCreate cuLinkCreate_v2
|
||
|
#define cuLinkAddData cuLinkAddData_v2
|
||
|
#define cuLinkAddFile cuLinkAddFile_v2
|
||
|
#define cuMemHostRegister cuMemHostRegister_v2
|
||
|
#define cuGraphicsResourceSetMapFlags cuGraphicsResourceSetMapFlags_v2
|
||
|
#define cuStreamBeginCapture cuStreamBeginCapture_v2
|
||
|
#define cuDevicePrimaryCtxRelease cuDevicePrimaryCtxRelease_v2
|
||
|
#define cuDevicePrimaryCtxReset cuDevicePrimaryCtxReset_v2
|
||
|
#define cuDevicePrimaryCtxSetFlags cuDevicePrimaryCtxSetFlags_v2
|
||
|
#define cuIpcOpenMemHandle cuIpcOpenMemHandle_v2
|
||
|
#define cuGraphInstantiate cuGraphInstantiate_v2
|
||
|
#define cuGLCtxCreate cuGLCtxCreate_v2
|
||
|
#define cuGLMapBufferObject cuGLMapBufferObject_v2
|
||
|
#define cuGLMapBufferObjectAsync cuGLMapBufferObjectAsync_v2
|
||
|
#define cuGLGetDevices cuGLGetDevices_v2
|
||
|
|
||
|
/* Types. */
|
||
|
#ifdef _MSC_VER
|
||
|
typedef unsigned __int32 cuuint32_t;
|
||
|
typedef unsigned __int64 cuuint64_t;
|
||
|
/* Assume VS2017 or later */
|
||
|
#include <stdint.h>
|
||
|
#else
|
||
|
#include <stdint.h>
|
||
|
typedef uint32_t cuuint32_t;
|
||
|
typedef uint64_t cuuint64_t;
|
||
|
#endif
|
||
|
|
||
|
#if defined(__x86_64) || defined(AMD64) || defined(_M_AMD64) || defined (__aarch64__)
|
||
|
typedef unsigned long long CUdeviceptr;
|
||
|
#else
|
||
|
typedef unsigned int CUdeviceptr;
|
||
|
#endif
|
||
|
|
||
|
#ifdef _WIN32
|
||
|
# define CUDAAPI __stdcall
|
||
|
# define CUDA_CB __stdcall
|
||
|
#else
|
||
|
# define CUDAAPI
|
||
|
# define CUDA_CB
|
||
|
#endif
|
||
|
|
||
|
#if !defined(__CUDACC__)
|
||
|
# define __device_builtin__
|
||
|
#else
|
||
|
# define __device_builtin__ __location__(device_builtin)
|
||
|
#endif
|
||
|
|
||
|
typedef __device_builtin__ struct CUstream_st *cudaStream_t;
|
||
|
|
||
|
typedef int CUdevice;
|
||
|
typedef struct CUctx_st* CUcontext;
|
||
|
typedef struct CUmod_st* CUmodule;
|
||
|
typedef struct CUfunc_st* CUfunction;
|
||
|
typedef struct CUarray_st* CUarray;
|
||
|
typedef struct CUmipmappedArray_st* CUmipmappedArray;
|
||
|
typedef struct CUtexref_st* CUtexref;
|
||
|
typedef struct CUsurfref_st* CUsurfref;
|
||
|
typedef struct CUevent_st* CUevent;
|
||
|
typedef struct CUstream_st* CUstream;
|
||
|
typedef struct CUgraphicsResource_st* CUgraphicsResource;
|
||
|
typedef unsigned long long CUtexObject;
|
||
|
typedef unsigned long long CUsurfObject;
|
||
|
typedef struct CUextMemory_st* CUexternalMemory;
|
||
|
typedef struct CUextSemaphore_st* CUexternalSemaphore;
|
||
|
typedef struct CUgraph_st* CUgraph;
|
||
|
typedef struct CUgraphNode_st* CUgraphNode;
|
||
|
typedef struct CUgraphExec_st* CUgraphExec;
|
||
|
typedef struct CUmemPoolHandle_st* CUmemoryPool;
|
||
|
|
||
|
typedef struct CUuuid_st {
|
||
|
char bytes[16];
|
||
|
} CUuuid;
|
||
|
|
||
|
typedef struct CUipcEventHandle_st {
|
||
|
char reserved[CU_IPC_HANDLE_SIZE];
|
||
|
} CUipcEventHandle;
|
||
|
|
||
|
typedef struct CUipcMemHandle_st {
|
||
|
char reserved[CU_IPC_HANDLE_SIZE];
|
||
|
} CUipcMemHandle;
|
||
|
|
||
|
typedef enum CUipcMem_flags_enum {
|
||
|
CU_IPC_MEM_LAZY_ENABLE_PEER_ACCESS = 0x1,
|
||
|
} CUipcMem_flags;
|
||
|
|
||
|
typedef enum CUmemAttach_flags_enum {
|
||
|
CU_MEM_ATTACH_GLOBAL = 0x1,
|
||
|
CU_MEM_ATTACH_HOST = 0x2,
|
||
|
CU_MEM_ATTACH_SINGLE = 0x4,
|
||
|
} CUmemAttach_flags;
|
||
|
|
||
|
typedef enum CUctx_flags_enum {
|
||
|
CU_CTX_SCHED_AUTO = 0x00,
|
||
|
CU_CTX_SCHED_SPIN = 0x01,
|
||
|
CU_CTX_SCHED_YIELD = 0x02,
|
||
|
CU_CTX_SCHED_BLOCKING_SYNC = 0x04,
|
||
|
CU_CTX_BLOCKING_SYNC = 0x04,
|
||
|
CU_CTX_SCHED_MASK = 0x07,
|
||
|
CU_CTX_MAP_HOST = 0x08,
|
||
|
CU_CTX_LMEM_RESIZE_TO_MAX = 0x10,
|
||
|
CU_CTX_FLAGS_MASK = 0x1f,
|
||
|
} CUctx_flags;
|
||
|
|
||
|
typedef enum CUstream_flags_enum {
|
||
|
CU_STREAM_DEFAULT = 0x0,
|
||
|
CU_STREAM_NON_BLOCKING = 0x1,
|
||
|
} CUstream_flags;
|
||
|
|
||
|
typedef enum CUevent_flags_enum {
|
||
|
CU_EVENT_DEFAULT = 0x0,
|
||
|
CU_EVENT_BLOCKING_SYNC = 0x1,
|
||
|
CU_EVENT_DISABLE_TIMING = 0x2,
|
||
|
CU_EVENT_INTERPROCESS = 0x4,
|
||
|
} CUevent_flags;
|
||
|
|
||
|
typedef enum CUevent_record_flags_enum {
|
||
|
CU_EVENT_RECORD_DEFAULT = 0x0,
|
||
|
CU_EVENT_RECORD_EXTERNAL = 0x1,
|
||
|
} CUevent_record_flags;
|
||
|
|
||
|
typedef enum CUevent_wait_flags_enum {
|
||
|
CU_EVENT_WAIT_DEFAULT = 0x0,
|
||
|
CU_EVENT_WAIT_EXTERNAL = 0x1,
|
||
|
} CUevent_wait_flags;
|
||
|
|
||
|
typedef enum CUstreamWaitValue_flags_enum {
|
||
|
CU_STREAM_WAIT_VALUE_GEQ = 0x0,
|
||
|
CU_STREAM_WAIT_VALUE_EQ = 0x1,
|
||
|
CU_STREAM_WAIT_VALUE_AND = 0x2,
|
||
|
CU_STREAM_WAIT_VALUE_NOR = 0x3,
|
||
|
CU_STREAM_WAIT_VALUE_FLUSH = (1 << 30),
|
||
|
} CUstreamWaitValue_flags;
|
||
|
|
||
|
typedef enum CUstreamWriteValue_flags_enum {
|
||
|
CU_STREAM_WRITE_VALUE_DEFAULT = 0x0,
|
||
|
CU_STREAM_WRITE_VALUE_NO_MEMORY_BARRIER = 0x1,
|
||
|
} CUstreamWriteValue_flags;
|
||
|
|
||
|
typedef enum CUstreamBatchMemOpType_enum {
|
||
|
CU_STREAM_MEM_OP_WAIT_VALUE_32 = 1,
|
||
|
CU_STREAM_MEM_OP_WRITE_VALUE_32 = 2,
|
||
|
CU_STREAM_MEM_OP_WAIT_VALUE_64 = 4,
|
||
|
CU_STREAM_MEM_OP_WRITE_VALUE_64 = 5,
|
||
|
CU_STREAM_MEM_OP_FLUSH_REMOTE_WRITES = 3,
|
||
|
} CUstreamBatchMemOpType;
|
||
|
|
||
|
typedef union CUstreamBatchMemOpParams_union {
|
||
|
CUstreamBatchMemOpType operation;
|
||
|
struct CUstreamMemOpWaitValueParams_st {
|
||
|
CUstreamBatchMemOpType operation;
|
||
|
CUdeviceptr address;
|
||
|
union {
|
||
|
cuuint32_t value;
|
||
|
cuuint64_t value64;
|
||
|
};
|
||
|
unsigned int flags;
|
||
|
CUdeviceptr alias;
|
||
|
} waitValue;
|
||
|
struct CUstreamMemOpWriteValueParams_st {
|
||
|
CUstreamBatchMemOpType operation;
|
||
|
CUdeviceptr address;
|
||
|
union {
|
||
|
cuuint32_t value;
|
||
|
cuuint64_t value64;
|
||
|
};
|
||
|
unsigned int flags;
|
||
|
CUdeviceptr alias;
|
||
|
} writeValue;
|
||
|
struct CUstreamMemOpFlushRemoteWritesParams_st {
|
||
|
CUstreamBatchMemOpType operation;
|
||
|
unsigned int flags;
|
||
|
} flushRemoteWrites;
|
||
|
cuuint64_t pad[6];
|
||
|
} CUstreamBatchMemOpParams;
|
||
|
|
||
|
typedef enum CUoccupancy_flags_enum {
|
||
|
CU_OCCUPANCY_DEFAULT = 0x0,
|
||
|
CU_OCCUPANCY_DISABLE_CACHING_OVERRIDE = 0x1,
|
||
|
} CUoccupancy_flags;
|
||
|
|
||
|
typedef enum CUarray_format_enum {
|
||
|
CU_AD_FORMAT_UNSIGNED_INT8 = 0x01,
|
||
|
CU_AD_FORMAT_UNSIGNED_INT16 = 0x02,
|
||
|
CU_AD_FORMAT_UNSIGNED_INT32 = 0x03,
|
||
|
CU_AD_FORMAT_SIGNED_INT8 = 0x08,
|
||
|
CU_AD_FORMAT_SIGNED_INT16 = 0x09,
|
||
|
CU_AD_FORMAT_SIGNED_INT32 = 0x0a,
|
||
|
CU_AD_FORMAT_HALF = 0x10,
|
||
|
CU_AD_FORMAT_FLOAT = 0x20,
|
||
|
CU_AD_FORMAT_NV12 = 0xb0,
|
||
|
} CUarray_format;
|
||
|
|
||
|
typedef enum CUaddress_mode_enum {
|
||
|
CU_TR_ADDRESS_MODE_WRAP = 0,
|
||
|
CU_TR_ADDRESS_MODE_CLAMP = 1,
|
||
|
CU_TR_ADDRESS_MODE_MIRROR = 2,
|
||
|
CU_TR_ADDRESS_MODE_BORDER = 3,
|
||
|
} CUaddress_mode;
|
||
|
|
||
|
typedef enum CUfilter_mode_enum {
|
||
|
CU_TR_FILTER_MODE_POINT = 0,
|
||
|
CU_TR_FILTER_MODE_LINEAR = 1,
|
||
|
} CUfilter_mode;
|
||
|
|
||
|
typedef enum CUdevice_attribute_enum {
|
||
|
CU_DEVICE_ATTRIBUTE_MAX_THREADS_PER_BLOCK = 1,
|
||
|
CU_DEVICE_ATTRIBUTE_MAX_BLOCK_DIM_X = 2,
|
||
|
CU_DEVICE_ATTRIBUTE_MAX_BLOCK_DIM_Y = 3,
|
||
|
CU_DEVICE_ATTRIBUTE_MAX_BLOCK_DIM_Z = 4,
|
||
|
CU_DEVICE_ATTRIBUTE_MAX_GRID_DIM_X = 5,
|
||
|
CU_DEVICE_ATTRIBUTE_MAX_GRID_DIM_Y = 6,
|
||
|
CU_DEVICE_ATTRIBUTE_MAX_GRID_DIM_Z = 7,
|
||
|
CU_DEVICE_ATTRIBUTE_MAX_SHARED_MEMORY_PER_BLOCK = 8,
|
||
|
CU_DEVICE_ATTRIBUTE_SHARED_MEMORY_PER_BLOCK = 8,
|
||
|
CU_DEVICE_ATTRIBUTE_TOTAL_CONSTANT_MEMORY = 9,
|
||
|
CU_DEVICE_ATTRIBUTE_WARP_SIZE = 10,
|
||
|
CU_DEVICE_ATTRIBUTE_MAX_PITCH = 11,
|
||
|
CU_DEVICE_ATTRIBUTE_MAX_REGISTERS_PER_BLOCK = 12,
|
||
|
CU_DEVICE_ATTRIBUTE_REGISTERS_PER_BLOCK = 12,
|
||
|
CU_DEVICE_ATTRIBUTE_CLOCK_RATE = 13,
|
||
|
CU_DEVICE_ATTRIBUTE_TEXTURE_ALIGNMENT = 14,
|
||
|
CU_DEVICE_ATTRIBUTE_GPU_OVERLAP = 15,
|
||
|
CU_DEVICE_ATTRIBUTE_MULTIPROCESSOR_COUNT = 16,
|
||
|
CU_DEVICE_ATTRIBUTE_KERNEL_EXEC_TIMEOUT = 17,
|
||
|
CU_DEVICE_ATTRIBUTE_INTEGRATED = 18,
|
||
|
CU_DEVICE_ATTRIBUTE_CAN_MAP_HOST_MEMORY = 19,
|
||
|
CU_DEVICE_ATTRIBUTE_COMPUTE_MODE = 20,
|
||
|
CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE1D_WIDTH = 21,
|
||
|
CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_WIDTH = 22,
|
||
|
CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_HEIGHT = 23,
|
||
|
CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE3D_WIDTH = 24,
|
||
|
CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE3D_HEIGHT = 25,
|
||
|
CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE3D_DEPTH = 26,
|
||
|
CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_LAYERED_WIDTH = 27,
|
||
|
CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_LAYERED_HEIGHT = 28,
|
||
|
CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_LAYERED_LAYERS = 29,
|
||
|
CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_ARRAY_WIDTH = 27,
|
||
|
CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_ARRAY_HEIGHT = 28,
|
||
|
CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_ARRAY_NUMSLICES = 29,
|
||
|
CU_DEVICE_ATTRIBUTE_SURFACE_ALIGNMENT = 30,
|
||
|
CU_DEVICE_ATTRIBUTE_CONCURRENT_KERNELS = 31,
|
||
|
CU_DEVICE_ATTRIBUTE_ECC_ENABLED = 32,
|
||
|
CU_DEVICE_ATTRIBUTE_PCI_BUS_ID = 33,
|
||
|
CU_DEVICE_ATTRIBUTE_PCI_DEVICE_ID = 34,
|
||
|
CU_DEVICE_ATTRIBUTE_TCC_DRIVER = 35,
|
||
|
CU_DEVICE_ATTRIBUTE_MEMORY_CLOCK_RATE = 36,
|
||
|
CU_DEVICE_ATTRIBUTE_GLOBAL_MEMORY_BUS_WIDTH = 37,
|
||
|
CU_DEVICE_ATTRIBUTE_L2_CACHE_SIZE = 38,
|
||
|
CU_DEVICE_ATTRIBUTE_MAX_THREADS_PER_MULTIPROCESSOR = 39,
|
||
|
CU_DEVICE_ATTRIBUTE_ASYNC_ENGINE_COUNT = 40,
|
||
|
CU_DEVICE_ATTRIBUTE_UNIFIED_ADDRESSING = 41,
|
||
|
CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE1D_LAYERED_WIDTH = 42,
|
||
|
CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE1D_LAYERED_LAYERS = 43,
|
||
|
CU_DEVICE_ATTRIBUTE_CAN_TEX2D_GATHER = 44,
|
||
|
CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_GATHER_WIDTH = 45,
|
||
|
CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_GATHER_HEIGHT = 46,
|
||
|
CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE3D_WIDTH_ALTERNATE = 47,
|
||
|
CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE3D_HEIGHT_ALTERNATE = 48,
|
||
|
CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE3D_DEPTH_ALTERNATE = 49,
|
||
|
CU_DEVICE_ATTRIBUTE_PCI_DOMAIN_ID = 50,
|
||
|
CU_DEVICE_ATTRIBUTE_TEXTURE_PITCH_ALIGNMENT = 51,
|
||
|
CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURECUBEMAP_WIDTH = 52,
|
||
|
CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURECUBEMAP_LAYERED_WIDTH = 53,
|
||
|
CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURECUBEMAP_LAYERED_LAYERS = 54,
|
||
|
CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE1D_WIDTH = 55,
|
||
|
CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE2D_WIDTH = 56,
|
||
|
CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE2D_HEIGHT = 57,
|
||
|
CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE3D_WIDTH = 58,
|
||
|
CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE3D_HEIGHT = 59,
|
||
|
CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE3D_DEPTH = 60,
|
||
|
CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE1D_LAYERED_WIDTH = 61,
|
||
|
CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE1D_LAYERED_LAYERS = 62,
|
||
|
CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE2D_LAYERED_WIDTH = 63,
|
||
|
CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE2D_LAYERED_HEIGHT = 64,
|
||
|
CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE2D_LAYERED_LAYERS = 65,
|
||
|
CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACECUBEMAP_WIDTH = 66,
|
||
|
CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACECUBEMAP_LAYERED_WIDTH = 67,
|
||
|
CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACECUBEMAP_LAYERED_LAYERS = 68,
|
||
|
CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE1D_LINEAR_WIDTH = 69,
|
||
|
CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_LINEAR_WIDTH = 70,
|
||
|
CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_LINEAR_HEIGHT = 71,
|
||
|
CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_LINEAR_PITCH = 72,
|
||
|
CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_MIPMAPPED_WIDTH = 73,
|
||
|
CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_MIPMAPPED_HEIGHT = 74,
|
||
|
CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MAJOR = 75,
|
||
|
CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MINOR = 76,
|
||
|
CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE1D_MIPMAPPED_WIDTH = 77,
|
||
|
CU_DEVICE_ATTRIBUTE_STREAM_PRIORITIES_SUPPORTED = 78,
|
||
|
CU_DEVICE_ATTRIBUTE_GLOBAL_L1_CACHE_SUPPORTED = 79,
|
||
|
CU_DEVICE_ATTRIBUTE_LOCAL_L1_CACHE_SUPPORTED = 80,
|
||
|
CU_DEVICE_ATTRIBUTE_MAX_SHARED_MEMORY_PER_MULTIPROCESSOR = 81,
|
||
|
CU_DEVICE_ATTRIBUTE_MAX_REGISTERS_PER_MULTIPROCESSOR = 82,
|
||
|
CU_DEVICE_ATTRIBUTE_MANAGED_MEMORY = 83,
|
||
|
CU_DEVICE_ATTRIBUTE_MULTI_GPU_BOARD = 84,
|
||
|
CU_DEVICE_ATTRIBUTE_MULTI_GPU_BOARD_GROUP_ID = 85,
|
||
|
CU_DEVICE_ATTRIBUTE_HOST_NATIVE_ATOMIC_SUPPORTED = 86,
|
||
|
CU_DEVICE_ATTRIBUTE_SINGLE_TO_DOUBLE_PRECISION_PERF_RATIO = 87,
|
||
|
CU_DEVICE_ATTRIBUTE_PAGEABLE_MEMORY_ACCESS = 88,
|
||
|
CU_DEVICE_ATTRIBUTE_CONCURRENT_MANAGED_ACCESS = 89,
|
||
|
CU_DEVICE_ATTRIBUTE_COMPUTE_PREEMPTION_SUPPORTED = 90,
|
||
|
CU_DEVICE_ATTRIBUTE_CAN_USE_HOST_POINTER_FOR_REGISTERED_MEM = 91,
|
||
|
CU_DEVICE_ATTRIBUTE_CAN_USE_STREAM_MEM_OPS = 92,
|
||
|
CU_DEVICE_ATTRIBUTE_CAN_USE_64_BIT_STREAM_MEM_OPS = 93,
|
||
|
CU_DEVICE_ATTRIBUTE_CAN_USE_STREAM_WAIT_VALUE_NOR = 94,
|
||
|
CU_DEVICE_ATTRIBUTE_COOPERATIVE_LAUNCH = 95,
|
||
|
CU_DEVICE_ATTRIBUTE_COOPERATIVE_MULTI_DEVICE_LAUNCH = 96,
|
||
|
CU_DEVICE_ATTRIBUTE_MAX_SHARED_MEMORY_PER_BLOCK_OPTIN = 97,
|
||
|
CU_DEVICE_ATTRIBUTE_CAN_FLUSH_REMOTE_WRITES = 98,
|
||
|
CU_DEVICE_ATTRIBUTE_HOST_REGISTER_SUPPORTED = 99,
|
||
|
CU_DEVICE_ATTRIBUTE_PAGEABLE_MEMORY_ACCESS_USES_HOST_PAGE_TABLES = 100,
|
||
|
CU_DEVICE_ATTRIBUTE_DIRECT_MANAGED_MEM_ACCESS_FROM_HOST = 101,
|
||
|
CU_DEVICE_ATTRIBUTE_VIRTUAL_ADDRESS_MANAGEMENT_SUPPORTED = 102,
|
||
|
CU_DEVICE_ATTRIBUTE_VIRTUAL_MEMORY_MANAGEMENT_SUPPORTED = 102,
|
||
|
CU_DEVICE_ATTRIBUTE_HANDLE_TYPE_POSIX_FILE_DESCRIPTOR_SUPPORTED = 103,
|
||
|
CU_DEVICE_ATTRIBUTE_HANDLE_TYPE_WIN32_HANDLE_SUPPORTED = 104,
|
||
|
CU_DEVICE_ATTRIBUTE_HANDLE_TYPE_WIN32_KMT_HANDLE_SUPPORTED = 105,
|
||
|
CU_DEVICE_ATTRIBUTE_MAX_BLOCKS_PER_MULTIPROCESSOR = 106,
|
||
|
CU_DEVICE_ATTRIBUTE_GENERIC_COMPRESSION_SUPPORTED = 107,
|
||
|
CU_DEVICE_ATTRIBUTE_MAX_PERSISTING_L2_CACHE_SIZE = 108,
|
||
|
CU_DEVICE_ATTRIBUTE_MAX_ACCESS_POLICY_WINDOW_SIZE = 109,
|
||
|
CU_DEVICE_ATTRIBUTE_GPU_DIRECT_RDMA_WITH_CUDA_VMM_SUPPORTED = 110,
|
||
|
CU_DEVICE_ATTRIBUTE_RESERVED_SHARED_MEMORY_PER_BLOCK = 111,
|
||
|
CU_DEVICE_ATTRIBUTE_SPARSE_CUDA_ARRAY_SUPPORTED = 112,
|
||
|
CU_DEVICE_ATTRIBUTE_READ_ONLY_HOST_REGISTER_SUPPORTED = 113,
|
||
|
CU_DEVICE_ATTRIBUTE_TIMELINE_SEMAPHORE_INTEROP_SUPPORTED = 114,
|
||
|
CU_DEVICE_ATTRIBUTE_MEMORY_POOLS_SUPPORTED = 115,
|
||
|
CU_DEVICE_ATTRIBUTE_MAX,
|
||
|
} CUdevice_attribute;
|
||
|
|
||
|
typedef struct CUdevprop_st {
|
||
|
int maxThreadsPerBlock;
|
||
|
int maxThreadsDim[3];
|
||
|
int maxGridSize[3];
|
||
|
int sharedMemPerBlock;
|
||
|
int totalConstantMemory;
|
||
|
int SIMDWidth;
|
||
|
int memPitch;
|
||
|
int regsPerBlock;
|
||
|
int clockRate;
|
||
|
int textureAlign;
|
||
|
} CUdevprop;
|
||
|
|
||
|
typedef enum CUpointer_attribute_enum {
|
||
|
CU_POINTER_ATTRIBUTE_CONTEXT = 1,
|
||
|
CU_POINTER_ATTRIBUTE_MEMORY_TYPE = 2,
|
||
|
CU_POINTER_ATTRIBUTE_DEVICE_POINTER = 3,
|
||
|
CU_POINTER_ATTRIBUTE_HOST_POINTER = 4,
|
||
|
CU_POINTER_ATTRIBUTE_P2P_TOKENS = 5,
|
||
|
CU_POINTER_ATTRIBUTE_SYNC_MEMOPS = 6,
|
||
|
CU_POINTER_ATTRIBUTE_BUFFER_ID = 7,
|
||
|
CU_POINTER_ATTRIBUTE_IS_MANAGED = 8,
|
||
|
CU_POINTER_ATTRIBUTE_DEVICE_ORDINAL = 9,
|
||
|
CU_POINTER_ATTRIBUTE_IS_LEGACY_CUDA_IPC_CAPABLE = 10,
|
||
|
CU_POINTER_ATTRIBUTE_RANGE_START_ADDR = 11,
|
||
|
CU_POINTER_ATTRIBUTE_RANGE_SIZE = 12,
|
||
|
CU_POINTER_ATTRIBUTE_MAPPED = 13,
|
||
|
CU_POINTER_ATTRIBUTE_ALLOWED_HANDLE_TYPES = 14,
|
||
|
CU_POINTER_ATTRIBUTE_IS_GPU_DIRECT_RDMA_CAPABLE = 15,
|
||
|
CU_POINTER_ATTRIBUTE_ACCESS_FLAGS = 16,
|
||
|
} CUpointer_attribute;
|
||
|
|
||
|
typedef enum CUfunction_attribute_enum {
|
||
|
CU_FUNC_ATTRIBUTE_MAX_THREADS_PER_BLOCK = 0,
|
||
|
CU_FUNC_ATTRIBUTE_SHARED_SIZE_BYTES = 1,
|
||
|
CU_FUNC_ATTRIBUTE_CONST_SIZE_BYTES = 2,
|
||
|
CU_FUNC_ATTRIBUTE_LOCAL_SIZE_BYTES = 3,
|
||
|
CU_FUNC_ATTRIBUTE_NUM_REGS = 4,
|
||
|
CU_FUNC_ATTRIBUTE_PTX_VERSION = 5,
|
||
|
CU_FUNC_ATTRIBUTE_BINARY_VERSION = 6,
|
||
|
CU_FUNC_ATTRIBUTE_CACHE_MODE_CA = 7,
|
||
|
CU_FUNC_ATTRIBUTE_MAX_DYNAMIC_SHARED_SIZE_BYTES = 8,
|
||
|
CU_FUNC_ATTRIBUTE_PREFERRED_SHARED_MEMORY_CARVEOUT = 9,
|
||
|
CU_FUNC_ATTRIBUTE_MAX,
|
||
|
} CUfunction_attribute;
|
||
|
|
||
|
typedef enum CUfunc_cache_enum {
|
||
|
CU_FUNC_CACHE_PREFER_NONE = 0x00,
|
||
|
CU_FUNC_CACHE_PREFER_SHARED = 0x01,
|
||
|
CU_FUNC_CACHE_PREFER_L1 = 0x02,
|
||
|
CU_FUNC_CACHE_PREFER_EQUAL = 0x03,
|
||
|
} CUfunc_cache;
|
||
|
|
||
|
typedef enum CUsharedconfig_enum {
|
||
|
CU_SHARED_MEM_CONFIG_DEFAULT_BANK_SIZE = 0x00,
|
||
|
CU_SHARED_MEM_CONFIG_FOUR_BYTE_BANK_SIZE = 0x01,
|
||
|
CU_SHARED_MEM_CONFIG_EIGHT_BYTE_BANK_SIZE = 0x02,
|
||
|
} CUsharedconfig;
|
||
|
|
||
|
typedef enum CUshared_carveout_enum {
|
||
|
CU_SHAREDMEM_CARVEOUT_DEFAULT = -1,
|
||
|
CU_SHAREDMEM_CARVEOUT_MAX_SHARED = 100,
|
||
|
CU_SHAREDMEM_CARVEOUT_MAX_L1 = 0,
|
||
|
} CUshared_carveout;
|
||
|
|
||
|
typedef enum CUmemorytype_enum {
|
||
|
CU_MEMORYTYPE_HOST = 0x01,
|
||
|
CU_MEMORYTYPE_DEVICE = 0x02,
|
||
|
CU_MEMORYTYPE_ARRAY = 0x03,
|
||
|
CU_MEMORYTYPE_UNIFIED = 0x04,
|
||
|
} CUmemorytype;
|
||
|
|
||
|
typedef enum CUcomputemode_enum {
|
||
|
CU_COMPUTEMODE_DEFAULT = 0,
|
||
|
CU_COMPUTEMODE_PROHIBITED = 2,
|
||
|
CU_COMPUTEMODE_EXCLUSIVE_PROCESS = 3,
|
||
|
} CUcomputemode;
|
||
|
|
||
|
typedef enum CUmem_advise_enum {
|
||
|
CU_MEM_ADVISE_SET_READ_MOSTLY = 1,
|
||
|
CU_MEM_ADVISE_UNSET_READ_MOSTLY = 2,
|
||
|
CU_MEM_ADVISE_SET_PREFERRED_LOCATION = 3,
|
||
|
CU_MEM_ADVISE_UNSET_PREFERRED_LOCATION = 4,
|
||
|
CU_MEM_ADVISE_SET_ACCESSED_BY = 5,
|
||
|
CU_MEM_ADVISE_UNSET_ACCESSED_BY = 6,
|
||
|
} CUmem_advise;
|
||
|
|
||
|
typedef enum CUmem_range_attribute_enum {
|
||
|
CU_MEM_RANGE_ATTRIBUTE_READ_MOSTLY = 1,
|
||
|
CU_MEM_RANGE_ATTRIBUTE_PREFERRED_LOCATION = 2,
|
||
|
CU_MEM_RANGE_ATTRIBUTE_ACCESSED_BY = 3,
|
||
|
CU_MEM_RANGE_ATTRIBUTE_LAST_PREFETCH_LOCATION = 4,
|
||
|
} CUmem_range_attribute;
|
||
|
|
||
|
typedef enum CUjit_option_enum {
|
||
|
CU_JIT_MAX_REGISTERS = 0,
|
||
|
CU_JIT_THREADS_PER_BLOCK,
|
||
|
CU_JIT_WALL_TIME,
|
||
|
CU_JIT_INFO_LOG_BUFFER,
|
||
|
CU_JIT_INFO_LOG_BUFFER_SIZE_BYTES,
|
||
|
CU_JIT_ERROR_LOG_BUFFER,
|
||
|
CU_JIT_ERROR_LOG_BUFFER_SIZE_BYTES,
|
||
|
CU_JIT_OPTIMIZATION_LEVEL,
|
||
|
CU_JIT_TARGET_FROM_CUCONTEXT,
|
||
|
CU_JIT_TARGET,
|
||
|
CU_JIT_FALLBACK_STRATEGY,
|
||
|
CU_JIT_GENERATE_DEBUG_INFO,
|
||
|
CU_JIT_LOG_VERBOSE,
|
||
|
CU_JIT_GENERATE_LINE_INFO,
|
||
|
CU_JIT_CACHE_MODE,
|
||
|
CU_JIT_NEW_SM3X_OPT,
|
||
|
CU_JIT_FAST_COMPILE,
|
||
|
CU_JIT_GLOBAL_SYMBOL_NAMES,
|
||
|
CU_JIT_GLOBAL_SYMBOL_ADDRESSES,
|
||
|
CU_JIT_GLOBAL_SYMBOL_COUNT,
|
||
|
CU_JIT_NUM_OPTIONS,
|
||
|
} CUjit_option;
|
||
|
|
||
|
typedef enum CUjit_target_enum {
|
||
|
CU_TARGET_COMPUTE_20 = 20,
|
||
|
CU_TARGET_COMPUTE_21 = 21,
|
||
|
CU_TARGET_COMPUTE_30 = 30,
|
||
|
CU_TARGET_COMPUTE_32 = 32,
|
||
|
CU_TARGET_COMPUTE_35 = 35,
|
||
|
CU_TARGET_COMPUTE_37 = 37,
|
||
|
CU_TARGET_COMPUTE_50 = 50,
|
||
|
CU_TARGET_COMPUTE_52 = 52,
|
||
|
CU_TARGET_COMPUTE_53 = 53,
|
||
|
CU_TARGET_COMPUTE_60 = 60,
|
||
|
CU_TARGET_COMPUTE_61 = 61,
|
||
|
CU_TARGET_COMPUTE_62 = 62,
|
||
|
CU_TARGET_COMPUTE_70 = 70,
|
||
|
CU_TARGET_COMPUTE_72 = 72,
|
||
|
CU_TARGET_COMPUTE_75 = 75,
|
||
|
CU_TARGET_COMPUTE_80 = 80,
|
||
|
CU_TARGET_COMPUTE_86 = 86,
|
||
|
} CUjit_target;
|
||
|
|
||
|
typedef enum CUjit_fallback_enum {
|
||
|
CU_PREFER_PTX = 0,
|
||
|
CU_PREFER_BINARY,
|
||
|
} CUjit_fallback;
|
||
|
|
||
|
typedef enum CUjit_cacheMode_enum {
|
||
|
CU_JIT_CACHE_OPTION_NONE = 0,
|
||
|
CU_JIT_CACHE_OPTION_CG,
|
||
|
CU_JIT_CACHE_OPTION_CA,
|
||
|
} CUjit_cacheMode;
|
||
|
|
||
|
typedef enum CUjitInputType_enum {
|
||
|
CU_JIT_INPUT_CUBIN = 0,
|
||
|
CU_JIT_INPUT_PTX,
|
||
|
CU_JIT_INPUT_FATBINARY,
|
||
|
CU_JIT_INPUT_OBJECT,
|
||
|
CU_JIT_INPUT_LIBRARY,
|
||
|
CU_JIT_NUM_INPUT_TYPES,
|
||
|
} CUjitInputType;
|
||
|
|
||
|
typedef struct CUlinkState_st* CUlinkState;
|
||
|
|
||
|
typedef enum CUgraphicsRegisterFlags_enum {
|
||
|
CU_GRAPHICS_REGISTER_FLAGS_NONE = 0x00,
|
||
|
CU_GRAPHICS_REGISTER_FLAGS_READ_ONLY = 0x01,
|
||
|
CU_GRAPHICS_REGISTER_FLAGS_WRITE_DISCARD = 0x02,
|
||
|
CU_GRAPHICS_REGISTER_FLAGS_SURFACE_LDST = 0x04,
|
||
|
CU_GRAPHICS_REGISTER_FLAGS_TEXTURE_GATHER = 0x08,
|
||
|
} CUgraphicsRegisterFlags;
|
||
|
|
||
|
typedef enum CUgraphicsMapResourceFlags_enum {
|
||
|
CU_GRAPHICS_MAP_RESOURCE_FLAGS_NONE = 0x00,
|
||
|
CU_GRAPHICS_MAP_RESOURCE_FLAGS_READ_ONLY = 0x01,
|
||
|
CU_GRAPHICS_MAP_RESOURCE_FLAGS_WRITE_DISCARD = 0x02,
|
||
|
} CUgraphicsMapResourceFlags;
|
||
|
|
||
|
typedef enum CUarray_cubemap_face_enum {
|
||
|
CU_CUBEMAP_FACE_POSITIVE_X = 0x00,
|
||
|
CU_CUBEMAP_FACE_NEGATIVE_X = 0x01,
|
||
|
CU_CUBEMAP_FACE_POSITIVE_Y = 0x02,
|
||
|
CU_CUBEMAP_FACE_NEGATIVE_Y = 0x03,
|
||
|
CU_CUBEMAP_FACE_POSITIVE_Z = 0x04,
|
||
|
CU_CUBEMAP_FACE_NEGATIVE_Z = 0x05,
|
||
|
} CUarray_cubemap_face;
|
||
|
|
||
|
typedef enum CUlimit_enum {
|
||
|
CU_LIMIT_STACK_SIZE = 0x00,
|
||
|
CU_LIMIT_PRINTF_FIFO_SIZE = 0x01,
|
||
|
CU_LIMIT_MALLOC_HEAP_SIZE = 0x02,
|
||
|
CU_LIMIT_DEV_RUNTIME_SYNC_DEPTH = 0x03,
|
||
|
CU_LIMIT_DEV_RUNTIME_PENDING_LAUNCH_COUNT = 0x04,
|
||
|
CU_LIMIT_MAX_L2_FETCH_GRANULARITY = 0x05,
|
||
|
CU_LIMIT_PERSISTING_L2_CACHE_SIZE = 0x06,
|
||
|
CU_LIMIT_MAX,
|
||
|
} CUlimit;
|
||
|
|
||
|
typedef enum CUresourcetype_enum {
|
||
|
CU_RESOURCE_TYPE_ARRAY = 0x00,
|
||
|
CU_RESOURCE_TYPE_MIPMAPPED_ARRAY = 0x01,
|
||
|
CU_RESOURCE_TYPE_LINEAR = 0x02,
|
||
|
CU_RESOURCE_TYPE_PITCH2D = 0x03,
|
||
|
} CUresourcetype;
|
||
|
|
||
|
typedef void (CUDA_CB *CUhostFn)(void* userData);
|
||
|
|
||
|
typedef enum CUaccessProperty_enum {
|
||
|
CU_ACCESS_PROPERTY_NORMAL = 0,
|
||
|
CU_ACCESS_PROPERTY_STREAMING = 1,
|
||
|
CU_ACCESS_PROPERTY_PERSISTING = 2,
|
||
|
} CUaccessProperty;
|
||
|
|
||
|
typedef struct CUaccessPolicyWindow_st {
|
||
|
void* base_ptr;
|
||
|
size_t num_bytes;
|
||
|
float hitRatio;
|
||
|
CUaccessProperty hitProp;
|
||
|
CUaccessProperty missProp;
|
||
|
} CUaccessPolicyWindow;
|
||
|
|
||
|
typedef struct CUDA_KERNEL_NODE_PARAMS_st {
|
||
|
CUfunction func;
|
||
|
unsigned int gridDimX;
|
||
|
unsigned int gridDimY;
|
||
|
unsigned int gridDimZ;
|
||
|
unsigned int blockDimX;
|
||
|
unsigned int blockDimY;
|
||
|
unsigned int blockDimZ;
|
||
|
unsigned int sharedMemBytes;
|
||
|
void** kernelParams;
|
||
|
void** extra;
|
||
|
} CUDA_KERNEL_NODE_PARAMS;
|
||
|
|
||
|
typedef struct CUDA_MEMSET_NODE_PARAMS_st {
|
||
|
CUdeviceptr dst;
|
||
|
size_t pitch;
|
||
|
unsigned int value;
|
||
|
unsigned int elementSize;
|
||
|
size_t width;
|
||
|
size_t height;
|
||
|
} CUDA_MEMSET_NODE_PARAMS;
|
||
|
|
||
|
typedef struct CUDA_HOST_NODE_PARAMS_st {
|
||
|
CUhostFn fn;
|
||
|
void* userData;
|
||
|
} CUDA_HOST_NODE_PARAMS;
|
||
|
|
||
|
typedef enum CUgraphNodeType_enum {
|
||
|
CU_GRAPH_NODE_TYPE_KERNEL = 0,
|
||
|
CU_GRAPH_NODE_TYPE_MEMCPY = 1,
|
||
|
CU_GRAPH_NODE_TYPE_MEMSET = 2,
|
||
|
CU_GRAPH_NODE_TYPE_HOST = 3,
|
||
|
CU_GRAPH_NODE_TYPE_GRAPH = 4,
|
||
|
CU_GRAPH_NODE_TYPE_EMPTY = 5,
|
||
|
CU_GRAPH_NODE_TYPE_WAIT_EVENT = 6,
|
||
|
CU_GRAPH_NODE_TYPE_EVENT_RECORD = 7,
|
||
|
CU_GRAPH_NODE_TYPE_EXT_SEMAS_SIGNAL = 8,
|
||
|
CU_GRAPH_NODE_TYPE_EXT_SEMAS_WAIT = 9,
|
||
|
} CUgraphNodeType;
|
||
|
|
||
|
typedef enum CUsynchronizationPolicy_enum {
|
||
|
CU_SYNC_POLICY_AUTO = 1,
|
||
|
CU_SYNC_POLICY_SPIN = 2,
|
||
|
CU_SYNC_POLICY_YIELD = 3,
|
||
|
CU_SYNC_POLICY_BLOCKING_SYNC = 4,
|
||
|
} CUsynchronizationPolicy;
|
||
|
|
||
|
typedef enum CUkernelNodeAttrID_enum {
|
||
|
CU_KERNEL_NODE_ATTRIBUTE_ACCESS_POLICY_WINDOW = 1,
|
||
|
CU_KERNEL_NODE_ATTRIBUTE_COOPERATIVE = 2,
|
||
|
} CUkernelNodeAttrID;
|
||
|
|
||
|
typedef union CUkernelNodeAttrValue_union {
|
||
|
CUaccessPolicyWindow accessPolicyWindow;
|
||
|
int cooperative;
|
||
|
} CUkernelNodeAttrValue;
|
||
|
|
||
|
typedef enum CUstreamCaptureStatus_enum {
|
||
|
CU_STREAM_CAPTURE_STATUS_NONE = 0,
|
||
|
CU_STREAM_CAPTURE_STATUS_ACTIVE = 1,
|
||
|
CU_STREAM_CAPTURE_STATUS_INVALIDATED = 2,
|
||
|
} CUstreamCaptureStatus;
|
||
|
|
||
|
typedef enum CUstreamCaptureMode_enum {
|
||
|
CU_STREAM_CAPTURE_MODE_GLOBAL = 0,
|
||
|
CU_STREAM_CAPTURE_MODE_THREAD_LOCAL = 1,
|
||
|
CU_STREAM_CAPTURE_MODE_RELAXED = 2,
|
||
|
} CUstreamCaptureMode;
|
||
|
|
||
|
typedef enum CUstreamAttrID_enum {
|
||
|
CU_STREAM_ATTRIBUTE_ACCESS_POLICY_WINDOW = 1,
|
||
|
CU_STREAM_ATTRIBUTE_SYNCHRONIZATION_POLICY = 3,
|
||
|
} CUstreamAttrID;
|
||
|
|
||
|
typedef union CUstreamAttrValue_union {
|
||
|
CUaccessPolicyWindow accessPolicyWindow;
|
||
|
CUsynchronizationPolicy syncPolicy;
|
||
|
} CUstreamAttrValue;
|
||
|
|
||
|
typedef enum cudaError_enum {
|
||
|
CUDA_SUCCESS = 0,
|
||
|
CUDA_ERROR_INVALID_VALUE = 1,
|
||
|
CUDA_ERROR_OUT_OF_MEMORY = 2,
|
||
|
CUDA_ERROR_NOT_INITIALIZED = 3,
|
||
|
CUDA_ERROR_DEINITIALIZED = 4,
|
||
|
CUDA_ERROR_PROFILER_DISABLED = 5,
|
||
|
CUDA_ERROR_PROFILER_NOT_INITIALIZED = 6,
|
||
|
CUDA_ERROR_PROFILER_ALREADY_STARTED = 7,
|
||
|
CUDA_ERROR_PROFILER_ALREADY_STOPPED = 8,
|
||
|
CUDA_ERROR_STUB_LIBRARY = 34,
|
||
|
CUDA_ERROR_NO_DEVICE = 100,
|
||
|
CUDA_ERROR_INVALID_DEVICE = 101,
|
||
|
CUDA_ERROR_DEVICE_NOT_LICENSED = 102,
|
||
|
CUDA_ERROR_INVALID_IMAGE = 200,
|
||
|
CUDA_ERROR_INVALID_CONTEXT = 201,
|
||
|
CUDA_ERROR_CONTEXT_ALREADY_CURRENT = 202,
|
||
|
CUDA_ERROR_MAP_FAILED = 205,
|
||
|
CUDA_ERROR_UNMAP_FAILED = 206,
|
||
|
CUDA_ERROR_ARRAY_IS_MAPPED = 207,
|
||
|
CUDA_ERROR_ALREADY_MAPPED = 208,
|
||
|
CUDA_ERROR_NO_BINARY_FOR_GPU = 209,
|
||
|
CUDA_ERROR_ALREADY_ACQUIRED = 210,
|
||
|
CUDA_ERROR_NOT_MAPPED = 211,
|
||
|
CUDA_ERROR_NOT_MAPPED_AS_ARRAY = 212,
|
||
|
CUDA_ERROR_NOT_MAPPED_AS_POINTER = 213,
|
||
|
CUDA_ERROR_ECC_UNCORRECTABLE = 214,
|
||
|
CUDA_ERROR_UNSUPPORTED_LIMIT = 215,
|
||
|
CUDA_ERROR_CONTEXT_ALREADY_IN_USE = 216,
|
||
|
CUDA_ERROR_PEER_ACCESS_UNSUPPORTED = 217,
|
||
|
CUDA_ERROR_INVALID_PTX = 218,
|
||
|
CUDA_ERROR_INVALID_GRAPHICS_CONTEXT = 219,
|
||
|
CUDA_ERROR_NVLINK_UNCORRECTABLE = 220,
|
||
|
CUDA_ERROR_JIT_COMPILER_NOT_FOUND = 221,
|
||
|
CUDA_ERROR_UNSUPPORTED_PTX_VERSION = 222,
|
||
|
CUDA_ERROR_JIT_COMPILATION_DISABLED = 223,
|
||
|
CUDA_ERROR_INVALID_SOURCE = 300,
|
||
|
CUDA_ERROR_FILE_NOT_FOUND = 301,
|
||
|
CUDA_ERROR_SHARED_OBJECT_SYMBOL_NOT_FOUND = 302,
|
||
|
CUDA_ERROR_SHARED_OBJECT_INIT_FAILED = 303,
|
||
|
CUDA_ERROR_OPERATING_SYSTEM = 304,
|
||
|
CUDA_ERROR_INVALID_HANDLE = 400,
|
||
|
CUDA_ERROR_ILLEGAL_STATE = 401,
|
||
|
CUDA_ERROR_NOT_FOUND = 500,
|
||
|
CUDA_ERROR_NOT_READY = 600,
|
||
|
CUDA_ERROR_ILLEGAL_ADDRESS = 700,
|
||
|
CUDA_ERROR_LAUNCH_OUT_OF_RESOURCES = 701,
|
||
|
CUDA_ERROR_LAUNCH_TIMEOUT = 702,
|
||
|
CUDA_ERROR_LAUNCH_INCOMPATIBLE_TEXTURING = 703,
|
||
|
CUDA_ERROR_PEER_ACCESS_ALREADY_ENABLED = 704,
|
||
|
CUDA_ERROR_PEER_ACCESS_NOT_ENABLED = 705,
|
||
|
CUDA_ERROR_PRIMARY_CONTEXT_ACTIVE = 708,
|
||
|
CUDA_ERROR_CONTEXT_IS_DESTROYED = 709,
|
||
|
CUDA_ERROR_ASSERT = 710,
|
||
|
CUDA_ERROR_TOO_MANY_PEERS = 711,
|
||
|
CUDA_ERROR_HOST_MEMORY_ALREADY_REGISTERED = 712,
|
||
|
CUDA_ERROR_HOST_MEMORY_NOT_REGISTERED = 713,
|
||
|
CUDA_ERROR_HARDWARE_STACK_ERROR = 714,
|
||
|
CUDA_ERROR_ILLEGAL_INSTRUCTION = 715,
|
||
|
CUDA_ERROR_MISALIGNED_ADDRESS = 716,
|
||
|
CUDA_ERROR_INVALID_ADDRESS_SPACE = 717,
|
||
|
CUDA_ERROR_INVALID_PC = 718,
|
||
|
CUDA_ERROR_LAUNCH_FAILED = 719,
|
||
|
CUDA_ERROR_COOPERATIVE_LAUNCH_TOO_LARGE = 720,
|
||
|
CUDA_ERROR_NOT_PERMITTED = 800,
|
||
|
CUDA_ERROR_NOT_SUPPORTED = 801,
|
||
|
CUDA_ERROR_SYSTEM_NOT_READY = 802,
|
||
|
CUDA_ERROR_SYSTEM_DRIVER_MISMATCH = 803,
|
||
|
CUDA_ERROR_COMPAT_NOT_SUPPORTED_ON_DEVICE = 804,
|
||
|
CUDA_ERROR_STREAM_CAPTURE_UNSUPPORTED = 900,
|
||
|
CUDA_ERROR_STREAM_CAPTURE_INVALIDATED = 901,
|
||
|
CUDA_ERROR_STREAM_CAPTURE_MERGE = 902,
|
||
|
CUDA_ERROR_STREAM_CAPTURE_UNMATCHED = 903,
|
||
|
CUDA_ERROR_STREAM_CAPTURE_UNJOINED = 904,
|
||
|
CUDA_ERROR_STREAM_CAPTURE_ISOLATION = 905,
|
||
|
CUDA_ERROR_STREAM_CAPTURE_IMPLICIT = 906,
|
||
|
CUDA_ERROR_CAPTURED_EVENT = 907,
|
||
|
CUDA_ERROR_STREAM_CAPTURE_WRONG_THREAD = 908,
|
||
|
CUDA_ERROR_TIMEOUT = 909,
|
||
|
CUDA_ERROR_GRAPH_EXEC_UPDATE_FAILURE = 910,
|
||
|
CUDA_ERROR_UNKNOWN = 999,
|
||
|
} CUresult;
|
||
|
|
||
|
typedef enum CUdevice_P2PAttribute_enum {
|
||
|
CU_DEVICE_P2P_ATTRIBUTE_PERFORMANCE_RANK = 0x01,
|
||
|
CU_DEVICE_P2P_ATTRIBUTE_ACCESS_SUPPORTED = 0x02,
|
||
|
CU_DEVICE_P2P_ATTRIBUTE_NATIVE_ATOMIC_SUPPORTED = 0x03,
|
||
|
CU_DEVICE_P2P_ATTRIBUTE_ACCESS_ACCESS_SUPPORTED = 0x04,
|
||
|
CU_DEVICE_P2P_ATTRIBUTE_CUDA_ARRAY_ACCESS_SUPPORTED = 0x04,
|
||
|
} CUdevice_P2PAttribute;
|
||
|
|
||
|
typedef void (CUDA_CB *CUstreamCallback)(CUstream hStream, CUresult status, void* userData);
|
||
|
typedef size_t (CUDA_CB *CUoccupancyB2DSize)(int blockSize);
|
||
|
|
||
|
typedef struct CUDA_MEMCPY2D_st {
|
||
|
size_t srcXInBytes;
|
||
|
size_t srcY;
|
||
|
CUmemorytype srcMemoryType;
|
||
|
const void* srcHost;
|
||
|
CUdeviceptr srcDevice;
|
||
|
CUarray srcArray;
|
||
|
size_t srcPitch;
|
||
|
size_t dstXInBytes;
|
||
|
size_t dstY;
|
||
|
CUmemorytype dstMemoryType;
|
||
|
void* dstHost;
|
||
|
CUdeviceptr dstDevice;
|
||
|
CUarray dstArray;
|
||
|
size_t dstPitch;
|
||
|
size_t WidthInBytes;
|
||
|
size_t Height;
|
||
|
} CUDA_MEMCPY2D;
|
||
|
|
||
|
typedef struct CUDA_MEMCPY3D_st {
|
||
|
size_t srcXInBytes;
|
||
|
size_t srcY;
|
||
|
size_t srcZ;
|
||
|
size_t srcLOD;
|
||
|
CUmemorytype srcMemoryType;
|
||
|
const void* srcHost;
|
||
|
CUdeviceptr srcDevice;
|
||
|
CUarray srcArray;
|
||
|
void* reserved0;
|
||
|
size_t srcPitch;
|
||
|
size_t srcHeight;
|
||
|
size_t dstXInBytes;
|
||
|
size_t dstY;
|
||
|
size_t dstZ;
|
||
|
size_t dstLOD;
|
||
|
CUmemorytype dstMemoryType;
|
||
|
void* dstHost;
|
||
|
CUdeviceptr dstDevice;
|
||
|
CUarray dstArray;
|
||
|
void* reserved1;
|
||
|
size_t dstPitch;
|
||
|
size_t dstHeight;
|
||
|
size_t WidthInBytes;
|
||
|
size_t Height;
|
||
|
size_t Depth;
|
||
|
} CUDA_MEMCPY3D;
|
||
|
|
||
|
typedef struct CUDA_MEMCPY3D_PEER_st {
|
||
|
size_t srcXInBytes;
|
||
|
size_t srcY;
|
||
|
size_t srcZ;
|
||
|
size_t srcLOD;
|
||
|
CUmemorytype srcMemoryType;
|
||
|
const void* srcHost;
|
||
|
CUdeviceptr srcDevice;
|
||
|
CUarray srcArray;
|
||
|
CUcontext srcContext;
|
||
|
size_t srcPitch;
|
||
|
size_t srcHeight;
|
||
|
size_t dstXInBytes;
|
||
|
size_t dstY;
|
||
|
size_t dstZ;
|
||
|
size_t dstLOD;
|
||
|
CUmemorytype dstMemoryType;
|
||
|
void* dstHost;
|
||
|
CUdeviceptr dstDevice;
|
||
|
CUarray dstArray;
|
||
|
CUcontext dstContext;
|
||
|
size_t dstPitch;
|
||
|
size_t dstHeight;
|
||
|
size_t WidthInBytes;
|
||
|
size_t Height;
|
||
|
size_t Depth;
|
||
|
} CUDA_MEMCPY3D_PEER;
|
||
|
|
||
|
typedef struct CUDA_ARRAY_DESCRIPTOR_st {
|
||
|
size_t Width;
|
||
|
size_t Height;
|
||
|
CUarray_format Format;
|
||
|
unsigned int NumChannels;
|
||
|
} CUDA_ARRAY_DESCRIPTOR;
|
||
|
|
||
|
typedef struct CUDA_ARRAY3D_DESCRIPTOR_st {
|
||
|
size_t Width;
|
||
|
size_t Height;
|
||
|
size_t Depth;
|
||
|
CUarray_format Format;
|
||
|
unsigned int NumChannels;
|
||
|
unsigned int Flags;
|
||
|
} CUDA_ARRAY3D_DESCRIPTOR;
|
||
|
|
||
|
typedef struct CUDA_ARRAY_SPARSE_PROPERTIES_st {
|
||
|
struct {
|
||
|
unsigned int width;
|
||
|
unsigned int height;
|
||
|
unsigned int depth;
|
||
|
} tileExtent;
|
||
|
unsigned int miptailFirstLevel;
|
||
|
unsigned long long miptailSize;
|
||
|
unsigned int flags;
|
||
|
unsigned int reserved[4];
|
||
|
} CUDA_ARRAY_SPARSE_PROPERTIES;
|
||
|
|
||
|
typedef struct CUDA_RESOURCE_DESC_st {
|
||
|
CUresourcetype resType;
|
||
|
union {
|
||
|
struct {
|
||
|
CUarray hArray;
|
||
|
} array;
|
||
|
struct {
|
||
|
CUmipmappedArray hMipmappedArray;
|
||
|
} mipmap;
|
||
|
struct {
|
||
|
CUdeviceptr devPtr;
|
||
|
CUarray_format format;
|
||
|
unsigned int numChannels;
|
||
|
size_t sizeInBytes;
|
||
|
} linear;
|
||
|
struct {
|
||
|
CUdeviceptr devPtr;
|
||
|
CUarray_format format;
|
||
|
unsigned int numChannels;
|
||
|
size_t width;
|
||
|
size_t height;
|
||
|
size_t pitchInBytes;
|
||
|
} pitch2D;
|
||
|
struct {
|
||
|
int reserved[32];
|
||
|
} reserved;
|
||
|
} res;
|
||
|
unsigned int flags;
|
||
|
} CUDA_RESOURCE_DESC;
|
||
|
|
||
|
typedef struct CUDA_TEXTURE_DESC_st {
|
||
|
CUaddress_mode addressMode[3];
|
||
|
CUfilter_mode filterMode;
|
||
|
unsigned int flags;
|
||
|
unsigned int maxAnisotropy;
|
||
|
CUfilter_mode mipmapFilterMode;
|
||
|
float mipmapLevelBias;
|
||
|
float minMipmapLevelClamp;
|
||
|
float maxMipmapLevelClamp;
|
||
|
float borderColor[4];
|
||
|
int reserved[12];
|
||
|
} CUDA_TEXTURE_DESC;
|
||
|
|
||
|
typedef enum CUresourceViewFormat_enum {
|
||
|
CU_RES_VIEW_FORMAT_NONE = 0x00,
|
||
|
CU_RES_VIEW_FORMAT_UINT_1X8 = 0x01,
|
||
|
CU_RES_VIEW_FORMAT_UINT_2X8 = 0x02,
|
||
|
CU_RES_VIEW_FORMAT_UINT_4X8 = 0x03,
|
||
|
CU_RES_VIEW_FORMAT_SINT_1X8 = 0x04,
|
||
|
CU_RES_VIEW_FORMAT_SINT_2X8 = 0x05,
|
||
|
CU_RES_VIEW_FORMAT_SINT_4X8 = 0x06,
|
||
|
CU_RES_VIEW_FORMAT_UINT_1X16 = 0x07,
|
||
|
CU_RES_VIEW_FORMAT_UINT_2X16 = 0x08,
|
||
|
CU_RES_VIEW_FORMAT_UINT_4X16 = 0x09,
|
||
|
CU_RES_VIEW_FORMAT_SINT_1X16 = 0x0a,
|
||
|
CU_RES_VIEW_FORMAT_SINT_2X16 = 0x0b,
|
||
|
CU_RES_VIEW_FORMAT_SINT_4X16 = 0x0c,
|
||
|
CU_RES_VIEW_FORMAT_UINT_1X32 = 0x0d,
|
||
|
CU_RES_VIEW_FORMAT_UINT_2X32 = 0x0e,
|
||
|
CU_RES_VIEW_FORMAT_UINT_4X32 = 0x0f,
|
||
|
CU_RES_VIEW_FORMAT_SINT_1X32 = 0x10,
|
||
|
CU_RES_VIEW_FORMAT_SINT_2X32 = 0x11,
|
||
|
CU_RES_VIEW_FORMAT_SINT_4X32 = 0x12,
|
||
|
CU_RES_VIEW_FORMAT_FLOAT_1X16 = 0x13,
|
||
|
CU_RES_VIEW_FORMAT_FLOAT_2X16 = 0x14,
|
||
|
CU_RES_VIEW_FORMAT_FLOAT_4X16 = 0x15,
|
||
|
CU_RES_VIEW_FORMAT_FLOAT_1X32 = 0x16,
|
||
|
CU_RES_VIEW_FORMAT_FLOAT_2X32 = 0x17,
|
||
|
CU_RES_VIEW_FORMAT_FLOAT_4X32 = 0x18,
|
||
|
CU_RES_VIEW_FORMAT_UNSIGNED_BC1 = 0x19,
|
||
|
CU_RES_VIEW_FORMAT_UNSIGNED_BC2 = 0x1a,
|
||
|
CU_RES_VIEW_FORMAT_UNSIGNED_BC3 = 0x1b,
|
||
|
CU_RES_VIEW_FORMAT_UNSIGNED_BC4 = 0x1c,
|
||
|
CU_RES_VIEW_FORMAT_SIGNED_BC4 = 0x1d,
|
||
|
CU_RES_VIEW_FORMAT_UNSIGNED_BC5 = 0x1e,
|
||
|
CU_RES_VIEW_FORMAT_SIGNED_BC5 = 0x1f,
|
||
|
CU_RES_VIEW_FORMAT_UNSIGNED_BC6H = 0x20,
|
||
|
CU_RES_VIEW_FORMAT_SIGNED_BC6H = 0x21,
|
||
|
CU_RES_VIEW_FORMAT_UNSIGNED_BC7 = 0x22,
|
||
|
} CUresourceViewFormat;
|
||
|
|
||
|
typedef struct CUDA_RESOURCE_VIEW_DESC_st {
|
||
|
CUresourceViewFormat format;
|
||
|
size_t width;
|
||
|
size_t height;
|
||
|
size_t depth;
|
||
|
unsigned int firstMipmapLevel;
|
||
|
unsigned int lastMipmapLevel;
|
||
|
unsigned int firstLayer;
|
||
|
unsigned int lastLayer;
|
||
|
unsigned int reserved[16];
|
||
|
} CUDA_RESOURCE_VIEW_DESC;
|
||
|
|
||
|
typedef struct CUDA_POINTER_ATTRIBUTE_P2P_TOKENS_st {
|
||
|
unsigned long long p2pToken;
|
||
|
unsigned int vaSpaceToken;
|
||
|
} CUDA_POINTER_ATTRIBUTE_P2P_TOKENS;
|
||
|
|
||
|
typedef enum CUDA_POINTER_ATTRIBUTE_ACCESS_FLAGS_enum {
|
||
|
CU_POINTER_ATTRIBUTE_ACCESS_FLAG_NONE = 0x0,
|
||
|
CU_POINTER_ATTRIBUTE_ACCESS_FLAG_READ = 0x1,
|
||
|
CU_POINTER_ATTRIBUTE_ACCESS_FLAG_READWRITE = 0x3,
|
||
|
} CUDA_POINTER_ATTRIBUTE_ACCESS_FLAGS;
|
||
|
|
||
|
typedef struct CUDA_LAUNCH_PARAMS_st {
|
||
|
CUfunction function;
|
||
|
unsigned int gridDimX;
|
||
|
unsigned int gridDimY;
|
||
|
unsigned int gridDimZ;
|
||
|
unsigned int blockDimX;
|
||
|
unsigned int blockDimY;
|
||
|
unsigned int blockDimZ;
|
||
|
unsigned int sharedMemBytes;
|
||
|
CUstream hStream;
|
||
|
void** kernelParams;
|
||
|
} CUDA_LAUNCH_PARAMS;
|
||
|
|
||
|
typedef enum CUexternalMemoryHandleType_enum {
|
||
|
CU_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD = 1,
|
||
|
CU_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_WIN32 = 2,
|
||
|
CU_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_WIN32_KMT = 3,
|
||
|
CU_EXTERNAL_MEMORY_HANDLE_TYPE_D3D12_HEAP = 4,
|
||
|
CU_EXTERNAL_MEMORY_HANDLE_TYPE_D3D12_RESOURCE = 5,
|
||
|
CU_EXTERNAL_MEMORY_HANDLE_TYPE_D3D11_RESOURCE = 6,
|
||
|
CU_EXTERNAL_MEMORY_HANDLE_TYPE_D3D11_RESOURCE_KMT = 7,
|
||
|
CU_EXTERNAL_MEMORY_HANDLE_TYPE_NVSCIBUF = 8,
|
||
|
} CUexternalMemoryHandleType;
|
||
|
|
||
|
typedef struct CUDA_EXTERNAL_MEMORY_HANDLE_DESC_st {
|
||
|
CUexternalMemoryHandleType type;
|
||
|
union {
|
||
|
int fd;
|
||
|
struct {
|
||
|
void* handle;
|
||
|
const void* name;
|
||
|
} win32;
|
||
|
const void* nvSciBufObject;
|
||
|
} handle;
|
||
|
unsigned long long size;
|
||
|
unsigned int flags;
|
||
|
unsigned int reserved[16];
|
||
|
} CUDA_EXTERNAL_MEMORY_HANDLE_DESC;
|
||
|
|
||
|
typedef struct CUDA_EXTERNAL_MEMORY_BUFFER_DESC_st {
|
||
|
unsigned long long offset;
|
||
|
unsigned long long size;
|
||
|
unsigned int flags;
|
||
|
unsigned int reserved[16];
|
||
|
} CUDA_EXTERNAL_MEMORY_BUFFER_DESC;
|
||
|
|
||
|
typedef struct CUDA_EXTERNAL_MEMORY_MIPMAPPED_ARRAY_DESC_st {
|
||
|
unsigned long long offset;
|
||
|
CUDA_ARRAY3D_DESCRIPTOR arrayDesc;
|
||
|
unsigned int numLevels;
|
||
|
unsigned int reserved[16];
|
||
|
} CUDA_EXTERNAL_MEMORY_MIPMAPPED_ARRAY_DESC;
|
||
|
|
||
|
typedef enum CUexternalSemaphoreHandleType_enum {
|
||
|
CU_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD = 1,
|
||
|
CU_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_WIN32 = 2,
|
||
|
CU_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_WIN32_KMT = 3,
|
||
|
CU_EXTERNAL_SEMAPHORE_HANDLE_TYPE_D3D12_FENCE = 4,
|
||
|
CU_EXTERNAL_SEMAPHORE_HANDLE_TYPE_D3D11_FENCE = 5,
|
||
|
CU_EXTERNAL_SEMAPHORE_HANDLE_TYPE_NVSCISYNC = 6,
|
||
|
CU_EXTERNAL_SEMAPHORE_HANDLE_TYPE_D3D11_KEYED_MUTEX = 7,
|
||
|
CU_EXTERNAL_SEMAPHORE_HANDLE_TYPE_D3D11_KEYED_MUTEX_KMT = 8,
|
||
|
CU_EXTERNAL_SEMAPHORE_HANDLE_TYPE_TIMELINE_SEMAPHORE_FD = 9,
|
||
|
CU_EXTERNAL_SEMAPHORE_HANDLE_TYPE_TIMELINE_SEMAPHORE_WIN32 = 10,
|
||
|
} CUexternalSemaphoreHandleType;
|
||
|
|
||
|
typedef struct CUDA_EXTERNAL_SEMAPHORE_HANDLE_DESC_st {
|
||
|
CUexternalSemaphoreHandleType type;
|
||
|
union {
|
||
|
int fd;
|
||
|
struct {
|
||
|
void* handle;
|
||
|
const void* name;
|
||
|
} win32;
|
||
|
const void* nvSciSyncObj;
|
||
|
} handle;
|
||
|
unsigned int flags;
|
||
|
unsigned int reserved[16];
|
||
|
} CUDA_EXTERNAL_SEMAPHORE_HANDLE_DESC;
|
||
|
|
||
|
typedef struct CUDA_EXTERNAL_SEMAPHORE_SIGNAL_PARAMS_st {
|
||
|
struct {
|
||
|
struct {
|
||
|
unsigned long long value;
|
||
|
} fence;
|
||
|
union {
|
||
|
void* fence;
|
||
|
unsigned long long reserved;
|
||
|
} nvSciSync;
|
||
|
struct {
|
||
|
unsigned long long key;
|
||
|
} keyedMutex;
|
||
|
unsigned int reserved[12];
|
||
|
} params;
|
||
|
unsigned int flags;
|
||
|
unsigned int reserved[16];
|
||
|
} CUDA_EXTERNAL_SEMAPHORE_SIGNAL_PARAMS;
|
||
|
|
||
|
typedef struct CUDA_EXTERNAL_SEMAPHORE_WAIT_PARAMS_st {
|
||
|
struct {
|
||
|
struct {
|
||
|
unsigned long long value;
|
||
|
} fence;
|
||
|
union {
|
||
|
void* fence;
|
||
|
unsigned long long reserved;
|
||
|
} nvSciSync;
|
||
|
struct {
|
||
|
unsigned long long key;
|
||
|
unsigned int timeoutMs;
|
||
|
} keyedMutex;
|
||
|
unsigned int reserved[10];
|
||
|
} params;
|
||
|
unsigned int flags;
|
||
|
unsigned int reserved[16];
|
||
|
} CUDA_EXTERNAL_SEMAPHORE_WAIT_PARAMS;
|
||
|
|
||
|
typedef struct CUDA_EXT_SEM_SIGNAL_NODE_PARAMS_st {
|
||
|
CUexternalSemaphore* extSemArray;
|
||
|
const CUDA_EXTERNAL_SEMAPHORE_SIGNAL_PARAMS* paramsArray;
|
||
|
unsigned int numExtSems;
|
||
|
} CUDA_EXT_SEM_SIGNAL_NODE_PARAMS;
|
||
|
|
||
|
typedef struct CUDA_EXT_SEM_WAIT_NODE_PARAMS_st {
|
||
|
CUexternalSemaphore* extSemArray;
|
||
|
const CUDA_EXTERNAL_SEMAPHORE_WAIT_PARAMS* paramsArray;
|
||
|
unsigned int numExtSems;
|
||
|
} CUDA_EXT_SEM_WAIT_NODE_PARAMS;
|
||
|
|
||
|
typedef unsigned long long CUmemGenericAllocationHandle;
|
||
|
|
||
|
typedef enum CUmemAllocationHandleType_enum {
|
||
|
CU_MEM_HANDLE_TYPE_NONE = 0x0,
|
||
|
CU_MEM_HANDLE_TYPE_POSIX_FILE_DESCRIPTOR = 0x1,
|
||
|
CU_MEM_HANDLE_TYPE_WIN32 = 0x2,
|
||
|
CU_MEM_HANDLE_TYPE_WIN32_KMT = 0x4,
|
||
|
CU_MEM_HANDLE_TYPE_MAX = 0xFFFFFFFF,
|
||
|
} CUmemAllocationHandleType;
|
||
|
|
||
|
typedef enum CUmemAccess_flags_enum {
|
||
|
CU_MEM_ACCESS_FLAGS_PROT_NONE = 0x0,
|
||
|
CU_MEM_ACCESS_FLAGS_PROT_READ = 0x1,
|
||
|
CU_MEM_ACCESS_FLAGS_PROT_READWRITE = 0x3,
|
||
|
CU_MEM_ACCESS_FLAGS_PROT_MAX = 0xFFFFFFFF,
|
||
|
} CUmemAccess_flags;
|
||
|
|
||
|
typedef enum CUmemLocationType_enum {
|
||
|
CU_MEM_LOCATION_TYPE_INVALID = 0x0,
|
||
|
CU_MEM_LOCATION_TYPE_DEVICE = 0x1,
|
||
|
CU_MEM_LOCATION_TYPE_MAX = 0xFFFFFFFF,
|
||
|
} CUmemLocationType;
|
||
|
|
||
|
typedef enum CUmemAllocationType_enum {
|
||
|
CU_MEM_ALLOCATION_TYPE_INVALID = 0x0,
|
||
|
CU_MEM_ALLOCATION_TYPE_PINNED = 0x1,
|
||
|
CU_MEM_ALLOCATION_TYPE_MAX = 0xFFFFFFFF,
|
||
|
} CUmemAllocationType;
|
||
|
|
||
|
typedef enum CUmemAllocationGranularity_flags_enum {
|
||
|
CU_MEM_ALLOC_GRANULARITY_MINIMUM = 0x0,
|
||
|
CU_MEM_ALLOC_GRANULARITY_RECOMMENDED = 0x1,
|
||
|
} CUmemAllocationGranularity_flags;
|
||
|
|
||
|
typedef enum CUarraySparseSubresourceType_enum {
|
||
|
CU_ARRAY_SPARSE_SUBRESOURCE_TYPE_SPARSE_LEVEL = 0,
|
||
|
CU_ARRAY_SPARSE_SUBRESOURCE_TYPE_MIPTAIL = 1,
|
||
|
} CUarraySparseSubresourceType;
|
||
|
|
||
|
typedef enum CUmemOperationType_enum {
|
||
|
CU_MEM_OPERATION_TYPE_MAP = 1,
|
||
|
CU_MEM_OPERATION_TYPE_UNMAP = 2,
|
||
|
} CUmemOperationType;
|
||
|
|
||
|
typedef enum CUmemHandleType_enum {
|
||
|
CU_MEM_HANDLE_TYPE_GENERIC = 0,
|
||
|
} CUmemHandleType;
|
||
|
|
||
|
typedef struct CUarrayMapInfo_st {
|
||
|
CUresourcetype resourceType;
|
||
|
union {
|
||
|
CUmipmappedArray mipmap;
|
||
|
CUarray array;
|
||
|
} resource;
|
||
|
CUarraySparseSubresourceType subresourceType;
|
||
|
union {
|
||
|
struct {
|
||
|
unsigned int level;
|
||
|
unsigned int layer;
|
||
|
unsigned int offsetX;
|
||
|
unsigned int offsetY;
|
||
|
unsigned int offsetZ;
|
||
|
unsigned int extentWidth;
|
||
|
unsigned int extentHeight;
|
||
|
unsigned int extentDepth;
|
||
|
} sparseLevel;
|
||
|
struct {
|
||
|
unsigned int layer;
|
||
|
unsigned long long offset;
|
||
|
unsigned long long size;
|
||
|
} miptail;
|
||
|
} subresource;
|
||
|
CUmemOperationType memOperationType;
|
||
|
CUmemHandleType memHandleType;
|
||
|
union {
|
||
|
CUmemGenericAllocationHandle memHandle;
|
||
|
} memHandle;
|
||
|
unsigned long long offset;
|
||
|
unsigned int deviceBitMask;
|
||
|
unsigned int flags;
|
||
|
unsigned int reserved[2];
|
||
|
} CUarrayMapInfo;
|
||
|
|
||
|
typedef struct CUmemLocation_st {
|
||
|
CUmemLocationType type;
|
||
|
int id;
|
||
|
} CUmemLocation;
|
||
|
|
||
|
typedef enum CUmemAllocationCompType_enum {
|
||
|
CU_MEM_ALLOCATION_COMP_NONE = 0x0,
|
||
|
CU_MEM_ALLOCATION_COMP_GENERIC = 0x1,
|
||
|
} CUmemAllocationCompType;
|
||
|
|
||
|
typedef struct CUmemAllocationProp_st {
|
||
|
CUmemAllocationType type;
|
||
|
CUmemAllocationHandleType requestedHandleTypes;
|
||
|
CUmemLocation location;
|
||
|
void* win32HandleMetaData;
|
||
|
struct {
|
||
|
unsigned char compressionType;
|
||
|
unsigned char gpuDirectRDMACapable;
|
||
|
unsigned short usage;
|
||
|
unsigned char reserved[4];
|
||
|
} allocFlags;
|
||
|
} CUmemAllocationProp;
|
||
|
|
||
|
typedef struct CUmemAccessDesc_st {
|
||
|
CUmemLocation location;
|
||
|
CUmemAccess_flags flags;
|
||
|
} CUmemAccessDesc;
|
||
|
|
||
|
typedef enum CUgraphExecUpdateResult_enum {
|
||
|
CU_GRAPH_EXEC_UPDATE_SUCCESS = 0x0,
|
||
|
CU_GRAPH_EXEC_UPDATE_ERROR = 0x1,
|
||
|
CU_GRAPH_EXEC_UPDATE_ERROR_TOPOLOGY_CHANGED = 0x2,
|
||
|
CU_GRAPH_EXEC_UPDATE_ERROR_NODE_TYPE_CHANGED = 0x3,
|
||
|
CU_GRAPH_EXEC_UPDATE_ERROR_FUNCTION_CHANGED = 0x4,
|
||
|
CU_GRAPH_EXEC_UPDATE_ERROR_PARAMETERS_CHANGED = 0x5,
|
||
|
CU_GRAPH_EXEC_UPDATE_ERROR_NOT_SUPPORTED = 0x6,
|
||
|
CU_GRAPH_EXEC_UPDATE_ERROR_UNSUPPORTED_FUNCTION_CHANGE = 0x7,
|
||
|
} CUgraphExecUpdateResult;
|
||
|
|
||
|
typedef enum CUmemPool_attribute_enum {
|
||
|
CU_MEMPOOL_ATTR_REUSE_FOLLOW_EVENT_DEPENDENCIES = 1,
|
||
|
CU_MEMPOOL_ATTR_REUSE_ALLOW_OPPORTUNISTIC,
|
||
|
CU_MEMPOOL_ATTR_REUSE_ALLOW_INTERNAL_DEPENDENCIES,
|
||
|
CU_MEMPOOL_ATTR_RELEASE_THRESHOLD,
|
||
|
} CUmemPool_attribute;
|
||
|
|
||
|
typedef struct CUmemPoolProps_st {
|
||
|
CUmemAllocationType allocType;
|
||
|
CUmemAllocationHandleType handleTypes;
|
||
|
CUmemLocation location;
|
||
|
void* win32SecurityAttributes;
|
||
|
unsigned char reserved[CU_IPC_HANDLE_SIZE];
|
||
|
} CUmemPoolProps;
|
||
|
|
||
|
typedef struct CUmemPoolPtrExportData_st {
|
||
|
unsigned char reserved[CU_IPC_HANDLE_SIZE];
|
||
|
} CUmemPoolPtrExportData;
|
||
|
|
||
|
typedef enum {
|
||
|
NVRTC_SUCCESS = 0,
|
||
|
NVRTC_ERROR_OUT_OF_MEMORY = 1,
|
||
|
NVRTC_ERROR_PROGRAM_CREATION_FAILURE = 2,
|
||
|
NVRTC_ERROR_INVALID_INPUT = 3,
|
||
|
NVRTC_ERROR_INVALID_PROGRAM = 4,
|
||
|
NVRTC_ERROR_INVALID_OPTION = 5,
|
||
|
NVRTC_ERROR_COMPILATION = 6,
|
||
|
NVRTC_ERROR_BUILTIN_OPERATION_FAILURE = 7,
|
||
|
NVRTC_ERROR_NO_NAME_EXPRESSIONS_AFTER_COMPILATION = 8,
|
||
|
NVRTC_ERROR_NO_LOWERED_NAMES_BEFORE_COMPILATION = 9,
|
||
|
NVRTC_ERROR_NAME_EXPRESSION_NOT_VALID = 10,
|
||
|
NVRTC_ERROR_INTERNAL_ERROR = 11,
|
||
|
} nvrtcResult;
|
||
|
|
||
|
typedef struct _nvrtcProgram* nvrtcProgram;
|
||
|
typedef struct cudnnContext* cudnnHandle_t;
|
||
|
|
||
|
typedef enum {
|
||
|
CUDNN_STATUS_SUCCESS = 0,
|
||
|
CUDNN_STATUS_NOT_INITIALIZED = 1,
|
||
|
CUDNN_STATUS_ALLOC_FAILED = 2,
|
||
|
CUDNN_STATUS_BAD_PARAM = 3,
|
||
|
CUDNN_STATUS_INTERNAL_ERROR = 4,
|
||
|
CUDNN_STATUS_INVALID_VALUE = 5,
|
||
|
CUDNN_STATUS_ARCH_MISMATCH = 6,
|
||
|
CUDNN_STATUS_MAPPING_ERROR = 7,
|
||
|
CUDNN_STATUS_EXECUTION_FAILED = 8,
|
||
|
CUDNN_STATUS_NOT_SUPPORTED = 9,
|
||
|
CUDNN_STATUS_LICENSE_ERROR = 10,
|
||
|
CUDNN_STATUS_RUNTIME_PREREQUISITE_MISSING = 11,
|
||
|
CUDNN_STATUS_RUNTIME_IN_PROGRESS = 12,
|
||
|
CUDNN_STATUS_RUNTIME_FP_OVERFLOW = 13,
|
||
|
CUDNN_STATUS_VERSION_MISMATCH = 14,
|
||
|
} cudnnStatus_t;
|
||
|
|
||
|
typedef struct cudnnRuntimeTag_t cudnnRuntimeTag_t;
|
||
|
|
||
|
typedef enum {
|
||
|
CUDNN_ERRQUERY_RAWCODE = 0,
|
||
|
CUDNN_ERRQUERY_NONBLOCKING = 1,
|
||
|
CUDNN_ERRQUERY_BLOCKING = 2,
|
||
|
} cudnnErrQueryMode_t;
|
||
|
|
||
|
typedef enum libraryPropertyType_t {
|
||
|
MAJOR_VERSION,
|
||
|
MINOR_VERSION,
|
||
|
PATCH_LEVEL,
|
||
|
} libraryPropertyType;
|
||
|
|
||
|
typedef struct cudnnTensorStruct* cudnnTensorDescriptor_t;
|
||
|
typedef struct cudnnPoolingStruct* cudnnPoolingDescriptor_t;
|
||
|
typedef struct cudnnFilterStruct* cudnnFilterDescriptor_t;
|
||
|
typedef struct cudnnLRNStruct* cudnnLRNDescriptor_t;
|
||
|
typedef struct cudnnActivationStruct* cudnnActivationDescriptor_t;
|
||
|
typedef struct cudnnSpatialTransformerStruct* cudnnSpatialTransformerDescriptor_t;
|
||
|
typedef struct cudnnOpTensorStruct* cudnnOpTensorDescriptor_t;
|
||
|
typedef struct cudnnReduceTensorStruct* cudnnReduceTensorDescriptor_t;
|
||
|
typedef struct cudnnCTCLossStruct* cudnnCTCLossDescriptor_t;
|
||
|
typedef struct cudnnTensorTransformStruct* cudnnTensorTransformDescriptor_t;
|
||
|
|
||
|
typedef enum {
|
||
|
CUDNN_DATA_FLOAT = 0,
|
||
|
CUDNN_DATA_DOUBLE = 1,
|
||
|
CUDNN_DATA_HALF = 2,
|
||
|
CUDNN_DATA_INT8 = 3,
|
||
|
CUDNN_DATA_INT32 = 4,
|
||
|
CUDNN_DATA_INT8x4 = 5,
|
||
|
CUDNN_DATA_UINT8 = 6,
|
||
|
CUDNN_DATA_UINT8x4 = 7,
|
||
|
CUDNN_DATA_INT8x32 = 8,
|
||
|
} cudnnDataType_t;
|
||
|
|
||
|
typedef enum {
|
||
|
CUDNN_DEFAULT_MATH = 0,
|
||
|
CUDNN_TENSOR_OP_MATH = 1,
|
||
|
CUDNN_TENSOR_OP_MATH_ALLOW_CONVERSION = 2,
|
||
|
CUDNN_FMA_MATH = 3,
|
||
|
} cudnnMathType_t;
|
||
|
|
||
|
typedef enum {
|
||
|
CUDNN_NOT_PROPAGATE_NAN = 0,
|
||
|
CUDNN_PROPAGATE_NAN = 1,
|
||
|
} cudnnNanPropagation_t;
|
||
|
|
||
|
typedef enum {
|
||
|
CUDNN_NON_DETERMINISTIC = 0,
|
||
|
CUDNN_DETERMINISTIC = 1,
|
||
|
} cudnnDeterminism_t;
|
||
|
|
||
|
typedef enum {
|
||
|
CUDNN_TENSOR_NCHW = 0,
|
||
|
CUDNN_TENSOR_NHWC = 1,
|
||
|
CUDNN_TENSOR_NCHW_VECT_C = 2,
|
||
|
} cudnnTensorFormat_t;
|
||
|
|
||
|
typedef enum {
|
||
|
CUDNN_TRANSFORM_FOLD = 0U,
|
||
|
CUDNN_TRANSFORM_UNFOLD = 1U,
|
||
|
} cudnnFoldingDirection_t;
|
||
|
|
||
|
typedef enum {
|
||
|
CUDNN_OP_TENSOR_ADD = 0,
|
||
|
CUDNN_OP_TENSOR_MUL = 1,
|
||
|
CUDNN_OP_TENSOR_MIN = 2,
|
||
|
CUDNN_OP_TENSOR_MAX = 3,
|
||
|
CUDNN_OP_TENSOR_SQRT = 4,
|
||
|
CUDNN_OP_TENSOR_NOT = 5,
|
||
|
} cudnnOpTensorOp_t;
|
||
|
|
||
|
typedef enum {
|
||
|
CUDNN_REDUCE_TENSOR_ADD = 0,
|
||
|
CUDNN_REDUCE_TENSOR_MUL = 1,
|
||
|
CUDNN_REDUCE_TENSOR_MIN = 2,
|
||
|
CUDNN_REDUCE_TENSOR_MAX = 3,
|
||
|
CUDNN_REDUCE_TENSOR_AMAX = 4,
|
||
|
CUDNN_REDUCE_TENSOR_AVG = 5,
|
||
|
CUDNN_REDUCE_TENSOR_NORM1 = 6,
|
||
|
CUDNN_REDUCE_TENSOR_NORM2 = 7,
|
||
|
CUDNN_REDUCE_TENSOR_MUL_NO_ZEROS = 8,
|
||
|
} cudnnReduceTensorOp_t;
|
||
|
|
||
|
typedef enum {
|
||
|
CUDNN_REDUCE_TENSOR_NO_INDICES = 0,
|
||
|
CUDNN_REDUCE_TENSOR_FLATTENED_INDICES = 1,
|
||
|
} cudnnReduceTensorIndices_t;
|
||
|
|
||
|
typedef enum {
|
||
|
CUDNN_32BIT_INDICES = 0,
|
||
|
CUDNN_64BIT_INDICES = 1,
|
||
|
CUDNN_16BIT_INDICES = 2,
|
||
|
CUDNN_8BIT_INDICES = 3,
|
||
|
} cudnnIndicesType_t;
|
||
|
|
||
|
typedef enum {
|
||
|
CUDNN_SOFTMAX_FAST = 0,
|
||
|
CUDNN_SOFTMAX_ACCURATE = 1,
|
||
|
CUDNN_SOFTMAX_LOG = 2,
|
||
|
} cudnnSoftmaxAlgorithm_t;
|
||
|
|
||
|
typedef enum {
|
||
|
CUDNN_SOFTMAX_MODE_INSTANCE = 0,
|
||
|
CUDNN_SOFTMAX_MODE_CHANNEL = 1,
|
||
|
} cudnnSoftmaxMode_t;
|
||
|
|
||
|
typedef enum {
|
||
|
CUDNN_POOLING_MAX = 0,
|
||
|
CUDNN_POOLING_AVERAGE_COUNT_INCLUDE_PADDING = 1,
|
||
|
CUDNN_POOLING_AVERAGE_COUNT_EXCLUDE_PADDING = 2,
|
||
|
CUDNN_POOLING_MAX_DETERMINISTIC = 3,
|
||
|
} cudnnPoolingMode_t;
|
||
|
|
||
|
typedef enum {
|
||
|
CUDNN_ACTIVATION_SIGMOID = 0,
|
||
|
CUDNN_ACTIVATION_RELU = 1,
|
||
|
CUDNN_ACTIVATION_TANH = 2,
|
||
|
CUDNN_ACTIVATION_CLIPPED_RELU = 3,
|
||
|
CUDNN_ACTIVATION_ELU = 4,
|
||
|
CUDNN_ACTIVATION_IDENTITY = 5,
|
||
|
} cudnnActivationMode_t;
|
||
|
|
||
|
typedef enum {
|
||
|
CUDNN_LRN_CROSS_CHANNEL_DIM1 = 0,
|
||
|
} cudnnLRNMode_t;
|
||
|
|
||
|
typedef enum {
|
||
|
CUDNN_DIVNORM_PRECOMPUTED_MEANS = 0,
|
||
|
} cudnnDivNormMode_t;
|
||
|
|
||
|
typedef enum {
|
||
|
CUDNN_BATCHNORM_PER_ACTIVATION = 0,
|
||
|
CUDNN_BATCHNORM_SPATIAL = 1,
|
||
|
CUDNN_BATCHNORM_SPATIAL_PERSISTENT = 2,
|
||
|
} cudnnBatchNormMode_t;
|
||
|
|
||
|
typedef enum {
|
||
|
CUDNN_BATCHNORM_OPS_BN = 0,
|
||
|
CUDNN_BATCHNORM_OPS_BN_ACTIVATION = 1,
|
||
|
CUDNN_BATCHNORM_OPS_BN_ADD_ACTIVATION = 2,
|
||
|
} cudnnBatchNormOps_t;
|
||
|
|
||
|
typedef enum {
|
||
|
CUDNN_NORM_PER_ACTIVATION = 0,
|
||
|
CUDNN_NORM_PER_CHANNEL = 1,
|
||
|
} cudnnNormMode_t;
|
||
|
|
||
|
typedef enum {
|
||
|
CUDNN_NORM_ALGO_STANDARD = 0,
|
||
|
CUDNN_NORM_ALGO_PERSIST = 1,
|
||
|
} cudnnNormAlgo_t;
|
||
|
|
||
|
typedef enum {
|
||
|
CUDNN_NORM_OPS_NORM = 0,
|
||
|
CUDNN_NORM_OPS_NORM_ACTIVATION = 1,
|
||
|
CUDNN_NORM_OPS_NORM_ADD_ACTIVATION = 2,
|
||
|
} cudnnNormOps_t;
|
||
|
|
||
|
typedef enum {
|
||
|
CUDNN_SAMPLER_BILINEAR = 0,
|
||
|
} cudnnSamplerType_t;
|
||
|
|
||
|
typedef struct cudnnDropoutStruct* cudnnDropoutDescriptor_t;
|
||
|
typedef struct cudnnAlgorithmStruct* cudnnAlgorithmDescriptor_t;
|
||
|
typedef struct cudnnAlgorithmPerformanceStruct* cudnnAlgorithmPerformance_t;
|
||
|
|
||
|
typedef enum {
|
||
|
CUDNN_CONVOLUTION_FWD_ALGO_IMPLICIT_GEMM = 0,
|
||
|
CUDNN_CONVOLUTION_FWD_ALGO_IMPLICIT_PRECOMP_GEMM = 1,
|
||
|
CUDNN_CONVOLUTION_FWD_ALGO_GEMM = 2,
|
||
|
CUDNN_CONVOLUTION_FWD_ALGO_DIRECT = 3,
|
||
|
CUDNN_CONVOLUTION_FWD_ALGO_FFT = 4,
|
||
|
CUDNN_CONVOLUTION_FWD_ALGO_FFT_TILING = 5,
|
||
|
CUDNN_CONVOLUTION_FWD_ALGO_WINOGRAD = 6,
|
||
|
CUDNN_CONVOLUTION_FWD_ALGO_WINOGRAD_NONFUSED = 7,
|
||
|
CUDNN_CONVOLUTION_FWD_ALGO_COUNT = 8,
|
||
|
} cudnnConvolutionFwdAlgo_t;
|
||
|
|
||
|
typedef enum {
|
||
|
CUDNN_CONVOLUTION_BWD_FILTER_ALGO_0 = 0,
|
||
|
CUDNN_CONVOLUTION_BWD_FILTER_ALGO_1 = 1,
|
||
|
CUDNN_CONVOLUTION_BWD_FILTER_ALGO_FFT = 2,
|
||
|
CUDNN_CONVOLUTION_BWD_FILTER_ALGO_3 = 3,
|
||
|
CUDNN_CONVOLUTION_BWD_FILTER_ALGO_WINOGRAD = 4,
|
||
|
CUDNN_CONVOLUTION_BWD_FILTER_ALGO_WINOGRAD_NONFUSED = 5,
|
||
|
CUDNN_CONVOLUTION_BWD_FILTER_ALGO_FFT_TILING = 6,
|
||
|
CUDNN_CONVOLUTION_BWD_FILTER_ALGO_COUNT = 7,
|
||
|
} cudnnConvolutionBwdFilterAlgo_t;
|
||
|
|
||
|
typedef enum {
|
||
|
CUDNN_CONVOLUTION_BWD_DATA_ALGO_0 = 0,
|
||
|
CUDNN_CONVOLUTION_BWD_DATA_ALGO_1 = 1,
|
||
|
CUDNN_CONVOLUTION_BWD_DATA_ALGO_FFT = 2,
|
||
|
CUDNN_CONVOLUTION_BWD_DATA_ALGO_FFT_TILING = 3,
|
||
|
CUDNN_CONVOLUTION_BWD_DATA_ALGO_WINOGRAD = 4,
|
||
|
CUDNN_CONVOLUTION_BWD_DATA_ALGO_WINOGRAD_NONFUSED = 5,
|
||
|
CUDNN_CONVOLUTION_BWD_DATA_ALGO_COUNT = 6,
|
||
|
} cudnnConvolutionBwdDataAlgo_t;
|
||
|
|
||
|
typedef enum {
|
||
|
CUDNN_RNN_ALGO_STANDARD = 0,
|
||
|
CUDNN_RNN_ALGO_PERSIST_STATIC = 1,
|
||
|
CUDNN_RNN_ALGO_PERSIST_DYNAMIC = 2,
|
||
|
CUDNN_RNN_ALGO_COUNT = 3,
|
||
|
} cudnnRNNAlgo_t;
|
||
|
|
||
|
typedef enum {
|
||
|
CUDNN_CTC_LOSS_ALGO_DETERMINISTIC = 0,
|
||
|
CUDNN_CTC_LOSS_ALGO_NON_DETERMINISTIC = 1,
|
||
|
} cudnnCTCLossAlgo_t;
|
||
|
|
||
|
typedef struct {
|
||
|
union Algorithm {
|
||
|
cudnnConvolutionFwdAlgo_t convFwdAlgo;
|
||
|
cudnnConvolutionBwdFilterAlgo_t convBwdFilterAlgo;
|
||
|
cudnnConvolutionBwdDataAlgo_t convBwdDataAlgo;
|
||
|
cudnnRNNAlgo_t RNNAlgo;
|
||
|
cudnnCTCLossAlgo_t CTCLossAlgo;
|
||
|
} algo;
|
||
|
} cudnnAlgorithm_t;
|
||
|
|
||
|
typedef enum {
|
||
|
CUDNN_SEV_FATAL = 0,
|
||
|
CUDNN_SEV_ERROR = 1,
|
||
|
CUDNN_SEV_WARNING = 2,
|
||
|
CUDNN_SEV_INFO = 3,
|
||
|
} cudnnSeverity_t;
|
||
|
|
||
|
typedef struct {
|
||
|
unsigned cudnn_version;
|
||
|
cudnnStatus_t cudnnStatus;
|
||
|
unsigned time_sec;
|
||
|
unsigned time_usec;
|
||
|
unsigned time_delta;
|
||
|
cudnnHandle_t handle;
|
||
|
cudaStream_t stream;
|
||
|
unsigned long long pid;
|
||
|
unsigned long long tid;
|
||
|
int cudaDeviceId;
|
||
|
int reserved[15];
|
||
|
} cudnnDebug_t;
|
||
|
|
||
|
typedef void (CUDA_CB *cudnnCallback_t)(cudnnSeverity_t sev, void* udata, const cudnnDebug_t* dbg, const char* msg);
|
||
|
|
||
|
typedef enum {
|
||
|
CUDNN_FWD_MODE_INFERENCE = 0,
|
||
|
CUDNN_FWD_MODE_TRAINING = 1,
|
||
|
} cudnnForwardMode_t;
|
||
|
|
||
|
typedef enum {
|
||
|
CUDNN_RNN_RELU = 0,
|
||
|
CUDNN_RNN_TANH = 1,
|
||
|
CUDNN_LSTM = 2,
|
||
|
CUDNN_GRU = 3,
|
||
|
} cudnnRNNMode_t;
|
||
|
|
||
|
typedef enum {
|
||
|
CUDNN_RNN_NO_BIAS = 0,
|
||
|
CUDNN_RNN_SINGLE_INP_BIAS = 1,
|
||
|
CUDNN_RNN_DOUBLE_BIAS = 2,
|
||
|
CUDNN_RNN_SINGLE_REC_BIAS = 3,
|
||
|
} cudnnRNNBiasMode_t;
|
||
|
|
||
|
typedef enum {
|
||
|
CUDNN_UNIDIRECTIONAL = 0,
|
||
|
CUDNN_BIDIRECTIONAL = 1,
|
||
|
} cudnnDirectionMode_t;
|
||
|
|
||
|
typedef enum {
|
||
|
CUDNN_LINEAR_INPUT = 0,
|
||
|
CUDNN_SKIP_INPUT = 1,
|
||
|
} cudnnRNNInputMode_t;
|
||
|
|
||
|
typedef enum {
|
||
|
CUDNN_RNN_CLIP_NONE = 0,
|
||
|
CUDNN_RNN_CLIP_MINMAX = 1,
|
||
|
} cudnnRNNClipMode_t;
|
||
|
|
||
|
typedef enum {
|
||
|
CUDNN_RNN_DATA_LAYOUT_SEQ_MAJOR_UNPACKED = 0,
|
||
|
CUDNN_RNN_DATA_LAYOUT_SEQ_MAJOR_PACKED = 1,
|
||
|
CUDNN_RNN_DATA_LAYOUT_BATCH_MAJOR_UNPACKED = 2,
|
||
|
} cudnnRNNDataLayout_t;
|
||
|
|
||
|
typedef unsigned cudnnRNNPaddingMode_t;
|
||
|
typedef struct cudnnRNNStruct* cudnnRNNDescriptor_t;
|
||
|
typedef struct cudnnPersistentRNNPlan* cudnnPersistentRNNPlan_t;
|
||
|
typedef struct cudnnRNNDataStruct* cudnnRNNDataDescriptor_t;
|
||
|
|
||
|
typedef enum {
|
||
|
CUDNN_SEQDATA_TIME_DIM = 0,
|
||
|
CUDNN_SEQDATA_BATCH_DIM = 1,
|
||
|
CUDNN_SEQDATA_BEAM_DIM = 2,
|
||
|
CUDNN_SEQDATA_VECT_DIM = 3,
|
||
|
} cudnnSeqDataAxis_t;
|
||
|
|
||
|
typedef struct cudnnSeqDataStruct* cudnnSeqDataDescriptor_t;
|
||
|
typedef unsigned cudnnAttnQueryMap_t;
|
||
|
typedef struct cudnnAttnStruct* cudnnAttnDescriptor_t;
|
||
|
|
||
|
typedef enum {
|
||
|
CUDNN_MH_ATTN_Q_WEIGHTS = 0,
|
||
|
CUDNN_MH_ATTN_K_WEIGHTS = 1,
|
||
|
CUDNN_MH_ATTN_V_WEIGHTS = 2,
|
||
|
CUDNN_MH_ATTN_O_WEIGHTS = 3,
|
||
|
CUDNN_MH_ATTN_Q_BIASES = 4,
|
||
|
CUDNN_MH_ATTN_K_BIASES = 5,
|
||
|
CUDNN_MH_ATTN_V_BIASES = 6,
|
||
|
CUDNN_MH_ATTN_O_BIASES = 7,
|
||
|
} cudnnMultiHeadAttnWeightKind_t;
|
||
|
|
||
|
typedef enum {
|
||
|
CUDNN_WGRAD_MODE_ADD = 0,
|
||
|
CUDNN_WGRAD_MODE_SET = 1,
|
||
|
} cudnnWgradMode_t;
|
||
|
|
||
|
typedef enum {
|
||
|
CUDNN_LOSS_NORMALIZATION_NONE = 0,
|
||
|
CUDNN_LOSS_NORMALIZATION_SOFTMAX = 1,
|
||
|
} cudnnLossNormalizationMode_t;
|
||
|
|
||
|
typedef struct cudnnConvolutionStruct* cudnnConvolutionDescriptor_t;
|
||
|
|
||
|
typedef enum {
|
||
|
CUDNN_CONVOLUTION = 0,
|
||
|
CUDNN_CROSS_CORRELATION = 1,
|
||
|
} cudnnConvolutionMode_t;
|
||
|
|
||
|
typedef enum {
|
||
|
CUDNN_DEFAULT_REORDER = 0,
|
||
|
CUDNN_NO_REORDER = 1,
|
||
|
} cudnnReorderType_t;
|
||
|
|
||
|
typedef struct {
|
||
|
cudnnConvolutionFwdAlgo_t algo;
|
||
|
cudnnStatus_t status;
|
||
|
float time;
|
||
|
size_t memory;
|
||
|
cudnnDeterminism_t determinism;
|
||
|
cudnnMathType_t mathType;
|
||
|
int reserved[3];
|
||
|
} cudnnConvolutionFwdAlgoPerf_t;
|
||
|
|
||
|
typedef struct {
|
||
|
cudnnConvolutionBwdDataAlgo_t algo;
|
||
|
cudnnStatus_t status;
|
||
|
float time;
|
||
|
size_t memory;
|
||
|
cudnnDeterminism_t determinism;
|
||
|
cudnnMathType_t mathType;
|
||
|
int reserved[3];
|
||
|
} cudnnConvolutionBwdDataAlgoPerf_t;
|
||
|
|
||
|
typedef struct cudnnFusedOpsConstParamStruct* cudnnFusedOpsConstParamPack_t;
|
||
|
typedef struct cudnnFusedOpsVariantParamStruct* cudnnFusedOpsVariantParamPack_t;
|
||
|
typedef struct cudnnFusedOpsPlanStruct* cudnnFusedOpsPlan_t;
|
||
|
|
||
|
typedef enum {
|
||
|
CUDNN_FUSED_SCALE_BIAS_ACTIVATION_CONV_BNSTATS = 0,
|
||
|
CUDNN_FUSED_SCALE_BIAS_ACTIVATION_WGRAD = 1,
|
||
|
CUDNN_FUSED_BN_FINALIZE_STATISTICS_TRAINING = 2,
|
||
|
CUDNN_FUSED_BN_FINALIZE_STATISTICS_INFERENCE = 3,
|
||
|
CUDNN_FUSED_CONV_SCALE_BIAS_ADD_ACTIVATION = 4,
|
||
|
CUDNN_FUSED_SCALE_BIAS_ADD_ACTIVATION_GEN_BITMASK = 5,
|
||
|
CUDNN_FUSED_DACTIVATION_FORK_DBATCHNORM = 6,
|
||
|
} cudnnFusedOps_t;
|
||
|
|
||
|
typedef enum {
|
||
|
CUDNN_PARAM_XDESC = 0,
|
||
|
CUDNN_PARAM_XDATA_PLACEHOLDER = 1,
|
||
|
CUDNN_PARAM_BN_MODE = 2,
|
||
|
CUDNN_PARAM_BN_EQSCALEBIAS_DESC = 3,
|
||
|
CUDNN_PARAM_BN_EQSCALE_PLACEHOLDER = 4,
|
||
|
CUDNN_PARAM_BN_EQBIAS_PLACEHOLDER = 5,
|
||
|
CUDNN_PARAM_ACTIVATION_DESC = 6,
|
||
|
CUDNN_PARAM_CONV_DESC = 7,
|
||
|
CUDNN_PARAM_WDESC = 8,
|
||
|
CUDNN_PARAM_WDATA_PLACEHOLDER = 9,
|
||
|
CUDNN_PARAM_DWDESC = 10,
|
||
|
CUDNN_PARAM_DWDATA_PLACEHOLDER = 11,
|
||
|
CUDNN_PARAM_YDESC = 12,
|
||
|
CUDNN_PARAM_YDATA_PLACEHOLDER = 13,
|
||
|
CUDNN_PARAM_DYDESC = 14,
|
||
|
CUDNN_PARAM_DYDATA_PLACEHOLDER = 15,
|
||
|
CUDNN_PARAM_YSTATS_DESC = 16,
|
||
|
CUDNN_PARAM_YSUM_PLACEHOLDER = 17,
|
||
|
CUDNN_PARAM_YSQSUM_PLACEHOLDER = 18,
|
||
|
CUDNN_PARAM_BN_SCALEBIAS_MEANVAR_DESC = 19,
|
||
|
CUDNN_PARAM_BN_SCALE_PLACEHOLDER = 20,
|
||
|
CUDNN_PARAM_BN_BIAS_PLACEHOLDER = 21,
|
||
|
CUDNN_PARAM_BN_SAVED_MEAN_PLACEHOLDER = 22,
|
||
|
CUDNN_PARAM_BN_SAVED_INVSTD_PLACEHOLDER = 23,
|
||
|
CUDNN_PARAM_BN_RUNNING_MEAN_PLACEHOLDER = 24,
|
||
|
CUDNN_PARAM_BN_RUNNING_VAR_PLACEHOLDER = 25,
|
||
|
CUDNN_PARAM_ZDESC = 26,
|
||
|
CUDNN_PARAM_ZDATA_PLACEHOLDER = 27,
|
||
|
CUDNN_PARAM_BN_Z_EQSCALEBIAS_DESC = 28,
|
||
|
CUDNN_PARAM_BN_Z_EQSCALE_PLACEHOLDER = 29,
|
||
|
CUDNN_PARAM_BN_Z_EQBIAS_PLACEHOLDER = 30,
|
||
|
CUDNN_PARAM_ACTIVATION_BITMASK_DESC = 31,
|
||
|
CUDNN_PARAM_ACTIVATION_BITMASK_PLACEHOLDER = 32,
|
||
|
CUDNN_PARAM_DXDESC = 33,
|
||
|
CUDNN_PARAM_DXDATA_PLACEHOLDER = 34,
|
||
|
CUDNN_PARAM_DZDESC = 35,
|
||
|
CUDNN_PARAM_DZDATA_PLACEHOLDER = 36,
|
||
|
CUDNN_PARAM_BN_DSCALE_PLACEHOLDER = 37,
|
||
|
CUDNN_PARAM_BN_DBIAS_PLACEHOLDER = 38,
|
||
|
} cudnnFusedOpsConstParamLabel_t;
|
||
|
|
||
|
typedef enum {
|
||
|
CUDNN_PTR_NULL = 0,
|
||
|
CUDNN_PTR_ELEM_ALIGNED = 1,
|
||
|
CUDNN_PTR_16B_ALIGNED = 2,
|
||
|
} cudnnFusedOpsPointerPlaceHolder_t;
|
||
|
|
||
|
typedef enum {
|
||
|
CUDNN_PTR_XDATA = 0,
|
||
|
CUDNN_PTR_BN_EQSCALE = 1,
|
||
|
CUDNN_PTR_BN_EQBIAS = 2,
|
||
|
CUDNN_PTR_WDATA = 3,
|
||
|
CUDNN_PTR_DWDATA = 4,
|
||
|
CUDNN_PTR_YDATA = 5,
|
||
|
CUDNN_PTR_DYDATA = 6,
|
||
|
CUDNN_PTR_YSUM = 7,
|
||
|
CUDNN_PTR_YSQSUM = 8,
|
||
|
CUDNN_PTR_WORKSPACE = 9,
|
||
|
CUDNN_PTR_BN_SCALE = 10,
|
||
|
CUDNN_PTR_BN_BIAS = 11,
|
||
|
CUDNN_PTR_BN_SAVED_MEAN = 12,
|
||
|
CUDNN_PTR_BN_SAVED_INVSTD = 13,
|
||
|
CUDNN_PTR_BN_RUNNING_MEAN = 14,
|
||
|
CUDNN_PTR_BN_RUNNING_VAR = 15,
|
||
|
CUDNN_PTR_ZDATA = 16,
|
||
|
CUDNN_PTR_BN_Z_EQSCALE = 17,
|
||
|
CUDNN_PTR_BN_Z_EQBIAS = 18,
|
||
|
CUDNN_PTR_ACTIVATION_BITMASK = 19,
|
||
|
CUDNN_PTR_DXDATA = 20,
|
||
|
CUDNN_PTR_DZDATA = 21,
|
||
|
CUDNN_PTR_BN_DSCALE = 22,
|
||
|
CUDNN_PTR_BN_DBIAS = 23,
|
||
|
CUDNN_SCALAR_SIZE_T_WORKSPACE_SIZE_IN_BYTES = 100,
|
||
|
CUDNN_SCALAR_INT64_T_BN_ACCUMULATION_COUNT = 101,
|
||
|
CUDNN_SCALAR_DOUBLE_BN_EXP_AVG_FACTOR = 102,
|
||
|
CUDNN_SCALAR_DOUBLE_BN_EPSILON = 103,
|
||
|
} cudnnFusedOpsVariantParamLabel_t;
|
||
|
|
||
|
typedef struct {
|
||
|
cudnnConvolutionBwdFilterAlgo_t algo;
|
||
|
cudnnStatus_t status;
|
||
|
float time;
|
||
|
size_t memory;
|
||
|
cudnnDeterminism_t determinism;
|
||
|
cudnnMathType_t mathType;
|
||
|
int reserved[3];
|
||
|
} cudnnConvolutionBwdFilterAlgoPerf_t;
|
||
|
|
||
|
typedef void* cudnnBackendDescriptor_t;
|
||
|
|
||
|
typedef enum {
|
||
|
CUDNN_POINTWISE_ADD = 0,
|
||
|
CUDNN_POINTWISE_MUL = 1,
|
||
|
CUDNN_POINTWISE_MIN = 2,
|
||
|
CUDNN_POINTWISE_MAX = 3,
|
||
|
CUDNN_POINTWISE_SQRT = 4,
|
||
|
CUDNN_POINTWISE_RELU_FWD = 100,
|
||
|
CUDNN_POINTWISE_TANH_FWD = 101,
|
||
|
CUDNN_POINTWISE_SIGMOID_FWD = 102,
|
||
|
CUDNN_POINTWISE_ELU_FWD = 103,
|
||
|
} cudnnPointwiseMode_t;
|
||
|
|
||
|
typedef enum {
|
||
|
CUDNN_GENSTATS_SUM_SQSUM = 0,
|
||
|
} cudnnGenStatsMode_t;
|
||
|
|
||
|
typedef enum {
|
||
|
CUDNN_ATTR_POINTWISE_MODE = 0,
|
||
|
CUDNN_ATTR_POINTWISE_MATH_PREC = 1,
|
||
|
CUDNN_ATTR_POINTWISE_NAN_PROPAGATION = 2,
|
||
|
CUDNN_ATTR_POINTWISE_RELU_LOWER_CLIP = 3,
|
||
|
CUDNN_ATTR_POINTWISE_RELU_UPPER_CLIP = 4,
|
||
|
CUDNN_ATTR_CONVOLUTION_COMP_TYPE = 100,
|
||
|
CUDNN_ATTR_CONVOLUTION_CONV_MODE = 101,
|
||
|
CUDNN_ATTR_CONVOLUTION_DILATIONS = 102,
|
||
|
CUDNN_ATTR_CONVOLUTION_FILTER_STRIDES = 103,
|
||
|
CUDNN_ATTR_CONVOLUTION_POST_PADDINGS = 104,
|
||
|
CUDNN_ATTR_CONVOLUTION_PRE_PADDINGS = 105,
|
||
|
CUDNN_ATTR_CONVOLUTION_SPATIAL_DIMS = 106,
|
||
|
CUDNN_ATTR_ENGINEHEUR_MODE = 200,
|
||
|
CUDNN_ATTR_ENGINEHEUR_OPERATION_GRAPH = 201,
|
||
|
CUDNN_ATTR_ENGINEHEUR_RESULTS = 202,
|
||
|
CUDNN_ATTR_ENGINECFG_ENGINE = 300,
|
||
|
CUDNN_ATTR_ENGINECFG_INTERMEDIATE_INFO = 301,
|
||
|
CUDNN_ATTR_ENGINECFG_KNOB_CHOICES = 302,
|
||
|
CUDNN_ATTR_EXECUTION_PLAN_HANDLE = 400,
|
||
|
CUDNN_ATTR_EXECUTION_PLAN_ENGINE_CONFIG = 401,
|
||
|
CUDNN_ATTR_EXECUTION_PLAN_WORKSPACE_SIZE = 402,
|
||
|
CUDNN_ATTR_EXECUTION_PLAN_COMPUTED_INTERMEDIATE_UIDS = 403,
|
||
|
CUDNN_ATTR_EXECUTION_PLAN_RUN_ONLY_INTERMEDIATE_UIDS = 404,
|
||
|
CUDNN_ATTR_INTERMEDIATE_INFO_UNIQUE_ID = 500,
|
||
|
CUDNN_ATTR_INTERMEDIATE_INFO_SIZE = 501,
|
||
|
CUDNN_ATTR_INTERMEDIATE_INFO_DEPENDENT_DATA_UIDS = 502,
|
||
|
CUDNN_ATTR_INTERMEDIATE_INFO_DEPENDENT_ATTRIBUTES = 503,
|
||
|
CUDNN_ATTR_KNOB_CHOICE_KNOB_TYPE = 600,
|
||
|
CUDNN_ATTR_KNOB_CHOICE_KNOB_VALUE = 601,
|
||
|
CUDNN_ATTR_OPERATION_CONVOLUTION_FORWARD_ALPHA = 700,
|
||
|
CUDNN_ATTR_OPERATION_CONVOLUTION_FORWARD_BETA = 701,
|
||
|
CUDNN_ATTR_OPERATION_CONVOLUTION_FORWARD_CONV_DESC = 702,
|
||
|
CUDNN_ATTR_OPERATION_CONVOLUTION_FORWARD_W = 703,
|
||
|
CUDNN_ATTR_OPERATION_CONVOLUTION_FORWARD_X = 704,
|
||
|
CUDNN_ATTR_OPERATION_CONVOLUTION_FORWARD_Y = 705,
|
||
|
CUDNN_ATTR_OPERATION_CONVOLUTION_BWD_DATA_ALPHA = 706,
|
||
|
CUDNN_ATTR_OPERATION_CONVOLUTION_BWD_DATA_BETA = 707,
|
||
|
CUDNN_ATTR_OPERATION_CONVOLUTION_BWD_DATA_CONV_DESC = 708,
|
||
|
CUDNN_ATTR_OPERATION_CONVOLUTION_BWD_DATA_W = 709,
|
||
|
CUDNN_ATTR_OPERATION_CONVOLUTION_BWD_DATA_DX = 710,
|
||
|
CUDNN_ATTR_OPERATION_CONVOLUTION_BWD_DATA_DY = 711,
|
||
|
CUDNN_ATTR_OPERATION_CONVOLUTION_BWD_FILTER_ALPHA = 712,
|
||
|
CUDNN_ATTR_OPERATION_CONVOLUTION_BWD_FILTER_BETA = 713,
|
||
|
CUDNN_ATTR_OPERATION_CONVOLUTION_BWD_FILTER_CONV_DESC = 714,
|
||
|
CUDNN_ATTR_OPERATION_CONVOLUTION_BWD_FILTER_DW = 715,
|
||
|
CUDNN_ATTR_OPERATION_CONVOLUTION_BWD_FILTER_X = 716,
|
||
|
CUDNN_ATTR_OPERATION_CONVOLUTION_BWD_FILTER_DY = 717,
|
||
|
CUDNN_ATTR_OPERATION_POINTWISE_PW_DESCRIPTOR = 750,
|
||
|
CUDNN_ATTR_OPERATION_POINTWISE_XDESC = 751,
|
||
|
CUDNN_ATTR_OPERATION_POINTWISE_BDESC = 752,
|
||
|
CUDNN_ATTR_OPERATION_POINTWISE_YDESC = 753,
|
||
|
CUDNN_ATTR_OPERATION_POINTWISE_ALPHA1 = 754,
|
||
|
CUDNN_ATTR_OPERATION_POINTWISE_ALPHA2 = 755,
|
||
|
CUDNN_ATTR_OPERATION_GENSTATS_MODE = 770,
|
||
|
CUDNN_ATTR_OPERATION_GENSTATS_MATH_PREC = 771,
|
||
|
CUDNN_ATTR_OPERATION_GENSTATS_XDESC = 772,
|
||
|
CUDNN_ATTR_OPERATION_GENSTATS_SUMDESC = 773,
|
||
|
CUDNN_ATTR_OPERATION_GENSTATS_SQSUMDESC = 774,
|
||
|
CUDNN_ATTR_OPERATIONGRAPH_HANDLE = 800,
|
||
|
CUDNN_ATTR_OPERATIONGRAPH_OPS = 801,
|
||
|
CUDNN_ATTR_OPERATIONGRAPH_ENGINE_GLOBAL_COUNT = 802,
|
||
|
CUDNN_ATTR_TENSOR_BYTE_ALIGNMENT = 900,
|
||
|
CUDNN_ATTR_TENSOR_DATA_TYPE = 901,
|
||
|
CUDNN_ATTR_TENSOR_DIMENSIONS = 902,
|
||
|
CUDNN_ATTR_TENSOR_STRIDES = 903,
|
||
|
CUDNN_ATTR_TENSOR_VECTOR_COUNT = 904,
|
||
|
CUDNN_ATTR_TENSOR_VECTORIZED_DIMENSION = 905,
|
||
|
CUDNN_ATTR_TENSOR_UNIQUE_ID = 906,
|
||
|
CUDNN_ATTR_TENSOR_IS_VIRTUAL = 907,
|
||
|
CUDNN_ATTR_VARIANT_PACK_UNIQUE_IDS = 1000,
|
||
|
CUDNN_ATTR_VARIANT_PACK_DATA_POINTERS = 1001,
|
||
|
CUDNN_ATTR_VARIANT_PACK_INTERMEDIATES = 1002,
|
||
|
CUDNN_ATTR_VARIANT_PACK_WORKSPACE = 1003,
|
||
|
CUDNN_ATTR_LAYOUT_INFO_TENSOR_UID = 1100,
|
||
|
CUDNN_ATTR_LAYOUT_INFO_TYPES = 1101,
|
||
|
CUDNN_ATTR_KNOB_INFO_TYPE = 1200,
|
||
|
CUDNN_ATTR_KNOB_INFO_MAXIMUM_VALUE = 1201,
|
||
|
CUDNN_ATTR_KNOB_INFO_MINIMUM_VALUE = 1202,
|
||
|
CUDNN_ATTR_KNOB_INFO_STRIDE = 1203,
|
||
|
CUDNN_ATTR_ENGINE_OPERATION_GRAPH = 1300,
|
||
|
CUDNN_ATTR_ENGINE_GLOBAL_INDEX = 1301,
|
||
|
CUDNN_ATTR_ENGINE_KNOB_INFO = 1302,
|
||
|
CUDNN_ATTR_ENGINE_NUMERICAL_NOTE = 1303,
|
||
|
CUDNN_ATTR_ENGINE_LAYOUT_INFO = 1304,
|
||
|
} cudnnBackendAttributeName_t;
|
||
|
|
||
|
typedef enum {
|
||
|
CUDNN_TYPE_HANDLE = 0,
|
||
|
CUDNN_TYPE_DATA_TYPE,
|
||
|
CUDNN_TYPE_BOOLEAN,
|
||
|
CUDNN_TYPE_INT64,
|
||
|
CUDNN_TYPE_FLOAT,
|
||
|
CUDNN_TYPE_DOUBLE,
|
||
|
CUDNN_TYPE_VOID_PTR,
|
||
|
CUDNN_TYPE_CONVOLUTION_MODE,
|
||
|
CUDNN_TYPE_HEUR_MODE,
|
||
|
CUDNN_TYPE_KNOB_TYPE,
|
||
|
CUDNN_TYPE_NAN_PROPOGATION,
|
||
|
CUDNN_TYPE_NUMERICAL_NOTE,
|
||
|
CUDNN_TYPE_LAYOUT_TYPE,
|
||
|
CUDNN_TYPE_ATTRIB_NAME,
|
||
|
CUDNN_TYPE_POINTWISE_MODE,
|
||
|
CUDNN_TYPE_BACKEND_DESCRIPTOR,
|
||
|
CUDNN_TYPE_GENSTATS_MODE,
|
||
|
} cudnnBackendAttributeType_t;
|
||
|
|
||
|
typedef enum {
|
||
|
CUDNN_BACKEND_POINTWISE_DESCRIPTOR = 0,
|
||
|
CUDNN_BACKEND_CONVOLUTION_DESCRIPTOR,
|
||
|
CUDNN_BACKEND_ENGINE_DESCRIPTOR,
|
||
|
CUDNN_BACKEND_ENGINECFG_DESCRIPTOR,
|
||
|
CUDNN_BACKEND_ENGINEHEUR_DESCRIPTOR,
|
||
|
CUDNN_BACKEND_EXECUTION_PLAN_DESCRIPTOR,
|
||
|
CUDNN_BACKEND_INTERMEDIATE_INFO_DESCRIPTOR,
|
||
|
CUDNN_BACKEND_KNOB_CHOICE_DESCRIPTOR,
|
||
|
CUDNN_BACKEND_KNOB_INFO_DESCRIPTOR,
|
||
|
CUDNN_BACKEND_LAYOUT_INFO_DESCRIPTOR,
|
||
|
CUDNN_BACKEND_OPERATION_CONVOLUTION_FORWARD_DESCRIPTOR,
|
||
|
CUDNN_BACKEND_OPERATION_CONVOLUTION_BACKWARD_FILTER_DESCRIPTOR,
|
||
|
CUDNN_BACKEND_OPERATION_CONVOLUTION_BACKWARD_DATA_DESCRIPTOR,
|
||
|
CUDNN_BACKEND_OPERATION_POINTWISE_DESCRIPTOR,
|
||
|
CUDNN_BACKEND_OPERATION_GEN_STATS_DESCRIPTOR,
|
||
|
CUDNN_BACKEND_OPERATIONGRAPH_DESCRIPTOR,
|
||
|
CUDNN_BACKEND_VARIANT_PACK_DESCRIPTOR,
|
||
|
CUDNN_BACKEND_TENSOR_DESCRIPTOR,
|
||
|
} cudnnBackendDescriptorType_t;
|
||
|
|
||
|
typedef enum {
|
||
|
CUDNN_NUMERICAL_NOTE_TENSOR_CORE = 0,
|
||
|
CUDNN_NUMERICAL_NOTE_DOWN_CONVERT_INPUTS,
|
||
|
CUDNN_NUMERICAL_NOTE_REDUCED_PRECISION_REDUCTION,
|
||
|
CUDNN_NUMERICAL_NOTE_FFT,
|
||
|
CUDNN_NUMERICAL_NOTE_NONDETERMINISTIC,
|
||
|
CUDNN_NUMERICAL_NOTE_WINOGRAD,
|
||
|
CUDNN_NUMERICAL_NOTE_TYPE_COUNT,
|
||
|
} cudnnBackendNumericalNote_t;
|
||
|
|
||
|
typedef enum {
|
||
|
CUDNN_KNOB_TYPE_SPLIT_K = 0,
|
||
|
CUDNN_KNOB_TYPE_SWIZZLE = 1,
|
||
|
CUDNN_KNOB_TYPE_TILE_SIZE = 2,
|
||
|
CUDNN_KNOB_TYPE_USE_TEX = 3,
|
||
|
CUDNN_KNOB_TYPE_EDGE = 4,
|
||
|
CUDNN_KNOB_TYPE_KBLOCK = 5,
|
||
|
CUDNN_KNOB_TYPE_LDGA = 6,
|
||
|
CUDNN_KNOB_TYPE_LDGB = 7,
|
||
|
CUDNN_KNOB_TYPE_CHUNK_K = 8,
|
||
|
CUDNN_KNOB_TYPE_SPLIT_H = 9,
|
||
|
CUDNN_KNOB_TYPE_WINO_TILE = 10,
|
||
|
CUDNN_KNOB_TYPE_MULTIPLY = 11,
|
||
|
CUDNN_KNOB_TYPE_SPLIT_K_BUF = 12,
|
||
|
CUDNN_KNOB_TYPE_TILEK = 13,
|
||
|
CUDNN_KNOB_TYPE_STAGES = 14,
|
||
|
CUDNN_KNOB_TYPE_REDUCTION_MODE = 15,
|
||
|
CUDNN_KNOB_TYPE_CTA_SPLIT_K_MODE = 16,
|
||
|
CUDNN_KNOB_TYPE_SPLIT_K_SLC = 17,
|
||
|
CUDNN_KNOB_TYPE_IDX_MODE = 18,
|
||
|
CUDNN_KNOB_TYPE_SLICED = 19,
|
||
|
CUDNN_KNOB_TYPE_SPLIT_RS = 20,
|
||
|
CUDNN_KNOB_TYPE_SINGLEBUFFER = 21,
|
||
|
CUDNN_KNOB_TYPE_LDGC = 22,
|
||
|
CUDNN_KNOB_TYPE_SPECFILT = 23,
|
||
|
CUDNN_KNOB_TYPE_COUNTS = 24,
|
||
|
} cudnnBackendKnobType_t;
|
||
|
|
||
|
typedef enum {
|
||
|
CUDNN_LAYOUT_TYPE_PREFERRED_NCHW = 0,
|
||
|
CUDNN_LAYOUT_TYPE_PREFERRED_NHWC = 1,
|
||
|
CUDNN_LAYOUT_TYPE_PREFERRED_PAD4CK = 2,
|
||
|
CUDNN_LAYOUT_TYPE_PREFERRED_PAD8CK = 3,
|
||
|
CUDNN_LAYOUT_TYPE_COUNT = 4,
|
||
|
} cudnnBackendLayoutType_t;
|
||
|
|
||
|
typedef enum {
|
||
|
CUDNN_HEUR_MODE_INSTANT = 0,
|
||
|
CUDNN_HEUR_MODES_COUNT,
|
||
|
} cudnnBackendHeurMode_t;
|
||
|
typedef unsigned int GLenum;
|
||
|
typedef unsigned int GLuint;
|
||
|
typedef int GLint;
|
||
|
|
||
|
typedef enum CUGLDeviceList_enum {
|
||
|
CU_GL_DEVICE_LIST_ALL = 0x01,
|
||
|
CU_GL_DEVICE_LIST_CURRENT_FRAME = 0x02,
|
||
|
CU_GL_DEVICE_LIST_NEXT_FRAME = 0x03,
|
||
|
} CUGLDeviceList;
|
||
|
|
||
|
typedef enum CUGLmap_flags_enum {
|
||
|
CU_GL_MAP_RESOURCE_FLAGS_NONE = 0x00,
|
||
|
CU_GL_MAP_RESOURCE_FLAGS_READ_ONLY = 0x01,
|
||
|
CU_GL_MAP_RESOURCE_FLAGS_WRITE_DISCARD = 0x02,
|
||
|
} CUGLmap_flags;
|
||
|
|
||
|
|
||
|
/* Function types. */
|
||
|
typedef CUresult CUDAAPI tcuGetErrorString(CUresult error, const char** pStr);
|
||
|
typedef CUresult CUDAAPI tcuGetErrorName(CUresult error, const char** pStr);
|
||
|
typedef CUresult CUDAAPI tcuInit(unsigned int Flags);
|
||
|
typedef CUresult CUDAAPI tcuDriverGetVersion(int* driverVersion);
|
||
|
typedef CUresult CUDAAPI tcuDeviceGet(CUdevice* device, int ordinal);
|
||
|
typedef CUresult CUDAAPI tcuDeviceGetCount(int* count);
|
||
|
typedef CUresult CUDAAPI tcuDeviceGetName(char* name, int len, CUdevice dev);
|
||
|
typedef CUresult CUDAAPI tcuDeviceGetUuid(CUuuid* uuid, CUdevice dev);
|
||
|
typedef CUresult CUDAAPI tcuDeviceGetLuid(char* luid, unsigned int* deviceNodeMask, CUdevice dev);
|
||
|
typedef CUresult CUDAAPI tcuDeviceTotalMem_v2(size_t* bytes, CUdevice dev);
|
||
|
typedef CUresult CUDAAPI tcuDeviceGetTexture1DLinearMaxWidth(size_t* maxWidthInElements, CUarray_format format, unsigned numChannels, CUdevice dev);
|
||
|
typedef CUresult CUDAAPI tcuDeviceGetAttribute(int* pi, CUdevice_attribute attrib, CUdevice dev);
|
||
|
typedef CUresult CUDAAPI tcuDeviceGetNvSciSyncAttributes(void* nvSciSyncAttrList, CUdevice dev, int flags);
|
||
|
typedef CUresult CUDAAPI tcuDeviceSetMemPool(CUdevice dev, CUmemoryPool pool);
|
||
|
typedef CUresult CUDAAPI tcuDeviceGetMemPool(CUmemoryPool* pool, CUdevice dev);
|
||
|
typedef CUresult CUDAAPI tcuDeviceGetDefaultMemPool(CUmemoryPool* pool_out, CUdevice dev);
|
||
|
typedef CUresult CUDAAPI tcuDeviceGetProperties(CUdevprop* prop, CUdevice dev);
|
||
|
typedef CUresult CUDAAPI tcuDeviceComputeCapability(int* major, int* minor, CUdevice dev);
|
||
|
typedef CUresult CUDAAPI tcuDevicePrimaryCtxRetain(CUcontext* pctx, CUdevice dev);
|
||
|
typedef CUresult CUDAAPI tcuDevicePrimaryCtxRelease_v2(CUdevice dev);
|
||
|
typedef CUresult CUDAAPI tcuDevicePrimaryCtxSetFlags_v2(CUdevice dev, unsigned int flags);
|
||
|
typedef CUresult CUDAAPI tcuDevicePrimaryCtxGetState(CUdevice dev, unsigned int* flags, int* active);
|
||
|
typedef CUresult CUDAAPI tcuDevicePrimaryCtxReset_v2(CUdevice dev);
|
||
|
typedef CUresult CUDAAPI tcuCtxCreate_v2(CUcontext* pctx, unsigned int flags, CUdevice dev);
|
||
|
typedef CUresult CUDAAPI tcuCtxDestroy_v2(CUcontext ctx);
|
||
|
typedef CUresult CUDAAPI tcuCtxPushCurrent_v2(CUcontext ctx);
|
||
|
typedef CUresult CUDAAPI tcuCtxPopCurrent_v2(CUcontext* pctx);
|
||
|
typedef CUresult CUDAAPI tcuCtxSetCurrent(CUcontext ctx);
|
||
|
typedef CUresult CUDAAPI tcuCtxGetCurrent(CUcontext* pctx);
|
||
|
typedef CUresult CUDAAPI tcuCtxGetDevice(CUdevice* device);
|
||
|
typedef CUresult CUDAAPI tcuCtxGetFlags(unsigned int* flags);
|
||
|
typedef CUresult CUDAAPI tcuCtxSynchronize(void);
|
||
|
typedef CUresult CUDAAPI tcuCtxSetLimit(CUlimit limit, size_t value);
|
||
|
typedef CUresult CUDAAPI tcuCtxGetLimit(size_t* pvalue, CUlimit limit);
|
||
|
typedef CUresult CUDAAPI tcuCtxGetCacheConfig(CUfunc_cache* pconfig);
|
||
|
typedef CUresult CUDAAPI tcuCtxSetCacheConfig(CUfunc_cache config);
|
||
|
typedef CUresult CUDAAPI tcuCtxGetSharedMemConfig(CUsharedconfig* pConfig);
|
||
|
typedef CUresult CUDAAPI tcuCtxSetSharedMemConfig(CUsharedconfig config);
|
||
|
typedef CUresult CUDAAPI tcuCtxGetApiVersion(CUcontext ctx, unsigned int* version);
|
||
|
typedef CUresult CUDAAPI tcuCtxGetStreamPriorityRange(int* leastPriority, int* greatestPriority);
|
||
|
typedef CUresult CUDAAPI tcuCtxResetPersistingL2Cache(void);
|
||
|
typedef CUresult CUDAAPI tcuCtxAttach(CUcontext* pctx, unsigned int flags);
|
||
|
typedef CUresult CUDAAPI tcuCtxDetach(CUcontext ctx);
|
||
|
typedef CUresult CUDAAPI tcuModuleLoad(CUmodule* module, const char* fname);
|
||
|
typedef CUresult CUDAAPI tcuModuleLoadData(CUmodule* module, const void* image);
|
||
|
typedef CUresult CUDAAPI tcuModuleLoadDataEx(CUmodule* module, const void* image, unsigned int numOptions, CUjit_option* options, void** optionValues);
|
||
|
typedef CUresult CUDAAPI tcuModuleLoadFatBinary(CUmodule* module, const void* fatCubin);
|
||
|
typedef CUresult CUDAAPI tcuModuleUnload(CUmodule hmod);
|
||
|
typedef CUresult CUDAAPI tcuModuleGetFunction(CUfunction* hfunc, CUmodule hmod, const char* name);
|
||
|
typedef CUresult CUDAAPI tcuModuleGetGlobal_v2(CUdeviceptr* dptr, size_t* bytes, CUmodule hmod, const char* name);
|
||
|
typedef CUresult CUDAAPI tcuModuleGetTexRef(CUtexref* pTexRef, CUmodule hmod, const char* name);
|
||
|
typedef CUresult CUDAAPI tcuModuleGetSurfRef(CUsurfref* pSurfRef, CUmodule hmod, const char* name);
|
||
|
typedef CUresult CUDAAPI tcuLinkCreate_v2(unsigned int numOptions, CUjit_option* options, void** optionValues, CUlinkState* stateOut);
|
||
|
typedef CUresult CUDAAPI tcuLinkAddData_v2(CUlinkState state, CUjitInputType type, void* data, size_t size, const char* name, unsigned int numOptions, CUjit_option* options, void** optionValues);
|
||
|
typedef CUresult CUDAAPI tcuLinkAddFile_v2(CUlinkState state, CUjitInputType type, const char* path, unsigned int numOptions, CUjit_option* options, void** optionValues);
|
||
|
typedef CUresult CUDAAPI tcuLinkComplete(CUlinkState state, void** cubinOut, size_t* sizeOut);
|
||
|
typedef CUresult CUDAAPI tcuLinkDestroy(CUlinkState state);
|
||
|
typedef CUresult CUDAAPI tcuMemGetInfo_v2(size_t* free, size_t* total);
|
||
|
typedef CUresult CUDAAPI tcuMemAlloc_v2(CUdeviceptr* dptr, size_t bytesize);
|
||
|
typedef CUresult CUDAAPI tcuMemAllocPitch_v2(CUdeviceptr* dptr, size_t* pPitch, size_t WidthInBytes, size_t Height, unsigned int ElementSizeBytes);
|
||
|
typedef CUresult CUDAAPI tcuMemFree_v2(CUdeviceptr dptr);
|
||
|
typedef CUresult CUDAAPI tcuMemGetAddressRange_v2(CUdeviceptr* pbase, size_t* psize, CUdeviceptr dptr);
|
||
|
typedef CUresult CUDAAPI tcuMemAllocHost_v2(void** pp, size_t bytesize);
|
||
|
typedef CUresult CUDAAPI tcuMemFreeHost(void* p);
|
||
|
typedef CUresult CUDAAPI tcuMemHostAlloc(void** pp, size_t bytesize, unsigned int Flags);
|
||
|
typedef CUresult CUDAAPI tcuMemHostGetDevicePointer_v2(CUdeviceptr* pdptr, void* p, unsigned int Flags);
|
||
|
typedef CUresult CUDAAPI tcuMemHostGetFlags(unsigned int* pFlags, void* p);
|
||
|
typedef CUresult CUDAAPI tcuMemAllocManaged(CUdeviceptr* dptr, size_t bytesize, unsigned int flags);
|
||
|
typedef CUresult CUDAAPI tcuDeviceGetByPCIBusId(CUdevice* dev, const char* pciBusId);
|
||
|
typedef CUresult CUDAAPI tcuDeviceGetPCIBusId(char* pciBusId, int len, CUdevice dev);
|
||
|
typedef CUresult CUDAAPI tcuIpcGetEventHandle(CUipcEventHandle* pHandle, CUevent event);
|
||
|
typedef CUresult CUDAAPI tcuIpcOpenEventHandle(CUevent* phEvent, CUipcEventHandle handle);
|
||
|
typedef CUresult CUDAAPI tcuIpcGetMemHandle(CUipcMemHandle* pHandle, CUdeviceptr dptr);
|
||
|
typedef CUresult CUDAAPI tcuIpcOpenMemHandle_v2(CUdeviceptr* pdptr, CUipcMemHandle handle, unsigned int Flags);
|
||
|
typedef CUresult CUDAAPI tcuIpcCloseMemHandle(CUdeviceptr dptr);
|
||
|
typedef CUresult CUDAAPI tcuMemHostRegister_v2(void* p, size_t bytesize, unsigned int Flags);
|
||
|
typedef CUresult CUDAAPI tcuMemHostUnregister(void* p);
|
||
|
typedef CUresult CUDAAPI tcuMemcpy(CUdeviceptr dst, CUdeviceptr src, size_t ByteCount);
|
||
|
typedef CUresult CUDAAPI tcuMemcpyPeer(CUdeviceptr dstDevice, CUcontext dstContext, CUdeviceptr srcDevice, CUcontext srcContext, size_t ByteCount);
|
||
|
typedef CUresult CUDAAPI tcuMemcpyHtoD_v2(CUdeviceptr dstDevice, const void* srcHost, size_t ByteCount);
|
||
|
typedef CUresult CUDAAPI tcuMemcpyDtoH_v2(void* dstHost, CUdeviceptr srcDevice, size_t ByteCount);
|
||
|
typedef CUresult CUDAAPI tcuMemcpyDtoD_v2(CUdeviceptr dstDevice, CUdeviceptr srcDevice, size_t ByteCount);
|
||
|
typedef CUresult CUDAAPI tcuMemcpyDtoA_v2(CUarray dstArray, size_t dstOffset, CUdeviceptr srcDevice, size_t ByteCount);
|
||
|
typedef CUresult CUDAAPI tcuMemcpyAtoD_v2(CUdeviceptr dstDevice, CUarray srcArray, size_t srcOffset, size_t ByteCount);
|
||
|
typedef CUresult CUDAAPI tcuMemcpyHtoA_v2(CUarray dstArray, size_t dstOffset, const void* srcHost, size_t ByteCount);
|
||
|
typedef CUresult CUDAAPI tcuMemcpyAtoH_v2(void* dstHost, CUarray srcArray, size_t srcOffset, size_t ByteCount);
|
||
|
typedef CUresult CUDAAPI tcuMemcpyAtoA_v2(CUarray dstArray, size_t dstOffset, CUarray srcArray, size_t srcOffset, size_t ByteCount);
|
||
|
typedef CUresult CUDAAPI tcuMemcpy2D_v2(const CUDA_MEMCPY2D* pCopy);
|
||
|
typedef CUresult CUDAAPI tcuMemcpy2DUnaligned_v2(const CUDA_MEMCPY2D* pCopy);
|
||
|
typedef CUresult CUDAAPI tcuMemcpy3D_v2(const CUDA_MEMCPY3D* pCopy);
|
||
|
typedef CUresult CUDAAPI tcuMemcpy3DPeer(const CUDA_MEMCPY3D_PEER* pCopy);
|
||
|
typedef CUresult CUDAAPI tcuMemcpyAsync(CUdeviceptr dst, CUdeviceptr src, size_t ByteCount, CUstream hStream);
|
||
|
typedef CUresult CUDAAPI tcuMemcpyPeerAsync(CUdeviceptr dstDevice, CUcontext dstContext, CUdeviceptr srcDevice, CUcontext srcContext, size_t ByteCount, CUstream hStream);
|
||
|
typedef CUresult CUDAAPI tcuMemcpyHtoDAsync_v2(CUdeviceptr dstDevice, const void* srcHost, size_t ByteCount, CUstream hStream);
|
||
|
typedef CUresult CUDAAPI tcuMemcpyDtoHAsync_v2(void* dstHost, CUdeviceptr srcDevice, size_t ByteCount, CUstream hStream);
|
||
|
typedef CUresult CUDAAPI tcuMemcpyDtoDAsync_v2(CUdeviceptr dstDevice, CUdeviceptr srcDevice, size_t ByteCount, CUstream hStream);
|
||
|
typedef CUresult CUDAAPI tcuMemcpyHtoAAsync_v2(CUarray dstArray, size_t dstOffset, const void* srcHost, size_t ByteCount, CUstream hStream);
|
||
|
typedef CUresult CUDAAPI tcuMemcpyAtoHAsync_v2(void* dstHost, CUarray srcArray, size_t srcOffset, size_t ByteCount, CUstream hStream);
|
||
|
typedef CUresult CUDAAPI tcuMemcpy2DAsync_v2(const CUDA_MEMCPY2D* pCopy, CUstream hStream);
|
||
|
typedef CUresult CUDAAPI tcuMemcpy3DAsync_v2(const CUDA_MEMCPY3D* pCopy, CUstream hStream);
|
||
|
typedef CUresult CUDAAPI tcuMemcpy3DPeerAsync(const CUDA_MEMCPY3D_PEER* pCopy, CUstream hStream);
|
||
|
typedef CUresult CUDAAPI tcuMemsetD8_v2(CUdeviceptr dstDevice, unsigned char uc, size_t N);
|
||
|
typedef CUresult CUDAAPI tcuMemsetD16_v2(CUdeviceptr dstDevice, unsigned short us, size_t N);
|
||
|
typedef CUresult CUDAAPI tcuMemsetD32_v2(CUdeviceptr dstDevice, unsigned int ui, size_t N);
|
||
|
typedef CUresult CUDAAPI tcuMemsetD2D8_v2(CUdeviceptr dstDevice, size_t dstPitch, unsigned char uc, size_t Width, size_t Height);
|
||
|
typedef CUresult CUDAAPI tcuMemsetD2D16_v2(CUdeviceptr dstDevice, size_t dstPitch, unsigned short us, size_t Width, size_t Height);
|
||
|
typedef CUresult CUDAAPI tcuMemsetD2D32_v2(CUdeviceptr dstDevice, size_t dstPitch, unsigned int ui, size_t Width, size_t Height);
|
||
|
typedef CUresult CUDAAPI tcuMemsetD8Async(CUdeviceptr dstDevice, unsigned char uc, size_t N, CUstream hStream);
|
||
|
typedef CUresult CUDAAPI tcuMemsetD16Async(CUdeviceptr dstDevice, unsigned short us, size_t N, CUstream hStream);
|
||
|
typedef CUresult CUDAAPI tcuMemsetD32Async(CUdeviceptr dstDevice, unsigned int ui, size_t N, CUstream hStream);
|
||
|
typedef CUresult CUDAAPI tcuMemsetD2D8Async(CUdeviceptr dstDevice, size_t dstPitch, unsigned char uc, size_t Width, size_t Height, CUstream hStream);
|
||
|
typedef CUresult CUDAAPI tcuMemsetD2D16Async(CUdeviceptr dstDevice, size_t dstPitch, unsigned short us, size_t Width, size_t Height, CUstream hStream);
|
||
|
typedef CUresult CUDAAPI tcuMemsetD2D32Async(CUdeviceptr dstDevice, size_t dstPitch, unsigned int ui, size_t Width, size_t Height, CUstream hStream);
|
||
|
typedef CUresult CUDAAPI tcuArrayCreate_v2(CUarray* pHandle, const CUDA_ARRAY_DESCRIPTOR* pAllocateArray);
|
||
|
typedef CUresult CUDAAPI tcuArrayGetDescriptor_v2(CUDA_ARRAY_DESCRIPTOR* pArrayDescriptor, CUarray hArray);
|
||
|
typedef CUresult CUDAAPI tcuArrayGetSparseProperties(CUDA_ARRAY_SPARSE_PROPERTIES* sparseProperties, CUarray array);
|
||
|
typedef CUresult CUDAAPI tcuMipmappedArrayGetSparseProperties(CUDA_ARRAY_SPARSE_PROPERTIES* sparseProperties, CUmipmappedArray mipmap);
|
||
|
typedef CUresult CUDAAPI tcuArrayGetPlane(CUarray* pPlaneArray, CUarray hArray, unsigned int planeIdx);
|
||
|
typedef CUresult CUDAAPI tcuArrayDestroy(CUarray hArray);
|
||
|
typedef CUresult CUDAAPI tcuArray3DCreate_v2(CUarray* pHandle, const CUDA_ARRAY3D_DESCRIPTOR* pAllocateArray);
|
||
|
typedef CUresult CUDAAPI tcuArray3DGetDescriptor_v2(CUDA_ARRAY3D_DESCRIPTOR* pArrayDescriptor, CUarray hArray);
|
||
|
typedef CUresult CUDAAPI tcuMipmappedArrayCreate(CUmipmappedArray* pHandle, const CUDA_ARRAY3D_DESCRIPTOR* pMipmappedArrayDesc, unsigned int numMipmapLevels);
|
||
|
typedef CUresult CUDAAPI tcuMipmappedArrayGetLevel(CUarray* pLevelArray, CUmipmappedArray hMipmappedArray, unsigned int level);
|
||
|
typedef CUresult CUDAAPI tcuMipmappedArrayDestroy(CUmipmappedArray hMipmappedArray);
|
||
|
typedef CUresult CUDAAPI tcuMemAddressReserve(CUdeviceptr* ptr, size_t size, size_t alignment, CUdeviceptr addr, unsigned long long flags);
|
||
|
typedef CUresult CUDAAPI tcuMemAddressFree(CUdeviceptr ptr, size_t size);
|
||
|
typedef CUresult CUDAAPI tcuMemCreate(CUmemGenericAllocationHandle* handle, size_t size, const CUmemAllocationProp* prop, unsigned long long flags);
|
||
|
typedef CUresult CUDAAPI tcuMemRelease(CUmemGenericAllocationHandle handle);
|
||
|
typedef CUresult CUDAAPI tcuMemMap(CUdeviceptr ptr, size_t size, size_t offset, CUmemGenericAllocationHandle handle, unsigned long long flags);
|
||
|
typedef CUresult CUDAAPI tcuMemMapArrayAsync(CUarrayMapInfo* mapInfoList, unsigned int count, CUstream hStream);
|
||
|
typedef CUresult CUDAAPI tcuMemUnmap(CUdeviceptr ptr, size_t size);
|
||
|
typedef CUresult CUDAAPI tcuMemSetAccess(CUdeviceptr ptr, size_t size, const CUmemAccessDesc* desc, size_t count);
|
||
|
typedef CUresult CUDAAPI tcuMemGetAccess(unsigned long long* flags, const CUmemLocation* location, CUdeviceptr ptr);
|
||
|
typedef CUresult CUDAAPI tcuMemExportToShareableHandle(void* shareableHandle, CUmemGenericAllocationHandle handle, CUmemAllocationHandleType handleType, unsigned long long flags);
|
||
|
typedef CUresult CUDAAPI tcuMemImportFromShareableHandle(CUmemGenericAllocationHandle* handle, void* osHandle, CUmemAllocationHandleType shHandleType);
|
||
|
typedef CUresult CUDAAPI tcuMemGetAllocationGranularity(size_t* granularity, const CUmemAllocationProp* prop, CUmemAllocationGranularity_flags option);
|
||
|
typedef CUresult CUDAAPI tcuMemGetAllocationPropertiesFromHandle(CUmemAllocationProp* prop, CUmemGenericAllocationHandle handle);
|
||
|
typedef CUresult CUDAAPI tcuMemRetainAllocationHandle(CUmemGenericAllocationHandle* handle, void* addr);
|
||
|
typedef CUresult CUDAAPI tcuMemFreeAsync(CUdeviceptr dptr, CUstream hStream);
|
||
|
typedef CUresult CUDAAPI tcuMemAllocAsync(CUdeviceptr* dptr, size_t bytesize, CUstream hStream);
|
||
|
typedef CUresult CUDAAPI tcuMemPoolTrimTo(CUmemoryPool pool, size_t minBytesToKeep);
|
||
|
typedef CUresult CUDAAPI tcuMemPoolSetAttribute(CUmemoryPool pool, CUmemPool_attribute attr, void* value);
|
||
|
typedef CUresult CUDAAPI tcuMemPoolGetAttribute(CUmemoryPool pool, CUmemPool_attribute attr, void* value);
|
||
|
typedef CUresult CUDAAPI tcuMemPoolSetAccess(CUmemoryPool pool, const CUmemAccessDesc* map, size_t count);
|
||
|
typedef CUresult CUDAAPI tcuMemPoolGetAccess(CUmemAccess_flags* flags, CUmemoryPool memPool, CUmemLocation* location);
|
||
|
typedef CUresult CUDAAPI tcuMemPoolCreate(CUmemoryPool* pool, const CUmemPoolProps* poolProps);
|
||
|
typedef CUresult CUDAAPI tcuMemPoolDestroy(CUmemoryPool pool);
|
||
|
typedef CUresult CUDAAPI tcuMemAllocFromPoolAsync(CUdeviceptr* dptr, size_t bytesize, CUmemoryPool pool, CUstream hStream);
|
||
|
typedef CUresult CUDAAPI tcuMemPoolExportToShareableHandle(void* handle_out, CUmemoryPool pool, CUmemAllocationHandleType handleType, unsigned long long flags);
|
||
|
typedef CUresult CUDAAPI tcuMemPoolImportFromShareableHandle(CUmemoryPool* pool_out, void* handle, CUmemAllocationHandleType handleType, unsigned long long flags);
|
||
|
typedef CUresult CUDAAPI tcuMemPoolExportPointer(CUmemPoolPtrExportData* shareData_out, CUdeviceptr ptr);
|
||
|
typedef CUresult CUDAAPI tcuMemPoolImportPointer(CUdeviceptr* ptr_out, CUmemoryPool pool, CUmemPoolPtrExportData* shareData);
|
||
|
typedef CUresult CUDAAPI tcuPointerGetAttribute(void* data, CUpointer_attribute attribute, CUdeviceptr ptr);
|
||
|
typedef CUresult CUDAAPI tcuMemPrefetchAsync(CUdeviceptr devPtr, size_t count, CUdevice dstDevice, CUstream hStream);
|
||
|
typedef CUresult CUDAAPI tcuMemAdvise(CUdeviceptr devPtr, size_t count, CUmem_advise advice, CUdevice device);
|
||
|
typedef CUresult CUDAAPI tcuMemRangeGetAttribute(void* data, size_t dataSize, CUmem_range_attribute attribute, CUdeviceptr devPtr, size_t count);
|
||
|
typedef CUresult CUDAAPI tcuMemRangeGetAttributes(void** data, size_t* dataSizes, CUmem_range_attribute* attributes, size_t numAttributes, CUdeviceptr devPtr, size_t count);
|
||
|
typedef CUresult CUDAAPI tcuPointerSetAttribute(const void* value, CUpointer_attribute attribute, CUdeviceptr ptr);
|
||
|
typedef CUresult CUDAAPI tcuPointerGetAttributes(unsigned int numAttributes, CUpointer_attribute* attributes, void** data, CUdeviceptr ptr);
|
||
|
typedef CUresult CUDAAPI tcuStreamCreate(CUstream* phStream, unsigned int Flags);
|
||
|
typedef CUresult CUDAAPI tcuStreamCreateWithPriority(CUstream* phStream, unsigned int flags, int priority);
|
||
|
typedef CUresult CUDAAPI tcuStreamGetPriority(CUstream hStream, int* priority);
|
||
|
typedef CUresult CUDAAPI tcuStreamGetFlags(CUstream hStream, unsigned int* flags);
|
||
|
typedef CUresult CUDAAPI tcuStreamGetCtx(CUstream hStream, CUcontext* pctx);
|
||
|
typedef CUresult CUDAAPI tcuStreamWaitEvent(CUstream hStream, CUevent hEvent, unsigned int Flags);
|
||
|
typedef CUresult CUDAAPI tcuStreamAddCallback(CUstream hStream, CUstreamCallback callback, void* userData, unsigned int flags);
|
||
|
typedef CUresult CUDAAPI tcuStreamBeginCapture_v2(CUstream hStream, CUstreamCaptureMode mode);
|
||
|
typedef CUresult CUDAAPI tcuThreadExchangeStreamCaptureMode(CUstreamCaptureMode* mode);
|
||
|
typedef CUresult CUDAAPI tcuStreamEndCapture(CUstream hStream, CUgraph* phGraph);
|
||
|
typedef CUresult CUDAAPI tcuStreamIsCapturing(CUstream hStream, CUstreamCaptureStatus* captureStatus);
|
||
|
typedef CUresult CUDAAPI tcuStreamGetCaptureInfo(CUstream hStream, CUstreamCaptureStatus* captureStatus, cuuint64_t* id);
|
||
|
typedef CUresult CUDAAPI tcuStreamAttachMemAsync(CUstream hStream, CUdeviceptr dptr, size_t length, unsigned int flags);
|
||
|
typedef CUresult CUDAAPI tcuStreamQuery(CUstream hStream);
|
||
|
typedef CUresult CUDAAPI tcuStreamSynchronize(CUstream hStream);
|
||
|
typedef CUresult CUDAAPI tcuStreamDestroy_v2(CUstream hStream);
|
||
|
typedef CUresult CUDAAPI tcuStreamCopyAttributes(CUstream dst, CUstream src);
|
||
|
typedef CUresult CUDAAPI tcuStreamGetAttribute(CUstream hStream, CUstreamAttrID attr, CUstreamAttrValue* value_out);
|
||
|
typedef CUresult CUDAAPI tcuStreamSetAttribute(CUstream hStream, CUstreamAttrID attr, const CUstreamAttrValue* value);
|
||
|
typedef CUresult CUDAAPI tcuEventCreate(CUevent* phEvent, unsigned int Flags);
|
||
|
typedef CUresult CUDAAPI tcuEventRecord(CUevent hEvent, CUstream hStream);
|
||
|
typedef CUresult CUDAAPI tcuEventRecordWithFlags(CUevent hEvent, CUstream hStream, unsigned int flags);
|
||
|
typedef CUresult CUDAAPI tcuEventQuery(CUevent hEvent);
|
||
|
typedef CUresult CUDAAPI tcuEventSynchronize(CUevent hEvent);
|
||
|
typedef CUresult CUDAAPI tcuEventDestroy_v2(CUevent hEvent);
|
||
|
typedef CUresult CUDAAPI tcuEventElapsedTime(float* pMilliseconds, CUevent hStart, CUevent hEnd);
|
||
|
typedef CUresult CUDAAPI tcuImportExternalMemory(CUexternalMemory* extMem_out, const CUDA_EXTERNAL_MEMORY_HANDLE_DESC* memHandleDesc);
|
||
|
typedef CUresult CUDAAPI tcuExternalMemoryGetMappedBuffer(CUdeviceptr* devPtr, CUexternalMemory extMem, const CUDA_EXTERNAL_MEMORY_BUFFER_DESC* bufferDesc);
|
||
|
typedef CUresult CUDAAPI tcuExternalMemoryGetMappedMipmappedArray(CUmipmappedArray* mipmap, CUexternalMemory extMem, const CUDA_EXTERNAL_MEMORY_MIPMAPPED_ARRAY_DESC* mipmapDesc);
|
||
|
typedef CUresult CUDAAPI tcuDestroyExternalMemory(CUexternalMemory extMem);
|
||
|
typedef CUresult CUDAAPI tcuImportExternalSemaphore(CUexternalSemaphore* extSem_out, const CUDA_EXTERNAL_SEMAPHORE_HANDLE_DESC* semHandleDesc);
|
||
|
typedef CUresult CUDAAPI tcuSignalExternalSemaphoresAsync(const CUexternalSemaphore* extSemArray, const CUDA_EXTERNAL_SEMAPHORE_SIGNAL_PARAMS* paramsArray, unsigned int numExtSems, CUstream stream);
|
||
|
typedef CUresult CUDAAPI tcuWaitExternalSemaphoresAsync(const CUexternalSemaphore* extSemArray, const CUDA_EXTERNAL_SEMAPHORE_WAIT_PARAMS* paramsArray, unsigned int numExtSems, CUstream stream);
|
||
|
typedef CUresult CUDAAPI tcuDestroyExternalSemaphore(CUexternalSemaphore extSem);
|
||
|
typedef CUresult CUDAAPI tcuStreamWaitValue32(CUstream stream, CUdeviceptr addr, cuuint32_t value, unsigned int flags);
|
||
|
typedef CUresult CUDAAPI tcuStreamWaitValue64(CUstream stream, CUdeviceptr addr, cuuint64_t value, unsigned int flags);
|
||
|
typedef CUresult CUDAAPI tcuStreamWriteValue32(CUstream stream, CUdeviceptr addr, cuuint32_t value, unsigned int flags);
|
||
|
typedef CUresult CUDAAPI tcuStreamWriteValue64(CUstream stream, CUdeviceptr addr, cuuint64_t value, unsigned int flags);
|
||
|
typedef CUresult CUDAAPI tcuStreamBatchMemOp(CUstream stream, unsigned int count, CUstreamBatchMemOpParams* paramArray, unsigned int flags);
|
||
|
typedef CUresult CUDAAPI tcuFuncGetAttribute(int* pi, CUfunction_attribute attrib, CUfunction hfunc);
|
||
|
typedef CUresult CUDAAPI tcuFuncSetAttribute(CUfunction hfunc, CUfunction_attribute attrib, int value);
|
||
|
typedef CUresult CUDAAPI tcuFuncSetCacheConfig(CUfunction hfunc, CUfunc_cache config);
|
||
|
typedef CUresult CUDAAPI tcuFuncSetSharedMemConfig(CUfunction hfunc, CUsharedconfig config);
|
||
|
typedef CUresult CUDAAPI tcuLaunchKernel(CUfunction f, unsigned int gridDimX, unsigned int gridDimY, unsigned int gridDimZ, unsigned int blockDimX, unsigned int blockDimY, unsigned int blockDimZ, unsigned int sharedMemBytes, CUstream hStream, void** kernelParams, void** extra);
|
||
|
typedef CUresult CUDAAPI tcuLaunchCooperativeKernel(CUfunction f, unsigned int gridDimX, unsigned int gridDimY, unsigned int gridDimZ, unsigned int blockDimX, unsigned int blockDimY, unsigned int blockDimZ, unsigned int sharedMemBytes, CUstream hStream, void** kernelParams);
|
||
|
typedef CUresult CUDAAPI tcuLaunchCooperativeKernelMultiDevice(CUDA_LAUNCH_PARAMS* launchParamsList, unsigned int numDevices, unsigned int flags);
|
||
|
typedef CUresult CUDAAPI tcuLaunchHostFunc(CUstream hStream, CUhostFn fn, void* userData);
|
||
|
typedef CUresult CUDAAPI tcuFuncSetBlockShape(CUfunction hfunc, int x, int y, int z);
|
||
|
typedef CUresult CUDAAPI tcuFuncSetSharedSize(CUfunction hfunc, unsigned int bytes);
|
||
|
typedef CUresult CUDAAPI tcuParamSetSize(CUfunction hfunc, unsigned int numbytes);
|
||
|
typedef CUresult CUDAAPI tcuParamSeti(CUfunction hfunc, int offset, unsigned int value);
|
||
|
typedef CUresult CUDAAPI tcuParamSetf(CUfunction hfunc, int offset, float value);
|
||
|
typedef CUresult CUDAAPI tcuParamSetv(CUfunction hfunc, int offset, void* ptr, unsigned int numbytes);
|
||
|
typedef CUresult CUDAAPI tcuLaunch(CUfunction f);
|
||
|
typedef CUresult CUDAAPI tcuLaunchGrid(CUfunction f, int grid_width, int grid_height);
|
||
|
typedef CUresult CUDAAPI tcuLaunchGridAsync(CUfunction f, int grid_width, int grid_height, CUstream hStream);
|
||
|
typedef CUresult CUDAAPI tcuParamSetTexRef(CUfunction hfunc, int texunit, CUtexref hTexRef);
|
||
|
typedef CUresult CUDAAPI tcuGraphCreate(CUgraph* phGraph, unsigned int flags);
|
||
|
typedef CUresult CUDAAPI tcuGraphAddKernelNode(CUgraphNode* phGraphNode, CUgraph hGraph, const CUgraphNode* dependencies, size_t numDependencies, const CUDA_KERNEL_NODE_PARAMS* nodeParams);
|
||
|
typedef CUresult CUDAAPI tcuGraphKernelNodeGetParams(CUgraphNode hNode, CUDA_KERNEL_NODE_PARAMS* nodeParams);
|
||
|
typedef CUresult CUDAAPI tcuGraphKernelNodeSetParams(CUgraphNode hNode, const CUDA_KERNEL_NODE_PARAMS* nodeParams);
|
||
|
typedef CUresult CUDAAPI tcuGraphAddMemcpyNode(CUgraphNode* phGraphNode, CUgraph hGraph, const CUgraphNode* dependencies, size_t numDependencies, const CUDA_MEMCPY3D* copyParams, CUcontext ctx);
|
||
|
typedef CUresult CUDAAPI tcuGraphMemcpyNodeGetParams(CUgraphNode hNode, CUDA_MEMCPY3D* nodeParams);
|
||
|
typedef CUresult CUDAAPI tcuGraphMemcpyNodeSetParams(CUgraphNode hNode, const CUDA_MEMCPY3D* nodeParams);
|
||
|
typedef CUresult CUDAAPI tcuGraphAddMemsetNode(CUgraphNode* phGraphNode, CUgraph hGraph, const CUgraphNode* dependencies, size_t numDependencies, const CUDA_MEMSET_NODE_PARAMS* memsetParams, CUcontext ctx);
|
||
|
typedef CUresult CUDAAPI tcuGraphMemsetNodeGetParams(CUgraphNode hNode, CUDA_MEMSET_NODE_PARAMS* nodeParams);
|
||
|
typedef CUresult CUDAAPI tcuGraphMemsetNodeSetParams(CUgraphNode hNode, const CUDA_MEMSET_NODE_PARAMS* nodeParams);
|
||
|
typedef CUresult CUDAAPI tcuGraphAddHostNode(CUgraphNode* phGraphNode, CUgraph hGraph, const CUgraphNode* dependencies, size_t numDependencies, const CUDA_HOST_NODE_PARAMS* nodeParams);
|
||
|
typedef CUresult CUDAAPI tcuGraphHostNodeGetParams(CUgraphNode hNode, CUDA_HOST_NODE_PARAMS* nodeParams);
|
||
|
typedef CUresult CUDAAPI tcuGraphHostNodeSetParams(CUgraphNode hNode, const CUDA_HOST_NODE_PARAMS* nodeParams);
|
||
|
typedef CUresult CUDAAPI tcuGraphAddChildGraphNode(CUgraphNode* phGraphNode, CUgraph hGraph, const CUgraphNode* dependencies, size_t numDependencies, CUgraph childGraph);
|
||
|
typedef CUresult CUDAAPI tcuGraphChildGraphNodeGetGraph(CUgraphNode hNode, CUgraph* phGraph);
|
||
|
typedef CUresult CUDAAPI tcuGraphAddEmptyNode(CUgraphNode* phGraphNode, CUgraph hGraph, const CUgraphNode* dependencies, size_t numDependencies);
|
||
|
typedef CUresult CUDAAPI tcuGraphAddEventRecordNode(CUgraphNode* phGraphNode, CUgraph hGraph, const CUgraphNode* dependencies, size_t numDependencies, CUevent event);
|
||
|
typedef CUresult CUDAAPI tcuGraphEventRecordNodeGetEvent(CUgraphNode hNode, CUevent* event_out);
|
||
|
typedef CUresult CUDAAPI tcuGraphEventRecordNodeSetEvent(CUgraphNode hNode, CUevent event);
|
||
|
typedef CUresult CUDAAPI tcuGraphAddEventWaitNode(CUgraphNode* phGraphNode, CUgraph hGraph, const CUgraphNode* dependencies, size_t numDependencies, CUevent event);
|
||
|
typedef CUresult CUDAAPI tcuGraphEventWaitNodeGetEvent(CUgraphNode hNode, CUevent* event_out);
|
||
|
typedef CUresult CUDAAPI tcuGraphEventWaitNodeSetEvent(CUgraphNode hNode, CUevent event);
|
||
|
typedef CUresult CUDAAPI tcuGraphAddExternalSemaphoresSignalNode(CUgraphNode* phGraphNode, CUgraph hGraph, const CUgraphNode* dependencies, size_t numDependencies, const CUDA_EXT_SEM_SIGNAL_NODE_PARAMS* nodeParams);
|
||
|
typedef CUresult CUDAAPI tcuGraphExternalSemaphoresSignalNodeGetParams(CUgraphNode hNode, CUDA_EXT_SEM_SIGNAL_NODE_PARAMS* params_out);
|
||
|
typedef CUresult CUDAAPI tcuGraphExternalSemaphoresSignalNodeSetParams(CUgraphNode hNode, const CUDA_EXT_SEM_SIGNAL_NODE_PARAMS* nodeParams);
|
||
|
typedef CUresult CUDAAPI tcuGraphAddExternalSemaphoresWaitNode(CUgraphNode* phGraphNode, CUgraph hGraph, const CUgraphNode* dependencies, size_t numDependencies, const CUDA_EXT_SEM_WAIT_NODE_PARAMS* nodeParams);
|
||
|
typedef CUresult CUDAAPI tcuGraphExternalSemaphoresWaitNodeGetParams(CUgraphNode hNode, CUDA_EXT_SEM_WAIT_NODE_PARAMS* params_out);
|
||
|
typedef CUresult CUDAAPI tcuGraphExternalSemaphoresWaitNodeSetParams(CUgraphNode hNode, const CUDA_EXT_SEM_WAIT_NODE_PARAMS* nodeParams);
|
||
|
typedef CUresult CUDAAPI tcuGraphClone(CUgraph* phGraphClone, CUgraph originalGraph);
|
||
|
typedef CUresult CUDAAPI tcuGraphNodeFindInClone(CUgraphNode* phNode, CUgraphNode hOriginalNode, CUgraph hClonedGraph);
|
||
|
typedef CUresult CUDAAPI tcuGraphNodeGetType(CUgraphNode hNode, CUgraphNodeType* type);
|
||
|
typedef CUresult CUDAAPI tcuGraphGetNodes(CUgraph hGraph, CUgraphNode* nodes, size_t* numNodes);
|
||
|
typedef CUresult CUDAAPI tcuGraphGetRootNodes(CUgraph hGraph, CUgraphNode* rootNodes, size_t* numRootNodes);
|
||
|
typedef CUresult CUDAAPI tcuGraphGetEdges(CUgraph hGraph, CUgraphNode* from, CUgraphNode* to, size_t* numEdges);
|
||
|
typedef CUresult CUDAAPI tcuGraphNodeGetDependencies(CUgraphNode hNode, CUgraphNode* dependencies, size_t* numDependencies);
|
||
|
typedef CUresult CUDAAPI tcuGraphNodeGetDependentNodes(CUgraphNode hNode, CUgraphNode* dependentNodes, size_t* numDependentNodes);
|
||
|
typedef CUresult CUDAAPI tcuGraphAddDependencies(CUgraph hGraph, const CUgraphNode* from, const CUgraphNode* to, size_t numDependencies);
|
||
|
typedef CUresult CUDAAPI tcuGraphRemoveDependencies(CUgraph hGraph, const CUgraphNode* from, const CUgraphNode* to, size_t numDependencies);
|
||
|
typedef CUresult CUDAAPI tcuGraphDestroyNode(CUgraphNode hNode);
|
||
|
typedef CUresult CUDAAPI tcuGraphInstantiate_v2(CUgraphExec* phGraphExec, CUgraph hGraph, CUgraphNode* phErrorNode, char* logBuffer, size_t bufferSize);
|
||
|
typedef CUresult CUDAAPI tcuGraphExecKernelNodeSetParams(CUgraphExec hGraphExec, CUgraphNode hNode, const CUDA_KERNEL_NODE_PARAMS* nodeParams);
|
||
|
typedef CUresult CUDAAPI tcuGraphExecMemcpyNodeSetParams(CUgraphExec hGraphExec, CUgraphNode hNode, const CUDA_MEMCPY3D* copyParams, CUcontext ctx);
|
||
|
typedef CUresult CUDAAPI tcuGraphExecMemsetNodeSetParams(CUgraphExec hGraphExec, CUgraphNode hNode, const CUDA_MEMSET_NODE_PARAMS* memsetParams, CUcontext ctx);
|
||
|
typedef CUresult CUDAAPI tcuGraphExecHostNodeSetParams(CUgraphExec hGraphExec, CUgraphNode hNode, const CUDA_HOST_NODE_PARAMS* nodeParams);
|
||
|
typedef CUresult CUDAAPI tcuGraphExecChildGraphNodeSetParams(CUgraphExec hGraphExec, CUgraphNode hNode, CUgraph childGraph);
|
||
|
typedef CUresult CUDAAPI tcuGraphExecEventRecordNodeSetEvent(CUgraphExec hGraphExec, CUgraphNode hNode, CUevent event);
|
||
|
typedef CUresult CUDAAPI tcuGraphExecEventWaitNodeSetEvent(CUgraphExec hGraphExec, CUgraphNode hNode, CUevent event);
|
||
|
typedef CUresult CUDAAPI tcuGraphExecExternalSemaphoresSignalNodeSetParams(CUgraphExec hGraphExec, CUgraphNode hNode, const CUDA_EXT_SEM_SIGNAL_NODE_PARAMS* nodeParams);
|
||
|
typedef CUresult CUDAAPI tcuGraphExecExternalSemaphoresWaitNodeSetParams(CUgraphExec hGraphExec, CUgraphNode hNode, const CUDA_EXT_SEM_WAIT_NODE_PARAMS* nodeParams);
|
||
|
typedef CUresult CUDAAPI tcuGraphUpload(CUgraphExec hGraphExec, CUstream hStream);
|
||
|
typedef CUresult CUDAAPI tcuGraphLaunch(CUgraphExec hGraphExec, CUstream hStream);
|
||
|
typedef CUresult CUDAAPI tcuGraphExecDestroy(CUgraphExec hGraphExec);
|
||
|
typedef CUresult CUDAAPI tcuGraphDestroy(CUgraph hGraph);
|
||
|
typedef CUresult CUDAAPI tcuGraphExecUpdate(CUgraphExec hGraphExec, CUgraph hGraph, CUgraphNode* hErrorNode_out, CUgraphExecUpdateResult* updateResult_out);
|
||
|
typedef CUresult CUDAAPI tcuGraphKernelNodeCopyAttributes(CUgraphNode dst, CUgraphNode src);
|
||
|
typedef CUresult CUDAAPI tcuGraphKernelNodeGetAttribute(CUgraphNode hNode, CUkernelNodeAttrID attr, CUkernelNodeAttrValue* value_out);
|
||
|
typedef CUresult CUDAAPI tcuGraphKernelNodeSetAttribute(CUgraphNode hNode, CUkernelNodeAttrID attr, const CUkernelNodeAttrValue* value);
|
||
|
typedef CUresult CUDAAPI tcuOccupancyMaxActiveBlocksPerMultiprocessor(int* numBlocks, CUfunction func, int blockSize, size_t dynamicSMemSize);
|
||
|
typedef CUresult CUDAAPI tcuOccupancyMaxActiveBlocksPerMultiprocessorWithFlags(int* numBlocks, CUfunction func, int blockSize, size_t dynamicSMemSize, unsigned int flags);
|
||
|
typedef CUresult CUDAAPI tcuOccupancyMaxPotentialBlockSize(int* minGridSize, int* blockSize, CUfunction func, CUoccupancyB2DSize blockSizeToDynamicSMemSize, size_t dynamicSMemSize, int blockSizeLimit);
|
||
|
typedef CUresult CUDAAPI tcuOccupancyMaxPotentialBlockSizeWithFlags(int* minGridSize, int* blockSize, CUfunction func, CUoccupancyB2DSize blockSizeToDynamicSMemSize, size_t dynamicSMemSize, int blockSizeLimit, unsigned int flags);
|
||
|
typedef CUresult CUDAAPI tcuOccupancyAvailableDynamicSMemPerBlock(size_t* dynamicSmemSize, CUfunction func, int numBlocks, int blockSize);
|
||
|
typedef CUresult CUDAAPI tcuTexRefSetArray(CUtexref hTexRef, CUarray hArray, unsigned int Flags);
|
||
|
typedef CUresult CUDAAPI tcuTexRefSetMipmappedArray(CUtexref hTexRef, CUmipmappedArray hMipmappedArray, unsigned int Flags);
|
||
|
typedef CUresult CUDAAPI tcuTexRefSetAddress_v2(size_t* ByteOffset, CUtexref hTexRef, CUdeviceptr dptr, size_t bytes);
|
||
|
typedef CUresult CUDAAPI tcuTexRefSetAddress2D_v3(CUtexref hTexRef, const CUDA_ARRAY_DESCRIPTOR* desc, CUdeviceptr dptr, size_t Pitch);
|
||
|
typedef CUresult CUDAAPI tcuTexRefSetFormat(CUtexref hTexRef, CUarray_format fmt, int NumPackedComponents);
|
||
|
typedef CUresult CUDAAPI tcuTexRefSetAddressMode(CUtexref hTexRef, int dim, CUaddress_mode am);
|
||
|
typedef CUresult CUDAAPI tcuTexRefSetFilterMode(CUtexref hTexRef, CUfilter_mode fm);
|
||
|
typedef CUresult CUDAAPI tcuTexRefSetMipmapFilterMode(CUtexref hTexRef, CUfilter_mode fm);
|
||
|
typedef CUresult CUDAAPI tcuTexRefSetMipmapLevelBias(CUtexref hTexRef, float bias);
|
||
|
typedef CUresult CUDAAPI tcuTexRefSetMipmapLevelClamp(CUtexref hTexRef, float minMipmapLevelClamp, float maxMipmapLevelClamp);
|
||
|
typedef CUresult CUDAAPI tcuTexRefSetMaxAnisotropy(CUtexref hTexRef, unsigned int maxAniso);
|
||
|
typedef CUresult CUDAAPI tcuTexRefSetBorderColor(CUtexref hTexRef, float* pBorderColor);
|
||
|
typedef CUresult CUDAAPI tcuTexRefSetFlags(CUtexref hTexRef, unsigned int Flags);
|
||
|
typedef CUresult CUDAAPI tcuTexRefGetAddress_v2(CUdeviceptr* pdptr, CUtexref hTexRef);
|
||
|
typedef CUresult CUDAAPI tcuTexRefGetArray(CUarray* phArray, CUtexref hTexRef);
|
||
|
typedef CUresult CUDAAPI tcuTexRefGetMipmappedArray(CUmipmappedArray* phMipmappedArray, CUtexref hTexRef);
|
||
|
typedef CUresult CUDAAPI tcuTexRefGetAddressMode(CUaddress_mode* pam, CUtexref hTexRef, int dim);
|
||
|
typedef CUresult CUDAAPI tcuTexRefGetFilterMode(CUfilter_mode* pfm, CUtexref hTexRef);
|
||
|
typedef CUresult CUDAAPI tcuTexRefGetFormat(CUarray_format* pFormat, int* pNumChannels, CUtexref hTexRef);
|
||
|
typedef CUresult CUDAAPI tcuTexRefGetMipmapFilterMode(CUfilter_mode* pfm, CUtexref hTexRef);
|
||
|
typedef CUresult CUDAAPI tcuTexRefGetMipmapLevelBias(float* pbias, CUtexref hTexRef);
|
||
|
typedef CUresult CUDAAPI tcuTexRefGetMipmapLevelClamp(float* pminMipmapLevelClamp, float* pmaxMipmapLevelClamp, CUtexref hTexRef);
|
||
|
typedef CUresult CUDAAPI tcuTexRefGetMaxAnisotropy(int* pmaxAniso, CUtexref hTexRef);
|
||
|
typedef CUresult CUDAAPI tcuTexRefGetBorderColor(float* pBorderColor, CUtexref hTexRef);
|
||
|
typedef CUresult CUDAAPI tcuTexRefGetFlags(unsigned int* pFlags, CUtexref hTexRef);
|
||
|
typedef CUresult CUDAAPI tcuTexRefCreate(CUtexref* pTexRef);
|
||
|
typedef CUresult CUDAAPI tcuTexRefDestroy(CUtexref hTexRef);
|
||
|
typedef CUresult CUDAAPI tcuSurfRefSetArray(CUsurfref hSurfRef, CUarray hArray, unsigned int Flags);
|
||
|
typedef CUresult CUDAAPI tcuSurfRefGetArray(CUarray* phArray, CUsurfref hSurfRef);
|
||
|
typedef CUresult CUDAAPI tcuTexObjectCreate(CUtexObject* pTexObject, const CUDA_RESOURCE_DESC* pResDesc, const CUDA_TEXTURE_DESC* pTexDesc, const CUDA_RESOURCE_VIEW_DESC* pResViewDesc);
|
||
|
typedef CUresult CUDAAPI tcuTexObjectDestroy(CUtexObject texObject);
|
||
|
typedef CUresult CUDAAPI tcuTexObjectGetResourceDesc(CUDA_RESOURCE_DESC* pResDesc, CUtexObject texObject);
|
||
|
typedef CUresult CUDAAPI tcuTexObjectGetTextureDesc(CUDA_TEXTURE_DESC* pTexDesc, CUtexObject texObject);
|
||
|
typedef CUresult CUDAAPI tcuTexObjectGetResourceViewDesc(CUDA_RESOURCE_VIEW_DESC* pResViewDesc, CUtexObject texObject);
|
||
|
typedef CUresult CUDAAPI tcuSurfObjectCreate(CUsurfObject* pSurfObject, const CUDA_RESOURCE_DESC* pResDesc);
|
||
|
typedef CUresult CUDAAPI tcuSurfObjectDestroy(CUsurfObject surfObject);
|
||
|
typedef CUresult CUDAAPI tcuSurfObjectGetResourceDesc(CUDA_RESOURCE_DESC* pResDesc, CUsurfObject surfObject);
|
||
|
typedef CUresult CUDAAPI tcuDeviceCanAccessPeer(int* canAccessPeer, CUdevice dev, CUdevice peerDev);
|
||
|
typedef CUresult CUDAAPI tcuCtxEnablePeerAccess(CUcontext peerContext, unsigned int Flags);
|
||
|
typedef CUresult CUDAAPI tcuCtxDisablePeerAccess(CUcontext peerContext);
|
||
|
typedef CUresult CUDAAPI tcuDeviceGetP2PAttribute(int* value, CUdevice_P2PAttribute attrib, CUdevice srcDevice, CUdevice dstDevice);
|
||
|
typedef CUresult CUDAAPI tcuGraphicsUnregisterResource(CUgraphicsResource resource);
|
||
|
typedef CUresult CUDAAPI tcuGraphicsSubResourceGetMappedArray(CUarray* pArray, CUgraphicsResource resource, unsigned int arrayIndex, unsigned int mipLevel);
|
||
|
typedef CUresult CUDAAPI tcuGraphicsResourceGetMappedMipmappedArray(CUmipmappedArray* pMipmappedArray, CUgraphicsResource resource);
|
||
|
typedef CUresult CUDAAPI tcuGraphicsResourceGetMappedPointer_v2(CUdeviceptr* pDevPtr, size_t* pSize, CUgraphicsResource resource);
|
||
|
typedef CUresult CUDAAPI tcuGraphicsResourceSetMapFlags_v2(CUgraphicsResource resource, unsigned int flags);
|
||
|
typedef CUresult CUDAAPI tcuGraphicsMapResources(unsigned int count, CUgraphicsResource* resources, CUstream hStream);
|
||
|
typedef CUresult CUDAAPI tcuGraphicsUnmapResources(unsigned int count, CUgraphicsResource* resources, CUstream hStream);
|
||
|
typedef CUresult CUDAAPI tcuGetExportTable(const void** ppExportTable, const CUuuid* pExportTableId);
|
||
|
typedef CUresult CUDAAPI tcuFuncGetModule(CUmodule* hmod, CUfunction hfunc);
|
||
|
|
||
|
typedef const char* CUDAAPI tnvrtcGetErrorString(nvrtcResult result);
|
||
|
typedef nvrtcResult CUDAAPI tnvrtcVersion(int* major, int* minor);
|
||
|
typedef nvrtcResult CUDAAPI tnvrtcGetNumSupportedArchs(int* numArchs);
|
||
|
typedef nvrtcResult CUDAAPI tnvrtcGetSupportedArchs(int* supportedArchs);
|
||
|
typedef nvrtcResult CUDAAPI tnvrtcCreateProgram(nvrtcProgram* prog, const char* src, const char* name, int numHeaders, const char** headers, const char** includeNames);
|
||
|
typedef nvrtcResult CUDAAPI tnvrtcDestroyProgram(nvrtcProgram* prog);
|
||
|
typedef nvrtcResult CUDAAPI tnvrtcCompileProgram(nvrtcProgram prog, int numOptions, const char** options);
|
||
|
typedef nvrtcResult CUDAAPI tnvrtcGetPTXSize(nvrtcProgram prog, size_t* ptxSizeRet);
|
||
|
typedef nvrtcResult CUDAAPI tnvrtcGetPTX(nvrtcProgram prog, char* ptx);
|
||
|
typedef nvrtcResult CUDAAPI tnvrtcGetCUBINSize(nvrtcProgram prog, size_t* cubinSizeRet);
|
||
|
typedef nvrtcResult CUDAAPI tnvrtcGetCUBIN(nvrtcProgram prog, char* cubin);
|
||
|
typedef nvrtcResult CUDAAPI tnvrtcGetProgramLogSize(nvrtcProgram prog, size_t* logSizeRet);
|
||
|
typedef nvrtcResult CUDAAPI tnvrtcGetProgramLog(nvrtcProgram prog, char* log);
|
||
|
typedef nvrtcResult CUDAAPI tnvrtcAddNameExpression(nvrtcProgram prog, const char* name_expression);
|
||
|
typedef nvrtcResult CUDAAPI tnvrtcGetLoweredName(nvrtcProgram prog, const char* name_expression, const char** lowered_name);
|
||
|
|
||
|
typedef size_t CUDAAPI tcudnnGetVersion(void);
|
||
|
typedef size_t CUDAAPI tcudnnGetCudartVersion(void);
|
||
|
typedef const char* CUDAAPI tcudnnGetErrorString(cudnnStatus_t status);
|
||
|
typedef cudnnStatus_t CUDAAPI tcudnnQueryRuntimeError(cudnnHandle_t handle, cudnnStatus_t* rstatus, cudnnErrQueryMode_t mode, cudnnRuntimeTag_t* tag);
|
||
|
typedef cudnnStatus_t CUDAAPI tcudnnGetProperty(libraryPropertyType type, int* value);
|
||
|
typedef cudnnStatus_t CUDAAPI tcudnnCreate(cudnnHandle_t* handle);
|
||
|
typedef cudnnStatus_t CUDAAPI tcudnnDestroy(cudnnHandle_t handle);
|
||
|
typedef cudnnStatus_t CUDAAPI tcudnnSetStream(cudnnHandle_t handle, cudaStream_t streamId);
|
||
|
typedef cudnnStatus_t CUDAAPI tcudnnGetStream(cudnnHandle_t handle, cudaStream_t* streamId);
|
||
|
typedef cudnnStatus_t CUDAAPI tcudnnCreateTensorDescriptor(cudnnTensorDescriptor_t* tensorDesc);
|
||
|
typedef cudnnStatus_t CUDAAPI tcudnnSetTensor4dDescriptor(cudnnTensorDescriptor_t tensorDesc, cudnnTensorFormat_t format, cudnnDataType_t dataType, int n, int c, int h, int w);
|
||
|
typedef cudnnStatus_t CUDAAPI tcudnnSetTensor4dDescriptorEx(cudnnTensorDescriptor_t tensorDesc, cudnnDataType_t dataType, int n, int c, int h, int w, int nStride, int cStride, int hStride, int wStride);
|
||
|
typedef cudnnStatus_t CUDAAPI tcudnnGetTensor4dDescriptor(const cudnnTensorDescriptor_t tensorDesc, cudnnDataType_t* dataType, int* n, int* c, int* h, int* w, int* nStride, int* cStride, int* hStride, int* wStride);
|
||
|
typedef cudnnStatus_t CUDAAPI tcudnnSetTensorNdDescriptor(cudnnTensorDescriptor_t tensorDesc, cudnnDataType_t dataType, int nbDims, const int dimA[], const int strideA[]);
|
||
|
typedef cudnnStatus_t CUDAAPI tcudnnSetTensorNdDescriptorEx(cudnnTensorDescriptor_t tensorDesc, cudnnTensorFormat_t format, cudnnDataType_t dataType, int nbDims, const int dimA[]);
|
||
|
typedef cudnnStatus_t CUDAAPI tcudnnGetTensorNdDescriptor(const cudnnTensorDescriptor_t tensorDesc, int nbDimsRequested, cudnnDataType_t* dataType, int* nbDims, int dimA[], int strideA[]);
|
||
|
typedef cudnnStatus_t CUDAAPI tcudnnGetTensorSizeInBytes(const cudnnTensorDescriptor_t tensorDesc, size_t* size);
|
||
|
typedef cudnnStatus_t CUDAAPI tcudnnDestroyTensorDescriptor(cudnnTensorDescriptor_t tensorDesc);
|
||
|
typedef cudnnStatus_t CUDAAPI tcudnnInitTransformDest(const cudnnTensorTransformDescriptor_t transformDesc, const cudnnTensorDescriptor_t srcDesc, cudnnTensorDescriptor_t destDesc, size_t* destSizeInBytes);
|
||
|
typedef cudnnStatus_t CUDAAPI tcudnnCreateTensorTransformDescriptor(cudnnTensorTransformDescriptor_t* transformDesc);
|
||
|
typedef cudnnStatus_t CUDAAPI tcudnnSetTensorTransformDescriptor(cudnnTensorTransformDescriptor_t transformDesc, const uint32_t nbDims, const cudnnTensorFormat_t destFormat, const int32_t padBeforeA[], const int32_t padAfterA[], const uint32_t foldA[], const cudnnFoldingDirection_t direction);
|
||
|
typedef cudnnStatus_t CUDAAPI tcudnnGetTensorTransformDescriptor(cudnnTensorTransformDescriptor_t transformDesc, uint32_t nbDimsRequested, cudnnTensorFormat_t* destFormat, int32_t padBeforeA[], int32_t padAfterA[], uint32_t foldA[], cudnnFoldingDirection_t* direction);
|
||
|
typedef cudnnStatus_t CUDAAPI tcudnnDestroyTensorTransformDescriptor(cudnnTensorTransformDescriptor_t transformDesc);
|
||
|
typedef cudnnStatus_t CUDAAPI tcudnnTransformTensor(cudnnHandle_t handle, const void* alpha, const cudnnTensorDescriptor_t xDesc, const void* x, const void* beta, const cudnnTensorDescriptor_t yDesc, void* y);
|
||
|
typedef cudnnStatus_t CUDAAPI tcudnnTransformTensorEx(cudnnHandle_t handle, const cudnnTensorTransformDescriptor_t transDesc, const void* alpha, const cudnnTensorDescriptor_t srcDesc, const void* srcData, const void* beta, const cudnnTensorDescriptor_t destDesc, void* destData);
|
||
|
typedef cudnnStatus_t CUDAAPI tcudnnAddTensor(cudnnHandle_t handle, const void* alpha, const cudnnTensorDescriptor_t aDesc, const void* A, const void* beta, const cudnnTensorDescriptor_t cDesc, void* C);
|
||
|
typedef cudnnStatus_t CUDAAPI tcudnnCreateOpTensorDescriptor(cudnnOpTensorDescriptor_t* opTensorDesc);
|
||
|
typedef cudnnStatus_t CUDAAPI tcudnnSetOpTensorDescriptor(cudnnOpTensorDescriptor_t opTensorDesc, cudnnOpTensorOp_t opTensorOp, cudnnDataType_t opTensorCompType, cudnnNanPropagation_t opTensorNanOpt);
|
||
|
typedef cudnnStatus_t CUDAAPI tcudnnGetOpTensorDescriptor(const cudnnOpTensorDescriptor_t opTensorDesc, cudnnOpTensorOp_t* opTensorOp, cudnnDataType_t* opTensorCompType, cudnnNanPropagation_t* opTensorNanOpt);
|
||
|
typedef cudnnStatus_t CUDAAPI tcudnnDestroyOpTensorDescriptor(cudnnOpTensorDescriptor_t opTensorDesc);
|
||
|
typedef cudnnStatus_t CUDAAPI tcudnnOpTensor(cudnnHandle_t handle, const cudnnOpTensorDescriptor_t opTensorDesc, const void* alpha1, const cudnnTensorDescriptor_t aDesc, const void* A, const void* alpha2, const cudnnTensorDescriptor_t bDesc, const void* B, const void* beta, const cudnnTensorDescriptor_t cDesc, void* C);
|
||
|
typedef cudnnStatus_t CUDAAPI tcudnnCreateReduceTensorDescriptor(cudnnReduceTensorDescriptor_t* reduceTensorDesc);
|
||
|
typedef cudnnStatus_t CUDAAPI tcudnnSetReduceTensorDescriptor(cudnnReduceTensorDescriptor_t reduceTensorDesc, cudnnReduceTensorOp_t reduceTensorOp, cudnnDataType_t reduceTensorCompType, cudnnNanPropagation_t reduceTensorNanOpt, cudnnReduceTensorIndices_t reduceTensorIndices, cudnnIndicesType_t reduceTensorIndicesType);
|
||
|
typedef cudnnStatus_t CUDAAPI tcudnnGetReduceTensorDescriptor(const cudnnReduceTensorDescriptor_t reduceTensorDesc, cudnnReduceTensorOp_t* reduceTensorOp, cudnnDataType_t* reduceTensorCompType, cudnnNanPropagation_t* reduceTensorNanOpt, cudnnReduceTensorIndices_t* reduceTensorIndices, cudnnIndicesType_t* reduceTensorIndicesType);
|
||
|
typedef cudnnStatus_t CUDAAPI tcudnnDestroyReduceTensorDescriptor(cudnnReduceTensorDescriptor_t reduceTensorDesc);
|
||
|
typedef cudnnStatus_t CUDAAPI tcudnnGetReductionIndicesSize(cudnnHandle_t handle, const cudnnReduceTensorDescriptor_t reduceTensorDesc, const cudnnTensorDescriptor_t aDesc, const cudnnTensorDescriptor_t cDesc, size_t* sizeInBytes);
|
||
|
typedef cudnnStatus_t CUDAAPI tcudnnGetReductionWorkspaceSize(cudnnHandle_t handle, const cudnnReduceTensorDescriptor_t reduceTensorDesc, const cudnnTensorDescriptor_t aDesc, const cudnnTensorDescriptor_t cDesc, size_t* sizeInBytes);
|
||
|
typedef cudnnStatus_t CUDAAPI tcudnnReduceTensor(cudnnHandle_t handle, const cudnnReduceTensorDescriptor_t reduceTensorDesc, void* indices, size_t indicesSizeInBytes, void* workspace, size_t workspaceSizeInBytes, const void* alpha, const cudnnTensorDescriptor_t aDesc, const void* A, const void* beta, const cudnnTensorDescriptor_t cDesc, void* C);
|
||
|
typedef cudnnStatus_t CUDAAPI tcudnnSetTensor(cudnnHandle_t handle, const cudnnTensorDescriptor_t yDesc, void* y, const void* valuePtr);
|
||
|
typedef cudnnStatus_t CUDAAPI tcudnnScaleTensor(cudnnHandle_t handle, const cudnnTensorDescriptor_t yDesc, void* y, const void* alpha);
|
||
|
typedef cudnnStatus_t CUDAAPI tcudnnCreateFilterDescriptor(cudnnFilterDescriptor_t* filterDesc);
|
||
|
typedef cudnnStatus_t CUDAAPI tcudnnSetFilter4dDescriptor(cudnnFilterDescriptor_t filterDesc, cudnnDataType_t dataType, cudnnTensorFormat_t format, int k, int c, int h, int w);
|
||
|
typedef cudnnStatus_t CUDAAPI tcudnnGetFilter4dDescriptor(const cudnnFilterDescriptor_t filterDesc, cudnnDataType_t* dataType, cudnnTensorFormat_t* format, int* k, int* c, int* h, int* w);
|
||
|
typedef cudnnStatus_t CUDAAPI tcudnnSetFilterNdDescriptor(cudnnFilterDescriptor_t filterDesc, cudnnDataType_t dataType, cudnnTensorFormat_t format, int nbDims, const int filterDimA[]);
|
||
|
typedef cudnnStatus_t CUDAAPI tcudnnGetFilterNdDescriptor(const cudnnFilterDescriptor_t filterDesc, int nbDimsRequested, cudnnDataType_t* dataType, cudnnTensorFormat_t* format, int* nbDims, int filterDimA[]);
|
||
|
typedef cudnnStatus_t CUDAAPI tcudnnGetFilterSizeInBytes(const cudnnFilterDescriptor_t filterDesc, size_t* size);
|
||
|
typedef cudnnStatus_t CUDAAPI tcudnnTransformFilter(cudnnHandle_t handle, const cudnnTensorTransformDescriptor_t transDesc, const void* alpha, const cudnnFilterDescriptor_t srcDesc, const void* srcData, const void* beta, const cudnnFilterDescriptor_t destDesc, void* destData);
|
||
|
typedef cudnnStatus_t CUDAAPI tcudnnDestroyFilterDescriptor(cudnnFilterDescriptor_t filterDesc);
|
||
|
typedef cudnnStatus_t CUDAAPI tcudnnSoftmaxForward(cudnnHandle_t handle, cudnnSoftmaxAlgorithm_t algo, cudnnSoftmaxMode_t mode, const void* alpha, const cudnnTensorDescriptor_t xDesc, const void* x, const void* beta, const cudnnTensorDescriptor_t yDesc, void* y);
|
||
|
typedef cudnnStatus_t CUDAAPI tcudnnCreatePoolingDescriptor(cudnnPoolingDescriptor_t* poolingDesc);
|
||
|
typedef cudnnStatus_t CUDAAPI tcudnnSetPooling2dDescriptor(cudnnPoolingDescriptor_t poolingDesc, cudnnPoolingMode_t mode, cudnnNanPropagation_t maxpoolingNanOpt, int windowHeight, int windowWidth, int verticalPadding, int horizontalPadding, int verticalStride, int horizontalStride);
|
||
|
typedef cudnnStatus_t CUDAAPI tcudnnGetPooling2dDescriptor(const cudnnPoolingDescriptor_t poolingDesc, cudnnPoolingMode_t* mode, cudnnNanPropagation_t* maxpoolingNanOpt, int* windowHeight, int* windowWidth, int* verticalPadding, int* horizontalPadding, int* verticalStride, int* horizontalStride);
|
||
|
typedef cudnnStatus_t CUDAAPI tcudnnSetPoolingNdDescriptor(cudnnPoolingDescriptor_t poolingDesc, const cudnnPoolingMode_t mode, const cudnnNanPropagation_t maxpoolingNanOpt, int nbDims, const int windowDimA[], const int paddingA[], const int strideA[]);
|
||
|
typedef cudnnStatus_t CUDAAPI tcudnnGetPoolingNdDescriptor(const cudnnPoolingDescriptor_t poolingDesc, int nbDimsRequested, cudnnPoolingMode_t* mode, cudnnNanPropagation_t* maxpoolingNanOpt, int* nbDims, int windowDimA[], int paddingA[], int strideA[]);
|
||
|
typedef cudnnStatus_t CUDAAPI tcudnnGetPoolingNdForwardOutputDim(const cudnnPoolingDescriptor_t poolingDesc, const cudnnTensorDescriptor_t inputTensorDesc, int nbDims, int outputTensorDimA[]);
|
||
|
typedef cudnnStatus_t CUDAAPI tcudnnGetPooling2dForwardOutputDim(const cudnnPoolingDescriptor_t poolingDesc, const cudnnTensorDescriptor_t inputTensorDesc, int* n, int* c, int* h, int* w);
|
||
|
typedef cudnnStatus_t CUDAAPI tcudnnDestroyPoolingDescriptor(cudnnPoolingDescriptor_t poolingDesc);
|
||
|
typedef cudnnStatus_t CUDAAPI tcudnnPoolingForward(cudnnHandle_t handle, const cudnnPoolingDescriptor_t poolingDesc, const void* alpha, const cudnnTensorDescriptor_t xDesc, const void* x, const void* beta, const cudnnTensorDescriptor_t yDesc, void* y);
|
||
|
typedef cudnnStatus_t CUDAAPI tcudnnCreateActivationDescriptor(cudnnActivationDescriptor_t* activationDesc);
|
||
|
typedef cudnnStatus_t CUDAAPI tcudnnSetActivationDescriptor(cudnnActivationDescriptor_t activationDesc, cudnnActivationMode_t mode, cudnnNanPropagation_t reluNanOpt, double coef);
|
||
|
typedef cudnnStatus_t CUDAAPI tcudnnGetActivationDescriptor(const cudnnActivationDescriptor_t activationDesc, cudnnActivationMode_t* mode, cudnnNanPropagation_t* reluNanOpt, double* coef);
|
||
|
typedef cudnnStatus_t CUDAAPI tcudnnDestroyActivationDescriptor(cudnnActivationDescriptor_t activationDesc);
|
||
|
typedef cudnnStatus_t CUDAAPI tcudnnActivationForward(cudnnHandle_t handle, cudnnActivationDescriptor_t activationDesc, const void* alpha, const cudnnTensorDescriptor_t xDesc, const void* x, const void* beta, const cudnnTensorDescriptor_t yDesc, void* y);
|
||
|
typedef cudnnStatus_t CUDAAPI tcudnnCreateLRNDescriptor(cudnnLRNDescriptor_t* normDesc);
|
||
|
typedef cudnnStatus_t CUDAAPI tcudnnSetLRNDescriptor(cudnnLRNDescriptor_t normDesc, unsigned lrnN, double lrnAlpha, double lrnBeta, double lrnK);
|
||
|
typedef cudnnStatus_t CUDAAPI tcudnnGetLRNDescriptor(cudnnLRNDescriptor_t normDesc, unsigned* lrnN, double* lrnAlpha, double* lrnBeta, double* lrnK);
|
||
|
typedef cudnnStatus_t CUDAAPI tcudnnDestroyLRNDescriptor(cudnnLRNDescriptor_t lrnDesc);
|
||
|
typedef cudnnStatus_t CUDAAPI tcudnnLRNCrossChannelForward(cudnnHandle_t handle, cudnnLRNDescriptor_t normDesc, cudnnLRNMode_t lrnMode, const void* alpha, const cudnnTensorDescriptor_t xDesc, const void* x, const void* beta, const cudnnTensorDescriptor_t yDesc, void* y);
|
||
|
typedef cudnnStatus_t CUDAAPI tcudnnDivisiveNormalizationForward(cudnnHandle_t handle, cudnnLRNDescriptor_t normDesc, cudnnDivNormMode_t mode, const void* alpha, const cudnnTensorDescriptor_t xDesc, const void* x, const void* means, void* temp, void* temp2, const void* beta, const cudnnTensorDescriptor_t yDesc, void* y);
|
||
|
typedef cudnnStatus_t CUDAAPI tcudnnDeriveBNTensorDescriptor(cudnnTensorDescriptor_t derivedBnDesc, const cudnnTensorDescriptor_t xDesc, cudnnBatchNormMode_t mode);
|
||
|
typedef cudnnStatus_t CUDAAPI tcudnnBatchNormalizationForwardInference(cudnnHandle_t handle, cudnnBatchNormMode_t mode, const void* alpha, const void* beta, const cudnnTensorDescriptor_t xDesc, const void* x, const cudnnTensorDescriptor_t yDesc, void* y, const cudnnTensorDescriptor_t bnScaleBiasMeanVarDesc, const void* bnScale, const void* bnBias, const void* estimatedMean, const void* estimatedVariance, double epsilon);
|
||
|
typedef cudnnStatus_t CUDAAPI tcudnnDeriveNormTensorDescriptor(cudnnTensorDescriptor_t derivedNormScaleBiasDesc, cudnnTensorDescriptor_t derivedNormMeanVarDesc, const cudnnTensorDescriptor_t xDesc, cudnnNormMode_t mode, int groupCnt);
|
||
|
typedef cudnnStatus_t CUDAAPI tcudnnNormalizationForwardInference(cudnnHandle_t handle, cudnnNormMode_t mode, cudnnNormOps_t normOps, cudnnNormAlgo_t algo, const void* alpha, const void* beta, const cudnnTensorDescriptor_t xDesc, const void* x, const cudnnTensorDescriptor_t normScaleBiasDesc, const void* normScale, const void* normBias, const cudnnTensorDescriptor_t normMeanVarDesc, const void* estimatedMean, const void* estimatedVariance, const cudnnTensorDescriptor_t zDesc, const void* z, cudnnActivationDescriptor_t activationDesc, const cudnnTensorDescriptor_t yDesc, void* y, double epsilon, int groupCnt);
|
||
|
typedef cudnnStatus_t CUDAAPI tcudnnCreateSpatialTransformerDescriptor(cudnnSpatialTransformerDescriptor_t* stDesc);
|
||
|
typedef cudnnStatus_t CUDAAPI tcudnnSetSpatialTransformerNdDescriptor(cudnnSpatialTransformerDescriptor_t stDesc, cudnnSamplerType_t samplerType, cudnnDataType_t dataType, const int nbDims, const int dimA[]);
|
||
|
typedef cudnnStatus_t CUDAAPI tcudnnDestroySpatialTransformerDescriptor(cudnnSpatialTransformerDescriptor_t stDesc);
|
||
|
typedef cudnnStatus_t CUDAAPI tcudnnSpatialTfGridGeneratorForward(cudnnHandle_t handle, const cudnnSpatialTransformerDescriptor_t stDesc, const void* theta, void* grid);
|
||
|
typedef cudnnStatus_t CUDAAPI tcudnnSpatialTfSamplerForward(cudnnHandle_t handle, cudnnSpatialTransformerDescriptor_t stDesc, const void* alpha, const cudnnTensorDescriptor_t xDesc, const void* x, const void* grid, const void* beta, cudnnTensorDescriptor_t yDesc, void* y);
|
||
|
typedef cudnnStatus_t CUDAAPI tcudnnCreateDropoutDescriptor(cudnnDropoutDescriptor_t* dropoutDesc);
|
||
|
typedef cudnnStatus_t CUDAAPI tcudnnDestroyDropoutDescriptor(cudnnDropoutDescriptor_t dropoutDesc);
|
||
|
typedef cudnnStatus_t CUDAAPI tcudnnDropoutGetStatesSize(cudnnHandle_t handle, size_t* sizeInBytes);
|
||
|
typedef cudnnStatus_t CUDAAPI tcudnnDropoutGetReserveSpaceSize(cudnnTensorDescriptor_t xdesc, size_t* sizeInBytes);
|
||
|
typedef cudnnStatus_t CUDAAPI tcudnnSetDropoutDescriptor(cudnnDropoutDescriptor_t dropoutDesc, cudnnHandle_t handle, float dropout, void* states, size_t stateSizeInBytes, unsigned long long seed);
|
||
|
typedef cudnnStatus_t CUDAAPI tcudnnRestoreDropoutDescriptor(cudnnDropoutDescriptor_t dropoutDesc, cudnnHandle_t handle, float dropout, void* states, size_t stateSizeInBytes, unsigned long long seed);
|
||
|
typedef cudnnStatus_t CUDAAPI tcudnnGetDropoutDescriptor(cudnnDropoutDescriptor_t dropoutDesc, cudnnHandle_t handle, float* dropout, void** states, unsigned long long* seed);
|
||
|
typedef cudnnStatus_t CUDAAPI tcudnnDropoutForward(cudnnHandle_t handle, const cudnnDropoutDescriptor_t dropoutDesc, const cudnnTensorDescriptor_t xdesc, const void* x, const cudnnTensorDescriptor_t ydesc, void* y, void* reserveSpace, size_t reserveSpaceSizeInBytes);
|
||
|
typedef cudnnStatus_t CUDAAPI tcudnnCreateAlgorithmDescriptor(cudnnAlgorithmDescriptor_t* algoDesc);
|
||
|
typedef cudnnStatus_t CUDAAPI tcudnnSetAlgorithmDescriptor(cudnnAlgorithmDescriptor_t algoDesc, cudnnAlgorithm_t algorithm);
|
||
|
typedef cudnnStatus_t CUDAAPI tcudnnGetAlgorithmDescriptor(const cudnnAlgorithmDescriptor_t algoDesc, cudnnAlgorithm_t* algorithm);
|
||
|
typedef cudnnStatus_t CUDAAPI tcudnnCopyAlgorithmDescriptor(const cudnnAlgorithmDescriptor_t src, cudnnAlgorithmDescriptor_t dest);
|
||
|
typedef cudnnStatus_t CUDAAPI tcudnnDestroyAlgorithmDescriptor(cudnnAlgorithmDescriptor_t algoDesc);
|
||
|
typedef cudnnStatus_t CUDAAPI tcudnnCreateAlgorithmPerformance(cudnnAlgorithmPerformance_t* algoPerf, int numberToCreate);
|
||
|
typedef cudnnStatus_t CUDAAPI tcudnnSetAlgorithmPerformance(cudnnAlgorithmPerformance_t algoPerf, cudnnAlgorithmDescriptor_t algoDesc, cudnnStatus_t status, float time, size_t memory);
|
||
|
typedef cudnnStatus_t CUDAAPI tcudnnGetAlgorithmPerformance(const cudnnAlgorithmPerformance_t algoPerf, cudnnAlgorithmDescriptor_t* algoDesc, cudnnStatus_t* status, float* time, size_t* memory);
|
||
|
typedef cudnnStatus_t CUDAAPI tcudnnDestroyAlgorithmPerformance(cudnnAlgorithmPerformance_t* algoPerf, int numberToDestroy);
|
||
|
typedef cudnnStatus_t CUDAAPI tcudnnGetAlgorithmSpaceSize(cudnnHandle_t handle, cudnnAlgorithmDescriptor_t algoDesc, size_t* algoSpaceSizeInBytes);
|
||
|
typedef cudnnStatus_t CUDAAPI tcudnnSaveAlgorithm(cudnnHandle_t handle, cudnnAlgorithmDescriptor_t algoDesc, void* algoSpace, size_t algoSpaceSizeInBytes);
|
||
|
typedef cudnnStatus_t CUDAAPI tcudnnRestoreAlgorithm(cudnnHandle_t handle, void* algoSpace, size_t algoSpaceSizeInBytes, cudnnAlgorithmDescriptor_t algoDesc);
|
||
|
typedef cudnnStatus_t CUDAAPI tcudnnSetCallback(unsigned mask, void* udata, cudnnCallback_t fptr);
|
||
|
typedef cudnnStatus_t CUDAAPI tcudnnGetCallback(unsigned* mask, void** udata, cudnnCallback_t* fptr);
|
||
|
typedef cudnnStatus_t CUDAAPI tcudnnOpsInferVersionCheck(void);
|
||
|
typedef cudnnStatus_t CUDAAPI tcudnnSoftmaxBackward(cudnnHandle_t handle, cudnnSoftmaxAlgorithm_t algo, cudnnSoftmaxMode_t mode, const void* alpha, const cudnnTensorDescriptor_t yDesc, const void* y, const cudnnTensorDescriptor_t dyDesc, const void* dy, const void* beta, const cudnnTensorDescriptor_t dxDesc, void* dx);
|
||
|
typedef cudnnStatus_t CUDAAPI tcudnnPoolingBackward(cudnnHandle_t handle, const cudnnPoolingDescriptor_t poolingDesc, const void* alpha, const cudnnTensorDescriptor_t yDesc, const void* y, const cudnnTensorDescriptor_t dyDesc, const void* dy, const cudnnTensorDescriptor_t xDesc, const void* x, const void* beta, const cudnnTensorDescriptor_t dxDesc, void* dx);
|
||
|
typedef cudnnStatus_t CUDAAPI tcudnnActivationBackward(cudnnHandle_t handle, cudnnActivationDescriptor_t activationDesc, const void* alpha, const cudnnTensorDescriptor_t yDesc, const void* y, const cudnnTensorDescriptor_t dyDesc, const void* dy, const cudnnTensorDescriptor_t xDesc, const void* x, const void* beta, const cudnnTensorDescriptor_t dxDesc, void* dx);
|
||
|
typedef cudnnStatus_t CUDAAPI tcudnnLRNCrossChannelBackward(cudnnHandle_t handle, cudnnLRNDescriptor_t normDesc, cudnnLRNMode_t lrnMode, const void* alpha, const cudnnTensorDescriptor_t yDesc, const void* y, const cudnnTensorDescriptor_t dyDesc, const void* dy, const cudnnTensorDescriptor_t xDesc, const void* x, const void* beta, const cudnnTensorDescriptor_t dxDesc, void* dx);
|
||
|
typedef cudnnStatus_t CUDAAPI tcudnnDivisiveNormalizationBackward(cudnnHandle_t handle, cudnnLRNDescriptor_t normDesc, cudnnDivNormMode_t mode, const void* alpha, const cudnnTensorDescriptor_t xDesc, const void* x, const void* means, const void* dy, void* temp, void* temp2, const void* beta, const cudnnTensorDescriptor_t dXdMeansDesc, void* dx, void* dMeans);
|
||
|
typedef cudnnStatus_t CUDAAPI tcudnnGetBatchNormalizationForwardTrainingExWorkspaceSize(cudnnHandle_t handle, cudnnBatchNormMode_t mode, cudnnBatchNormOps_t bnOps, const cudnnTensorDescriptor_t xDesc, const cudnnTensorDescriptor_t zDesc, const cudnnTensorDescriptor_t yDesc, const cudnnTensorDescriptor_t bnScaleBiasMeanVarDesc, const cudnnActivationDescriptor_t activationDesc, size_t* sizeInBytes);
|
||
|
typedef cudnnStatus_t CUDAAPI tcudnnGetBatchNormalizationBackwardExWorkspaceSize(cudnnHandle_t handle, cudnnBatchNormMode_t mode, cudnnBatchNormOps_t bnOps, const cudnnTensorDescriptor_t xDesc, const cudnnTensorDescriptor_t yDesc, const cudnnTensorDescriptor_t dyDesc, const cudnnTensorDescriptor_t dzDesc, const cudnnTensorDescriptor_t dxDesc, const cudnnTensorDescriptor_t dBnScaleBiasDesc, const cudnnActivationDescriptor_t activationDesc, size_t* sizeInBytes);
|
||
|
typedef cudnnStatus_t CUDAAPI tcudnnGetBatchNormalizationTrainingExReserveSpaceSize(cudnnHandle_t handle, cudnnBatchNormMode_t mode, cudnnBatchNormOps_t bnOps, const cudnnActivationDescriptor_t activationDesc, const cudnnTensorDescriptor_t xDesc, size_t* sizeInBytes);
|
||
|
typedef cudnnStatus_t CUDAAPI tcudnnBatchNormalizationForwardTraining(cudnnHandle_t handle, cudnnBatchNormMode_t mode, const void* alpha, const void* beta, const cudnnTensorDescriptor_t xDesc, const void* x, const cudnnTensorDescriptor_t yDesc, void* y, const cudnnTensorDescriptor_t bnScaleBiasMeanVarDesc, const void* bnScale, const void* bnBias, double exponentialAverageFactor, void* resultRunningMean, void* resultRunningVariance, double epsilon, void* resultSaveMean, void* resultSaveInvVariance);
|
||
|
typedef cudnnStatus_t CUDAAPI tcudnnBatchNormalizationForwardTrainingEx(cudnnHandle_t handle, cudnnBatchNormMode_t mode, cudnnBatchNormOps_t bnOps, const void* alpha, const void* beta, const cudnnTensorDescriptor_t xDesc, const void* xData, const cudnnTensorDescriptor_t zDesc, const void* zData, const cudnnTensorDescriptor_t yDesc, void* yData, const cudnnTensorDescriptor_t bnScaleBiasMeanVarDesc, const void* bnScale, const void* bnBias, double exponentialAverageFactor, void* resultRunningMean, void* resultRunningVariance, double epsilon, void* resultSaveMean, void* resultSaveInvVariance, cudnnActivationDescriptor_t activationDesc, void* workspace, size_t workSpaceSizeInBytes, void* reserveSpace, size_t reserveSpaceSizeInBytes);
|
||
|
typedef cudnnStatus_t CUDAAPI tcudnnBatchNormalizationBackward(cudnnHandle_t handle, cudnnBatchNormMode_t mode, const void* alphaDataDiff, const void* betaDataDiff, const void* alphaParamDiff, const void* betaParamDiff, const cudnnTensorDescriptor_t xDesc, const void* x, const cudnnTensorDescriptor_t dyDesc, const void* dy, const cudnnTensorDescriptor_t dxDesc, void* dx, const cudnnTensorDescriptor_t dBnScaleBiasDesc, const void* bnScale, void* dBnScaleResult, void* dBnBiasResult, double epsilon, const void* savedMean, const void* savedInvVariance);
|
||
|
typedef cudnnStatus_t CUDAAPI tcudnnBatchNormalizationBackwardEx(cudnnHandle_t handle, cudnnBatchNormMode_t mode, cudnnBatchNormOps_t bnOps, const void* alphaDataDiff, const void* betaDataDiff, const void* alphaParamDiff, const void* betaParamDiff, const cudnnTensorDescriptor_t xDesc, const void* xData, const cudnnTensorDescriptor_t yDesc, const void* yData, const cudnnTensorDescriptor_t dyDesc, const void* dyData, const cudnnTensorDescriptor_t dzDesc, void* dzData, const cudnnTensorDescriptor_t dxDesc, void* dxData, const cudnnTensorDescriptor_t dBnScaleBiasDesc, const void* bnScaleData, const void* bnBiasData, void* dBnScaleData, void* dBnBiasData, double epsilon, const void* savedMean, const void* savedInvVariance, cudnnActivationDescriptor_t activationDesc, void* workSpace, size_t workSpaceSizeInBytes, void* reserveSpace, size_t reserveSpaceSizeInBytes);
|
||
|
typedef cudnnStatus_t CUDAAPI tcudnnGetNormalizationForwardTrainingWorkspaceSize(cudnnHandle_t handle, cudnnNormMode_t mode, cudnnNormOps_t normOps, cudnnNormAlgo_t algo, const cudnnTensorDescriptor_t xDesc, const cudnnTensorDescriptor_t zDesc, const cudnnTensorDescriptor_t yDesc, const cudnnTensorDescriptor_t normScaleBiasDesc, const cudnnActivationDescriptor_t activationDesc, const cudnnTensorDescriptor_t normMeanVarDesc, size_t* sizeInBytes, int groupCnt);
|
||
|
typedef cudnnStatus_t CUDAAPI tcudnnGetNormalizationBackwardWorkspaceSize(cudnnHandle_t handle, cudnnNormMode_t mode, cudnnNormOps_t normOps, cudnnNormAlgo_t algo, const cudnnTensorDescriptor_t xDesc, const cudnnTensorDescriptor_t yDesc, const cudnnTensorDescriptor_t dyDesc, const cudnnTensorDescriptor_t dzDesc, const cudnnTensorDescriptor_t dxDesc, const cudnnTensorDescriptor_t dNormScaleBiasDesc, const cudnnActivationDescriptor_t activationDesc, const cudnnTensorDescriptor_t normMeanVarDesc, size_t* sizeInBytes, int groupCnt);
|
||
|
typedef cudnnStatus_t CUDAAPI tcudnnGetNormalizationTrainingReserveSpaceSize(cudnnHandle_t handle, cudnnNormMode_t mode, cudnnNormOps_t normOps, cudnnNormAlgo_t algo, const cudnnActivationDescriptor_t activationDesc, const cudnnTensorDescriptor_t xDesc, size_t* sizeInBytes, int groupCnt);
|
||
|
typedef cudnnStatus_t CUDAAPI tcudnnNormalizationForwardTraining(cudnnHandle_t handle, cudnnNormMode_t mode, cudnnNormOps_t normOps, cudnnNormAlgo_t algo, const void* alpha, const void* beta, const cudnnTensorDescriptor_t xDesc, const void* xData, const cudnnTensorDescriptor_t normScaleBiasDesc, const void* normScale, const void* normBias, double exponentialAverageFactor, const cudnnTensorDescriptor_t normMeanVarDesc, void* resultRunningMean, void* resultRunningVariance, double epsilon, void* resultSaveMean, void* resultSaveInvVariance, cudnnActivationDescriptor_t activationDesc, const cudnnTensorDescriptor_t zDesc, const void* zData, const cudnnTensorDescriptor_t yDesc, void* yData, void* workspace, size_t workSpaceSizeInBytes, void* reserveSpace, size_t reserveSpaceSizeInBytes, int groupCnt);
|
||
|
typedef cudnnStatus_t CUDAAPI tcudnnNormalizationBackward(cudnnHandle_t handle, cudnnNormMode_t mode, cudnnNormOps_t normOps, cudnnNormAlgo_t algo, const void* alphaDataDiff, const void* betaDataDiff, const void* alphaParamDiff, const void* betaParamDiff, const cudnnTensorDescriptor_t xDesc, const void* xData, const cudnnTensorDescriptor_t yDesc, const void* yData, const cudnnTensorDescriptor_t dyDesc, const void* dyData, const cudnnTensorDescriptor_t dzDesc, void* dzData, const cudnnTensorDescriptor_t dxDesc, void* dxData, const cudnnTensorDescriptor_t dNormScaleBiasDesc, const void* normScaleData, const void* normBiasData, void* dNormScaleData, void* dNormBiasData, double epsilon, const cudnnTensorDescriptor_t normMeanVarDesc, const void* savedMean, const void* savedInvVariance, cudnnActivationDescriptor_t activationDesc, void* workSpace, size_t workSpaceSizeInBytes, void* reserveSpace, size_t reserveSpaceSizeInBytes, int groupCnt);
|
||
|
typedef cudnnStatus_t CUDAAPI tcudnnSpatialTfGridGeneratorBackward(cudnnHandle_t handle, const cudnnSpatialTransformerDescriptor_t stDesc, const void* dgrid, void* dtheta);
|
||
|
typedef cudnnStatus_t CUDAAPI tcudnnSpatialTfSamplerBackward(cudnnHandle_t handle, cudnnSpatialTransformerDescriptor_t stDesc, const void* alpha, const cudnnTensorDescriptor_t xDesc, const void* x, const void* beta, const cudnnTensorDescriptor_t dxDesc, void* dx, const void* alphaDgrid, const cudnnTensorDescriptor_t dyDesc, const void* dy, const void* grid, const void* betaDgrid, void* dgrid);
|
||
|
typedef cudnnStatus_t CUDAAPI tcudnnDropoutBackward(cudnnHandle_t handle, const cudnnDropoutDescriptor_t dropoutDesc, const cudnnTensorDescriptor_t dydesc, const void* dy, const cudnnTensorDescriptor_t dxdesc, void* dx, void* reserveSpace, size_t reserveSpaceSizeInBytes);
|
||
|
typedef cudnnStatus_t CUDAAPI tcudnnOpsTrainVersionCheck(void);
|
||
|
typedef cudnnStatus_t CUDAAPI tcudnnCreateRNNDescriptor(cudnnRNNDescriptor_t* rnnDesc);
|
||
|
typedef cudnnStatus_t CUDAAPI tcudnnDestroyRNNDescriptor(cudnnRNNDescriptor_t rnnDesc);
|
||
|
typedef cudnnStatus_t CUDAAPI tcudnnSetRNNDescriptor_v8(cudnnRNNDescriptor_t rnnDesc, cudnnRNNAlgo_t algo, cudnnRNNMode_t cellMode, cudnnRNNBiasMode_t biasMode, cudnnDirectionMode_t dirMode, cudnnRNNInputMode_t inputMode, cudnnDataType_t dataType, cudnnDataType_t mathPrec, cudnnMathType_t mathType, int32_t inputSize, int32_t hiddenSize, int32_t projSize, int32_t numLayers, cudnnDropoutDescriptor_t dropoutDesc, uint32_t auxFlags);
|
||
|
typedef cudnnStatus_t CUDAAPI tcudnnGetRNNDescriptor_v8(cudnnRNNDescriptor_t rnnDesc, cudnnRNNAlgo_t* algo, cudnnRNNMode_t* cellMode, cudnnRNNBiasMode_t* biasMode, cudnnDirectionMode_t* dirMode, cudnnRNNInputMode_t* inputMode, cudnnDataType_t* dataType, cudnnDataType_t* mathPrec, cudnnMathType_t* mathType, int32_t* inputSize, int32_t* hiddenSize, int32_t* projSize, int32_t* numLayers, cudnnDropoutDescriptor_t* dropoutDesc, uint32_t* auxFlags);
|
||
|
typedef cudnnStatus_t CUDAAPI tcudnnSetRNNDescriptor_v6(cudnnHandle_t handle, cudnnRNNDescriptor_t rnnDesc, const int hiddenSize, const int numLayers, cudnnDropoutDescriptor_t dropoutDesc, cudnnRNNInputMode_t inputMode, cudnnDirectionMode_t direction, cudnnRNNMode_t cellMode, cudnnRNNAlgo_t algo, cudnnDataType_t mathPrec);
|
||
|
typedef cudnnStatus_t CUDAAPI tcudnnGetRNNDescriptor_v6(cudnnHandle_t handle, cudnnRNNDescriptor_t rnnDesc, int* hiddenSize, int* numLayers, cudnnDropoutDescriptor_t* dropoutDesc, cudnnRNNInputMode_t* inputMode, cudnnDirectionMode_t* direction, cudnnRNNMode_t* cellMode, cudnnRNNAlgo_t* algo, cudnnDataType_t* mathPrec);
|
||
|
typedef cudnnStatus_t CUDAAPI tcudnnSetRNNMatrixMathType(cudnnRNNDescriptor_t rnnDesc, cudnnMathType_t mType);
|
||
|
typedef cudnnStatus_t CUDAAPI tcudnnGetRNNMatrixMathType(cudnnRNNDescriptor_t rnnDesc, cudnnMathType_t* mType);
|
||
|
typedef cudnnStatus_t CUDAAPI tcudnnSetRNNBiasMode(cudnnRNNDescriptor_t rnnDesc, cudnnRNNBiasMode_t biasMode);
|
||
|
typedef cudnnStatus_t CUDAAPI tcudnnGetRNNBiasMode(cudnnRNNDescriptor_t rnnDesc, cudnnRNNBiasMode_t* biasMode);
|
||
|
typedef cudnnStatus_t CUDAAPI tcudnnRNNSetClip_v8(cudnnRNNDescriptor_t rnnDesc, cudnnRNNClipMode_t clipMode, cudnnNanPropagation_t clipNanOpt, double lclip, double rclip);
|
||
|
typedef cudnnStatus_t CUDAAPI tcudnnRNNGetClip_v8(cudnnRNNDescriptor_t rnnDesc, cudnnRNNClipMode_t* clipMode, cudnnNanPropagation_t* clipNanOpt, double* lclip, double* rclip);
|
||
|
typedef cudnnStatus_t CUDAAPI tcudnnRNNSetClip(cudnnHandle_t handle, cudnnRNNDescriptor_t rnnDesc, cudnnRNNClipMode_t clipMode, cudnnNanPropagation_t clipNanOpt, double lclip, double rclip);
|
||
|
typedef cudnnStatus_t CUDAAPI tcudnnRNNGetClip(cudnnHandle_t handle, cudnnRNNDescriptor_t rnnDesc, cudnnRNNClipMode_t* clipMode, cudnnNanPropagation_t* clipNanOpt, double* lclip, double* rclip);
|
||
|
typedef cudnnStatus_t CUDAAPI tcudnnSetRNNProjectionLayers(cudnnHandle_t handle, cudnnRNNDescriptor_t rnnDesc, const int recProjSize, const int outProjSize);
|
||
|
typedef cudnnStatus_t CUDAAPI tcudnnGetRNNProjectionLayers(cudnnHandle_t handle, const cudnnRNNDescriptor_t rnnDesc, int* recProjSize, int* outProjSize);
|
||
|
typedef cudnnStatus_t CUDAAPI tcudnnCreatePersistentRNNPlan(cudnnRNNDescriptor_t rnnDesc, const int minibatch, const cudnnDataType_t dataType, cudnnPersistentRNNPlan_t* plan);
|
||
|
typedef cudnnStatus_t CUDAAPI tcudnnDestroyPersistentRNNPlan(cudnnPersistentRNNPlan_t plan);
|
||
|
typedef cudnnStatus_t CUDAAPI tcudnnSetPersistentRNNPlan(cudnnRNNDescriptor_t rnnDesc, cudnnPersistentRNNPlan_t plan);
|
||
|
typedef cudnnStatus_t CUDAAPI tcudnnBuildRNNDynamic(cudnnHandle_t handle, cudnnRNNDescriptor_t rnnDesc, int miniBatch);
|
||
|
typedef cudnnStatus_t CUDAAPI tcudnnGetRNNWorkspaceSize(cudnnHandle_t handle, const cudnnRNNDescriptor_t rnnDesc, const int seqLength, const cudnnTensorDescriptor_t* xDesc, size_t* sizeInBytes);
|
||
|
typedef cudnnStatus_t CUDAAPI tcudnnGetRNNTrainingReserveSize(cudnnHandle_t handle, const cudnnRNNDescriptor_t rnnDesc, const int seqLength, const cudnnTensorDescriptor_t* xDesc, size_t* sizeInBytes);
|
||
|
typedef cudnnStatus_t CUDAAPI tcudnnGetRNNTempSpaceSizes(cudnnHandle_t handle, cudnnRNNDescriptor_t rnnDesc, cudnnForwardMode_t fMode, cudnnRNNDataDescriptor_t xDesc, size_t* workSpaceSize, size_t* reserveSpaceSize);
|
||
|
typedef cudnnStatus_t CUDAAPI tcudnnGetRNNParamsSize(cudnnHandle_t handle, const cudnnRNNDescriptor_t rnnDesc, const cudnnTensorDescriptor_t xDesc, size_t* sizeInBytes, cudnnDataType_t dataType);
|
||
|
typedef cudnnStatus_t CUDAAPI tcudnnGetRNNWeightSpaceSize(cudnnHandle_t handle, cudnnRNNDescriptor_t rnnDesc, size_t* weightSpaceSize);
|
||
|
typedef cudnnStatus_t CUDAAPI tcudnnGetRNNLinLayerMatrixParams(cudnnHandle_t handle, const cudnnRNNDescriptor_t rnnDesc, const int pseudoLayer, const cudnnTensorDescriptor_t xDesc, const cudnnFilterDescriptor_t wDesc, const void* w, const int linLayerID, cudnnFilterDescriptor_t linLayerMatDesc, void** linLayerMat);
|
||
|
typedef cudnnStatus_t CUDAAPI tcudnnGetRNNLinLayerBiasParams(cudnnHandle_t handle, const cudnnRNNDescriptor_t rnnDesc, const int pseudoLayer, const cudnnTensorDescriptor_t xDesc, const cudnnFilterDescriptor_t wDesc, const void* w, const int linLayerID, cudnnFilterDescriptor_t linLayerBiasDesc, void** linLayerBias);
|
||
|
typedef cudnnStatus_t CUDAAPI tcudnnGetRNNWeightParams(cudnnHandle_t handle, cudnnRNNDescriptor_t rnnDesc, int32_t pseudoLayer, size_t weightSpaceSize, const void* weightSpace, int32_t linLayerID, cudnnTensorDescriptor_t mDesc, void** mAddr, cudnnTensorDescriptor_t bDesc, void** bAddr);
|
||
|
typedef cudnnStatus_t CUDAAPI tcudnnRNNForwardInference(cudnnHandle_t handle, const cudnnRNNDescriptor_t rnnDesc, const int seqLength, const cudnnTensorDescriptor_t* xDesc, const void* x, const cudnnTensorDescriptor_t hxDesc, const void* hx, const cudnnTensorDescriptor_t cxDesc, const void* cx, const cudnnFilterDescriptor_t wDesc, const void* w, const cudnnTensorDescriptor_t* yDesc, void* y, const cudnnTensorDescriptor_t hyDesc, void* hy, const cudnnTensorDescriptor_t cyDesc, void* cy, void* workSpace, size_t workSpaceSizeInBytes);
|
||
|
typedef cudnnStatus_t CUDAAPI tcudnnSetRNNPaddingMode(cudnnRNNDescriptor_t rnnDesc, unsigned paddingMode);
|
||
|
typedef cudnnStatus_t CUDAAPI tcudnnGetRNNPaddingMode(cudnnRNNDescriptor_t rnnDesc, unsigned* paddingMode);
|
||
|
typedef cudnnStatus_t CUDAAPI tcudnnCreateRNNDataDescriptor(cudnnRNNDataDescriptor_t* rnnDataDesc);
|
||
|
typedef cudnnStatus_t CUDAAPI tcudnnDestroyRNNDataDescriptor(cudnnRNNDataDescriptor_t rnnDataDesc);
|
||
|
typedef cudnnStatus_t CUDAAPI tcudnnSetRNNDataDescriptor(cudnnRNNDataDescriptor_t rnnDataDesc, cudnnDataType_t dataType, cudnnRNNDataLayout_t layout, int maxSeqLength, int batchSize, int vectorSize, const int seqLengthArray[], void* paddingFill);
|
||
|
typedef cudnnStatus_t CUDAAPI tcudnnGetRNNDataDescriptor(cudnnRNNDataDescriptor_t rnnDataDesc, cudnnDataType_t* dataType, cudnnRNNDataLayout_t* layout, int* maxSeqLength, int* batchSize, int* vectorSize, int arrayLengthRequested, int seqLengthArray[], void* paddingFill);
|
||
|
typedef cudnnStatus_t CUDAAPI tcudnnRNNForwardInferenceEx(cudnnHandle_t handle, const cudnnRNNDescriptor_t rnnDesc, const cudnnRNNDataDescriptor_t xDesc, const void* x, const cudnnTensorDescriptor_t hxDesc, const void* hx, const cudnnTensorDescriptor_t cxDesc, const void* cx, const cudnnFilterDescriptor_t wDesc, const void* w, const cudnnRNNDataDescriptor_t yDesc, void* y, const cudnnTensorDescriptor_t hyDesc, void* hy, const cudnnTensorDescriptor_t cyDesc, void* cy, const cudnnRNNDataDescriptor_t kDesc, const void* keys, const cudnnRNNDataDescriptor_t cDesc, void* cAttn, const cudnnRNNDataDescriptor_t iDesc, void* iAttn, const cudnnRNNDataDescriptor_t qDesc, void* queries, void* workSpace, size_t workSpaceSizeInBytes);
|
||
|
typedef cudnnStatus_t CUDAAPI tcudnnRNNForward(cudnnHandle_t handle, cudnnRNNDescriptor_t rnnDesc, cudnnForwardMode_t fwdMode, const int32_t devSeqLengths[], cudnnRNNDataDescriptor_t xDesc, const void* x, cudnnRNNDataDescriptor_t yDesc, void* y, cudnnTensorDescriptor_t hDesc, const void* hx, void* hy, cudnnTensorDescriptor_t cDesc, const void* cx, void* cy, size_t weightSpaceSize, const void* weightSpace, size_t workSpaceSize, void* workSpace, size_t reserveSpaceSize, void* reserveSpace);
|
||
|
typedef cudnnStatus_t CUDAAPI tcudnnSetRNNAlgorithmDescriptor(cudnnHandle_t handle, cudnnRNNDescriptor_t rnnDesc, cudnnAlgorithmDescriptor_t algoDesc);
|
||
|
typedef cudnnStatus_t CUDAAPI tcudnnGetRNNForwardInferenceAlgorithmMaxCount(cudnnHandle_t handle, const cudnnRNNDescriptor_t rnnDesc, int* count);
|
||
|
typedef cudnnStatus_t CUDAAPI tcudnnFindRNNForwardInferenceAlgorithmEx(cudnnHandle_t handle, const cudnnRNNDescriptor_t rnnDesc, const int seqLength, const cudnnTensorDescriptor_t* xDesc, const void* x, const cudnnTensorDescriptor_t hxDesc, const void* hx, const cudnnTensorDescriptor_t cxDesc, const void* cx, const cudnnFilterDescriptor_t wDesc, const void* w, const cudnnTensorDescriptor_t* yDesc, void* y, const cudnnTensorDescriptor_t hyDesc, void* hy, const cudnnTensorDescriptor_t cyDesc, void* cy, const float findIntensity, const int requestedAlgoCount, int* returnedAlgoCount, cudnnAlgorithmPerformance_t* perfResults, void* workspace, size_t workSpaceSizeInBytes);
|
||
|
typedef cudnnStatus_t CUDAAPI tcudnnCreateSeqDataDescriptor(cudnnSeqDataDescriptor_t* seqDataDesc);
|
||
|
typedef cudnnStatus_t CUDAAPI tcudnnDestroySeqDataDescriptor(cudnnSeqDataDescriptor_t seqDataDesc);
|
||
|
typedef cudnnStatus_t CUDAAPI tcudnnSetSeqDataDescriptor(cudnnSeqDataDescriptor_t seqDataDesc, cudnnDataType_t dataType, int nbDims, const int dimA[], const cudnnSeqDataAxis_t axes[], size_t seqLengthArraySize, const int seqLengthArray[], void* paddingFill);
|
||
|
typedef cudnnStatus_t CUDAAPI tcudnnGetSeqDataDescriptor(const cudnnSeqDataDescriptor_t seqDataDesc, cudnnDataType_t* dataType, int* nbDims, int nbDimsRequested, int dimA[], cudnnSeqDataAxis_t axes[], size_t* seqLengthArraySize, size_t seqLengthSizeRequested, int seqLengthArray[], void* paddingFill);
|
||
|
typedef cudnnStatus_t CUDAAPI tcudnnCreateAttnDescriptor(cudnnAttnDescriptor_t* attnDesc);
|
||
|
typedef cudnnStatus_t CUDAAPI tcudnnDestroyAttnDescriptor(cudnnAttnDescriptor_t attnDesc);
|
||
|
typedef cudnnStatus_t CUDAAPI tcudnnSetAttnDescriptor(cudnnAttnDescriptor_t attnDesc, unsigned attnMode, int nHeads, double smScaler, cudnnDataType_t dataType, cudnnDataType_t computePrec, cudnnMathType_t mathType, cudnnDropoutDescriptor_t attnDropoutDesc, cudnnDropoutDescriptor_t postDropoutDesc, int qSize, int kSize, int vSize, int qProjSize, int kProjSize, int vProjSize, int oProjSize, int qoMaxSeqLength, int kvMaxSeqLength, int maxBatchSize, int maxBeamSize);
|
||
|
typedef cudnnStatus_t CUDAAPI tcudnnGetAttnDescriptor(cudnnAttnDescriptor_t attnDesc, unsigned* attnMode, int* nHeads, double* smScaler, cudnnDataType_t* dataType, cudnnDataType_t* computePrec, cudnnMathType_t* mathType, cudnnDropoutDescriptor_t* attnDropoutDesc, cudnnDropoutDescriptor_t* postDropoutDesc, int* qSize, int* kSize, int* vSize, int* qProjSize, int* kProjSize, int* vProjSize, int* oProjSize, int* qoMaxSeqLength, int* kvMaxSeqLength, int* maxBatchSize, int* maxBeamSize);
|
||
|
typedef cudnnStatus_t CUDAAPI tcudnnGetMultiHeadAttnBuffers(cudnnHandle_t handle, const cudnnAttnDescriptor_t attnDesc, size_t* weightSizeInBytes, size_t* workSpaceSizeInBytes, size_t* reserveSpaceSizeInBytes);
|
||
|
typedef cudnnStatus_t CUDAAPI tcudnnGetMultiHeadAttnWeights(cudnnHandle_t handle, const cudnnAttnDescriptor_t attnDesc, cudnnMultiHeadAttnWeightKind_t wKind, size_t weightSizeInBytes, const void* weights, cudnnTensorDescriptor_t wDesc, void** wAddr);
|
||
|
typedef cudnnStatus_t CUDAAPI tcudnnMultiHeadAttnForward(cudnnHandle_t handle, const cudnnAttnDescriptor_t attnDesc, int currIdx, const int loWinIdx[], const int hiWinIdx[], const int devSeqLengthsQO[], const int devSeqLengthsKV[], const cudnnSeqDataDescriptor_t qDesc, const void* queries, const void* residuals, const cudnnSeqDataDescriptor_t kDesc, const void* keys, const cudnnSeqDataDescriptor_t vDesc, const void* values, const cudnnSeqDataDescriptor_t oDesc, void* out, size_t weightSizeInBytes, const void* weights, size_t workSpaceSizeInBytes, void* workSpace, size_t reserveSpaceSizeInBytes, void* reserveSpace);
|
||
|
typedef cudnnStatus_t CUDAAPI tcudnnAdvInferVersionCheck(void);
|
||
|
typedef cudnnStatus_t CUDAAPI tcudnnRNNForwardTraining(cudnnHandle_t handle, const cudnnRNNDescriptor_t rnnDesc, const int seqLength, const cudnnTensorDescriptor_t* xDesc, const void* x, const cudnnTensorDescriptor_t hxDesc, const void* hx, const cudnnTensorDescriptor_t cxDesc, const void* cx, const cudnnFilterDescriptor_t wDesc, const void* w, const cudnnTensorDescriptor_t* yDesc, void* y, const cudnnTensorDescriptor_t hyDesc, void* hy, const cudnnTensorDescriptor_t cyDesc, void* cy, void* workSpace, size_t workSpaceSizeInBytes, void* reserveSpace, size_t reserveSpaceSizeInBytes);
|
||
|
typedef cudnnStatus_t CUDAAPI tcudnnRNNBackwardData(cudnnHandle_t handle, const cudnnRNNDescriptor_t rnnDesc, const int seqLength, const cudnnTensorDescriptor_t* yDesc, const void* y, const cudnnTensorDescriptor_t* dyDesc, const void* dy, const cudnnTensorDescriptor_t dhyDesc, const void* dhy, const cudnnTensorDescriptor_t dcyDesc, const void* dcy, const cudnnFilterDescriptor_t wDesc, const void* w, const cudnnTensorDescriptor_t hxDesc, const void* hx, const cudnnTensorDescriptor_t cxDesc, const void* cx, const cudnnTensorDescriptor_t* dxDesc, void* dx, const cudnnTensorDescriptor_t dhxDesc, void* dhx, const cudnnTensorDescriptor_t dcxDesc, void* dcx, void* workSpace, size_t workSpaceSizeInBytes, void* reserveSpace, size_t reserveSpaceSizeInBytes);
|
||
|
typedef cudnnStatus_t CUDAAPI tcudnnRNNBackwardData_v8(cudnnHandle_t handle, cudnnRNNDescriptor_t rnnDesc, const int32_t devSeqLengths[], cudnnRNNDataDescriptor_t yDesc, const void* y, const void* dy, cudnnRNNDataDescriptor_t xDesc, void* dx, cudnnTensorDescriptor_t hDesc, const void* hx, const void* dhy, void* dhx, cudnnTensorDescriptor_t cDesc, const void* cx, const void* dcy, void* dcx, size_t weightSpaceSize, const void* weightSpace, size_t workSpaceSize, void* workSpace, size_t reserveSpaceSize, void* reserveSpace);
|
||
|
typedef cudnnStatus_t CUDAAPI tcudnnRNNBackwardWeights(cudnnHandle_t handle, const cudnnRNNDescriptor_t rnnDesc, const int seqLength, const cudnnTensorDescriptor_t* xDesc, const void* x, const cudnnTensorDescriptor_t hxDesc, const void* hx, const cudnnTensorDescriptor_t* yDesc, const void* y, const void* workSpace, size_t workSpaceSizeInBytes, const cudnnFilterDescriptor_t dwDesc, void* dw, const void* reserveSpace, size_t reserveSpaceSizeInBytes);
|
||
|
typedef cudnnStatus_t CUDAAPI tcudnnRNNBackwardWeights_v8(cudnnHandle_t handle, cudnnRNNDescriptor_t rnnDesc, cudnnWgradMode_t addGrad, const int32_t devSeqLengths[], cudnnRNNDataDescriptor_t xDesc, const void* x, cudnnTensorDescriptor_t hDesc, const void* hx, cudnnRNNDataDescriptor_t yDesc, const void* y, size_t weightSpaceSize, void* dweightSpace, size_t workSpaceSize, void* workSpace, size_t reserveSpaceSize, void* reserveSpace);
|
||
|
typedef cudnnStatus_t CUDAAPI tcudnnRNNForwardTrainingEx(cudnnHandle_t handle, const cudnnRNNDescriptor_t rnnDesc, const cudnnRNNDataDescriptor_t xDesc, const void* x, const cudnnTensorDescriptor_t hxDesc, const void* hx, const cudnnTensorDescriptor_t cxDesc, const void* cx, const cudnnFilterDescriptor_t wDesc, const void* w, const cudnnRNNDataDescriptor_t yDesc, void* y, const cudnnTensorDescriptor_t hyDesc, void* hy, const cudnnTensorDescriptor_t cyDesc, void* cy, const cudnnRNNDataDescriptor_t kDesc, const void* keys, const cudnnRNNDataDescriptor_t cDesc, void* cAttn, const cudnnRNNDataDescriptor_t iDesc, void* iAttn, const cudnnRNNDataDescriptor_t qDesc, void* queries, void* workSpace, size_t workSpaceSizeInBytes, void* reserveSpace, size_t reserveSpaceSizeInBytes);
|
||
|
typedef cudnnStatus_t CUDAAPI tcudnnRNNBackwardDataEx(cudnnHandle_t handle, const cudnnRNNDescriptor_t rnnDesc, const cudnnRNNDataDescriptor_t yDesc, const void* y, const cudnnRNNDataDescriptor_t dyDesc, const void* dy, const cudnnRNNDataDescriptor_t dcDesc, const void* dcAttn, const cudnnTensorDescriptor_t dhyDesc, const void* dhy, const cudnnTensorDescriptor_t dcyDesc, const void* dcy, const cudnnFilterDescriptor_t wDesc, const void* w, const cudnnTensorDescriptor_t hxDesc, const void* hx, const cudnnTensorDescriptor_t cxDesc, const void* cx, const cudnnRNNDataDescriptor_t dxDesc, void* dx, const cudnnTensorDescriptor_t dhxDesc, void* dhx, const cudnnTensorDescriptor_t dcxDesc, void* dcx, const cudnnRNNDataDescriptor_t dkDesc, void* dkeys, void* workSpace, size_t workSpaceSizeInBytes, void* reserveSpace, size_t reserveSpaceSizeInBytes);
|
||
|
typedef cudnnStatus_t CUDAAPI tcudnnRNNBackwardWeightsEx(cudnnHandle_t handle, const cudnnRNNDescriptor_t rnnDesc, const cudnnRNNDataDescriptor_t xDesc, const void* x, const cudnnTensorDescriptor_t hxDesc, const void* hx, const cudnnRNNDataDescriptor_t yDesc, const void* y, void* workSpace, size_t workSpaceSizeInBytes, const cudnnFilterDescriptor_t dwDesc, void* dw, void* reserveSpace, size_t reserveSpaceSizeInBytes);
|
||
|
typedef cudnnStatus_t CUDAAPI tcudnnGetRNNForwardTrainingAlgorithmMaxCount(cudnnHandle_t handle, const cudnnRNNDescriptor_t rnnDesc, int* count);
|
||
|
typedef cudnnStatus_t CUDAAPI tcudnnFindRNNForwardTrainingAlgorithmEx(cudnnHandle_t handle, const cudnnRNNDescriptor_t rnnDesc, const int seqLength, const cudnnTensorDescriptor_t* xDesc, const void* x, const cudnnTensorDescriptor_t hxDesc, const void* hx, const cudnnTensorDescriptor_t cxDesc, const void* cx, const cudnnFilterDescriptor_t wDesc, const void* w, const cudnnTensorDescriptor_t* yDesc, void* y, const cudnnTensorDescriptor_t hyDesc, void* hy, const cudnnTensorDescriptor_t cyDesc, void* cy, const float findIntensity, const int requestedAlgoCount, int* returnedAlgoCount, cudnnAlgorithmPerformance_t* perfResults, void* workspace, size_t workSpaceSizeInBytes, void* reserveSpace, size_t reserveSpaceSizeInBytes);
|
||
|
typedef cudnnStatus_t CUDAAPI tcudnnGetRNNBackwardDataAlgorithmMaxCount(cudnnHandle_t handle, const cudnnRNNDescriptor_t rnnDesc, int* count);
|
||
|
typedef cudnnStatus_t CUDAAPI tcudnnFindRNNBackwardDataAlgorithmEx(cudnnHandle_t handle, const cudnnRNNDescriptor_t rnnDesc, const int seqLength, const cudnnTensorDescriptor_t* yDesc, const void* y, const cudnnTensorDescriptor_t* dyDesc, const void* dy, const cudnnTensorDescriptor_t dhyDesc, const void* dhy, const cudnnTensorDescriptor_t dcyDesc, const void* dcy, const cudnnFilterDescriptor_t wDesc, const void* w, const cudnnTensorDescriptor_t hxDesc, const void* hx, const cudnnTensorDescriptor_t cxDesc, const void* cx, const cudnnTensorDescriptor_t* dxDesc, void* dx, const cudnnTensorDescriptor_t dhxDesc, void* dhx, const cudnnTensorDescriptor_t dcxDesc, void* dcx, const float findIntensity, const int requestedAlgoCount, int* returnedAlgoCount, cudnnAlgorithmPerformance_t* perfResults, void* workspace, size_t workSpaceSizeInBytes, void* reserveSpace, size_t reserveSpaceSizeInBytes);
|
||
|
typedef cudnnStatus_t CUDAAPI tcudnnGetRNNBackwardWeightsAlgorithmMaxCount(cudnnHandle_t handle, const cudnnRNNDescriptor_t rnnDesc, int* count);
|
||
|
typedef cudnnStatus_t CUDAAPI tcudnnFindRNNBackwardWeightsAlgorithmEx(cudnnHandle_t handle, const cudnnRNNDescriptor_t rnnDesc, const int seqLength, const cudnnTensorDescriptor_t* xDesc, const void* x, const cudnnTensorDescriptor_t hxDesc, const void* hx, const cudnnTensorDescriptor_t* yDesc, const void* y, const float findIntensity, const int requestedAlgoCount, int* returnedAlgoCount, cudnnAlgorithmPerformance_t* perfResults, const void* workspace, size_t workSpaceSizeInBytes, const cudnnFilterDescriptor_t dwDesc, void* dw, const void* reserveSpace, size_t reserveSpaceSizeInBytes);
|
||
|
typedef cudnnStatus_t CUDAAPI tcudnnMultiHeadAttnBackwardData(cudnnHandle_t handle, const cudnnAttnDescriptor_t attnDesc, const int loWinIdx[], const int hiWinIdx[], const int devSeqLengthsDQDO[], const int devSeqLengthsDKDV[], const cudnnSeqDataDescriptor_t doDesc, const void* dout, const cudnnSeqDataDescriptor_t dqDesc, void* dqueries, const void* queries, const cudnnSeqDataDescriptor_t dkDesc, void* dkeys, const void* keys, const cudnnSeqDataDescriptor_t dvDesc, void* dvalues, const void* values, size_t weightSizeInBytes, const void* weights, size_t workSpaceSizeInBytes, void* workSpace, size_t reserveSpaceSizeInBytes, void* reserveSpace);
|
||
|
typedef cudnnStatus_t CUDAAPI tcudnnMultiHeadAttnBackwardWeights(cudnnHandle_t handle, const cudnnAttnDescriptor_t attnDesc, cudnnWgradMode_t addGrad, const cudnnSeqDataDescriptor_t qDesc, const void* queries, const cudnnSeqDataDescriptor_t kDesc, const void* keys, const cudnnSeqDataDescriptor_t vDesc, const void* values, const cudnnSeqDataDescriptor_t doDesc, const void* dout, size_t weightSizeInBytes, const void* weights, void* dweights, size_t workSpaceSizeInBytes, void* workSpace, size_t reserveSpaceSizeInBytes, void* reserveSpace);
|
||
|
typedef cudnnStatus_t CUDAAPI tcudnnCreateCTCLossDescriptor(cudnnCTCLossDescriptor_t* ctcLossDesc);
|
||
|
typedef cudnnStatus_t CUDAAPI tcudnnSetCTCLossDescriptor(cudnnCTCLossDescriptor_t ctcLossDesc, cudnnDataType_t compType);
|
||
|
typedef cudnnStatus_t CUDAAPI tcudnnSetCTCLossDescriptorEx(cudnnCTCLossDescriptor_t ctcLossDesc, cudnnDataType_t compType, cudnnLossNormalizationMode_t normMode, cudnnNanPropagation_t gradMode);
|
||
|
typedef cudnnStatus_t CUDAAPI tcudnnSetCTCLossDescriptor_v8(cudnnCTCLossDescriptor_t ctcLossDesc, cudnnDataType_t compType, cudnnLossNormalizationMode_t normMode, cudnnNanPropagation_t gradMode, int maxLabelLength);
|
||
|
typedef cudnnStatus_t CUDAAPI tcudnnGetCTCLossDescriptor(cudnnCTCLossDescriptor_t ctcLossDesc, cudnnDataType_t* compType);
|
||
|
typedef cudnnStatus_t CUDAAPI tcudnnGetCTCLossDescriptorEx(cudnnCTCLossDescriptor_t ctcLossDesc, cudnnDataType_t* compType, cudnnLossNormalizationMode_t* normMode, cudnnNanPropagation_t* gradMode);
|
||
|
typedef cudnnStatus_t CUDAAPI tcudnnGetCTCLossDescriptor_v8(cudnnCTCLossDescriptor_t ctcLossDesc, cudnnDataType_t* compType, cudnnLossNormalizationMode_t* normMode, cudnnNanPropagation_t* gradMode, int* maxLabelLength);
|
||
|
typedef cudnnStatus_t CUDAAPI tcudnnDestroyCTCLossDescriptor(cudnnCTCLossDescriptor_t ctcLossDesc);
|
||
|
typedef cudnnStatus_t CUDAAPI tcudnnCTCLoss(cudnnHandle_t handle, const cudnnTensorDescriptor_t probsDesc, const void* probs, const int hostLabels[], const int hostLabelLengths[], const int hostInputLengths[], void* costs, const cudnnTensorDescriptor_t gradientsDesc, void* gradients, cudnnCTCLossAlgo_t algo, cudnnCTCLossDescriptor_t ctcLossDesc, void* workspace, size_t workSpaceSizeInBytes);
|
||
|
typedef cudnnStatus_t CUDAAPI tcudnnCTCLoss_v8(cudnnHandle_t handle, cudnnCTCLossAlgo_t algo, cudnnCTCLossDescriptor_t ctcLossDesc, const cudnnTensorDescriptor_t probsDesc, const void* probs, const int labels[], const int labelLengths[], const int inputLengths[], void* costs, const cudnnTensorDescriptor_t gradientsDesc, void* gradients, size_t workSpaceSizeInBytes, void* workspace);
|
||
|
typedef cudnnStatus_t CUDAAPI tcudnnGetCTCLossWorkspaceSize(cudnnHandle_t handle, const cudnnTensorDescriptor_t probsDesc, const cudnnTensorDescriptor_t gradientsDesc, const int* labels, const int* labelLengths, const int* inputLengths, cudnnCTCLossAlgo_t algo, cudnnCTCLossDescriptor_t ctcLossDesc, size_t* sizeInBytes);
|
||
|
typedef cudnnStatus_t CUDAAPI tcudnnGetCTCLossWorkspaceSize_v8(cudnnHandle_t handle, cudnnCTCLossAlgo_t algo, cudnnCTCLossDescriptor_t ctcLossDesc, const cudnnTensorDescriptor_t probsDesc, const cudnnTensorDescriptor_t gradientsDesc, size_t* sizeInBytes);
|
||
|
typedef cudnnStatus_t CUDAAPI tcudnnAdvTrainVersionCheck(void);
|
||
|
typedef cudnnStatus_t CUDAAPI tcudnnCreateConvolutionDescriptor(cudnnConvolutionDescriptor_t* convDesc);
|
||
|
typedef cudnnStatus_t CUDAAPI tcudnnDestroyConvolutionDescriptor(cudnnConvolutionDescriptor_t convDesc);
|
||
|
typedef cudnnStatus_t CUDAAPI tcudnnSetConvolutionMathType(cudnnConvolutionDescriptor_t convDesc, cudnnMathType_t mathType);
|
||
|
typedef cudnnStatus_t CUDAAPI tcudnnGetConvolutionMathType(cudnnConvolutionDescriptor_t convDesc, cudnnMathType_t* mathType);
|
||
|
typedef cudnnStatus_t CUDAAPI tcudnnSetConvolutionGroupCount(cudnnConvolutionDescriptor_t convDesc, int groupCount);
|
||
|
typedef cudnnStatus_t CUDAAPI tcudnnGetConvolutionGroupCount(cudnnConvolutionDescriptor_t convDesc, int* groupCount);
|
||
|
typedef cudnnStatus_t CUDAAPI tcudnnSetConvolutionReorderType(cudnnConvolutionDescriptor_t convDesc, cudnnReorderType_t reorderType);
|
||
|
typedef cudnnStatus_t CUDAAPI tcudnnGetConvolutionReorderType(cudnnConvolutionDescriptor_t convDesc, cudnnReorderType_t* reorderType);
|
||
|
typedef cudnnStatus_t CUDAAPI tcudnnSetConvolution2dDescriptor(cudnnConvolutionDescriptor_t convDesc, int pad_h, int pad_w, int u, int v, int dilation_h, int dilation_w, cudnnConvolutionMode_t mode, cudnnDataType_t computeType);
|
||
|
typedef cudnnStatus_t CUDAAPI tcudnnGetConvolution2dDescriptor(const cudnnConvolutionDescriptor_t convDesc, int* pad_h, int* pad_w, int* u, int* v, int* dilation_h, int* dilation_w, cudnnConvolutionMode_t* mode, cudnnDataType_t* computeType);
|
||
|
typedef cudnnStatus_t CUDAAPI tcudnnSetConvolutionNdDescriptor(cudnnConvolutionDescriptor_t convDesc, int arrayLength, const int padA[], const int filterStrideA[], const int dilationA[], cudnnConvolutionMode_t mode, cudnnDataType_t computeType);
|
||
|
typedef cudnnStatus_t CUDAAPI tcudnnGetConvolutionNdDescriptor(const cudnnConvolutionDescriptor_t convDesc, int arrayLengthRequested, int* arrayLength, int padA[], int strideA[], int dilationA[], cudnnConvolutionMode_t* mode, cudnnDataType_t* computeType);
|
||
|
typedef cudnnStatus_t CUDAAPI tcudnnGetConvolution2dForwardOutputDim(const cudnnConvolutionDescriptor_t convDesc, const cudnnTensorDescriptor_t inputTensorDesc, const cudnnFilterDescriptor_t filterDesc, int* n, int* c, int* h, int* w);
|
||
|
typedef cudnnStatus_t CUDAAPI tcudnnGetConvolutionNdForwardOutputDim(const cudnnConvolutionDescriptor_t convDesc, const cudnnTensorDescriptor_t inputTensorDesc, const cudnnFilterDescriptor_t filterDesc, int nbDims, int tensorOuputDimA[]);
|
||
|
typedef cudnnStatus_t CUDAAPI tcudnnGetConvolutionForwardAlgorithmMaxCount(cudnnHandle_t handle, int* count);
|
||
|
typedef cudnnStatus_t CUDAAPI tcudnnGetConvolutionForwardAlgorithm_v7(cudnnHandle_t handle, const cudnnTensorDescriptor_t srcDesc, const cudnnFilterDescriptor_t filterDesc, const cudnnConvolutionDescriptor_t convDesc, const cudnnTensorDescriptor_t destDesc, const int requestedAlgoCount, int* returnedAlgoCount, cudnnConvolutionFwdAlgoPerf_t* perfResults);
|
||
|
typedef cudnnStatus_t CUDAAPI tcudnnFindConvolutionForwardAlgorithm(cudnnHandle_t handle, const cudnnTensorDescriptor_t xDesc, const cudnnFilterDescriptor_t wDesc, const cudnnConvolutionDescriptor_t convDesc, const cudnnTensorDescriptor_t yDesc, const int requestedAlgoCount, int* returnedAlgoCount, cudnnConvolutionFwdAlgoPerf_t* perfResults);
|
||
|
typedef cudnnStatus_t CUDAAPI tcudnnFindConvolutionForwardAlgorithmEx(cudnnHandle_t handle, const cudnnTensorDescriptor_t xDesc, const void* x, const cudnnFilterDescriptor_t wDesc, const void* w, const cudnnConvolutionDescriptor_t convDesc, const cudnnTensorDescriptor_t yDesc, void* y, const int requestedAlgoCount, int* returnedAlgoCount, cudnnConvolutionFwdAlgoPerf_t* perfResults, void* workSpace, size_t workSpaceSizeInBytes);
|
||
|
typedef cudnnStatus_t CUDAAPI tcudnnIm2Col(cudnnHandle_t handle, const cudnnTensorDescriptor_t xDesc, const void* x, const cudnnFilterDescriptor_t wDesc, const cudnnConvolutionDescriptor_t convDesc, void* colBuffer);
|
||
|
typedef cudnnStatus_t CUDAAPI tcudnnReorderFilterAndBias(cudnnHandle_t handle, const cudnnFilterDescriptor_t filterDesc, cudnnReorderType_t reorderType, const void* filterData, void* reorderedFilterData, int reorderBias, const void* biasData, void* reorderedBiasData);
|
||
|
typedef cudnnStatus_t CUDAAPI tcudnnGetConvolutionForwardWorkspaceSize(cudnnHandle_t handle, const cudnnTensorDescriptor_t xDesc, const cudnnFilterDescriptor_t wDesc, const cudnnConvolutionDescriptor_t convDesc, const cudnnTensorDescriptor_t yDesc, cudnnConvolutionFwdAlgo_t algo, size_t* sizeInBytes);
|
||
|
typedef cudnnStatus_t CUDAAPI tcudnnConvolutionForward(cudnnHandle_t handle, const void* alpha, const cudnnTensorDescriptor_t xDesc, const void* x, const cudnnFilterDescriptor_t wDesc, const void* w, const cudnnConvolutionDescriptor_t convDesc, cudnnConvolutionFwdAlgo_t algo, void* workSpace, size_t workSpaceSizeInBytes, const void* beta, const cudnnTensorDescriptor_t yDesc, void* y);
|
||
|
typedef cudnnStatus_t CUDAAPI tcudnnConvolutionBiasActivationForward(cudnnHandle_t handle, const void* alpha1, const cudnnTensorDescriptor_t xDesc, const void* x, const cudnnFilterDescriptor_t wDesc, const void* w, const cudnnConvolutionDescriptor_t convDesc, cudnnConvolutionFwdAlgo_t algo, void* workSpace, size_t workSpaceSizeInBytes, const void* alpha2, const cudnnTensorDescriptor_t zDesc, const void* z, const cudnnTensorDescriptor_t biasDesc, const void* bias, const cudnnActivationDescriptor_t activationDesc, const cudnnTensorDescriptor_t yDesc, void* y);
|
||
|
typedef cudnnStatus_t CUDAAPI tcudnnGetConvolutionBackwardDataAlgorithmMaxCount(cudnnHandle_t handle, int* count);
|
||
|
typedef cudnnStatus_t CUDAAPI tcudnnFindConvolutionBackwardDataAlgorithm(cudnnHandle_t handle, const cudnnFilterDescriptor_t wDesc, const cudnnTensorDescriptor_t dyDesc, const cudnnConvolutionDescriptor_t convDesc, const cudnnTensorDescriptor_t dxDesc, const int requestedAlgoCount, int* returnedAlgoCount, cudnnConvolutionBwdDataAlgoPerf_t* perfResults);
|
||
|
typedef cudnnStatus_t CUDAAPI tcudnnFindConvolutionBackwardDataAlgorithmEx(cudnnHandle_t handle, const cudnnFilterDescriptor_t wDesc, const void* w, const cudnnTensorDescriptor_t dyDesc, const void* dy, const cudnnConvolutionDescriptor_t convDesc, const cudnnTensorDescriptor_t dxDesc, void* dx, const int requestedAlgoCount, int* returnedAlgoCount, cudnnConvolutionBwdDataAlgoPerf_t* perfResults, void* workSpace, size_t workSpaceSizeInBytes);
|
||
|
typedef cudnnStatus_t CUDAAPI tcudnnGetConvolutionBackwardDataAlgorithm_v7(cudnnHandle_t handle, const cudnnFilterDescriptor_t filterDesc, const cudnnTensorDescriptor_t diffDesc, const cudnnConvolutionDescriptor_t convDesc, const cudnnTensorDescriptor_t gradDesc, const int requestedAlgoCount, int* returnedAlgoCount, cudnnConvolutionBwdDataAlgoPerf_t* perfResults);
|
||
|
typedef cudnnStatus_t CUDAAPI tcudnnGetConvolutionBackwardDataWorkspaceSize(cudnnHandle_t handle, const cudnnFilterDescriptor_t wDesc, const cudnnTensorDescriptor_t dyDesc, const cudnnConvolutionDescriptor_t convDesc, const cudnnTensorDescriptor_t dxDesc, cudnnConvolutionBwdDataAlgo_t algo, size_t* sizeInBytes);
|
||
|
typedef cudnnStatus_t CUDAAPI tcudnnConvolutionBackwardData(cudnnHandle_t handle, const void* alpha, const cudnnFilterDescriptor_t wDesc, const void* w, const cudnnTensorDescriptor_t dyDesc, const void* dy, const cudnnConvolutionDescriptor_t convDesc, cudnnConvolutionBwdDataAlgo_t algo, void* workSpace, size_t workSpaceSizeInBytes, const void* beta, const cudnnTensorDescriptor_t dxDesc, void* dx);
|
||
|
typedef cudnnStatus_t CUDAAPI tcudnnGetFoldedConvBackwardDataDescriptors(const cudnnHandle_t handle, const cudnnFilterDescriptor_t filterDesc, const cudnnTensorDescriptor_t diffDesc, const cudnnConvolutionDescriptor_t convDesc, const cudnnTensorDescriptor_t gradDesc, const cudnnTensorFormat_t transformFormat, cudnnFilterDescriptor_t foldedFilterDesc, cudnnTensorDescriptor_t paddedDiffDesc, cudnnConvolutionDescriptor_t foldedConvDesc, cudnnTensorDescriptor_t foldedGradDesc, cudnnTensorTransformDescriptor_t filterFoldTransDesc, cudnnTensorTransformDescriptor_t diffPadTransDesc, cudnnTensorTransformDescriptor_t gradFoldTransDesc, cudnnTensorTransformDescriptor_t gradUnfoldTransDesc);
|
||
|
typedef cudnnStatus_t CUDAAPI tcudnnCnnInferVersionCheck(void);
|
||
|
typedef cudnnStatus_t CUDAAPI tcudnnGetConvolutionBackwardFilterAlgorithmMaxCount(cudnnHandle_t handle, int* count);
|
||
|
typedef cudnnStatus_t CUDAAPI tcudnnFindConvolutionBackwardFilterAlgorithm(cudnnHandle_t handle, const cudnnTensorDescriptor_t xDesc, const cudnnTensorDescriptor_t dyDesc, const cudnnConvolutionDescriptor_t convDesc, const cudnnFilterDescriptor_t dwDesc, const int requestedAlgoCount, int* returnedAlgoCount, cudnnConvolutionBwdFilterAlgoPerf_t* perfResults);
|
||
|
typedef cudnnStatus_t CUDAAPI tcudnnFindConvolutionBackwardFilterAlgorithmEx(cudnnHandle_t handle, const cudnnTensorDescriptor_t xDesc, const void* x, const cudnnTensorDescriptor_t dyDesc, const void* y, const cudnnConvolutionDescriptor_t convDesc, const cudnnFilterDescriptor_t dwDesc, void* dw, const int requestedAlgoCount, int* returnedAlgoCount, cudnnConvolutionBwdFilterAlgoPerf_t* perfResults, void* workSpace, size_t workSpaceSizeInBytes);
|
||
|
typedef cudnnStatus_t CUDAAPI tcudnnGetConvolutionBackwardFilterAlgorithm_v7(cudnnHandle_t handle, const cudnnTensorDescriptor_t srcDesc, const cudnnTensorDescriptor_t diffDesc, const cudnnConvolutionDescriptor_t convDesc, const cudnnFilterDescriptor_t gradDesc, const int requestedAlgoCount, int* returnedAlgoCount, cudnnConvolutionBwdFilterAlgoPerf_t* perfResults);
|
||
|
typedef cudnnStatus_t CUDAAPI tcudnnGetConvolutionBackwardFilterWorkspaceSize(cudnnHandle_t handle, const cudnnTensorDescriptor_t xDesc, const cudnnTensorDescriptor_t dyDesc, const cudnnConvolutionDescriptor_t convDesc, const cudnnFilterDescriptor_t gradDesc, cudnnConvolutionBwdFilterAlgo_t algo, size_t* sizeInBytes);
|
||
|
typedef cudnnStatus_t CUDAAPI tcudnnConvolutionBackwardFilter(cudnnHandle_t handle, const void* alpha, const cudnnTensorDescriptor_t xDesc, const void* x, const cudnnTensorDescriptor_t dyDesc, const void* dy, const cudnnConvolutionDescriptor_t convDesc, cudnnConvolutionBwdFilterAlgo_t algo, void* workSpace, size_t workSpaceSizeInBytes, const void* beta, const cudnnFilterDescriptor_t dwDesc, void* dw);
|
||
|
typedef cudnnStatus_t CUDAAPI tcudnnConvolutionBackwardBias(cudnnHandle_t handle, const void* alpha, const cudnnTensorDescriptor_t dyDesc, const void* dy, const void* beta, const cudnnTensorDescriptor_t dbDesc, void* db);
|
||
|
typedef cudnnStatus_t CUDAAPI tcudnnCreateFusedOpsConstParamPack(cudnnFusedOpsConstParamPack_t* constPack, cudnnFusedOps_t ops);
|
||
|
typedef cudnnStatus_t CUDAAPI tcudnnDestroyFusedOpsConstParamPack(cudnnFusedOpsConstParamPack_t constPack);
|
||
|
typedef cudnnStatus_t CUDAAPI tcudnnSetFusedOpsConstParamPackAttribute(cudnnFusedOpsConstParamPack_t constPack, cudnnFusedOpsConstParamLabel_t paramLabel, const void* param);
|
||
|
typedef cudnnStatus_t CUDAAPI tcudnnGetFusedOpsConstParamPackAttribute(const cudnnFusedOpsConstParamPack_t constPack, cudnnFusedOpsConstParamLabel_t paramLabel, void* param, int* isNULL);
|
||
|
typedef cudnnStatus_t CUDAAPI tcudnnCreateFusedOpsVariantParamPack(cudnnFusedOpsVariantParamPack_t* varPack, cudnnFusedOps_t ops);
|
||
|
typedef cudnnStatus_t CUDAAPI tcudnnDestroyFusedOpsVariantParamPack(cudnnFusedOpsVariantParamPack_t varPack);
|
||
|
typedef cudnnStatus_t CUDAAPI tcudnnSetFusedOpsVariantParamPackAttribute(cudnnFusedOpsVariantParamPack_t varPack, cudnnFusedOpsVariantParamLabel_t paramLabel, void* ptr);
|
||
|
typedef cudnnStatus_t CUDAAPI tcudnnGetFusedOpsVariantParamPackAttribute(const cudnnFusedOpsVariantParamPack_t varPack, cudnnFusedOpsVariantParamLabel_t paramLabel, void* ptr);
|
||
|
typedef cudnnStatus_t CUDAAPI tcudnnCreateFusedOpsPlan(cudnnFusedOpsPlan_t* plan, cudnnFusedOps_t ops);
|
||
|
typedef cudnnStatus_t CUDAAPI tcudnnDestroyFusedOpsPlan(cudnnFusedOpsPlan_t plan);
|
||
|
typedef cudnnStatus_t CUDAAPI tcudnnMakeFusedOpsPlan(cudnnHandle_t handle, cudnnFusedOpsPlan_t plan, const cudnnFusedOpsConstParamPack_t constPack, size_t* workspaceSizeInBytes);
|
||
|
typedef cudnnStatus_t CUDAAPI tcudnnFusedOpsExecute(cudnnHandle_t handle, const cudnnFusedOpsPlan_t plan, cudnnFusedOpsVariantParamPack_t varPack);
|
||
|
typedef cudnnStatus_t CUDAAPI tcudnnCnnTrainVersionCheck(void);
|
||
|
typedef cudnnStatus_t CUDAAPI tcudnnBackendCreateDescriptor(cudnnBackendDescriptorType_t descriptorType, cudnnBackendDescriptor_t* descriptor);
|
||
|
typedef cudnnStatus_t CUDAAPI tcudnnBackendDestroyDescriptor(cudnnBackendDescriptor_t descriptor);
|
||
|
typedef cudnnStatus_t CUDAAPI tcudnnBackendInitialize(cudnnBackendDescriptor_t descriptor);
|
||
|
typedef cudnnStatus_t CUDAAPI tcudnnBackendFinalize(cudnnBackendDescriptor_t descriptor);
|
||
|
typedef cudnnStatus_t CUDAAPI tcudnnBackendSetAttribute(cudnnBackendDescriptor_t descriptor, cudnnBackendAttributeName_t attributeName, cudnnBackendAttributeType_t attributeType, int64_t elementCount, const void* arrayOfElements);
|
||
|
typedef cudnnStatus_t CUDAAPI tcudnnBackendGetAttribute(const cudnnBackendDescriptor_t descriptor, cudnnBackendAttributeName_t attributeName, cudnnBackendAttributeType_t attributeType, int64_t requestedElementCount, int64_t* elementCount, void* arrayOfElements);
|
||
|
typedef cudnnStatus_t CUDAAPI tcudnnBackendExecute(cudnnHandle_t handle, cudnnBackendDescriptor_t executionPlan, cudnnBackendDescriptor_t variantPack);
|
||
|
|
||
|
typedef CUresult CUDAAPI tcuGraphicsGLRegisterBuffer(CUgraphicsResource* pCudaResource, GLuint buffer, unsigned int Flags);
|
||
|
typedef CUresult CUDAAPI tcuGraphicsGLRegisterImage(CUgraphicsResource* pCudaResource, GLuint image, GLenum target, unsigned int Flags);
|
||
|
typedef CUresult CUDAAPI tcuGLGetDevices_v2(unsigned int* pCudaDeviceCount, CUdevice* pCudaDevices, unsigned int cudaDeviceCount, CUGLDeviceList deviceList);
|
||
|
typedef CUresult CUDAAPI tcuGLCtxCreate_v2(CUcontext* pCtx, unsigned int Flags, CUdevice device);
|
||
|
typedef CUresult CUDAAPI tcuGLInit(void);
|
||
|
typedef CUresult CUDAAPI tcuGLRegisterBufferObject(GLuint buffer);
|
||
|
typedef CUresult CUDAAPI tcuGLMapBufferObject_v2(CUdeviceptr* dptr, size_t* size, GLuint buffer);
|
||
|
typedef CUresult CUDAAPI tcuGLUnmapBufferObject(GLuint buffer);
|
||
|
typedef CUresult CUDAAPI tcuGLUnregisterBufferObject(GLuint buffer);
|
||
|
typedef CUresult CUDAAPI tcuGLSetBufferObjectMapFlags(GLuint buffer, unsigned int Flags);
|
||
|
typedef CUresult CUDAAPI tcuGLMapBufferObjectAsync_v2(CUdeviceptr* dptr, size_t* size, GLuint buffer, CUstream hStream);
|
||
|
typedef CUresult CUDAAPI tcuGLUnmapBufferObjectAsync(GLuint buffer, CUstream hStream);
|
||
|
|
||
|
|
||
|
/* Function declarations. */
|
||
|
extern tcuGetErrorString *cuGetErrorString;
|
||
|
extern tcuGetErrorName *cuGetErrorName;
|
||
|
extern tcuInit *cuInit;
|
||
|
extern tcuDriverGetVersion *cuDriverGetVersion;
|
||
|
extern tcuDeviceGet *cuDeviceGet;
|
||
|
extern tcuDeviceGetCount *cuDeviceGetCount;
|
||
|
extern tcuDeviceGetName *cuDeviceGetName;
|
||
|
extern tcuDeviceGetUuid *cuDeviceGetUuid;
|
||
|
extern tcuDeviceGetLuid *cuDeviceGetLuid;
|
||
|
extern tcuDeviceTotalMem_v2 *cuDeviceTotalMem_v2;
|
||
|
extern tcuDeviceGetTexture1DLinearMaxWidth *cuDeviceGetTexture1DLinearMaxWidth;
|
||
|
extern tcuDeviceGetAttribute *cuDeviceGetAttribute;
|
||
|
extern tcuDeviceGetNvSciSyncAttributes *cuDeviceGetNvSciSyncAttributes;
|
||
|
extern tcuDeviceSetMemPool *cuDeviceSetMemPool;
|
||
|
extern tcuDeviceGetMemPool *cuDeviceGetMemPool;
|
||
|
extern tcuDeviceGetDefaultMemPool *cuDeviceGetDefaultMemPool;
|
||
|
extern tcuDeviceGetProperties *cuDeviceGetProperties;
|
||
|
extern tcuDeviceComputeCapability *cuDeviceComputeCapability;
|
||
|
extern tcuDevicePrimaryCtxRetain *cuDevicePrimaryCtxRetain;
|
||
|
extern tcuDevicePrimaryCtxRelease_v2 *cuDevicePrimaryCtxRelease_v2;
|
||
|
extern tcuDevicePrimaryCtxSetFlags_v2 *cuDevicePrimaryCtxSetFlags_v2;
|
||
|
extern tcuDevicePrimaryCtxGetState *cuDevicePrimaryCtxGetState;
|
||
|
extern tcuDevicePrimaryCtxReset_v2 *cuDevicePrimaryCtxReset_v2;
|
||
|
extern tcuCtxCreate_v2 *cuCtxCreate_v2;
|
||
|
extern tcuCtxDestroy_v2 *cuCtxDestroy_v2;
|
||
|
extern tcuCtxPushCurrent_v2 *cuCtxPushCurrent_v2;
|
||
|
extern tcuCtxPopCurrent_v2 *cuCtxPopCurrent_v2;
|
||
|
extern tcuCtxSetCurrent *cuCtxSetCurrent;
|
||
|
extern tcuCtxGetCurrent *cuCtxGetCurrent;
|
||
|
extern tcuCtxGetDevice *cuCtxGetDevice;
|
||
|
extern tcuCtxGetFlags *cuCtxGetFlags;
|
||
|
extern tcuCtxSynchronize *cuCtxSynchronize;
|
||
|
extern tcuCtxSetLimit *cuCtxSetLimit;
|
||
|
extern tcuCtxGetLimit *cuCtxGetLimit;
|
||
|
extern tcuCtxGetCacheConfig *cuCtxGetCacheConfig;
|
||
|
extern tcuCtxSetCacheConfig *cuCtxSetCacheConfig;
|
||
|
extern tcuCtxGetSharedMemConfig *cuCtxGetSharedMemConfig;
|
||
|
extern tcuCtxSetSharedMemConfig *cuCtxSetSharedMemConfig;
|
||
|
extern tcuCtxGetApiVersion *cuCtxGetApiVersion;
|
||
|
extern tcuCtxGetStreamPriorityRange *cuCtxGetStreamPriorityRange;
|
||
|
extern tcuCtxResetPersistingL2Cache *cuCtxResetPersistingL2Cache;
|
||
|
extern tcuCtxAttach *cuCtxAttach;
|
||
|
extern tcuCtxDetach *cuCtxDetach;
|
||
|
extern tcuModuleLoad *cuModuleLoad;
|
||
|
extern tcuModuleLoadData *cuModuleLoadData;
|
||
|
extern tcuModuleLoadDataEx *cuModuleLoadDataEx;
|
||
|
extern tcuModuleLoadFatBinary *cuModuleLoadFatBinary;
|
||
|
extern tcuModuleUnload *cuModuleUnload;
|
||
|
extern tcuModuleGetFunction *cuModuleGetFunction;
|
||
|
extern tcuModuleGetGlobal_v2 *cuModuleGetGlobal_v2;
|
||
|
extern tcuModuleGetTexRef *cuModuleGetTexRef;
|
||
|
extern tcuModuleGetSurfRef *cuModuleGetSurfRef;
|
||
|
extern tcuLinkCreate_v2 *cuLinkCreate_v2;
|
||
|
extern tcuLinkAddData_v2 *cuLinkAddData_v2;
|
||
|
extern tcuLinkAddFile_v2 *cuLinkAddFile_v2;
|
||
|
extern tcuLinkComplete *cuLinkComplete;
|
||
|
extern tcuLinkDestroy *cuLinkDestroy;
|
||
|
extern tcuMemGetInfo_v2 *cuMemGetInfo_v2;
|
||
|
extern tcuMemAlloc_v2 *cuMemAlloc_v2;
|
||
|
extern tcuMemAllocPitch_v2 *cuMemAllocPitch_v2;
|
||
|
extern tcuMemFree_v2 *cuMemFree_v2;
|
||
|
extern tcuMemGetAddressRange_v2 *cuMemGetAddressRange_v2;
|
||
|
extern tcuMemAllocHost_v2 *cuMemAllocHost_v2;
|
||
|
extern tcuMemFreeHost *cuMemFreeHost;
|
||
|
extern tcuMemHostAlloc *cuMemHostAlloc;
|
||
|
extern tcuMemHostGetDevicePointer_v2 *cuMemHostGetDevicePointer_v2;
|
||
|
extern tcuMemHostGetFlags *cuMemHostGetFlags;
|
||
|
extern tcuMemAllocManaged *cuMemAllocManaged;
|
||
|
extern tcuDeviceGetByPCIBusId *cuDeviceGetByPCIBusId;
|
||
|
extern tcuDeviceGetPCIBusId *cuDeviceGetPCIBusId;
|
||
|
extern tcuIpcGetEventHandle *cuIpcGetEventHandle;
|
||
|
extern tcuIpcOpenEventHandle *cuIpcOpenEventHandle;
|
||
|
extern tcuIpcGetMemHandle *cuIpcGetMemHandle;
|
||
|
extern tcuIpcOpenMemHandle_v2 *cuIpcOpenMemHandle_v2;
|
||
|
extern tcuIpcCloseMemHandle *cuIpcCloseMemHandle;
|
||
|
extern tcuMemHostRegister_v2 *cuMemHostRegister_v2;
|
||
|
extern tcuMemHostUnregister *cuMemHostUnregister;
|
||
|
extern tcuMemcpy *cuMemcpy;
|
||
|
extern tcuMemcpyPeer *cuMemcpyPeer;
|
||
|
extern tcuMemcpyHtoD_v2 *cuMemcpyHtoD_v2;
|
||
|
extern tcuMemcpyDtoH_v2 *cuMemcpyDtoH_v2;
|
||
|
extern tcuMemcpyDtoD_v2 *cuMemcpyDtoD_v2;
|
||
|
extern tcuMemcpyDtoA_v2 *cuMemcpyDtoA_v2;
|
||
|
extern tcuMemcpyAtoD_v2 *cuMemcpyAtoD_v2;
|
||
|
extern tcuMemcpyHtoA_v2 *cuMemcpyHtoA_v2;
|
||
|
extern tcuMemcpyAtoH_v2 *cuMemcpyAtoH_v2;
|
||
|
extern tcuMemcpyAtoA_v2 *cuMemcpyAtoA_v2;
|
||
|
extern tcuMemcpy2D_v2 *cuMemcpy2D_v2;
|
||
|
extern tcuMemcpy2DUnaligned_v2 *cuMemcpy2DUnaligned_v2;
|
||
|
extern tcuMemcpy3D_v2 *cuMemcpy3D_v2;
|
||
|
extern tcuMemcpy3DPeer *cuMemcpy3DPeer;
|
||
|
extern tcuMemcpyAsync *cuMemcpyAsync;
|
||
|
extern tcuMemcpyPeerAsync *cuMemcpyPeerAsync;
|
||
|
extern tcuMemcpyHtoDAsync_v2 *cuMemcpyHtoDAsync_v2;
|
||
|
extern tcuMemcpyDtoHAsync_v2 *cuMemcpyDtoHAsync_v2;
|
||
|
extern tcuMemcpyDtoDAsync_v2 *cuMemcpyDtoDAsync_v2;
|
||
|
extern tcuMemcpyHtoAAsync_v2 *cuMemcpyHtoAAsync_v2;
|
||
|
extern tcuMemcpyAtoHAsync_v2 *cuMemcpyAtoHAsync_v2;
|
||
|
extern tcuMemcpy2DAsync_v2 *cuMemcpy2DAsync_v2;
|
||
|
extern tcuMemcpy3DAsync_v2 *cuMemcpy3DAsync_v2;
|
||
|
extern tcuMemcpy3DPeerAsync *cuMemcpy3DPeerAsync;
|
||
|
extern tcuMemsetD8_v2 *cuMemsetD8_v2;
|
||
|
extern tcuMemsetD16_v2 *cuMemsetD16_v2;
|
||
|
extern tcuMemsetD32_v2 *cuMemsetD32_v2;
|
||
|
extern tcuMemsetD2D8_v2 *cuMemsetD2D8_v2;
|
||
|
extern tcuMemsetD2D16_v2 *cuMemsetD2D16_v2;
|
||
|
extern tcuMemsetD2D32_v2 *cuMemsetD2D32_v2;
|
||
|
extern tcuMemsetD8Async *cuMemsetD8Async;
|
||
|
extern tcuMemsetD16Async *cuMemsetD16Async;
|
||
|
extern tcuMemsetD32Async *cuMemsetD32Async;
|
||
|
extern tcuMemsetD2D8Async *cuMemsetD2D8Async;
|
||
|
extern tcuMemsetD2D16Async *cuMemsetD2D16Async;
|
||
|
extern tcuMemsetD2D32Async *cuMemsetD2D32Async;
|
||
|
extern tcuArrayCreate_v2 *cuArrayCreate_v2;
|
||
|
extern tcuArrayGetDescriptor_v2 *cuArrayGetDescriptor_v2;
|
||
|
extern tcuArrayGetSparseProperties *cuArrayGetSparseProperties;
|
||
|
extern tcuMipmappedArrayGetSparseProperties *cuMipmappedArrayGetSparseProperties;
|
||
|
extern tcuArrayGetPlane *cuArrayGetPlane;
|
||
|
extern tcuArrayDestroy *cuArrayDestroy;
|
||
|
extern tcuArray3DCreate_v2 *cuArray3DCreate_v2;
|
||
|
extern tcuArray3DGetDescriptor_v2 *cuArray3DGetDescriptor_v2;
|
||
|
extern tcuMipmappedArrayCreate *cuMipmappedArrayCreate;
|
||
|
extern tcuMipmappedArrayGetLevel *cuMipmappedArrayGetLevel;
|
||
|
extern tcuMipmappedArrayDestroy *cuMipmappedArrayDestroy;
|
||
|
extern tcuMemAddressReserve *cuMemAddressReserve;
|
||
|
extern tcuMemAddressFree *cuMemAddressFree;
|
||
|
extern tcuMemCreate *cuMemCreate;
|
||
|
extern tcuMemRelease *cuMemRelease;
|
||
|
extern tcuMemMap *cuMemMap;
|
||
|
extern tcuMemMapArrayAsync *cuMemMapArrayAsync;
|
||
|
extern tcuMemUnmap *cuMemUnmap;
|
||
|
extern tcuMemSetAccess *cuMemSetAccess;
|
||
|
extern tcuMemGetAccess *cuMemGetAccess;
|
||
|
extern tcuMemExportToShareableHandle *cuMemExportToShareableHandle;
|
||
|
extern tcuMemImportFromShareableHandle *cuMemImportFromShareableHandle;
|
||
|
extern tcuMemGetAllocationGranularity *cuMemGetAllocationGranularity;
|
||
|
extern tcuMemGetAllocationPropertiesFromHandle *cuMemGetAllocationPropertiesFromHandle;
|
||
|
extern tcuMemRetainAllocationHandle *cuMemRetainAllocationHandle;
|
||
|
extern tcuMemFreeAsync *cuMemFreeAsync;
|
||
|
extern tcuMemAllocAsync *cuMemAllocAsync;
|
||
|
extern tcuMemPoolTrimTo *cuMemPoolTrimTo;
|
||
|
extern tcuMemPoolSetAttribute *cuMemPoolSetAttribute;
|
||
|
extern tcuMemPoolGetAttribute *cuMemPoolGetAttribute;
|
||
|
extern tcuMemPoolSetAccess *cuMemPoolSetAccess;
|
||
|
extern tcuMemPoolGetAccess *cuMemPoolGetAccess;
|
||
|
extern tcuMemPoolCreate *cuMemPoolCreate;
|
||
|
extern tcuMemPoolDestroy *cuMemPoolDestroy;
|
||
|
extern tcuMemAllocFromPoolAsync *cuMemAllocFromPoolAsync;
|
||
|
extern tcuMemPoolExportToShareableHandle *cuMemPoolExportToShareableHandle;
|
||
|
extern tcuMemPoolImportFromShareableHandle *cuMemPoolImportFromShareableHandle;
|
||
|
extern tcuMemPoolExportPointer *cuMemPoolExportPointer;
|
||
|
extern tcuMemPoolImportPointer *cuMemPoolImportPointer;
|
||
|
extern tcuPointerGetAttribute *cuPointerGetAttribute;
|
||
|
extern tcuMemPrefetchAsync *cuMemPrefetchAsync;
|
||
|
extern tcuMemAdvise *cuMemAdvise;
|
||
|
extern tcuMemRangeGetAttribute *cuMemRangeGetAttribute;
|
||
|
extern tcuMemRangeGetAttributes *cuMemRangeGetAttributes;
|
||
|
extern tcuPointerSetAttribute *cuPointerSetAttribute;
|
||
|
extern tcuPointerGetAttributes *cuPointerGetAttributes;
|
||
|
extern tcuStreamCreate *cuStreamCreate;
|
||
|
extern tcuStreamCreateWithPriority *cuStreamCreateWithPriority;
|
||
|
extern tcuStreamGetPriority *cuStreamGetPriority;
|
||
|
extern tcuStreamGetFlags *cuStreamGetFlags;
|
||
|
extern tcuStreamGetCtx *cuStreamGetCtx;
|
||
|
extern tcuStreamWaitEvent *cuStreamWaitEvent;
|
||
|
extern tcuStreamAddCallback *cuStreamAddCallback;
|
||
|
extern tcuStreamBeginCapture_v2 *cuStreamBeginCapture_v2;
|
||
|
extern tcuThreadExchangeStreamCaptureMode *cuThreadExchangeStreamCaptureMode;
|
||
|
extern tcuStreamEndCapture *cuStreamEndCapture;
|
||
|
extern tcuStreamIsCapturing *cuStreamIsCapturing;
|
||
|
extern tcuStreamGetCaptureInfo *cuStreamGetCaptureInfo;
|
||
|
extern tcuStreamAttachMemAsync *cuStreamAttachMemAsync;
|
||
|
extern tcuStreamQuery *cuStreamQuery;
|
||
|
extern tcuStreamSynchronize *cuStreamSynchronize;
|
||
|
extern tcuStreamDestroy_v2 *cuStreamDestroy_v2;
|
||
|
extern tcuStreamCopyAttributes *cuStreamCopyAttributes;
|
||
|
extern tcuStreamGetAttribute *cuStreamGetAttribute;
|
||
|
extern tcuStreamSetAttribute *cuStreamSetAttribute;
|
||
|
extern tcuEventCreate *cuEventCreate;
|
||
|
extern tcuEventRecord *cuEventRecord;
|
||
|
extern tcuEventRecordWithFlags *cuEventRecordWithFlags;
|
||
|
extern tcuEventQuery *cuEventQuery;
|
||
|
extern tcuEventSynchronize *cuEventSynchronize;
|
||
|
extern tcuEventDestroy_v2 *cuEventDestroy_v2;
|
||
|
extern tcuEventElapsedTime *cuEventElapsedTime;
|
||
|
extern tcuImportExternalMemory *cuImportExternalMemory;
|
||
|
extern tcuExternalMemoryGetMappedBuffer *cuExternalMemoryGetMappedBuffer;
|
||
|
extern tcuExternalMemoryGetMappedMipmappedArray *cuExternalMemoryGetMappedMipmappedArray;
|
||
|
extern tcuDestroyExternalMemory *cuDestroyExternalMemory;
|
||
|
extern tcuImportExternalSemaphore *cuImportExternalSemaphore;
|
||
|
extern tcuSignalExternalSemaphoresAsync *cuSignalExternalSemaphoresAsync;
|
||
|
extern tcuWaitExternalSemaphoresAsync *cuWaitExternalSemaphoresAsync;
|
||
|
extern tcuDestroyExternalSemaphore *cuDestroyExternalSemaphore;
|
||
|
extern tcuStreamWaitValue32 *cuStreamWaitValue32;
|
||
|
extern tcuStreamWaitValue64 *cuStreamWaitValue64;
|
||
|
extern tcuStreamWriteValue32 *cuStreamWriteValue32;
|
||
|
extern tcuStreamWriteValue64 *cuStreamWriteValue64;
|
||
|
extern tcuStreamBatchMemOp *cuStreamBatchMemOp;
|
||
|
extern tcuFuncGetAttribute *cuFuncGetAttribute;
|
||
|
extern tcuFuncSetAttribute *cuFuncSetAttribute;
|
||
|
extern tcuFuncSetCacheConfig *cuFuncSetCacheConfig;
|
||
|
extern tcuFuncSetSharedMemConfig *cuFuncSetSharedMemConfig;
|
||
|
extern tcuLaunchKernel *cuLaunchKernel;
|
||
|
extern tcuLaunchCooperativeKernel *cuLaunchCooperativeKernel;
|
||
|
extern tcuLaunchCooperativeKernelMultiDevice *cuLaunchCooperativeKernelMultiDevice;
|
||
|
extern tcuLaunchHostFunc *cuLaunchHostFunc;
|
||
|
extern tcuFuncSetBlockShape *cuFuncSetBlockShape;
|
||
|
extern tcuFuncSetSharedSize *cuFuncSetSharedSize;
|
||
|
extern tcuParamSetSize *cuParamSetSize;
|
||
|
extern tcuParamSeti *cuParamSeti;
|
||
|
extern tcuParamSetf *cuParamSetf;
|
||
|
extern tcuParamSetv *cuParamSetv;
|
||
|
extern tcuLaunch *cuLaunch;
|
||
|
extern tcuLaunchGrid *cuLaunchGrid;
|
||
|
extern tcuLaunchGridAsync *cuLaunchGridAsync;
|
||
|
extern tcuParamSetTexRef *cuParamSetTexRef;
|
||
|
extern tcuGraphCreate *cuGraphCreate;
|
||
|
extern tcuGraphAddKernelNode *cuGraphAddKernelNode;
|
||
|
extern tcuGraphKernelNodeGetParams *cuGraphKernelNodeGetParams;
|
||
|
extern tcuGraphKernelNodeSetParams *cuGraphKernelNodeSetParams;
|
||
|
extern tcuGraphAddMemcpyNode *cuGraphAddMemcpyNode;
|
||
|
extern tcuGraphMemcpyNodeGetParams *cuGraphMemcpyNodeGetParams;
|
||
|
extern tcuGraphMemcpyNodeSetParams *cuGraphMemcpyNodeSetParams;
|
||
|
extern tcuGraphAddMemsetNode *cuGraphAddMemsetNode;
|
||
|
extern tcuGraphMemsetNodeGetParams *cuGraphMemsetNodeGetParams;
|
||
|
extern tcuGraphMemsetNodeSetParams *cuGraphMemsetNodeSetParams;
|
||
|
extern tcuGraphAddHostNode *cuGraphAddHostNode;
|
||
|
extern tcuGraphHostNodeGetParams *cuGraphHostNodeGetParams;
|
||
|
extern tcuGraphHostNodeSetParams *cuGraphHostNodeSetParams;
|
||
|
extern tcuGraphAddChildGraphNode *cuGraphAddChildGraphNode;
|
||
|
extern tcuGraphChildGraphNodeGetGraph *cuGraphChildGraphNodeGetGraph;
|
||
|
extern tcuGraphAddEmptyNode *cuGraphAddEmptyNode;
|
||
|
extern tcuGraphAddEventRecordNode *cuGraphAddEventRecordNode;
|
||
|
extern tcuGraphEventRecordNodeGetEvent *cuGraphEventRecordNodeGetEvent;
|
||
|
extern tcuGraphEventRecordNodeSetEvent *cuGraphEventRecordNodeSetEvent;
|
||
|
extern tcuGraphAddEventWaitNode *cuGraphAddEventWaitNode;
|
||
|
extern tcuGraphEventWaitNodeGetEvent *cuGraphEventWaitNodeGetEvent;
|
||
|
extern tcuGraphEventWaitNodeSetEvent *cuGraphEventWaitNodeSetEvent;
|
||
|
extern tcuGraphAddExternalSemaphoresSignalNode *cuGraphAddExternalSemaphoresSignalNode;
|
||
|
extern tcuGraphExternalSemaphoresSignalNodeGetParams *cuGraphExternalSemaphoresSignalNodeGetParams;
|
||
|
extern tcuGraphExternalSemaphoresSignalNodeSetParams *cuGraphExternalSemaphoresSignalNodeSetParams;
|
||
|
extern tcuGraphAddExternalSemaphoresWaitNode *cuGraphAddExternalSemaphoresWaitNode;
|
||
|
extern tcuGraphExternalSemaphoresWaitNodeGetParams *cuGraphExternalSemaphoresWaitNodeGetParams;
|
||
|
extern tcuGraphExternalSemaphoresWaitNodeSetParams *cuGraphExternalSemaphoresWaitNodeSetParams;
|
||
|
extern tcuGraphClone *cuGraphClone;
|
||
|
extern tcuGraphNodeFindInClone *cuGraphNodeFindInClone;
|
||
|
extern tcuGraphNodeGetType *cuGraphNodeGetType;
|
||
|
extern tcuGraphGetNodes *cuGraphGetNodes;
|
||
|
extern tcuGraphGetRootNodes *cuGraphGetRootNodes;
|
||
|
extern tcuGraphGetEdges *cuGraphGetEdges;
|
||
|
extern tcuGraphNodeGetDependencies *cuGraphNodeGetDependencies;
|
||
|
extern tcuGraphNodeGetDependentNodes *cuGraphNodeGetDependentNodes;
|
||
|
extern tcuGraphAddDependencies *cuGraphAddDependencies;
|
||
|
extern tcuGraphRemoveDependencies *cuGraphRemoveDependencies;
|
||
|
extern tcuGraphDestroyNode *cuGraphDestroyNode;
|
||
|
extern tcuGraphInstantiate_v2 *cuGraphInstantiate_v2;
|
||
|
extern tcuGraphExecKernelNodeSetParams *cuGraphExecKernelNodeSetParams;
|
||
|
extern tcuGraphExecMemcpyNodeSetParams *cuGraphExecMemcpyNodeSetParams;
|
||
|
extern tcuGraphExecMemsetNodeSetParams *cuGraphExecMemsetNodeSetParams;
|
||
|
extern tcuGraphExecHostNodeSetParams *cuGraphExecHostNodeSetParams;
|
||
|
extern tcuGraphExecChildGraphNodeSetParams *cuGraphExecChildGraphNodeSetParams;
|
||
|
extern tcuGraphExecEventRecordNodeSetEvent *cuGraphExecEventRecordNodeSetEvent;
|
||
|
extern tcuGraphExecEventWaitNodeSetEvent *cuGraphExecEventWaitNodeSetEvent;
|
||
|
extern tcuGraphExecExternalSemaphoresSignalNodeSetParams *cuGraphExecExternalSemaphoresSignalNodeSetParams;
|
||
|
extern tcuGraphExecExternalSemaphoresWaitNodeSetParams *cuGraphExecExternalSemaphoresWaitNodeSetParams;
|
||
|
extern tcuGraphUpload *cuGraphUpload;
|
||
|
extern tcuGraphLaunch *cuGraphLaunch;
|
||
|
extern tcuGraphExecDestroy *cuGraphExecDestroy;
|
||
|
extern tcuGraphDestroy *cuGraphDestroy;
|
||
|
extern tcuGraphExecUpdate *cuGraphExecUpdate;
|
||
|
extern tcuGraphKernelNodeCopyAttributes *cuGraphKernelNodeCopyAttributes;
|
||
|
extern tcuGraphKernelNodeGetAttribute *cuGraphKernelNodeGetAttribute;
|
||
|
extern tcuGraphKernelNodeSetAttribute *cuGraphKernelNodeSetAttribute;
|
||
|
extern tcuOccupancyMaxActiveBlocksPerMultiprocessor *cuOccupancyMaxActiveBlocksPerMultiprocessor;
|
||
|
extern tcuOccupancyMaxActiveBlocksPerMultiprocessorWithFlags *cuOccupancyMaxActiveBlocksPerMultiprocessorWithFlags;
|
||
|
extern tcuOccupancyMaxPotentialBlockSize *cuOccupancyMaxPotentialBlockSize;
|
||
|
extern tcuOccupancyMaxPotentialBlockSizeWithFlags *cuOccupancyMaxPotentialBlockSizeWithFlags;
|
||
|
extern tcuOccupancyAvailableDynamicSMemPerBlock *cuOccupancyAvailableDynamicSMemPerBlock;
|
||
|
extern tcuTexRefSetArray *cuTexRefSetArray;
|
||
|
extern tcuTexRefSetMipmappedArray *cuTexRefSetMipmappedArray;
|
||
|
extern tcuTexRefSetAddress_v2 *cuTexRefSetAddress_v2;
|
||
|
extern tcuTexRefSetAddress2D_v3 *cuTexRefSetAddress2D_v3;
|
||
|
extern tcuTexRefSetFormat *cuTexRefSetFormat;
|
||
|
extern tcuTexRefSetAddressMode *cuTexRefSetAddressMode;
|
||
|
extern tcuTexRefSetFilterMode *cuTexRefSetFilterMode;
|
||
|
extern tcuTexRefSetMipmapFilterMode *cuTexRefSetMipmapFilterMode;
|
||
|
extern tcuTexRefSetMipmapLevelBias *cuTexRefSetMipmapLevelBias;
|
||
|
extern tcuTexRefSetMipmapLevelClamp *cuTexRefSetMipmapLevelClamp;
|
||
|
extern tcuTexRefSetMaxAnisotropy *cuTexRefSetMaxAnisotropy;
|
||
|
extern tcuTexRefSetBorderColor *cuTexRefSetBorderColor;
|
||
|
extern tcuTexRefSetFlags *cuTexRefSetFlags;
|
||
|
extern tcuTexRefGetAddress_v2 *cuTexRefGetAddress_v2;
|
||
|
extern tcuTexRefGetArray *cuTexRefGetArray;
|
||
|
extern tcuTexRefGetMipmappedArray *cuTexRefGetMipmappedArray;
|
||
|
extern tcuTexRefGetAddressMode *cuTexRefGetAddressMode;
|
||
|
extern tcuTexRefGetFilterMode *cuTexRefGetFilterMode;
|
||
|
extern tcuTexRefGetFormat *cuTexRefGetFormat;
|
||
|
extern tcuTexRefGetMipmapFilterMode *cuTexRefGetMipmapFilterMode;
|
||
|
extern tcuTexRefGetMipmapLevelBias *cuTexRefGetMipmapLevelBias;
|
||
|
extern tcuTexRefGetMipmapLevelClamp *cuTexRefGetMipmapLevelClamp;
|
||
|
extern tcuTexRefGetMaxAnisotropy *cuTexRefGetMaxAnisotropy;
|
||
|
extern tcuTexRefGetBorderColor *cuTexRefGetBorderColor;
|
||
|
extern tcuTexRefGetFlags *cuTexRefGetFlags;
|
||
|
extern tcuTexRefCreate *cuTexRefCreate;
|
||
|
extern tcuTexRefDestroy *cuTexRefDestroy;
|
||
|
extern tcuSurfRefSetArray *cuSurfRefSetArray;
|
||
|
extern tcuSurfRefGetArray *cuSurfRefGetArray;
|
||
|
extern tcuTexObjectCreate *cuTexObjectCreate;
|
||
|
extern tcuTexObjectDestroy *cuTexObjectDestroy;
|
||
|
extern tcuTexObjectGetResourceDesc *cuTexObjectGetResourceDesc;
|
||
|
extern tcuTexObjectGetTextureDesc *cuTexObjectGetTextureDesc;
|
||
|
extern tcuTexObjectGetResourceViewDesc *cuTexObjectGetResourceViewDesc;
|
||
|
extern tcuSurfObjectCreate *cuSurfObjectCreate;
|
||
|
extern tcuSurfObjectDestroy *cuSurfObjectDestroy;
|
||
|
extern tcuSurfObjectGetResourceDesc *cuSurfObjectGetResourceDesc;
|
||
|
extern tcuDeviceCanAccessPeer *cuDeviceCanAccessPeer;
|
||
|
extern tcuCtxEnablePeerAccess *cuCtxEnablePeerAccess;
|
||
|
extern tcuCtxDisablePeerAccess *cuCtxDisablePeerAccess;
|
||
|
extern tcuDeviceGetP2PAttribute *cuDeviceGetP2PAttribute;
|
||
|
extern tcuGraphicsUnregisterResource *cuGraphicsUnregisterResource;
|
||
|
extern tcuGraphicsSubResourceGetMappedArray *cuGraphicsSubResourceGetMappedArray;
|
||
|
extern tcuGraphicsResourceGetMappedMipmappedArray *cuGraphicsResourceGetMappedMipmappedArray;
|
||
|
extern tcuGraphicsResourceGetMappedPointer_v2 *cuGraphicsResourceGetMappedPointer_v2;
|
||
|
extern tcuGraphicsResourceSetMapFlags_v2 *cuGraphicsResourceSetMapFlags_v2;
|
||
|
extern tcuGraphicsMapResources *cuGraphicsMapResources;
|
||
|
extern tcuGraphicsUnmapResources *cuGraphicsUnmapResources;
|
||
|
extern tcuGetExportTable *cuGetExportTable;
|
||
|
extern tcuFuncGetModule *cuFuncGetModule;
|
||
|
|
||
|
extern tnvrtcGetErrorString *nvrtcGetErrorString;
|
||
|
extern tnvrtcVersion *nvrtcVersion;
|
||
|
extern tnvrtcGetNumSupportedArchs *nvrtcGetNumSupportedArchs;
|
||
|
extern tnvrtcGetSupportedArchs *nvrtcGetSupportedArchs;
|
||
|
extern tnvrtcCreateProgram *nvrtcCreateProgram;
|
||
|
extern tnvrtcDestroyProgram *nvrtcDestroyProgram;
|
||
|
extern tnvrtcCompileProgram *nvrtcCompileProgram;
|
||
|
extern tnvrtcGetPTXSize *nvrtcGetPTXSize;
|
||
|
extern tnvrtcGetPTX *nvrtcGetPTX;
|
||
|
extern tnvrtcGetCUBINSize *nvrtcGetCUBINSize;
|
||
|
extern tnvrtcGetCUBIN *nvrtcGetCUBIN;
|
||
|
extern tnvrtcGetProgramLogSize *nvrtcGetProgramLogSize;
|
||
|
extern tnvrtcGetProgramLog *nvrtcGetProgramLog;
|
||
|
extern tnvrtcAddNameExpression *nvrtcAddNameExpression;
|
||
|
extern tnvrtcGetLoweredName *nvrtcGetLoweredName;
|
||
|
|
||
|
extern tcudnnGetVersion *cudnnGetVersion;
|
||
|
extern tcudnnGetCudartVersion *cudnnGetCudartVersion;
|
||
|
extern tcudnnGetErrorString *cudnnGetErrorString;
|
||
|
extern tcudnnQueryRuntimeError *cudnnQueryRuntimeError;
|
||
|
extern tcudnnGetProperty *cudnnGetProperty;
|
||
|
extern tcudnnCreate *cudnnCreate;
|
||
|
extern tcudnnDestroy *cudnnDestroy;
|
||
|
extern tcudnnSetStream *cudnnSetStream;
|
||
|
extern tcudnnGetStream *cudnnGetStream;
|
||
|
extern tcudnnCreateTensorDescriptor *cudnnCreateTensorDescriptor;
|
||
|
extern tcudnnSetTensor4dDescriptor *cudnnSetTensor4dDescriptor;
|
||
|
extern tcudnnSetTensor4dDescriptorEx *cudnnSetTensor4dDescriptorEx;
|
||
|
extern tcudnnGetTensor4dDescriptor *cudnnGetTensor4dDescriptor;
|
||
|
extern tcudnnSetTensorNdDescriptor *cudnnSetTensorNdDescriptor;
|
||
|
extern tcudnnSetTensorNdDescriptorEx *cudnnSetTensorNdDescriptorEx;
|
||
|
extern tcudnnGetTensorNdDescriptor *cudnnGetTensorNdDescriptor;
|
||
|
extern tcudnnGetTensorSizeInBytes *cudnnGetTensorSizeInBytes;
|
||
|
extern tcudnnDestroyTensorDescriptor *cudnnDestroyTensorDescriptor;
|
||
|
extern tcudnnInitTransformDest *cudnnInitTransformDest;
|
||
|
extern tcudnnCreateTensorTransformDescriptor *cudnnCreateTensorTransformDescriptor;
|
||
|
extern tcudnnSetTensorTransformDescriptor *cudnnSetTensorTransformDescriptor;
|
||
|
extern tcudnnGetTensorTransformDescriptor *cudnnGetTensorTransformDescriptor;
|
||
|
extern tcudnnDestroyTensorTransformDescriptor *cudnnDestroyTensorTransformDescriptor;
|
||
|
extern tcudnnTransformTensor *cudnnTransformTensor;
|
||
|
extern tcudnnTransformTensorEx *cudnnTransformTensorEx;
|
||
|
extern tcudnnAddTensor *cudnnAddTensor;
|
||
|
extern tcudnnCreateOpTensorDescriptor *cudnnCreateOpTensorDescriptor;
|
||
|
extern tcudnnSetOpTensorDescriptor *cudnnSetOpTensorDescriptor;
|
||
|
extern tcudnnGetOpTensorDescriptor *cudnnGetOpTensorDescriptor;
|
||
|
extern tcudnnDestroyOpTensorDescriptor *cudnnDestroyOpTensorDescriptor;
|
||
|
extern tcudnnOpTensor *cudnnOpTensor;
|
||
|
extern tcudnnCreateReduceTensorDescriptor *cudnnCreateReduceTensorDescriptor;
|
||
|
extern tcudnnSetReduceTensorDescriptor *cudnnSetReduceTensorDescriptor;
|
||
|
extern tcudnnGetReduceTensorDescriptor *cudnnGetReduceTensorDescriptor;
|
||
|
extern tcudnnDestroyReduceTensorDescriptor *cudnnDestroyReduceTensorDescriptor;
|
||
|
extern tcudnnGetReductionIndicesSize *cudnnGetReductionIndicesSize;
|
||
|
extern tcudnnGetReductionWorkspaceSize *cudnnGetReductionWorkspaceSize;
|
||
|
extern tcudnnReduceTensor *cudnnReduceTensor;
|
||
|
extern tcudnnSetTensor *cudnnSetTensor;
|
||
|
extern tcudnnScaleTensor *cudnnScaleTensor;
|
||
|
extern tcudnnCreateFilterDescriptor *cudnnCreateFilterDescriptor;
|
||
|
extern tcudnnSetFilter4dDescriptor *cudnnSetFilter4dDescriptor;
|
||
|
extern tcudnnGetFilter4dDescriptor *cudnnGetFilter4dDescriptor;
|
||
|
extern tcudnnSetFilterNdDescriptor *cudnnSetFilterNdDescriptor;
|
||
|
extern tcudnnGetFilterNdDescriptor *cudnnGetFilterNdDescriptor;
|
||
|
extern tcudnnGetFilterSizeInBytes *cudnnGetFilterSizeInBytes;
|
||
|
extern tcudnnTransformFilter *cudnnTransformFilter;
|
||
|
extern tcudnnDestroyFilterDescriptor *cudnnDestroyFilterDescriptor;
|
||
|
extern tcudnnSoftmaxForward *cudnnSoftmaxForward;
|
||
|
extern tcudnnCreatePoolingDescriptor *cudnnCreatePoolingDescriptor;
|
||
|
extern tcudnnSetPooling2dDescriptor *cudnnSetPooling2dDescriptor;
|
||
|
extern tcudnnGetPooling2dDescriptor *cudnnGetPooling2dDescriptor;
|
||
|
extern tcudnnSetPoolingNdDescriptor *cudnnSetPoolingNdDescriptor;
|
||
|
extern tcudnnGetPoolingNdDescriptor *cudnnGetPoolingNdDescriptor;
|
||
|
extern tcudnnGetPoolingNdForwardOutputDim *cudnnGetPoolingNdForwardOutputDim;
|
||
|
extern tcudnnGetPooling2dForwardOutputDim *cudnnGetPooling2dForwardOutputDim;
|
||
|
extern tcudnnDestroyPoolingDescriptor *cudnnDestroyPoolingDescriptor;
|
||
|
extern tcudnnPoolingForward *cudnnPoolingForward;
|
||
|
extern tcudnnCreateActivationDescriptor *cudnnCreateActivationDescriptor;
|
||
|
extern tcudnnSetActivationDescriptor *cudnnSetActivationDescriptor;
|
||
|
extern tcudnnGetActivationDescriptor *cudnnGetActivationDescriptor;
|
||
|
extern tcudnnDestroyActivationDescriptor *cudnnDestroyActivationDescriptor;
|
||
|
extern tcudnnActivationForward *cudnnActivationForward;
|
||
|
extern tcudnnCreateLRNDescriptor *cudnnCreateLRNDescriptor;
|
||
|
extern tcudnnSetLRNDescriptor *cudnnSetLRNDescriptor;
|
||
|
extern tcudnnGetLRNDescriptor *cudnnGetLRNDescriptor;
|
||
|
extern tcudnnDestroyLRNDescriptor *cudnnDestroyLRNDescriptor;
|
||
|
extern tcudnnLRNCrossChannelForward *cudnnLRNCrossChannelForward;
|
||
|
extern tcudnnDivisiveNormalizationForward *cudnnDivisiveNormalizationForward;
|
||
|
extern tcudnnDeriveBNTensorDescriptor *cudnnDeriveBNTensorDescriptor;
|
||
|
extern tcudnnBatchNormalizationForwardInference *cudnnBatchNormalizationForwardInference;
|
||
|
extern tcudnnDeriveNormTensorDescriptor *cudnnDeriveNormTensorDescriptor;
|
||
|
extern tcudnnNormalizationForwardInference *cudnnNormalizationForwardInference;
|
||
|
extern tcudnnCreateSpatialTransformerDescriptor *cudnnCreateSpatialTransformerDescriptor;
|
||
|
extern tcudnnSetSpatialTransformerNdDescriptor *cudnnSetSpatialTransformerNdDescriptor;
|
||
|
extern tcudnnDestroySpatialTransformerDescriptor *cudnnDestroySpatialTransformerDescriptor;
|
||
|
extern tcudnnSpatialTfGridGeneratorForward *cudnnSpatialTfGridGeneratorForward;
|
||
|
extern tcudnnSpatialTfSamplerForward *cudnnSpatialTfSamplerForward;
|
||
|
extern tcudnnCreateDropoutDescriptor *cudnnCreateDropoutDescriptor;
|
||
|
extern tcudnnDestroyDropoutDescriptor *cudnnDestroyDropoutDescriptor;
|
||
|
extern tcudnnDropoutGetStatesSize *cudnnDropoutGetStatesSize;
|
||
|
extern tcudnnDropoutGetReserveSpaceSize *cudnnDropoutGetReserveSpaceSize;
|
||
|
extern tcudnnSetDropoutDescriptor *cudnnSetDropoutDescriptor;
|
||
|
extern tcudnnRestoreDropoutDescriptor *cudnnRestoreDropoutDescriptor;
|
||
|
extern tcudnnGetDropoutDescriptor *cudnnGetDropoutDescriptor;
|
||
|
extern tcudnnDropoutForward *cudnnDropoutForward;
|
||
|
extern tcudnnCreateAlgorithmDescriptor *cudnnCreateAlgorithmDescriptor;
|
||
|
extern tcudnnSetAlgorithmDescriptor *cudnnSetAlgorithmDescriptor;
|
||
|
extern tcudnnGetAlgorithmDescriptor *cudnnGetAlgorithmDescriptor;
|
||
|
extern tcudnnCopyAlgorithmDescriptor *cudnnCopyAlgorithmDescriptor;
|
||
|
extern tcudnnDestroyAlgorithmDescriptor *cudnnDestroyAlgorithmDescriptor;
|
||
|
extern tcudnnCreateAlgorithmPerformance *cudnnCreateAlgorithmPerformance;
|
||
|
extern tcudnnSetAlgorithmPerformance *cudnnSetAlgorithmPerformance;
|
||
|
extern tcudnnGetAlgorithmPerformance *cudnnGetAlgorithmPerformance;
|
||
|
extern tcudnnDestroyAlgorithmPerformance *cudnnDestroyAlgorithmPerformance;
|
||
|
extern tcudnnGetAlgorithmSpaceSize *cudnnGetAlgorithmSpaceSize;
|
||
|
extern tcudnnSaveAlgorithm *cudnnSaveAlgorithm;
|
||
|
extern tcudnnRestoreAlgorithm *cudnnRestoreAlgorithm;
|
||
|
extern tcudnnSetCallback *cudnnSetCallback;
|
||
|
extern tcudnnGetCallback *cudnnGetCallback;
|
||
|
extern tcudnnOpsInferVersionCheck *cudnnOpsInferVersionCheck;
|
||
|
extern tcudnnSoftmaxBackward *cudnnSoftmaxBackward;
|
||
|
extern tcudnnPoolingBackward *cudnnPoolingBackward;
|
||
|
extern tcudnnActivationBackward *cudnnActivationBackward;
|
||
|
extern tcudnnLRNCrossChannelBackward *cudnnLRNCrossChannelBackward;
|
||
|
extern tcudnnDivisiveNormalizationBackward *cudnnDivisiveNormalizationBackward;
|
||
|
extern tcudnnGetBatchNormalizationForwardTrainingExWorkspaceSize *cudnnGetBatchNormalizationForwardTrainingExWorkspaceSize;
|
||
|
extern tcudnnGetBatchNormalizationBackwardExWorkspaceSize *cudnnGetBatchNormalizationBackwardExWorkspaceSize;
|
||
|
extern tcudnnGetBatchNormalizationTrainingExReserveSpaceSize *cudnnGetBatchNormalizationTrainingExReserveSpaceSize;
|
||
|
extern tcudnnBatchNormalizationForwardTraining *cudnnBatchNormalizationForwardTraining;
|
||
|
extern tcudnnBatchNormalizationForwardTrainingEx *cudnnBatchNormalizationForwardTrainingEx;
|
||
|
extern tcudnnBatchNormalizationBackward *cudnnBatchNormalizationBackward;
|
||
|
extern tcudnnBatchNormalizationBackwardEx *cudnnBatchNormalizationBackwardEx;
|
||
|
extern tcudnnGetNormalizationForwardTrainingWorkspaceSize *cudnnGetNormalizationForwardTrainingWorkspaceSize;
|
||
|
extern tcudnnGetNormalizationBackwardWorkspaceSize *cudnnGetNormalizationBackwardWorkspaceSize;
|
||
|
extern tcudnnGetNormalizationTrainingReserveSpaceSize *cudnnGetNormalizationTrainingReserveSpaceSize;
|
||
|
extern tcudnnNormalizationForwardTraining *cudnnNormalizationForwardTraining;
|
||
|
extern tcudnnNormalizationBackward *cudnnNormalizationBackward;
|
||
|
extern tcudnnSpatialTfGridGeneratorBackward *cudnnSpatialTfGridGeneratorBackward;
|
||
|
extern tcudnnSpatialTfSamplerBackward *cudnnSpatialTfSamplerBackward;
|
||
|
extern tcudnnDropoutBackward *cudnnDropoutBackward;
|
||
|
extern tcudnnOpsTrainVersionCheck *cudnnOpsTrainVersionCheck;
|
||
|
extern tcudnnCreateRNNDescriptor *cudnnCreateRNNDescriptor;
|
||
|
extern tcudnnDestroyRNNDescriptor *cudnnDestroyRNNDescriptor;
|
||
|
extern tcudnnSetRNNDescriptor_v8 *cudnnSetRNNDescriptor_v8;
|
||
|
extern tcudnnGetRNNDescriptor_v8 *cudnnGetRNNDescriptor_v8;
|
||
|
extern tcudnnSetRNNDescriptor_v6 *cudnnSetRNNDescriptor_v6;
|
||
|
extern tcudnnGetRNNDescriptor_v6 *cudnnGetRNNDescriptor_v6;
|
||
|
extern tcudnnSetRNNMatrixMathType *cudnnSetRNNMatrixMathType;
|
||
|
extern tcudnnGetRNNMatrixMathType *cudnnGetRNNMatrixMathType;
|
||
|
extern tcudnnSetRNNBiasMode *cudnnSetRNNBiasMode;
|
||
|
extern tcudnnGetRNNBiasMode *cudnnGetRNNBiasMode;
|
||
|
extern tcudnnRNNSetClip_v8 *cudnnRNNSetClip_v8;
|
||
|
extern tcudnnRNNGetClip_v8 *cudnnRNNGetClip_v8;
|
||
|
extern tcudnnRNNSetClip *cudnnRNNSetClip;
|
||
|
extern tcudnnRNNGetClip *cudnnRNNGetClip;
|
||
|
extern tcudnnSetRNNProjectionLayers *cudnnSetRNNProjectionLayers;
|
||
|
extern tcudnnGetRNNProjectionLayers *cudnnGetRNNProjectionLayers;
|
||
|
extern tcudnnCreatePersistentRNNPlan *cudnnCreatePersistentRNNPlan;
|
||
|
extern tcudnnDestroyPersistentRNNPlan *cudnnDestroyPersistentRNNPlan;
|
||
|
extern tcudnnSetPersistentRNNPlan *cudnnSetPersistentRNNPlan;
|
||
|
extern tcudnnBuildRNNDynamic *cudnnBuildRNNDynamic;
|
||
|
extern tcudnnGetRNNWorkspaceSize *cudnnGetRNNWorkspaceSize;
|
||
|
extern tcudnnGetRNNTrainingReserveSize *cudnnGetRNNTrainingReserveSize;
|
||
|
extern tcudnnGetRNNTempSpaceSizes *cudnnGetRNNTempSpaceSizes;
|
||
|
extern tcudnnGetRNNParamsSize *cudnnGetRNNParamsSize;
|
||
|
extern tcudnnGetRNNWeightSpaceSize *cudnnGetRNNWeightSpaceSize;
|
||
|
extern tcudnnGetRNNLinLayerMatrixParams *cudnnGetRNNLinLayerMatrixParams;
|
||
|
extern tcudnnGetRNNLinLayerBiasParams *cudnnGetRNNLinLayerBiasParams;
|
||
|
extern tcudnnGetRNNWeightParams *cudnnGetRNNWeightParams;
|
||
|
extern tcudnnRNNForwardInference *cudnnRNNForwardInference;
|
||
|
extern tcudnnSetRNNPaddingMode *cudnnSetRNNPaddingMode;
|
||
|
extern tcudnnGetRNNPaddingMode *cudnnGetRNNPaddingMode;
|
||
|
extern tcudnnCreateRNNDataDescriptor *cudnnCreateRNNDataDescriptor;
|
||
|
extern tcudnnDestroyRNNDataDescriptor *cudnnDestroyRNNDataDescriptor;
|
||
|
extern tcudnnSetRNNDataDescriptor *cudnnSetRNNDataDescriptor;
|
||
|
extern tcudnnGetRNNDataDescriptor *cudnnGetRNNDataDescriptor;
|
||
|
extern tcudnnRNNForwardInferenceEx *cudnnRNNForwardInferenceEx;
|
||
|
extern tcudnnRNNForward *cudnnRNNForward;
|
||
|
extern tcudnnSetRNNAlgorithmDescriptor *cudnnSetRNNAlgorithmDescriptor;
|
||
|
extern tcudnnGetRNNForwardInferenceAlgorithmMaxCount *cudnnGetRNNForwardInferenceAlgorithmMaxCount;
|
||
|
extern tcudnnFindRNNForwardInferenceAlgorithmEx *cudnnFindRNNForwardInferenceAlgorithmEx;
|
||
|
extern tcudnnCreateSeqDataDescriptor *cudnnCreateSeqDataDescriptor;
|
||
|
extern tcudnnDestroySeqDataDescriptor *cudnnDestroySeqDataDescriptor;
|
||
|
extern tcudnnSetSeqDataDescriptor *cudnnSetSeqDataDescriptor;
|
||
|
extern tcudnnGetSeqDataDescriptor *cudnnGetSeqDataDescriptor;
|
||
|
extern tcudnnCreateAttnDescriptor *cudnnCreateAttnDescriptor;
|
||
|
extern tcudnnDestroyAttnDescriptor *cudnnDestroyAttnDescriptor;
|
||
|
extern tcudnnSetAttnDescriptor *cudnnSetAttnDescriptor;
|
||
|
extern tcudnnGetAttnDescriptor *cudnnGetAttnDescriptor;
|
||
|
extern tcudnnGetMultiHeadAttnBuffers *cudnnGetMultiHeadAttnBuffers;
|
||
|
extern tcudnnGetMultiHeadAttnWeights *cudnnGetMultiHeadAttnWeights;
|
||
|
extern tcudnnMultiHeadAttnForward *cudnnMultiHeadAttnForward;
|
||
|
extern tcudnnAdvInferVersionCheck *cudnnAdvInferVersionCheck;
|
||
|
extern tcudnnRNNForwardTraining *cudnnRNNForwardTraining;
|
||
|
extern tcudnnRNNBackwardData *cudnnRNNBackwardData;
|
||
|
extern tcudnnRNNBackwardData_v8 *cudnnRNNBackwardData_v8;
|
||
|
extern tcudnnRNNBackwardWeights *cudnnRNNBackwardWeights;
|
||
|
extern tcudnnRNNBackwardWeights_v8 *cudnnRNNBackwardWeights_v8;
|
||
|
extern tcudnnRNNForwardTrainingEx *cudnnRNNForwardTrainingEx;
|
||
|
extern tcudnnRNNBackwardDataEx *cudnnRNNBackwardDataEx;
|
||
|
extern tcudnnRNNBackwardWeightsEx *cudnnRNNBackwardWeightsEx;
|
||
|
extern tcudnnGetRNNForwardTrainingAlgorithmMaxCount *cudnnGetRNNForwardTrainingAlgorithmMaxCount;
|
||
|
extern tcudnnFindRNNForwardTrainingAlgorithmEx *cudnnFindRNNForwardTrainingAlgorithmEx;
|
||
|
extern tcudnnGetRNNBackwardDataAlgorithmMaxCount *cudnnGetRNNBackwardDataAlgorithmMaxCount;
|
||
|
extern tcudnnFindRNNBackwardDataAlgorithmEx *cudnnFindRNNBackwardDataAlgorithmEx;
|
||
|
extern tcudnnGetRNNBackwardWeightsAlgorithmMaxCount *cudnnGetRNNBackwardWeightsAlgorithmMaxCount;
|
||
|
extern tcudnnFindRNNBackwardWeightsAlgorithmEx *cudnnFindRNNBackwardWeightsAlgorithmEx;
|
||
|
extern tcudnnMultiHeadAttnBackwardData *cudnnMultiHeadAttnBackwardData;
|
||
|
extern tcudnnMultiHeadAttnBackwardWeights *cudnnMultiHeadAttnBackwardWeights;
|
||
|
extern tcudnnCreateCTCLossDescriptor *cudnnCreateCTCLossDescriptor;
|
||
|
extern tcudnnSetCTCLossDescriptor *cudnnSetCTCLossDescriptor;
|
||
|
extern tcudnnSetCTCLossDescriptorEx *cudnnSetCTCLossDescriptorEx;
|
||
|
extern tcudnnSetCTCLossDescriptor_v8 *cudnnSetCTCLossDescriptor_v8;
|
||
|
extern tcudnnGetCTCLossDescriptor *cudnnGetCTCLossDescriptor;
|
||
|
extern tcudnnGetCTCLossDescriptorEx *cudnnGetCTCLossDescriptorEx;
|
||
|
extern tcudnnGetCTCLossDescriptor_v8 *cudnnGetCTCLossDescriptor_v8;
|
||
|
extern tcudnnDestroyCTCLossDescriptor *cudnnDestroyCTCLossDescriptor;
|
||
|
extern tcudnnCTCLoss *cudnnCTCLoss;
|
||
|
extern tcudnnCTCLoss_v8 *cudnnCTCLoss_v8;
|
||
|
extern tcudnnGetCTCLossWorkspaceSize *cudnnGetCTCLossWorkspaceSize;
|
||
|
extern tcudnnGetCTCLossWorkspaceSize_v8 *cudnnGetCTCLossWorkspaceSize_v8;
|
||
|
extern tcudnnAdvTrainVersionCheck *cudnnAdvTrainVersionCheck;
|
||
|
extern tcudnnCreateConvolutionDescriptor *cudnnCreateConvolutionDescriptor;
|
||
|
extern tcudnnDestroyConvolutionDescriptor *cudnnDestroyConvolutionDescriptor;
|
||
|
extern tcudnnSetConvolutionMathType *cudnnSetConvolutionMathType;
|
||
|
extern tcudnnGetConvolutionMathType *cudnnGetConvolutionMathType;
|
||
|
extern tcudnnSetConvolutionGroupCount *cudnnSetConvolutionGroupCount;
|
||
|
extern tcudnnGetConvolutionGroupCount *cudnnGetConvolutionGroupCount;
|
||
|
extern tcudnnSetConvolutionReorderType *cudnnSetConvolutionReorderType;
|
||
|
extern tcudnnGetConvolutionReorderType *cudnnGetConvolutionReorderType;
|
||
|
extern tcudnnSetConvolution2dDescriptor *cudnnSetConvolution2dDescriptor;
|
||
|
extern tcudnnGetConvolution2dDescriptor *cudnnGetConvolution2dDescriptor;
|
||
|
extern tcudnnSetConvolutionNdDescriptor *cudnnSetConvolutionNdDescriptor;
|
||
|
extern tcudnnGetConvolutionNdDescriptor *cudnnGetConvolutionNdDescriptor;
|
||
|
extern tcudnnGetConvolution2dForwardOutputDim *cudnnGetConvolution2dForwardOutputDim;
|
||
|
extern tcudnnGetConvolutionNdForwardOutputDim *cudnnGetConvolutionNdForwardOutputDim;
|
||
|
extern tcudnnGetConvolutionForwardAlgorithmMaxCount *cudnnGetConvolutionForwardAlgorithmMaxCount;
|
||
|
extern tcudnnGetConvolutionForwardAlgorithm_v7 *cudnnGetConvolutionForwardAlgorithm_v7;
|
||
|
extern tcudnnFindConvolutionForwardAlgorithm *cudnnFindConvolutionForwardAlgorithm;
|
||
|
extern tcudnnFindConvolutionForwardAlgorithmEx *cudnnFindConvolutionForwardAlgorithmEx;
|
||
|
extern tcudnnIm2Col *cudnnIm2Col;
|
||
|
extern tcudnnReorderFilterAndBias *cudnnReorderFilterAndBias;
|
||
|
extern tcudnnGetConvolutionForwardWorkspaceSize *cudnnGetConvolutionForwardWorkspaceSize;
|
||
|
extern tcudnnConvolutionForward *cudnnConvolutionForward;
|
||
|
extern tcudnnConvolutionBiasActivationForward *cudnnConvolutionBiasActivationForward;
|
||
|
extern tcudnnGetConvolutionBackwardDataAlgorithmMaxCount *cudnnGetConvolutionBackwardDataAlgorithmMaxCount;
|
||
|
extern tcudnnFindConvolutionBackwardDataAlgorithm *cudnnFindConvolutionBackwardDataAlgorithm;
|
||
|
extern tcudnnFindConvolutionBackwardDataAlgorithmEx *cudnnFindConvolutionBackwardDataAlgorithmEx;
|
||
|
extern tcudnnGetConvolutionBackwardDataAlgorithm_v7 *cudnnGetConvolutionBackwardDataAlgorithm_v7;
|
||
|
extern tcudnnGetConvolutionBackwardDataWorkspaceSize *cudnnGetConvolutionBackwardDataWorkspaceSize;
|
||
|
extern tcudnnConvolutionBackwardData *cudnnConvolutionBackwardData;
|
||
|
extern tcudnnGetFoldedConvBackwardDataDescriptors *cudnnGetFoldedConvBackwardDataDescriptors;
|
||
|
extern tcudnnCnnInferVersionCheck *cudnnCnnInferVersionCheck;
|
||
|
extern tcudnnGetConvolutionBackwardFilterAlgorithmMaxCount *cudnnGetConvolutionBackwardFilterAlgorithmMaxCount;
|
||
|
extern tcudnnFindConvolutionBackwardFilterAlgorithm *cudnnFindConvolutionBackwardFilterAlgorithm;
|
||
|
extern tcudnnFindConvolutionBackwardFilterAlgorithmEx *cudnnFindConvolutionBackwardFilterAlgorithmEx;
|
||
|
extern tcudnnGetConvolutionBackwardFilterAlgorithm_v7 *cudnnGetConvolutionBackwardFilterAlgorithm_v7;
|
||
|
extern tcudnnGetConvolutionBackwardFilterWorkspaceSize *cudnnGetConvolutionBackwardFilterWorkspaceSize;
|
||
|
extern tcudnnConvolutionBackwardFilter *cudnnConvolutionBackwardFilter;
|
||
|
extern tcudnnConvolutionBackwardBias *cudnnConvolutionBackwardBias;
|
||
|
extern tcudnnCreateFusedOpsConstParamPack *cudnnCreateFusedOpsConstParamPack;
|
||
|
extern tcudnnDestroyFusedOpsConstParamPack *cudnnDestroyFusedOpsConstParamPack;
|
||
|
extern tcudnnSetFusedOpsConstParamPackAttribute *cudnnSetFusedOpsConstParamPackAttribute;
|
||
|
extern tcudnnGetFusedOpsConstParamPackAttribute *cudnnGetFusedOpsConstParamPackAttribute;
|
||
|
extern tcudnnCreateFusedOpsVariantParamPack *cudnnCreateFusedOpsVariantParamPack;
|
||
|
extern tcudnnDestroyFusedOpsVariantParamPack *cudnnDestroyFusedOpsVariantParamPack;
|
||
|
extern tcudnnSetFusedOpsVariantParamPackAttribute *cudnnSetFusedOpsVariantParamPackAttribute;
|
||
|
extern tcudnnGetFusedOpsVariantParamPackAttribute *cudnnGetFusedOpsVariantParamPackAttribute;
|
||
|
extern tcudnnCreateFusedOpsPlan *cudnnCreateFusedOpsPlan;
|
||
|
extern tcudnnDestroyFusedOpsPlan *cudnnDestroyFusedOpsPlan;
|
||
|
extern tcudnnMakeFusedOpsPlan *cudnnMakeFusedOpsPlan;
|
||
|
extern tcudnnFusedOpsExecute *cudnnFusedOpsExecute;
|
||
|
extern tcudnnCnnTrainVersionCheck *cudnnCnnTrainVersionCheck;
|
||
|
extern tcudnnBackendCreateDescriptor *cudnnBackendCreateDescriptor;
|
||
|
extern tcudnnBackendDestroyDescriptor *cudnnBackendDestroyDescriptor;
|
||
|
extern tcudnnBackendInitialize *cudnnBackendInitialize;
|
||
|
extern tcudnnBackendFinalize *cudnnBackendFinalize;
|
||
|
extern tcudnnBackendSetAttribute *cudnnBackendSetAttribute;
|
||
|
extern tcudnnBackendGetAttribute *cudnnBackendGetAttribute;
|
||
|
extern tcudnnBackendExecute *cudnnBackendExecute;
|
||
|
|
||
|
extern tcuGraphicsGLRegisterBuffer *cuGraphicsGLRegisterBuffer;
|
||
|
extern tcuGraphicsGLRegisterImage *cuGraphicsGLRegisterImage;
|
||
|
extern tcuGLGetDevices_v2 *cuGLGetDevices_v2;
|
||
|
extern tcuGLCtxCreate_v2 *cuGLCtxCreate_v2;
|
||
|
extern tcuGLInit *cuGLInit;
|
||
|
extern tcuGLRegisterBufferObject *cuGLRegisterBufferObject;
|
||
|
extern tcuGLMapBufferObject_v2 *cuGLMapBufferObject_v2;
|
||
|
extern tcuGLUnmapBufferObject *cuGLUnmapBufferObject;
|
||
|
extern tcuGLUnregisterBufferObject *cuGLUnregisterBufferObject;
|
||
|
extern tcuGLSetBufferObjectMapFlags *cuGLSetBufferObjectMapFlags;
|
||
|
extern tcuGLMapBufferObjectAsync_v2 *cuGLMapBufferObjectAsync_v2;
|
||
|
extern tcuGLUnmapBufferObjectAsync *cuGLUnmapBufferObjectAsync;
|
||
|
|
||
|
|
||
|
enum {
|
||
|
CUEW_SUCCESS = 0,
|
||
|
CUEW_ERROR_OPEN_FAILED = -1,
|
||
|
CUEW_ERROR_ATEXIT_FAILED = -2,
|
||
|
};
|
||
|
|
||
|
enum {
|
||
|
CUEW_INIT_CUDA = (1 << 0),
|
||
|
CUEW_INIT_NVRTC = (1 << 1),
|
||
|
CUEW_INIT_CUDNN = (1 << 2),
|
||
|
};
|
||
|
|
||
|
int cuewInit(cuuint32_t flags);
|
||
|
const char *cuewErrorString(CUresult result);
|
||
|
const char *cuewCompilerPath(void);
|
||
|
int cuewCompilerVersion(void);
|
||
|
int cuewNvrtcVersion(void);
|
||
|
|
||
|
#ifdef __cplusplus
|
||
|
}
|
||
|
#endif
|
||
|
|
||
|
#endif /* __CUEW_H__ */
|