/*
* Source path: assimp/contrib/tinyusdz/tinyusdz_repo/examples/common/cuew/cuew.h
* (3294 lines, 199 KiB, C)
*/

/*
* Copyright 2011-2014 Blender Foundation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef __CUEW_H__
#define __CUEW_H__
#ifdef __cplusplus
extern "C" {
#endif
#include <stdlib.h>
/* Defines. */
/* CUEW version number (major.minor). */
#define CUEW_VERSION_MAJOR 2
#define CUEW_VERSION_MINOR 0
/* Routes the unsuffixed name to the _v3 symbol. */
#define cuTexRefSetAddress2D cuTexRefSetAddress2D_v3
/* NOTE(review): presumably encodes CUDA 11.2 as major * 1000 + minor * 10 - confirm. */
#define CUDA_VERSION 11020
#define CU_UUID_HAS_BEEN_DEFINED
/* Byte length of the reserved[] arrays in CUipcEventHandle and CUipcMemHandle. */
#define CU_IPC_HANDLE_SIZE 64
/* Small integer constants cast to the CUstream handle type. */
#define CU_STREAM_LEGACY ((CUstream)0x1)
#define CU_STREAM_PER_THREAD ((CUstream)0x2)
/* CU_MEMHOSTALLOC_*: each value occupies a distinct bit. */
#define CU_MEMHOSTALLOC_PORTABLE 0x01
#define CU_MEMHOSTALLOC_DEVICEMAP 0x02
#define CU_MEMHOSTALLOC_WRITECOMBINED 0x04
/* CU_MEMHOSTREGISTER_*: each value occupies a distinct bit. */
#define CU_MEMHOSTREGISTER_PORTABLE 0x01
#define CU_MEMHOSTREGISTER_DEVICEMAP 0x02
#define CU_MEMHOSTREGISTER_IOMEMORY 0x04
#define CU_MEMHOSTREGISTER_READ_ONLY 0x08
#define CU_ARRAY_SPARSE_PROPERTIES_SINGLE_MIPTAIL 0x1
#define CUDA_EXTERNAL_MEMORY_DEDICATED 0x1
#define CUDA_EXTERNAL_SEMAPHORE_SIGNAL_SKIP_NVSCIBUF_MEMSYNC 0x01
#define CUDA_EXTERNAL_SEMAPHORE_WAIT_SKIP_NVSCIBUF_MEMSYNC 0x02
#define CUDA_NVSCISYNC_ATTR_SIGNAL 0x1
#define CUDA_NVSCISYNC_ATTR_WAIT 0x2
#define CU_MEM_CREATE_USAGE_TILE_POOL 0x1
#define CUDA_COOPERATIVE_LAUNCH_MULTI_DEVICE_NO_PRE_LAUNCH_SYNC 0x01
#define CUDA_COOPERATIVE_LAUNCH_MULTI_DEVICE_NO_POST_LAUNCH_SYNC 0x02
/* CUDA_ARRAY3D_*: LAYERED and 2DARRAY deliberately share the value 0x01. */
#define CUDA_ARRAY3D_LAYERED 0x01
#define CUDA_ARRAY3D_2DARRAY 0x01
#define CUDA_ARRAY3D_SURFACE_LDST 0x02
#define CUDA_ARRAY3D_CUBEMAP 0x04
#define CUDA_ARRAY3D_TEXTURE_GATHER 0x08
#define CUDA_ARRAY3D_DEPTH_TEXTURE 0x10
#define CUDA_ARRAY3D_COLOR_ATTACHMENT 0x20
#define CUDA_ARRAY3D_SPARSE 0x40
#define CU_TRSA_OVERRIDE_FORMAT 0x01
/* CU_TRSF_*: each value occupies a distinct bit (0x04 and 0x08 are not used here). */
#define CU_TRSF_READ_AS_INTEGER 0x01
#define CU_TRSF_NORMALIZED_COORDINATES 0x02
#define CU_TRSF_SRGB 0x10
#define CU_TRSF_DISABLE_TRILINEAR_OPTIMIZATION 0x20
/* Small integer constants cast to void*, usable as pointer-typed markers. */
#define CU_LAUNCH_PARAM_END ((void*)0x00)
#define CU_LAUNCH_PARAM_BUFFER_POINTER ((void*)0x01)
#define CU_LAUNCH_PARAM_BUFFER_SIZE ((void*)0x02)
/*
 * Parenthesized so that the unary minus always stays attached to the
 * literal, whatever operators surround the macro at the use site.
 */
#define CU_PARAM_TR_DEFAULT (-1)
/* Negative integer constants cast to the CUdevice type. */
#define CU_DEVICE_CPU ((CUdevice)-1)
#define CU_DEVICE_INVALID ((CUdevice)-2)
/* Functions whose signatures changed between CUDA 3.1 and 3.2 for 64-bit
* support. The CUDA library keeps the old entry points for compatibility
* and exposes the new ones with a _v2 postfix.
*/
/*
 * Alias table: each unsuffixed name on the left is replaced by the
 * preprocessor with the _v2 name on the right.
 */
/* Device, context and module. */
#define cuDeviceTotalMem cuDeviceTotalMem_v2
#define cuCtxCreate cuCtxCreate_v2
#define cuModuleGetGlobal cuModuleGetGlobal_v2
/* cuMem* allocation and query. */
#define cuMemGetInfo cuMemGetInfo_v2
#define cuMemAlloc cuMemAlloc_v2
#define cuMemAllocPitch cuMemAllocPitch_v2
#define cuMemFree cuMemFree_v2
#define cuMemGetAddressRange cuMemGetAddressRange_v2
#define cuMemAllocHost cuMemAllocHost_v2
#define cuMemHostGetDevicePointer cuMemHostGetDevicePointer_v2
/* cuMemcpy* family. */
#define cuMemcpyHtoD cuMemcpyHtoD_v2
#define cuMemcpyDtoH cuMemcpyDtoH_v2
#define cuMemcpyDtoD cuMemcpyDtoD_v2
#define cuMemcpyDtoA cuMemcpyDtoA_v2
#define cuMemcpyAtoD cuMemcpyAtoD_v2
#define cuMemcpyHtoA cuMemcpyHtoA_v2
#define cuMemcpyAtoH cuMemcpyAtoH_v2
#define cuMemcpyAtoA cuMemcpyAtoA_v2
#define cuMemcpyHtoAAsync cuMemcpyHtoAAsync_v2
#define cuMemcpyAtoHAsync cuMemcpyAtoHAsync_v2
#define cuMemcpy2D cuMemcpy2D_v2
#define cuMemcpy2DUnaligned cuMemcpy2DUnaligned_v2
#define cuMemcpy3D cuMemcpy3D_v2
#define cuMemcpyHtoDAsync cuMemcpyHtoDAsync_v2
#define cuMemcpyDtoHAsync cuMemcpyDtoHAsync_v2
#define cuMemcpyDtoDAsync cuMemcpyDtoDAsync_v2
#define cuMemcpy2DAsync cuMemcpy2DAsync_v2
#define cuMemcpy3DAsync cuMemcpy3DAsync_v2
/* cuMemset* family. */
#define cuMemsetD8 cuMemsetD8_v2
#define cuMemsetD16 cuMemsetD16_v2
#define cuMemsetD32 cuMemsetD32_v2
#define cuMemsetD2D8 cuMemsetD2D8_v2
#define cuMemsetD2D16 cuMemsetD2D16_v2
#define cuMemsetD2D32 cuMemsetD2D32_v2
/* Arrays and texture references. */
#define cuArrayCreate cuArrayCreate_v2
#define cuArrayGetDescriptor cuArrayGetDescriptor_v2
#define cuArray3DCreate cuArray3DCreate_v2
#define cuArray3DGetDescriptor cuArray3DGetDescriptor_v2
#define cuTexRefSetAddress cuTexRefSetAddress_v2
#define cuTexRefGetAddress cuTexRefGetAddress_v2
#define cuGraphicsResourceGetMappedPointer cuGraphicsResourceGetMappedPointer_v2
/* Context, stream and event teardown / stack handling. */
#define cuCtxDestroy cuCtxDestroy_v2
#define cuCtxPopCurrent cuCtxPopCurrent_v2
#define cuCtxPushCurrent cuCtxPushCurrent_v2
#define cuStreamDestroy cuStreamDestroy_v2
#define cuEventDestroy cuEventDestroy_v2
/* Linker. */
#define cuLinkCreate cuLinkCreate_v2
#define cuLinkAddData cuLinkAddData_v2
#define cuLinkAddFile cuLinkAddFile_v2
/* Miscellaneous. */
#define cuMemHostRegister cuMemHostRegister_v2
#define cuGraphicsResourceSetMapFlags cuGraphicsResourceSetMapFlags_v2
#define cuStreamBeginCapture cuStreamBeginCapture_v2
#define cuDevicePrimaryCtxRelease cuDevicePrimaryCtxRelease_v2
#define cuDevicePrimaryCtxReset cuDevicePrimaryCtxReset_v2
#define cuDevicePrimaryCtxSetFlags cuDevicePrimaryCtxSetFlags_v2
#define cuIpcOpenMemHandle cuIpcOpenMemHandle_v2
#define cuGraphInstantiate cuGraphInstantiate_v2
/* cuGL* names. */
#define cuGLCtxCreate cuGLCtxCreate_v2
#define cuGLMapBufferObject cuGLMapBufferObject_v2
#define cuGLMapBufferObjectAsync cuGLMapBufferObjectAsync_v2
#define cuGLGetDevices cuGLGetDevices_v2
/* Types. */
/*
 * 32- and 64-bit unsigned integer aliases.
 * <stdint.h> is pulled in for every compiler (for MSVC this assumes
 * VS2017 or later); MSVC builds spell the aliases with the compiler's
 * sized __int32 / __int64 keywords, everything else uses the <stdint.h>
 * types.
 */
#include <stdint.h>
#if defined(_MSC_VER)
typedef unsigned __int32 cuuint32_t;
typedef unsigned __int64 cuuint64_t;
#else
typedef uint32_t cuuint32_t;
typedef uint64_t cuuint64_t;
#endif
/*
 * Integer type used for CUdeviceptr values: unsigned 64-bit on 64-bit
 * targets, unsigned int otherwise.
 * The generic data-model macros (_WIN64, __LP64__) are tested in addition
 * to the explicit x86-64 / AArch64 macros so that other 64-bit targets are
 * not silently given the 32-bit type.
 */
#if defined(_WIN64) || defined(__LP64__) || \
    defined(__x86_64) || defined(AMD64) || defined(_M_AMD64) || defined (__aarch64__)
typedef unsigned long long CUdeviceptr;
#else
typedef unsigned int CUdeviceptr;
#endif
/*
 * Decorations placed in function and callback-pointer declarations:
 * both expand to __stdcall when _WIN32 is defined and to nothing otherwise.
 */
#if !defined(_WIN32)
#  define CUDAAPI
#  define CUDA_CB
#else
#  define CUDAAPI __stdcall
#  define CUDA_CB __stdcall
#endif
/*
 * __device_builtin__ expands to __location__(device_builtin) when
 * __CUDACC__ is defined and to nothing for every other compiler.
 */
#if defined(__CUDACC__)
#  define __device_builtin__ __location__(device_builtin)
#else
#  define __device_builtin__
#endif
/* Pointer to the opaque struct CUstream_st (same pointee type as CUstream). */
typedef __device_builtin__ struct CUstream_st *cudaStream_t;
/* CUdevice is a plain int. */
typedef int CUdevice;

/*
 * Opaque handles: each is a pointer to a struct that is only ever
 * forward-declared in this part of the header.
 */
typedef struct CUctx_st *CUcontext;
typedef struct CUmod_st *CUmodule;
typedef struct CUfunc_st *CUfunction;
typedef struct CUarray_st *CUarray;
typedef struct CUmipmappedArray_st *CUmipmappedArray;
typedef struct CUtexref_st *CUtexref;
typedef struct CUsurfref_st *CUsurfref;
typedef struct CUevent_st *CUevent;
typedef struct CUstream_st *CUstream;
typedef struct CUgraphicsResource_st *CUgraphicsResource;

/* Texture and surface objects are integer values, not pointers. */
typedef unsigned long long CUtexObject;
typedef unsigned long long CUsurfObject;

typedef struct CUextMemory_st *CUexternalMemory;
typedef struct CUextSemaphore_st *CUexternalSemaphore;
typedef struct CUgraph_st *CUgraph;
typedef struct CUgraphNode_st *CUgraphNode;
typedef struct CUgraphExec_st *CUgraphExec;
typedef struct CUmemPoolHandle_st *CUmemoryPool;
/* 16 raw bytes. */
typedef struct CUuuid_st {
    char bytes[16];
} CUuuid;
/*
 * Two handle structs with the same layout: a single char array of
 * CU_IPC_HANDLE_SIZE bytes named reserved.
 */
typedef struct CUipcEventHandle_st {
    char reserved[CU_IPC_HANDLE_SIZE];
} CUipcEventHandle;

typedef struct CUipcMemHandle_st {
    char reserved[CU_IPC_HANDLE_SIZE];
} CUipcMemHandle;
/* Single enumerator with value 1. */
typedef enum CUipcMem_flags_enum {
    CU_IPC_MEM_LAZY_ENABLE_PEER_ACCESS = 1
} CUipcMem_flags;

/* Three enumerators, each on its own bit (1, 2, 4). */
typedef enum CUmemAttach_flags_enum {
    CU_MEM_ATTACH_GLOBAL = 1,
    CU_MEM_ATTACH_HOST   = 2,
    CU_MEM_ATTACH_SINGLE = 4
} CUmemAttach_flags;
/*
 * CUctx_flags values.
 * CU_CTX_SCHED_MASK equals the OR of the three non-zero CU_CTX_SCHED_*
 * values; CU_CTX_FLAGS_MASK equals the OR of every value listed here.
 */
typedef enum CUctx_flags_enum {
    CU_CTX_SCHED_AUTO = 0x00,
    CU_CTX_SCHED_SPIN = 0x01,
    CU_CTX_SCHED_YIELD = 0x02,
    CU_CTX_SCHED_BLOCKING_SYNC = 0x04,
    CU_CTX_BLOCKING_SYNC = 0x04,      /* same value as CU_CTX_SCHED_BLOCKING_SYNC */
    CU_CTX_SCHED_MASK = 0x07,
    CU_CTX_MAP_HOST = 0x08,
    CU_CTX_LMEM_RESIZE_TO_MAX = 0x10,
    CU_CTX_FLAGS_MASK = 0x1f
} CUctx_flags;
/* CUstream_flags values. */
typedef enum CUstream_flags_enum {
    CU_STREAM_DEFAULT      = 0,
    CU_STREAM_NON_BLOCKING = 1
} CUstream_flags;

/* CUevent_flags values; the non-zero ones each occupy a distinct bit. */
typedef enum CUevent_flags_enum {
    CU_EVENT_DEFAULT        = 0,
    CU_EVENT_BLOCKING_SYNC  = 1,
    CU_EVENT_DISABLE_TIMING = 2,
    CU_EVENT_INTERPROCESS   = 4
} CUevent_flags;

/* CUevent_record_flags values. */
typedef enum CUevent_record_flags_enum {
    CU_EVENT_RECORD_DEFAULT  = 0,
    CU_EVENT_RECORD_EXTERNAL = 1
} CUevent_record_flags;

/* CUevent_wait_flags values. */
typedef enum CUevent_wait_flags_enum {
    CU_EVENT_WAIT_DEFAULT  = 0,
    CU_EVENT_WAIT_EXTERNAL = 1
} CUevent_wait_flags;
/*
 * CUstreamWaitValue_flags values: 0..3 plus CU_STREAM_WAIT_VALUE_FLUSH,
 * which sits alone on bit 30.
 */
typedef enum CUstreamWaitValue_flags_enum {
    CU_STREAM_WAIT_VALUE_GEQ   = 0,
    CU_STREAM_WAIT_VALUE_EQ    = 1,
    CU_STREAM_WAIT_VALUE_AND   = 2,
    CU_STREAM_WAIT_VALUE_NOR   = 3,
    CU_STREAM_WAIT_VALUE_FLUSH = 0x40000000
} CUstreamWaitValue_flags;

/* CUstreamWriteValue_flags values. */
typedef enum CUstreamWriteValue_flags_enum {
    CU_STREAM_WRITE_VALUE_DEFAULT           = 0,
    CU_STREAM_WRITE_VALUE_NO_MEMORY_BARRIER = 1
} CUstreamWriteValue_flags;

/*
 * CUstreamBatchMemOpType values. The enumerators are not listed in
 * numeric order: FLUSH_REMOTE_WRITES is 3 although it is declared last.
 */
typedef enum CUstreamBatchMemOpType_enum {
    CU_STREAM_MEM_OP_WAIT_VALUE_32       = 1,
    CU_STREAM_MEM_OP_WRITE_VALUE_32      = 2,
    CU_STREAM_MEM_OP_WAIT_VALUE_64       = 4,
    CU_STREAM_MEM_OP_WRITE_VALUE_64      = 5,
    CU_STREAM_MEM_OP_FLUSH_REMOTE_WRITES = 3
} CUstreamBatchMemOpType;
/*
 * Union over three parameter structs. The bare 'operation' member and the
 * first field of every nested struct are all a CUstreamBatchMemOpType, so
 * they overlap at offset 0. The pad member keeps the union at least
 * 6 * sizeof(cuuint64_t) bytes large.
 */
typedef union CUstreamBatchMemOpParams_union {
    CUstreamBatchMemOpType operation;

    struct CUstreamMemOpWaitValueParams_st {
        CUstreamBatchMemOpType operation;
        CUdeviceptr address;
        /* Anonymous union: 32- and 64-bit views of the same storage. */
        union {
            cuuint32_t value;
            cuuint64_t value64;
        };
        unsigned int flags;
        CUdeviceptr alias;
    } waitValue;

    /* Same field layout as the wait-value struct above. */
    struct CUstreamMemOpWriteValueParams_st {
        CUstreamBatchMemOpType operation;
        CUdeviceptr address;
        union {
            cuuint32_t value;
            cuuint64_t value64;
        };
        unsigned int flags;
        CUdeviceptr alias;
    } writeValue;

    struct CUstreamMemOpFlushRemoteWritesParams_st {
        CUstreamBatchMemOpType operation;
        unsigned int flags;
    } flushRemoteWrites;

    cuuint64_t pad[6];
} CUstreamBatchMemOpParams;
/* CUoccupancy_flags values. */
typedef enum CUoccupancy_flags_enum {
    CU_OCCUPANCY_DEFAULT                  = 0,
    CU_OCCUPANCY_DISABLE_CACHING_OVERRIDE = 1
} CUoccupancy_flags;

/* CUarray_format values; the numbering is sparse, not consecutive. */
typedef enum CUarray_format_enum {
    CU_AD_FORMAT_UNSIGNED_INT8  = 0x01,
    CU_AD_FORMAT_UNSIGNED_INT16 = 0x02,
    CU_AD_FORMAT_UNSIGNED_INT32 = 0x03,
    CU_AD_FORMAT_SIGNED_INT8    = 0x08,
    CU_AD_FORMAT_SIGNED_INT16   = 0x09,
    CU_AD_FORMAT_SIGNED_INT32   = 0x0a,
    CU_AD_FORMAT_HALF           = 0x10,
    CU_AD_FORMAT_FLOAT          = 0x20,
    CU_AD_FORMAT_NV12           = 0xb0
} CUarray_format;
/* CUaddress_mode values, numbered consecutively from 0. */
typedef enum CUaddress_mode_enum {
    CU_TR_ADDRESS_MODE_WRAP   = 0,
    CU_TR_ADDRESS_MODE_CLAMP  = 1,
    CU_TR_ADDRESS_MODE_MIRROR = 2,
    CU_TR_ADDRESS_MODE_BORDER = 3
} CUaddress_mode;

/* CUfilter_mode values. */
typedef enum CUfilter_mode_enum {
    CU_TR_FILTER_MODE_POINT  = 0,
    CU_TR_FILTER_MODE_LINEAR = 1
} CUfilter_mode;
/*
 * CUdevice_attribute values, numbered explicitly from 1.
 * Several numbers carry two names; the second name of each pair is marked
 * below. CU_DEVICE_ATTRIBUTE_MAX has no explicit value and therefore ends
 * up one above the last explicit enumerator.
 */
typedef enum CUdevice_attribute_enum {
    CU_DEVICE_ATTRIBUTE_MAX_THREADS_PER_BLOCK = 1,
    CU_DEVICE_ATTRIBUTE_MAX_BLOCK_DIM_X = 2,
    CU_DEVICE_ATTRIBUTE_MAX_BLOCK_DIM_Y = 3,
    CU_DEVICE_ATTRIBUTE_MAX_BLOCK_DIM_Z = 4,
    CU_DEVICE_ATTRIBUTE_MAX_GRID_DIM_X = 5,
    CU_DEVICE_ATTRIBUTE_MAX_GRID_DIM_Y = 6,
    CU_DEVICE_ATTRIBUTE_MAX_GRID_DIM_Z = 7,
    CU_DEVICE_ATTRIBUTE_MAX_SHARED_MEMORY_PER_BLOCK = 8,
    CU_DEVICE_ATTRIBUTE_SHARED_MEMORY_PER_BLOCK = 8,              /* second name for 8 */
    CU_DEVICE_ATTRIBUTE_TOTAL_CONSTANT_MEMORY = 9,
    CU_DEVICE_ATTRIBUTE_WARP_SIZE = 10,
    CU_DEVICE_ATTRIBUTE_MAX_PITCH = 11,
    CU_DEVICE_ATTRIBUTE_MAX_REGISTERS_PER_BLOCK = 12,
    CU_DEVICE_ATTRIBUTE_REGISTERS_PER_BLOCK = 12,                 /* second name for 12 */
    CU_DEVICE_ATTRIBUTE_CLOCK_RATE = 13,
    CU_DEVICE_ATTRIBUTE_TEXTURE_ALIGNMENT = 14,
    CU_DEVICE_ATTRIBUTE_GPU_OVERLAP = 15,
    CU_DEVICE_ATTRIBUTE_MULTIPROCESSOR_COUNT = 16,
    CU_DEVICE_ATTRIBUTE_KERNEL_EXEC_TIMEOUT = 17,
    CU_DEVICE_ATTRIBUTE_INTEGRATED = 18,
    CU_DEVICE_ATTRIBUTE_CAN_MAP_HOST_MEMORY = 19,
    CU_DEVICE_ATTRIBUTE_COMPUTE_MODE = 20,
    CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE1D_WIDTH = 21,
    CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_WIDTH = 22,
    CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_HEIGHT = 23,
    CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE3D_WIDTH = 24,
    CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE3D_HEIGHT = 25,
    CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE3D_DEPTH = 26,
    CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_LAYERED_WIDTH = 27,
    CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_LAYERED_HEIGHT = 28,
    CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_LAYERED_LAYERS = 29,
    CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_ARRAY_WIDTH = 27,       /* second name for 27 */
    CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_ARRAY_HEIGHT = 28,      /* second name for 28 */
    CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_ARRAY_NUMSLICES = 29,   /* second name for 29 */
    CU_DEVICE_ATTRIBUTE_SURFACE_ALIGNMENT = 30,
    CU_DEVICE_ATTRIBUTE_CONCURRENT_KERNELS = 31,
    CU_DEVICE_ATTRIBUTE_ECC_ENABLED = 32,
    CU_DEVICE_ATTRIBUTE_PCI_BUS_ID = 33,
    CU_DEVICE_ATTRIBUTE_PCI_DEVICE_ID = 34,
    CU_DEVICE_ATTRIBUTE_TCC_DRIVER = 35,
    CU_DEVICE_ATTRIBUTE_MEMORY_CLOCK_RATE = 36,
    CU_DEVICE_ATTRIBUTE_GLOBAL_MEMORY_BUS_WIDTH = 37,
    CU_DEVICE_ATTRIBUTE_L2_CACHE_SIZE = 38,
    CU_DEVICE_ATTRIBUTE_MAX_THREADS_PER_MULTIPROCESSOR = 39,
    CU_DEVICE_ATTRIBUTE_ASYNC_ENGINE_COUNT = 40,
    CU_DEVICE_ATTRIBUTE_UNIFIED_ADDRESSING = 41,
    CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE1D_LAYERED_WIDTH = 42,
    CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE1D_LAYERED_LAYERS = 43,
    CU_DEVICE_ATTRIBUTE_CAN_TEX2D_GATHER = 44,
    CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_GATHER_WIDTH = 45,
    CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_GATHER_HEIGHT = 46,
    CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE3D_WIDTH_ALTERNATE = 47,
    CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE3D_HEIGHT_ALTERNATE = 48,
    CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE3D_DEPTH_ALTERNATE = 49,
    CU_DEVICE_ATTRIBUTE_PCI_DOMAIN_ID = 50,
    CU_DEVICE_ATTRIBUTE_TEXTURE_PITCH_ALIGNMENT = 51,
    CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURECUBEMAP_WIDTH = 52,
    CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURECUBEMAP_LAYERED_WIDTH = 53,
    CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURECUBEMAP_LAYERED_LAYERS = 54,
    CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE1D_WIDTH = 55,
    CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE2D_WIDTH = 56,
    CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE2D_HEIGHT = 57,
    CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE3D_WIDTH = 58,
    CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE3D_HEIGHT = 59,
    CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE3D_DEPTH = 60,
    CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE1D_LAYERED_WIDTH = 61,
    CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE1D_LAYERED_LAYERS = 62,
    CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE2D_LAYERED_WIDTH = 63,
    CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE2D_LAYERED_HEIGHT = 64,
    CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE2D_LAYERED_LAYERS = 65,
    CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACECUBEMAP_WIDTH = 66,
    CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACECUBEMAP_LAYERED_WIDTH = 67,
    CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACECUBEMAP_LAYERED_LAYERS = 68,
    CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE1D_LINEAR_WIDTH = 69,
    CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_LINEAR_WIDTH = 70,
    CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_LINEAR_HEIGHT = 71,
    CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_LINEAR_PITCH = 72,
    CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_MIPMAPPED_WIDTH = 73,
    CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_MIPMAPPED_HEIGHT = 74,
    CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MAJOR = 75,
    CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MINOR = 76,
    CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE1D_MIPMAPPED_WIDTH = 77,
    CU_DEVICE_ATTRIBUTE_STREAM_PRIORITIES_SUPPORTED = 78,
    CU_DEVICE_ATTRIBUTE_GLOBAL_L1_CACHE_SUPPORTED = 79,
    CU_DEVICE_ATTRIBUTE_LOCAL_L1_CACHE_SUPPORTED = 80,
    CU_DEVICE_ATTRIBUTE_MAX_SHARED_MEMORY_PER_MULTIPROCESSOR = 81,
    CU_DEVICE_ATTRIBUTE_MAX_REGISTERS_PER_MULTIPROCESSOR = 82,
    CU_DEVICE_ATTRIBUTE_MANAGED_MEMORY = 83,
    CU_DEVICE_ATTRIBUTE_MULTI_GPU_BOARD = 84,
    CU_DEVICE_ATTRIBUTE_MULTI_GPU_BOARD_GROUP_ID = 85,
    CU_DEVICE_ATTRIBUTE_HOST_NATIVE_ATOMIC_SUPPORTED = 86,
    CU_DEVICE_ATTRIBUTE_SINGLE_TO_DOUBLE_PRECISION_PERF_RATIO = 87,
    CU_DEVICE_ATTRIBUTE_PAGEABLE_MEMORY_ACCESS = 88,
    CU_DEVICE_ATTRIBUTE_CONCURRENT_MANAGED_ACCESS = 89,
    CU_DEVICE_ATTRIBUTE_COMPUTE_PREEMPTION_SUPPORTED = 90,
    CU_DEVICE_ATTRIBUTE_CAN_USE_HOST_POINTER_FOR_REGISTERED_MEM = 91,
    CU_DEVICE_ATTRIBUTE_CAN_USE_STREAM_MEM_OPS = 92,
    CU_DEVICE_ATTRIBUTE_CAN_USE_64_BIT_STREAM_MEM_OPS = 93,
    CU_DEVICE_ATTRIBUTE_CAN_USE_STREAM_WAIT_VALUE_NOR = 94,
    CU_DEVICE_ATTRIBUTE_COOPERATIVE_LAUNCH = 95,
    CU_DEVICE_ATTRIBUTE_COOPERATIVE_MULTI_DEVICE_LAUNCH = 96,
    CU_DEVICE_ATTRIBUTE_MAX_SHARED_MEMORY_PER_BLOCK_OPTIN = 97,
    CU_DEVICE_ATTRIBUTE_CAN_FLUSH_REMOTE_WRITES = 98,
    CU_DEVICE_ATTRIBUTE_HOST_REGISTER_SUPPORTED = 99,
    CU_DEVICE_ATTRIBUTE_PAGEABLE_MEMORY_ACCESS_USES_HOST_PAGE_TABLES = 100,
    CU_DEVICE_ATTRIBUTE_DIRECT_MANAGED_MEM_ACCESS_FROM_HOST = 101,
    CU_DEVICE_ATTRIBUTE_VIRTUAL_ADDRESS_MANAGEMENT_SUPPORTED = 102,
    CU_DEVICE_ATTRIBUTE_VIRTUAL_MEMORY_MANAGEMENT_SUPPORTED = 102, /* second name for 102 */
    CU_DEVICE_ATTRIBUTE_HANDLE_TYPE_POSIX_FILE_DESCRIPTOR_SUPPORTED = 103,
    CU_DEVICE_ATTRIBUTE_HANDLE_TYPE_WIN32_HANDLE_SUPPORTED = 104,
    CU_DEVICE_ATTRIBUTE_HANDLE_TYPE_WIN32_KMT_HANDLE_SUPPORTED = 105,
    CU_DEVICE_ATTRIBUTE_MAX_BLOCKS_PER_MULTIPROCESSOR = 106,
    CU_DEVICE_ATTRIBUTE_GENERIC_COMPRESSION_SUPPORTED = 107,
    CU_DEVICE_ATTRIBUTE_MAX_PERSISTING_L2_CACHE_SIZE = 108,
    CU_DEVICE_ATTRIBUTE_MAX_ACCESS_POLICY_WINDOW_SIZE = 109,
    CU_DEVICE_ATTRIBUTE_GPU_DIRECT_RDMA_WITH_CUDA_VMM_SUPPORTED = 110,
    CU_DEVICE_ATTRIBUTE_RESERVED_SHARED_MEMORY_PER_BLOCK = 111,
    CU_DEVICE_ATTRIBUTE_SPARSE_CUDA_ARRAY_SUPPORTED = 112,
    CU_DEVICE_ATTRIBUTE_READ_ONLY_HOST_REGISTER_SUPPORTED = 113,
    CU_DEVICE_ATTRIBUTE_TIMELINE_SEMAPHORE_INTEROP_SUPPORTED = 114,
    CU_DEVICE_ATTRIBUTE_MEMORY_POOLS_SUPPORTED = 115,
    CU_DEVICE_ATTRIBUTE_MAX                                        /* implicit: 116 */
} CUdevice_attribute;
/* Plain struct of int fields (fourteen ints in total, two of them 3-element arrays). */
typedef struct CUdevprop_st {
    int maxThreadsPerBlock;
    int maxThreadsDim[3];
    int maxGridSize[3];
    int sharedMemPerBlock;
    int totalConstantMemory;
    int SIMDWidth;
    int memPitch;
    int regsPerBlock;
    int clockRate;
    int textureAlign;
} CUdevprop;
/* CUpointer_attribute values, numbered consecutively from 1 to 16. */
typedef enum CUpointer_attribute_enum {
    CU_POINTER_ATTRIBUTE_CONTEXT = 1,
    CU_POINTER_ATTRIBUTE_MEMORY_TYPE = 2,
    CU_POINTER_ATTRIBUTE_DEVICE_POINTER = 3,
    CU_POINTER_ATTRIBUTE_HOST_POINTER = 4,
    CU_POINTER_ATTRIBUTE_P2P_TOKENS = 5,
    CU_POINTER_ATTRIBUTE_SYNC_MEMOPS = 6,
    CU_POINTER_ATTRIBUTE_BUFFER_ID = 7,
    CU_POINTER_ATTRIBUTE_IS_MANAGED = 8,
    CU_POINTER_ATTRIBUTE_DEVICE_ORDINAL = 9,
    CU_POINTER_ATTRIBUTE_IS_LEGACY_CUDA_IPC_CAPABLE = 10,
    CU_POINTER_ATTRIBUTE_RANGE_START_ADDR = 11,
    CU_POINTER_ATTRIBUTE_RANGE_SIZE = 12,
    CU_POINTER_ATTRIBUTE_MAPPED = 13,
    CU_POINTER_ATTRIBUTE_ALLOWED_HANDLE_TYPES = 14,
    CU_POINTER_ATTRIBUTE_IS_GPU_DIRECT_RDMA_CAPABLE = 15,
    CU_POINTER_ATTRIBUTE_ACCESS_FLAGS = 16
} CUpointer_attribute;
/*
 * CUfunction_attribute values, numbered consecutively from 0.
 * CU_FUNC_ATTRIBUTE_MAX has no explicit value and so equals the number of
 * enumerators declared before it.
 */
typedef enum CUfunction_attribute_enum {
    CU_FUNC_ATTRIBUTE_MAX_THREADS_PER_BLOCK = 0,
    CU_FUNC_ATTRIBUTE_SHARED_SIZE_BYTES = 1,
    CU_FUNC_ATTRIBUTE_CONST_SIZE_BYTES = 2,
    CU_FUNC_ATTRIBUTE_LOCAL_SIZE_BYTES = 3,
    CU_FUNC_ATTRIBUTE_NUM_REGS = 4,
    CU_FUNC_ATTRIBUTE_PTX_VERSION = 5,
    CU_FUNC_ATTRIBUTE_BINARY_VERSION = 6,
    CU_FUNC_ATTRIBUTE_CACHE_MODE_CA = 7,
    CU_FUNC_ATTRIBUTE_MAX_DYNAMIC_SHARED_SIZE_BYTES = 8,
    CU_FUNC_ATTRIBUTE_PREFERRED_SHARED_MEMORY_CARVEOUT = 9,
    CU_FUNC_ATTRIBUTE_MAX                                   /* implicit: 10 */
} CUfunction_attribute;
/* CUfunc_cache values. */
typedef enum CUfunc_cache_enum {
    CU_FUNC_CACHE_PREFER_NONE   = 0,
    CU_FUNC_CACHE_PREFER_SHARED = 1,
    CU_FUNC_CACHE_PREFER_L1     = 2,
    CU_FUNC_CACHE_PREFER_EQUAL  = 3
} CUfunc_cache;

/* CUsharedconfig values. */
typedef enum CUsharedconfig_enum {
    CU_SHARED_MEM_CONFIG_DEFAULT_BANK_SIZE    = 0,
    CU_SHARED_MEM_CONFIG_FOUR_BYTE_BANK_SIZE  = 1,
    CU_SHARED_MEM_CONFIG_EIGHT_BYTE_BANK_SIZE = 2
} CUsharedconfig;

/* CUshared_carveout values; this enum contains a negative enumerator. */
typedef enum CUshared_carveout_enum {
    CU_SHAREDMEM_CARVEOUT_DEFAULT    = -1,
    CU_SHAREDMEM_CARVEOUT_MAX_SHARED = 100,
    CU_SHAREDMEM_CARVEOUT_MAX_L1     = 0
} CUshared_carveout;
/* CUmemorytype values, numbered from 1. */
typedef enum CUmemorytype_enum {
    CU_MEMORYTYPE_HOST    = 1,
    CU_MEMORYTYPE_DEVICE  = 2,
    CU_MEMORYTYPE_ARRAY   = 3,
    CU_MEMORYTYPE_UNIFIED = 4
} CUmemorytype;

/* CUcomputemode values; note that the value 1 is not assigned here. */
typedef enum CUcomputemode_enum {
    CU_COMPUTEMODE_DEFAULT           = 0,
    CU_COMPUTEMODE_PROHIBITED        = 2,
    CU_COMPUTEMODE_EXCLUSIVE_PROCESS = 3
} CUcomputemode;
/* CUmem_advise values: SET/UNSET pairs numbered 1..6. */
typedef enum CUmem_advise_enum {
    CU_MEM_ADVISE_SET_READ_MOSTLY = 1,
    CU_MEM_ADVISE_UNSET_READ_MOSTLY = 2,
    CU_MEM_ADVISE_SET_PREFERRED_LOCATION = 3,
    CU_MEM_ADVISE_UNSET_PREFERRED_LOCATION = 4,
    CU_MEM_ADVISE_SET_ACCESSED_BY = 5,
    CU_MEM_ADVISE_UNSET_ACCESSED_BY = 6
} CUmem_advise;

/* CUmem_range_attribute values, numbered 1..4. */
typedef enum CUmem_range_attribute_enum {
    CU_MEM_RANGE_ATTRIBUTE_READ_MOSTLY = 1,
    CU_MEM_RANGE_ATTRIBUTE_PREFERRED_LOCATION = 2,
    CU_MEM_RANGE_ATTRIBUTE_ACCESSED_BY = 3,
    CU_MEM_RANGE_ATTRIBUTE_LAST_PREFETCH_LOCATION = 4
} CUmem_range_attribute;
/*
 * CUjit_option values. Only the first enumerator has an explicit value;
 * the rest count upward implicitly (shown in the trailing comments), so
 * CU_JIT_NUM_OPTIONS equals the number of options declared before it.
 */
typedef enum CUjit_option_enum {
    CU_JIT_MAX_REGISTERS = 0,
    CU_JIT_THREADS_PER_BLOCK,            /*  1 */
    CU_JIT_WALL_TIME,                    /*  2 */
    CU_JIT_INFO_LOG_BUFFER,              /*  3 */
    CU_JIT_INFO_LOG_BUFFER_SIZE_BYTES,   /*  4 */
    CU_JIT_ERROR_LOG_BUFFER,             /*  5 */
    CU_JIT_ERROR_LOG_BUFFER_SIZE_BYTES,  /*  6 */
    CU_JIT_OPTIMIZATION_LEVEL,           /*  7 */
    CU_JIT_TARGET_FROM_CUCONTEXT,        /*  8 */
    CU_JIT_TARGET,                       /*  9 */
    CU_JIT_FALLBACK_STRATEGY,            /* 10 */
    CU_JIT_GENERATE_DEBUG_INFO,          /* 11 */
    CU_JIT_LOG_VERBOSE,                  /* 12 */
    CU_JIT_GENERATE_LINE_INFO,           /* 13 */
    CU_JIT_CACHE_MODE,                   /* 14 */
    CU_JIT_NEW_SM3X_OPT,                 /* 15 */
    CU_JIT_FAST_COMPILE,                 /* 16 */
    CU_JIT_GLOBAL_SYMBOL_NAMES,          /* 17 */
    CU_JIT_GLOBAL_SYMBOL_ADDRESSES,      /* 18 */
    CU_JIT_GLOBAL_SYMBOL_COUNT,          /* 19 */
    CU_JIT_NUM_OPTIONS                   /* 20 */
} CUjit_option;
/*
 * CUjit_target values. Each enumerator's numeric value is the same
 * two-digit number that appears at the end of its name.
 */
typedef enum CUjit_target_enum {
    CU_TARGET_COMPUTE_20 = 20,
    CU_TARGET_COMPUTE_21 = 21,
    CU_TARGET_COMPUTE_30 = 30,
    CU_TARGET_COMPUTE_32 = 32,
    CU_TARGET_COMPUTE_35 = 35,
    CU_TARGET_COMPUTE_37 = 37,
    CU_TARGET_COMPUTE_50 = 50,
    CU_TARGET_COMPUTE_52 = 52,
    CU_TARGET_COMPUTE_53 = 53,
    CU_TARGET_COMPUTE_60 = 60,
    CU_TARGET_COMPUTE_61 = 61,
    CU_TARGET_COMPUTE_62 = 62,
    CU_TARGET_COMPUTE_70 = 70,
    CU_TARGET_COMPUTE_72 = 72,
    CU_TARGET_COMPUTE_75 = 75,
    CU_TARGET_COMPUTE_80 = 80,
    CU_TARGET_COMPUTE_86 = 86
} CUjit_target;
/* CUjit_fallback values (implicit numbering after the first). */
typedef enum CUjit_fallback_enum {
    CU_PREFER_PTX = 0,
    CU_PREFER_BINARY              /* 1 */
} CUjit_fallback;

/* CUjit_cacheMode values (implicit numbering after the first). */
typedef enum CUjit_cacheMode_enum {
    CU_JIT_CACHE_OPTION_NONE = 0,
    CU_JIT_CACHE_OPTION_CG,       /* 1 */
    CU_JIT_CACHE_OPTION_CA        /* 2 */
} CUjit_cacheMode;

/*
 * CUjitInputType values; CU_JIT_NUM_INPUT_TYPES equals the number of
 * input types declared before it.
 */
typedef enum CUjitInputType_enum {
    CU_JIT_INPUT_CUBIN = 0,
    CU_JIT_INPUT_PTX,             /* 1 */
    CU_JIT_INPUT_FATBINARY,       /* 2 */
    CU_JIT_INPUT_OBJECT,          /* 3 */
    CU_JIT_INPUT_LIBRARY,         /* 4 */
    CU_JIT_NUM_INPUT_TYPES        /* 5 */
} CUjitInputType;

/* Opaque handle: pointer to the forward-declared struct CUlinkState_st. */
typedef struct CUlinkState_st *CUlinkState;
/* CUgraphicsRegisterFlags values; the non-zero ones each occupy a distinct bit. */
typedef enum CUgraphicsRegisterFlags_enum {
    CU_GRAPHICS_REGISTER_FLAGS_NONE           = 0,
    CU_GRAPHICS_REGISTER_FLAGS_READ_ONLY      = 1,
    CU_GRAPHICS_REGISTER_FLAGS_WRITE_DISCARD  = 2,
    CU_GRAPHICS_REGISTER_FLAGS_SURFACE_LDST   = 4,
    CU_GRAPHICS_REGISTER_FLAGS_TEXTURE_GATHER = 8
} CUgraphicsRegisterFlags;

/* CUgraphicsMapResourceFlags values. */
typedef enum CUgraphicsMapResourceFlags_enum {
    CU_GRAPHICS_MAP_RESOURCE_FLAGS_NONE          = 0,
    CU_GRAPHICS_MAP_RESOURCE_FLAGS_READ_ONLY     = 1,
    CU_GRAPHICS_MAP_RESOURCE_FLAGS_WRITE_DISCARD = 2
} CUgraphicsMapResourceFlags;

/* CUarray_cubemap_face values: +X, -X, +Y, -Y, +Z, -Z numbered 0..5. */
typedef enum CUarray_cubemap_face_enum {
    CU_CUBEMAP_FACE_POSITIVE_X = 0,
    CU_CUBEMAP_FACE_NEGATIVE_X = 1,
    CU_CUBEMAP_FACE_POSITIVE_Y = 2,
    CU_CUBEMAP_FACE_NEGATIVE_Y = 3,
    CU_CUBEMAP_FACE_POSITIVE_Z = 4,
    CU_CUBEMAP_FACE_NEGATIVE_Z = 5
} CUarray_cubemap_face;
/*
 * CUlimit values, numbered consecutively from 0. CU_LIMIT_MAX has no
 * explicit value and so equals the number of limits declared before it.
 */
typedef enum CUlimit_enum {
    CU_LIMIT_STACK_SIZE = 0,
    CU_LIMIT_PRINTF_FIFO_SIZE = 1,
    CU_LIMIT_MALLOC_HEAP_SIZE = 2,
    CU_LIMIT_DEV_RUNTIME_SYNC_DEPTH = 3,
    CU_LIMIT_DEV_RUNTIME_PENDING_LAUNCH_COUNT = 4,
    CU_LIMIT_MAX_L2_FETCH_GRANULARITY = 5,
    CU_LIMIT_PERSISTING_L2_CACHE_SIZE = 6,
    CU_LIMIT_MAX                               /* implicit: 7 */
} CUlimit;

/* CUresourcetype values. */
typedef enum CUresourcetype_enum {
    CU_RESOURCE_TYPE_ARRAY = 0,
    CU_RESOURCE_TYPE_MIPMAPPED_ARRAY = 1,
    CU_RESOURCE_TYPE_LINEAR = 2,
    CU_RESOURCE_TYPE_PITCH2D = 3
} CUresourcetype;
/* Callback pointer type: takes one void* argument, returns nothing; declared with CUDA_CB. */
typedef void (CUDA_CB *CUhostFn)(void *userData);
/* CUaccessProperty values. */
typedef enum CUaccessProperty_enum {
    CU_ACCESS_PROPERTY_NORMAL     = 0,
    CU_ACCESS_PROPERTY_STREAMING  = 1,
    CU_ACCESS_PROPERTY_PERSISTING = 2
} CUaccessProperty;

/*
 * A base pointer plus byte count, a float ratio, and two CUaccessProperty
 * fields (hitProp / missProp).
 */
typedef struct CUaccessPolicyWindow_st {
    void *base_ptr;
    size_t num_bytes;
    float hitRatio;
    CUaccessProperty hitProp;
    CUaccessProperty missProp;
} CUaccessPolicyWindow;
/*
 * Kernel-node parameters: a CUfunction, three grid and three block
 * dimensions, a shared-memory byte count, and two void** arrays.
 */
typedef struct CUDA_KERNEL_NODE_PARAMS_st {
    CUfunction func;
    unsigned int gridDimX;
    unsigned int gridDimY;
    unsigned int gridDimZ;
    unsigned int blockDimX;
    unsigned int blockDimY;
    unsigned int blockDimZ;
    unsigned int sharedMemBytes;
    void **kernelParams;
    void **extra;
} CUDA_KERNEL_NODE_PARAMS;

/* Memset-node parameters: destination, pitch, value, element size, width and height. */
typedef struct CUDA_MEMSET_NODE_PARAMS_st {
    CUdeviceptr dst;
    size_t pitch;
    unsigned int value;
    unsigned int elementSize;
    size_t width;
    size_t height;
} CUDA_MEMSET_NODE_PARAMS;

/* Host-node parameters: a CUhostFn callback and the void* stored next to it. */
typedef struct CUDA_HOST_NODE_PARAMS_st {
    CUhostFn fn;
    void *userData;
} CUDA_HOST_NODE_PARAMS;
/* CUgraphNodeType values, numbered consecutively from 0 to 9. */
typedef enum CUgraphNodeType_enum {
    CU_GRAPH_NODE_TYPE_KERNEL = 0,
    CU_GRAPH_NODE_TYPE_MEMCPY = 1,
    CU_GRAPH_NODE_TYPE_MEMSET = 2,
    CU_GRAPH_NODE_TYPE_HOST = 3,
    CU_GRAPH_NODE_TYPE_GRAPH = 4,
    CU_GRAPH_NODE_TYPE_EMPTY = 5,
    CU_GRAPH_NODE_TYPE_WAIT_EVENT = 6,
    CU_GRAPH_NODE_TYPE_EVENT_RECORD = 7,
    CU_GRAPH_NODE_TYPE_EXT_SEMAS_SIGNAL = 8,
    CU_GRAPH_NODE_TYPE_EXT_SEMAS_WAIT = 9
} CUgraphNodeType;

/* CUsynchronizationPolicy values, numbered from 1. */
typedef enum CUsynchronizationPolicy_enum {
    CU_SYNC_POLICY_AUTO = 1,
    CU_SYNC_POLICY_SPIN = 2,
    CU_SYNC_POLICY_YIELD = 3,
    CU_SYNC_POLICY_BLOCKING_SYNC = 4
} CUsynchronizationPolicy;

/* CUkernelNodeAttrID values. */
typedef enum CUkernelNodeAttrID_enum {
    CU_KERNEL_NODE_ATTRIBUTE_ACCESS_POLICY_WINDOW = 1,
    CU_KERNEL_NODE_ATTRIBUTE_COOPERATIVE = 2
} CUkernelNodeAttrID;
/* Union holding either a CUaccessPolicyWindow or an int named cooperative. */
typedef union CUkernelNodeAttrValue_union {
    CUaccessPolicyWindow accessPolicyWindow;
    int cooperative;
} CUkernelNodeAttrValue;
/* CUstreamCaptureStatus values. */
typedef enum CUstreamCaptureStatus_enum {
    CU_STREAM_CAPTURE_STATUS_NONE = 0,
    CU_STREAM_CAPTURE_STATUS_ACTIVE = 1,
    CU_STREAM_CAPTURE_STATUS_INVALIDATED = 2
} CUstreamCaptureStatus;

/* CUstreamCaptureMode values. */
typedef enum CUstreamCaptureMode_enum {
    CU_STREAM_CAPTURE_MODE_GLOBAL = 0,
    CU_STREAM_CAPTURE_MODE_THREAD_LOCAL = 1,
    CU_STREAM_CAPTURE_MODE_RELAXED = 2
} CUstreamCaptureMode;

/* CUstreamAttrID values; note that the value 2 is not assigned here. */
typedef enum CUstreamAttrID_enum {
    CU_STREAM_ATTRIBUTE_ACCESS_POLICY_WINDOW = 1,
    CU_STREAM_ATTRIBUTE_SYNCHRONIZATION_POLICY = 3
} CUstreamAttrID;
/* Union holding either a CUaccessPolicyWindow or a CUsynchronizationPolicy. */
typedef union CUstreamAttrValue_union {
    CUaccessPolicyWindow accessPolicyWindow;
    CUsynchronizationPolicy syncPolicy;
} CUstreamAttrValue;
/*
 * CUresult: result codes. CUDA_SUCCESS is 0; every other enumerator is a
 * CUDA_ERROR_* code. The numbering is explicit and sparse, and the
 * enumerators are grouped below by their hundreds range.
 */
typedef enum cudaError_enum {
    CUDA_SUCCESS = 0,

    /* 1 .. 99 */
    CUDA_ERROR_INVALID_VALUE = 1,
    CUDA_ERROR_OUT_OF_MEMORY = 2,
    CUDA_ERROR_NOT_INITIALIZED = 3,
    CUDA_ERROR_DEINITIALIZED = 4,
    CUDA_ERROR_PROFILER_DISABLED = 5,
    CUDA_ERROR_PROFILER_NOT_INITIALIZED = 6,
    CUDA_ERROR_PROFILER_ALREADY_STARTED = 7,
    CUDA_ERROR_PROFILER_ALREADY_STOPPED = 8,
    CUDA_ERROR_STUB_LIBRARY = 34,

    /* 100 .. 199 */
    CUDA_ERROR_NO_DEVICE = 100,
    CUDA_ERROR_INVALID_DEVICE = 101,
    CUDA_ERROR_DEVICE_NOT_LICENSED = 102,

    /* 200 .. 299 */
    CUDA_ERROR_INVALID_IMAGE = 200,
    CUDA_ERROR_INVALID_CONTEXT = 201,
    CUDA_ERROR_CONTEXT_ALREADY_CURRENT = 202,
    CUDA_ERROR_MAP_FAILED = 205,
    CUDA_ERROR_UNMAP_FAILED = 206,
    CUDA_ERROR_ARRAY_IS_MAPPED = 207,
    CUDA_ERROR_ALREADY_MAPPED = 208,
    CUDA_ERROR_NO_BINARY_FOR_GPU = 209,
    CUDA_ERROR_ALREADY_ACQUIRED = 210,
    CUDA_ERROR_NOT_MAPPED = 211,
    CUDA_ERROR_NOT_MAPPED_AS_ARRAY = 212,
    CUDA_ERROR_NOT_MAPPED_AS_POINTER = 213,
    CUDA_ERROR_ECC_UNCORRECTABLE = 214,
    CUDA_ERROR_UNSUPPORTED_LIMIT = 215,
    CUDA_ERROR_CONTEXT_ALREADY_IN_USE = 216,
    CUDA_ERROR_PEER_ACCESS_UNSUPPORTED = 217,
    CUDA_ERROR_INVALID_PTX = 218,
    CUDA_ERROR_INVALID_GRAPHICS_CONTEXT = 219,
    CUDA_ERROR_NVLINK_UNCORRECTABLE = 220,
    CUDA_ERROR_JIT_COMPILER_NOT_FOUND = 221,
    CUDA_ERROR_UNSUPPORTED_PTX_VERSION = 222,
    CUDA_ERROR_JIT_COMPILATION_DISABLED = 223,

    /* 300 .. 399 */
    CUDA_ERROR_INVALID_SOURCE = 300,
    CUDA_ERROR_FILE_NOT_FOUND = 301,
    CUDA_ERROR_SHARED_OBJECT_SYMBOL_NOT_FOUND = 302,
    CUDA_ERROR_SHARED_OBJECT_INIT_FAILED = 303,
    CUDA_ERROR_OPERATING_SYSTEM = 304,

    /* 400 .. 699 */
    CUDA_ERROR_INVALID_HANDLE = 400,
    CUDA_ERROR_ILLEGAL_STATE = 401,
    CUDA_ERROR_NOT_FOUND = 500,
    CUDA_ERROR_NOT_READY = 600,

    /* 700 .. 799 */
    CUDA_ERROR_ILLEGAL_ADDRESS = 700,
    CUDA_ERROR_LAUNCH_OUT_OF_RESOURCES = 701,
    CUDA_ERROR_LAUNCH_TIMEOUT = 702,
    CUDA_ERROR_LAUNCH_INCOMPATIBLE_TEXTURING = 703,
    CUDA_ERROR_PEER_ACCESS_ALREADY_ENABLED = 704,
    CUDA_ERROR_PEER_ACCESS_NOT_ENABLED = 705,
    CUDA_ERROR_PRIMARY_CONTEXT_ACTIVE = 708,
    CUDA_ERROR_CONTEXT_IS_DESTROYED = 709,
    CUDA_ERROR_ASSERT = 710,
    CUDA_ERROR_TOO_MANY_PEERS = 711,
    CUDA_ERROR_HOST_MEMORY_ALREADY_REGISTERED = 712,
    CUDA_ERROR_HOST_MEMORY_NOT_REGISTERED = 713,
    CUDA_ERROR_HARDWARE_STACK_ERROR = 714,
    CUDA_ERROR_ILLEGAL_INSTRUCTION = 715,
    CUDA_ERROR_MISALIGNED_ADDRESS = 716,
    CUDA_ERROR_INVALID_ADDRESS_SPACE = 717,
    CUDA_ERROR_INVALID_PC = 718,
    CUDA_ERROR_LAUNCH_FAILED = 719,
    CUDA_ERROR_COOPERATIVE_LAUNCH_TOO_LARGE = 720,

    /* 800 .. 899 */
    CUDA_ERROR_NOT_PERMITTED = 800,
    CUDA_ERROR_NOT_SUPPORTED = 801,
    CUDA_ERROR_SYSTEM_NOT_READY = 802,
    CUDA_ERROR_SYSTEM_DRIVER_MISMATCH = 803,
    CUDA_ERROR_COMPAT_NOT_SUPPORTED_ON_DEVICE = 804,

    /* 900 .. 998 */
    CUDA_ERROR_STREAM_CAPTURE_UNSUPPORTED = 900,
    CUDA_ERROR_STREAM_CAPTURE_INVALIDATED = 901,
    CUDA_ERROR_STREAM_CAPTURE_MERGE = 902,
    CUDA_ERROR_STREAM_CAPTURE_UNMATCHED = 903,
    CUDA_ERROR_STREAM_CAPTURE_UNJOINED = 904,
    CUDA_ERROR_STREAM_CAPTURE_ISOLATION = 905,
    CUDA_ERROR_STREAM_CAPTURE_IMPLICIT = 906,
    CUDA_ERROR_CAPTURED_EVENT = 907,
    CUDA_ERROR_STREAM_CAPTURE_WRONG_THREAD = 908,
    CUDA_ERROR_TIMEOUT = 909,
    CUDA_ERROR_GRAPH_EXEC_UPDATE_FAILURE = 910,

    CUDA_ERROR_UNKNOWN = 999
} CUresult;
/* CUdevice_P2PAttribute values; the last two enumerators share the value 4. */
typedef enum CUdevice_P2PAttribute_enum {
    CU_DEVICE_P2P_ATTRIBUTE_PERFORMANCE_RANK = 1,
    CU_DEVICE_P2P_ATTRIBUTE_ACCESS_SUPPORTED = 2,
    CU_DEVICE_P2P_ATTRIBUTE_NATIVE_ATOMIC_SUPPORTED = 3,
    CU_DEVICE_P2P_ATTRIBUTE_ACCESS_ACCESS_SUPPORTED = 4,
    CU_DEVICE_P2P_ATTRIBUTE_CUDA_ARRAY_ACCESS_SUPPORTED = 4
} CUdevice_P2PAttribute;
/* Callback pointer type taking a stream handle, a CUresult and a void*; returns nothing. */
typedef void (CUDA_CB *CUstreamCallback)(CUstream hStream, CUresult status, void *userData);
/* Callback pointer type mapping an int block size to a size_t. */
typedef size_t (CUDA_CB *CUoccupancyB2DSize)(int blockSize);
/*
 * 2D copy descriptor: a block of src* fields, a parallel block of dst*
 * fields, then the extent (WidthInBytes x Height).
 */
typedef struct CUDA_MEMCPY2D_st {
    /* Source. */
    size_t srcXInBytes;
    size_t srcY;
    CUmemorytype srcMemoryType;
    const void *srcHost;
    CUdeviceptr srcDevice;
    CUarray srcArray;
    size_t srcPitch;

    /* Destination. */
    size_t dstXInBytes;
    size_t dstY;
    CUmemorytype dstMemoryType;
    void *dstHost;
    CUdeviceptr dstDevice;
    CUarray dstArray;
    size_t dstPitch;

    /* Extent. */
    size_t WidthInBytes;
    size_t Height;
} CUDA_MEMCPY2D;
/*
 * 3D copy descriptor: a block of src* fields, a parallel block of dst*
 * fields, then the extent (WidthInBytes x Height x Depth). Each block
 * contains one void* field named reserved0 / reserved1.
 */
typedef struct CUDA_MEMCPY3D_st {
    /* Source. */
    size_t srcXInBytes;
    size_t srcY;
    size_t srcZ;
    size_t srcLOD;
    CUmemorytype srcMemoryType;
    const void *srcHost;
    CUdeviceptr srcDevice;
    CUarray srcArray;
    void *reserved0;
    size_t srcPitch;
    size_t srcHeight;

    /* Destination. */
    size_t dstXInBytes;
    size_t dstY;
    size_t dstZ;
    size_t dstLOD;
    CUmemorytype dstMemoryType;
    void *dstHost;
    CUdeviceptr dstDevice;
    CUarray dstArray;
    void *reserved1;
    size_t dstPitch;
    size_t dstHeight;

    /* Extent. */
    size_t WidthInBytes;
    size_t Height;
    size_t Depth;
} CUDA_MEMCPY3D;
/*
 * 3D peer copy descriptor. Field-for-field the same order as
 * CUDA_MEMCPY3D, except that the two reserved void* slots are replaced by
 * CUcontext fields (srcContext / dstContext).
 */
typedef struct CUDA_MEMCPY3D_PEER_st {
    /* Source. */
    size_t srcXInBytes;
    size_t srcY;
    size_t srcZ;
    size_t srcLOD;
    CUmemorytype srcMemoryType;
    const void *srcHost;
    CUdeviceptr srcDevice;
    CUarray srcArray;
    CUcontext srcContext;
    size_t srcPitch;
    size_t srcHeight;

    /* Destination. */
    size_t dstXInBytes;
    size_t dstY;
    size_t dstZ;
    size_t dstLOD;
    CUmemorytype dstMemoryType;
    void *dstHost;
    CUdeviceptr dstDevice;
    CUarray dstArray;
    CUcontext dstContext;
    size_t dstPitch;
    size_t dstHeight;

    /* Extent. */
    size_t WidthInBytes;
    size_t Height;
    size_t Depth;
} CUDA_MEMCPY3D_PEER;
/* Array descriptor: width, height, element format and channel count. */
typedef struct CUDA_ARRAY_DESCRIPTOR_st {
    size_t Width;
    size_t Height;
    CUarray_format Format;
    unsigned int NumChannels;
} CUDA_ARRAY_DESCRIPTOR;

/* 3D array descriptor: adds Depth and a Flags word to the fields above. */
typedef struct CUDA_ARRAY3D_DESCRIPTOR_st {
    size_t Width;
    size_t Height;
    size_t Depth;
    CUarray_format Format;
    unsigned int NumChannels;
    unsigned int Flags;
} CUDA_ARRAY3D_DESCRIPTOR;
/*
 * Sparse-array properties: a nested width/height/depth triple
 * (tileExtent), two miptail fields, a flags word and four reserved words.
 */
typedef struct CUDA_ARRAY_SPARSE_PROPERTIES_st {
    struct {
        unsigned int width;
        unsigned int height;
        unsigned int depth;
    } tileExtent;
    unsigned int miptailFirstLevel;
    unsigned long long miptailSize;
    unsigned int flags;
    unsigned int reserved[4];
} CUDA_ARRAY_SPARSE_PROPERTIES;
/*
 * Resource descriptor: a CUresourcetype tag, a union 'res' with one member
 * struct per kind of resource plus a 32-int reserved member, and a flags
 * word.
 * NOTE(review): presumably resType selects which member of 'res' is
 * meaningful - confirm against the API documentation.
 */
typedef struct CUDA_RESOURCE_DESC_st {
    CUresourcetype resType;
    union {
        struct {
            CUarray hArray;
        } array;
        struct {
            CUmipmappedArray hMipmappedArray;
        } mipmap;
        struct {
            CUdeviceptr devPtr;
            CUarray_format format;
            unsigned int numChannels;
            size_t sizeInBytes;
        } linear;
        struct {
            CUdeviceptr devPtr;
            CUarray_format format;
            unsigned int numChannels;
            size_t width;
            size_t height;
            size_t pitchInBytes;
        } pitch2D;
        struct {
            int reserved[32];
        } reserved;
    } res;
    unsigned int flags;
} CUDA_RESOURCE_DESC;
/*
 * Texture descriptor: three address modes, filter modes, flags, anisotropy,
 * mipmap bias and clamp values, a 4-float border colour and 12 reserved
 * ints.
 */
typedef struct CUDA_TEXTURE_DESC_st {
    CUaddress_mode addressMode[3];
    CUfilter_mode filterMode;
    unsigned int flags;
    unsigned int maxAnisotropy;
    CUfilter_mode mipmapFilterMode;
    float mipmapLevelBias;
    float minMipmapLevelClamp;
    float maxMipmapLevelClamp;
    float borderColor[4];
    int reserved[12];
} CUDA_TEXTURE_DESC;
/* CUresourceViewFormat values, numbered consecutively from 0x00 to 0x22. */
typedef enum CUresourceViewFormat_enum {
    CU_RES_VIEW_FORMAT_NONE = 0x00,

    /* UINT / SINT entries, in 1X / 2X / 4X triples per bit width. */
    CU_RES_VIEW_FORMAT_UINT_1X8 = 0x01,
    CU_RES_VIEW_FORMAT_UINT_2X8 = 0x02,
    CU_RES_VIEW_FORMAT_UINT_4X8 = 0x03,
    CU_RES_VIEW_FORMAT_SINT_1X8 = 0x04,
    CU_RES_VIEW_FORMAT_SINT_2X8 = 0x05,
    CU_RES_VIEW_FORMAT_SINT_4X8 = 0x06,
    CU_RES_VIEW_FORMAT_UINT_1X16 = 0x07,
    CU_RES_VIEW_FORMAT_UINT_2X16 = 0x08,
    CU_RES_VIEW_FORMAT_UINT_4X16 = 0x09,
    CU_RES_VIEW_FORMAT_SINT_1X16 = 0x0a,
    CU_RES_VIEW_FORMAT_SINT_2X16 = 0x0b,
    CU_RES_VIEW_FORMAT_SINT_4X16 = 0x0c,
    CU_RES_VIEW_FORMAT_UINT_1X32 = 0x0d,
    CU_RES_VIEW_FORMAT_UINT_2X32 = 0x0e,
    CU_RES_VIEW_FORMAT_UINT_4X32 = 0x0f,
    CU_RES_VIEW_FORMAT_SINT_1X32 = 0x10,
    CU_RES_VIEW_FORMAT_SINT_2X32 = 0x11,
    CU_RES_VIEW_FORMAT_SINT_4X32 = 0x12,

    /* FLOAT entries. */
    CU_RES_VIEW_FORMAT_FLOAT_1X16 = 0x13,
    CU_RES_VIEW_FORMAT_FLOAT_2X16 = 0x14,
    CU_RES_VIEW_FORMAT_FLOAT_4X16 = 0x15,
    CU_RES_VIEW_FORMAT_FLOAT_1X32 = 0x16,
    CU_RES_VIEW_FORMAT_FLOAT_2X32 = 0x17,
    CU_RES_VIEW_FORMAT_FLOAT_4X32 = 0x18,

    /* BC1 .. BC7 entries. */
    CU_RES_VIEW_FORMAT_UNSIGNED_BC1 = 0x19,
    CU_RES_VIEW_FORMAT_UNSIGNED_BC2 = 0x1a,
    CU_RES_VIEW_FORMAT_UNSIGNED_BC3 = 0x1b,
    CU_RES_VIEW_FORMAT_UNSIGNED_BC4 = 0x1c,
    CU_RES_VIEW_FORMAT_SIGNED_BC4 = 0x1d,
    CU_RES_VIEW_FORMAT_UNSIGNED_BC5 = 0x1e,
    CU_RES_VIEW_FORMAT_SIGNED_BC5 = 0x1f,
    CU_RES_VIEW_FORMAT_UNSIGNED_BC6H = 0x20,
    CU_RES_VIEW_FORMAT_SIGNED_BC6H = 0x21,
    CU_RES_VIEW_FORMAT_UNSIGNED_BC7 = 0x22
} CUresourceViewFormat;
/*
 * Resource-view descriptor: a view format, a width/height/depth extent,
 * first/last mipmap level, first/last layer, and 16 reserved words.
 */
typedef struct CUDA_RESOURCE_VIEW_DESC_st {
    CUresourceViewFormat format;
    size_t width;
    size_t height;
    size_t depth;
    unsigned int firstMipmapLevel;
    unsigned int lastMipmapLevel;
    unsigned int firstLayer;
    unsigned int lastLayer;
    unsigned int reserved[16];
} CUDA_RESOURCE_VIEW_DESC;
/* Pair of tokens: a 64-bit p2pToken followed by an unsigned int vaSpaceToken. */
typedef struct CUDA_POINTER_ATTRIBUTE_P2P_TOKENS_st {
  unsigned long long p2pToken;
  unsigned int vaSpaceToken;
} CUDA_POINTER_ATTRIBUTE_P2P_TOKENS;
/*
 * Pointer access flags. READWRITE (0x3) has the READ bit (0x1) set plus
 * bit 0x2.
 */
typedef enum CUDA_POINTER_ATTRIBUTE_ACCESS_FLAGS_enum {
  CU_POINTER_ATTRIBUTE_ACCESS_FLAG_NONE      = 0x0,
  CU_POINTER_ATTRIBUTE_ACCESS_FLAG_READ      = 0x1,
  CU_POINTER_ATTRIBUTE_ACCESS_FLAG_READWRITE = 0x3,
} CUDA_POINTER_ATTRIBUTE_ACCESS_FLAGS;
/*
 * Launch parameter bundle: a function handle, grid and block dimensions in
 * X/Y/Z, a shared-memory byte count, a stream handle and an array of
 * pointers to kernel parameters.
 */
typedef struct CUDA_LAUNCH_PARAMS_st {
  CUfunction function;
  /* grid dimensions */
  unsigned int gridDimX;
  unsigned int gridDimY;
  unsigned int gridDimZ;
  /* block dimensions */
  unsigned int blockDimX;
  unsigned int blockDimY;
  unsigned int blockDimZ;
  unsigned int sharedMemBytes;
  CUstream hStream;
  void** kernelParams;
} CUDA_LAUNCH_PARAMS;
/* External memory handle kinds. Values are explicit and contiguous, 1..8. */
typedef enum CUexternalMemoryHandleType_enum {
  CU_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD          = 1,
  CU_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_WIN32       = 2,
  CU_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_WIN32_KMT   = 3,
  CU_EXTERNAL_MEMORY_HANDLE_TYPE_D3D12_HEAP         = 4,
  CU_EXTERNAL_MEMORY_HANDLE_TYPE_D3D12_RESOURCE     = 5,
  CU_EXTERNAL_MEMORY_HANDLE_TYPE_D3D11_RESOURCE     = 6,
  CU_EXTERNAL_MEMORY_HANDLE_TYPE_D3D11_RESOURCE_KMT = 7,
  CU_EXTERNAL_MEMORY_HANDLE_TYPE_NVSCIBUF           = 8,
} CUexternalMemoryHandleType;
/*
 * External memory handle descriptor. `type` is followed by a union holding
 * one of: an int file descriptor, a win32 {handle, name} pair, or an
 * NvSciBuf object pointer.
 * NOTE(review): presumably `type` selects the active union member -- confirm
 * against the CUDA driver API documentation.
 */
typedef struct CUDA_EXTERNAL_MEMORY_HANDLE_DESC_st {
  CUexternalMemoryHandleType type;
  union {
    int fd;
    struct {
      void* handle;
      const void* name;
    } win32;
    const void* nvSciBufObject;
  } handle;
  unsigned long long size;
  unsigned int flags;
  unsigned int reserved[16];  /* reserved storage */
} CUDA_EXTERNAL_MEMORY_HANDLE_DESC;
/*
 * External memory buffer descriptor: 64-bit offset and size, a flags word
 * and 16 reserved unsigned ints.
 */
typedef struct CUDA_EXTERNAL_MEMORY_BUFFER_DESC_st {
  unsigned long long offset;
  unsigned long long size;
  unsigned int flags;
  unsigned int reserved[16];  /* reserved storage */
} CUDA_EXTERNAL_MEMORY_BUFFER_DESC;
/*
 * External memory mipmapped-array descriptor: a 64-bit offset, an embedded
 * 3D array descriptor, a level count and reserved storage.
 */
typedef struct CUDA_EXTERNAL_MEMORY_MIPMAPPED_ARRAY_DESC_st {
  unsigned long long offset;
  CUDA_ARRAY3D_DESCRIPTOR arrayDesc;
  unsigned int numLevels;
  unsigned int reserved[16];  /* reserved storage */
} CUDA_EXTERNAL_MEMORY_MIPMAPPED_ARRAY_DESC;
/* External semaphore handle kinds. Values are explicit and contiguous, 1..10. */
typedef enum CUexternalSemaphoreHandleType_enum {
  CU_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD                = 1,
  CU_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_WIN32             = 2,
  CU_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_WIN32_KMT         = 3,
  CU_EXTERNAL_SEMAPHORE_HANDLE_TYPE_D3D12_FENCE              = 4,
  CU_EXTERNAL_SEMAPHORE_HANDLE_TYPE_D3D11_FENCE              = 5,
  CU_EXTERNAL_SEMAPHORE_HANDLE_TYPE_NVSCISYNC                = 6,
  CU_EXTERNAL_SEMAPHORE_HANDLE_TYPE_D3D11_KEYED_MUTEX        = 7,
  CU_EXTERNAL_SEMAPHORE_HANDLE_TYPE_D3D11_KEYED_MUTEX_KMT    = 8,
  CU_EXTERNAL_SEMAPHORE_HANDLE_TYPE_TIMELINE_SEMAPHORE_FD    = 9,
  CU_EXTERNAL_SEMAPHORE_HANDLE_TYPE_TIMELINE_SEMAPHORE_WIN32 = 10,
} CUexternalSemaphoreHandleType;
/*
 * External semaphore handle descriptor. `type` is followed by a union
 * holding one of: an int file descriptor, a win32 {handle, name} pair, or
 * an NvSciSync object pointer.
 * NOTE(review): presumably `type` selects the active union member -- confirm
 * against the CUDA driver API documentation.
 */
typedef struct CUDA_EXTERNAL_SEMAPHORE_HANDLE_DESC_st {
  CUexternalSemaphoreHandleType type;
  union {
    int fd;
    struct {
      void* handle;
      const void* name;
    } win32;
    const void* nvSciSyncObj;
  } handle;
  unsigned int flags;
  unsigned int reserved[16];  /* reserved storage */
} CUDA_EXTERNAL_SEMAPHORE_HANDLE_DESC;
/*
 * External semaphore signal parameters. `params` is a struct (not a union):
 * the fence value, the nvSciSync union and the keyed-mutex key are laid out
 * one after another, followed by 12 reserved unsigned ints.
 */
typedef struct CUDA_EXTERNAL_SEMAPHORE_SIGNAL_PARAMS_st {
  struct {
    struct {
      unsigned long long value;
    } fence;
    union {
      void* fence;
      unsigned long long reserved;
    } nvSciSync;
    struct {
      unsigned long long key;
    } keyedMutex;
    unsigned int reserved[12];  /* reserved storage inside params */
  } params;
  unsigned int flags;
  unsigned int reserved[16];    /* reserved storage */
} CUDA_EXTERNAL_SEMAPHORE_SIGNAL_PARAMS;
/*
 * External semaphore wait parameters. Same shape as the signal parameters,
 * except that keyedMutex also carries `timeoutMs` and the inner reserved
 * array has 10 entries instead of 12.
 */
typedef struct CUDA_EXTERNAL_SEMAPHORE_WAIT_PARAMS_st {
  struct {
    struct {
      unsigned long long value;
    } fence;
    union {
      void* fence;
      unsigned long long reserved;
    } nvSciSync;
    struct {
      unsigned long long key;
      unsigned int timeoutMs;
    } keyedMutex;
    unsigned int reserved[10];  /* reserved storage inside params */
  } params;
  unsigned int flags;
  unsigned int reserved[16];    /* reserved storage */
} CUDA_EXTERNAL_SEMAPHORE_WAIT_PARAMS;
/*
 * Signal-node parameters: a pointer to external semaphores, a pointer to
 * const signal parameters, and a count.
 * NOTE(review): presumably both pointers address arrays of numExtSems
 * elements -- confirm against the CUDA driver API documentation.
 */
typedef struct CUDA_EXT_SEM_SIGNAL_NODE_PARAMS_st {
  CUexternalSemaphore* extSemArray;
  const CUDA_EXTERNAL_SEMAPHORE_SIGNAL_PARAMS* paramsArray;
  unsigned int numExtSems;
} CUDA_EXT_SEM_SIGNAL_NODE_PARAMS;
/*
 * Wait-node parameters: a pointer to external semaphores, a pointer to
 * const wait parameters, and a count.
 * NOTE(review): presumably both pointers address arrays of numExtSems
 * elements -- confirm against the CUDA driver API documentation.
 */
typedef struct CUDA_EXT_SEM_WAIT_NODE_PARAMS_st {
  CUexternalSemaphore* extSemArray;
  const CUDA_EXTERNAL_SEMAPHORE_WAIT_PARAMS* paramsArray;
  unsigned int numExtSems;
} CUDA_EXT_SEM_WAIT_NODE_PARAMS;
/* Generic memory allocation handle, represented as an unsigned 64-bit-or-wider integer. */
typedef unsigned long long CUmemGenericAllocationHandle;
/*
 * Memory allocation handle types; the non-zero kinds are single-bit values
 * (0x1, 0x2, 0x4).
 * NOTE(review): CU_MEM_HANDLE_TYPE_MAX (0xFFFFFFFF) does not fit in int.
 * ISO C before C23 requires enumerator values to be representable as int,
 * so this relies on a compiler extension. The value is kept unchanged.
 */
typedef enum CUmemAllocationHandleType_enum {
  CU_MEM_HANDLE_TYPE_NONE                  = 0x0,
  CU_MEM_HANDLE_TYPE_POSIX_FILE_DESCRIPTOR = 0x1,
  CU_MEM_HANDLE_TYPE_WIN32                 = 0x2,
  CU_MEM_HANDLE_TYPE_WIN32_KMT             = 0x4,
  CU_MEM_HANDLE_TYPE_MAX                   = 0xFFFFFFFF,
} CUmemAllocationHandleType;
/*
 * Memory access protection flags. READWRITE (0x3) has the READ bit (0x1)
 * set plus bit 0x2.
 * NOTE(review): CU_MEM_ACCESS_FLAGS_PROT_MAX (0xFFFFFFFF) is outside the
 * range of int; this relies on a compiler extension before C23. The value
 * is kept unchanged.
 */
typedef enum CUmemAccess_flags_enum {
  CU_MEM_ACCESS_FLAGS_PROT_NONE      = 0x0,
  CU_MEM_ACCESS_FLAGS_PROT_READ      = 0x1,
  CU_MEM_ACCESS_FLAGS_PROT_READWRITE = 0x3,
  CU_MEM_ACCESS_FLAGS_PROT_MAX       = 0xFFFFFFFF,
} CUmemAccess_flags;
/*
 * Memory location types.
 * NOTE(review): CU_MEM_LOCATION_TYPE_MAX (0xFFFFFFFF) is outside the range
 * of int; this relies on a compiler extension before C23. The value is kept
 * unchanged.
 */
typedef enum CUmemLocationType_enum {
  CU_MEM_LOCATION_TYPE_INVALID = 0x0,
  CU_MEM_LOCATION_TYPE_DEVICE  = 0x1,
  CU_MEM_LOCATION_TYPE_MAX     = 0xFFFFFFFF,
} CUmemLocationType;
/*
 * Memory allocation types.
 * NOTE(review): CU_MEM_ALLOCATION_TYPE_MAX (0xFFFFFFFF) is outside the
 * range of int; this relies on a compiler extension before C23. The value
 * is kept unchanged.
 */
typedef enum CUmemAllocationType_enum {
  CU_MEM_ALLOCATION_TYPE_INVALID = 0x0,
  CU_MEM_ALLOCATION_TYPE_PINNED  = 0x1,
  CU_MEM_ALLOCATION_TYPE_MAX     = 0xFFFFFFFF,
} CUmemAllocationType;
/* Allocation granularity selectors: minimum (0) or recommended (1). */
typedef enum CUmemAllocationGranularity_flags_enum {
  CU_MEM_ALLOC_GRANULARITY_MINIMUM     = 0x0,
  CU_MEM_ALLOC_GRANULARITY_RECOMMENDED = 0x1,
} CUmemAllocationGranularity_flags;
/* Sparse array subresource kinds: sparse level (0) or mip tail (1). */
typedef enum CUarraySparseSubresourceType_enum {
  CU_ARRAY_SPARSE_SUBRESOURCE_TYPE_SPARSE_LEVEL = 0,
  CU_ARRAY_SPARSE_SUBRESOURCE_TYPE_MIPTAIL      = 1,
} CUarraySparseSubresourceType;
/* Memory operation kinds: map (1) or unmap (2). There is no zero value. */
typedef enum CUmemOperationType_enum {
  CU_MEM_OPERATION_TYPE_MAP   = 1,
  CU_MEM_OPERATION_TYPE_UNMAP = 2,
} CUmemOperationType;
/* Memory handle kinds; only the generic kind (0) is declared. */
typedef enum CUmemHandleType_enum {
  CU_MEM_HANDLE_TYPE_GENERIC = 0,
} CUmemHandleType;
/*
 * Array mapping information. Three tag/union pairs appear in sequence:
 *   resourceType     + resource    (mipmapped array or array handle)
 *   subresourceType  + subresource (sparse level or mip tail description)
 *   memHandleType    + memHandle   (generic allocation handle)
 * NOTE(review): presumably each tag selects the active member of the union
 * that follows it -- confirm against the CUDA driver API documentation.
 */
typedef struct CUarrayMapInfo_st {
  CUresourcetype resourceType;
  union {
    CUmipmappedArray mipmap;
    CUarray array;
  } resource;

  CUarraySparseSubresourceType subresourceType;
  union {
    struct {
      unsigned int level;
      unsigned int layer;
      unsigned int offsetX;
      unsigned int offsetY;
      unsigned int offsetZ;
      unsigned int extentWidth;
      unsigned int extentHeight;
      unsigned int extentDepth;
    } sparseLevel;
    struct {
      unsigned int layer;
      unsigned long long offset;
      unsigned long long size;
    } miptail;
  } subresource;

  CUmemOperationType memOperationType;
  CUmemHandleType memHandleType;
  union {
    CUmemGenericAllocationHandle memHandle;
  } memHandle;

  unsigned long long offset;
  unsigned int deviceBitMask;
  unsigned int flags;
  unsigned int reserved[2];  /* reserved storage */
} CUarrayMapInfo;
/* Memory location: a location type plus an integer id. */
typedef struct CUmemLocation_st {
  CUmemLocationType type;
  int id;
} CUmemLocation;
/* Allocation compression kinds: none (0) or generic (1). */
typedef enum CUmemAllocationCompType_enum {
  CU_MEM_ALLOCATION_COMP_NONE    = 0x0,
  CU_MEM_ALLOCATION_COMP_GENERIC = 0x1,
} CUmemAllocationCompType;
/*
 * Memory allocation properties: allocation type, requested handle types,
 * location, a win32 metadata pointer, and an 8-byte allocFlags sub-struct
 * (1 + 1 + 2 + 4 bytes).
 */
typedef struct CUmemAllocationProp_st {
  CUmemAllocationType type;
  CUmemAllocationHandleType requestedHandleTypes;
  CUmemLocation location;
  void* win32HandleMetaData;
  struct {
    unsigned char compressionType;
    unsigned char gpuDirectRDMACapable;
    unsigned short usage;
    unsigned char reserved[4];  /* reserved storage */
  } allocFlags;
} CUmemAllocationProp;
/* Memory access descriptor: a location paired with access flags. */
typedef struct CUmemAccessDesc_st {
  CUmemLocation location;
  CUmemAccess_flags flags;
} CUmemAccessDesc;
/*
 * Graph-exec update result codes. Values are explicit and contiguous,
 * 0x0 (success) through 0x7.
 */
typedef enum CUgraphExecUpdateResult_enum {
  CU_GRAPH_EXEC_UPDATE_SUCCESS                           = 0x0,
  CU_GRAPH_EXEC_UPDATE_ERROR                             = 0x1,
  CU_GRAPH_EXEC_UPDATE_ERROR_TOPOLOGY_CHANGED            = 0x2,
  CU_GRAPH_EXEC_UPDATE_ERROR_NODE_TYPE_CHANGED           = 0x3,
  CU_GRAPH_EXEC_UPDATE_ERROR_FUNCTION_CHANGED            = 0x4,
  CU_GRAPH_EXEC_UPDATE_ERROR_PARAMETERS_CHANGED          = 0x5,
  CU_GRAPH_EXEC_UPDATE_ERROR_NOT_SUPPORTED               = 0x6,
  CU_GRAPH_EXEC_UPDATE_ERROR_UNSUPPORTED_FUNCTION_CHANGE = 0x7,
} CUgraphExecUpdateResult;
/*
 * Memory pool attribute selectors. Numbering starts at 1; the values that
 * were implied by auto-increment are written out explicitly here.
 */
typedef enum CUmemPool_attribute_enum {
  CU_MEMPOOL_ATTR_REUSE_FOLLOW_EVENT_DEPENDENCIES   = 1,
  CU_MEMPOOL_ATTR_REUSE_ALLOW_OPPORTUNISTIC         = 2,
  CU_MEMPOOL_ATTR_REUSE_ALLOW_INTERNAL_DEPENDENCIES = 3,
  CU_MEMPOOL_ATTR_RELEASE_THRESHOLD                 = 4,
} CUmemPool_attribute;
/*
 * Memory pool properties: allocation type, handle types, location, a win32
 * security-attributes pointer, and CU_IPC_HANDLE_SIZE reserved bytes.
 */
typedef struct CUmemPoolProps_st {
  CUmemAllocationType allocType;
  CUmemAllocationHandleType handleTypes;
  CUmemLocation location;
  void* win32SecurityAttributes;
  unsigned char reserved[CU_IPC_HANDLE_SIZE];  /* reserved storage */
} CUmemPoolProps;
/* Export data blob: CU_IPC_HANDLE_SIZE bytes with no named sub-fields. */
typedef struct CUmemPoolPtrExportData_st {
  unsigned char reserved[CU_IPC_HANDLE_SIZE];
} CUmemPoolPtrExportData;
/* NVRTC result codes. Values are explicit and contiguous, 0 (success) .. 11. */
typedef enum {
  NVRTC_SUCCESS                                     = 0,
  NVRTC_ERROR_OUT_OF_MEMORY                         = 1,
  NVRTC_ERROR_PROGRAM_CREATION_FAILURE              = 2,
  NVRTC_ERROR_INVALID_INPUT                         = 3,
  NVRTC_ERROR_INVALID_PROGRAM                       = 4,
  NVRTC_ERROR_INVALID_OPTION                        = 5,
  NVRTC_ERROR_COMPILATION                           = 6,
  NVRTC_ERROR_BUILTIN_OPERATION_FAILURE             = 7,
  NVRTC_ERROR_NO_NAME_EXPRESSIONS_AFTER_COMPILATION = 8,
  NVRTC_ERROR_NO_LOWERED_NAMES_BEFORE_COMPILATION   = 9,
  NVRTC_ERROR_NAME_EXPRESSION_NOT_VALID             = 10,
  NVRTC_ERROR_INTERNAL_ERROR                        = 11,
} nvrtcResult;
/*
 * Opaque handles: pointers to struct types that are only forward-declared
 * here (no definition is visible in this part of the header).
 */
typedef struct _nvrtcProgram* nvrtcProgram;
typedef struct cudnnContext* cudnnHandle_t;
/* cuDNN status codes. Values are explicit and contiguous, 0 (success) .. 14. */
typedef enum {
  CUDNN_STATUS_SUCCESS                      = 0,
  CUDNN_STATUS_NOT_INITIALIZED              = 1,
  CUDNN_STATUS_ALLOC_FAILED                 = 2,
  CUDNN_STATUS_BAD_PARAM                    = 3,
  CUDNN_STATUS_INTERNAL_ERROR               = 4,
  CUDNN_STATUS_INVALID_VALUE                = 5,
  CUDNN_STATUS_ARCH_MISMATCH                = 6,
  CUDNN_STATUS_MAPPING_ERROR                = 7,
  CUDNN_STATUS_EXECUTION_FAILED             = 8,
  CUDNN_STATUS_NOT_SUPPORTED                = 9,
  CUDNN_STATUS_LICENSE_ERROR                = 10,
  CUDNN_STATUS_RUNTIME_PREREQUISITE_MISSING = 11,
  CUDNN_STATUS_RUNTIME_IN_PROGRESS          = 12,
  CUDNN_STATUS_RUNTIME_FP_OVERFLOW          = 13,
  CUDNN_STATUS_VERSION_MISMATCH             = 14,
} cudnnStatus_t;
/* Forward declaration of an incomplete struct type; usable only through pointers here. */
typedef struct cudnnRuntimeTag_t cudnnRuntimeTag_t;
/* Error query modes: raw code (0), non-blocking (1), blocking (2). */
typedef enum {
  CUDNN_ERRQUERY_RAWCODE     = 0,
  CUDNN_ERRQUERY_NONBLOCKING = 1,
  CUDNN_ERRQUERY_BLOCKING    = 2,
} cudnnErrQueryMode_t;
/*
 * Library version property selectors. The original relied on implicit
 * numbering from 0; the same values are written out explicitly here.
 */
typedef enum libraryPropertyType_t {
  MAJOR_VERSION = 0,
  MINOR_VERSION = 1,
  PATCH_LEVEL   = 2,
} libraryPropertyType;
/*
 * Opaque cuDNN descriptor handles. Each is a pointer to a struct type that
 * is only forward-declared here, so callers cannot inspect its contents.
 */
typedef struct cudnnTensorStruct* cudnnTensorDescriptor_t;
typedef struct cudnnPoolingStruct* cudnnPoolingDescriptor_t;
typedef struct cudnnFilterStruct* cudnnFilterDescriptor_t;
typedef struct cudnnLRNStruct* cudnnLRNDescriptor_t;
typedef struct cudnnActivationStruct* cudnnActivationDescriptor_t;
typedef struct cudnnSpatialTransformerStruct* cudnnSpatialTransformerDescriptor_t;
typedef struct cudnnOpTensorStruct* cudnnOpTensorDescriptor_t;
typedef struct cudnnReduceTensorStruct* cudnnReduceTensorDescriptor_t;
typedef struct cudnnCTCLossStruct* cudnnCTCLossDescriptor_t;
typedef struct cudnnTensorTransformStruct* cudnnTensorTransformDescriptor_t;
/* cuDNN element data types. Values are explicit and contiguous, 0..8. */
typedef enum {
  CUDNN_DATA_FLOAT   = 0,
  CUDNN_DATA_DOUBLE  = 1,
  CUDNN_DATA_HALF    = 2,
  CUDNN_DATA_INT8    = 3,
  CUDNN_DATA_INT32   = 4,
  CUDNN_DATA_INT8x4  = 5,
  CUDNN_DATA_UINT8   = 6,
  CUDNN_DATA_UINT8x4 = 7,
  CUDNN_DATA_INT8x32 = 8,
} cudnnDataType_t;
/* cuDNN math type selectors, values 0..3. */
typedef enum {
  CUDNN_DEFAULT_MATH                    = 0,
  CUDNN_TENSOR_OP_MATH                  = 1,
  CUDNN_TENSOR_OP_MATH_ALLOW_CONVERSION = 2,
  CUDNN_FMA_MATH                        = 3,
} cudnnMathType_t;
/* NaN propagation switch: do not propagate (0) or propagate (1). */
typedef enum {
  CUDNN_NOT_PROPAGATE_NAN = 0,
  CUDNN_PROPAGATE_NAN     = 1,
} cudnnNanPropagation_t;
/* Determinism tag: non-deterministic (0) or deterministic (1). */
typedef enum {
  CUDNN_NON_DETERMINISTIC = 0,
  CUDNN_DETERMINISTIC     = 1,
} cudnnDeterminism_t;
/* Tensor layout formats, values 0..2. */
typedef enum {
  CUDNN_TENSOR_NCHW        = 0,
  CUDNN_TENSOR_NHWC        = 1,
  CUDNN_TENSOR_NCHW_VECT_C = 2,
} cudnnTensorFormat_t;
/* Folding direction: fold (0) or unfold (1). The unsigned literal suffixes are kept. */
typedef enum {
  CUDNN_TRANSFORM_FOLD   = 0U,
  CUDNN_TRANSFORM_UNFOLD = 1U,
} cudnnFoldingDirection_t;
/* Tensor op selectors, values 0..5. */
typedef enum {
  CUDNN_OP_TENSOR_ADD  = 0,
  CUDNN_OP_TENSOR_MUL  = 1,
  CUDNN_OP_TENSOR_MIN  = 2,
  CUDNN_OP_TENSOR_MAX  = 3,
  CUDNN_OP_TENSOR_SQRT = 4,
  CUDNN_OP_TENSOR_NOT  = 5,
} cudnnOpTensorOp_t;
/* Tensor reduction op selectors, values 0..8. */
typedef enum {
  CUDNN_REDUCE_TENSOR_ADD          = 0,
  CUDNN_REDUCE_TENSOR_MUL          = 1,
  CUDNN_REDUCE_TENSOR_MIN          = 2,
  CUDNN_REDUCE_TENSOR_MAX          = 3,
  CUDNN_REDUCE_TENSOR_AMAX         = 4,
  CUDNN_REDUCE_TENSOR_AVG          = 5,
  CUDNN_REDUCE_TENSOR_NORM1        = 6,
  CUDNN_REDUCE_TENSOR_NORM2        = 7,
  CUDNN_REDUCE_TENSOR_MUL_NO_ZEROS = 8,
} cudnnReduceTensorOp_t;
/* Reduction index output selector: no indices (0) or flattened indices (1). */
typedef enum {
  CUDNN_REDUCE_TENSOR_NO_INDICES        = 0,
  CUDNN_REDUCE_TENSOR_FLATTENED_INDICES = 1,
} cudnnReduceTensorIndices_t;
/*
 * Index width selectors. Note the numbering is not ordered by width:
 * 32-bit = 0, 64-bit = 1, 16-bit = 2, 8-bit = 3.
 */
typedef enum {
  CUDNN_32BIT_INDICES = 0,
  CUDNN_64BIT_INDICES = 1,
  CUDNN_16BIT_INDICES = 2,
  CUDNN_8BIT_INDICES  = 3,
} cudnnIndicesType_t;
/* Softmax algorithm selectors, values 0..2. */
typedef enum {
  CUDNN_SOFTMAX_FAST     = 0,
  CUDNN_SOFTMAX_ACCURATE = 1,
  CUDNN_SOFTMAX_LOG      = 2,
} cudnnSoftmaxAlgorithm_t;
/* Softmax mode: instance (0) or channel (1). */
typedef enum {
  CUDNN_SOFTMAX_MODE_INSTANCE = 0,
  CUDNN_SOFTMAX_MODE_CHANNEL  = 1,
} cudnnSoftmaxMode_t;
/* Pooling mode selectors, values 0..3. */
typedef enum {
  CUDNN_POOLING_MAX                           = 0,
  CUDNN_POOLING_AVERAGE_COUNT_INCLUDE_PADDING = 1,
  CUDNN_POOLING_AVERAGE_COUNT_EXCLUDE_PADDING = 2,
  CUDNN_POOLING_MAX_DETERMINISTIC             = 3,
} cudnnPoolingMode_t;
/* Activation function selectors, values 0..5. */
typedef enum {
  CUDNN_ACTIVATION_SIGMOID      = 0,
  CUDNN_ACTIVATION_RELU         = 1,
  CUDNN_ACTIVATION_TANH         = 2,
  CUDNN_ACTIVATION_CLIPPED_RELU = 3,
  CUDNN_ACTIVATION_ELU          = 4,
  CUDNN_ACTIVATION_IDENTITY     = 5,
} cudnnActivationMode_t;
/* LRN mode; a single value (0) is declared. */
typedef enum {
  CUDNN_LRN_CROSS_CHANNEL_DIM1 = 0,
} cudnnLRNMode_t;
/* Divisive normalization mode; a single value (0) is declared. */
typedef enum {
  CUDNN_DIVNORM_PRECOMPUTED_MEANS = 0,
} cudnnDivNormMode_t;
/* Batch normalization mode selectors, values 0..2. */
typedef enum {
  CUDNN_BATCHNORM_PER_ACTIVATION     = 0,
  CUDNN_BATCHNORM_SPATIAL            = 1,
  CUDNN_BATCHNORM_SPATIAL_PERSISTENT = 2,
} cudnnBatchNormMode_t;
/* Batch normalization fused-op selectors, values 0..2. */
typedef enum {
  CUDNN_BATCHNORM_OPS_BN                = 0,
  CUDNN_BATCHNORM_OPS_BN_ACTIVATION     = 1,
  CUDNN_BATCHNORM_OPS_BN_ADD_ACTIVATION = 2,
} cudnnBatchNormOps_t;
/* Normalization mode: per activation (0) or per channel (1). */
typedef enum {
  CUDNN_NORM_PER_ACTIVATION = 0,
  CUDNN_NORM_PER_CHANNEL    = 1,
} cudnnNormMode_t;
/* Normalization algorithm: standard (0) or persist (1). */
typedef enum {
  CUDNN_NORM_ALGO_STANDARD = 0,
  CUDNN_NORM_ALGO_PERSIST  = 1,
} cudnnNormAlgo_t;
/* Normalization fused-op selectors, values 0..2. */
typedef enum {
  CUDNN_NORM_OPS_NORM                = 0,
  CUDNN_NORM_OPS_NORM_ACTIVATION     = 1,
  CUDNN_NORM_OPS_NORM_ADD_ACTIVATION = 2,
} cudnnNormOps_t;
/* Sampler type; a single value (0) is declared. */
typedef enum {
  CUDNN_SAMPLER_BILINEAR = 0,
} cudnnSamplerType_t;
/*
 * Opaque handles for dropout, algorithm and algorithm-performance objects:
 * pointers to struct types that are only forward-declared here.
 */
typedef struct cudnnDropoutStruct* cudnnDropoutDescriptor_t;
typedef struct cudnnAlgorithmStruct* cudnnAlgorithmDescriptor_t;
typedef struct cudnnAlgorithmPerformanceStruct* cudnnAlgorithmPerformance_t;
/*
 * Forward convolution algorithm selectors, values 0..7, followed by a
 * COUNT enumerator equal to the number of algorithms (8).
 */
typedef enum {
  CUDNN_CONVOLUTION_FWD_ALGO_IMPLICIT_GEMM         = 0,
  CUDNN_CONVOLUTION_FWD_ALGO_IMPLICIT_PRECOMP_GEMM = 1,
  CUDNN_CONVOLUTION_FWD_ALGO_GEMM                  = 2,
  CUDNN_CONVOLUTION_FWD_ALGO_DIRECT                = 3,
  CUDNN_CONVOLUTION_FWD_ALGO_FFT                   = 4,
  CUDNN_CONVOLUTION_FWD_ALGO_FFT_TILING            = 5,
  CUDNN_CONVOLUTION_FWD_ALGO_WINOGRAD              = 6,
  CUDNN_CONVOLUTION_FWD_ALGO_WINOGRAD_NONFUSED     = 7,
  CUDNN_CONVOLUTION_FWD_ALGO_COUNT                 = 8,
} cudnnConvolutionFwdAlgo_t;
/*
 * Backward-filter convolution algorithm selectors, values 0..6, followed by
 * a COUNT enumerator equal to the number of algorithms (7).
 */
typedef enum {
  CUDNN_CONVOLUTION_BWD_FILTER_ALGO_0                 = 0,
  CUDNN_CONVOLUTION_BWD_FILTER_ALGO_1                 = 1,
  CUDNN_CONVOLUTION_BWD_FILTER_ALGO_FFT               = 2,
  CUDNN_CONVOLUTION_BWD_FILTER_ALGO_3                 = 3,
  CUDNN_CONVOLUTION_BWD_FILTER_ALGO_WINOGRAD          = 4,
  CUDNN_CONVOLUTION_BWD_FILTER_ALGO_WINOGRAD_NONFUSED = 5,
  CUDNN_CONVOLUTION_BWD_FILTER_ALGO_FFT_TILING        = 6,
  CUDNN_CONVOLUTION_BWD_FILTER_ALGO_COUNT             = 7,
} cudnnConvolutionBwdFilterAlgo_t;
/*
 * Backward-data convolution algorithm selectors, values 0..5, followed by a
 * COUNT enumerator equal to the number of algorithms (6).
 */
typedef enum {
  CUDNN_CONVOLUTION_BWD_DATA_ALGO_0                 = 0,
  CUDNN_CONVOLUTION_BWD_DATA_ALGO_1                 = 1,
  CUDNN_CONVOLUTION_BWD_DATA_ALGO_FFT               = 2,
  CUDNN_CONVOLUTION_BWD_DATA_ALGO_FFT_TILING        = 3,
  CUDNN_CONVOLUTION_BWD_DATA_ALGO_WINOGRAD          = 4,
  CUDNN_CONVOLUTION_BWD_DATA_ALGO_WINOGRAD_NONFUSED = 5,
  CUDNN_CONVOLUTION_BWD_DATA_ALGO_COUNT             = 6,
} cudnnConvolutionBwdDataAlgo_t;
/* RNN algorithm selectors, values 0..2, followed by a COUNT enumerator (3). */
typedef enum {
  CUDNN_RNN_ALGO_STANDARD        = 0,
  CUDNN_RNN_ALGO_PERSIST_STATIC  = 1,
  CUDNN_RNN_ALGO_PERSIST_DYNAMIC = 2,
  CUDNN_RNN_ALGO_COUNT           = 3,
} cudnnRNNAlgo_t;
/* CTC loss algorithm: deterministic (0) or non-deterministic (1). */
typedef enum {
  CUDNN_CTC_LOSS_ALGO_DETERMINISTIC     = 0,
  CUDNN_CTC_LOSS_ALGO_NON_DETERMINISTIC = 1,
} cudnnCTCLossAlgo_t;
/*
 * Algorithm selector wrapper: a single member `algo` whose type is the
 * tagged union `Algorithm`, overlaying one algorithm enum per operation
 * family. The struct carries no discriminator of its own.
 */
typedef struct {
  union Algorithm {
    cudnnConvolutionFwdAlgo_t convFwdAlgo;
    cudnnConvolutionBwdFilterAlgo_t convBwdFilterAlgo;
    cudnnConvolutionBwdDataAlgo_t convBwdDataAlgo;
    cudnnRNNAlgo_t RNNAlgo;
    cudnnCTCLossAlgo_t CTCLossAlgo;
  } algo;
} cudnnAlgorithm_t;
/* Message severity levels: fatal (0), error (1), warning (2), info (3). */
typedef enum {
  CUDNN_SEV_FATAL   = 0,
  CUDNN_SEV_ERROR   = 1,
  CUDNN_SEV_WARNING = 2,
  CUDNN_SEV_INFO    = 3,
} cudnnSeverity_t;
/*
 * Debug record passed (by const pointer) to cudnnCallback_t: version,
 * status, three time fields, cuDNN handle, stream, process/thread ids and
 * a device id, followed by reserved storage.
 */
typedef struct {
  unsigned cudnn_version;
  cudnnStatus_t cudnnStatus;
  /* time fields */
  unsigned time_sec;
  unsigned time_usec;
  unsigned time_delta;
  cudnnHandle_t handle;
  cudaStream_t stream;
  unsigned long long pid;
  unsigned long long tid;
  int cudaDeviceId;
  int reserved[15];  /* reserved storage */
} cudnnDebug_t;
/*
 * Callback function-pointer type returning void. Arguments: a severity, an
 * untyped user-data pointer, a const debug record and a message string.
 * CUDA_CB is a macro defined outside this part of the header
 * (NOTE(review): presumably a calling-convention specifier -- confirm).
 */
typedef void (CUDA_CB *cudnnCallback_t)(cudnnSeverity_t sev, void* udata, const cudnnDebug_t* dbg, const char* msg);
/* Forward pass mode: inference (0) or training (1). */
typedef enum {
  CUDNN_FWD_MODE_INFERENCE = 0,
  CUDNN_FWD_MODE_TRAINING  = 1,
} cudnnForwardMode_t;
/* RNN cell type selectors, values 0..3. */
typedef enum {
  CUDNN_RNN_RELU = 0,
  CUDNN_RNN_TANH = 1,
  CUDNN_LSTM     = 2,
  CUDNN_GRU      = 3,
} cudnnRNNMode_t;
/* RNN bias mode selectors, values 0..3. */
typedef enum {
  CUDNN_RNN_NO_BIAS         = 0,
  CUDNN_RNN_SINGLE_INP_BIAS = 1,
  CUDNN_RNN_DOUBLE_BIAS     = 2,
  CUDNN_RNN_SINGLE_REC_BIAS = 3,
} cudnnRNNBiasMode_t;
/* RNN direction: unidirectional (0) or bidirectional (1). */
typedef enum {
  CUDNN_UNIDIRECTIONAL = 0,
  CUDNN_BIDIRECTIONAL  = 1,
} cudnnDirectionMode_t;
/* RNN input mode: linear input (0) or skip input (1). */
typedef enum {
  CUDNN_LINEAR_INPUT = 0,
  CUDNN_SKIP_INPUT   = 1,
} cudnnRNNInputMode_t;
/* RNN clipping mode: none (0) or min/max (1). */
typedef enum {
  CUDNN_RNN_CLIP_NONE   = 0,
  CUDNN_RNN_CLIP_MINMAX = 1,
} cudnnRNNClipMode_t;
/* RNN data layout selectors, values 0..2. */
typedef enum {
  CUDNN_RNN_DATA_LAYOUT_SEQ_MAJOR_UNPACKED   = 0,
  CUDNN_RNN_DATA_LAYOUT_SEQ_MAJOR_PACKED     = 1,
  CUDNN_RNN_DATA_LAYOUT_BATCH_MAJOR_UNPACKED = 2,
} cudnnRNNDataLayout_t;
/* RNN padding mode is a plain unsigned integer rather than an enum. */
typedef unsigned cudnnRNNPaddingMode_t;
/* Opaque RNN handles: pointers to struct types only forward-declared here. */
typedef struct cudnnRNNStruct* cudnnRNNDescriptor_t;
typedef struct cudnnPersistentRNNPlan* cudnnPersistentRNNPlan_t;
typedef struct cudnnRNNDataStruct* cudnnRNNDataDescriptor_t;
/* Sequence data axis identifiers: time (0), batch (1), beam (2), vect (3). */
typedef enum {
  CUDNN_SEQDATA_TIME_DIM  = 0,
  CUDNN_SEQDATA_BATCH_DIM = 1,
  CUDNN_SEQDATA_BEAM_DIM  = 2,
  CUDNN_SEQDATA_VECT_DIM  = 3,
} cudnnSeqDataAxis_t;
/* Opaque sequence-data handle: pointer to a struct only forward-declared here. */
typedef struct cudnnSeqDataStruct* cudnnSeqDataDescriptor_t;
/* Attention query map is a plain unsigned integer rather than an enum. */
typedef unsigned cudnnAttnQueryMap_t;
/* Opaque attention handle: pointer to a struct only forward-declared here. */
typedef struct cudnnAttnStruct* cudnnAttnDescriptor_t;
/*
 * Multi-head attention weight kinds: Q/K/V/O weights occupy values 0..3 and
 * Q/K/V/O biases occupy values 4..7.
 */
typedef enum {
  CUDNN_MH_ATTN_Q_WEIGHTS = 0,
  CUDNN_MH_ATTN_K_WEIGHTS = 1,
  CUDNN_MH_ATTN_V_WEIGHTS = 2,
  CUDNN_MH_ATTN_O_WEIGHTS = 3,
  CUDNN_MH_ATTN_Q_BIASES  = 4,
  CUDNN_MH_ATTN_K_BIASES  = 5,
  CUDNN_MH_ATTN_V_BIASES  = 6,
  CUDNN_MH_ATTN_O_BIASES  = 7,
} cudnnMultiHeadAttnWeightKind_t;
/* Weight gradient mode: add (0) or set (1). */
typedef enum {
  CUDNN_WGRAD_MODE_ADD = 0,
  CUDNN_WGRAD_MODE_SET = 1,
} cudnnWgradMode_t;
/* Loss normalization mode: none (0) or softmax (1). */
typedef enum {
  CUDNN_LOSS_NORMALIZATION_NONE    = 0,
  CUDNN_LOSS_NORMALIZATION_SOFTMAX = 1,
} cudnnLossNormalizationMode_t;
/* Opaque convolution handle: pointer to a struct only forward-declared here. */
typedef struct cudnnConvolutionStruct* cudnnConvolutionDescriptor_t;
/* Convolution mode: convolution (0) or cross-correlation (1). */
typedef enum {
  CUDNN_CONVOLUTION       = 0,
  CUDNN_CROSS_CORRELATION = 1,
} cudnnConvolutionMode_t;
/* Reorder type: default reorder (0) or no reorder (1). */
typedef enum {
  CUDNN_DEFAULT_REORDER = 0,
  CUDNN_NO_REORDER      = 1,
} cudnnReorderType_t;
/*
 * Performance record for a forward convolution algorithm: the algorithm,
 * a status, a float time, a size_t memory figure, determinism and math
 * type tags, then reserved storage.
 * NOTE(review): units of `time` and `memory` are not stated here -- see the
 * cuDNN API reference.
 */
typedef struct {
  cudnnConvolutionFwdAlgo_t algo;
  cudnnStatus_t status;
  float time;
  size_t memory;
  cudnnDeterminism_t determinism;
  cudnnMathType_t mathType;
  int reserved[3];  /* reserved storage */
} cudnnConvolutionFwdAlgoPerf_t;
/*
 * Performance record for a backward-data convolution algorithm: the
 * algorithm, a status, a float time, a size_t memory figure, determinism
 * and math type tags, then reserved storage.
 * NOTE(review): units of `time` and `memory` are not stated here -- see the
 * cuDNN API reference.
 */
typedef struct {
  cudnnConvolutionBwdDataAlgo_t algo;
  cudnnStatus_t status;
  float time;
  size_t memory;
  cudnnDeterminism_t determinism;
  cudnnMathType_t mathType;
  int reserved[3];  /* reserved storage */
} cudnnConvolutionBwdDataAlgoPerf_t;
/*
 * Opaque fused-ops handles (const-parameter pack, variant-parameter pack,
 * plan): pointers to struct types only forward-declared here.
 */
typedef struct cudnnFusedOpsConstParamStruct* cudnnFusedOpsConstParamPack_t;
typedef struct cudnnFusedOpsVariantParamStruct* cudnnFusedOpsVariantParamPack_t;
typedef struct cudnnFusedOpsPlanStruct* cudnnFusedOpsPlan_t;
/* Fused operation selectors. Values are explicit and contiguous, 0..6. */
typedef enum {
  CUDNN_FUSED_SCALE_BIAS_ACTIVATION_CONV_BNSTATS    = 0,
  CUDNN_FUSED_SCALE_BIAS_ACTIVATION_WGRAD           = 1,
  CUDNN_FUSED_BN_FINALIZE_STATISTICS_TRAINING       = 2,
  CUDNN_FUSED_BN_FINALIZE_STATISTICS_INFERENCE      = 3,
  CUDNN_FUSED_CONV_SCALE_BIAS_ADD_ACTIVATION        = 4,
  CUDNN_FUSED_SCALE_BIAS_ADD_ACTIVATION_GEN_BITMASK = 5,
  CUDNN_FUSED_DACTIVATION_FORK_DBATCHNORM           = 6,
} cudnnFusedOps_t;
/*
 * Labels for fused-ops constant parameters. Values are explicit and
 * contiguous, 0..38. Names ending in _DESC and _PLACEHOLDER frequently
 * appear as adjacent pairs.
 */
typedef enum {
  CUDNN_PARAM_XDESC                          = 0,
  CUDNN_PARAM_XDATA_PLACEHOLDER              = 1,
  CUDNN_PARAM_BN_MODE                        = 2,
  CUDNN_PARAM_BN_EQSCALEBIAS_DESC            = 3,
  CUDNN_PARAM_BN_EQSCALE_PLACEHOLDER         = 4,
  CUDNN_PARAM_BN_EQBIAS_PLACEHOLDER          = 5,
  CUDNN_PARAM_ACTIVATION_DESC                = 6,
  CUDNN_PARAM_CONV_DESC                      = 7,
  CUDNN_PARAM_WDESC                          = 8,
  CUDNN_PARAM_WDATA_PLACEHOLDER              = 9,
  CUDNN_PARAM_DWDESC                         = 10,
  CUDNN_PARAM_DWDATA_PLACEHOLDER             = 11,
  CUDNN_PARAM_YDESC                          = 12,
  CUDNN_PARAM_YDATA_PLACEHOLDER              = 13,
  CUDNN_PARAM_DYDESC                         = 14,
  CUDNN_PARAM_DYDATA_PLACEHOLDER             = 15,
  CUDNN_PARAM_YSTATS_DESC                    = 16,
  CUDNN_PARAM_YSUM_PLACEHOLDER               = 17,
  CUDNN_PARAM_YSQSUM_PLACEHOLDER             = 18,
  CUDNN_PARAM_BN_SCALEBIAS_MEANVAR_DESC      = 19,
  CUDNN_PARAM_BN_SCALE_PLACEHOLDER           = 20,
  CUDNN_PARAM_BN_BIAS_PLACEHOLDER            = 21,
  CUDNN_PARAM_BN_SAVED_MEAN_PLACEHOLDER      = 22,
  CUDNN_PARAM_BN_SAVED_INVSTD_PLACEHOLDER    = 23,
  CUDNN_PARAM_BN_RUNNING_MEAN_PLACEHOLDER    = 24,
  CUDNN_PARAM_BN_RUNNING_VAR_PLACEHOLDER     = 25,
  CUDNN_PARAM_ZDESC                          = 26,
  CUDNN_PARAM_ZDATA_PLACEHOLDER              = 27,
  CUDNN_PARAM_BN_Z_EQSCALEBIAS_DESC          = 28,
  CUDNN_PARAM_BN_Z_EQSCALE_PLACEHOLDER       = 29,
  CUDNN_PARAM_BN_Z_EQBIAS_PLACEHOLDER        = 30,
  CUDNN_PARAM_ACTIVATION_BITMASK_DESC        = 31,
  CUDNN_PARAM_ACTIVATION_BITMASK_PLACEHOLDER = 32,
  CUDNN_PARAM_DXDESC                         = 33,
  CUDNN_PARAM_DXDATA_PLACEHOLDER             = 34,
  CUDNN_PARAM_DZDESC                         = 35,
  CUDNN_PARAM_DZDATA_PLACEHOLDER             = 36,
  CUDNN_PARAM_BN_DSCALE_PLACEHOLDER          = 37,
  CUDNN_PARAM_BN_DBIAS_PLACEHOLDER           = 38,
} cudnnFusedOpsConstParamLabel_t;
/* Pointer placeholder kinds: null (0), element-aligned (1), 16-byte aligned (2). */
typedef enum {
  CUDNN_PTR_NULL         = 0,
  CUDNN_PTR_ELEM_ALIGNED = 1,
  CUDNN_PTR_16B_ALIGNED  = 2,
} cudnnFusedOpsPointerPlaceHolder_t;
/*
 * Labels for fused-ops variant parameters. Pointer labels (CUDNN_PTR_*)
 * occupy 0..23; scalar labels (CUDNN_SCALAR_*) start at 100.
 */
typedef enum {
  /* pointer parameters: 0..23 */
  CUDNN_PTR_XDATA              = 0,
  CUDNN_PTR_BN_EQSCALE         = 1,
  CUDNN_PTR_BN_EQBIAS          = 2,
  CUDNN_PTR_WDATA              = 3,
  CUDNN_PTR_DWDATA             = 4,
  CUDNN_PTR_YDATA              = 5,
  CUDNN_PTR_DYDATA             = 6,
  CUDNN_PTR_YSUM               = 7,
  CUDNN_PTR_YSQSUM             = 8,
  CUDNN_PTR_WORKSPACE          = 9,
  CUDNN_PTR_BN_SCALE           = 10,
  CUDNN_PTR_BN_BIAS            = 11,
  CUDNN_PTR_BN_SAVED_MEAN      = 12,
  CUDNN_PTR_BN_SAVED_INVSTD    = 13,
  CUDNN_PTR_BN_RUNNING_MEAN    = 14,
  CUDNN_PTR_BN_RUNNING_VAR     = 15,
  CUDNN_PTR_ZDATA              = 16,
  CUDNN_PTR_BN_Z_EQSCALE       = 17,
  CUDNN_PTR_BN_Z_EQBIAS        = 18,
  CUDNN_PTR_ACTIVATION_BITMASK = 19,
  CUDNN_PTR_DXDATA             = 20,
  CUDNN_PTR_DZDATA             = 21,
  CUDNN_PTR_BN_DSCALE          = 22,
  CUDNN_PTR_BN_DBIAS           = 23,
  /* scalar parameters: 100..103 */
  CUDNN_SCALAR_SIZE_T_WORKSPACE_SIZE_IN_BYTES = 100,
  CUDNN_SCALAR_INT64_T_BN_ACCUMULATION_COUNT  = 101,
  CUDNN_SCALAR_DOUBLE_BN_EXP_AVG_FACTOR       = 102,
  CUDNN_SCALAR_DOUBLE_BN_EPSILON              = 103,
} cudnnFusedOpsVariantParamLabel_t;
/*
 * Performance record for a backward-filter convolution algorithm: the
 * algorithm, a status, a float time, a size_t memory figure, determinism
 * and math type tags, then reserved storage.
 * NOTE(review): units of `time` and `memory` are not stated here -- see the
 * cuDNN API reference.
 */
typedef struct {
  cudnnConvolutionBwdFilterAlgo_t algo;
  cudnnStatus_t status;
  float time;
  size_t memory;
  cudnnDeterminism_t determinism;
  cudnnMathType_t mathType;
  int reserved[3];  /* reserved storage */
} cudnnConvolutionBwdFilterAlgoPerf_t;
/* Backend descriptor handle; an untyped pointer, unlike the struct-pointer handles above. */
typedef void* cudnnBackendDescriptor_t;
/*
 * Pointwise operation selectors. Arithmetic ops occupy 0..4; the *_FWD
 * activation ops start at 100.
 */
typedef enum {
  CUDNN_POINTWISE_ADD         = 0,
  CUDNN_POINTWISE_MUL         = 1,
  CUDNN_POINTWISE_MIN         = 2,
  CUDNN_POINTWISE_MAX         = 3,
  CUDNN_POINTWISE_SQRT        = 4,
  CUDNN_POINTWISE_RELU_FWD    = 100,
  CUDNN_POINTWISE_TANH_FWD    = 101,
  CUDNN_POINTWISE_SIGMOID_FWD = 102,
  CUDNN_POINTWISE_ELU_FWD     = 103,
} cudnnPointwiseMode_t;
/* Statistics generation mode; a single value (0) is declared. */
typedef enum {
  CUDNN_GENSTATS_SUM_SQSUM = 0,
} cudnnGenStatsMode_t;
/*
 * Backend attribute names. Values are explicit; each attribute family
 * starts at its own base (0, 100, 200, ... 1300), with the operation
 * attributes sub-grouped at 700, 750 and 770.
 */
typedef enum {
  /* pointwise: 0.. */
  CUDNN_ATTR_POINTWISE_MODE = 0,
  CUDNN_ATTR_POINTWISE_MATH_PREC = 1,
  CUDNN_ATTR_POINTWISE_NAN_PROPAGATION = 2,
  CUDNN_ATTR_POINTWISE_RELU_LOWER_CLIP = 3,
  CUDNN_ATTR_POINTWISE_RELU_UPPER_CLIP = 4,

  /* convolution: 100.. */
  CUDNN_ATTR_CONVOLUTION_COMP_TYPE = 100,
  CUDNN_ATTR_CONVOLUTION_CONV_MODE = 101,
  CUDNN_ATTR_CONVOLUTION_DILATIONS = 102,
  CUDNN_ATTR_CONVOLUTION_FILTER_STRIDES = 103,
  CUDNN_ATTR_CONVOLUTION_POST_PADDINGS = 104,
  CUDNN_ATTR_CONVOLUTION_PRE_PADDINGS = 105,
  CUDNN_ATTR_CONVOLUTION_SPATIAL_DIMS = 106,

  /* engine heuristics: 200.. */
  CUDNN_ATTR_ENGINEHEUR_MODE = 200,
  CUDNN_ATTR_ENGINEHEUR_OPERATION_GRAPH = 201,
  CUDNN_ATTR_ENGINEHEUR_RESULTS = 202,

  /* engine config: 300.. */
  CUDNN_ATTR_ENGINECFG_ENGINE = 300,
  CUDNN_ATTR_ENGINECFG_INTERMEDIATE_INFO = 301,
  CUDNN_ATTR_ENGINECFG_KNOB_CHOICES = 302,

  /* execution plan: 400.. */
  CUDNN_ATTR_EXECUTION_PLAN_HANDLE = 400,
  CUDNN_ATTR_EXECUTION_PLAN_ENGINE_CONFIG = 401,
  CUDNN_ATTR_EXECUTION_PLAN_WORKSPACE_SIZE = 402,
  CUDNN_ATTR_EXECUTION_PLAN_COMPUTED_INTERMEDIATE_UIDS = 403,
  CUDNN_ATTR_EXECUTION_PLAN_RUN_ONLY_INTERMEDIATE_UIDS = 404,

  /* intermediate info: 500.. */
  CUDNN_ATTR_INTERMEDIATE_INFO_UNIQUE_ID = 500,
  CUDNN_ATTR_INTERMEDIATE_INFO_SIZE = 501,
  CUDNN_ATTR_INTERMEDIATE_INFO_DEPENDENT_DATA_UIDS = 502,
  CUDNN_ATTR_INTERMEDIATE_INFO_DEPENDENT_ATTRIBUTES = 503,

  /* knob choice: 600.. */
  CUDNN_ATTR_KNOB_CHOICE_KNOB_TYPE = 600,
  CUDNN_ATTR_KNOB_CHOICE_KNOB_VALUE = 601,

  /* convolution operations: 700.. */
  CUDNN_ATTR_OPERATION_CONVOLUTION_FORWARD_ALPHA = 700,
  CUDNN_ATTR_OPERATION_CONVOLUTION_FORWARD_BETA = 701,
  CUDNN_ATTR_OPERATION_CONVOLUTION_FORWARD_CONV_DESC = 702,
  CUDNN_ATTR_OPERATION_CONVOLUTION_FORWARD_W = 703,
  CUDNN_ATTR_OPERATION_CONVOLUTION_FORWARD_X = 704,
  CUDNN_ATTR_OPERATION_CONVOLUTION_FORWARD_Y = 705,
  CUDNN_ATTR_OPERATION_CONVOLUTION_BWD_DATA_ALPHA = 706,
  CUDNN_ATTR_OPERATION_CONVOLUTION_BWD_DATA_BETA = 707,
  CUDNN_ATTR_OPERATION_CONVOLUTION_BWD_DATA_CONV_DESC = 708,
  CUDNN_ATTR_OPERATION_CONVOLUTION_BWD_DATA_W = 709,
  CUDNN_ATTR_OPERATION_CONVOLUTION_BWD_DATA_DX = 710,
  CUDNN_ATTR_OPERATION_CONVOLUTION_BWD_DATA_DY = 711,
  CUDNN_ATTR_OPERATION_CONVOLUTION_BWD_FILTER_ALPHA = 712,
  CUDNN_ATTR_OPERATION_CONVOLUTION_BWD_FILTER_BETA = 713,
  CUDNN_ATTR_OPERATION_CONVOLUTION_BWD_FILTER_CONV_DESC = 714,
  CUDNN_ATTR_OPERATION_CONVOLUTION_BWD_FILTER_DW = 715,
  CUDNN_ATTR_OPERATION_CONVOLUTION_BWD_FILTER_X = 716,
  CUDNN_ATTR_OPERATION_CONVOLUTION_BWD_FILTER_DY = 717,

  /* pointwise operation: 750.. */
  CUDNN_ATTR_OPERATION_POINTWISE_PW_DESCRIPTOR = 750,
  CUDNN_ATTR_OPERATION_POINTWISE_XDESC = 751,
  CUDNN_ATTR_OPERATION_POINTWISE_BDESC = 752,
  CUDNN_ATTR_OPERATION_POINTWISE_YDESC = 753,
  CUDNN_ATTR_OPERATION_POINTWISE_ALPHA1 = 754,
  CUDNN_ATTR_OPERATION_POINTWISE_ALPHA2 = 755,

  /* genstats operation: 770.. */
  CUDNN_ATTR_OPERATION_GENSTATS_MODE = 770,
  CUDNN_ATTR_OPERATION_GENSTATS_MATH_PREC = 771,
  CUDNN_ATTR_OPERATION_GENSTATS_XDESC = 772,
  CUDNN_ATTR_OPERATION_GENSTATS_SUMDESC = 773,
  CUDNN_ATTR_OPERATION_GENSTATS_SQSUMDESC = 774,

  /* operation graph: 800.. */
  CUDNN_ATTR_OPERATIONGRAPH_HANDLE = 800,
  CUDNN_ATTR_OPERATIONGRAPH_OPS = 801,
  CUDNN_ATTR_OPERATIONGRAPH_ENGINE_GLOBAL_COUNT = 802,

  /* tensor: 900.. */
  CUDNN_ATTR_TENSOR_BYTE_ALIGNMENT = 900,
  CUDNN_ATTR_TENSOR_DATA_TYPE = 901,
  CUDNN_ATTR_TENSOR_DIMENSIONS = 902,
  CUDNN_ATTR_TENSOR_STRIDES = 903,
  CUDNN_ATTR_TENSOR_VECTOR_COUNT = 904,
  CUDNN_ATTR_TENSOR_VECTORIZED_DIMENSION = 905,
  CUDNN_ATTR_TENSOR_UNIQUE_ID = 906,
  CUDNN_ATTR_TENSOR_IS_VIRTUAL = 907,

  /* variant pack: 1000.. */
  CUDNN_ATTR_VARIANT_PACK_UNIQUE_IDS = 1000,
  CUDNN_ATTR_VARIANT_PACK_DATA_POINTERS = 1001,
  CUDNN_ATTR_VARIANT_PACK_INTERMEDIATES = 1002,
  CUDNN_ATTR_VARIANT_PACK_WORKSPACE = 1003,

  /* layout info: 1100.. */
  CUDNN_ATTR_LAYOUT_INFO_TENSOR_UID = 1100,
  CUDNN_ATTR_LAYOUT_INFO_TYPES = 1101,

  /* knob info: 1200.. */
  CUDNN_ATTR_KNOB_INFO_TYPE = 1200,
  CUDNN_ATTR_KNOB_INFO_MAXIMUM_VALUE = 1201,
  CUDNN_ATTR_KNOB_INFO_MINIMUM_VALUE = 1202,
  CUDNN_ATTR_KNOB_INFO_STRIDE = 1203,

  /* engine: 1300.. */
  CUDNN_ATTR_ENGINE_OPERATION_GRAPH = 1300,
  CUDNN_ATTR_ENGINE_GLOBAL_INDEX = 1301,
  CUDNN_ATTR_ENGINE_KNOB_INFO = 1302,
  CUDNN_ATTR_ENGINE_NUMERICAL_NOTE = 1303,
  CUDNN_ATTR_ENGINE_LAYOUT_INFO = 1304,
} cudnnBackendAttributeName_t;
/*
 * Backend attribute value types. The original gave only the first value and
 * relied on auto-increment; the same values (0..16) are written out
 * explicitly here. The spelling NAN_PROPOGATION is part of the identifier
 * and is kept as declared.
 */
typedef enum {
  CUDNN_TYPE_HANDLE             = 0,
  CUDNN_TYPE_DATA_TYPE          = 1,
  CUDNN_TYPE_BOOLEAN            = 2,
  CUDNN_TYPE_INT64              = 3,
  CUDNN_TYPE_FLOAT              = 4,
  CUDNN_TYPE_DOUBLE             = 5,
  CUDNN_TYPE_VOID_PTR           = 6,
  CUDNN_TYPE_CONVOLUTION_MODE   = 7,
  CUDNN_TYPE_HEUR_MODE          = 8,
  CUDNN_TYPE_KNOB_TYPE          = 9,
  CUDNN_TYPE_NAN_PROPOGATION    = 10,
  CUDNN_TYPE_NUMERICAL_NOTE     = 11,
  CUDNN_TYPE_LAYOUT_TYPE        = 12,
  CUDNN_TYPE_ATTRIB_NAME        = 13,
  CUDNN_TYPE_POINTWISE_MODE     = 14,
  CUDNN_TYPE_BACKEND_DESCRIPTOR = 15,
  CUDNN_TYPE_GENSTATS_MODE      = 16,
} cudnnBackendAttributeType_t;
/*
 * Backend descriptor kinds. The original gave only the first value and
 * relied on auto-increment; the same values (0..17) are written out
 * explicitly here.
 */
typedef enum {
  CUDNN_BACKEND_POINTWISE_DESCRIPTOR                             = 0,
  CUDNN_BACKEND_CONVOLUTION_DESCRIPTOR                           = 1,
  CUDNN_BACKEND_ENGINE_DESCRIPTOR                                = 2,
  CUDNN_BACKEND_ENGINECFG_DESCRIPTOR                             = 3,
  CUDNN_BACKEND_ENGINEHEUR_DESCRIPTOR                            = 4,
  CUDNN_BACKEND_EXECUTION_PLAN_DESCRIPTOR                        = 5,
  CUDNN_BACKEND_INTERMEDIATE_INFO_DESCRIPTOR                     = 6,
  CUDNN_BACKEND_KNOB_CHOICE_DESCRIPTOR                           = 7,
  CUDNN_BACKEND_KNOB_INFO_DESCRIPTOR                             = 8,
  CUDNN_BACKEND_LAYOUT_INFO_DESCRIPTOR                           = 9,
  CUDNN_BACKEND_OPERATION_CONVOLUTION_FORWARD_DESCRIPTOR         = 10,
  CUDNN_BACKEND_OPERATION_CONVOLUTION_BACKWARD_FILTER_DESCRIPTOR = 11,
  CUDNN_BACKEND_OPERATION_CONVOLUTION_BACKWARD_DATA_DESCRIPTOR   = 12,
  CUDNN_BACKEND_OPERATION_POINTWISE_DESCRIPTOR                   = 13,
  CUDNN_BACKEND_OPERATION_GEN_STATS_DESCRIPTOR                   = 14,
  CUDNN_BACKEND_OPERATIONGRAPH_DESCRIPTOR                        = 15,
  CUDNN_BACKEND_VARIANT_PACK_DESCRIPTOR                          = 16,
  CUDNN_BACKEND_TENSOR_DESCRIPTOR                                = 17,
} cudnnBackendDescriptorType_t;
/*
 * Backend numerical notes. The original relied on auto-increment after the
 * first value; the same values (0..5, then TYPE_COUNT = 6) are written out
 * explicitly here.
 */
typedef enum {
  CUDNN_NUMERICAL_NOTE_TENSOR_CORE                 = 0,
  CUDNN_NUMERICAL_NOTE_DOWN_CONVERT_INPUTS         = 1,
  CUDNN_NUMERICAL_NOTE_REDUCED_PRECISION_REDUCTION = 2,
  CUDNN_NUMERICAL_NOTE_FFT                         = 3,
  CUDNN_NUMERICAL_NOTE_NONDETERMINISTIC            = 4,
  CUDNN_NUMERICAL_NOTE_WINOGRAD                    = 5,
  CUDNN_NUMERICAL_NOTE_TYPE_COUNT                  = 6,
} cudnnBackendNumericalNote_t;
/*
 * Backend knob types. Values are explicit and contiguous, 0..23, followed
 * by a COUNTS enumerator equal to the number of knob types (24).
 */
typedef enum {
  CUDNN_KNOB_TYPE_SPLIT_K          = 0,
  CUDNN_KNOB_TYPE_SWIZZLE          = 1,
  CUDNN_KNOB_TYPE_TILE_SIZE        = 2,
  CUDNN_KNOB_TYPE_USE_TEX          = 3,
  CUDNN_KNOB_TYPE_EDGE             = 4,
  CUDNN_KNOB_TYPE_KBLOCK           = 5,
  CUDNN_KNOB_TYPE_LDGA             = 6,
  CUDNN_KNOB_TYPE_LDGB             = 7,
  CUDNN_KNOB_TYPE_CHUNK_K          = 8,
  CUDNN_KNOB_TYPE_SPLIT_H          = 9,
  CUDNN_KNOB_TYPE_WINO_TILE        = 10,
  CUDNN_KNOB_TYPE_MULTIPLY         = 11,
  CUDNN_KNOB_TYPE_SPLIT_K_BUF      = 12,
  CUDNN_KNOB_TYPE_TILEK            = 13,
  CUDNN_KNOB_TYPE_STAGES           = 14,
  CUDNN_KNOB_TYPE_REDUCTION_MODE   = 15,
  CUDNN_KNOB_TYPE_CTA_SPLIT_K_MODE = 16,
  CUDNN_KNOB_TYPE_SPLIT_K_SLC      = 17,
  CUDNN_KNOB_TYPE_IDX_MODE         = 18,
  CUDNN_KNOB_TYPE_SLICED           = 19,
  CUDNN_KNOB_TYPE_SPLIT_RS         = 20,
  CUDNN_KNOB_TYPE_SINGLEBUFFER     = 21,
  CUDNN_KNOB_TYPE_LDGC             = 22,
  CUDNN_KNOB_TYPE_SPECFILT         = 23,
  CUDNN_KNOB_TYPE_COUNTS           = 24,
} cudnnBackendKnobType_t;
/* Backend preferred layout types, values 0..3, followed by a COUNT enumerator (4). */
typedef enum {
  CUDNN_LAYOUT_TYPE_PREFERRED_NCHW   = 0,
  CUDNN_LAYOUT_TYPE_PREFERRED_NHWC   = 1,
  CUDNN_LAYOUT_TYPE_PREFERRED_PAD4CK = 2,
  CUDNN_LAYOUT_TYPE_PREFERRED_PAD8CK = 3,
  CUDNN_LAYOUT_TYPE_COUNT            = 4,
} cudnnBackendLayoutType_t;
/*
 * Backend heuristic modes. The original relied on auto-increment for the
 * second enumerator; the same value (1) is written out explicitly here.
 */
typedef enum {
  CUDNN_HEUR_MODE_INSTANT = 0,
  CUDNN_HEUR_MODES_COUNT  = 1,
} cudnnBackendHeurMode_t;
/*
 * Local definitions of three OpenGL scalar type names.
 * NOTE(review): presumably declared here so GL interop declarations compile
 * without the GL headers; may clash if a real GL header is also included --
 * confirm.
 */
typedef unsigned int GLenum;
typedef unsigned int GLuint;
typedef int GLint;
/* GL device list selectors: all (1), current frame (2), next frame (3). */
typedef enum CUGLDeviceList_enum {
  CU_GL_DEVICE_LIST_ALL           = 0x01,
  CU_GL_DEVICE_LIST_CURRENT_FRAME = 0x02,
  CU_GL_DEVICE_LIST_NEXT_FRAME    = 0x03,
} CUGLDeviceList;
/* GL map resource flags: none (0), read-only (1), write-discard (2). */
typedef enum CUGLmap_flags_enum {
  CU_GL_MAP_RESOURCE_FLAGS_NONE          = 0x00,
  CU_GL_MAP_RESOURCE_FLAGS_READ_ONLY     = 0x01,
  CU_GL_MAP_RESOURCE_FLAGS_WRITE_DISCARD = 0x02,
} CUGLmap_flags;
/* Function types. */
/*
 * Function types for error lookup, initialisation and driver version
 * queries. Each `t`-prefixed name is a function *type* (not a pointer)
 * returning CUresult.
 * NOTE(review): presumably used elsewhere in this header to declare
 * dynamically resolved function pointers -- confirm.
 */
typedef CUresult CUDAAPI tcuGetErrorString(CUresult error, const char** pStr);
typedef CUresult CUDAAPI tcuGetErrorName(CUresult error, const char** pStr);
typedef CUresult CUDAAPI tcuInit(unsigned int Flags);
typedef CUresult CUDAAPI tcuDriverGetVersion(int* driverVersion);
/*
 * Function types for device queries, device memory-pool selection and
 * primary-context management. All return CUresult; results are delivered
 * through the leading pointer parameters where present.
 */
typedef CUresult CUDAAPI tcuDeviceGet(CUdevice* device, int ordinal);
typedef CUresult CUDAAPI tcuDeviceGetCount(int* count);
typedef CUresult CUDAAPI tcuDeviceGetName(char* name, int len, CUdevice dev);
typedef CUresult CUDAAPI tcuDeviceGetUuid(CUuuid* uuid, CUdevice dev);
typedef CUresult CUDAAPI tcuDeviceGetLuid(char* luid, unsigned int* deviceNodeMask, CUdevice dev);
typedef CUresult CUDAAPI tcuDeviceTotalMem_v2(size_t* bytes, CUdevice dev);
typedef CUresult CUDAAPI tcuDeviceGetTexture1DLinearMaxWidth(size_t* maxWidthInElements, CUarray_format format, unsigned numChannels, CUdevice dev);
typedef CUresult CUDAAPI tcuDeviceGetAttribute(int* pi, CUdevice_attribute attrib, CUdevice dev);
typedef CUresult CUDAAPI tcuDeviceGetNvSciSyncAttributes(void* nvSciSyncAttrList, CUdevice dev, int flags);
typedef CUresult CUDAAPI tcuDeviceSetMemPool(CUdevice dev, CUmemoryPool pool);
typedef CUresult CUDAAPI tcuDeviceGetMemPool(CUmemoryPool* pool, CUdevice dev);
typedef CUresult CUDAAPI tcuDeviceGetDefaultMemPool(CUmemoryPool* pool_out, CUdevice dev);
typedef CUresult CUDAAPI tcuDeviceGetProperties(CUdevprop* prop, CUdevice dev);
typedef CUresult CUDAAPI tcuDeviceComputeCapability(int* major, int* minor, CUdevice dev);
/* Primary context management. */
typedef CUresult CUDAAPI tcuDevicePrimaryCtxRetain(CUcontext* pctx, CUdevice dev);
typedef CUresult CUDAAPI tcuDevicePrimaryCtxRelease_v2(CUdevice dev);
typedef CUresult CUDAAPI tcuDevicePrimaryCtxSetFlags_v2(CUdevice dev, unsigned int flags);
typedef CUresult CUDAAPI tcuDevicePrimaryCtxGetState(CUdevice dev, unsigned int* flags, int* active);
typedef CUresult CUDAAPI tcuDevicePrimaryCtxReset_v2(CUdevice dev);
/*
 * Function types for context creation/destruction, the current-context
 * push/pop/set/get calls, and context-level limits and configuration.
 * All return CUresult. Names carrying a _v2 suffix are distinct types from
 * any unsuffixed variant.
 */
typedef CUresult CUDAAPI tcuCtxCreate_v2(CUcontext* pctx, unsigned int flags, CUdevice dev);
typedef CUresult CUDAAPI tcuCtxDestroy_v2(CUcontext ctx);
typedef CUresult CUDAAPI tcuCtxPushCurrent_v2(CUcontext ctx);
typedef CUresult CUDAAPI tcuCtxPopCurrent_v2(CUcontext* pctx);
typedef CUresult CUDAAPI tcuCtxSetCurrent(CUcontext ctx);
typedef CUresult CUDAAPI tcuCtxGetCurrent(CUcontext* pctx);
typedef CUresult CUDAAPI tcuCtxGetDevice(CUdevice* device);
typedef CUresult CUDAAPI tcuCtxGetFlags(unsigned int* flags);
typedef CUresult CUDAAPI tcuCtxSynchronize(void);
typedef CUresult CUDAAPI tcuCtxSetLimit(CUlimit limit, size_t value);
typedef CUresult CUDAAPI tcuCtxGetLimit(size_t* pvalue, CUlimit limit);
typedef CUresult CUDAAPI tcuCtxGetCacheConfig(CUfunc_cache* pconfig);
typedef CUresult CUDAAPI tcuCtxSetCacheConfig(CUfunc_cache config);
typedef CUresult CUDAAPI tcuCtxGetSharedMemConfig(CUsharedconfig* pConfig);
typedef CUresult CUDAAPI tcuCtxSetSharedMemConfig(CUsharedconfig config);
typedef CUresult CUDAAPI tcuCtxGetApiVersion(CUcontext ctx, unsigned int* version);
typedef CUresult CUDAAPI tcuCtxGetStreamPriorityRange(int* leastPriority, int* greatestPriority);
typedef CUresult CUDAAPI tcuCtxResetPersistingL2Cache(void);
typedef CUresult CUDAAPI tcuCtxAttach(CUcontext* pctx, unsigned int flags);
typedef CUresult CUDAAPI tcuCtxDetach(CUcontext ctx);
/*
 * Function types for module loading/unloading and for looking up
 * functions, globals, texture references and surface references by name
 * within a module. All return CUresult.
 */
typedef CUresult CUDAAPI tcuModuleLoad(CUmodule* module, const char* fname);
typedef CUresult CUDAAPI tcuModuleLoadData(CUmodule* module, const void* image);
typedef CUresult CUDAAPI tcuModuleLoadDataEx(CUmodule* module, const void* image, unsigned int numOptions, CUjit_option* options, void** optionValues);
typedef CUresult CUDAAPI tcuModuleLoadFatBinary(CUmodule* module, const void* fatCubin);
typedef CUresult CUDAAPI tcuModuleUnload(CUmodule hmod);
typedef CUresult CUDAAPI tcuModuleGetFunction(CUfunction* hfunc, CUmodule hmod, const char* name);
typedef CUresult CUDAAPI tcuModuleGetGlobal_v2(CUdeviceptr* dptr, size_t* bytes, CUmodule hmod, const char* name);
typedef CUresult CUDAAPI tcuModuleGetTexRef(CUtexref* pTexRef, CUmodule hmod, const char* name);
typedef CUresult CUDAAPI tcuModuleGetSurfRef(CUsurfref* pSurfRef, CUmodule hmod, const char* name);
/* Function types for the cuLink* entry points: create a CUlinkState, add
 * input from memory or from a file path, complete the link (writing the
 * output pointer and size through cubinOut/sizeOut), and destroy the
 * state. The create/add variants accept an option array with a parallel
 * array of option values. */
typedef CUresult CUDAAPI tcuLinkCreate_v2(unsigned int numOptions, CUjit_option* options, void** optionValues, CUlinkState* stateOut);
typedef CUresult CUDAAPI tcuLinkAddData_v2(CUlinkState state, CUjitInputType type, void* data, size_t size, const char* name, unsigned int numOptions, CUjit_option* options, void** optionValues);
typedef CUresult CUDAAPI tcuLinkAddFile_v2(CUlinkState state, CUjitInputType type, const char* path, unsigned int numOptions, CUjit_option* options, void** optionValues);
typedef CUresult CUDAAPI tcuLinkComplete(CUlinkState state, void** cubinOut, size_t* sizeOut);
typedef CUresult CUDAAPI tcuLinkDestroy(CUlinkState state);
/* Function types for memory query and allocation entry points: free/total
 * query, CUdeviceptr allocation (plain, pitched, managed), address-range
 * lookup, and host allocations returned through void** together with
 * their device-pointer and flags queries. Sizes are expressed as size_t. */
typedef CUresult CUDAAPI tcuMemGetInfo_v2(size_t* free, size_t* total);
typedef CUresult CUDAAPI tcuMemAlloc_v2(CUdeviceptr* dptr, size_t bytesize);
typedef CUresult CUDAAPI tcuMemAllocPitch_v2(CUdeviceptr* dptr, size_t* pPitch, size_t WidthInBytes, size_t Height, unsigned int ElementSizeBytes);
typedef CUresult CUDAAPI tcuMemFree_v2(CUdeviceptr dptr);
typedef CUresult CUDAAPI tcuMemGetAddressRange_v2(CUdeviceptr* pbase, size_t* psize, CUdeviceptr dptr);
typedef CUresult CUDAAPI tcuMemAllocHost_v2(void** pp, size_t bytesize);
typedef CUresult CUDAAPI tcuMemFreeHost(void* p);
typedef CUresult CUDAAPI tcuMemHostAlloc(void** pp, size_t bytesize, unsigned int Flags);
typedef CUresult CUDAAPI tcuMemHostGetDevicePointer_v2(CUdeviceptr* pdptr, void* p, unsigned int Flags);
typedef CUresult CUDAAPI tcuMemHostGetFlags(unsigned int* pFlags, void* p);
typedef CUresult CUDAAPI tcuMemAllocManaged(CUdeviceptr* dptr, size_t bytesize, unsigned int flags);
/* Function types for PCI-bus-id <-> CUdevice conversion, for the cuIpc*
 * event and memory handle entry points (get / open / close), and for
 * registering and unregistering an existing host pointer. Note that the
 * IPC handles are passed by value to the Open* variants. */
typedef CUresult CUDAAPI tcuDeviceGetByPCIBusId(CUdevice* dev, const char* pciBusId);
typedef CUresult CUDAAPI tcuDeviceGetPCIBusId(char* pciBusId, int len, CUdevice dev);
typedef CUresult CUDAAPI tcuIpcGetEventHandle(CUipcEventHandle* pHandle, CUevent event);
typedef CUresult CUDAAPI tcuIpcOpenEventHandle(CUevent* phEvent, CUipcEventHandle handle);
typedef CUresult CUDAAPI tcuIpcGetMemHandle(CUipcMemHandle* pHandle, CUdeviceptr dptr);
typedef CUresult CUDAAPI tcuIpcOpenMemHandle_v2(CUdeviceptr* pdptr, CUipcMemHandle handle, unsigned int Flags);
typedef CUresult CUDAAPI tcuIpcCloseMemHandle(CUdeviceptr dptr);
typedef CUresult CUDAAPI tcuMemHostRegister_v2(void* p, size_t bytesize, unsigned int Flags);
typedef CUresult CUDAAPI tcuMemHostUnregister(void* p);
/* Function types for the cuMemcpy* entry points. In the XtoY names the
 * letters H, D and A line up with the operand kinds in the parameter
 * lists: host pointer (void*), CUdeviceptr and CUarray (with a size_t
 * offset). The 2D/3D variants take a descriptor struct by const pointer,
 * the Peer variants carry source and destination contexts, and every
 * *Async variant adds a trailing CUstream. */
typedef CUresult CUDAAPI tcuMemcpy(CUdeviceptr dst, CUdeviceptr src, size_t ByteCount);
typedef CUresult CUDAAPI tcuMemcpyPeer(CUdeviceptr dstDevice, CUcontext dstContext, CUdeviceptr srcDevice, CUcontext srcContext, size_t ByteCount);
typedef CUresult CUDAAPI tcuMemcpyHtoD_v2(CUdeviceptr dstDevice, const void* srcHost, size_t ByteCount);
typedef CUresult CUDAAPI tcuMemcpyDtoH_v2(void* dstHost, CUdeviceptr srcDevice, size_t ByteCount);
typedef CUresult CUDAAPI tcuMemcpyDtoD_v2(CUdeviceptr dstDevice, CUdeviceptr srcDevice, size_t ByteCount);
typedef CUresult CUDAAPI tcuMemcpyDtoA_v2(CUarray dstArray, size_t dstOffset, CUdeviceptr srcDevice, size_t ByteCount);
typedef CUresult CUDAAPI tcuMemcpyAtoD_v2(CUdeviceptr dstDevice, CUarray srcArray, size_t srcOffset, size_t ByteCount);
typedef CUresult CUDAAPI tcuMemcpyHtoA_v2(CUarray dstArray, size_t dstOffset, const void* srcHost, size_t ByteCount);
typedef CUresult CUDAAPI tcuMemcpyAtoH_v2(void* dstHost, CUarray srcArray, size_t srcOffset, size_t ByteCount);
typedef CUresult CUDAAPI tcuMemcpyAtoA_v2(CUarray dstArray, size_t dstOffset, CUarray srcArray, size_t srcOffset, size_t ByteCount);
typedef CUresult CUDAAPI tcuMemcpy2D_v2(const CUDA_MEMCPY2D* pCopy);
typedef CUresult CUDAAPI tcuMemcpy2DUnaligned_v2(const CUDA_MEMCPY2D* pCopy);
typedef CUresult CUDAAPI tcuMemcpy3D_v2(const CUDA_MEMCPY3D* pCopy);
typedef CUresult CUDAAPI tcuMemcpy3DPeer(const CUDA_MEMCPY3D_PEER* pCopy);
typedef CUresult CUDAAPI tcuMemcpyAsync(CUdeviceptr dst, CUdeviceptr src, size_t ByteCount, CUstream hStream);
typedef CUresult CUDAAPI tcuMemcpyPeerAsync(CUdeviceptr dstDevice, CUcontext dstContext, CUdeviceptr srcDevice, CUcontext srcContext, size_t ByteCount, CUstream hStream);
typedef CUresult CUDAAPI tcuMemcpyHtoDAsync_v2(CUdeviceptr dstDevice, const void* srcHost, size_t ByteCount, CUstream hStream);
typedef CUresult CUDAAPI tcuMemcpyDtoHAsync_v2(void* dstHost, CUdeviceptr srcDevice, size_t ByteCount, CUstream hStream);
typedef CUresult CUDAAPI tcuMemcpyDtoDAsync_v2(CUdeviceptr dstDevice, CUdeviceptr srcDevice, size_t ByteCount, CUstream hStream);
typedef CUresult CUDAAPI tcuMemcpyHtoAAsync_v2(CUarray dstArray, size_t dstOffset, const void* srcHost, size_t ByteCount, CUstream hStream);
typedef CUresult CUDAAPI tcuMemcpyAtoHAsync_v2(void* dstHost, CUarray srcArray, size_t srcOffset, size_t ByteCount, CUstream hStream);
typedef CUresult CUDAAPI tcuMemcpy2DAsync_v2(const CUDA_MEMCPY2D* pCopy, CUstream hStream);
typedef CUresult CUDAAPI tcuMemcpy3DAsync_v2(const CUDA_MEMCPY3D* pCopy, CUstream hStream);
typedef CUresult CUDAAPI tcuMemcpy3DPeerAsync(const CUDA_MEMCPY3D_PEER* pCopy, CUstream hStream);
/* Function types for the cuMemset* entry points. The D8/D16/D32 variants
 * take the fill value as unsigned char / unsigned short / unsigned int
 * plus a count N; the D2D* variants take a destination pitch with Width
 * and Height instead of N. Every *Async variant adds a trailing CUstream
 * and, unlike its synchronous counterpart, carries no _v2 suffix. */
typedef CUresult CUDAAPI tcuMemsetD8_v2(CUdeviceptr dstDevice, unsigned char uc, size_t N);
typedef CUresult CUDAAPI tcuMemsetD16_v2(CUdeviceptr dstDevice, unsigned short us, size_t N);
typedef CUresult CUDAAPI tcuMemsetD32_v2(CUdeviceptr dstDevice, unsigned int ui, size_t N);
typedef CUresult CUDAAPI tcuMemsetD2D8_v2(CUdeviceptr dstDevice, size_t dstPitch, unsigned char uc, size_t Width, size_t Height);
typedef CUresult CUDAAPI tcuMemsetD2D16_v2(CUdeviceptr dstDevice, size_t dstPitch, unsigned short us, size_t Width, size_t Height);
typedef CUresult CUDAAPI tcuMemsetD2D32_v2(CUdeviceptr dstDevice, size_t dstPitch, unsigned int ui, size_t Width, size_t Height);
typedef CUresult CUDAAPI tcuMemsetD8Async(CUdeviceptr dstDevice, unsigned char uc, size_t N, CUstream hStream);
typedef CUresult CUDAAPI tcuMemsetD16Async(CUdeviceptr dstDevice, unsigned short us, size_t N, CUstream hStream);
typedef CUresult CUDAAPI tcuMemsetD32Async(CUdeviceptr dstDevice, unsigned int ui, size_t N, CUstream hStream);
typedef CUresult CUDAAPI tcuMemsetD2D8Async(CUdeviceptr dstDevice, size_t dstPitch, unsigned char uc, size_t Width, size_t Height, CUstream hStream);
typedef CUresult CUDAAPI tcuMemsetD2D16Async(CUdeviceptr dstDevice, size_t dstPitch, unsigned short us, size_t Width, size_t Height, CUstream hStream);
typedef CUresult CUDAAPI tcuMemsetD2D32Async(CUdeviceptr dstDevice, size_t dstPitch, unsigned int ui, size_t Width, size_t Height, CUstream hStream);
/* Function types for CUarray and CUmipmappedArray entry points: create
 * from a descriptor, read back the descriptor, query sparse properties,
 * fetch a plane or mipmap level as a CUarray, and destroy. The mipmapped
 * create call takes the 3D descriptor type plus a level count. */
typedef CUresult CUDAAPI tcuArrayCreate_v2(CUarray* pHandle, const CUDA_ARRAY_DESCRIPTOR* pAllocateArray);
typedef CUresult CUDAAPI tcuArrayGetDescriptor_v2(CUDA_ARRAY_DESCRIPTOR* pArrayDescriptor, CUarray hArray);
typedef CUresult CUDAAPI tcuArrayGetSparseProperties(CUDA_ARRAY_SPARSE_PROPERTIES* sparseProperties, CUarray array);
typedef CUresult CUDAAPI tcuMipmappedArrayGetSparseProperties(CUDA_ARRAY_SPARSE_PROPERTIES* sparseProperties, CUmipmappedArray mipmap);
typedef CUresult CUDAAPI tcuArrayGetPlane(CUarray* pPlaneArray, CUarray hArray, unsigned int planeIdx);
typedef CUresult CUDAAPI tcuArrayDestroy(CUarray hArray);
typedef CUresult CUDAAPI tcuArray3DCreate_v2(CUarray* pHandle, const CUDA_ARRAY3D_DESCRIPTOR* pAllocateArray);
typedef CUresult CUDAAPI tcuArray3DGetDescriptor_v2(CUDA_ARRAY3D_DESCRIPTOR* pArrayDescriptor, CUarray hArray);
typedef CUresult CUDAAPI tcuMipmappedArrayCreate(CUmipmappedArray* pHandle, const CUDA_ARRAY3D_DESCRIPTOR* pMipmappedArrayDesc, unsigned int numMipmapLevels);
typedef CUresult CUDAAPI tcuMipmappedArrayGetLevel(CUarray* pLevelArray, CUmipmappedArray hMipmappedArray, unsigned int level);
typedef CUresult CUDAAPI tcuMipmappedArrayDestroy(CUmipmappedArray hMipmappedArray);
/* Function types for the address-range and allocation-handle entry
 * points: reserve/free an address range, create/release a
 * CUmemGenericAllocationHandle, map/unmap a handle into a range, set and
 * query access, export/import a handle in shareable form, and query
 * granularity or allocation properties. The flags parameters here are
 * unsigned long long rather than unsigned int. */
typedef CUresult CUDAAPI tcuMemAddressReserve(CUdeviceptr* ptr, size_t size, size_t alignment, CUdeviceptr addr, unsigned long long flags);
typedef CUresult CUDAAPI tcuMemAddressFree(CUdeviceptr ptr, size_t size);
typedef CUresult CUDAAPI tcuMemCreate(CUmemGenericAllocationHandle* handle, size_t size, const CUmemAllocationProp* prop, unsigned long long flags);
typedef CUresult CUDAAPI tcuMemRelease(CUmemGenericAllocationHandle handle);
typedef CUresult CUDAAPI tcuMemMap(CUdeviceptr ptr, size_t size, size_t offset, CUmemGenericAllocationHandle handle, unsigned long long flags);
typedef CUresult CUDAAPI tcuMemMapArrayAsync(CUarrayMapInfo* mapInfoList, unsigned int count, CUstream hStream);
typedef CUresult CUDAAPI tcuMemUnmap(CUdeviceptr ptr, size_t size);
typedef CUresult CUDAAPI tcuMemSetAccess(CUdeviceptr ptr, size_t size, const CUmemAccessDesc* desc, size_t count);
typedef CUresult CUDAAPI tcuMemGetAccess(unsigned long long* flags, const CUmemLocation* location, CUdeviceptr ptr);
typedef CUresult CUDAAPI tcuMemExportToShareableHandle(void* shareableHandle, CUmemGenericAllocationHandle handle, CUmemAllocationHandleType handleType, unsigned long long flags);
typedef CUresult CUDAAPI tcuMemImportFromShareableHandle(CUmemGenericAllocationHandle* handle, void* osHandle, CUmemAllocationHandleType shHandleType);
typedef CUresult CUDAAPI tcuMemGetAllocationGranularity(size_t* granularity, const CUmemAllocationProp* prop, CUmemAllocationGranularity_flags option);
typedef CUresult CUDAAPI tcuMemGetAllocationPropertiesFromHandle(CUmemAllocationProp* prop, CUmemGenericAllocationHandle handle);
typedef CUresult CUDAAPI tcuMemRetainAllocationHandle(CUmemGenericAllocationHandle* handle, void* addr);
/* Function types for stream-ordered allocation and the cuMemPool* entry
 * points: async alloc/free on a CUstream, pool create/destroy/trim,
 * attribute and access get/set, allocation from a specific pool, and
 * export/import of pools and pointers. Pool attribute values travel as
 * untyped void*. */
typedef CUresult CUDAAPI tcuMemFreeAsync(CUdeviceptr dptr, CUstream hStream);
typedef CUresult CUDAAPI tcuMemAllocAsync(CUdeviceptr* dptr, size_t bytesize, CUstream hStream);
typedef CUresult CUDAAPI tcuMemPoolTrimTo(CUmemoryPool pool, size_t minBytesToKeep);
typedef CUresult CUDAAPI tcuMemPoolSetAttribute(CUmemoryPool pool, CUmemPool_attribute attr, void* value);
typedef CUresult CUDAAPI tcuMemPoolGetAttribute(CUmemoryPool pool, CUmemPool_attribute attr, void* value);
typedef CUresult CUDAAPI tcuMemPoolSetAccess(CUmemoryPool pool, const CUmemAccessDesc* map, size_t count);
typedef CUresult CUDAAPI tcuMemPoolGetAccess(CUmemAccess_flags* flags, CUmemoryPool memPool, CUmemLocation* location);
typedef CUresult CUDAAPI tcuMemPoolCreate(CUmemoryPool* pool, const CUmemPoolProps* poolProps);
typedef CUresult CUDAAPI tcuMemPoolDestroy(CUmemoryPool pool);
typedef CUresult CUDAAPI tcuMemAllocFromPoolAsync(CUdeviceptr* dptr, size_t bytesize, CUmemoryPool pool, CUstream hStream);
typedef CUresult CUDAAPI tcuMemPoolExportToShareableHandle(void* handle_out, CUmemoryPool pool, CUmemAllocationHandleType handleType, unsigned long long flags);
typedef CUresult CUDAAPI tcuMemPoolImportFromShareableHandle(CUmemoryPool* pool_out, void* handle, CUmemAllocationHandleType handleType, unsigned long long flags);
typedef CUresult CUDAAPI tcuMemPoolExportPointer(CUmemPoolPtrExportData* shareData_out, CUdeviceptr ptr);
typedef CUresult CUDAAPI tcuMemPoolImportPointer(CUdeviceptr* ptr_out, CUmemoryPool pool, CUmemPoolPtrExportData* shareData);
/* Function types for pointer-attribute get/set, prefetch, advise and
 * memory-range attribute queries. Attribute data is exchanged through
 * untyped void* (or void** for the plural variants); the plural variants
 * take the attribute ids and output slots as separate arrays alongside a
 * count. */
typedef CUresult CUDAAPI tcuPointerGetAttribute(void* data, CUpointer_attribute attribute, CUdeviceptr ptr);
typedef CUresult CUDAAPI tcuMemPrefetchAsync(CUdeviceptr devPtr, size_t count, CUdevice dstDevice, CUstream hStream);
typedef CUresult CUDAAPI tcuMemAdvise(CUdeviceptr devPtr, size_t count, CUmem_advise advice, CUdevice device);
typedef CUresult CUDAAPI tcuMemRangeGetAttribute(void* data, size_t dataSize, CUmem_range_attribute attribute, CUdeviceptr devPtr, size_t count);
typedef CUresult CUDAAPI tcuMemRangeGetAttributes(void** data, size_t* dataSizes, CUmem_range_attribute* attributes, size_t numAttributes, CUdeviceptr devPtr, size_t count);
typedef CUresult CUDAAPI tcuPointerSetAttribute(const void* value, CUpointer_attribute attribute, CUdeviceptr ptr);
typedef CUresult CUDAAPI tcuPointerGetAttributes(unsigned int numAttributes, CUpointer_attribute* attributes, void** data, CUdeviceptr ptr);
/* Function types for the cuStream* entry points (plus the thread-level
 * capture-mode exchange): create/destroy, priority/flags/context queries,
 * event wait, host callback registration, capture begin/end/status,
 * managed-memory attach, query/synchronize, and attribute
 * copy/get/set. Apart from the create and capture-mode calls, the stream
 * handle is the first parameter. */
typedef CUresult CUDAAPI tcuStreamCreate(CUstream* phStream, unsigned int Flags);
typedef CUresult CUDAAPI tcuStreamCreateWithPriority(CUstream* phStream, unsigned int flags, int priority);
typedef CUresult CUDAAPI tcuStreamGetPriority(CUstream hStream, int* priority);
typedef CUresult CUDAAPI tcuStreamGetFlags(CUstream hStream, unsigned int* flags);
typedef CUresult CUDAAPI tcuStreamGetCtx(CUstream hStream, CUcontext* pctx);
typedef CUresult CUDAAPI tcuStreamWaitEvent(CUstream hStream, CUevent hEvent, unsigned int Flags);
typedef CUresult CUDAAPI tcuStreamAddCallback(CUstream hStream, CUstreamCallback callback, void* userData, unsigned int flags);
typedef CUresult CUDAAPI tcuStreamBeginCapture_v2(CUstream hStream, CUstreamCaptureMode mode);
typedef CUresult CUDAAPI tcuThreadExchangeStreamCaptureMode(CUstreamCaptureMode* mode);
typedef CUresult CUDAAPI tcuStreamEndCapture(CUstream hStream, CUgraph* phGraph);
typedef CUresult CUDAAPI tcuStreamIsCapturing(CUstream hStream, CUstreamCaptureStatus* captureStatus);
typedef CUresult CUDAAPI tcuStreamGetCaptureInfo(CUstream hStream, CUstreamCaptureStatus* captureStatus, cuuint64_t* id);
typedef CUresult CUDAAPI tcuStreamAttachMemAsync(CUstream hStream, CUdeviceptr dptr, size_t length, unsigned int flags);
typedef CUresult CUDAAPI tcuStreamQuery(CUstream hStream);
typedef CUresult CUDAAPI tcuStreamSynchronize(CUstream hStream);
typedef CUresult CUDAAPI tcuStreamDestroy_v2(CUstream hStream);
typedef CUresult CUDAAPI tcuStreamCopyAttributes(CUstream dst, CUstream src);
typedef CUresult CUDAAPI tcuStreamGetAttribute(CUstream hStream, CUstreamAttrID attr, CUstreamAttrValue* value_out);
typedef CUresult CUDAAPI tcuStreamSetAttribute(CUstream hStream, CUstreamAttrID attr, const CUstreamAttrValue* value);
/* Function types for the cuEvent* entry points: create, record on a
 * stream (with or without flags), query, synchronize, destroy, and
 * elapsed time between two events written through a float pointer. */
typedef CUresult CUDAAPI tcuEventCreate(CUevent* phEvent, unsigned int Flags);
typedef CUresult CUDAAPI tcuEventRecord(CUevent hEvent, CUstream hStream);
typedef CUresult CUDAAPI tcuEventRecordWithFlags(CUevent hEvent, CUstream hStream, unsigned int flags);
typedef CUresult CUDAAPI tcuEventQuery(CUevent hEvent);
typedef CUresult CUDAAPI tcuEventSynchronize(CUevent hEvent);
typedef CUresult CUDAAPI tcuEventDestroy_v2(CUevent hEvent);
typedef CUresult CUDAAPI tcuEventElapsedTime(float* pMilliseconds, CUevent hStart, CUevent hEnd);
/* Function types for external memory and external semaphore entry points:
 * import from a handle descriptor, obtain a mapped buffer or mipmapped
 * array from external memory, signal/wait on arrays of semaphores on a
 * stream, and destroy. The signal/wait variants take parallel semaphore
 * and parameter arrays with a shared element count. */
typedef CUresult CUDAAPI tcuImportExternalMemory(CUexternalMemory* extMem_out, const CUDA_EXTERNAL_MEMORY_HANDLE_DESC* memHandleDesc);
typedef CUresult CUDAAPI tcuExternalMemoryGetMappedBuffer(CUdeviceptr* devPtr, CUexternalMemory extMem, const CUDA_EXTERNAL_MEMORY_BUFFER_DESC* bufferDesc);
typedef CUresult CUDAAPI tcuExternalMemoryGetMappedMipmappedArray(CUmipmappedArray* mipmap, CUexternalMemory extMem, const CUDA_EXTERNAL_MEMORY_MIPMAPPED_ARRAY_DESC* mipmapDesc);
typedef CUresult CUDAAPI tcuDestroyExternalMemory(CUexternalMemory extMem);
typedef CUresult CUDAAPI tcuImportExternalSemaphore(CUexternalSemaphore* extSem_out, const CUDA_EXTERNAL_SEMAPHORE_HANDLE_DESC* semHandleDesc);
typedef CUresult CUDAAPI tcuSignalExternalSemaphoresAsync(const CUexternalSemaphore* extSemArray, const CUDA_EXTERNAL_SEMAPHORE_SIGNAL_PARAMS* paramsArray, unsigned int numExtSems, CUstream stream);
typedef CUresult CUDAAPI tcuWaitExternalSemaphoresAsync(const CUexternalSemaphore* extSemArray, const CUDA_EXTERNAL_SEMAPHORE_WAIT_PARAMS* paramsArray, unsigned int numExtSems, CUstream stream);
typedef CUresult CUDAAPI tcuDestroyExternalSemaphore(CUexternalSemaphore extSem);
/* Function types for stream memory operations: wait on or write a 32- or
 * 64-bit value at a CUdeviceptr address, and submit a batch of such
 * operations as an array of CUstreamBatchMemOpParams. The value width in
 * the name matches the cuuint32_t / cuuint64_t parameter type. */
typedef CUresult CUDAAPI tcuStreamWaitValue32(CUstream stream, CUdeviceptr addr, cuuint32_t value, unsigned int flags);
typedef CUresult CUDAAPI tcuStreamWaitValue64(CUstream stream, CUdeviceptr addr, cuuint64_t value, unsigned int flags);
typedef CUresult CUDAAPI tcuStreamWriteValue32(CUstream stream, CUdeviceptr addr, cuuint32_t value, unsigned int flags);
typedef CUresult CUDAAPI tcuStreamWriteValue64(CUstream stream, CUdeviceptr addr, cuuint64_t value, unsigned int flags);
typedef CUresult CUDAAPI tcuStreamBatchMemOp(CUstream stream, unsigned int count, CUstreamBatchMemOpParams* paramArray, unsigned int flags);
/* Function types for CUfunction attribute/configuration entry points and
 * the kernel launch entry points. The launch variants take grid and block
 * dimensions as six unsigned ints, a shared-memory byte count, a stream,
 * and kernel parameters as void**; only cuLaunchKernel has the trailing
 * 'extra' array. cuLaunchHostFunc takes a host function and user data. */
typedef CUresult CUDAAPI tcuFuncGetAttribute(int* pi, CUfunction_attribute attrib, CUfunction hfunc);
typedef CUresult CUDAAPI tcuFuncSetAttribute(CUfunction hfunc, CUfunction_attribute attrib, int value);
typedef CUresult CUDAAPI tcuFuncSetCacheConfig(CUfunction hfunc, CUfunc_cache config);
typedef CUresult CUDAAPI tcuFuncSetSharedMemConfig(CUfunction hfunc, CUsharedconfig config);
typedef CUresult CUDAAPI tcuLaunchKernel(CUfunction f, unsigned int gridDimX, unsigned int gridDimY, unsigned int gridDimZ, unsigned int blockDimX, unsigned int blockDimY, unsigned int blockDimZ, unsigned int sharedMemBytes, CUstream hStream, void** kernelParams, void** extra);
typedef CUresult CUDAAPI tcuLaunchCooperativeKernel(CUfunction f, unsigned int gridDimX, unsigned int gridDimY, unsigned int gridDimZ, unsigned int blockDimX, unsigned int blockDimY, unsigned int blockDimZ, unsigned int sharedMemBytes, CUstream hStream, void** kernelParams);
typedef CUresult CUDAAPI tcuLaunchCooperativeKernelMultiDevice(CUDA_LAUNCH_PARAMS* launchParamsList, unsigned int numDevices, unsigned int flags);
typedef CUresult CUDAAPI tcuLaunchHostFunc(CUstream hStream, CUhostFn fn, void* userData);
/* Function types for the per-function configuration style of launch:
 * block shape and shared size setters, cuParamSet* calls that place an
 * int, float, raw bytes or texture reference by offset/unit, and the
 * cuLaunch / cuLaunchGrid / cuLaunchGridAsync calls that take only the
 * function and grid width/height (plus a stream for the Async variant).
 * NOTE(review): these look like the older launch interface next to
 * cuLaunchKernel -- confirm against the CUDA driver API docs. */
typedef CUresult CUDAAPI tcuFuncSetBlockShape(CUfunction hfunc, int x, int y, int z);
typedef CUresult CUDAAPI tcuFuncSetSharedSize(CUfunction hfunc, unsigned int bytes);
typedef CUresult CUDAAPI tcuParamSetSize(CUfunction hfunc, unsigned int numbytes);
typedef CUresult CUDAAPI tcuParamSeti(CUfunction hfunc, int offset, unsigned int value);
typedef CUresult CUDAAPI tcuParamSetf(CUfunction hfunc, int offset, float value);
typedef CUresult CUDAAPI tcuParamSetv(CUfunction hfunc, int offset, void* ptr, unsigned int numbytes);
typedef CUresult CUDAAPI tcuLaunch(CUfunction f);
typedef CUresult CUDAAPI tcuLaunchGrid(CUfunction f, int grid_width, int grid_height);
typedef CUresult CUDAAPI tcuLaunchGridAsync(CUfunction f, int grid_width, int grid_height, CUstream hStream);
typedef CUresult CUDAAPI tcuParamSetTexRef(CUfunction hfunc, int texunit, CUtexref hTexRef);
/* Function types for graph creation and per-node-kind entry points. Every
 * cuGraphAdd*Node variant shares the leading parameters (output node,
 * graph, dependency array, dependency count) followed by kind-specific
 * arguments; the memcpy and memset variants additionally take a
 * CUcontext. Each node kind that carries parameters or an event has
 * matching Get/Set typedefs keyed on the CUgraphNode; the child-graph
 * kind has only a getter, and the empty kind has neither. */
typedef CUresult CUDAAPI tcuGraphCreate(CUgraph* phGraph, unsigned int flags);
typedef CUresult CUDAAPI tcuGraphAddKernelNode(CUgraphNode* phGraphNode, CUgraph hGraph, const CUgraphNode* dependencies, size_t numDependencies, const CUDA_KERNEL_NODE_PARAMS* nodeParams);
typedef CUresult CUDAAPI tcuGraphKernelNodeGetParams(CUgraphNode hNode, CUDA_KERNEL_NODE_PARAMS* nodeParams);
typedef CUresult CUDAAPI tcuGraphKernelNodeSetParams(CUgraphNode hNode, const CUDA_KERNEL_NODE_PARAMS* nodeParams);
typedef CUresult CUDAAPI tcuGraphAddMemcpyNode(CUgraphNode* phGraphNode, CUgraph hGraph, const CUgraphNode* dependencies, size_t numDependencies, const CUDA_MEMCPY3D* copyParams, CUcontext ctx);
typedef CUresult CUDAAPI tcuGraphMemcpyNodeGetParams(CUgraphNode hNode, CUDA_MEMCPY3D* nodeParams);
typedef CUresult CUDAAPI tcuGraphMemcpyNodeSetParams(CUgraphNode hNode, const CUDA_MEMCPY3D* nodeParams);
typedef CUresult CUDAAPI tcuGraphAddMemsetNode(CUgraphNode* phGraphNode, CUgraph hGraph, const CUgraphNode* dependencies, size_t numDependencies, const CUDA_MEMSET_NODE_PARAMS* memsetParams, CUcontext ctx);
typedef CUresult CUDAAPI tcuGraphMemsetNodeGetParams(CUgraphNode hNode, CUDA_MEMSET_NODE_PARAMS* nodeParams);
typedef CUresult CUDAAPI tcuGraphMemsetNodeSetParams(CUgraphNode hNode, const CUDA_MEMSET_NODE_PARAMS* nodeParams);
typedef CUresult CUDAAPI tcuGraphAddHostNode(CUgraphNode* phGraphNode, CUgraph hGraph, const CUgraphNode* dependencies, size_t numDependencies, const CUDA_HOST_NODE_PARAMS* nodeParams);
typedef CUresult CUDAAPI tcuGraphHostNodeGetParams(CUgraphNode hNode, CUDA_HOST_NODE_PARAMS* nodeParams);
typedef CUresult CUDAAPI tcuGraphHostNodeSetParams(CUgraphNode hNode, const CUDA_HOST_NODE_PARAMS* nodeParams);
typedef CUresult CUDAAPI tcuGraphAddChildGraphNode(CUgraphNode* phGraphNode, CUgraph hGraph, const CUgraphNode* dependencies, size_t numDependencies, CUgraph childGraph);
typedef CUresult CUDAAPI tcuGraphChildGraphNodeGetGraph(CUgraphNode hNode, CUgraph* phGraph);
typedef CUresult CUDAAPI tcuGraphAddEmptyNode(CUgraphNode* phGraphNode, CUgraph hGraph, const CUgraphNode* dependencies, size_t numDependencies);
typedef CUresult CUDAAPI tcuGraphAddEventRecordNode(CUgraphNode* phGraphNode, CUgraph hGraph, const CUgraphNode* dependencies, size_t numDependencies, CUevent event);
typedef CUresult CUDAAPI tcuGraphEventRecordNodeGetEvent(CUgraphNode hNode, CUevent* event_out);
typedef CUresult CUDAAPI tcuGraphEventRecordNodeSetEvent(CUgraphNode hNode, CUevent event);
typedef CUresult CUDAAPI tcuGraphAddEventWaitNode(CUgraphNode* phGraphNode, CUgraph hGraph, const CUgraphNode* dependencies, size_t numDependencies, CUevent event);
typedef CUresult CUDAAPI tcuGraphEventWaitNodeGetEvent(CUgraphNode hNode, CUevent* event_out);
typedef CUresult CUDAAPI tcuGraphEventWaitNodeSetEvent(CUgraphNode hNode, CUevent event);
typedef CUresult CUDAAPI tcuGraphAddExternalSemaphoresSignalNode(CUgraphNode* phGraphNode, CUgraph hGraph, const CUgraphNode* dependencies, size_t numDependencies, const CUDA_EXT_SEM_SIGNAL_NODE_PARAMS* nodeParams);
typedef CUresult CUDAAPI tcuGraphExternalSemaphoresSignalNodeGetParams(CUgraphNode hNode, CUDA_EXT_SEM_SIGNAL_NODE_PARAMS* params_out);
typedef CUresult CUDAAPI tcuGraphExternalSemaphoresSignalNodeSetParams(CUgraphNode hNode, const CUDA_EXT_SEM_SIGNAL_NODE_PARAMS* nodeParams);
typedef CUresult CUDAAPI tcuGraphAddExternalSemaphoresWaitNode(CUgraphNode* phGraphNode, CUgraph hGraph, const CUgraphNode* dependencies, size_t numDependencies, const CUDA_EXT_SEM_WAIT_NODE_PARAMS* nodeParams);
typedef CUresult CUDAAPI tcuGraphExternalSemaphoresWaitNodeGetParams(CUgraphNode hNode, CUDA_EXT_SEM_WAIT_NODE_PARAMS* params_out);
typedef CUresult CUDAAPI tcuGraphExternalSemaphoresWaitNodeSetParams(CUgraphNode hNode, const CUDA_EXT_SEM_WAIT_NODE_PARAMS* nodeParams);
/* Function types for graph cloning, topology queries and edge editing:
 * clone a graph and find a node's counterpart in the clone, query a
 * node's type, enumerate nodes / root nodes / edges / dependencies /
 * dependent nodes (each with a caller-supplied array and a size_t* count),
 * add or remove dependencies given parallel from/to arrays, and destroy a
 * node. */
typedef CUresult CUDAAPI tcuGraphClone(CUgraph* phGraphClone, CUgraph originalGraph);
typedef CUresult CUDAAPI tcuGraphNodeFindInClone(CUgraphNode* phNode, CUgraphNode hOriginalNode, CUgraph hClonedGraph);
typedef CUresult CUDAAPI tcuGraphNodeGetType(CUgraphNode hNode, CUgraphNodeType* type);
typedef CUresult CUDAAPI tcuGraphGetNodes(CUgraph hGraph, CUgraphNode* nodes, size_t* numNodes);
typedef CUresult CUDAAPI tcuGraphGetRootNodes(CUgraph hGraph, CUgraphNode* rootNodes, size_t* numRootNodes);
typedef CUresult CUDAAPI tcuGraphGetEdges(CUgraph hGraph, CUgraphNode* from, CUgraphNode* to, size_t* numEdges);
typedef CUresult CUDAAPI tcuGraphNodeGetDependencies(CUgraphNode hNode, CUgraphNode* dependencies, size_t* numDependencies);
typedef CUresult CUDAAPI tcuGraphNodeGetDependentNodes(CUgraphNode hNode, CUgraphNode* dependentNodes, size_t* numDependentNodes);
typedef CUresult CUDAAPI tcuGraphAddDependencies(CUgraph hGraph, const CUgraphNode* from, const CUgraphNode* to, size_t numDependencies);
typedef CUresult CUDAAPI tcuGraphRemoveDependencies(CUgraph hGraph, const CUgraphNode* from, const CUgraphNode* to, size_t numDependencies);
typedef CUresult CUDAAPI tcuGraphDestroyNode(CUgraphNode hNode);
/* Function types for executable-graph entry points: instantiate a
 * CUgraphExec from a CUgraph (with an error-node output and a log
 * buffer), update individual node parameters/events inside an
 * instantiated graph, upload and launch on a stream, destroy the exec or
 * the graph, and whole-graph update reporting a result code and error
 * node. The last three typedefs cover kernel-node attribute copy/get/set
 * on plain CUgraphNode handles. */
typedef CUresult CUDAAPI tcuGraphInstantiate_v2(CUgraphExec* phGraphExec, CUgraph hGraph, CUgraphNode* phErrorNode, char* logBuffer, size_t bufferSize);
typedef CUresult CUDAAPI tcuGraphExecKernelNodeSetParams(CUgraphExec hGraphExec, CUgraphNode hNode, const CUDA_KERNEL_NODE_PARAMS* nodeParams);
typedef CUresult CUDAAPI tcuGraphExecMemcpyNodeSetParams(CUgraphExec hGraphExec, CUgraphNode hNode, const CUDA_MEMCPY3D* copyParams, CUcontext ctx);
typedef CUresult CUDAAPI tcuGraphExecMemsetNodeSetParams(CUgraphExec hGraphExec, CUgraphNode hNode, const CUDA_MEMSET_NODE_PARAMS* memsetParams, CUcontext ctx);
typedef CUresult CUDAAPI tcuGraphExecHostNodeSetParams(CUgraphExec hGraphExec, CUgraphNode hNode, const CUDA_HOST_NODE_PARAMS* nodeParams);
typedef CUresult CUDAAPI tcuGraphExecChildGraphNodeSetParams(CUgraphExec hGraphExec, CUgraphNode hNode, CUgraph childGraph);
typedef CUresult CUDAAPI tcuGraphExecEventRecordNodeSetEvent(CUgraphExec hGraphExec, CUgraphNode hNode, CUevent event);
typedef CUresult CUDAAPI tcuGraphExecEventWaitNodeSetEvent(CUgraphExec hGraphExec, CUgraphNode hNode, CUevent event);
typedef CUresult CUDAAPI tcuGraphExecExternalSemaphoresSignalNodeSetParams(CUgraphExec hGraphExec, CUgraphNode hNode, const CUDA_EXT_SEM_SIGNAL_NODE_PARAMS* nodeParams);
typedef CUresult CUDAAPI tcuGraphExecExternalSemaphoresWaitNodeSetParams(CUgraphExec hGraphExec, CUgraphNode hNode, const CUDA_EXT_SEM_WAIT_NODE_PARAMS* nodeParams);
typedef CUresult CUDAAPI tcuGraphUpload(CUgraphExec hGraphExec, CUstream hStream);
typedef CUresult CUDAAPI tcuGraphLaunch(CUgraphExec hGraphExec, CUstream hStream);
typedef CUresult CUDAAPI tcuGraphExecDestroy(CUgraphExec hGraphExec);
typedef CUresult CUDAAPI tcuGraphDestroy(CUgraph hGraph);
typedef CUresult CUDAAPI tcuGraphExecUpdate(CUgraphExec hGraphExec, CUgraph hGraph, CUgraphNode* hErrorNode_out, CUgraphExecUpdateResult* updateResult_out);
typedef CUresult CUDAAPI tcuGraphKernelNodeCopyAttributes(CUgraphNode dst, CUgraphNode src);
typedef CUresult CUDAAPI tcuGraphKernelNodeGetAttribute(CUgraphNode hNode, CUkernelNodeAttrID attr, CUkernelNodeAttrValue* value_out);
typedef CUresult CUDAAPI tcuGraphKernelNodeSetAttribute(CUgraphNode hNode, CUkernelNodeAttrID attr, const CUkernelNodeAttrValue* value);
/* Function types for the cuOccupancy* entry points. Results are written
 * through the leading pointer parameter(s); the *WithFlags variants are
 * the same signatures with a trailing unsigned int flags. The
 * MaxPotentialBlockSize variants accept a CUoccupancyB2DSize callback in
 * addition to a fixed dynamic shared-memory size. */
typedef CUresult CUDAAPI tcuOccupancyMaxActiveBlocksPerMultiprocessor(int* numBlocks, CUfunction func, int blockSize, size_t dynamicSMemSize);
typedef CUresult CUDAAPI tcuOccupancyMaxActiveBlocksPerMultiprocessorWithFlags(int* numBlocks, CUfunction func, int blockSize, size_t dynamicSMemSize, unsigned int flags);
typedef CUresult CUDAAPI tcuOccupancyMaxPotentialBlockSize(int* minGridSize, int* blockSize, CUfunction func, CUoccupancyB2DSize blockSizeToDynamicSMemSize, size_t dynamicSMemSize, int blockSizeLimit);
typedef CUresult CUDAAPI tcuOccupancyMaxPotentialBlockSizeWithFlags(int* minGridSize, int* blockSize, CUfunction func, CUoccupancyB2DSize blockSizeToDynamicSMemSize, size_t dynamicSMemSize, int blockSizeLimit, unsigned int flags);
typedef CUresult CUDAAPI tcuOccupancyAvailableDynamicSMemPerBlock(size_t* dynamicSmemSize, CUfunction func, int numBlocks, int blockSize);
/* Function types for the cuTexRef* entry points. Setters take the
 * CUtexref first, except cuTexRefSetAddress_v2 whose first parameter is a
 * size_t* byte-offset output. Getters place their output pointer(s) first
 * and the CUtexref after them. Note the asymmetry in max anisotropy: the
 * setter takes unsigned int while the getter writes through int*.
 * Create/Destroy close the group. */
typedef CUresult CUDAAPI tcuTexRefSetArray(CUtexref hTexRef, CUarray hArray, unsigned int Flags);
typedef CUresult CUDAAPI tcuTexRefSetMipmappedArray(CUtexref hTexRef, CUmipmappedArray hMipmappedArray, unsigned int Flags);
typedef CUresult CUDAAPI tcuTexRefSetAddress_v2(size_t* ByteOffset, CUtexref hTexRef, CUdeviceptr dptr, size_t bytes);
typedef CUresult CUDAAPI tcuTexRefSetAddress2D_v3(CUtexref hTexRef, const CUDA_ARRAY_DESCRIPTOR* desc, CUdeviceptr dptr, size_t Pitch);
typedef CUresult CUDAAPI tcuTexRefSetFormat(CUtexref hTexRef, CUarray_format fmt, int NumPackedComponents);
typedef CUresult CUDAAPI tcuTexRefSetAddressMode(CUtexref hTexRef, int dim, CUaddress_mode am);
typedef CUresult CUDAAPI tcuTexRefSetFilterMode(CUtexref hTexRef, CUfilter_mode fm);
typedef CUresult CUDAAPI tcuTexRefSetMipmapFilterMode(CUtexref hTexRef, CUfilter_mode fm);
typedef CUresult CUDAAPI tcuTexRefSetMipmapLevelBias(CUtexref hTexRef, float bias);
typedef CUresult CUDAAPI tcuTexRefSetMipmapLevelClamp(CUtexref hTexRef, float minMipmapLevelClamp, float maxMipmapLevelClamp);
typedef CUresult CUDAAPI tcuTexRefSetMaxAnisotropy(CUtexref hTexRef, unsigned int maxAniso);
typedef CUresult CUDAAPI tcuTexRefSetBorderColor(CUtexref hTexRef, float* pBorderColor);
typedef CUresult CUDAAPI tcuTexRefSetFlags(CUtexref hTexRef, unsigned int Flags);
typedef CUresult CUDAAPI tcuTexRefGetAddress_v2(CUdeviceptr* pdptr, CUtexref hTexRef);
typedef CUresult CUDAAPI tcuTexRefGetArray(CUarray* phArray, CUtexref hTexRef);
typedef CUresult CUDAAPI tcuTexRefGetMipmappedArray(CUmipmappedArray* phMipmappedArray, CUtexref hTexRef);
typedef CUresult CUDAAPI tcuTexRefGetAddressMode(CUaddress_mode* pam, CUtexref hTexRef, int dim);
typedef CUresult CUDAAPI tcuTexRefGetFilterMode(CUfilter_mode* pfm, CUtexref hTexRef);
typedef CUresult CUDAAPI tcuTexRefGetFormat(CUarray_format* pFormat, int* pNumChannels, CUtexref hTexRef);
typedef CUresult CUDAAPI tcuTexRefGetMipmapFilterMode(CUfilter_mode* pfm, CUtexref hTexRef);
typedef CUresult CUDAAPI tcuTexRefGetMipmapLevelBias(float* pbias, CUtexref hTexRef);
typedef CUresult CUDAAPI tcuTexRefGetMipmapLevelClamp(float* pminMipmapLevelClamp, float* pmaxMipmapLevelClamp, CUtexref hTexRef);
typedef CUresult CUDAAPI tcuTexRefGetMaxAnisotropy(int* pmaxAniso, CUtexref hTexRef);
typedef CUresult CUDAAPI tcuTexRefGetBorderColor(float* pBorderColor, CUtexref hTexRef);
typedef CUresult CUDAAPI tcuTexRefGetFlags(unsigned int* pFlags, CUtexref hTexRef);
typedef CUresult CUDAAPI tcuTexRefCreate(CUtexref* pTexRef);
typedef CUresult CUDAAPI tcuTexRefDestroy(CUtexref hTexRef);
/* Function types for the cuSurfRef* entry points: bind a CUarray to a
 * surface reference, and read the bound CUarray back. */
typedef CUresult CUDAAPI tcuSurfRefSetArray(CUsurfref hSurfRef, CUarray hArray, unsigned int Flags);
typedef CUresult CUDAAPI tcuSurfRefGetArray(CUarray* phArray, CUsurfref hSurfRef);
/* Function types for texture-object and surface-object entry points:
 * create from descriptor structs, destroy, and read descriptors back.
 * Texture objects take resource, texture and resource-view descriptors;
 * surface objects take only a resource descriptor. */
typedef CUresult CUDAAPI tcuTexObjectCreate(CUtexObject* pTexObject, const CUDA_RESOURCE_DESC* pResDesc, const CUDA_TEXTURE_DESC* pTexDesc, const CUDA_RESOURCE_VIEW_DESC* pResViewDesc);
typedef CUresult CUDAAPI tcuTexObjectDestroy(CUtexObject texObject);
typedef CUresult CUDAAPI tcuTexObjectGetResourceDesc(CUDA_RESOURCE_DESC* pResDesc, CUtexObject texObject);
typedef CUresult CUDAAPI tcuTexObjectGetTextureDesc(CUDA_TEXTURE_DESC* pTexDesc, CUtexObject texObject);
typedef CUresult CUDAAPI tcuTexObjectGetResourceViewDesc(CUDA_RESOURCE_VIEW_DESC* pResViewDesc, CUtexObject texObject);
typedef CUresult CUDAAPI tcuSurfObjectCreate(CUsurfObject* pSurfObject, const CUDA_RESOURCE_DESC* pResDesc);
typedef CUresult CUDAAPI tcuSurfObjectDestroy(CUsurfObject surfObject);
typedef CUresult CUDAAPI tcuSurfObjectGetResourceDesc(CUDA_RESOURCE_DESC* pResDesc, CUsurfObject surfObject);
/* Function types for peer-access entry points: query whether one device
 * can access another, enable/disable access to a peer context, and query
 * a P2P attribute between a source and destination device. Query results
 * are written through the leading int*. */
typedef CUresult CUDAAPI tcuDeviceCanAccessPeer(int* canAccessPeer, CUdevice dev, CUdevice peerDev);
typedef CUresult CUDAAPI tcuCtxEnablePeerAccess(CUcontext peerContext, unsigned int Flags);
typedef CUresult CUDAAPI tcuCtxDisablePeerAccess(CUcontext peerContext);
typedef CUresult CUDAAPI tcuDeviceGetP2PAttribute(int* value, CUdevice_P2PAttribute attrib, CUdevice srcDevice, CUdevice dstDevice);
/* Function types for the cuGraphics* entry points operating on
 * CUgraphicsResource handles: unregister, fetch the mapped array /
 * mipmapped array / device pointer and size, set map flags, and map or
 * unmap an array of resources on a stream. No register call appears in
 * this group. */
typedef CUresult CUDAAPI tcuGraphicsUnregisterResource(CUgraphicsResource resource);
typedef CUresult CUDAAPI tcuGraphicsSubResourceGetMappedArray(CUarray* pArray, CUgraphicsResource resource, unsigned int arrayIndex, unsigned int mipLevel);
typedef CUresult CUDAAPI tcuGraphicsResourceGetMappedMipmappedArray(CUmipmappedArray* pMipmappedArray, CUgraphicsResource resource);
typedef CUresult CUDAAPI tcuGraphicsResourceGetMappedPointer_v2(CUdeviceptr* pDevPtr, size_t* pSize, CUgraphicsResource resource);
typedef CUresult CUDAAPI tcuGraphicsResourceSetMapFlags_v2(CUgraphicsResource resource, unsigned int flags);
typedef CUresult CUDAAPI tcuGraphicsMapResources(unsigned int count, CUgraphicsResource* resources, CUstream hStream);
typedef CUresult CUDAAPI tcuGraphicsUnmapResources(unsigned int count, CUgraphicsResource* resources, CUstream hStream);
/* Function types for two standalone entry points: cuGetExportTable, which
 * writes a const void* table pointer selected by a CUuuid, and
 * cuFuncGetModule, which writes the CUmodule associated with a
 * CUfunction. These are the last CUresult-returning typedefs before the
 * nvrtc group. */
typedef CUresult CUDAAPI tcuGetExportTable(const void** ppExportTable, const CUuuid* pExportTableId);
typedef CUresult CUDAAPI tcuFuncGetModule(CUmodule* hmod, CUfunction hfunc);
/* Function types for the nvrtc* entry points, named 't' + symbol like the
 * driver typedefs before them. All return nvrtcResult except
 * nvrtcGetErrorString, which returns const char*. Covers version and
 * supported-architecture queries, program create/destroy/compile,
 * size-then-data getters for PTX, CUBIN and the program log, and name
 * expression registration with lowered-name lookup. Note that
 * nvrtcDestroyProgram takes a pointer to the program handle, while the
 * other calls take the handle by value. */
typedef const char* CUDAAPI tnvrtcGetErrorString(nvrtcResult result);
typedef nvrtcResult CUDAAPI tnvrtcVersion(int* major, int* minor);
typedef nvrtcResult CUDAAPI tnvrtcGetNumSupportedArchs(int* numArchs);
typedef nvrtcResult CUDAAPI tnvrtcGetSupportedArchs(int* supportedArchs);
typedef nvrtcResult CUDAAPI tnvrtcCreateProgram(nvrtcProgram* prog, const char* src, const char* name, int numHeaders, const char** headers, const char** includeNames);
typedef nvrtcResult CUDAAPI tnvrtcDestroyProgram(nvrtcProgram* prog);
typedef nvrtcResult CUDAAPI tnvrtcCompileProgram(nvrtcProgram prog, int numOptions, const char** options);
typedef nvrtcResult CUDAAPI tnvrtcGetPTXSize(nvrtcProgram prog, size_t* ptxSizeRet);
typedef nvrtcResult CUDAAPI tnvrtcGetPTX(nvrtcProgram prog, char* ptx);
typedef nvrtcResult CUDAAPI tnvrtcGetCUBINSize(nvrtcProgram prog, size_t* cubinSizeRet);
typedef nvrtcResult CUDAAPI tnvrtcGetCUBIN(nvrtcProgram prog, char* cubin);
typedef nvrtcResult CUDAAPI tnvrtcGetProgramLogSize(nvrtcProgram prog, size_t* logSizeRet);
typedef nvrtcResult CUDAAPI tnvrtcGetProgramLog(nvrtcProgram prog, char* log);
typedef nvrtcResult CUDAAPI tnvrtcAddNameExpression(nvrtcProgram prog, const char* name_expression);
typedef nvrtcResult CUDAAPI tnvrtcGetLoweredName(nvrtcProgram prog, const char* name_expression, const char** lowered_name);
/* cuDNN function-type typedefs for library-level calls: version queries (returning size_t), error string, runtime error query, property query, handle create/destroy, and stream set/get. */
typedef size_t CUDAAPI tcudnnGetVersion(void);
typedef size_t CUDAAPI tcudnnGetCudartVersion(void);
typedef const char* CUDAAPI tcudnnGetErrorString(cudnnStatus_t status);
typedef cudnnStatus_t CUDAAPI tcudnnQueryRuntimeError(cudnnHandle_t handle, cudnnStatus_t* rstatus, cudnnErrQueryMode_t mode, cudnnRuntimeTag_t* tag);
typedef cudnnStatus_t CUDAAPI tcudnnGetProperty(libraryPropertyType type, int* value);
typedef cudnnStatus_t CUDAAPI tcudnnCreate(cudnnHandle_t* handle);
typedef cudnnStatus_t CUDAAPI tcudnnDestroy(cudnnHandle_t handle);
typedef cudnnStatus_t CUDAAPI tcudnnSetStream(cudnnHandle_t handle, cudaStream_t streamId);
typedef cudnnStatus_t CUDAAPI tcudnnGetStream(cudnnHandle_t handle, cudaStream_t* streamId);
/* cuDNN tensor-descriptor typedefs: create/destroy, 4d and Nd setters and getters (with and without explicit strides), and the size-in-bytes query. */
typedef cudnnStatus_t CUDAAPI tcudnnCreateTensorDescriptor(cudnnTensorDescriptor_t* tensorDesc);
typedef cudnnStatus_t CUDAAPI tcudnnSetTensor4dDescriptor(cudnnTensorDescriptor_t tensorDesc, cudnnTensorFormat_t format, cudnnDataType_t dataType, int n, int c, int h, int w);
typedef cudnnStatus_t CUDAAPI tcudnnSetTensor4dDescriptorEx(cudnnTensorDescriptor_t tensorDesc, cudnnDataType_t dataType, int n, int c, int h, int w, int nStride, int cStride, int hStride, int wStride);
typedef cudnnStatus_t CUDAAPI tcudnnGetTensor4dDescriptor(const cudnnTensorDescriptor_t tensorDesc, cudnnDataType_t* dataType, int* n, int* c, int* h, int* w, int* nStride, int* cStride, int* hStride, int* wStride);
typedef cudnnStatus_t CUDAAPI tcudnnSetTensorNdDescriptor(cudnnTensorDescriptor_t tensorDesc, cudnnDataType_t dataType, int nbDims, const int dimA[], const int strideA[]);
typedef cudnnStatus_t CUDAAPI tcudnnSetTensorNdDescriptorEx(cudnnTensorDescriptor_t tensorDesc, cudnnTensorFormat_t format, cudnnDataType_t dataType, int nbDims, const int dimA[]);
typedef cudnnStatus_t CUDAAPI tcudnnGetTensorNdDescriptor(const cudnnTensorDescriptor_t tensorDesc, int nbDimsRequested, cudnnDataType_t* dataType, int* nbDims, int dimA[], int strideA[]);
typedef cudnnStatus_t CUDAAPI tcudnnGetTensorSizeInBytes(const cudnnTensorDescriptor_t tensorDesc, size_t* size);
typedef cudnnStatus_t CUDAAPI tcudnnDestroyTensorDescriptor(cudnnTensorDescriptor_t tensorDesc);
/* cuDNN tensor-transform typedefs: transform-descriptor create/set/get/destroy, destination initialisation, the two transform calls, and tensor add.
 * The fixed-width parameter types used here (uint32_t, int32_t) need <stdint.h>; only <stdlib.h> is visible at the top of this header - TODO confirm it is included elsewhere. */
typedef cudnnStatus_t CUDAAPI tcudnnInitTransformDest(const cudnnTensorTransformDescriptor_t transformDesc, const cudnnTensorDescriptor_t srcDesc, cudnnTensorDescriptor_t destDesc, size_t* destSizeInBytes);
typedef cudnnStatus_t CUDAAPI tcudnnCreateTensorTransformDescriptor(cudnnTensorTransformDescriptor_t* transformDesc);
typedef cudnnStatus_t CUDAAPI tcudnnSetTensorTransformDescriptor(cudnnTensorTransformDescriptor_t transformDesc, const uint32_t nbDims, const cudnnTensorFormat_t destFormat, const int32_t padBeforeA[], const int32_t padAfterA[], const uint32_t foldA[], const cudnnFoldingDirection_t direction);
typedef cudnnStatus_t CUDAAPI tcudnnGetTensorTransformDescriptor(cudnnTensorTransformDescriptor_t transformDesc, uint32_t nbDimsRequested, cudnnTensorFormat_t* destFormat, int32_t padBeforeA[], int32_t padAfterA[], uint32_t foldA[], cudnnFoldingDirection_t* direction);
typedef cudnnStatus_t CUDAAPI tcudnnDestroyTensorTransformDescriptor(cudnnTensorTransformDescriptor_t transformDesc);
typedef cudnnStatus_t CUDAAPI tcudnnTransformTensor(cudnnHandle_t handle, const void* alpha, const cudnnTensorDescriptor_t xDesc, const void* x, const void* beta, const cudnnTensorDescriptor_t yDesc, void* y);
typedef cudnnStatus_t CUDAAPI tcudnnTransformTensorEx(cudnnHandle_t handle, const cudnnTensorTransformDescriptor_t transDesc, const void* alpha, const cudnnTensorDescriptor_t srcDesc, const void* srcData, const void* beta, const cudnnTensorDescriptor_t destDesc, void* destData);
typedef cudnnStatus_t CUDAAPI tcudnnAddTensor(cudnnHandle_t handle, const void* alpha, const cudnnTensorDescriptor_t aDesc, const void* A, const void* beta, const cudnnTensorDescriptor_t cDesc, void* C);
/* cuDNN OpTensor typedefs: descriptor create/set/get/destroy and the OpTensor call taking two const inputs (A, B) and one writable output (C). */
typedef cudnnStatus_t CUDAAPI tcudnnCreateOpTensorDescriptor(cudnnOpTensorDescriptor_t* opTensorDesc);
typedef cudnnStatus_t CUDAAPI tcudnnSetOpTensorDescriptor(cudnnOpTensorDescriptor_t opTensorDesc, cudnnOpTensorOp_t opTensorOp, cudnnDataType_t opTensorCompType, cudnnNanPropagation_t opTensorNanOpt);
typedef cudnnStatus_t CUDAAPI tcudnnGetOpTensorDescriptor(const cudnnOpTensorDescriptor_t opTensorDesc, cudnnOpTensorOp_t* opTensorOp, cudnnDataType_t* opTensorCompType, cudnnNanPropagation_t* opTensorNanOpt);
typedef cudnnStatus_t CUDAAPI tcudnnDestroyOpTensorDescriptor(cudnnOpTensorDescriptor_t opTensorDesc);
typedef cudnnStatus_t CUDAAPI tcudnnOpTensor(cudnnHandle_t handle, const cudnnOpTensorDescriptor_t opTensorDesc, const void* alpha1, const cudnnTensorDescriptor_t aDesc, const void* A, const void* alpha2, const cudnnTensorDescriptor_t bDesc, const void* B, const void* beta, const cudnnTensorDescriptor_t cDesc, void* C);
/* cuDNN tensor-reduction typedefs: reduce-descriptor create/set/get/destroy, indices and workspace size queries, and the reduce call; followed by the set-tensor and scale-tensor signatures. */
typedef cudnnStatus_t CUDAAPI tcudnnCreateReduceTensorDescriptor(cudnnReduceTensorDescriptor_t* reduceTensorDesc);
typedef cudnnStatus_t CUDAAPI tcudnnSetReduceTensorDescriptor(cudnnReduceTensorDescriptor_t reduceTensorDesc, cudnnReduceTensorOp_t reduceTensorOp, cudnnDataType_t reduceTensorCompType, cudnnNanPropagation_t reduceTensorNanOpt, cudnnReduceTensorIndices_t reduceTensorIndices, cudnnIndicesType_t reduceTensorIndicesType);
typedef cudnnStatus_t CUDAAPI tcudnnGetReduceTensorDescriptor(const cudnnReduceTensorDescriptor_t reduceTensorDesc, cudnnReduceTensorOp_t* reduceTensorOp, cudnnDataType_t* reduceTensorCompType, cudnnNanPropagation_t* reduceTensorNanOpt, cudnnReduceTensorIndices_t* reduceTensorIndices, cudnnIndicesType_t* reduceTensorIndicesType);
typedef cudnnStatus_t CUDAAPI tcudnnDestroyReduceTensorDescriptor(cudnnReduceTensorDescriptor_t reduceTensorDesc);
typedef cudnnStatus_t CUDAAPI tcudnnGetReductionIndicesSize(cudnnHandle_t handle, const cudnnReduceTensorDescriptor_t reduceTensorDesc, const cudnnTensorDescriptor_t aDesc, const cudnnTensorDescriptor_t cDesc, size_t* sizeInBytes);
typedef cudnnStatus_t CUDAAPI tcudnnGetReductionWorkspaceSize(cudnnHandle_t handle, const cudnnReduceTensorDescriptor_t reduceTensorDesc, const cudnnTensorDescriptor_t aDesc, const cudnnTensorDescriptor_t cDesc, size_t* sizeInBytes);
typedef cudnnStatus_t CUDAAPI tcudnnReduceTensor(cudnnHandle_t handle, const cudnnReduceTensorDescriptor_t reduceTensorDesc, void* indices, size_t indicesSizeInBytes, void* workspace, size_t workspaceSizeInBytes, const void* alpha, const cudnnTensorDescriptor_t aDesc, const void* A, const void* beta, const cudnnTensorDescriptor_t cDesc, void* C);
typedef cudnnStatus_t CUDAAPI tcudnnSetTensor(cudnnHandle_t handle, const cudnnTensorDescriptor_t yDesc, void* y, const void* valuePtr);
typedef cudnnStatus_t CUDAAPI tcudnnScaleTensor(cudnnHandle_t handle, const cudnnTensorDescriptor_t yDesc, void* y, const void* alpha);
/* cuDNN filter-descriptor typedefs: create/destroy, 4d and Nd setters and getters, size-in-bytes query, and the filter transform call. */
typedef cudnnStatus_t CUDAAPI tcudnnCreateFilterDescriptor(cudnnFilterDescriptor_t* filterDesc);
typedef cudnnStatus_t CUDAAPI tcudnnSetFilter4dDescriptor(cudnnFilterDescriptor_t filterDesc, cudnnDataType_t dataType, cudnnTensorFormat_t format, int k, int c, int h, int w);
typedef cudnnStatus_t CUDAAPI tcudnnGetFilter4dDescriptor(const cudnnFilterDescriptor_t filterDesc, cudnnDataType_t* dataType, cudnnTensorFormat_t* format, int* k, int* c, int* h, int* w);
typedef cudnnStatus_t CUDAAPI tcudnnSetFilterNdDescriptor(cudnnFilterDescriptor_t filterDesc, cudnnDataType_t dataType, cudnnTensorFormat_t format, int nbDims, const int filterDimA[]);
typedef cudnnStatus_t CUDAAPI tcudnnGetFilterNdDescriptor(const cudnnFilterDescriptor_t filterDesc, int nbDimsRequested, cudnnDataType_t* dataType, cudnnTensorFormat_t* format, int* nbDims, int filterDimA[]);
typedef cudnnStatus_t CUDAAPI tcudnnGetFilterSizeInBytes(const cudnnFilterDescriptor_t filterDesc, size_t* size);
typedef cudnnStatus_t CUDAAPI tcudnnTransformFilter(cudnnHandle_t handle, const cudnnTensorTransformDescriptor_t transDesc, const void* alpha, const cudnnFilterDescriptor_t srcDesc, const void* srcData, const void* beta, const cudnnFilterDescriptor_t destDesc, void* destData);
typedef cudnnStatus_t CUDAAPI tcudnnDestroyFilterDescriptor(cudnnFilterDescriptor_t filterDesc);
/* cuDNN softmax-forward typedef, followed by pooling typedefs: descriptor create/destroy, 2d and Nd setters and getters, forward output-dimension queries, and pooling forward. */
typedef cudnnStatus_t CUDAAPI tcudnnSoftmaxForward(cudnnHandle_t handle, cudnnSoftmaxAlgorithm_t algo, cudnnSoftmaxMode_t mode, const void* alpha, const cudnnTensorDescriptor_t xDesc, const void* x, const void* beta, const cudnnTensorDescriptor_t yDesc, void* y);
typedef cudnnStatus_t CUDAAPI tcudnnCreatePoolingDescriptor(cudnnPoolingDescriptor_t* poolingDesc);
typedef cudnnStatus_t CUDAAPI tcudnnSetPooling2dDescriptor(cudnnPoolingDescriptor_t poolingDesc, cudnnPoolingMode_t mode, cudnnNanPropagation_t maxpoolingNanOpt, int windowHeight, int windowWidth, int verticalPadding, int horizontalPadding, int verticalStride, int horizontalStride);
typedef cudnnStatus_t CUDAAPI tcudnnGetPooling2dDescriptor(const cudnnPoolingDescriptor_t poolingDesc, cudnnPoolingMode_t* mode, cudnnNanPropagation_t* maxpoolingNanOpt, int* windowHeight, int* windowWidth, int* verticalPadding, int* horizontalPadding, int* verticalStride, int* horizontalStride);
typedef cudnnStatus_t CUDAAPI tcudnnSetPoolingNdDescriptor(cudnnPoolingDescriptor_t poolingDesc, const cudnnPoolingMode_t mode, const cudnnNanPropagation_t maxpoolingNanOpt, int nbDims, const int windowDimA[], const int paddingA[], const int strideA[]);
typedef cudnnStatus_t CUDAAPI tcudnnGetPoolingNdDescriptor(const cudnnPoolingDescriptor_t poolingDesc, int nbDimsRequested, cudnnPoolingMode_t* mode, cudnnNanPropagation_t* maxpoolingNanOpt, int* nbDims, int windowDimA[], int paddingA[], int strideA[]);
typedef cudnnStatus_t CUDAAPI tcudnnGetPoolingNdForwardOutputDim(const cudnnPoolingDescriptor_t poolingDesc, const cudnnTensorDescriptor_t inputTensorDesc, int nbDims, int outputTensorDimA[]);
typedef cudnnStatus_t CUDAAPI tcudnnGetPooling2dForwardOutputDim(const cudnnPoolingDescriptor_t poolingDesc, const cudnnTensorDescriptor_t inputTensorDesc, int* n, int* c, int* h, int* w);
typedef cudnnStatus_t CUDAAPI tcudnnDestroyPoolingDescriptor(cudnnPoolingDescriptor_t poolingDesc);
typedef cudnnStatus_t CUDAAPI tcudnnPoolingForward(cudnnHandle_t handle, const cudnnPoolingDescriptor_t poolingDesc, const void* alpha, const cudnnTensorDescriptor_t xDesc, const void* x, const void* beta, const cudnnTensorDescriptor_t yDesc, void* y);
/* cuDNN activation typedefs: descriptor create/set/get/destroy and activation forward. */
typedef cudnnStatus_t CUDAAPI tcudnnCreateActivationDescriptor(cudnnActivationDescriptor_t* activationDesc);
typedef cudnnStatus_t CUDAAPI tcudnnSetActivationDescriptor(cudnnActivationDescriptor_t activationDesc, cudnnActivationMode_t mode, cudnnNanPropagation_t reluNanOpt, double coef);
typedef cudnnStatus_t CUDAAPI tcudnnGetActivationDescriptor(const cudnnActivationDescriptor_t activationDesc, cudnnActivationMode_t* mode, cudnnNanPropagation_t* reluNanOpt, double* coef);
typedef cudnnStatus_t CUDAAPI tcudnnDestroyActivationDescriptor(cudnnActivationDescriptor_t activationDesc);
typedef cudnnStatus_t CUDAAPI tcudnnActivationForward(cudnnHandle_t handle, cudnnActivationDescriptor_t activationDesc, const void* alpha, const cudnnTensorDescriptor_t xDesc, const void* x, const void* beta, const cudnnTensorDescriptor_t yDesc, void* y);
/* cuDNN LRN typedefs: descriptor create/set/get/destroy, cross-channel LRN forward, and divisive normalization forward (which also takes a cudnnLRNDescriptor_t). */
typedef cudnnStatus_t CUDAAPI tcudnnCreateLRNDescriptor(cudnnLRNDescriptor_t* normDesc);
typedef cudnnStatus_t CUDAAPI tcudnnSetLRNDescriptor(cudnnLRNDescriptor_t normDesc, unsigned lrnN, double lrnAlpha, double lrnBeta, double lrnK);
typedef cudnnStatus_t CUDAAPI tcudnnGetLRNDescriptor(cudnnLRNDescriptor_t normDesc, unsigned* lrnN, double* lrnAlpha, double* lrnBeta, double* lrnK);
typedef cudnnStatus_t CUDAAPI tcudnnDestroyLRNDescriptor(cudnnLRNDescriptor_t lrnDesc);
typedef cudnnStatus_t CUDAAPI tcudnnLRNCrossChannelForward(cudnnHandle_t handle, cudnnLRNDescriptor_t normDesc, cudnnLRNMode_t lrnMode, const void* alpha, const cudnnTensorDescriptor_t xDesc, const void* x, const void* beta, const cudnnTensorDescriptor_t yDesc, void* y);
typedef cudnnStatus_t CUDAAPI tcudnnDivisiveNormalizationForward(cudnnHandle_t handle, cudnnLRNDescriptor_t normDesc, cudnnDivNormMode_t mode, const void* alpha, const cudnnTensorDescriptor_t xDesc, const void* x, const void* means, void* temp, void* temp2, const void* beta, const cudnnTensorDescriptor_t yDesc, void* y);
/* cuDNN inference-time normalization typedefs: the batch-norm pair (derive descriptor, forward inference) and the Norm* pair, whose signatures add a group count and separate scale/bias and mean/variance descriptors. */
typedef cudnnStatus_t CUDAAPI tcudnnDeriveBNTensorDescriptor(cudnnTensorDescriptor_t derivedBnDesc, const cudnnTensorDescriptor_t xDesc, cudnnBatchNormMode_t mode);
typedef cudnnStatus_t CUDAAPI tcudnnBatchNormalizationForwardInference(cudnnHandle_t handle, cudnnBatchNormMode_t mode, const void* alpha, const void* beta, const cudnnTensorDescriptor_t xDesc, const void* x, const cudnnTensorDescriptor_t yDesc, void* y, const cudnnTensorDescriptor_t bnScaleBiasMeanVarDesc, const void* bnScale, const void* bnBias, const void* estimatedMean, const void* estimatedVariance, double epsilon);
typedef cudnnStatus_t CUDAAPI tcudnnDeriveNormTensorDescriptor(cudnnTensorDescriptor_t derivedNormScaleBiasDesc, cudnnTensorDescriptor_t derivedNormMeanVarDesc, const cudnnTensorDescriptor_t xDesc, cudnnNormMode_t mode, int groupCnt);
typedef cudnnStatus_t CUDAAPI tcudnnNormalizationForwardInference(cudnnHandle_t handle, cudnnNormMode_t mode, cudnnNormOps_t normOps, cudnnNormAlgo_t algo, const void* alpha, const void* beta, const cudnnTensorDescriptor_t xDesc, const void* x, const cudnnTensorDescriptor_t normScaleBiasDesc, const void* normScale, const void* normBias, const cudnnTensorDescriptor_t normMeanVarDesc, const void* estimatedMean, const void* estimatedVariance, const cudnnTensorDescriptor_t zDesc, const void* z, cudnnActivationDescriptor_t activationDesc, const cudnnTensorDescriptor_t yDesc, void* y, double epsilon, int groupCnt);
/* cuDNN spatial-transformer typedefs: descriptor create/set/destroy, grid generator forward, and sampler forward. */
typedef cudnnStatus_t CUDAAPI tcudnnCreateSpatialTransformerDescriptor(cudnnSpatialTransformerDescriptor_t* stDesc);
typedef cudnnStatus_t CUDAAPI tcudnnSetSpatialTransformerNdDescriptor(cudnnSpatialTransformerDescriptor_t stDesc, cudnnSamplerType_t samplerType, cudnnDataType_t dataType, const int nbDims, const int dimA[]);
typedef cudnnStatus_t CUDAAPI tcudnnDestroySpatialTransformerDescriptor(cudnnSpatialTransformerDescriptor_t stDesc);
typedef cudnnStatus_t CUDAAPI tcudnnSpatialTfGridGeneratorForward(cudnnHandle_t handle, const cudnnSpatialTransformerDescriptor_t stDesc, const void* theta, void* grid);
typedef cudnnStatus_t CUDAAPI tcudnnSpatialTfSamplerForward(cudnnHandle_t handle, cudnnSpatialTransformerDescriptor_t stDesc, const void* alpha, const cudnnTensorDescriptor_t xDesc, const void* x, const void* grid, const void* beta, cudnnTensorDescriptor_t yDesc, void* y);
/* cuDNN dropout typedefs: descriptor create/destroy/set/restore/get, state and reserve-space size queries, and dropout forward. */
typedef cudnnStatus_t CUDAAPI tcudnnCreateDropoutDescriptor(cudnnDropoutDescriptor_t* dropoutDesc);
typedef cudnnStatus_t CUDAAPI tcudnnDestroyDropoutDescriptor(cudnnDropoutDescriptor_t dropoutDesc);
typedef cudnnStatus_t CUDAAPI tcudnnDropoutGetStatesSize(cudnnHandle_t handle, size_t* sizeInBytes);
typedef cudnnStatus_t CUDAAPI tcudnnDropoutGetReserveSpaceSize(cudnnTensorDescriptor_t xdesc, size_t* sizeInBytes);
typedef cudnnStatus_t CUDAAPI tcudnnSetDropoutDescriptor(cudnnDropoutDescriptor_t dropoutDesc, cudnnHandle_t handle, float dropout, void* states, size_t stateSizeInBytes, unsigned long long seed);
typedef cudnnStatus_t CUDAAPI tcudnnRestoreDropoutDescriptor(cudnnDropoutDescriptor_t dropoutDesc, cudnnHandle_t handle, float dropout, void* states, size_t stateSizeInBytes, unsigned long long seed);
typedef cudnnStatus_t CUDAAPI tcudnnGetDropoutDescriptor(cudnnDropoutDescriptor_t dropoutDesc, cudnnHandle_t handle, float* dropout, void** states, unsigned long long* seed);
typedef cudnnStatus_t CUDAAPI tcudnnDropoutForward(cudnnHandle_t handle, const cudnnDropoutDescriptor_t dropoutDesc, const cudnnTensorDescriptor_t xdesc, const void* x, const cudnnTensorDescriptor_t ydesc, void* y, void* reserveSpace, size_t reserveSpaceSizeInBytes);
/* cuDNN algorithm typedefs: algorithm-descriptor create/set/get/copy/destroy, algorithm-performance create/set/get/destroy (create and destroy take a count), and algorithm space size/save/restore. */
typedef cudnnStatus_t CUDAAPI tcudnnCreateAlgorithmDescriptor(cudnnAlgorithmDescriptor_t* algoDesc);
typedef cudnnStatus_t CUDAAPI tcudnnSetAlgorithmDescriptor(cudnnAlgorithmDescriptor_t algoDesc, cudnnAlgorithm_t algorithm);
typedef cudnnStatus_t CUDAAPI tcudnnGetAlgorithmDescriptor(const cudnnAlgorithmDescriptor_t algoDesc, cudnnAlgorithm_t* algorithm);
typedef cudnnStatus_t CUDAAPI tcudnnCopyAlgorithmDescriptor(const cudnnAlgorithmDescriptor_t src, cudnnAlgorithmDescriptor_t dest);
typedef cudnnStatus_t CUDAAPI tcudnnDestroyAlgorithmDescriptor(cudnnAlgorithmDescriptor_t algoDesc);
typedef cudnnStatus_t CUDAAPI tcudnnCreateAlgorithmPerformance(cudnnAlgorithmPerformance_t* algoPerf, int numberToCreate);
typedef cudnnStatus_t CUDAAPI tcudnnSetAlgorithmPerformance(cudnnAlgorithmPerformance_t algoPerf, cudnnAlgorithmDescriptor_t algoDesc, cudnnStatus_t status, float time, size_t memory);
typedef cudnnStatus_t CUDAAPI tcudnnGetAlgorithmPerformance(const cudnnAlgorithmPerformance_t algoPerf, cudnnAlgorithmDescriptor_t* algoDesc, cudnnStatus_t* status, float* time, size_t* memory);
typedef cudnnStatus_t CUDAAPI tcudnnDestroyAlgorithmPerformance(cudnnAlgorithmPerformance_t* algoPerf, int numberToDestroy);
typedef cudnnStatus_t CUDAAPI tcudnnGetAlgorithmSpaceSize(cudnnHandle_t handle, cudnnAlgorithmDescriptor_t algoDesc, size_t* algoSpaceSizeInBytes);
typedef cudnnStatus_t CUDAAPI tcudnnSaveAlgorithm(cudnnHandle_t handle, cudnnAlgorithmDescriptor_t algoDesc, void* algoSpace, size_t algoSpaceSizeInBytes);
typedef cudnnStatus_t CUDAAPI tcudnnRestoreAlgorithm(cudnnHandle_t handle, void* algoSpace, size_t algoSpaceSizeInBytes, cudnnAlgorithmDescriptor_t algoDesc);
/* cuDNN callback set/get typedefs (mask, user data pointer, callback function pointer) and the argument-less ops-infer version check. */
typedef cudnnStatus_t CUDAAPI tcudnnSetCallback(unsigned mask, void* udata, cudnnCallback_t fptr);
typedef cudnnStatus_t CUDAAPI tcudnnGetCallback(unsigned* mask, void** udata, cudnnCallback_t* fptr);
typedef cudnnStatus_t CUDAAPI tcudnnOpsInferVersionCheck(void);
/* cuDNN backward-pass typedefs for softmax, pooling, activation, cross-channel LRN and divisive normalization. The gradient output pointers (dx, and dMeans for divisive normalization) are the non-const data parameters. */
typedef cudnnStatus_t CUDAAPI tcudnnSoftmaxBackward(cudnnHandle_t handle, cudnnSoftmaxAlgorithm_t algo, cudnnSoftmaxMode_t mode, const void* alpha, const cudnnTensorDescriptor_t yDesc, const void* y, const cudnnTensorDescriptor_t dyDesc, const void* dy, const void* beta, const cudnnTensorDescriptor_t dxDesc, void* dx);
typedef cudnnStatus_t CUDAAPI tcudnnPoolingBackward(cudnnHandle_t handle, const cudnnPoolingDescriptor_t poolingDesc, const void* alpha, const cudnnTensorDescriptor_t yDesc, const void* y, const cudnnTensorDescriptor_t dyDesc, const void* dy, const cudnnTensorDescriptor_t xDesc, const void* x, const void* beta, const cudnnTensorDescriptor_t dxDesc, void* dx);
typedef cudnnStatus_t CUDAAPI tcudnnActivationBackward(cudnnHandle_t handle, cudnnActivationDescriptor_t activationDesc, const void* alpha, const cudnnTensorDescriptor_t yDesc, const void* y, const cudnnTensorDescriptor_t dyDesc, const void* dy, const cudnnTensorDescriptor_t xDesc, const void* x, const void* beta, const cudnnTensorDescriptor_t dxDesc, void* dx);
typedef cudnnStatus_t CUDAAPI tcudnnLRNCrossChannelBackward(cudnnHandle_t handle, cudnnLRNDescriptor_t normDesc, cudnnLRNMode_t lrnMode, const void* alpha, const cudnnTensorDescriptor_t yDesc, const void* y, const cudnnTensorDescriptor_t dyDesc, const void* dy, const cudnnTensorDescriptor_t xDesc, const void* x, const void* beta, const cudnnTensorDescriptor_t dxDesc, void* dx);
typedef cudnnStatus_t CUDAAPI tcudnnDivisiveNormalizationBackward(cudnnHandle_t handle, cudnnLRNDescriptor_t normDesc, cudnnDivNormMode_t mode, const void* alpha, const cudnnTensorDescriptor_t xDesc, const void* x, const void* means, const void* dy, void* temp, void* temp2, const void* beta, const cudnnTensorDescriptor_t dXdMeansDesc, void* dx, void* dMeans);
/* cuDNN batch-normalization training typedefs: workspace and reserve-space size queries for the Ex variants, forward training (plain and Ex), and backward (plain and Ex).
 * The Ex signatures add a cudnnBatchNormOps_t, an activation descriptor, and explicit workspace / reserve-space buffers with their sizes. */
typedef cudnnStatus_t CUDAAPI tcudnnGetBatchNormalizationForwardTrainingExWorkspaceSize(cudnnHandle_t handle, cudnnBatchNormMode_t mode, cudnnBatchNormOps_t bnOps, const cudnnTensorDescriptor_t xDesc, const cudnnTensorDescriptor_t zDesc, const cudnnTensorDescriptor_t yDesc, const cudnnTensorDescriptor_t bnScaleBiasMeanVarDesc, const cudnnActivationDescriptor_t activationDesc, size_t* sizeInBytes);
typedef cudnnStatus_t CUDAAPI tcudnnGetBatchNormalizationBackwardExWorkspaceSize(cudnnHandle_t handle, cudnnBatchNormMode_t mode, cudnnBatchNormOps_t bnOps, const cudnnTensorDescriptor_t xDesc, const cudnnTensorDescriptor_t yDesc, const cudnnTensorDescriptor_t dyDesc, const cudnnTensorDescriptor_t dzDesc, const cudnnTensorDescriptor_t dxDesc, const cudnnTensorDescriptor_t dBnScaleBiasDesc, const cudnnActivationDescriptor_t activationDesc, size_t* sizeInBytes);
typedef cudnnStatus_t CUDAAPI tcudnnGetBatchNormalizationTrainingExReserveSpaceSize(cudnnHandle_t handle, cudnnBatchNormMode_t mode, cudnnBatchNormOps_t bnOps, const cudnnActivationDescriptor_t activationDesc, const cudnnTensorDescriptor_t xDesc, size_t* sizeInBytes);
typedef cudnnStatus_t CUDAAPI tcudnnBatchNormalizationForwardTraining(cudnnHandle_t handle, cudnnBatchNormMode_t mode, const void* alpha, const void* beta, const cudnnTensorDescriptor_t xDesc, const void* x, const cudnnTensorDescriptor_t yDesc, void* y, const cudnnTensorDescriptor_t bnScaleBiasMeanVarDesc, const void* bnScale, const void* bnBias, double exponentialAverageFactor, void* resultRunningMean, void* resultRunningVariance, double epsilon, void* resultSaveMean, void* resultSaveInvVariance);
typedef cudnnStatus_t CUDAAPI tcudnnBatchNormalizationForwardTrainingEx(cudnnHandle_t handle, cudnnBatchNormMode_t mode, cudnnBatchNormOps_t bnOps, const void* alpha, const void* beta, const cudnnTensorDescriptor_t xDesc, const void* xData, const cudnnTensorDescriptor_t zDesc, const void* zData, const cudnnTensorDescriptor_t yDesc, void* yData, const cudnnTensorDescriptor_t bnScaleBiasMeanVarDesc, const void* bnScale, const void* bnBias, double exponentialAverageFactor, void* resultRunningMean, void* resultRunningVariance, double epsilon, void* resultSaveMean, void* resultSaveInvVariance, cudnnActivationDescriptor_t activationDesc, void* workspace, size_t workSpaceSizeInBytes, void* reserveSpace, size_t reserveSpaceSizeInBytes);
typedef cudnnStatus_t CUDAAPI tcudnnBatchNormalizationBackward(cudnnHandle_t handle, cudnnBatchNormMode_t mode, const void* alphaDataDiff, const void* betaDataDiff, const void* alphaParamDiff, const void* betaParamDiff, const cudnnTensorDescriptor_t xDesc, const void* x, const cudnnTensorDescriptor_t dyDesc, const void* dy, const cudnnTensorDescriptor_t dxDesc, void* dx, const cudnnTensorDescriptor_t dBnScaleBiasDesc, const void* bnScale, void* dBnScaleResult, void* dBnBiasResult, double epsilon, const void* savedMean, const void* savedInvVariance);
typedef cudnnStatus_t CUDAAPI tcudnnBatchNormalizationBackwardEx(cudnnHandle_t handle, cudnnBatchNormMode_t mode, cudnnBatchNormOps_t bnOps, const void* alphaDataDiff, const void* betaDataDiff, const void* alphaParamDiff, const void* betaParamDiff, const cudnnTensorDescriptor_t xDesc, const void* xData, const cudnnTensorDescriptor_t yDesc, const void* yData, const cudnnTensorDescriptor_t dyDesc, const void* dyData, const cudnnTensorDescriptor_t dzDesc, void* dzData, const cudnnTensorDescriptor_t dxDesc, void* dxData, const cudnnTensorDescriptor_t dBnScaleBiasDesc, const void* bnScaleData, const void* bnBiasData, void* dBnScaleData, void* dBnBiasData, double epsilon, const void* savedMean, const void* savedInvVariance, cudnnActivationDescriptor_t activationDesc, void* workSpace, size_t workSpaceSizeInBytes, void* reserveSpace, size_t reserveSpaceSizeInBytes);
/* cuDNN Norm* training typedefs: workspace and reserve-space size queries, forward training, and backward.
 * Each signature takes a cudnnNormMode_t / cudnnNormOps_t / cudnnNormAlgo_t triple and ends with an int groupCnt parameter. */
typedef cudnnStatus_t CUDAAPI tcudnnGetNormalizationForwardTrainingWorkspaceSize(cudnnHandle_t handle, cudnnNormMode_t mode, cudnnNormOps_t normOps, cudnnNormAlgo_t algo, const cudnnTensorDescriptor_t xDesc, const cudnnTensorDescriptor_t zDesc, const cudnnTensorDescriptor_t yDesc, const cudnnTensorDescriptor_t normScaleBiasDesc, const cudnnActivationDescriptor_t activationDesc, const cudnnTensorDescriptor_t normMeanVarDesc, size_t* sizeInBytes, int groupCnt);
typedef cudnnStatus_t CUDAAPI tcudnnGetNormalizationBackwardWorkspaceSize(cudnnHandle_t handle, cudnnNormMode_t mode, cudnnNormOps_t normOps, cudnnNormAlgo_t algo, const cudnnTensorDescriptor_t xDesc, const cudnnTensorDescriptor_t yDesc, const cudnnTensorDescriptor_t dyDesc, const cudnnTensorDescriptor_t dzDesc, const cudnnTensorDescriptor_t dxDesc, const cudnnTensorDescriptor_t dNormScaleBiasDesc, const cudnnActivationDescriptor_t activationDesc, const cudnnTensorDescriptor_t normMeanVarDesc, size_t* sizeInBytes, int groupCnt);
typedef cudnnStatus_t CUDAAPI tcudnnGetNormalizationTrainingReserveSpaceSize(cudnnHandle_t handle, cudnnNormMode_t mode, cudnnNormOps_t normOps, cudnnNormAlgo_t algo, const cudnnActivationDescriptor_t activationDesc, const cudnnTensorDescriptor_t xDesc, size_t* sizeInBytes, int groupCnt);
typedef cudnnStatus_t CUDAAPI tcudnnNormalizationForwardTraining(cudnnHandle_t handle, cudnnNormMode_t mode, cudnnNormOps_t normOps, cudnnNormAlgo_t algo, const void* alpha, const void* beta, const cudnnTensorDescriptor_t xDesc, const void* xData, const cudnnTensorDescriptor_t normScaleBiasDesc, const void* normScale, const void* normBias, double exponentialAverageFactor, const cudnnTensorDescriptor_t normMeanVarDesc, void* resultRunningMean, void* resultRunningVariance, double epsilon, void* resultSaveMean, void* resultSaveInvVariance, cudnnActivationDescriptor_t activationDesc, const cudnnTensorDescriptor_t zDesc, const void* zData, const cudnnTensorDescriptor_t yDesc, void* yData, void* workspace, size_t workSpaceSizeInBytes, void* reserveSpace, size_t reserveSpaceSizeInBytes, int groupCnt);
typedef cudnnStatus_t CUDAAPI tcudnnNormalizationBackward(cudnnHandle_t handle, cudnnNormMode_t mode, cudnnNormOps_t normOps, cudnnNormAlgo_t algo, const void* alphaDataDiff, const void* betaDataDiff, const void* alphaParamDiff, const void* betaParamDiff, const cudnnTensorDescriptor_t xDesc, const void* xData, const cudnnTensorDescriptor_t yDesc, const void* yData, const cudnnTensorDescriptor_t dyDesc, const void* dyData, const cudnnTensorDescriptor_t dzDesc, void* dzData, const cudnnTensorDescriptor_t dxDesc, void* dxData, const cudnnTensorDescriptor_t dNormScaleBiasDesc, const void* normScaleData, const void* normBiasData, void* dNormScaleData, void* dNormBiasData, double epsilon, const cudnnTensorDescriptor_t normMeanVarDesc, const void* savedMean, const void* savedInvVariance, cudnnActivationDescriptor_t activationDesc, void* workSpace, size_t workSpaceSizeInBytes, void* reserveSpace, size_t reserveSpaceSizeInBytes, int groupCnt);
/* cuDNN backward-pass typedefs for the spatial-transformer grid generator and sampler and for dropout, followed by the argument-less ops-train version check. */
typedef cudnnStatus_t CUDAAPI tcudnnSpatialTfGridGeneratorBackward(cudnnHandle_t handle, const cudnnSpatialTransformerDescriptor_t stDesc, const void* dgrid, void* dtheta);
typedef cudnnStatus_t CUDAAPI tcudnnSpatialTfSamplerBackward(cudnnHandle_t handle, cudnnSpatialTransformerDescriptor_t stDesc, const void* alpha, const cudnnTensorDescriptor_t xDesc, const void* x, const void* beta, const cudnnTensorDescriptor_t dxDesc, void* dx, const void* alphaDgrid, const cudnnTensorDescriptor_t dyDesc, const void* dy, const void* grid, const void* betaDgrid, void* dgrid);
typedef cudnnStatus_t CUDAAPI tcudnnDropoutBackward(cudnnHandle_t handle, const cudnnDropoutDescriptor_t dropoutDesc, const cudnnTensorDescriptor_t dydesc, const void* dy, const cudnnTensorDescriptor_t dxdesc, void* dx, void* reserveSpace, size_t reserveSpaceSizeInBytes);
typedef cudnnStatus_t CUDAAPI tcudnnOpsTrainVersionCheck(void);
/* cuDNN RNN descriptor typedefs: create/destroy, the _v8 and _v6 descriptor setters and getters, matrix math type, bias mode, clip (with and without a handle parameter), projection layers, persistent RNN plan create/destroy/set, and the dynamic build call.
 * The _v6 signatures take a cudnnHandle_t as their first parameter; the _v8 ones do not. */
typedef cudnnStatus_t CUDAAPI tcudnnCreateRNNDescriptor(cudnnRNNDescriptor_t* rnnDesc);
typedef cudnnStatus_t CUDAAPI tcudnnDestroyRNNDescriptor(cudnnRNNDescriptor_t rnnDesc);
typedef cudnnStatus_t CUDAAPI tcudnnSetRNNDescriptor_v8(cudnnRNNDescriptor_t rnnDesc, cudnnRNNAlgo_t algo, cudnnRNNMode_t cellMode, cudnnRNNBiasMode_t biasMode, cudnnDirectionMode_t dirMode, cudnnRNNInputMode_t inputMode, cudnnDataType_t dataType, cudnnDataType_t mathPrec, cudnnMathType_t mathType, int32_t inputSize, int32_t hiddenSize, int32_t projSize, int32_t numLayers, cudnnDropoutDescriptor_t dropoutDesc, uint32_t auxFlags);
typedef cudnnStatus_t CUDAAPI tcudnnGetRNNDescriptor_v8(cudnnRNNDescriptor_t rnnDesc, cudnnRNNAlgo_t* algo, cudnnRNNMode_t* cellMode, cudnnRNNBiasMode_t* biasMode, cudnnDirectionMode_t* dirMode, cudnnRNNInputMode_t* inputMode, cudnnDataType_t* dataType, cudnnDataType_t* mathPrec, cudnnMathType_t* mathType, int32_t* inputSize, int32_t* hiddenSize, int32_t* projSize, int32_t* numLayers, cudnnDropoutDescriptor_t* dropoutDesc, uint32_t* auxFlags);
typedef cudnnStatus_t CUDAAPI tcudnnSetRNNDescriptor_v6(cudnnHandle_t handle, cudnnRNNDescriptor_t rnnDesc, const int hiddenSize, const int numLayers, cudnnDropoutDescriptor_t dropoutDesc, cudnnRNNInputMode_t inputMode, cudnnDirectionMode_t direction, cudnnRNNMode_t cellMode, cudnnRNNAlgo_t algo, cudnnDataType_t mathPrec);
typedef cudnnStatus_t CUDAAPI tcudnnGetRNNDescriptor_v6(cudnnHandle_t handle, cudnnRNNDescriptor_t rnnDesc, int* hiddenSize, int* numLayers, cudnnDropoutDescriptor_t* dropoutDesc, cudnnRNNInputMode_t* inputMode, cudnnDirectionMode_t* direction, cudnnRNNMode_t* cellMode, cudnnRNNAlgo_t* algo, cudnnDataType_t* mathPrec);
typedef cudnnStatus_t CUDAAPI tcudnnSetRNNMatrixMathType(cudnnRNNDescriptor_t rnnDesc, cudnnMathType_t mType);
typedef cudnnStatus_t CUDAAPI tcudnnGetRNNMatrixMathType(cudnnRNNDescriptor_t rnnDesc, cudnnMathType_t* mType);
typedef cudnnStatus_t CUDAAPI tcudnnSetRNNBiasMode(cudnnRNNDescriptor_t rnnDesc, cudnnRNNBiasMode_t biasMode);
typedef cudnnStatus_t CUDAAPI tcudnnGetRNNBiasMode(cudnnRNNDescriptor_t rnnDesc, cudnnRNNBiasMode_t* biasMode);
typedef cudnnStatus_t CUDAAPI tcudnnRNNSetClip_v8(cudnnRNNDescriptor_t rnnDesc, cudnnRNNClipMode_t clipMode, cudnnNanPropagation_t clipNanOpt, double lclip, double rclip);
typedef cudnnStatus_t CUDAAPI tcudnnRNNGetClip_v8(cudnnRNNDescriptor_t rnnDesc, cudnnRNNClipMode_t* clipMode, cudnnNanPropagation_t* clipNanOpt, double* lclip, double* rclip);
typedef cudnnStatus_t CUDAAPI tcudnnRNNSetClip(cudnnHandle_t handle, cudnnRNNDescriptor_t rnnDesc, cudnnRNNClipMode_t clipMode, cudnnNanPropagation_t clipNanOpt, double lclip, double rclip);
typedef cudnnStatus_t CUDAAPI tcudnnRNNGetClip(cudnnHandle_t handle, cudnnRNNDescriptor_t rnnDesc, cudnnRNNClipMode_t* clipMode, cudnnNanPropagation_t* clipNanOpt, double* lclip, double* rclip);
typedef cudnnStatus_t CUDAAPI tcudnnSetRNNProjectionLayers(cudnnHandle_t handle, cudnnRNNDescriptor_t rnnDesc, const int recProjSize, const int outProjSize);
typedef cudnnStatus_t CUDAAPI tcudnnGetRNNProjectionLayers(cudnnHandle_t handle, const cudnnRNNDescriptor_t rnnDesc, int* recProjSize, int* outProjSize);
typedef cudnnStatus_t CUDAAPI tcudnnCreatePersistentRNNPlan(cudnnRNNDescriptor_t rnnDesc, const int minibatch, const cudnnDataType_t dataType, cudnnPersistentRNNPlan_t* plan);
typedef cudnnStatus_t CUDAAPI tcudnnDestroyPersistentRNNPlan(cudnnPersistentRNNPlan_t plan);
typedef cudnnStatus_t CUDAAPI tcudnnSetPersistentRNNPlan(cudnnRNNDescriptor_t rnnDesc, cudnnPersistentRNNPlan_t plan);
typedef cudnnStatus_t CUDAAPI tcudnnBuildRNNDynamic(cudnnHandle_t handle, cudnnRNNDescriptor_t rnnDesc, int miniBatch);
/*
 * Function-type typedefs for the cuDNN RNN size and parameter queries:
 * workspace, training-reserve, temp-space, params and weight-space sizes
 * (each takes one or more `size_t*` parameters), plus the linear-layer
 * matrix/bias and weight parameter lookups (each takes `void**` parameters).
 * All return cudnnStatus_t.
 */
typedef cudnnStatus_t CUDAAPI tcudnnGetRNNWorkspaceSize(cudnnHandle_t handle, const cudnnRNNDescriptor_t rnnDesc, const int seqLength, const cudnnTensorDescriptor_t* xDesc, size_t* sizeInBytes);
typedef cudnnStatus_t CUDAAPI tcudnnGetRNNTrainingReserveSize(cudnnHandle_t handle, const cudnnRNNDescriptor_t rnnDesc, const int seqLength, const cudnnTensorDescriptor_t* xDesc, size_t* sizeInBytes);
typedef cudnnStatus_t CUDAAPI tcudnnGetRNNTempSpaceSizes(cudnnHandle_t handle, cudnnRNNDescriptor_t rnnDesc, cudnnForwardMode_t fMode, cudnnRNNDataDescriptor_t xDesc, size_t* workSpaceSize, size_t* reserveSpaceSize);
typedef cudnnStatus_t CUDAAPI tcudnnGetRNNParamsSize(cudnnHandle_t handle, const cudnnRNNDescriptor_t rnnDesc, const cudnnTensorDescriptor_t xDesc, size_t* sizeInBytes, cudnnDataType_t dataType);
typedef cudnnStatus_t CUDAAPI tcudnnGetRNNWeightSpaceSize(cudnnHandle_t handle, cudnnRNNDescriptor_t rnnDesc, size_t* weightSpaceSize);
typedef cudnnStatus_t CUDAAPI tcudnnGetRNNLinLayerMatrixParams(cudnnHandle_t handle, const cudnnRNNDescriptor_t rnnDesc, const int pseudoLayer, const cudnnTensorDescriptor_t xDesc, const cudnnFilterDescriptor_t wDesc, const void* w, const int linLayerID, cudnnFilterDescriptor_t linLayerMatDesc, void** linLayerMat);
typedef cudnnStatus_t CUDAAPI tcudnnGetRNNLinLayerBiasParams(cudnnHandle_t handle, const cudnnRNNDescriptor_t rnnDesc, const int pseudoLayer, const cudnnTensorDescriptor_t xDesc, const cudnnFilterDescriptor_t wDesc, const void* w, const int linLayerID, cudnnFilterDescriptor_t linLayerBiasDesc, void** linLayerBias);
/* Uses int32_t, so a declaration of the fixed-width integer types must be visible before this point. */
typedef cudnnStatus_t CUDAAPI tcudnnGetRNNWeightParams(cudnnHandle_t handle, cudnnRNNDescriptor_t rnnDesc, int32_t pseudoLayer, size_t weightSpaceSize, const void* weightSpace, int32_t linLayerID, cudnnTensorDescriptor_t mDesc, void** mAddr, cudnnTensorDescriptor_t bDesc, void** bAddr);
/*
 * Function-type typedefs for the cuDNN RNN forward-inference entry points
 * (cudnnRNNForwardInference, its `Ex` variant and cudnnRNNForward), the RNN
 * padding-mode accessors, the RNN data descriptor create/destroy/set/get
 * functions, and the RNN algorithm-descriptor / forward-inference algorithm
 * search functions. All return cudnnStatus_t.
 */
typedef cudnnStatus_t CUDAAPI tcudnnRNNForwardInference(cudnnHandle_t handle, const cudnnRNNDescriptor_t rnnDesc, const int seqLength, const cudnnTensorDescriptor_t* xDesc, const void* x, const cudnnTensorDescriptor_t hxDesc, const void* hx, const cudnnTensorDescriptor_t cxDesc, const void* cx, const cudnnFilterDescriptor_t wDesc, const void* w, const cudnnTensorDescriptor_t* yDesc, void* y, const cudnnTensorDescriptor_t hyDesc, void* hy, const cudnnTensorDescriptor_t cyDesc, void* cy, void* workSpace, size_t workSpaceSizeInBytes);
typedef cudnnStatus_t CUDAAPI tcudnnSetRNNPaddingMode(cudnnRNNDescriptor_t rnnDesc, unsigned paddingMode);
typedef cudnnStatus_t CUDAAPI tcudnnGetRNNPaddingMode(cudnnRNNDescriptor_t rnnDesc, unsigned* paddingMode);
typedef cudnnStatus_t CUDAAPI tcudnnCreateRNNDataDescriptor(cudnnRNNDataDescriptor_t* rnnDataDesc);
typedef cudnnStatus_t CUDAAPI tcudnnDestroyRNNDataDescriptor(cudnnRNNDataDescriptor_t rnnDataDesc);
typedef cudnnStatus_t CUDAAPI tcudnnSetRNNDataDescriptor(cudnnRNNDataDescriptor_t rnnDataDesc, cudnnDataType_t dataType, cudnnRNNDataLayout_t layout, int maxSeqLength, int batchSize, int vectorSize, const int seqLengthArray[], void* paddingFill);
typedef cudnnStatus_t CUDAAPI tcudnnGetRNNDataDescriptor(cudnnRNNDataDescriptor_t rnnDataDesc, cudnnDataType_t* dataType, cudnnRNNDataLayout_t* layout, int* maxSeqLength, int* batchSize, int* vectorSize, int arrayLengthRequested, int seqLengthArray[], void* paddingFill);
typedef cudnnStatus_t CUDAAPI tcudnnRNNForwardInferenceEx(cudnnHandle_t handle, const cudnnRNNDescriptor_t rnnDesc, const cudnnRNNDataDescriptor_t xDesc, const void* x, const cudnnTensorDescriptor_t hxDesc, const void* hx, const cudnnTensorDescriptor_t cxDesc, const void* cx, const cudnnFilterDescriptor_t wDesc, const void* w, const cudnnRNNDataDescriptor_t yDesc, void* y, const cudnnTensorDescriptor_t hyDesc, void* hy, const cudnnTensorDescriptor_t cyDesc, void* cy, const cudnnRNNDataDescriptor_t kDesc, const void* keys, const cudnnRNNDataDescriptor_t cDesc, void* cAttn, const cudnnRNNDataDescriptor_t iDesc, void* iAttn, const cudnnRNNDataDescriptor_t qDesc, void* queries, void* workSpace, size_t workSpaceSizeInBytes);
typedef cudnnStatus_t CUDAAPI tcudnnRNNForward(cudnnHandle_t handle, cudnnRNNDescriptor_t rnnDesc, cudnnForwardMode_t fwdMode, const int32_t devSeqLengths[], cudnnRNNDataDescriptor_t xDesc, const void* x, cudnnRNNDataDescriptor_t yDesc, void* y, cudnnTensorDescriptor_t hDesc, const void* hx, void* hy, cudnnTensorDescriptor_t cDesc, const void* cx, void* cy, size_t weightSpaceSize, const void* weightSpace, size_t workSpaceSize, void* workSpace, size_t reserveSpaceSize, void* reserveSpace);
typedef cudnnStatus_t CUDAAPI tcudnnSetRNNAlgorithmDescriptor(cudnnHandle_t handle, cudnnRNNDescriptor_t rnnDesc, cudnnAlgorithmDescriptor_t algoDesc);
typedef cudnnStatus_t CUDAAPI tcudnnGetRNNForwardInferenceAlgorithmMaxCount(cudnnHandle_t handle, const cudnnRNNDescriptor_t rnnDesc, int* count);
typedef cudnnStatus_t CUDAAPI tcudnnFindRNNForwardInferenceAlgorithmEx(cudnnHandle_t handle, const cudnnRNNDescriptor_t rnnDesc, const int seqLength, const cudnnTensorDescriptor_t* xDesc, const void* x, const cudnnTensorDescriptor_t hxDesc, const void* hx, const cudnnTensorDescriptor_t cxDesc, const void* cx, const cudnnFilterDescriptor_t wDesc, const void* w, const cudnnTensorDescriptor_t* yDesc, void* y, const cudnnTensorDescriptor_t hyDesc, void* hy, const cudnnTensorDescriptor_t cyDesc, void* cy, const float findIntensity, const int requestedAlgoCount, int* returnedAlgoCount, cudnnAlgorithmPerformance_t* perfResults, void* workspace, size_t workSpaceSizeInBytes);
/*
 * Function-type typedefs for the cuDNN sequence-data descriptor and attention
 * descriptor create/destroy/set/get functions, the multi-head attention
 * buffer-size, weight lookup and forward functions, and
 * cudnnAdvInferVersionCheck (which takes no arguments).
 * All return cudnnStatus_t.
 */
typedef cudnnStatus_t CUDAAPI tcudnnCreateSeqDataDescriptor(cudnnSeqDataDescriptor_t* seqDataDesc);
typedef cudnnStatus_t CUDAAPI tcudnnDestroySeqDataDescriptor(cudnnSeqDataDescriptor_t seqDataDesc);
typedef cudnnStatus_t CUDAAPI tcudnnSetSeqDataDescriptor(cudnnSeqDataDescriptor_t seqDataDesc, cudnnDataType_t dataType, int nbDims, const int dimA[], const cudnnSeqDataAxis_t axes[], size_t seqLengthArraySize, const int seqLengthArray[], void* paddingFill);
typedef cudnnStatus_t CUDAAPI tcudnnGetSeqDataDescriptor(const cudnnSeqDataDescriptor_t seqDataDesc, cudnnDataType_t* dataType, int* nbDims, int nbDimsRequested, int dimA[], cudnnSeqDataAxis_t axes[], size_t* seqLengthArraySize, size_t seqLengthSizeRequested, int seqLengthArray[], void* paddingFill);
typedef cudnnStatus_t CUDAAPI tcudnnCreateAttnDescriptor(cudnnAttnDescriptor_t* attnDesc);
typedef cudnnStatus_t CUDAAPI tcudnnDestroyAttnDescriptor(cudnnAttnDescriptor_t attnDesc);
typedef cudnnStatus_t CUDAAPI tcudnnSetAttnDescriptor(cudnnAttnDescriptor_t attnDesc, unsigned attnMode, int nHeads, double smScaler, cudnnDataType_t dataType, cudnnDataType_t computePrec, cudnnMathType_t mathType, cudnnDropoutDescriptor_t attnDropoutDesc, cudnnDropoutDescriptor_t postDropoutDesc, int qSize, int kSize, int vSize, int qProjSize, int kProjSize, int vProjSize, int oProjSize, int qoMaxSeqLength, int kvMaxSeqLength, int maxBatchSize, int maxBeamSize);
typedef cudnnStatus_t CUDAAPI tcudnnGetAttnDescriptor(cudnnAttnDescriptor_t attnDesc, unsigned* attnMode, int* nHeads, double* smScaler, cudnnDataType_t* dataType, cudnnDataType_t* computePrec, cudnnMathType_t* mathType, cudnnDropoutDescriptor_t* attnDropoutDesc, cudnnDropoutDescriptor_t* postDropoutDesc, int* qSize, int* kSize, int* vSize, int* qProjSize, int* kProjSize, int* vProjSize, int* oProjSize, int* qoMaxSeqLength, int* kvMaxSeqLength, int* maxBatchSize, int* maxBeamSize);
typedef cudnnStatus_t CUDAAPI tcudnnGetMultiHeadAttnBuffers(cudnnHandle_t handle, const cudnnAttnDescriptor_t attnDesc, size_t* weightSizeInBytes, size_t* workSpaceSizeInBytes, size_t* reserveSpaceSizeInBytes);
typedef cudnnStatus_t CUDAAPI tcudnnGetMultiHeadAttnWeights(cudnnHandle_t handle, const cudnnAttnDescriptor_t attnDesc, cudnnMultiHeadAttnWeightKind_t wKind, size_t weightSizeInBytes, const void* weights, cudnnTensorDescriptor_t wDesc, void** wAddr);
typedef cudnnStatus_t CUDAAPI tcudnnMultiHeadAttnForward(cudnnHandle_t handle, const cudnnAttnDescriptor_t attnDesc, int currIdx, const int loWinIdx[], const int hiWinIdx[], const int devSeqLengthsQO[], const int devSeqLengthsKV[], const cudnnSeqDataDescriptor_t qDesc, const void* queries, const void* residuals, const cudnnSeqDataDescriptor_t kDesc, const void* keys, const cudnnSeqDataDescriptor_t vDesc, const void* values, const cudnnSeqDataDescriptor_t oDesc, void* out, size_t weightSizeInBytes, const void* weights, size_t workSpaceSizeInBytes, void* workSpace, size_t reserveSpaceSizeInBytes, void* reserveSpace);
typedef cudnnStatus_t CUDAAPI tcudnnAdvInferVersionCheck(void);
/*
 * Function-type typedefs for the cuDNN RNN training entry points (forward
 * training, backward data, backward weights, with their `_v8` and `Ex`
 * variants), the matching training algorithm max-count / find functions, and
 * the multi-head attention backward-data / backward-weights functions.
 * Unlike the forward-inference signatures, these carry reserve-space
 * parameters in addition to the workspace ones. All return cudnnStatus_t.
 */
typedef cudnnStatus_t CUDAAPI tcudnnRNNForwardTraining(cudnnHandle_t handle, const cudnnRNNDescriptor_t rnnDesc, const int seqLength, const cudnnTensorDescriptor_t* xDesc, const void* x, const cudnnTensorDescriptor_t hxDesc, const void* hx, const cudnnTensorDescriptor_t cxDesc, const void* cx, const cudnnFilterDescriptor_t wDesc, const void* w, const cudnnTensorDescriptor_t* yDesc, void* y, const cudnnTensorDescriptor_t hyDesc, void* hy, const cudnnTensorDescriptor_t cyDesc, void* cy, void* workSpace, size_t workSpaceSizeInBytes, void* reserveSpace, size_t reserveSpaceSizeInBytes);
typedef cudnnStatus_t CUDAAPI tcudnnRNNBackwardData(cudnnHandle_t handle, const cudnnRNNDescriptor_t rnnDesc, const int seqLength, const cudnnTensorDescriptor_t* yDesc, const void* y, const cudnnTensorDescriptor_t* dyDesc, const void* dy, const cudnnTensorDescriptor_t dhyDesc, const void* dhy, const cudnnTensorDescriptor_t dcyDesc, const void* dcy, const cudnnFilterDescriptor_t wDesc, const void* w, const cudnnTensorDescriptor_t hxDesc, const void* hx, const cudnnTensorDescriptor_t cxDesc, const void* cx, const cudnnTensorDescriptor_t* dxDesc, void* dx, const cudnnTensorDescriptor_t dhxDesc, void* dhx, const cudnnTensorDescriptor_t dcxDesc, void* dcx, void* workSpace, size_t workSpaceSizeInBytes, void* reserveSpace, size_t reserveSpaceSizeInBytes);
typedef cudnnStatus_t CUDAAPI tcudnnRNNBackwardData_v8(cudnnHandle_t handle, cudnnRNNDescriptor_t rnnDesc, const int32_t devSeqLengths[], cudnnRNNDataDescriptor_t yDesc, const void* y, const void* dy, cudnnRNNDataDescriptor_t xDesc, void* dx, cudnnTensorDescriptor_t hDesc, const void* hx, const void* dhy, void* dhx, cudnnTensorDescriptor_t cDesc, const void* cx, const void* dcy, void* dcx, size_t weightSpaceSize, const void* weightSpace, size_t workSpaceSize, void* workSpace, size_t reserveSpaceSize, void* reserveSpace);
typedef cudnnStatus_t CUDAAPI tcudnnRNNBackwardWeights(cudnnHandle_t handle, const cudnnRNNDescriptor_t rnnDesc, const int seqLength, const cudnnTensorDescriptor_t* xDesc, const void* x, const cudnnTensorDescriptor_t hxDesc, const void* hx, const cudnnTensorDescriptor_t* yDesc, const void* y, const void* workSpace, size_t workSpaceSizeInBytes, const cudnnFilterDescriptor_t dwDesc, void* dw, const void* reserveSpace, size_t reserveSpaceSizeInBytes);
typedef cudnnStatus_t CUDAAPI tcudnnRNNBackwardWeights_v8(cudnnHandle_t handle, cudnnRNNDescriptor_t rnnDesc, cudnnWgradMode_t addGrad, const int32_t devSeqLengths[], cudnnRNNDataDescriptor_t xDesc, const void* x, cudnnTensorDescriptor_t hDesc, const void* hx, cudnnRNNDataDescriptor_t yDesc, const void* y, size_t weightSpaceSize, void* dweightSpace, size_t workSpaceSize, void* workSpace, size_t reserveSpaceSize, void* reserveSpace);
typedef cudnnStatus_t CUDAAPI tcudnnRNNForwardTrainingEx(cudnnHandle_t handle, const cudnnRNNDescriptor_t rnnDesc, const cudnnRNNDataDescriptor_t xDesc, const void* x, const cudnnTensorDescriptor_t hxDesc, const void* hx, const cudnnTensorDescriptor_t cxDesc, const void* cx, const cudnnFilterDescriptor_t wDesc, const void* w, const cudnnRNNDataDescriptor_t yDesc, void* y, const cudnnTensorDescriptor_t hyDesc, void* hy, const cudnnTensorDescriptor_t cyDesc, void* cy, const cudnnRNNDataDescriptor_t kDesc, const void* keys, const cudnnRNNDataDescriptor_t cDesc, void* cAttn, const cudnnRNNDataDescriptor_t iDesc, void* iAttn, const cudnnRNNDataDescriptor_t qDesc, void* queries, void* workSpace, size_t workSpaceSizeInBytes, void* reserveSpace, size_t reserveSpaceSizeInBytes);
typedef cudnnStatus_t CUDAAPI tcudnnRNNBackwardDataEx(cudnnHandle_t handle, const cudnnRNNDescriptor_t rnnDesc, const cudnnRNNDataDescriptor_t yDesc, const void* y, const cudnnRNNDataDescriptor_t dyDesc, const void* dy, const cudnnRNNDataDescriptor_t dcDesc, const void* dcAttn, const cudnnTensorDescriptor_t dhyDesc, const void* dhy, const cudnnTensorDescriptor_t dcyDesc, const void* dcy, const cudnnFilterDescriptor_t wDesc, const void* w, const cudnnTensorDescriptor_t hxDesc, const void* hx, const cudnnTensorDescriptor_t cxDesc, const void* cx, const cudnnRNNDataDescriptor_t dxDesc, void* dx, const cudnnTensorDescriptor_t dhxDesc, void* dhx, const cudnnTensorDescriptor_t dcxDesc, void* dcx, const cudnnRNNDataDescriptor_t dkDesc, void* dkeys, void* workSpace, size_t workSpaceSizeInBytes, void* reserveSpace, size_t reserveSpaceSizeInBytes);
typedef cudnnStatus_t CUDAAPI tcudnnRNNBackwardWeightsEx(cudnnHandle_t handle, const cudnnRNNDescriptor_t rnnDesc, const cudnnRNNDataDescriptor_t xDesc, const void* x, const cudnnTensorDescriptor_t hxDesc, const void* hx, const cudnnRNNDataDescriptor_t yDesc, const void* y, void* workSpace, size_t workSpaceSizeInBytes, const cudnnFilterDescriptor_t dwDesc, void* dw, void* reserveSpace, size_t reserveSpaceSizeInBytes);
typedef cudnnStatus_t CUDAAPI tcudnnGetRNNForwardTrainingAlgorithmMaxCount(cudnnHandle_t handle, const cudnnRNNDescriptor_t rnnDesc, int* count);
typedef cudnnStatus_t CUDAAPI tcudnnFindRNNForwardTrainingAlgorithmEx(cudnnHandle_t handle, const cudnnRNNDescriptor_t rnnDesc, const int seqLength, const cudnnTensorDescriptor_t* xDesc, const void* x, const cudnnTensorDescriptor_t hxDesc, const void* hx, const cudnnTensorDescriptor_t cxDesc, const void* cx, const cudnnFilterDescriptor_t wDesc, const void* w, const cudnnTensorDescriptor_t* yDesc, void* y, const cudnnTensorDescriptor_t hyDesc, void* hy, const cudnnTensorDescriptor_t cyDesc, void* cy, const float findIntensity, const int requestedAlgoCount, int* returnedAlgoCount, cudnnAlgorithmPerformance_t* perfResults, void* workspace, size_t workSpaceSizeInBytes, void* reserveSpace, size_t reserveSpaceSizeInBytes);
typedef cudnnStatus_t CUDAAPI tcudnnGetRNNBackwardDataAlgorithmMaxCount(cudnnHandle_t handle, const cudnnRNNDescriptor_t rnnDesc, int* count);
typedef cudnnStatus_t CUDAAPI tcudnnFindRNNBackwardDataAlgorithmEx(cudnnHandle_t handle, const cudnnRNNDescriptor_t rnnDesc, const int seqLength, const cudnnTensorDescriptor_t* yDesc, const void* y, const cudnnTensorDescriptor_t* dyDesc, const void* dy, const cudnnTensorDescriptor_t dhyDesc, const void* dhy, const cudnnTensorDescriptor_t dcyDesc, const void* dcy, const cudnnFilterDescriptor_t wDesc, const void* w, const cudnnTensorDescriptor_t hxDesc, const void* hx, const cudnnTensorDescriptor_t cxDesc, const void* cx, const cudnnTensorDescriptor_t* dxDesc, void* dx, const cudnnTensorDescriptor_t dhxDesc, void* dhx, const cudnnTensorDescriptor_t dcxDesc, void* dcx, const float findIntensity, const int requestedAlgoCount, int* returnedAlgoCount, cudnnAlgorithmPerformance_t* perfResults, void* workspace, size_t workSpaceSizeInBytes, void* reserveSpace, size_t reserveSpaceSizeInBytes);
typedef cudnnStatus_t CUDAAPI tcudnnGetRNNBackwardWeightsAlgorithmMaxCount(cudnnHandle_t handle, const cudnnRNNDescriptor_t rnnDesc, int* count);
typedef cudnnStatus_t CUDAAPI tcudnnFindRNNBackwardWeightsAlgorithmEx(cudnnHandle_t handle, const cudnnRNNDescriptor_t rnnDesc, const int seqLength, const cudnnTensorDescriptor_t* xDesc, const void* x, const cudnnTensorDescriptor_t hxDesc, const void* hx, const cudnnTensorDescriptor_t* yDesc, const void* y, const float findIntensity, const int requestedAlgoCount, int* returnedAlgoCount, cudnnAlgorithmPerformance_t* perfResults, const void* workspace, size_t workSpaceSizeInBytes, const cudnnFilterDescriptor_t dwDesc, void* dw, const void* reserveSpace, size_t reserveSpaceSizeInBytes);
typedef cudnnStatus_t CUDAAPI tcudnnMultiHeadAttnBackwardData(cudnnHandle_t handle, const cudnnAttnDescriptor_t attnDesc, const int loWinIdx[], const int hiWinIdx[], const int devSeqLengthsDQDO[], const int devSeqLengthsDKDV[], const cudnnSeqDataDescriptor_t doDesc, const void* dout, const cudnnSeqDataDescriptor_t dqDesc, void* dqueries, const void* queries, const cudnnSeqDataDescriptor_t dkDesc, void* dkeys, const void* keys, const cudnnSeqDataDescriptor_t dvDesc, void* dvalues, const void* values, size_t weightSizeInBytes, const void* weights, size_t workSpaceSizeInBytes, void* workSpace, size_t reserveSpaceSizeInBytes, void* reserveSpace);
typedef cudnnStatus_t CUDAAPI tcudnnMultiHeadAttnBackwardWeights(cudnnHandle_t handle, const cudnnAttnDescriptor_t attnDesc, cudnnWgradMode_t addGrad, const cudnnSeqDataDescriptor_t qDesc, const void* queries, const cudnnSeqDataDescriptor_t kDesc, const void* keys, const cudnnSeqDataDescriptor_t vDesc, const void* values, const cudnnSeqDataDescriptor_t doDesc, const void* dout, size_t weightSizeInBytes, const void* weights, void* dweights, size_t workSpaceSizeInBytes, void* workSpace, size_t reserveSpaceSizeInBytes, void* reserveSpace);
/*
 * Function-type typedefs for the cuDNN CTC loss entry points: descriptor
 * create/set/get/destroy (base, `Ex` and `_v8` variants, each adding
 * parameters to the previous one), the loss computation, the workspace-size
 * queries, and cudnnAdvTrainVersionCheck (which takes no arguments).
 * Note that cudnnCTCLoss and cudnnCTCLoss_v8 order their parameters
 * differently (e.g. `algo`/`ctcLossDesc` and `workspace`/size positions).
 * All return cudnnStatus_t.
 */
typedef cudnnStatus_t CUDAAPI tcudnnCreateCTCLossDescriptor(cudnnCTCLossDescriptor_t* ctcLossDesc);
typedef cudnnStatus_t CUDAAPI tcudnnSetCTCLossDescriptor(cudnnCTCLossDescriptor_t ctcLossDesc, cudnnDataType_t compType);
typedef cudnnStatus_t CUDAAPI tcudnnSetCTCLossDescriptorEx(cudnnCTCLossDescriptor_t ctcLossDesc, cudnnDataType_t compType, cudnnLossNormalizationMode_t normMode, cudnnNanPropagation_t gradMode);
typedef cudnnStatus_t CUDAAPI tcudnnSetCTCLossDescriptor_v8(cudnnCTCLossDescriptor_t ctcLossDesc, cudnnDataType_t compType, cudnnLossNormalizationMode_t normMode, cudnnNanPropagation_t gradMode, int maxLabelLength);
typedef cudnnStatus_t CUDAAPI tcudnnGetCTCLossDescriptor(cudnnCTCLossDescriptor_t ctcLossDesc, cudnnDataType_t* compType);
typedef cudnnStatus_t CUDAAPI tcudnnGetCTCLossDescriptorEx(cudnnCTCLossDescriptor_t ctcLossDesc, cudnnDataType_t* compType, cudnnLossNormalizationMode_t* normMode, cudnnNanPropagation_t* gradMode);
typedef cudnnStatus_t CUDAAPI tcudnnGetCTCLossDescriptor_v8(cudnnCTCLossDescriptor_t ctcLossDesc, cudnnDataType_t* compType, cudnnLossNormalizationMode_t* normMode, cudnnNanPropagation_t* gradMode, int* maxLabelLength);
typedef cudnnStatus_t CUDAAPI tcudnnDestroyCTCLossDescriptor(cudnnCTCLossDescriptor_t ctcLossDesc);
typedef cudnnStatus_t CUDAAPI tcudnnCTCLoss(cudnnHandle_t handle, const cudnnTensorDescriptor_t probsDesc, const void* probs, const int hostLabels[], const int hostLabelLengths[], const int hostInputLengths[], void* costs, const cudnnTensorDescriptor_t gradientsDesc, void* gradients, cudnnCTCLossAlgo_t algo, cudnnCTCLossDescriptor_t ctcLossDesc, void* workspace, size_t workSpaceSizeInBytes);
typedef cudnnStatus_t CUDAAPI tcudnnCTCLoss_v8(cudnnHandle_t handle, cudnnCTCLossAlgo_t algo, cudnnCTCLossDescriptor_t ctcLossDesc, const cudnnTensorDescriptor_t probsDesc, const void* probs, const int labels[], const int labelLengths[], const int inputLengths[], void* costs, const cudnnTensorDescriptor_t gradientsDesc, void* gradients, size_t workSpaceSizeInBytes, void* workspace);
typedef cudnnStatus_t CUDAAPI tcudnnGetCTCLossWorkspaceSize(cudnnHandle_t handle, const cudnnTensorDescriptor_t probsDesc, const cudnnTensorDescriptor_t gradientsDesc, const int* labels, const int* labelLengths, const int* inputLengths, cudnnCTCLossAlgo_t algo, cudnnCTCLossDescriptor_t ctcLossDesc, size_t* sizeInBytes);
typedef cudnnStatus_t CUDAAPI tcudnnGetCTCLossWorkspaceSize_v8(cudnnHandle_t handle, cudnnCTCLossAlgo_t algo, cudnnCTCLossDescriptor_t ctcLossDesc, const cudnnTensorDescriptor_t probsDesc, const cudnnTensorDescriptor_t gradientsDesc, size_t* sizeInBytes);
typedef cudnnStatus_t CUDAAPI tcudnnAdvTrainVersionCheck(void);
/*
 * Function-type typedefs for the cuDNN convolution descriptor entry points:
 * create/destroy, math type, group count and reorder type accessors, the 2d
 * and Nd descriptor setters/getters, and the 2d / Nd forward output-dimension
 * queries. None of these take a cudnnHandle_t. All return cudnnStatus_t.
 */
typedef cudnnStatus_t CUDAAPI tcudnnCreateConvolutionDescriptor(cudnnConvolutionDescriptor_t* convDesc);
typedef cudnnStatus_t CUDAAPI tcudnnDestroyConvolutionDescriptor(cudnnConvolutionDescriptor_t convDesc);
typedef cudnnStatus_t CUDAAPI tcudnnSetConvolutionMathType(cudnnConvolutionDescriptor_t convDesc, cudnnMathType_t mathType);
typedef cudnnStatus_t CUDAAPI tcudnnGetConvolutionMathType(cudnnConvolutionDescriptor_t convDesc, cudnnMathType_t* mathType);
typedef cudnnStatus_t CUDAAPI tcudnnSetConvolutionGroupCount(cudnnConvolutionDescriptor_t convDesc, int groupCount);
typedef cudnnStatus_t CUDAAPI tcudnnGetConvolutionGroupCount(cudnnConvolutionDescriptor_t convDesc, int* groupCount);
typedef cudnnStatus_t CUDAAPI tcudnnSetConvolutionReorderType(cudnnConvolutionDescriptor_t convDesc, cudnnReorderType_t reorderType);
typedef cudnnStatus_t CUDAAPI tcudnnGetConvolutionReorderType(cudnnConvolutionDescriptor_t convDesc, cudnnReorderType_t* reorderType);
typedef cudnnStatus_t CUDAAPI tcudnnSetConvolution2dDescriptor(cudnnConvolutionDescriptor_t convDesc, int pad_h, int pad_w, int u, int v, int dilation_h, int dilation_w, cudnnConvolutionMode_t mode, cudnnDataType_t computeType);
typedef cudnnStatus_t CUDAAPI tcudnnGetConvolution2dDescriptor(const cudnnConvolutionDescriptor_t convDesc, int* pad_h, int* pad_w, int* u, int* v, int* dilation_h, int* dilation_w, cudnnConvolutionMode_t* mode, cudnnDataType_t* computeType);
typedef cudnnStatus_t CUDAAPI tcudnnSetConvolutionNdDescriptor(cudnnConvolutionDescriptor_t convDesc, int arrayLength, const int padA[], const int filterStrideA[], const int dilationA[], cudnnConvolutionMode_t mode, cudnnDataType_t computeType);
typedef cudnnStatus_t CUDAAPI tcudnnGetConvolutionNdDescriptor(const cudnnConvolutionDescriptor_t convDesc, int arrayLengthRequested, int* arrayLength, int padA[], int strideA[], int dilationA[], cudnnConvolutionMode_t* mode, cudnnDataType_t* computeType);
typedef cudnnStatus_t CUDAAPI tcudnnGetConvolution2dForwardOutputDim(const cudnnConvolutionDescriptor_t convDesc, const cudnnTensorDescriptor_t inputTensorDesc, const cudnnFilterDescriptor_t filterDesc, int* n, int* c, int* h, int* w);
/* NOTE(review): `tensorOuputDimA` looks like a misspelling of "Output"; a parameter name in a typedef does not affect the declared type, so it is left as is. */
typedef cudnnStatus_t CUDAAPI tcudnnGetConvolutionNdForwardOutputDim(const cudnnConvolutionDescriptor_t convDesc, const cudnnTensorDescriptor_t inputTensorDesc, const cudnnFilterDescriptor_t filterDesc, int nbDims, int tensorOuputDimA[]);
/*
 * Function-type typedefs for the cuDNN forward-convolution entry points:
 * algorithm max-count / get (`_v7`) / find (plain and `Ex`) queries,
 * cudnnIm2Col, cudnnReorderFilterAndBias, the forward workspace-size query,
 * cudnnConvolutionForward and cudnnConvolutionBiasActivationForward.
 * All take a cudnnHandle_t first and return cudnnStatus_t.
 */
typedef cudnnStatus_t CUDAAPI tcudnnGetConvolutionForwardAlgorithmMaxCount(cudnnHandle_t handle, int* count);
typedef cudnnStatus_t CUDAAPI tcudnnGetConvolutionForwardAlgorithm_v7(cudnnHandle_t handle, const cudnnTensorDescriptor_t srcDesc, const cudnnFilterDescriptor_t filterDesc, const cudnnConvolutionDescriptor_t convDesc, const cudnnTensorDescriptor_t destDesc, const int requestedAlgoCount, int* returnedAlgoCount, cudnnConvolutionFwdAlgoPerf_t* perfResults);
typedef cudnnStatus_t CUDAAPI tcudnnFindConvolutionForwardAlgorithm(cudnnHandle_t handle, const cudnnTensorDescriptor_t xDesc, const cudnnFilterDescriptor_t wDesc, const cudnnConvolutionDescriptor_t convDesc, const cudnnTensorDescriptor_t yDesc, const int requestedAlgoCount, int* returnedAlgoCount, cudnnConvolutionFwdAlgoPerf_t* perfResults);
typedef cudnnStatus_t CUDAAPI tcudnnFindConvolutionForwardAlgorithmEx(cudnnHandle_t handle, const cudnnTensorDescriptor_t xDesc, const void* x, const cudnnFilterDescriptor_t wDesc, const void* w, const cudnnConvolutionDescriptor_t convDesc, const cudnnTensorDescriptor_t yDesc, void* y, const int requestedAlgoCount, int* returnedAlgoCount, cudnnConvolutionFwdAlgoPerf_t* perfResults, void* workSpace, size_t workSpaceSizeInBytes);
typedef cudnnStatus_t CUDAAPI tcudnnIm2Col(cudnnHandle_t handle, const cudnnTensorDescriptor_t xDesc, const void* x, const cudnnFilterDescriptor_t wDesc, const cudnnConvolutionDescriptor_t convDesc, void* colBuffer);
typedef cudnnStatus_t CUDAAPI tcudnnReorderFilterAndBias(cudnnHandle_t handle, const cudnnFilterDescriptor_t filterDesc, cudnnReorderType_t reorderType, const void* filterData, void* reorderedFilterData, int reorderBias, const void* biasData, void* reorderedBiasData);
typedef cudnnStatus_t CUDAAPI tcudnnGetConvolutionForwardWorkspaceSize(cudnnHandle_t handle, const cudnnTensorDescriptor_t xDesc, const cudnnFilterDescriptor_t wDesc, const cudnnConvolutionDescriptor_t convDesc, const cudnnTensorDescriptor_t yDesc, cudnnConvolutionFwdAlgo_t algo, size_t* sizeInBytes);
typedef cudnnStatus_t CUDAAPI tcudnnConvolutionForward(cudnnHandle_t handle, const void* alpha, const cudnnTensorDescriptor_t xDesc, const void* x, const cudnnFilterDescriptor_t wDesc, const void* w, const cudnnConvolutionDescriptor_t convDesc, cudnnConvolutionFwdAlgo_t algo, void* workSpace, size_t workSpaceSizeInBytes, const void* beta, const cudnnTensorDescriptor_t yDesc, void* y);
typedef cudnnStatus_t CUDAAPI tcudnnConvolutionBiasActivationForward(cudnnHandle_t handle, const void* alpha1, const cudnnTensorDescriptor_t xDesc, const void* x, const cudnnFilterDescriptor_t wDesc, const void* w, const cudnnConvolutionDescriptor_t convDesc, cudnnConvolutionFwdAlgo_t algo, void* workSpace, size_t workSpaceSizeInBytes, const void* alpha2, const cudnnTensorDescriptor_t zDesc, const void* z, const cudnnTensorDescriptor_t biasDesc, const void* bias, const cudnnActivationDescriptor_t activationDesc, const cudnnTensorDescriptor_t yDesc, void* y);
/*
 * Function-type typedefs for the cuDNN backward-data convolution entry
 * points: algorithm max-count / find (plain and `Ex`) / get (`_v7`) queries,
 * the workspace-size query, cudnnConvolutionBackwardData,
 * cudnnGetFoldedConvBackwardDataDescriptors, and cudnnCnnInferVersionCheck
 * (which takes no arguments). All return cudnnStatus_t.
 */
typedef cudnnStatus_t CUDAAPI tcudnnGetConvolutionBackwardDataAlgorithmMaxCount(cudnnHandle_t handle, int* count);
typedef cudnnStatus_t CUDAAPI tcudnnFindConvolutionBackwardDataAlgorithm(cudnnHandle_t handle, const cudnnFilterDescriptor_t wDesc, const cudnnTensorDescriptor_t dyDesc, const cudnnConvolutionDescriptor_t convDesc, const cudnnTensorDescriptor_t dxDesc, const int requestedAlgoCount, int* returnedAlgoCount, cudnnConvolutionBwdDataAlgoPerf_t* perfResults);
typedef cudnnStatus_t CUDAAPI tcudnnFindConvolutionBackwardDataAlgorithmEx(cudnnHandle_t handle, const cudnnFilterDescriptor_t wDesc, const void* w, const cudnnTensorDescriptor_t dyDesc, const void* dy, const cudnnConvolutionDescriptor_t convDesc, const cudnnTensorDescriptor_t dxDesc, void* dx, const int requestedAlgoCount, int* returnedAlgoCount, cudnnConvolutionBwdDataAlgoPerf_t* perfResults, void* workSpace, size_t workSpaceSizeInBytes);
typedef cudnnStatus_t CUDAAPI tcudnnGetConvolutionBackwardDataAlgorithm_v7(cudnnHandle_t handle, const cudnnFilterDescriptor_t filterDesc, const cudnnTensorDescriptor_t diffDesc, const cudnnConvolutionDescriptor_t convDesc, const cudnnTensorDescriptor_t gradDesc, const int requestedAlgoCount, int* returnedAlgoCount, cudnnConvolutionBwdDataAlgoPerf_t* perfResults);
typedef cudnnStatus_t CUDAAPI tcudnnGetConvolutionBackwardDataWorkspaceSize(cudnnHandle_t handle, const cudnnFilterDescriptor_t wDesc, const cudnnTensorDescriptor_t dyDesc, const cudnnConvolutionDescriptor_t convDesc, const cudnnTensorDescriptor_t dxDesc, cudnnConvolutionBwdDataAlgo_t algo, size_t* sizeInBytes);
typedef cudnnStatus_t CUDAAPI tcudnnConvolutionBackwardData(cudnnHandle_t handle, const void* alpha, const cudnnFilterDescriptor_t wDesc, const void* w, const cudnnTensorDescriptor_t dyDesc, const void* dy, const cudnnConvolutionDescriptor_t convDesc, cudnnConvolutionBwdDataAlgo_t algo, void* workSpace, size_t workSpaceSizeInBytes, const void* beta, const cudnnTensorDescriptor_t dxDesc, void* dx);
typedef cudnnStatus_t CUDAAPI tcudnnGetFoldedConvBackwardDataDescriptors(const cudnnHandle_t handle, const cudnnFilterDescriptor_t filterDesc, const cudnnTensorDescriptor_t diffDesc, const cudnnConvolutionDescriptor_t convDesc, const cudnnTensorDescriptor_t gradDesc, const cudnnTensorFormat_t transformFormat, cudnnFilterDescriptor_t foldedFilterDesc, cudnnTensorDescriptor_t paddedDiffDesc, cudnnConvolutionDescriptor_t foldedConvDesc, cudnnTensorDescriptor_t foldedGradDesc, cudnnTensorTransformDescriptor_t filterFoldTransDesc, cudnnTensorTransformDescriptor_t diffPadTransDesc, cudnnTensorTransformDescriptor_t gradFoldTransDesc, cudnnTensorTransformDescriptor_t gradUnfoldTransDesc);
typedef cudnnStatus_t CUDAAPI tcudnnCnnInferVersionCheck(void);
/*
 * Function-type typedefs for the cuDNN backward-filter convolution entry
 * points: algorithm max-count / find (plain and `Ex`) / get (`_v7`) queries,
 * the workspace-size query, cudnnConvolutionBackwardFilter and
 * cudnnConvolutionBackwardBias. All take a cudnnHandle_t first and return
 * cudnnStatus_t.
 */
typedef cudnnStatus_t CUDAAPI tcudnnGetConvolutionBackwardFilterAlgorithmMaxCount(cudnnHandle_t handle, int* count);
typedef cudnnStatus_t CUDAAPI tcudnnFindConvolutionBackwardFilterAlgorithm(cudnnHandle_t handle, const cudnnTensorDescriptor_t xDesc, const cudnnTensorDescriptor_t dyDesc, const cudnnConvolutionDescriptor_t convDesc, const cudnnFilterDescriptor_t dwDesc, const int requestedAlgoCount, int* returnedAlgoCount, cudnnConvolutionBwdFilterAlgoPerf_t* perfResults);
/* NOTE(review): the data pointer that follows `dyDesc` is named `y`, not `dy` as in the neighbouring signatures; parameter names do not affect the declared type, so it is left as is. */
typedef cudnnStatus_t CUDAAPI tcudnnFindConvolutionBackwardFilterAlgorithmEx(cudnnHandle_t handle, const cudnnTensorDescriptor_t xDesc, const void* x, const cudnnTensorDescriptor_t dyDesc, const void* y, const cudnnConvolutionDescriptor_t convDesc, const cudnnFilterDescriptor_t dwDesc, void* dw, const int requestedAlgoCount, int* returnedAlgoCount, cudnnConvolutionBwdFilterAlgoPerf_t* perfResults, void* workSpace, size_t workSpaceSizeInBytes);
typedef cudnnStatus_t CUDAAPI tcudnnGetConvolutionBackwardFilterAlgorithm_v7(cudnnHandle_t handle, const cudnnTensorDescriptor_t srcDesc, const cudnnTensorDescriptor_t diffDesc, const cudnnConvolutionDescriptor_t convDesc, const cudnnFilterDescriptor_t gradDesc, const int requestedAlgoCount, int* returnedAlgoCount, cudnnConvolutionBwdFilterAlgoPerf_t* perfResults);
typedef cudnnStatus_t CUDAAPI tcudnnGetConvolutionBackwardFilterWorkspaceSize(cudnnHandle_t handle, const cudnnTensorDescriptor_t xDesc, const cudnnTensorDescriptor_t dyDesc, const cudnnConvolutionDescriptor_t convDesc, const cudnnFilterDescriptor_t gradDesc, cudnnConvolutionBwdFilterAlgo_t algo, size_t* sizeInBytes);
typedef cudnnStatus_t CUDAAPI tcudnnConvolutionBackwardFilter(cudnnHandle_t handle, const void* alpha, const cudnnTensorDescriptor_t xDesc, const void* x, const cudnnTensorDescriptor_t dyDesc, const void* dy, const cudnnConvolutionDescriptor_t convDesc, cudnnConvolutionBwdFilterAlgo_t algo, void* workSpace, size_t workSpaceSizeInBytes, const void* beta, const cudnnFilterDescriptor_t dwDesc, void* dw);
typedef cudnnStatus_t CUDAAPI tcudnnConvolutionBackwardBias(cudnnHandle_t handle, const void* alpha, const cudnnTensorDescriptor_t dyDesc, const void* dy, const void* beta, const cudnnTensorDescriptor_t dbDesc, void* db);
/*
 * Function-type typedefs for the cuDNN fused-ops entry points: const-param
 * pack and variant-param pack create/destroy/set-attribute/get-attribute,
 * fused-ops plan create/destroy/make, cudnnFusedOpsExecute, and
 * cudnnCnnTrainVersionCheck (which takes no arguments).
 * Only the make-plan and execute signatures take a cudnnHandle_t.
 * All return cudnnStatus_t.
 */
typedef cudnnStatus_t CUDAAPI tcudnnCreateFusedOpsConstParamPack(cudnnFusedOpsConstParamPack_t* constPack, cudnnFusedOps_t ops);
typedef cudnnStatus_t CUDAAPI tcudnnDestroyFusedOpsConstParamPack(cudnnFusedOpsConstParamPack_t constPack);
typedef cudnnStatus_t CUDAAPI tcudnnSetFusedOpsConstParamPackAttribute(cudnnFusedOpsConstParamPack_t constPack, cudnnFusedOpsConstParamLabel_t paramLabel, const void* param);
typedef cudnnStatus_t CUDAAPI tcudnnGetFusedOpsConstParamPackAttribute(const cudnnFusedOpsConstParamPack_t constPack, cudnnFusedOpsConstParamLabel_t paramLabel, void* param, int* isNULL);
typedef cudnnStatus_t CUDAAPI tcudnnCreateFusedOpsVariantParamPack(cudnnFusedOpsVariantParamPack_t* varPack, cudnnFusedOps_t ops);
typedef cudnnStatus_t CUDAAPI tcudnnDestroyFusedOpsVariantParamPack(cudnnFusedOpsVariantParamPack_t varPack);
typedef cudnnStatus_t CUDAAPI tcudnnSetFusedOpsVariantParamPackAttribute(cudnnFusedOpsVariantParamPack_t varPack, cudnnFusedOpsVariantParamLabel_t paramLabel, void* ptr);
typedef cudnnStatus_t CUDAAPI tcudnnGetFusedOpsVariantParamPackAttribute(const cudnnFusedOpsVariantParamPack_t varPack, cudnnFusedOpsVariantParamLabel_t paramLabel, void* ptr);
typedef cudnnStatus_t CUDAAPI tcudnnCreateFusedOpsPlan(cudnnFusedOpsPlan_t* plan, cudnnFusedOps_t ops);
typedef cudnnStatus_t CUDAAPI tcudnnDestroyFusedOpsPlan(cudnnFusedOpsPlan_t plan);
typedef cudnnStatus_t CUDAAPI tcudnnMakeFusedOpsPlan(cudnnHandle_t handle, cudnnFusedOpsPlan_t plan, const cudnnFusedOpsConstParamPack_t constPack, size_t* workspaceSizeInBytes);
typedef cudnnStatus_t CUDAAPI tcudnnFusedOpsExecute(cudnnHandle_t handle, const cudnnFusedOpsPlan_t plan, cudnnFusedOpsVariantParamPack_t varPack);
typedef cudnnStatus_t CUDAAPI tcudnnCnnTrainVersionCheck(void);
/* cuDNN backend-descriptor function types (cudnnBackend*): descriptor
 * create / destroy / initialize / finalize, attribute set / get, and execute.
 *
 * NOTE(review): the attribute accessors use int64_t, which requires
 * <stdint.h> (or an equivalent definition) to be visible before this point.
 * The top of this header visibly includes only <stdlib.h> - confirm that
 * int64_t is provided elsewhere. */
typedef cudnnStatus_t CUDAAPI tcudnnBackendCreateDescriptor(cudnnBackendDescriptorType_t descriptorType, cudnnBackendDescriptor_t* descriptor);
typedef cudnnStatus_t CUDAAPI tcudnnBackendDestroyDescriptor(cudnnBackendDescriptor_t descriptor);
typedef cudnnStatus_t CUDAAPI tcudnnBackendInitialize(cudnnBackendDescriptor_t descriptor);
typedef cudnnStatus_t CUDAAPI tcudnnBackendFinalize(cudnnBackendDescriptor_t descriptor);
typedef cudnnStatus_t CUDAAPI tcudnnBackendSetAttribute(cudnnBackendDescriptor_t descriptor, cudnnBackendAttributeName_t attributeName, cudnnBackendAttributeType_t attributeType, int64_t elementCount, const void* arrayOfElements);
typedef cudnnStatus_t CUDAAPI tcudnnBackendGetAttribute(const cudnnBackendDescriptor_t descriptor, cudnnBackendAttributeName_t attributeName, cudnnBackendAttributeType_t attributeType, int64_t requestedElementCount, int64_t* elementCount, void* arrayOfElements);
typedef cudnnStatus_t CUDAAPI tcudnnBackendExecute(cudnnHandle_t handle, cudnnBackendDescriptor_t executionPlan, cudnnBackendDescriptor_t variantPack);
/* CUDA / OpenGL interop function types (cuGraphicsGL* and cuGL*), all
 * returning CUresult.
 *
 * These signatures use GLuint and GLenum, so both types must already be
 * declared at this point. NOTE(review): confirm where this header obtains
 * them (its own typedefs vs. a GL header). */
typedef CUresult CUDAAPI tcuGraphicsGLRegisterBuffer(CUgraphicsResource* pCudaResource, GLuint buffer, unsigned int Flags);
typedef CUresult CUDAAPI tcuGraphicsGLRegisterImage(CUgraphicsResource* pCudaResource, GLuint image, GLenum target, unsigned int Flags);
typedef CUresult CUDAAPI tcuGLGetDevices_v2(unsigned int* pCudaDeviceCount, CUdevice* pCudaDevices, unsigned int cudaDeviceCount, CUGLDeviceList deviceList);
typedef CUresult CUDAAPI tcuGLCtxCreate_v2(CUcontext* pCtx, unsigned int Flags, CUdevice device);
typedef CUresult CUDAAPI tcuGLInit(void);
/* Buffer-object register / map / unmap entry points.
 * NOTE(review): presumably the older GL interop path superseded by
 * cuGraphicsGLRegisterBuffer - confirm against the CUDA driver docs. */
typedef CUresult CUDAAPI tcuGLRegisterBufferObject(GLuint buffer);
typedef CUresult CUDAAPI tcuGLMapBufferObject_v2(CUdeviceptr* dptr, size_t* size, GLuint buffer);
typedef CUresult CUDAAPI tcuGLUnmapBufferObject(GLuint buffer);
typedef CUresult CUDAAPI tcuGLUnregisterBufferObject(GLuint buffer);
typedef CUresult CUDAAPI tcuGLSetBufferObjectMapFlags(GLuint buffer, unsigned int Flags);
typedef CUresult CUDAAPI tcuGLMapBufferObjectAsync_v2(CUdeviceptr* dptr, size_t* size, GLuint buffer, CUstream hStream);
typedef CUresult CUDAAPI tcuGLUnmapBufferObjectAsync(GLuint buffer, CUstream hStream);
/* Function declarations.
 *
 * Each entry is an `extern` pointer to a function type `t<name>` that must
 * already be declared earlier in this header; the pointer carries the API
 * function's name, so call sites read like direct calls. The pointers are
 * only declared here - presumably they are defined and filled in by the
 * cuew loader implementation (not visible in this header section). */
/* Driver API: error strings, initialisation, version and device queries. */
extern tcuGetErrorString *cuGetErrorString;
extern tcuGetErrorName *cuGetErrorName;
extern tcuInit *cuInit;
extern tcuDriverGetVersion *cuDriverGetVersion;
extern tcuDeviceGet *cuDeviceGet;
extern tcuDeviceGetCount *cuDeviceGetCount;
extern tcuDeviceGetName *cuDeviceGetName;
extern tcuDeviceGetUuid *cuDeviceGetUuid;
extern tcuDeviceGetLuid *cuDeviceGetLuid;
extern tcuDeviceTotalMem_v2 *cuDeviceTotalMem_v2;
extern tcuDeviceGetTexture1DLinearMaxWidth *cuDeviceGetTexture1DLinearMaxWidth;
extern tcuDeviceGetAttribute *cuDeviceGetAttribute;
extern tcuDeviceGetNvSciSyncAttributes *cuDeviceGetNvSciSyncAttributes;
extern tcuDeviceSetMemPool *cuDeviceSetMemPool;
extern tcuDeviceGetMemPool *cuDeviceGetMemPool;
extern tcuDeviceGetDefaultMemPool *cuDeviceGetDefaultMemPool;
extern tcuDeviceGetProperties *cuDeviceGetProperties;
extern tcuDeviceComputeCapability *cuDeviceComputeCapability;
/* Driver API context management: extern pointers to the corresponding
 * t<name> function types. */
/* Primary context (cuDevicePrimaryCtx*). */
extern tcuDevicePrimaryCtxRetain *cuDevicePrimaryCtxRetain;
extern tcuDevicePrimaryCtxRelease_v2 *cuDevicePrimaryCtxRelease_v2;
extern tcuDevicePrimaryCtxSetFlags_v2 *cuDevicePrimaryCtxSetFlags_v2;
extern tcuDevicePrimaryCtxGetState *cuDevicePrimaryCtxGetState;
extern tcuDevicePrimaryCtxReset_v2 *cuDevicePrimaryCtxReset_v2;
/* Explicit contexts (cuCtx*). */
extern tcuCtxCreate_v2 *cuCtxCreate_v2;
extern tcuCtxDestroy_v2 *cuCtxDestroy_v2;
extern tcuCtxPushCurrent_v2 *cuCtxPushCurrent_v2;
extern tcuCtxPopCurrent_v2 *cuCtxPopCurrent_v2;
extern tcuCtxSetCurrent *cuCtxSetCurrent;
extern tcuCtxGetCurrent *cuCtxGetCurrent;
extern tcuCtxGetDevice *cuCtxGetDevice;
extern tcuCtxGetFlags *cuCtxGetFlags;
extern tcuCtxSynchronize *cuCtxSynchronize;
extern tcuCtxSetLimit *cuCtxSetLimit;
extern tcuCtxGetLimit *cuCtxGetLimit;
extern tcuCtxGetCacheConfig *cuCtxGetCacheConfig;
extern tcuCtxSetCacheConfig *cuCtxSetCacheConfig;
extern tcuCtxGetSharedMemConfig *cuCtxGetSharedMemConfig;
extern tcuCtxSetSharedMemConfig *cuCtxSetSharedMemConfig;
extern tcuCtxGetApiVersion *cuCtxGetApiVersion;
extern tcuCtxGetStreamPriorityRange *cuCtxGetStreamPriorityRange;
extern tcuCtxResetPersistingL2Cache *cuCtxResetPersistingL2Cache;
extern tcuCtxAttach *cuCtxAttach;
extern tcuCtxDetach *cuCtxDetach;
/* Driver API module loading (cuModule*) and linker (cuLink*) entry points:
 * extern pointers to the corresponding t<name> function types. */
extern tcuModuleLoad *cuModuleLoad;
extern tcuModuleLoadData *cuModuleLoadData;
extern tcuModuleLoadDataEx *cuModuleLoadDataEx;
extern tcuModuleLoadFatBinary *cuModuleLoadFatBinary;
extern tcuModuleUnload *cuModuleUnload;
extern tcuModuleGetFunction *cuModuleGetFunction;
extern tcuModuleGetGlobal_v2 *cuModuleGetGlobal_v2;
extern tcuModuleGetTexRef *cuModuleGetTexRef;
extern tcuModuleGetSurfRef *cuModuleGetSurfRef;
extern tcuLinkCreate_v2 *cuLinkCreate_v2;
extern tcuLinkAddData_v2 *cuLinkAddData_v2;
extern tcuLinkAddFile_v2 *cuLinkAddFile_v2;
extern tcuLinkComplete *cuLinkComplete;
extern tcuLinkDestroy *cuLinkDestroy;
/* Driver API memory entry points: device / pitched / host / managed
 * allocation, PCI bus id lookup, IPC handles and host-memory registration.
 * Each is an extern pointer to the corresponding t<name> function type. */
extern tcuMemGetInfo_v2 *cuMemGetInfo_v2;
extern tcuMemAlloc_v2 *cuMemAlloc_v2;
extern tcuMemAllocPitch_v2 *cuMemAllocPitch_v2;
extern tcuMemFree_v2 *cuMemFree_v2;
extern tcuMemGetAddressRange_v2 *cuMemGetAddressRange_v2;
extern tcuMemAllocHost_v2 *cuMemAllocHost_v2;
extern tcuMemFreeHost *cuMemFreeHost;
extern tcuMemHostAlloc *cuMemHostAlloc;
extern tcuMemHostGetDevicePointer_v2 *cuMemHostGetDevicePointer_v2;
extern tcuMemHostGetFlags *cuMemHostGetFlags;
extern tcuMemAllocManaged *cuMemAllocManaged;
extern tcuDeviceGetByPCIBusId *cuDeviceGetByPCIBusId;
extern tcuDeviceGetPCIBusId *cuDeviceGetPCIBusId;
extern tcuIpcGetEventHandle *cuIpcGetEventHandle;
extern tcuIpcOpenEventHandle *cuIpcOpenEventHandle;
extern tcuIpcGetMemHandle *cuIpcGetMemHandle;
extern tcuIpcOpenMemHandle_v2 *cuIpcOpenMemHandle_v2;
extern tcuIpcCloseMemHandle *cuIpcCloseMemHandle;
extern tcuMemHostRegister_v2 *cuMemHostRegister_v2;
extern tcuMemHostUnregister *cuMemHostUnregister;
/* Driver API copy (cuMemcpy*) and fill (cuMemsetD*) entry points: extern
 * pointers to the corresponding t<name> function types. */
/* Memcpy variants without the Async suffix. */
extern tcuMemcpy *cuMemcpy;
extern tcuMemcpyPeer *cuMemcpyPeer;
extern tcuMemcpyHtoD_v2 *cuMemcpyHtoD_v2;
extern tcuMemcpyDtoH_v2 *cuMemcpyDtoH_v2;
extern tcuMemcpyDtoD_v2 *cuMemcpyDtoD_v2;
extern tcuMemcpyDtoA_v2 *cuMemcpyDtoA_v2;
extern tcuMemcpyAtoD_v2 *cuMemcpyAtoD_v2;
extern tcuMemcpyHtoA_v2 *cuMemcpyHtoA_v2;
extern tcuMemcpyAtoH_v2 *cuMemcpyAtoH_v2;
extern tcuMemcpyAtoA_v2 *cuMemcpyAtoA_v2;
extern tcuMemcpy2D_v2 *cuMemcpy2D_v2;
extern tcuMemcpy2DUnaligned_v2 *cuMemcpy2DUnaligned_v2;
extern tcuMemcpy3D_v2 *cuMemcpy3D_v2;
extern tcuMemcpy3DPeer *cuMemcpy3DPeer;
/* Memcpy variants with the Async suffix. */
extern tcuMemcpyAsync *cuMemcpyAsync;
extern tcuMemcpyPeerAsync *cuMemcpyPeerAsync;
extern tcuMemcpyHtoDAsync_v2 *cuMemcpyHtoDAsync_v2;
extern tcuMemcpyDtoHAsync_v2 *cuMemcpyDtoHAsync_v2;
extern tcuMemcpyDtoDAsync_v2 *cuMemcpyDtoDAsync_v2;
extern tcuMemcpyHtoAAsync_v2 *cuMemcpyHtoAAsync_v2;
extern tcuMemcpyAtoHAsync_v2 *cuMemcpyAtoHAsync_v2;
extern tcuMemcpy2DAsync_v2 *cuMemcpy2DAsync_v2;
extern tcuMemcpy3DAsync_v2 *cuMemcpy3DAsync_v2;
extern tcuMemcpy3DPeerAsync *cuMemcpy3DPeerAsync;
/* Memset variants (plain, then Async). */
extern tcuMemsetD8_v2 *cuMemsetD8_v2;
extern tcuMemsetD16_v2 *cuMemsetD16_v2;
extern tcuMemsetD32_v2 *cuMemsetD32_v2;
extern tcuMemsetD2D8_v2 *cuMemsetD2D8_v2;
extern tcuMemsetD2D16_v2 *cuMemsetD2D16_v2;
extern tcuMemsetD2D32_v2 *cuMemsetD2D32_v2;
extern tcuMemsetD8Async *cuMemsetD8Async;
extern tcuMemsetD16Async *cuMemsetD16Async;
extern tcuMemsetD32Async *cuMemsetD32Async;
extern tcuMemsetD2D8Async *cuMemsetD2D8Async;
extern tcuMemsetD2D16Async *cuMemsetD2D16Async;
extern tcuMemsetD2D32Async *cuMemsetD2D32Async;
/* Driver API array entry points (cuArray*, cuArray3D*, cuMipmappedArray*):
 * extern pointers to the corresponding t<name> function types. */
extern tcuArrayCreate_v2 *cuArrayCreate_v2;
extern tcuArrayGetDescriptor_v2 *cuArrayGetDescriptor_v2;
extern tcuArrayGetSparseProperties *cuArrayGetSparseProperties;
extern tcuMipmappedArrayGetSparseProperties *cuMipmappedArrayGetSparseProperties;
extern tcuArrayGetPlane *cuArrayGetPlane;
extern tcuArrayDestroy *cuArrayDestroy;
extern tcuArray3DCreate_v2 *cuArray3DCreate_v2;
extern tcuArray3DGetDescriptor_v2 *cuArray3DGetDescriptor_v2;
extern tcuMipmappedArrayCreate *cuMipmappedArrayCreate;
extern tcuMipmappedArrayGetLevel *cuMipmappedArrayGetLevel;
extern tcuMipmappedArrayDestroy *cuMipmappedArrayDestroy;
/* Driver API address reservation / mapping, memory pools, and pointer /
 * range attribute entry points: extern pointers to the corresponding
 * t<name> function types. */
/* Address reservation, allocation handles, mapping and access control. */
extern tcuMemAddressReserve *cuMemAddressReserve;
extern tcuMemAddressFree *cuMemAddressFree;
extern tcuMemCreate *cuMemCreate;
extern tcuMemRelease *cuMemRelease;
extern tcuMemMap *cuMemMap;
extern tcuMemMapArrayAsync *cuMemMapArrayAsync;
extern tcuMemUnmap *cuMemUnmap;
extern tcuMemSetAccess *cuMemSetAccess;
extern tcuMemGetAccess *cuMemGetAccess;
extern tcuMemExportToShareableHandle *cuMemExportToShareableHandle;
extern tcuMemImportFromShareableHandle *cuMemImportFromShareableHandle;
extern tcuMemGetAllocationGranularity *cuMemGetAllocationGranularity;
extern tcuMemGetAllocationPropertiesFromHandle *cuMemGetAllocationPropertiesFromHandle;
extern tcuMemRetainAllocationHandle *cuMemRetainAllocationHandle;
/* Async alloc / free and memory pools (cuMemPool*). */
extern tcuMemFreeAsync *cuMemFreeAsync;
extern tcuMemAllocAsync *cuMemAllocAsync;
extern tcuMemPoolTrimTo *cuMemPoolTrimTo;
extern tcuMemPoolSetAttribute *cuMemPoolSetAttribute;
extern tcuMemPoolGetAttribute *cuMemPoolGetAttribute;
extern tcuMemPoolSetAccess *cuMemPoolSetAccess;
extern tcuMemPoolGetAccess *cuMemPoolGetAccess;
extern tcuMemPoolCreate *cuMemPoolCreate;
extern tcuMemPoolDestroy *cuMemPoolDestroy;
extern tcuMemAllocFromPoolAsync *cuMemAllocFromPoolAsync;
extern tcuMemPoolExportToShareableHandle *cuMemPoolExportToShareableHandle;
extern tcuMemPoolImportFromShareableHandle *cuMemPoolImportFromShareableHandle;
extern tcuMemPoolExportPointer *cuMemPoolExportPointer;
extern tcuMemPoolImportPointer *cuMemPoolImportPointer;
/* Pointer / range attributes, prefetch and advice. */
extern tcuPointerGetAttribute *cuPointerGetAttribute;
extern tcuMemPrefetchAsync *cuMemPrefetchAsync;
extern tcuMemAdvise *cuMemAdvise;
extern tcuMemRangeGetAttribute *cuMemRangeGetAttribute;
extern tcuMemRangeGetAttributes *cuMemRangeGetAttributes;
extern tcuPointerSetAttribute *cuPointerSetAttribute;
extern tcuPointerGetAttributes *cuPointerGetAttributes;
/* Driver API stream, event, external memory / semaphore and stream
 * wait/write-value entry points: extern pointers to the corresponding
 * t<name> function types. */
/* Streams, including stream capture. */
extern tcuStreamCreate *cuStreamCreate;
extern tcuStreamCreateWithPriority *cuStreamCreateWithPriority;
extern tcuStreamGetPriority *cuStreamGetPriority;
extern tcuStreamGetFlags *cuStreamGetFlags;
extern tcuStreamGetCtx *cuStreamGetCtx;
extern tcuStreamWaitEvent *cuStreamWaitEvent;
extern tcuStreamAddCallback *cuStreamAddCallback;
extern tcuStreamBeginCapture_v2 *cuStreamBeginCapture_v2;
extern tcuThreadExchangeStreamCaptureMode *cuThreadExchangeStreamCaptureMode;
extern tcuStreamEndCapture *cuStreamEndCapture;
extern tcuStreamIsCapturing *cuStreamIsCapturing;
extern tcuStreamGetCaptureInfo *cuStreamGetCaptureInfo;
extern tcuStreamAttachMemAsync *cuStreamAttachMemAsync;
extern tcuStreamQuery *cuStreamQuery;
extern tcuStreamSynchronize *cuStreamSynchronize;
extern tcuStreamDestroy_v2 *cuStreamDestroy_v2;
extern tcuStreamCopyAttributes *cuStreamCopyAttributes;
extern tcuStreamGetAttribute *cuStreamGetAttribute;
extern tcuStreamSetAttribute *cuStreamSetAttribute;
/* Events. */
extern tcuEventCreate *cuEventCreate;
extern tcuEventRecord *cuEventRecord;
extern tcuEventRecordWithFlags *cuEventRecordWithFlags;
extern tcuEventQuery *cuEventQuery;
extern tcuEventSynchronize *cuEventSynchronize;
extern tcuEventDestroy_v2 *cuEventDestroy_v2;
extern tcuEventElapsedTime *cuEventElapsedTime;
/* External memory and external semaphores. */
extern tcuImportExternalMemory *cuImportExternalMemory;
extern tcuExternalMemoryGetMappedBuffer *cuExternalMemoryGetMappedBuffer;
extern tcuExternalMemoryGetMappedMipmappedArray *cuExternalMemoryGetMappedMipmappedArray;
extern tcuDestroyExternalMemory *cuDestroyExternalMemory;
extern tcuImportExternalSemaphore *cuImportExternalSemaphore;
extern tcuSignalExternalSemaphoresAsync *cuSignalExternalSemaphoresAsync;
extern tcuWaitExternalSemaphoresAsync *cuWaitExternalSemaphoresAsync;
extern tcuDestroyExternalSemaphore *cuDestroyExternalSemaphore;
/* Stream wait-value / write-value and batched memory operations. */
extern tcuStreamWaitValue32 *cuStreamWaitValue32;
extern tcuStreamWaitValue64 *cuStreamWaitValue64;
extern tcuStreamWriteValue32 *cuStreamWriteValue32;
extern tcuStreamWriteValue64 *cuStreamWriteValue64;
extern tcuStreamBatchMemOp *cuStreamBatchMemOp;
/* Driver API function attribute and launch entry points: extern pointers
 * to the corresponding t<name> function types. */
extern tcuFuncGetAttribute *cuFuncGetAttribute;
extern tcuFuncSetAttribute *cuFuncSetAttribute;
extern tcuFuncSetCacheConfig *cuFuncSetCacheConfig;
extern tcuFuncSetSharedMemConfig *cuFuncSetSharedMemConfig;
extern tcuLaunchKernel *cuLaunchKernel;
extern tcuLaunchCooperativeKernel *cuLaunchCooperativeKernel;
extern tcuLaunchCooperativeKernelMultiDevice *cuLaunchCooperativeKernelMultiDevice;
extern tcuLaunchHostFunc *cuLaunchHostFunc;
/* NOTE(review): cuFuncSetBlockShape through cuParamSetTexRef look like the
 * legacy parameter-setting launch interface that the CUDA driver docs mark
 * deprecated - confirm before using them in new code. */
extern tcuFuncSetBlockShape *cuFuncSetBlockShape;
extern tcuFuncSetSharedSize *cuFuncSetSharedSize;
extern tcuParamSetSize *cuParamSetSize;
extern tcuParamSeti *cuParamSeti;
extern tcuParamSetf *cuParamSetf;
extern tcuParamSetv *cuParamSetv;
extern tcuLaunch *cuLaunch;
extern tcuLaunchGrid *cuLaunchGrid;
extern tcuLaunchGridAsync *cuLaunchGridAsync;
extern tcuParamSetTexRef *cuParamSetTexRef;
/* Driver API graph entry points (cuGraph*): extern pointers to the
 * corresponding t<name> function types. */
/* Graph creation and per-node-type add / get / set entry points. */
extern tcuGraphCreate *cuGraphCreate;
extern tcuGraphAddKernelNode *cuGraphAddKernelNode;
extern tcuGraphKernelNodeGetParams *cuGraphKernelNodeGetParams;
extern tcuGraphKernelNodeSetParams *cuGraphKernelNodeSetParams;
extern tcuGraphAddMemcpyNode *cuGraphAddMemcpyNode;
extern tcuGraphMemcpyNodeGetParams *cuGraphMemcpyNodeGetParams;
extern tcuGraphMemcpyNodeSetParams *cuGraphMemcpyNodeSetParams;
extern tcuGraphAddMemsetNode *cuGraphAddMemsetNode;
extern tcuGraphMemsetNodeGetParams *cuGraphMemsetNodeGetParams;
extern tcuGraphMemsetNodeSetParams *cuGraphMemsetNodeSetParams;
extern tcuGraphAddHostNode *cuGraphAddHostNode;
extern tcuGraphHostNodeGetParams *cuGraphHostNodeGetParams;
extern tcuGraphHostNodeSetParams *cuGraphHostNodeSetParams;
extern tcuGraphAddChildGraphNode *cuGraphAddChildGraphNode;
extern tcuGraphChildGraphNodeGetGraph *cuGraphChildGraphNodeGetGraph;
extern tcuGraphAddEmptyNode *cuGraphAddEmptyNode;
extern tcuGraphAddEventRecordNode *cuGraphAddEventRecordNode;
extern tcuGraphEventRecordNodeGetEvent *cuGraphEventRecordNodeGetEvent;
extern tcuGraphEventRecordNodeSetEvent *cuGraphEventRecordNodeSetEvent;
extern tcuGraphAddEventWaitNode *cuGraphAddEventWaitNode;
extern tcuGraphEventWaitNodeGetEvent *cuGraphEventWaitNodeGetEvent;
extern tcuGraphEventWaitNodeSetEvent *cuGraphEventWaitNodeSetEvent;
extern tcuGraphAddExternalSemaphoresSignalNode *cuGraphAddExternalSemaphoresSignalNode;
extern tcuGraphExternalSemaphoresSignalNodeGetParams *cuGraphExternalSemaphoresSignalNodeGetParams;
extern tcuGraphExternalSemaphoresSignalNodeSetParams *cuGraphExternalSemaphoresSignalNodeSetParams;
extern tcuGraphAddExternalSemaphoresWaitNode *cuGraphAddExternalSemaphoresWaitNode;
extern tcuGraphExternalSemaphoresWaitNodeGetParams *cuGraphExternalSemaphoresWaitNodeGetParams;
extern tcuGraphExternalSemaphoresWaitNodeSetParams *cuGraphExternalSemaphoresWaitNodeSetParams;
/* Cloning, node / edge queries and dependency editing. */
extern tcuGraphClone *cuGraphClone;
extern tcuGraphNodeFindInClone *cuGraphNodeFindInClone;
extern tcuGraphNodeGetType *cuGraphNodeGetType;
extern tcuGraphGetNodes *cuGraphGetNodes;
extern tcuGraphGetRootNodes *cuGraphGetRootNodes;
extern tcuGraphGetEdges *cuGraphGetEdges;
extern tcuGraphNodeGetDependencies *cuGraphNodeGetDependencies;
extern tcuGraphNodeGetDependentNodes *cuGraphNodeGetDependentNodes;
extern tcuGraphAddDependencies *cuGraphAddDependencies;
extern tcuGraphRemoveDependencies *cuGraphRemoveDependencies;
extern tcuGraphDestroyNode *cuGraphDestroyNode;
/* Instantiation and cuGraphExec* entry points. */
extern tcuGraphInstantiate_v2 *cuGraphInstantiate_v2;
extern tcuGraphExecKernelNodeSetParams *cuGraphExecKernelNodeSetParams;
extern tcuGraphExecMemcpyNodeSetParams *cuGraphExecMemcpyNodeSetParams;
extern tcuGraphExecMemsetNodeSetParams *cuGraphExecMemsetNodeSetParams;
extern tcuGraphExecHostNodeSetParams *cuGraphExecHostNodeSetParams;
extern tcuGraphExecChildGraphNodeSetParams *cuGraphExecChildGraphNodeSetParams;
extern tcuGraphExecEventRecordNodeSetEvent *cuGraphExecEventRecordNodeSetEvent;
extern tcuGraphExecEventWaitNodeSetEvent *cuGraphExecEventWaitNodeSetEvent;
extern tcuGraphExecExternalSemaphoresSignalNodeSetParams *cuGraphExecExternalSemaphoresSignalNodeSetParams;
extern tcuGraphExecExternalSemaphoresWaitNodeSetParams *cuGraphExecExternalSemaphoresWaitNodeSetParams;
extern tcuGraphUpload *cuGraphUpload;
extern tcuGraphLaunch *cuGraphLaunch;
extern tcuGraphExecDestroy *cuGraphExecDestroy;
extern tcuGraphDestroy *cuGraphDestroy;
extern tcuGraphExecUpdate *cuGraphExecUpdate;
extern tcuGraphKernelNodeCopyAttributes *cuGraphKernelNodeCopyAttributes;
extern tcuGraphKernelNodeGetAttribute *cuGraphKernelNodeGetAttribute;
extern tcuGraphKernelNodeSetAttribute *cuGraphKernelNodeSetAttribute;
/* Driver API occupancy entry points (cuOccupancy*): extern pointers to the
 * corresponding t<name> function types. */
extern tcuOccupancyMaxActiveBlocksPerMultiprocessor *cuOccupancyMaxActiveBlocksPerMultiprocessor;
extern tcuOccupancyMaxActiveBlocksPerMultiprocessorWithFlags *cuOccupancyMaxActiveBlocksPerMultiprocessorWithFlags;
extern tcuOccupancyMaxPotentialBlockSize *cuOccupancyMaxPotentialBlockSize;
extern tcuOccupancyMaxPotentialBlockSizeWithFlags *cuOccupancyMaxPotentialBlockSizeWithFlags;
extern tcuOccupancyAvailableDynamicSMemPerBlock *cuOccupancyAvailableDynamicSMemPerBlock;
/* Driver API texture / surface reference and texture / surface object
 * entry points: extern pointers to the corresponding t<name> function
 * types. */
/* Texture reference setters. */
extern tcuTexRefSetArray *cuTexRefSetArray;
extern tcuTexRefSetMipmappedArray *cuTexRefSetMipmappedArray;
extern tcuTexRefSetAddress_v2 *cuTexRefSetAddress_v2;
/* The unsuffixed name cuTexRefSetAddress2D is #defined to
 * cuTexRefSetAddress2D_v3 near the top of this header, so both spellings
 * refer to this pointer. */
extern tcuTexRefSetAddress2D_v3 *cuTexRefSetAddress2D_v3;
extern tcuTexRefSetFormat *cuTexRefSetFormat;
extern tcuTexRefSetAddressMode *cuTexRefSetAddressMode;
extern tcuTexRefSetFilterMode *cuTexRefSetFilterMode;
extern tcuTexRefSetMipmapFilterMode *cuTexRefSetMipmapFilterMode;
extern tcuTexRefSetMipmapLevelBias *cuTexRefSetMipmapLevelBias;
extern tcuTexRefSetMipmapLevelClamp *cuTexRefSetMipmapLevelClamp;
extern tcuTexRefSetMaxAnisotropy *cuTexRefSetMaxAnisotropy;
extern tcuTexRefSetBorderColor *cuTexRefSetBorderColor;
extern tcuTexRefSetFlags *cuTexRefSetFlags;
/* Texture reference getters, create and destroy. */
extern tcuTexRefGetAddress_v2 *cuTexRefGetAddress_v2;
extern tcuTexRefGetArray *cuTexRefGetArray;
extern tcuTexRefGetMipmappedArray *cuTexRefGetMipmappedArray;
extern tcuTexRefGetAddressMode *cuTexRefGetAddressMode;
extern tcuTexRefGetFilterMode *cuTexRefGetFilterMode;
extern tcuTexRefGetFormat *cuTexRefGetFormat;
extern tcuTexRefGetMipmapFilterMode *cuTexRefGetMipmapFilterMode;
extern tcuTexRefGetMipmapLevelBias *cuTexRefGetMipmapLevelBias;
extern tcuTexRefGetMipmapLevelClamp *cuTexRefGetMipmapLevelClamp;
extern tcuTexRefGetMaxAnisotropy *cuTexRefGetMaxAnisotropy;
extern tcuTexRefGetBorderColor *cuTexRefGetBorderColor;
extern tcuTexRefGetFlags *cuTexRefGetFlags;
extern tcuTexRefCreate *cuTexRefCreate;
extern tcuTexRefDestroy *cuTexRefDestroy;
/* Surface references. */
extern tcuSurfRefSetArray *cuSurfRefSetArray;
extern tcuSurfRefGetArray *cuSurfRefGetArray;
/* Texture and surface objects. */
extern tcuTexObjectCreate *cuTexObjectCreate;
extern tcuTexObjectDestroy *cuTexObjectDestroy;
extern tcuTexObjectGetResourceDesc *cuTexObjectGetResourceDesc;
extern tcuTexObjectGetTextureDesc *cuTexObjectGetTextureDesc;
extern tcuTexObjectGetResourceViewDesc *cuTexObjectGetResourceViewDesc;
extern tcuSurfObjectCreate *cuSurfObjectCreate;
extern tcuSurfObjectDestroy *cuSurfObjectDestroy;
extern tcuSurfObjectGetResourceDesc *cuSurfObjectGetResourceDesc;
/* Driver API peer access, graphics-resource interop, cuGetExportTable and
 * cuFuncGetModule: extern pointers to the corresponding t<name> function
 * types. */
extern tcuDeviceCanAccessPeer *cuDeviceCanAccessPeer;
extern tcuCtxEnablePeerAccess *cuCtxEnablePeerAccess;
extern tcuCtxDisablePeerAccess *cuCtxDisablePeerAccess;
extern tcuDeviceGetP2PAttribute *cuDeviceGetP2PAttribute;
extern tcuGraphicsUnregisterResource *cuGraphicsUnregisterResource;
extern tcuGraphicsSubResourceGetMappedArray *cuGraphicsSubResourceGetMappedArray;
extern tcuGraphicsResourceGetMappedMipmappedArray *cuGraphicsResourceGetMappedMipmappedArray;
extern tcuGraphicsResourceGetMappedPointer_v2 *cuGraphicsResourceGetMappedPointer_v2;
extern tcuGraphicsResourceSetMapFlags_v2 *cuGraphicsResourceSetMapFlags_v2;
extern tcuGraphicsMapResources *cuGraphicsMapResources;
extern tcuGraphicsUnmapResources *cuGraphicsUnmapResources;
extern tcuGetExportTable *cuGetExportTable;
extern tcuFuncGetModule *cuFuncGetModule;
/* NVRTC entry points (nvrtc*): extern pointers to the corresponding
 * tnvrtc<name> function types. The pointers are only declared here;
 * presumably they are populated by a separate library load from the driver
 * API pointers - confirm in the cuew implementation. */
extern tnvrtcGetErrorString *nvrtcGetErrorString;
extern tnvrtcVersion *nvrtcVersion;
extern tnvrtcGetNumSupportedArchs *nvrtcGetNumSupportedArchs;
extern tnvrtcGetSupportedArchs *nvrtcGetSupportedArchs;
extern tnvrtcCreateProgram *nvrtcCreateProgram;
extern tnvrtcDestroyProgram *nvrtcDestroyProgram;
extern tnvrtcCompileProgram *nvrtcCompileProgram;
extern tnvrtcGetPTXSize *nvrtcGetPTXSize;
extern tnvrtcGetPTX *nvrtcGetPTX;
extern tnvrtcGetCUBINSize *nvrtcGetCUBINSize;
extern tnvrtcGetCUBIN *nvrtcGetCUBIN;
extern tnvrtcGetProgramLogSize *nvrtcGetProgramLogSize;
extern tnvrtcGetProgramLog *nvrtcGetProgramLog;
extern tnvrtcAddNameExpression *nvrtcAddNameExpression;
extern tnvrtcGetLoweredName *nvrtcGetLoweredName;
/* cuDNN entry points from library version / handle management through
 * cudnnOpsInferVersionCheck: extern pointers to the corresponding
 * tcudnn<name> function types. */
/* Version, error reporting, handle and stream. */
extern tcudnnGetVersion *cudnnGetVersion;
extern tcudnnGetCudartVersion *cudnnGetCudartVersion;
extern tcudnnGetErrorString *cudnnGetErrorString;
extern tcudnnQueryRuntimeError *cudnnQueryRuntimeError;
extern tcudnnGetProperty *cudnnGetProperty;
extern tcudnnCreate *cudnnCreate;
extern tcudnnDestroy *cudnnDestroy;
extern tcudnnSetStream *cudnnSetStream;
extern tcudnnGetStream *cudnnGetStream;
/* Tensor descriptors and tensor transforms. */
extern tcudnnCreateTensorDescriptor *cudnnCreateTensorDescriptor;
extern tcudnnSetTensor4dDescriptor *cudnnSetTensor4dDescriptor;
extern tcudnnSetTensor4dDescriptorEx *cudnnSetTensor4dDescriptorEx;
extern tcudnnGetTensor4dDescriptor *cudnnGetTensor4dDescriptor;
extern tcudnnSetTensorNdDescriptor *cudnnSetTensorNdDescriptor;
extern tcudnnSetTensorNdDescriptorEx *cudnnSetTensorNdDescriptorEx;
extern tcudnnGetTensorNdDescriptor *cudnnGetTensorNdDescriptor;
extern tcudnnGetTensorSizeInBytes *cudnnGetTensorSizeInBytes;
extern tcudnnDestroyTensorDescriptor *cudnnDestroyTensorDescriptor;
extern tcudnnInitTransformDest *cudnnInitTransformDest;
extern tcudnnCreateTensorTransformDescriptor *cudnnCreateTensorTransformDescriptor;
extern tcudnnSetTensorTransformDescriptor *cudnnSetTensorTransformDescriptor;
extern tcudnnGetTensorTransformDescriptor *cudnnGetTensorTransformDescriptor;
extern tcudnnDestroyTensorTransformDescriptor *cudnnDestroyTensorTransformDescriptor;
extern tcudnnTransformTensor *cudnnTransformTensor;
extern tcudnnTransformTensorEx *cudnnTransformTensorEx;
extern tcudnnAddTensor *cudnnAddTensor;
/* Op-tensor and reduce-tensor. */
extern tcudnnCreateOpTensorDescriptor *cudnnCreateOpTensorDescriptor;
extern tcudnnSetOpTensorDescriptor *cudnnSetOpTensorDescriptor;
extern tcudnnGetOpTensorDescriptor *cudnnGetOpTensorDescriptor;
extern tcudnnDestroyOpTensorDescriptor *cudnnDestroyOpTensorDescriptor;
extern tcudnnOpTensor *cudnnOpTensor;
extern tcudnnCreateReduceTensorDescriptor *cudnnCreateReduceTensorDescriptor;
extern tcudnnSetReduceTensorDescriptor *cudnnSetReduceTensorDescriptor;
extern tcudnnGetReduceTensorDescriptor *cudnnGetReduceTensorDescriptor;
extern tcudnnDestroyReduceTensorDescriptor *cudnnDestroyReduceTensorDescriptor;
extern tcudnnGetReductionIndicesSize *cudnnGetReductionIndicesSize;
extern tcudnnGetReductionWorkspaceSize *cudnnGetReductionWorkspaceSize;
extern tcudnnReduceTensor *cudnnReduceTensor;
extern tcudnnSetTensor *cudnnSetTensor;
extern tcudnnScaleTensor *cudnnScaleTensor;
/* Filter descriptors. */
extern tcudnnCreateFilterDescriptor *cudnnCreateFilterDescriptor;
extern tcudnnSetFilter4dDescriptor *cudnnSetFilter4dDescriptor;
extern tcudnnGetFilter4dDescriptor *cudnnGetFilter4dDescriptor;
extern tcudnnSetFilterNdDescriptor *cudnnSetFilterNdDescriptor;
extern tcudnnGetFilterNdDescriptor *cudnnGetFilterNdDescriptor;
extern tcudnnGetFilterSizeInBytes *cudnnGetFilterSizeInBytes;
extern tcudnnTransformFilter *cudnnTransformFilter;
extern tcudnnDestroyFilterDescriptor *cudnnDestroyFilterDescriptor;
/* Softmax, pooling, activation and LRN (forward). */
extern tcudnnSoftmaxForward *cudnnSoftmaxForward;
extern tcudnnCreatePoolingDescriptor *cudnnCreatePoolingDescriptor;
extern tcudnnSetPooling2dDescriptor *cudnnSetPooling2dDescriptor;
extern tcudnnGetPooling2dDescriptor *cudnnGetPooling2dDescriptor;
extern tcudnnSetPoolingNdDescriptor *cudnnSetPoolingNdDescriptor;
extern tcudnnGetPoolingNdDescriptor *cudnnGetPoolingNdDescriptor;
extern tcudnnGetPoolingNdForwardOutputDim *cudnnGetPoolingNdForwardOutputDim;
extern tcudnnGetPooling2dForwardOutputDim *cudnnGetPooling2dForwardOutputDim;
extern tcudnnDestroyPoolingDescriptor *cudnnDestroyPoolingDescriptor;
extern tcudnnPoolingForward *cudnnPoolingForward;
extern tcudnnCreateActivationDescriptor *cudnnCreateActivationDescriptor;
extern tcudnnSetActivationDescriptor *cudnnSetActivationDescriptor;
extern tcudnnGetActivationDescriptor *cudnnGetActivationDescriptor;
extern tcudnnDestroyActivationDescriptor *cudnnDestroyActivationDescriptor;
extern tcudnnActivationForward *cudnnActivationForward;
extern tcudnnCreateLRNDescriptor *cudnnCreateLRNDescriptor;
extern tcudnnSetLRNDescriptor *cudnnSetLRNDescriptor;
extern tcudnnGetLRNDescriptor *cudnnGetLRNDescriptor;
extern tcudnnDestroyLRNDescriptor *cudnnDestroyLRNDescriptor;
extern tcudnnLRNCrossChannelForward *cudnnLRNCrossChannelForward;
extern tcudnnDivisiveNormalizationForward *cudnnDivisiveNormalizationForward;
/* Batch / generic normalization (inference). */
extern tcudnnDeriveBNTensorDescriptor *cudnnDeriveBNTensorDescriptor;
extern tcudnnBatchNormalizationForwardInference *cudnnBatchNormalizationForwardInference;
extern tcudnnDeriveNormTensorDescriptor *cudnnDeriveNormTensorDescriptor;
extern tcudnnNormalizationForwardInference *cudnnNormalizationForwardInference;
/* Spatial transformer and dropout (forward). */
extern tcudnnCreateSpatialTransformerDescriptor *cudnnCreateSpatialTransformerDescriptor;
extern tcudnnSetSpatialTransformerNdDescriptor *cudnnSetSpatialTransformerNdDescriptor;
extern tcudnnDestroySpatialTransformerDescriptor *cudnnDestroySpatialTransformerDescriptor;
extern tcudnnSpatialTfGridGeneratorForward *cudnnSpatialTfGridGeneratorForward;
extern tcudnnSpatialTfSamplerForward *cudnnSpatialTfSamplerForward;
extern tcudnnCreateDropoutDescriptor *cudnnCreateDropoutDescriptor;
extern tcudnnDestroyDropoutDescriptor *cudnnDestroyDropoutDescriptor;
extern tcudnnDropoutGetStatesSize *cudnnDropoutGetStatesSize;
extern tcudnnDropoutGetReserveSpaceSize *cudnnDropoutGetReserveSpaceSize;
extern tcudnnSetDropoutDescriptor *cudnnSetDropoutDescriptor;
extern tcudnnRestoreDropoutDescriptor *cudnnRestoreDropoutDescriptor;
extern tcudnnGetDropoutDescriptor *cudnnGetDropoutDescriptor;
extern tcudnnDropoutForward *cudnnDropoutForward;
/* Algorithm descriptors / performance records, callbacks, version check. */
extern tcudnnCreateAlgorithmDescriptor *cudnnCreateAlgorithmDescriptor;
extern tcudnnSetAlgorithmDescriptor *cudnnSetAlgorithmDescriptor;
extern tcudnnGetAlgorithmDescriptor *cudnnGetAlgorithmDescriptor;
extern tcudnnCopyAlgorithmDescriptor *cudnnCopyAlgorithmDescriptor;
extern tcudnnDestroyAlgorithmDescriptor *cudnnDestroyAlgorithmDescriptor;
extern tcudnnCreateAlgorithmPerformance *cudnnCreateAlgorithmPerformance;
extern tcudnnSetAlgorithmPerformance *cudnnSetAlgorithmPerformance;
extern tcudnnGetAlgorithmPerformance *cudnnGetAlgorithmPerformance;
extern tcudnnDestroyAlgorithmPerformance *cudnnDestroyAlgorithmPerformance;
extern tcudnnGetAlgorithmSpaceSize *cudnnGetAlgorithmSpaceSize;
extern tcudnnSaveAlgorithm *cudnnSaveAlgorithm;
extern tcudnnRestoreAlgorithm *cudnnRestoreAlgorithm;
extern tcudnnSetCallback *cudnnSetCallback;
extern tcudnnGetCallback *cudnnGetCallback;
extern tcudnnOpsInferVersionCheck *cudnnOpsInferVersionCheck;
/* cuDNN backward / training entry points, ending with
 * cudnnOpsTrainVersionCheck: extern pointers to the corresponding
 * tcudnn<name> function types. */
extern tcudnnSoftmaxBackward *cudnnSoftmaxBackward;
extern tcudnnPoolingBackward *cudnnPoolingBackward;
extern tcudnnActivationBackward *cudnnActivationBackward;
extern tcudnnLRNCrossChannelBackward *cudnnLRNCrossChannelBackward;
extern tcudnnDivisiveNormalizationBackward *cudnnDivisiveNormalizationBackward;
/* Batch normalization: workspace / reserve size queries, training, backward. */
extern tcudnnGetBatchNormalizationForwardTrainingExWorkspaceSize *cudnnGetBatchNormalizationForwardTrainingExWorkspaceSize;
extern tcudnnGetBatchNormalizationBackwardExWorkspaceSize *cudnnGetBatchNormalizationBackwardExWorkspaceSize;
extern tcudnnGetBatchNormalizationTrainingExReserveSpaceSize *cudnnGetBatchNormalizationTrainingExReserveSpaceSize;
extern tcudnnBatchNormalizationForwardTraining *cudnnBatchNormalizationForwardTraining;
extern tcudnnBatchNormalizationForwardTrainingEx *cudnnBatchNormalizationForwardTrainingEx;
extern tcudnnBatchNormalizationBackward *cudnnBatchNormalizationBackward;
extern tcudnnBatchNormalizationBackwardEx *cudnnBatchNormalizationBackwardEx;
/* Generic normalization: size queries, training, backward. */
extern tcudnnGetNormalizationForwardTrainingWorkspaceSize *cudnnGetNormalizationForwardTrainingWorkspaceSize;
extern tcudnnGetNormalizationBackwardWorkspaceSize *cudnnGetNormalizationBackwardWorkspaceSize;
extern tcudnnGetNormalizationTrainingReserveSpaceSize *cudnnGetNormalizationTrainingReserveSpaceSize;
extern tcudnnNormalizationForwardTraining *cudnnNormalizationForwardTraining;
extern tcudnnNormalizationBackward *cudnnNormalizationBackward;
extern tcudnnSpatialTfGridGeneratorBackward *cudnnSpatialTfGridGeneratorBackward;
extern tcudnnSpatialTfSamplerBackward *cudnnSpatialTfSamplerBackward;
extern tcudnnDropoutBackward *cudnnDropoutBackward;
extern tcudnnOpsTrainVersionCheck *cudnnOpsTrainVersionCheck;
/* cuDNN RNN, sequence-data and multi-head attention entry points through
 * cudnnAdvInferVersionCheck: extern pointers to the corresponding
 * tcudnn<name> function types.
 *
 * Several entry points are declared in more than one versioned spelling
 * (for example the _v6 and _v8 RNN descriptor setters); each spelling is
 * its own pointer with its own function type. */
/* RNN descriptor configuration. */
extern tcudnnCreateRNNDescriptor *cudnnCreateRNNDescriptor;
extern tcudnnDestroyRNNDescriptor *cudnnDestroyRNNDescriptor;
extern tcudnnSetRNNDescriptor_v8 *cudnnSetRNNDescriptor_v8;
extern tcudnnGetRNNDescriptor_v8 *cudnnGetRNNDescriptor_v8;
extern tcudnnSetRNNDescriptor_v6 *cudnnSetRNNDescriptor_v6;
extern tcudnnGetRNNDescriptor_v6 *cudnnGetRNNDescriptor_v6;
extern tcudnnSetRNNMatrixMathType *cudnnSetRNNMatrixMathType;
extern tcudnnGetRNNMatrixMathType *cudnnGetRNNMatrixMathType;
extern tcudnnSetRNNBiasMode *cudnnSetRNNBiasMode;
extern tcudnnGetRNNBiasMode *cudnnGetRNNBiasMode;
extern tcudnnRNNSetClip_v8 *cudnnRNNSetClip_v8;
extern tcudnnRNNGetClip_v8 *cudnnRNNGetClip_v8;
extern tcudnnRNNSetClip *cudnnRNNSetClip;
extern tcudnnRNNGetClip *cudnnRNNGetClip;
extern tcudnnSetRNNProjectionLayers *cudnnSetRNNProjectionLayers;
extern tcudnnGetRNNProjectionLayers *cudnnGetRNNProjectionLayers;
extern tcudnnCreatePersistentRNNPlan *cudnnCreatePersistentRNNPlan;
extern tcudnnDestroyPersistentRNNPlan *cudnnDestroyPersistentRNNPlan;
extern tcudnnSetPersistentRNNPlan *cudnnSetPersistentRNNPlan;
extern tcudnnBuildRNNDynamic *cudnnBuildRNNDynamic;
/* RNN size queries and weight / parameter access. */
extern tcudnnGetRNNWorkspaceSize *cudnnGetRNNWorkspaceSize;
extern tcudnnGetRNNTrainingReserveSize *cudnnGetRNNTrainingReserveSize;
extern tcudnnGetRNNTempSpaceSizes *cudnnGetRNNTempSpaceSizes;
extern tcudnnGetRNNParamsSize *cudnnGetRNNParamsSize;
extern tcudnnGetRNNWeightSpaceSize *cudnnGetRNNWeightSpaceSize;
extern tcudnnGetRNNLinLayerMatrixParams *cudnnGetRNNLinLayerMatrixParams;
extern tcudnnGetRNNLinLayerBiasParams *cudnnGetRNNLinLayerBiasParams;
extern tcudnnGetRNNWeightParams *cudnnGetRNNWeightParams;
/* RNN forward inference and RNN data descriptors. */
extern tcudnnRNNForwardInference *cudnnRNNForwardInference;
extern tcudnnSetRNNPaddingMode *cudnnSetRNNPaddingMode;
extern tcudnnGetRNNPaddingMode *cudnnGetRNNPaddingMode;
extern tcudnnCreateRNNDataDescriptor *cudnnCreateRNNDataDescriptor;
extern tcudnnDestroyRNNDataDescriptor *cudnnDestroyRNNDataDescriptor;
extern tcudnnSetRNNDataDescriptor *cudnnSetRNNDataDescriptor;
extern tcudnnGetRNNDataDescriptor *cudnnGetRNNDataDescriptor;
extern tcudnnRNNForwardInferenceEx *cudnnRNNForwardInferenceEx;
extern tcudnnRNNForward *cudnnRNNForward;
extern tcudnnSetRNNAlgorithmDescriptor *cudnnSetRNNAlgorithmDescriptor;
extern tcudnnGetRNNForwardInferenceAlgorithmMaxCount *cudnnGetRNNForwardInferenceAlgorithmMaxCount;
extern tcudnnFindRNNForwardInferenceAlgorithmEx *cudnnFindRNNForwardInferenceAlgorithmEx;
/* Sequence-data descriptors and multi-head attention (forward). */
extern tcudnnCreateSeqDataDescriptor *cudnnCreateSeqDataDescriptor;
extern tcudnnDestroySeqDataDescriptor *cudnnDestroySeqDataDescriptor;
extern tcudnnSetSeqDataDescriptor *cudnnSetSeqDataDescriptor;
extern tcudnnGetSeqDataDescriptor *cudnnGetSeqDataDescriptor;
extern tcudnnCreateAttnDescriptor *cudnnCreateAttnDescriptor;
extern tcudnnDestroyAttnDescriptor *cudnnDestroyAttnDescriptor;
extern tcudnnSetAttnDescriptor *cudnnSetAttnDescriptor;
extern tcudnnGetAttnDescriptor *cudnnGetAttnDescriptor;
extern tcudnnGetMultiHeadAttnBuffers *cudnnGetMultiHeadAttnBuffers;
extern tcudnnGetMultiHeadAttnWeights *cudnnGetMultiHeadAttnWeights;
extern tcudnnMultiHeadAttnForward *cudnnMultiHeadAttnForward;
extern tcudnnAdvInferVersionCheck *cudnnAdvInferVersionCheck;
/*
 * cuDNN entry-point pointers whose names cover RNN training/backward passes,
 * multi-head attention backward passes and CTC loss; the run ends with
 * cudnnAdvTrainVersionCheck.
 *
 * Each line only declares a global `cudnnFoo` of type "pointer to tcudnnFoo".
 * NOTE(review): the `tcudnn*` typedefs and the pointer definitions are not in
 * this section; presumably the pointers are populated at runtime by cuewInit()
 * -- confirm before calling through them.
 */
/* RNN training: forward, backward-data and backward-weights variants. */
extern tcudnnRNNForwardTraining *cudnnRNNForwardTraining;
extern tcudnnRNNBackwardData *cudnnRNNBackwardData;
extern tcudnnRNNBackwardData_v8 *cudnnRNNBackwardData_v8;
extern tcudnnRNNBackwardWeights *cudnnRNNBackwardWeights;
extern tcudnnRNNBackwardWeights_v8 *cudnnRNNBackwardWeights_v8;
extern tcudnnRNNForwardTrainingEx *cudnnRNNForwardTrainingEx;
extern tcudnnRNNBackwardDataEx *cudnnRNNBackwardDataEx;
extern tcudnnRNNBackwardWeightsEx *cudnnRNNBackwardWeightsEx;
extern tcudnnGetRNNForwardTrainingAlgorithmMaxCount *cudnnGetRNNForwardTrainingAlgorithmMaxCount;
extern tcudnnFindRNNForwardTrainingAlgorithmEx *cudnnFindRNNForwardTrainingAlgorithmEx;
extern tcudnnGetRNNBackwardDataAlgorithmMaxCount *cudnnGetRNNBackwardDataAlgorithmMaxCount;
extern tcudnnFindRNNBackwardDataAlgorithmEx *cudnnFindRNNBackwardDataAlgorithmEx;
extern tcudnnGetRNNBackwardWeightsAlgorithmMaxCount *cudnnGetRNNBackwardWeightsAlgorithmMaxCount;
extern tcudnnFindRNNBackwardWeightsAlgorithmEx *cudnnFindRNNBackwardWeightsAlgorithmEx;
/* Multi-head attention backward passes. */
extern tcudnnMultiHeadAttnBackwardData *cudnnMultiHeadAttnBackwardData;
extern tcudnnMultiHeadAttnBackwardWeights *cudnnMultiHeadAttnBackwardWeights;
/* CTC loss descriptors, evaluation and workspace sizing. */
extern tcudnnCreateCTCLossDescriptor *cudnnCreateCTCLossDescriptor;
extern tcudnnSetCTCLossDescriptor *cudnnSetCTCLossDescriptor;
extern tcudnnSetCTCLossDescriptorEx *cudnnSetCTCLossDescriptorEx;
extern tcudnnSetCTCLossDescriptor_v8 *cudnnSetCTCLossDescriptor_v8;
extern tcudnnGetCTCLossDescriptor *cudnnGetCTCLossDescriptor;
extern tcudnnGetCTCLossDescriptorEx *cudnnGetCTCLossDescriptorEx;
extern tcudnnGetCTCLossDescriptor_v8 *cudnnGetCTCLossDescriptor_v8;
extern tcudnnDestroyCTCLossDescriptor *cudnnDestroyCTCLossDescriptor;
extern tcudnnCTCLoss *cudnnCTCLoss;
extern tcudnnCTCLoss_v8 *cudnnCTCLoss_v8;
extern tcudnnGetCTCLossWorkspaceSize *cudnnGetCTCLossWorkspaceSize;
extern tcudnnGetCTCLossWorkspaceSize_v8 *cudnnGetCTCLossWorkspaceSize_v8;
extern tcudnnAdvTrainVersionCheck *cudnnAdvTrainVersionCheck;
/*
 * cuDNN entry-point pointers whose names cover convolution descriptors,
 * forward convolution and backward-data convolution; the run ends with
 * cudnnCnnInferVersionCheck.
 *
 * Each line only declares a global `cudnnFoo` of type "pointer to tcudnnFoo".
 * NOTE(review): the `tcudnn*` typedefs and the pointer definitions are not in
 * this section; presumably the pointers are populated at runtime by cuewInit()
 * -- confirm before calling through them.
 */
/* Convolution descriptor lifetime and attributes. */
extern tcudnnCreateConvolutionDescriptor *cudnnCreateConvolutionDescriptor;
extern tcudnnDestroyConvolutionDescriptor *cudnnDestroyConvolutionDescriptor;
extern tcudnnSetConvolutionMathType *cudnnSetConvolutionMathType;
extern tcudnnGetConvolutionMathType *cudnnGetConvolutionMathType;
extern tcudnnSetConvolutionGroupCount *cudnnSetConvolutionGroupCount;
extern tcudnnGetConvolutionGroupCount *cudnnGetConvolutionGroupCount;
extern tcudnnSetConvolutionReorderType *cudnnSetConvolutionReorderType;
extern tcudnnGetConvolutionReorderType *cudnnGetConvolutionReorderType;
extern tcudnnSetConvolution2dDescriptor *cudnnSetConvolution2dDescriptor;
extern tcudnnGetConvolution2dDescriptor *cudnnGetConvolution2dDescriptor;
extern tcudnnSetConvolutionNdDescriptor *cudnnSetConvolutionNdDescriptor;
extern tcudnnGetConvolutionNdDescriptor *cudnnGetConvolutionNdDescriptor;
/* Forward convolution: output dims, algorithm selection, execution. */
extern tcudnnGetConvolution2dForwardOutputDim *cudnnGetConvolution2dForwardOutputDim;
extern tcudnnGetConvolutionNdForwardOutputDim *cudnnGetConvolutionNdForwardOutputDim;
extern tcudnnGetConvolutionForwardAlgorithmMaxCount *cudnnGetConvolutionForwardAlgorithmMaxCount;
extern tcudnnGetConvolutionForwardAlgorithm_v7 *cudnnGetConvolutionForwardAlgorithm_v7;
extern tcudnnFindConvolutionForwardAlgorithm *cudnnFindConvolutionForwardAlgorithm;
extern tcudnnFindConvolutionForwardAlgorithmEx *cudnnFindConvolutionForwardAlgorithmEx;
extern tcudnnIm2Col *cudnnIm2Col;
extern tcudnnReorderFilterAndBias *cudnnReorderFilterAndBias;
extern tcudnnGetConvolutionForwardWorkspaceSize *cudnnGetConvolutionForwardWorkspaceSize;
extern tcudnnConvolutionForward *cudnnConvolutionForward;
extern tcudnnConvolutionBiasActivationForward *cudnnConvolutionBiasActivationForward;
/* Backward-data convolution: algorithm selection, workspace, execution. */
extern tcudnnGetConvolutionBackwardDataAlgorithmMaxCount *cudnnGetConvolutionBackwardDataAlgorithmMaxCount;
extern tcudnnFindConvolutionBackwardDataAlgorithm *cudnnFindConvolutionBackwardDataAlgorithm;
extern tcudnnFindConvolutionBackwardDataAlgorithmEx *cudnnFindConvolutionBackwardDataAlgorithmEx;
extern tcudnnGetConvolutionBackwardDataAlgorithm_v7 *cudnnGetConvolutionBackwardDataAlgorithm_v7;
extern tcudnnGetConvolutionBackwardDataWorkspaceSize *cudnnGetConvolutionBackwardDataWorkspaceSize;
extern tcudnnConvolutionBackwardData *cudnnConvolutionBackwardData;
extern tcudnnGetFoldedConvBackwardDataDescriptors *cudnnGetFoldedConvBackwardDataDescriptors;
extern tcudnnCnnInferVersionCheck *cudnnCnnInferVersionCheck;
/*
 * cuDNN entry-point pointers whose names cover backward-filter/bias
 * convolution and the fused-ops API; the run ends with
 * cudnnCnnTrainVersionCheck.
 *
 * Each line only declares a global `cudnnFoo` of type "pointer to tcudnnFoo".
 * NOTE(review): the `tcudnn*` typedefs and the pointer definitions are not in
 * this section; presumably the pointers are populated at runtime by cuewInit()
 * -- confirm before calling through them.
 */
/* Backward-filter and backward-bias convolution. */
extern tcudnnGetConvolutionBackwardFilterAlgorithmMaxCount *cudnnGetConvolutionBackwardFilterAlgorithmMaxCount;
extern tcudnnFindConvolutionBackwardFilterAlgorithm *cudnnFindConvolutionBackwardFilterAlgorithm;
extern tcudnnFindConvolutionBackwardFilterAlgorithmEx *cudnnFindConvolutionBackwardFilterAlgorithmEx;
extern tcudnnGetConvolutionBackwardFilterAlgorithm_v7 *cudnnGetConvolutionBackwardFilterAlgorithm_v7;
extern tcudnnGetConvolutionBackwardFilterWorkspaceSize *cudnnGetConvolutionBackwardFilterWorkspaceSize;
extern tcudnnConvolutionBackwardFilter *cudnnConvolutionBackwardFilter;
extern tcudnnConvolutionBackwardBias *cudnnConvolutionBackwardBias;
/* Fused ops: constant/variant parameter packs, plans, execution. */
extern tcudnnCreateFusedOpsConstParamPack *cudnnCreateFusedOpsConstParamPack;
extern tcudnnDestroyFusedOpsConstParamPack *cudnnDestroyFusedOpsConstParamPack;
extern tcudnnSetFusedOpsConstParamPackAttribute *cudnnSetFusedOpsConstParamPackAttribute;
extern tcudnnGetFusedOpsConstParamPackAttribute *cudnnGetFusedOpsConstParamPackAttribute;
extern tcudnnCreateFusedOpsVariantParamPack *cudnnCreateFusedOpsVariantParamPack;
extern tcudnnDestroyFusedOpsVariantParamPack *cudnnDestroyFusedOpsVariantParamPack;
extern tcudnnSetFusedOpsVariantParamPackAttribute *cudnnSetFusedOpsVariantParamPackAttribute;
extern tcudnnGetFusedOpsVariantParamPackAttribute *cudnnGetFusedOpsVariantParamPackAttribute;
extern tcudnnCreateFusedOpsPlan *cudnnCreateFusedOpsPlan;
extern tcudnnDestroyFusedOpsPlan *cudnnDestroyFusedOpsPlan;
extern tcudnnMakeFusedOpsPlan *cudnnMakeFusedOpsPlan;
extern tcudnnFusedOpsExecute *cudnnFusedOpsExecute;
extern tcudnnCnnTrainVersionCheck *cudnnCnnTrainVersionCheck;
/*
 * cuDNN "Backend" entry-point pointers (descriptor create/destroy,
 * initialize/finalize, attribute get/set, execute -- per the symbol names).
 *
 * Each line only declares a global `cudnnBackendFoo` of type
 * "pointer to tcudnnBackendFoo".
 * NOTE(review): the typedefs and the pointer definitions are not in this
 * section; presumably the pointers are populated at runtime by cuewInit()
 * -- confirm before calling through them.
 */
extern tcudnnBackendCreateDescriptor *cudnnBackendCreateDescriptor;
extern tcudnnBackendDestroyDescriptor *cudnnBackendDestroyDescriptor;
extern tcudnnBackendInitialize *cudnnBackendInitialize;
extern tcudnnBackendFinalize *cudnnBackendFinalize;
extern tcudnnBackendSetAttribute *cudnnBackendSetAttribute;
extern tcudnnBackendGetAttribute *cudnnBackendGetAttribute;
extern tcudnnBackendExecute *cudnnBackendExecute;
/*
 * CUDA driver entry-point pointers with GL-related names (cuGraphicsGL*,
 * cuGL*).
 *
 * Each line only declares a global `cuFoo` of type "pointer to tcuFoo".
 * Several symbols carry an explicit `_v2` suffix in their name.
 * NOTE(review): the `tcu*` typedefs and the pointer definitions are not in
 * this section; presumably the pointers are populated at runtime by cuewInit()
 * -- confirm before calling through them.
 */
extern tcuGraphicsGLRegisterBuffer *cuGraphicsGLRegisterBuffer;
extern tcuGraphicsGLRegisterImage *cuGraphicsGLRegisterImage;
extern tcuGLGetDevices_v2 *cuGLGetDevices_v2;
extern tcuGLCtxCreate_v2 *cuGLCtxCreate_v2;
extern tcuGLInit *cuGLInit;
extern tcuGLRegisterBufferObject *cuGLRegisterBufferObject;
extern tcuGLMapBufferObject_v2 *cuGLMapBufferObject_v2;
extern tcuGLUnmapBufferObject *cuGLUnmapBufferObject;
extern tcuGLUnregisterBufferObject *cuGLUnregisterBufferObject;
extern tcuGLSetBufferObjectMapFlags *cuGLSetBufferObjectMapFlags;
extern tcuGLMapBufferObjectAsync_v2 *cuGLMapBufferObjectAsync_v2;
extern tcuGLUnmapBufferObjectAsync *cuGLUnmapBufferObjectAsync;
/*
 * cuew status codes: zero for success, negative values for failures.
 * NOTE(review): presumably these are the values returned by cuewInit();
 * confirm against the implementation.
 */
enum {
  CUEW_ERROR_ATEXIT_FAILED = -2, /* name suggests an atexit() registration failure */
  CUEW_ERROR_OPEN_FAILED = -1,   /* name suggests a library could not be opened */
  CUEW_SUCCESS = 0
};
/*
 * Component selectors, one distinct bit each so they can be combined with
 * bitwise OR.
 * NOTE(review): presumably passed as the `flags` argument of cuewInit();
 * confirm against the implementation.
 */
enum {
  CUEW_INIT_CUDA = 0x1,  /* bit 0 */
  CUEW_INIT_NVRTC = 0x2, /* bit 1 */
  CUEW_INIT_CUDNN = 0x4  /* bit 2 */
};
/*
 * cuew's own API. Only the prototypes are visible here; the definitions live
 * outside this header, so the notes below beyond the signatures are
 * assumptions to be confirmed against the implementation.
 */

/*
 * Initialize cuew. `flags` is a 32-bit unsigned mask.
 * NOTE(review): presumably a bitwise OR of the CUEW_INIT_* values, with the
 * int result being CUEW_SUCCESS or one of the CUEW_ERROR_* codes -- confirm.
 */
int cuewInit(cuuint32_t flags);

/*
 * Map a CUresult to a constant C string.
 * NOTE(review): presumably a human-readable description that the caller must
 * not free -- confirm.
 */
const char *cuewErrorString(CUresult result);

/*
 * Return a constant C string naming a compiler path.
 * NOTE(review): presumably the location of nvcc, and possibly NULL when none
 * is found -- confirm.
 */
const char *cuewCompilerPath(void);

/*
 * Return the compiler version as an int.
 * NOTE(review): encoding of the version number and the failure value are not
 * visible here -- confirm.
 */
int cuewCompilerVersion(void);

/*
 * Return the NVRTC version as an int.
 * NOTE(review): encoding of the version number and the failure value are not
 * visible here -- confirm.
 */
int cuewNvrtcVersion(void);
#ifdef __cplusplus
}
#endif
#endif /* __CUEW_H__ */