assimp/contrib/tinyusdz/tinyusdz_repo/examples/common/cuew/cuew.c

1758 lines
76 KiB
C
Raw Normal View History

2024-03-30 02:33:07 +00:00
/*
* Copyright 2011-2014 Blender Foundation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/* MSVC-only compatibility defines; everything here is skipped on other
 * compilers. */
#ifdef _MSC_VER
/* For _MSC_VER below 1900, route snprintf to _snprintf.
 * NOTE(review): _snprintf does not NUL-terminate on truncation, unlike C99
 * snprintf - confirm callers cope with that on those toolchains. */
# if _MSC_VER < 1900
# define snprintf _snprintf
# endif
/* Map popen/pclose onto the underscore-prefixed names. */
# define popen _popen
# define pclose _pclose
/* Defined ahead of the #include lines that follow this block. */
# define _CRT_SECURE_NO_WARNINGS
#endif
#include "cuew.h"
#include <assert.h>
#include <stdio.h>
#include <string.h>
#include <sys/stat.h>
/* Platform abstraction over the OS dynamic loader: a DynamicLibrary handle
 * type plus open/close/find macros. Under _WIN32 they map to LoadLibraryA /
 * FreeLibrary / GetProcAddress; otherwise to dlopen / dlclose / dlsym. */
#ifdef _WIN32
# define WIN32_LEAN_AND_MEAN
# define VC_EXTRALEAN
# include <windows.h>
/* Utility macros. */
typedef HMODULE DynamicLibrary;
# define dynamic_library_open(path) LoadLibraryA(path)
# define dynamic_library_close(lib) FreeLibrary(lib)
# define dynamic_library_find(lib, symbol) GetProcAddress(lib, symbol)
#else
# include <dlfcn.h>
typedef void* DynamicLibrary;
/* RTLD_NOW asks dlopen to resolve the library's symbols at load time. */
# define dynamic_library_open(path) dlopen(path, RTLD_NOW)
# define dynamic_library_close(lib) dlclose(lib)
# define dynamic_library_find(lib, symbol) dlsym(lib, symbol)
#endif
/* Symbol lookup helpers.
 *
 * _LIBRARY_FIND(lib, name) looks up the symbol spelled `name` in `lib` and
 * stores the result, cast to `t<name> *`, in the variable called `name`.
 * _LIBRARY_FIND_CHECKED does the same and then asserts the pointer is
 * non-NULL.
 *
 * NOTE(review): both macros expand to bare statements that already end in
 * ';' and are not wrapped in do { } while (0). The CHECKED form is therefore
 * two statements and must not be used as the unbraced body of an if/else or
 * loop. */
#define _LIBRARY_FIND_CHECKED(lib, name) \
name = (t##name *)dynamic_library_find(lib, #name); \
assert(name);
#define _LIBRARY_FIND(lib, name) \
name = (t##name *)dynamic_library_find(lib, #name);
/* Per-library wrappers: bind the helpers above to the cuda_lib, nvrtc_lib
 * and cudnn_lib handles respectively. */
#define CUDA_LIBRARY_FIND_CHECKED(name) \
_LIBRARY_FIND_CHECKED(cuda_lib, name)
#define CUDA_LIBRARY_FIND(name) _LIBRARY_FIND(cuda_lib, name)
#define NVRTC_LIBRARY_FIND_CHECKED(name) \
_LIBRARY_FIND_CHECKED(nvrtc_lib, name)
#define NVRTC_LIBRARY_FIND(name) _LIBRARY_FIND(nvrtc_lib, name)
#define CUDNN_LIBRARY_FIND_CHECKED(name) \
_LIBRARY_FIND_CHECKED(cudnn_lib, name)
#define CUDNN_LIBRARY_FIND(name) _LIBRARY_FIND(cudnn_lib, name)
/* Handles of the dynamically opened libraries, private to this file.
 * cuda_lib is assigned in cuewCudaInit and closed/reset in cuewCudaExit;
 * nvrtc_lib and cudnn_lib are presumably managed by matching routines
 * outside this part of the file - confirm. */
static DynamicLibrary cuda_lib;
static DynamicLibrary nvrtc_lib;
static DynamicLibrary cudnn_lib;
/* Function pointer definitions.
 *
 * One global pointer per wrapped entry point, named exactly like the symbol
 * it is looked up by and typed `t<name> *` (the t<name> types are not
 * declared in this file; presumably they come from cuew.h). The
 * *_LIBRARY_FIND macros assign these pointers. */
/* Error reporting. */
tcuGetErrorString *cuGetErrorString;
tcuGetErrorName *cuGetErrorName;
/* Initialization and driver version. */
tcuInit *cuInit;
tcuDriverGetVersion *cuDriverGetVersion;
/* cuDevice* entry points. */
tcuDeviceGet *cuDeviceGet;
tcuDeviceGetCount *cuDeviceGetCount;
tcuDeviceGetName *cuDeviceGetName;
tcuDeviceGetUuid *cuDeviceGetUuid;
tcuDeviceGetLuid *cuDeviceGetLuid;
tcuDeviceTotalMem_v2 *cuDeviceTotalMem_v2;
tcuDeviceGetTexture1DLinearMaxWidth *cuDeviceGetTexture1DLinearMaxWidth;
tcuDeviceGetAttribute *cuDeviceGetAttribute;
tcuDeviceGetNvSciSyncAttributes *cuDeviceGetNvSciSyncAttributes;
tcuDeviceSetMemPool *cuDeviceSetMemPool;
tcuDeviceGetMemPool *cuDeviceGetMemPool;
tcuDeviceGetDefaultMemPool *cuDeviceGetDefaultMemPool;
tcuDeviceGetProperties *cuDeviceGetProperties;
tcuDeviceComputeCapability *cuDeviceComputeCapability;
/* cuDevicePrimaryCtx* entry points. */
tcuDevicePrimaryCtxRetain *cuDevicePrimaryCtxRetain;
tcuDevicePrimaryCtxRelease_v2 *cuDevicePrimaryCtxRelease_v2;
tcuDevicePrimaryCtxSetFlags_v2 *cuDevicePrimaryCtxSetFlags_v2;
tcuDevicePrimaryCtxGetState *cuDevicePrimaryCtxGetState;
tcuDevicePrimaryCtxReset_v2 *cuDevicePrimaryCtxReset_v2;
/* Global pointers for the cuCtx* (context) entry points. */
tcuCtxCreate_v2 *cuCtxCreate_v2;
tcuCtxDestroy_v2 *cuCtxDestroy_v2;
tcuCtxPushCurrent_v2 *cuCtxPushCurrent_v2;
tcuCtxPopCurrent_v2 *cuCtxPopCurrent_v2;
tcuCtxSetCurrent *cuCtxSetCurrent;
tcuCtxGetCurrent *cuCtxGetCurrent;
tcuCtxGetDevice *cuCtxGetDevice;
tcuCtxGetFlags *cuCtxGetFlags;
tcuCtxSynchronize *cuCtxSynchronize;
tcuCtxSetLimit *cuCtxSetLimit;
tcuCtxGetLimit *cuCtxGetLimit;
tcuCtxGetCacheConfig *cuCtxGetCacheConfig;
tcuCtxSetCacheConfig *cuCtxSetCacheConfig;
tcuCtxGetSharedMemConfig *cuCtxGetSharedMemConfig;
tcuCtxSetSharedMemConfig *cuCtxSetSharedMemConfig;
tcuCtxGetApiVersion *cuCtxGetApiVersion;
tcuCtxGetStreamPriorityRange *cuCtxGetStreamPriorityRange;
tcuCtxResetPersistingL2Cache *cuCtxResetPersistingL2Cache;
tcuCtxAttach *cuCtxAttach;
tcuCtxDetach *cuCtxDetach;
/* Global pointers for the cuModule* (module) entry points. */
tcuModuleLoad *cuModuleLoad;
tcuModuleLoadData *cuModuleLoadData;
tcuModuleLoadDataEx *cuModuleLoadDataEx;
tcuModuleLoadFatBinary *cuModuleLoadFatBinary;
tcuModuleUnload *cuModuleUnload;
tcuModuleGetFunction *cuModuleGetFunction;
tcuModuleGetGlobal_v2 *cuModuleGetGlobal_v2;
tcuModuleGetTexRef *cuModuleGetTexRef;
tcuModuleGetSurfRef *cuModuleGetSurfRef;
/* cuLink* entry points. */
tcuLinkCreate_v2 *cuLinkCreate_v2;
tcuLinkAddData_v2 *cuLinkAddData_v2;
tcuLinkAddFile_v2 *cuLinkAddFile_v2;
tcuLinkComplete *cuLinkComplete;
tcuLinkDestroy *cuLinkDestroy;
/* Global pointers for memory allocation/free and host-memory entry points. */
tcuMemGetInfo_v2 *cuMemGetInfo_v2;
tcuMemAlloc_v2 *cuMemAlloc_v2;
tcuMemAllocPitch_v2 *cuMemAllocPitch_v2;
tcuMemFree_v2 *cuMemFree_v2;
tcuMemGetAddressRange_v2 *cuMemGetAddressRange_v2;
tcuMemAllocHost_v2 *cuMemAllocHost_v2;
tcuMemFreeHost *cuMemFreeHost;
tcuMemHostAlloc *cuMemHostAlloc;
tcuMemHostGetDevicePointer_v2 *cuMemHostGetDevicePointer_v2;
tcuMemHostGetFlags *cuMemHostGetFlags;
tcuMemAllocManaged *cuMemAllocManaged;
/* PCI bus id lookups. */
tcuDeviceGetByPCIBusId *cuDeviceGetByPCIBusId;
tcuDeviceGetPCIBusId *cuDeviceGetPCIBusId;
/* cuIpc* entry points. */
tcuIpcGetEventHandle *cuIpcGetEventHandle;
tcuIpcOpenEventHandle *cuIpcOpenEventHandle;
tcuIpcGetMemHandle *cuIpcGetMemHandle;
tcuIpcOpenMemHandle_v2 *cuIpcOpenMemHandle_v2;
tcuIpcCloseMemHandle *cuIpcCloseMemHandle;
/* Host memory registration. */
tcuMemHostRegister_v2 *cuMemHostRegister_v2;
tcuMemHostUnregister *cuMemHostUnregister;
/* Global pointers for the cuMemcpy* entry points. */
tcuMemcpy *cuMemcpy;
tcuMemcpyPeer *cuMemcpyPeer;
tcuMemcpyHtoD_v2 *cuMemcpyHtoD_v2;
tcuMemcpyDtoH_v2 *cuMemcpyDtoH_v2;
tcuMemcpyDtoD_v2 *cuMemcpyDtoD_v2;
tcuMemcpyDtoA_v2 *cuMemcpyDtoA_v2;
tcuMemcpyAtoD_v2 *cuMemcpyAtoD_v2;
tcuMemcpyHtoA_v2 *cuMemcpyHtoA_v2;
tcuMemcpyAtoH_v2 *cuMemcpyAtoH_v2;
tcuMemcpyAtoA_v2 *cuMemcpyAtoA_v2;
tcuMemcpy2D_v2 *cuMemcpy2D_v2;
tcuMemcpy2DUnaligned_v2 *cuMemcpy2DUnaligned_v2;
tcuMemcpy3D_v2 *cuMemcpy3D_v2;
tcuMemcpy3DPeer *cuMemcpy3DPeer;
/* cuMemcpy*Async variants. */
tcuMemcpyAsync *cuMemcpyAsync;
tcuMemcpyPeerAsync *cuMemcpyPeerAsync;
tcuMemcpyHtoDAsync_v2 *cuMemcpyHtoDAsync_v2;
tcuMemcpyDtoHAsync_v2 *cuMemcpyDtoHAsync_v2;
tcuMemcpyDtoDAsync_v2 *cuMemcpyDtoDAsync_v2;
tcuMemcpyHtoAAsync_v2 *cuMemcpyHtoAAsync_v2;
tcuMemcpyAtoHAsync_v2 *cuMemcpyAtoHAsync_v2;
tcuMemcpy2DAsync_v2 *cuMemcpy2DAsync_v2;
tcuMemcpy3DAsync_v2 *cuMemcpy3DAsync_v2;
tcuMemcpy3DPeerAsync *cuMemcpy3DPeerAsync;
/* cuMemset* entry points. */
tcuMemsetD8_v2 *cuMemsetD8_v2;
tcuMemsetD16_v2 *cuMemsetD16_v2;
tcuMemsetD32_v2 *cuMemsetD32_v2;
tcuMemsetD2D8_v2 *cuMemsetD2D8_v2;
tcuMemsetD2D16_v2 *cuMemsetD2D16_v2;
tcuMemsetD2D32_v2 *cuMemsetD2D32_v2;
/* cuMemset*Async variants. */
tcuMemsetD8Async *cuMemsetD8Async;
tcuMemsetD16Async *cuMemsetD16Async;
tcuMemsetD32Async *cuMemsetD32Async;
tcuMemsetD2D8Async *cuMemsetD2D8Async;
tcuMemsetD2D16Async *cuMemsetD2D16Async;
tcuMemsetD2D32Async *cuMemsetD2D32Async;
/* Global pointers for the cuArray*, cuArray3D* and cuMipmappedArray* entry
 * points. */
tcuArrayCreate_v2 *cuArrayCreate_v2;
tcuArrayGetDescriptor_v2 *cuArrayGetDescriptor_v2;
tcuArrayGetSparseProperties *cuArrayGetSparseProperties;
tcuMipmappedArrayGetSparseProperties *cuMipmappedArrayGetSparseProperties;
tcuArrayGetPlane *cuArrayGetPlane;
tcuArrayDestroy *cuArrayDestroy;
tcuArray3DCreate_v2 *cuArray3DCreate_v2;
tcuArray3DGetDescriptor_v2 *cuArray3DGetDescriptor_v2;
tcuMipmappedArrayCreate *cuMipmappedArrayCreate;
tcuMipmappedArrayGetLevel *cuMipmappedArrayGetLevel;
tcuMipmappedArrayDestroy *cuMipmappedArrayDestroy;
/* Global pointers for address reservation, mapping and shareable-handle
 * memory entry points. */
tcuMemAddressReserve *cuMemAddressReserve;
tcuMemAddressFree *cuMemAddressFree;
tcuMemCreate *cuMemCreate;
tcuMemRelease *cuMemRelease;
tcuMemMap *cuMemMap;
tcuMemMapArrayAsync *cuMemMapArrayAsync;
tcuMemUnmap *cuMemUnmap;
tcuMemSetAccess *cuMemSetAccess;
tcuMemGetAccess *cuMemGetAccess;
tcuMemExportToShareableHandle *cuMemExportToShareableHandle;
tcuMemImportFromShareableHandle *cuMemImportFromShareableHandle;
tcuMemGetAllocationGranularity *cuMemGetAllocationGranularity;
tcuMemGetAllocationPropertiesFromHandle *cuMemGetAllocationPropertiesFromHandle;
tcuMemRetainAllocationHandle *cuMemRetainAllocationHandle;
/* Async allocation and cuMemPool* entry points. */
tcuMemFreeAsync *cuMemFreeAsync;
tcuMemAllocAsync *cuMemAllocAsync;
tcuMemPoolTrimTo *cuMemPoolTrimTo;
tcuMemPoolSetAttribute *cuMemPoolSetAttribute;
tcuMemPoolGetAttribute *cuMemPoolGetAttribute;
tcuMemPoolSetAccess *cuMemPoolSetAccess;
tcuMemPoolGetAccess *cuMemPoolGetAccess;
tcuMemPoolCreate *cuMemPoolCreate;
tcuMemPoolDestroy *cuMemPoolDestroy;
tcuMemAllocFromPoolAsync *cuMemAllocFromPoolAsync;
tcuMemPoolExportToShareableHandle *cuMemPoolExportToShareableHandle;
tcuMemPoolImportFromShareableHandle *cuMemPoolImportFromShareableHandle;
tcuMemPoolExportPointer *cuMemPoolExportPointer;
tcuMemPoolImportPointer *cuMemPoolImportPointer;
/* Pointer attributes, prefetch/advise and memory-range attributes. */
tcuPointerGetAttribute *cuPointerGetAttribute;
tcuMemPrefetchAsync *cuMemPrefetchAsync;
tcuMemAdvise *cuMemAdvise;
tcuMemRangeGetAttribute *cuMemRangeGetAttribute;
tcuMemRangeGetAttributes *cuMemRangeGetAttributes;
tcuPointerSetAttribute *cuPointerSetAttribute;
tcuPointerGetAttributes *cuPointerGetAttributes;
/* Global pointers for the cuStream* entry points (plus
 * cuThreadExchangeStreamCaptureMode, which sits among the capture calls). */
tcuStreamCreate *cuStreamCreate;
tcuStreamCreateWithPriority *cuStreamCreateWithPriority;
tcuStreamGetPriority *cuStreamGetPriority;
tcuStreamGetFlags *cuStreamGetFlags;
tcuStreamGetCtx *cuStreamGetCtx;
tcuStreamWaitEvent *cuStreamWaitEvent;
tcuStreamAddCallback *cuStreamAddCallback;
tcuStreamBeginCapture_v2 *cuStreamBeginCapture_v2;
tcuThreadExchangeStreamCaptureMode *cuThreadExchangeStreamCaptureMode;
tcuStreamEndCapture *cuStreamEndCapture;
tcuStreamIsCapturing *cuStreamIsCapturing;
tcuStreamGetCaptureInfo *cuStreamGetCaptureInfo;
tcuStreamAttachMemAsync *cuStreamAttachMemAsync;
tcuStreamQuery *cuStreamQuery;
tcuStreamSynchronize *cuStreamSynchronize;
tcuStreamDestroy_v2 *cuStreamDestroy_v2;
tcuStreamCopyAttributes *cuStreamCopyAttributes;
tcuStreamGetAttribute *cuStreamGetAttribute;
tcuStreamSetAttribute *cuStreamSetAttribute;
/* cuEvent* entry points. */
tcuEventCreate *cuEventCreate;
tcuEventRecord *cuEventRecord;
tcuEventRecordWithFlags *cuEventRecordWithFlags;
tcuEventQuery *cuEventQuery;
tcuEventSynchronize *cuEventSynchronize;
tcuEventDestroy_v2 *cuEventDestroy_v2;
tcuEventElapsedTime *cuEventElapsedTime;
/* External memory and external semaphore entry points. */
tcuImportExternalMemory *cuImportExternalMemory;
tcuExternalMemoryGetMappedBuffer *cuExternalMemoryGetMappedBuffer;
tcuExternalMemoryGetMappedMipmappedArray *cuExternalMemoryGetMappedMipmappedArray;
tcuDestroyExternalMemory *cuDestroyExternalMemory;
tcuImportExternalSemaphore *cuImportExternalSemaphore;
tcuSignalExternalSemaphoresAsync *cuSignalExternalSemaphoresAsync;
tcuWaitExternalSemaphoresAsync *cuWaitExternalSemaphoresAsync;
tcuDestroyExternalSemaphore *cuDestroyExternalSemaphore;
/* Stream wait/write value and batched memory-op entry points. */
tcuStreamWaitValue32 *cuStreamWaitValue32;
tcuStreamWaitValue64 *cuStreamWaitValue64;
tcuStreamWriteValue32 *cuStreamWriteValue32;
tcuStreamWriteValue64 *cuStreamWriteValue64;
tcuStreamBatchMemOp *cuStreamBatchMemOp;
/* Global pointers for the cuFunc* attribute/config entry points. */
tcuFuncGetAttribute *cuFuncGetAttribute;
tcuFuncSetAttribute *cuFuncSetAttribute;
tcuFuncSetCacheConfig *cuFuncSetCacheConfig;
tcuFuncSetSharedMemConfig *cuFuncSetSharedMemConfig;
/* cuLaunch* entry points. */
tcuLaunchKernel *cuLaunchKernel;
tcuLaunchCooperativeKernel *cuLaunchCooperativeKernel;
tcuLaunchCooperativeKernelMultiDevice *cuLaunchCooperativeKernelMultiDevice;
tcuLaunchHostFunc *cuLaunchHostFunc;
/* Block-shape / cuParamSet* / cuLaunchGrid* entry points. */
tcuFuncSetBlockShape *cuFuncSetBlockShape;
tcuFuncSetSharedSize *cuFuncSetSharedSize;
tcuParamSetSize *cuParamSetSize;
tcuParamSeti *cuParamSeti;
tcuParamSetf *cuParamSetf;
tcuParamSetv *cuParamSetv;
tcuLaunch *cuLaunch;
tcuLaunchGrid *cuLaunchGrid;
tcuLaunchGridAsync *cuLaunchGridAsync;
tcuParamSetTexRef *cuParamSetTexRef;
/* Global pointers for the cuGraph* entry points. */
/* Graph creation and per-node-type add/get/set calls. */
tcuGraphCreate *cuGraphCreate;
tcuGraphAddKernelNode *cuGraphAddKernelNode;
tcuGraphKernelNodeGetParams *cuGraphKernelNodeGetParams;
tcuGraphKernelNodeSetParams *cuGraphKernelNodeSetParams;
tcuGraphAddMemcpyNode *cuGraphAddMemcpyNode;
tcuGraphMemcpyNodeGetParams *cuGraphMemcpyNodeGetParams;
tcuGraphMemcpyNodeSetParams *cuGraphMemcpyNodeSetParams;
tcuGraphAddMemsetNode *cuGraphAddMemsetNode;
tcuGraphMemsetNodeGetParams *cuGraphMemsetNodeGetParams;
tcuGraphMemsetNodeSetParams *cuGraphMemsetNodeSetParams;
tcuGraphAddHostNode *cuGraphAddHostNode;
tcuGraphHostNodeGetParams *cuGraphHostNodeGetParams;
tcuGraphHostNodeSetParams *cuGraphHostNodeSetParams;
tcuGraphAddChildGraphNode *cuGraphAddChildGraphNode;
tcuGraphChildGraphNodeGetGraph *cuGraphChildGraphNodeGetGraph;
tcuGraphAddEmptyNode *cuGraphAddEmptyNode;
tcuGraphAddEventRecordNode *cuGraphAddEventRecordNode;
tcuGraphEventRecordNodeGetEvent *cuGraphEventRecordNodeGetEvent;
tcuGraphEventRecordNodeSetEvent *cuGraphEventRecordNodeSetEvent;
tcuGraphAddEventWaitNode *cuGraphAddEventWaitNode;
tcuGraphEventWaitNodeGetEvent *cuGraphEventWaitNodeGetEvent;
tcuGraphEventWaitNodeSetEvent *cuGraphEventWaitNodeSetEvent;
tcuGraphAddExternalSemaphoresSignalNode *cuGraphAddExternalSemaphoresSignalNode;
tcuGraphExternalSemaphoresSignalNodeGetParams *cuGraphExternalSemaphoresSignalNodeGetParams;
tcuGraphExternalSemaphoresSignalNodeSetParams *cuGraphExternalSemaphoresSignalNodeSetParams;
tcuGraphAddExternalSemaphoresWaitNode *cuGraphAddExternalSemaphoresWaitNode;
tcuGraphExternalSemaphoresWaitNodeGetParams *cuGraphExternalSemaphoresWaitNodeGetParams;
tcuGraphExternalSemaphoresWaitNodeSetParams *cuGraphExternalSemaphoresWaitNodeSetParams;
/* Clone, node/edge queries and dependency edits. */
tcuGraphClone *cuGraphClone;
tcuGraphNodeFindInClone *cuGraphNodeFindInClone;
tcuGraphNodeGetType *cuGraphNodeGetType;
tcuGraphGetNodes *cuGraphGetNodes;
tcuGraphGetRootNodes *cuGraphGetRootNodes;
tcuGraphGetEdges *cuGraphGetEdges;
tcuGraphNodeGetDependencies *cuGraphNodeGetDependencies;
tcuGraphNodeGetDependentNodes *cuGraphNodeGetDependentNodes;
tcuGraphAddDependencies *cuGraphAddDependencies;
tcuGraphRemoveDependencies *cuGraphRemoveDependencies;
tcuGraphDestroyNode *cuGraphDestroyNode;
/* Instantiate and cuGraphExec* calls. */
tcuGraphInstantiate_v2 *cuGraphInstantiate_v2;
tcuGraphExecKernelNodeSetParams *cuGraphExecKernelNodeSetParams;
tcuGraphExecMemcpyNodeSetParams *cuGraphExecMemcpyNodeSetParams;
tcuGraphExecMemsetNodeSetParams *cuGraphExecMemsetNodeSetParams;
tcuGraphExecHostNodeSetParams *cuGraphExecHostNodeSetParams;
tcuGraphExecChildGraphNodeSetParams *cuGraphExecChildGraphNodeSetParams;
tcuGraphExecEventRecordNodeSetEvent *cuGraphExecEventRecordNodeSetEvent;
tcuGraphExecEventWaitNodeSetEvent *cuGraphExecEventWaitNodeSetEvent;
tcuGraphExecExternalSemaphoresSignalNodeSetParams *cuGraphExecExternalSemaphoresSignalNodeSetParams;
tcuGraphExecExternalSemaphoresWaitNodeSetParams *cuGraphExecExternalSemaphoresWaitNodeSetParams;
tcuGraphUpload *cuGraphUpload;
tcuGraphLaunch *cuGraphLaunch;
tcuGraphExecDestroy *cuGraphExecDestroy;
tcuGraphDestroy *cuGraphDestroy;
tcuGraphExecUpdate *cuGraphExecUpdate;
/* Kernel-node attribute calls. */
tcuGraphKernelNodeCopyAttributes *cuGraphKernelNodeCopyAttributes;
tcuGraphKernelNodeGetAttribute *cuGraphKernelNodeGetAttribute;
tcuGraphKernelNodeSetAttribute *cuGraphKernelNodeSetAttribute;
/* Global pointers for the cuOccupancy* entry points. */
tcuOccupancyMaxActiveBlocksPerMultiprocessor *cuOccupancyMaxActiveBlocksPerMultiprocessor;
tcuOccupancyMaxActiveBlocksPerMultiprocessorWithFlags *cuOccupancyMaxActiveBlocksPerMultiprocessorWithFlags;
tcuOccupancyMaxPotentialBlockSize *cuOccupancyMaxPotentialBlockSize;
tcuOccupancyMaxPotentialBlockSizeWithFlags *cuOccupancyMaxPotentialBlockSizeWithFlags;
tcuOccupancyAvailableDynamicSMemPerBlock *cuOccupancyAvailableDynamicSMemPerBlock;
/* Global pointers for the cuTexRef* entry points: setters first, then
 * getters, then create/destroy. */
tcuTexRefSetArray *cuTexRefSetArray;
tcuTexRefSetMipmappedArray *cuTexRefSetMipmappedArray;
tcuTexRefSetAddress_v2 *cuTexRefSetAddress_v2;
tcuTexRefSetAddress2D_v3 *cuTexRefSetAddress2D_v3;
tcuTexRefSetFormat *cuTexRefSetFormat;
tcuTexRefSetAddressMode *cuTexRefSetAddressMode;
tcuTexRefSetFilterMode *cuTexRefSetFilterMode;
tcuTexRefSetMipmapFilterMode *cuTexRefSetMipmapFilterMode;
tcuTexRefSetMipmapLevelBias *cuTexRefSetMipmapLevelBias;
tcuTexRefSetMipmapLevelClamp *cuTexRefSetMipmapLevelClamp;
tcuTexRefSetMaxAnisotropy *cuTexRefSetMaxAnisotropy;
tcuTexRefSetBorderColor *cuTexRefSetBorderColor;
tcuTexRefSetFlags *cuTexRefSetFlags;
tcuTexRefGetAddress_v2 *cuTexRefGetAddress_v2;
tcuTexRefGetArray *cuTexRefGetArray;
tcuTexRefGetMipmappedArray *cuTexRefGetMipmappedArray;
tcuTexRefGetAddressMode *cuTexRefGetAddressMode;
tcuTexRefGetFilterMode *cuTexRefGetFilterMode;
tcuTexRefGetFormat *cuTexRefGetFormat;
tcuTexRefGetMipmapFilterMode *cuTexRefGetMipmapFilterMode;
tcuTexRefGetMipmapLevelBias *cuTexRefGetMipmapLevelBias;
tcuTexRefGetMipmapLevelClamp *cuTexRefGetMipmapLevelClamp;
tcuTexRefGetMaxAnisotropy *cuTexRefGetMaxAnisotropy;
tcuTexRefGetBorderColor *cuTexRefGetBorderColor;
tcuTexRefGetFlags *cuTexRefGetFlags;
tcuTexRefCreate *cuTexRefCreate;
tcuTexRefDestroy *cuTexRefDestroy;
/* cuSurfRef* entry points. */
tcuSurfRefSetArray *cuSurfRefSetArray;
tcuSurfRefGetArray *cuSurfRefGetArray;
/* cuTexObject* entry points. */
tcuTexObjectCreate *cuTexObjectCreate;
tcuTexObjectDestroy *cuTexObjectDestroy;
tcuTexObjectGetResourceDesc *cuTexObjectGetResourceDesc;
tcuTexObjectGetTextureDesc *cuTexObjectGetTextureDesc;
tcuTexObjectGetResourceViewDesc *cuTexObjectGetResourceViewDesc;
/* cuSurfObject* entry points. */
tcuSurfObjectCreate *cuSurfObjectCreate;
tcuSurfObjectDestroy *cuSurfObjectDestroy;
tcuSurfObjectGetResourceDesc *cuSurfObjectGetResourceDesc;
/* Global pointers for the peer-access entry points. */
tcuDeviceCanAccessPeer *cuDeviceCanAccessPeer;
tcuCtxEnablePeerAccess *cuCtxEnablePeerAccess;
tcuCtxDisablePeerAccess *cuCtxDisablePeerAccess;
tcuDeviceGetP2PAttribute *cuDeviceGetP2PAttribute;
/* cuGraphics* entry points. */
tcuGraphicsUnregisterResource *cuGraphicsUnregisterResource;
tcuGraphicsSubResourceGetMappedArray *cuGraphicsSubResourceGetMappedArray;
tcuGraphicsResourceGetMappedMipmappedArray *cuGraphicsResourceGetMappedMipmappedArray;
tcuGraphicsResourceGetMappedPointer_v2 *cuGraphicsResourceGetMappedPointer_v2;
tcuGraphicsResourceSetMapFlags_v2 *cuGraphicsResourceSetMapFlags_v2;
tcuGraphicsMapResources *cuGraphicsMapResources;
tcuGraphicsUnmapResources *cuGraphicsUnmapResources;
/* Remaining cu* pointers that fit none of the groups above. */
tcuGetExportTable *cuGetExportTable;
tcuFuncGetModule *cuFuncGetModule;
/* Global pointers for the nvrtc* entry points.
 * NOTE(review): presumably assigned through NVRTC_LIBRARY_FIND from
 * nvrtc_lib by an init routine outside this part of the file - confirm. */
tnvrtcGetErrorString *nvrtcGetErrorString;
tnvrtcVersion *nvrtcVersion;
tnvrtcGetNumSupportedArchs *nvrtcGetNumSupportedArchs;
tnvrtcGetSupportedArchs *nvrtcGetSupportedArchs;
tnvrtcCreateProgram *nvrtcCreateProgram;
tnvrtcDestroyProgram *nvrtcDestroyProgram;
tnvrtcCompileProgram *nvrtcCompileProgram;
tnvrtcGetPTXSize *nvrtcGetPTXSize;
tnvrtcGetPTX *nvrtcGetPTX;
tnvrtcGetCUBINSize *nvrtcGetCUBINSize;
tnvrtcGetCUBIN *nvrtcGetCUBIN;
tnvrtcGetProgramLogSize *nvrtcGetProgramLogSize;
tnvrtcGetProgramLog *nvrtcGetProgramLog;
tnvrtcAddNameExpression *nvrtcAddNameExpression;
tnvrtcGetLoweredName *nvrtcGetLoweredName;
/* Global pointers for the cudnn* version/error/property queries and the
 * cudnnCreate/cudnnDestroy/stream calls.
 * NOTE(review): presumably assigned through CUDNN_LIBRARY_FIND from
 * cudnn_lib by an init routine outside this part of the file - confirm. */
tcudnnGetVersion *cudnnGetVersion;
tcudnnGetCudartVersion *cudnnGetCudartVersion;
tcudnnGetErrorString *cudnnGetErrorString;
tcudnnQueryRuntimeError *cudnnQueryRuntimeError;
tcudnnGetProperty *cudnnGetProperty;
tcudnnCreate *cudnnCreate;
tcudnnDestroy *cudnnDestroy;
tcudnnSetStream *cudnnSetStream;
tcudnnGetStream *cudnnGetStream;
/* Global pointers for the cudnn* tensor descriptor entry points. */
tcudnnCreateTensorDescriptor *cudnnCreateTensorDescriptor;
tcudnnSetTensor4dDescriptor *cudnnSetTensor4dDescriptor;
tcudnnSetTensor4dDescriptorEx *cudnnSetTensor4dDescriptorEx;
tcudnnGetTensor4dDescriptor *cudnnGetTensor4dDescriptor;
tcudnnSetTensorNdDescriptor *cudnnSetTensorNdDescriptor;
tcudnnSetTensorNdDescriptorEx *cudnnSetTensorNdDescriptorEx;
tcudnnGetTensorNdDescriptor *cudnnGetTensorNdDescriptor;
tcudnnGetTensorSizeInBytes *cudnnGetTensorSizeInBytes;
tcudnnDestroyTensorDescriptor *cudnnDestroyTensorDescriptor;
/* Tensor transform descriptor and transform/add calls. */
tcudnnInitTransformDest *cudnnInitTransformDest;
tcudnnCreateTensorTransformDescriptor *cudnnCreateTensorTransformDescriptor;
tcudnnSetTensorTransformDescriptor *cudnnSetTensorTransformDescriptor;
tcudnnGetTensorTransformDescriptor *cudnnGetTensorTransformDescriptor;
tcudnnDestroyTensorTransformDescriptor *cudnnDestroyTensorTransformDescriptor;
tcudnnTransformTensor *cudnnTransformTensor;
tcudnnTransformTensorEx *cudnnTransformTensorEx;
tcudnnAddTensor *cudnnAddTensor;
/* OpTensor descriptor and call. */
tcudnnCreateOpTensorDescriptor *cudnnCreateOpTensorDescriptor;
tcudnnSetOpTensorDescriptor *cudnnSetOpTensorDescriptor;
tcudnnGetOpTensorDescriptor *cudnnGetOpTensorDescriptor;
tcudnnDestroyOpTensorDescriptor *cudnnDestroyOpTensorDescriptor;
tcudnnOpTensor *cudnnOpTensor;
/* ReduceTensor descriptor, size queries and call; set/scale tensor. */
tcudnnCreateReduceTensorDescriptor *cudnnCreateReduceTensorDescriptor;
tcudnnSetReduceTensorDescriptor *cudnnSetReduceTensorDescriptor;
tcudnnGetReduceTensorDescriptor *cudnnGetReduceTensorDescriptor;
tcudnnDestroyReduceTensorDescriptor *cudnnDestroyReduceTensorDescriptor;
tcudnnGetReductionIndicesSize *cudnnGetReductionIndicesSize;
tcudnnGetReductionWorkspaceSize *cudnnGetReductionWorkspaceSize;
tcudnnReduceTensor *cudnnReduceTensor;
tcudnnSetTensor *cudnnSetTensor;
tcudnnScaleTensor *cudnnScaleTensor;
/* Filter descriptor entry points. */
tcudnnCreateFilterDescriptor *cudnnCreateFilterDescriptor;
tcudnnSetFilter4dDescriptor *cudnnSetFilter4dDescriptor;
tcudnnGetFilter4dDescriptor *cudnnGetFilter4dDescriptor;
tcudnnSetFilterNdDescriptor *cudnnSetFilterNdDescriptor;
tcudnnGetFilterNdDescriptor *cudnnGetFilterNdDescriptor;
tcudnnGetFilterSizeInBytes *cudnnGetFilterSizeInBytes;
tcudnnTransformFilter *cudnnTransformFilter;
tcudnnDestroyFilterDescriptor *cudnnDestroyFilterDescriptor;
/* Global pointers for the cudnn* forward-pass operator entry points and
 * their descriptors; the group ends with cudnnOpsInferVersionCheck. */
/* Softmax and pooling. */
tcudnnSoftmaxForward *cudnnSoftmaxForward;
tcudnnCreatePoolingDescriptor *cudnnCreatePoolingDescriptor;
tcudnnSetPooling2dDescriptor *cudnnSetPooling2dDescriptor;
tcudnnGetPooling2dDescriptor *cudnnGetPooling2dDescriptor;
tcudnnSetPoolingNdDescriptor *cudnnSetPoolingNdDescriptor;
tcudnnGetPoolingNdDescriptor *cudnnGetPoolingNdDescriptor;
tcudnnGetPoolingNdForwardOutputDim *cudnnGetPoolingNdForwardOutputDim;
tcudnnGetPooling2dForwardOutputDim *cudnnGetPooling2dForwardOutputDim;
tcudnnDestroyPoolingDescriptor *cudnnDestroyPoolingDescriptor;
tcudnnPoolingForward *cudnnPoolingForward;
/* Activation. */
tcudnnCreateActivationDescriptor *cudnnCreateActivationDescriptor;
tcudnnSetActivationDescriptor *cudnnSetActivationDescriptor;
tcudnnGetActivationDescriptor *cudnnGetActivationDescriptor;
tcudnnDestroyActivationDescriptor *cudnnDestroyActivationDescriptor;
tcudnnActivationForward *cudnnActivationForward;
/* LRN, divisive normalization, batch normalization and normalization. */
tcudnnCreateLRNDescriptor *cudnnCreateLRNDescriptor;
tcudnnSetLRNDescriptor *cudnnSetLRNDescriptor;
tcudnnGetLRNDescriptor *cudnnGetLRNDescriptor;
tcudnnDestroyLRNDescriptor *cudnnDestroyLRNDescriptor;
tcudnnLRNCrossChannelForward *cudnnLRNCrossChannelForward;
tcudnnDivisiveNormalizationForward *cudnnDivisiveNormalizationForward;
tcudnnDeriveBNTensorDescriptor *cudnnDeriveBNTensorDescriptor;
tcudnnBatchNormalizationForwardInference *cudnnBatchNormalizationForwardInference;
tcudnnDeriveNormTensorDescriptor *cudnnDeriveNormTensorDescriptor;
tcudnnNormalizationForwardInference *cudnnNormalizationForwardInference;
/* Spatial transformer. */
tcudnnCreateSpatialTransformerDescriptor *cudnnCreateSpatialTransformerDescriptor;
tcudnnSetSpatialTransformerNdDescriptor *cudnnSetSpatialTransformerNdDescriptor;
tcudnnDestroySpatialTransformerDescriptor *cudnnDestroySpatialTransformerDescriptor;
tcudnnSpatialTfGridGeneratorForward *cudnnSpatialTfGridGeneratorForward;
tcudnnSpatialTfSamplerForward *cudnnSpatialTfSamplerForward;
/* Dropout. */
tcudnnCreateDropoutDescriptor *cudnnCreateDropoutDescriptor;
tcudnnDestroyDropoutDescriptor *cudnnDestroyDropoutDescriptor;
tcudnnDropoutGetStatesSize *cudnnDropoutGetStatesSize;
tcudnnDropoutGetReserveSpaceSize *cudnnDropoutGetReserveSpaceSize;
tcudnnSetDropoutDescriptor *cudnnSetDropoutDescriptor;
tcudnnRestoreDropoutDescriptor *cudnnRestoreDropoutDescriptor;
tcudnnGetDropoutDescriptor *cudnnGetDropoutDescriptor;
tcudnnDropoutForward *cudnnDropoutForward;
/* Algorithm descriptor / performance objects. */
tcudnnCreateAlgorithmDescriptor *cudnnCreateAlgorithmDescriptor;
tcudnnSetAlgorithmDescriptor *cudnnSetAlgorithmDescriptor;
tcudnnGetAlgorithmDescriptor *cudnnGetAlgorithmDescriptor;
tcudnnCopyAlgorithmDescriptor *cudnnCopyAlgorithmDescriptor;
tcudnnDestroyAlgorithmDescriptor *cudnnDestroyAlgorithmDescriptor;
tcudnnCreateAlgorithmPerformance *cudnnCreateAlgorithmPerformance;
tcudnnSetAlgorithmPerformance *cudnnSetAlgorithmPerformance;
tcudnnGetAlgorithmPerformance *cudnnGetAlgorithmPerformance;
tcudnnDestroyAlgorithmPerformance *cudnnDestroyAlgorithmPerformance;
tcudnnGetAlgorithmSpaceSize *cudnnGetAlgorithmSpaceSize;
tcudnnSaveAlgorithm *cudnnSaveAlgorithm;
tcudnnRestoreAlgorithm *cudnnRestoreAlgorithm;
/* Callback hooks and the version check that closes the group. */
tcudnnSetCallback *cudnnSetCallback;
tcudnnGetCallback *cudnnGetCallback;
tcudnnOpsInferVersionCheck *cudnnOpsInferVersionCheck;
/* Global pointers for the cudnn* backward/training operator entry points;
 * the group ends with cudnnOpsTrainVersionCheck. */
tcudnnSoftmaxBackward *cudnnSoftmaxBackward;
tcudnnPoolingBackward *cudnnPoolingBackward;
tcudnnActivationBackward *cudnnActivationBackward;
tcudnnLRNCrossChannelBackward *cudnnLRNCrossChannelBackward;
tcudnnDivisiveNormalizationBackward *cudnnDivisiveNormalizationBackward;
/* Batch normalization training: workspace queries, forward, backward. */
tcudnnGetBatchNormalizationForwardTrainingExWorkspaceSize *cudnnGetBatchNormalizationForwardTrainingExWorkspaceSize;
tcudnnGetBatchNormalizationBackwardExWorkspaceSize *cudnnGetBatchNormalizationBackwardExWorkspaceSize;
tcudnnGetBatchNormalizationTrainingExReserveSpaceSize *cudnnGetBatchNormalizationTrainingExReserveSpaceSize;
tcudnnBatchNormalizationForwardTraining *cudnnBatchNormalizationForwardTraining;
tcudnnBatchNormalizationForwardTrainingEx *cudnnBatchNormalizationForwardTrainingEx;
tcudnnBatchNormalizationBackward *cudnnBatchNormalizationBackward;
tcudnnBatchNormalizationBackwardEx *cudnnBatchNormalizationBackwardEx;
/* Normalization training: workspace queries, forward, backward. */
tcudnnGetNormalizationForwardTrainingWorkspaceSize *cudnnGetNormalizationForwardTrainingWorkspaceSize;
tcudnnGetNormalizationBackwardWorkspaceSize *cudnnGetNormalizationBackwardWorkspaceSize;
tcudnnGetNormalizationTrainingReserveSpaceSize *cudnnGetNormalizationTrainingReserveSpaceSize;
tcudnnNormalizationForwardTraining *cudnnNormalizationForwardTraining;
tcudnnNormalizationBackward *cudnnNormalizationBackward;
/* Spatial transformer and dropout backward, then the version check. */
tcudnnSpatialTfGridGeneratorBackward *cudnnSpatialTfGridGeneratorBackward;
tcudnnSpatialTfSamplerBackward *cudnnSpatialTfSamplerBackward;
tcudnnDropoutBackward *cudnnDropoutBackward;
tcudnnOpsTrainVersionCheck *cudnnOpsTrainVersionCheck;
/* Global pointers for the cudnn* RNN, sequence-data and attention entry
 * points used for forward inference; the group ends with
 * cudnnAdvInferVersionCheck. */
/* RNN descriptor and its settings. */
tcudnnCreateRNNDescriptor *cudnnCreateRNNDescriptor;
tcudnnDestroyRNNDescriptor *cudnnDestroyRNNDescriptor;
tcudnnSetRNNDescriptor_v8 *cudnnSetRNNDescriptor_v8;
tcudnnGetRNNDescriptor_v8 *cudnnGetRNNDescriptor_v8;
tcudnnSetRNNDescriptor_v6 *cudnnSetRNNDescriptor_v6;
tcudnnGetRNNDescriptor_v6 *cudnnGetRNNDescriptor_v6;
tcudnnSetRNNMatrixMathType *cudnnSetRNNMatrixMathType;
tcudnnGetRNNMatrixMathType *cudnnGetRNNMatrixMathType;
tcudnnSetRNNBiasMode *cudnnSetRNNBiasMode;
tcudnnGetRNNBiasMode *cudnnGetRNNBiasMode;
tcudnnRNNSetClip_v8 *cudnnRNNSetClip_v8;
tcudnnRNNGetClip_v8 *cudnnRNNGetClip_v8;
tcudnnRNNSetClip *cudnnRNNSetClip;
tcudnnRNNGetClip *cudnnRNNGetClip;
tcudnnSetRNNProjectionLayers *cudnnSetRNNProjectionLayers;
tcudnnGetRNNProjectionLayers *cudnnGetRNNProjectionLayers;
/* Persistent RNN plan and dynamic build. */
tcudnnCreatePersistentRNNPlan *cudnnCreatePersistentRNNPlan;
tcudnnDestroyPersistentRNNPlan *cudnnDestroyPersistentRNNPlan;
tcudnnSetPersistentRNNPlan *cudnnSetPersistentRNNPlan;
tcudnnBuildRNNDynamic *cudnnBuildRNNDynamic;
/* RNN size queries and parameter lookups. */
tcudnnGetRNNWorkspaceSize *cudnnGetRNNWorkspaceSize;
tcudnnGetRNNTrainingReserveSize *cudnnGetRNNTrainingReserveSize;
tcudnnGetRNNTempSpaceSizes *cudnnGetRNNTempSpaceSizes;
tcudnnGetRNNParamsSize *cudnnGetRNNParamsSize;
tcudnnGetRNNWeightSpaceSize *cudnnGetRNNWeightSpaceSize;
tcudnnGetRNNLinLayerMatrixParams *cudnnGetRNNLinLayerMatrixParams;
tcudnnGetRNNLinLayerBiasParams *cudnnGetRNNLinLayerBiasParams;
tcudnnGetRNNWeightParams *cudnnGetRNNWeightParams;
/* RNN forward calls, padding mode and RNN data descriptor. */
tcudnnRNNForwardInference *cudnnRNNForwardInference;
tcudnnSetRNNPaddingMode *cudnnSetRNNPaddingMode;
tcudnnGetRNNPaddingMode *cudnnGetRNNPaddingMode;
tcudnnCreateRNNDataDescriptor *cudnnCreateRNNDataDescriptor;
tcudnnDestroyRNNDataDescriptor *cudnnDestroyRNNDataDescriptor;
tcudnnSetRNNDataDescriptor *cudnnSetRNNDataDescriptor;
tcudnnGetRNNDataDescriptor *cudnnGetRNNDataDescriptor;
tcudnnRNNForwardInferenceEx *cudnnRNNForwardInferenceEx;
tcudnnRNNForward *cudnnRNNForward;
tcudnnSetRNNAlgorithmDescriptor *cudnnSetRNNAlgorithmDescriptor;
tcudnnGetRNNForwardInferenceAlgorithmMaxCount *cudnnGetRNNForwardInferenceAlgorithmMaxCount;
tcudnnFindRNNForwardInferenceAlgorithmEx *cudnnFindRNNForwardInferenceAlgorithmEx;
/* Sequence-data descriptor. */
tcudnnCreateSeqDataDescriptor *cudnnCreateSeqDataDescriptor;
tcudnnDestroySeqDataDescriptor *cudnnDestroySeqDataDescriptor;
tcudnnSetSeqDataDescriptor *cudnnSetSeqDataDescriptor;
tcudnnGetSeqDataDescriptor *cudnnGetSeqDataDescriptor;
/* Attention descriptor and multi-head attention forward. */
tcudnnCreateAttnDescriptor *cudnnCreateAttnDescriptor;
tcudnnDestroyAttnDescriptor *cudnnDestroyAttnDescriptor;
tcudnnSetAttnDescriptor *cudnnSetAttnDescriptor;
tcudnnGetAttnDescriptor *cudnnGetAttnDescriptor;
tcudnnGetMultiHeadAttnBuffers *cudnnGetMultiHeadAttnBuffers;
tcudnnGetMultiHeadAttnWeights *cudnnGetMultiHeadAttnWeights;
tcudnnMultiHeadAttnForward *cudnnMultiHeadAttnForward;
tcudnnAdvInferVersionCheck *cudnnAdvInferVersionCheck;
/* Global pointers for the cudnn* RNN / attention training and CTC loss entry
 * points; the group ends with cudnnAdvTrainVersionCheck. */
/* RNN training forward/backward calls. */
tcudnnRNNForwardTraining *cudnnRNNForwardTraining;
tcudnnRNNBackwardData *cudnnRNNBackwardData;
tcudnnRNNBackwardData_v8 *cudnnRNNBackwardData_v8;
tcudnnRNNBackwardWeights *cudnnRNNBackwardWeights;
tcudnnRNNBackwardWeights_v8 *cudnnRNNBackwardWeights_v8;
tcudnnRNNForwardTrainingEx *cudnnRNNForwardTrainingEx;
tcudnnRNNBackwardDataEx *cudnnRNNBackwardDataEx;
tcudnnRNNBackwardWeightsEx *cudnnRNNBackwardWeightsEx;
/* RNN training algorithm count/find calls. */
tcudnnGetRNNForwardTrainingAlgorithmMaxCount *cudnnGetRNNForwardTrainingAlgorithmMaxCount;
tcudnnFindRNNForwardTrainingAlgorithmEx *cudnnFindRNNForwardTrainingAlgorithmEx;
tcudnnGetRNNBackwardDataAlgorithmMaxCount *cudnnGetRNNBackwardDataAlgorithmMaxCount;
tcudnnFindRNNBackwardDataAlgorithmEx *cudnnFindRNNBackwardDataAlgorithmEx;
tcudnnGetRNNBackwardWeightsAlgorithmMaxCount *cudnnGetRNNBackwardWeightsAlgorithmMaxCount;
tcudnnFindRNNBackwardWeightsAlgorithmEx *cudnnFindRNNBackwardWeightsAlgorithmEx;
/* Multi-head attention backward. */
tcudnnMultiHeadAttnBackwardData *cudnnMultiHeadAttnBackwardData;
tcudnnMultiHeadAttnBackwardWeights *cudnnMultiHeadAttnBackwardWeights;
/* CTC loss descriptor, loss and workspace-size calls. */
tcudnnCreateCTCLossDescriptor *cudnnCreateCTCLossDescriptor;
tcudnnSetCTCLossDescriptor *cudnnSetCTCLossDescriptor;
tcudnnSetCTCLossDescriptorEx *cudnnSetCTCLossDescriptorEx;
tcudnnSetCTCLossDescriptor_v8 *cudnnSetCTCLossDescriptor_v8;
tcudnnGetCTCLossDescriptor *cudnnGetCTCLossDescriptor;
tcudnnGetCTCLossDescriptorEx *cudnnGetCTCLossDescriptorEx;
tcudnnGetCTCLossDescriptor_v8 *cudnnGetCTCLossDescriptor_v8;
tcudnnDestroyCTCLossDescriptor *cudnnDestroyCTCLossDescriptor;
tcudnnCTCLoss *cudnnCTCLoss;
tcudnnCTCLoss_v8 *cudnnCTCLoss_v8;
tcudnnGetCTCLossWorkspaceSize *cudnnGetCTCLossWorkspaceSize;
tcudnnGetCTCLossWorkspaceSize_v8 *cudnnGetCTCLossWorkspaceSize_v8;
tcudnnAdvTrainVersionCheck *cudnnAdvTrainVersionCheck;
/* Global pointers for the cudnn* convolution descriptor, forward and
 * backward-data entry points; the group ends with
 * cudnnCnnInferVersionCheck. */
/* Convolution descriptor and its settings. */
tcudnnCreateConvolutionDescriptor *cudnnCreateConvolutionDescriptor;
tcudnnDestroyConvolutionDescriptor *cudnnDestroyConvolutionDescriptor;
tcudnnSetConvolutionMathType *cudnnSetConvolutionMathType;
tcudnnGetConvolutionMathType *cudnnGetConvolutionMathType;
tcudnnSetConvolutionGroupCount *cudnnSetConvolutionGroupCount;
tcudnnGetConvolutionGroupCount *cudnnGetConvolutionGroupCount;
tcudnnSetConvolutionReorderType *cudnnSetConvolutionReorderType;
tcudnnGetConvolutionReorderType *cudnnGetConvolutionReorderType;
tcudnnSetConvolution2dDescriptor *cudnnSetConvolution2dDescriptor;
tcudnnGetConvolution2dDescriptor *cudnnGetConvolution2dDescriptor;
tcudnnSetConvolutionNdDescriptor *cudnnSetConvolutionNdDescriptor;
tcudnnGetConvolutionNdDescriptor *cudnnGetConvolutionNdDescriptor;
tcudnnGetConvolution2dForwardOutputDim *cudnnGetConvolution2dForwardOutputDim;
tcudnnGetConvolutionNdForwardOutputDim *cudnnGetConvolutionNdForwardOutputDim;
/* Forward convolution: algorithm selection, workspace size, execution. */
tcudnnGetConvolutionForwardAlgorithmMaxCount *cudnnGetConvolutionForwardAlgorithmMaxCount;
tcudnnGetConvolutionForwardAlgorithm_v7 *cudnnGetConvolutionForwardAlgorithm_v7;
tcudnnFindConvolutionForwardAlgorithm *cudnnFindConvolutionForwardAlgorithm;
tcudnnFindConvolutionForwardAlgorithmEx *cudnnFindConvolutionForwardAlgorithmEx;
tcudnnIm2Col *cudnnIm2Col;
tcudnnReorderFilterAndBias *cudnnReorderFilterAndBias;
tcudnnGetConvolutionForwardWorkspaceSize *cudnnGetConvolutionForwardWorkspaceSize;
tcudnnConvolutionForward *cudnnConvolutionForward;
tcudnnConvolutionBiasActivationForward *cudnnConvolutionBiasActivationForward;
/* Backward-data convolution. */
tcudnnGetConvolutionBackwardDataAlgorithmMaxCount *cudnnGetConvolutionBackwardDataAlgorithmMaxCount;
tcudnnFindConvolutionBackwardDataAlgorithm *cudnnFindConvolutionBackwardDataAlgorithm;
tcudnnFindConvolutionBackwardDataAlgorithmEx *cudnnFindConvolutionBackwardDataAlgorithmEx;
tcudnnGetConvolutionBackwardDataAlgorithm_v7 *cudnnGetConvolutionBackwardDataAlgorithm_v7;
tcudnnGetConvolutionBackwardDataWorkspaceSize *cudnnGetConvolutionBackwardDataWorkspaceSize;
tcudnnConvolutionBackwardData *cudnnConvolutionBackwardData;
tcudnnGetFoldedConvBackwardDataDescriptors *cudnnGetFoldedConvBackwardDataDescriptors;
tcudnnCnnInferVersionCheck *cudnnCnnInferVersionCheck;
/* Global pointers for the cudnn* backward-filter convolution and fused-ops
 * entry points; the group ends with cudnnCnnTrainVersionCheck. */
/* Backward-filter / backward-bias convolution. */
tcudnnGetConvolutionBackwardFilterAlgorithmMaxCount *cudnnGetConvolutionBackwardFilterAlgorithmMaxCount;
tcudnnFindConvolutionBackwardFilterAlgorithm *cudnnFindConvolutionBackwardFilterAlgorithm;
tcudnnFindConvolutionBackwardFilterAlgorithmEx *cudnnFindConvolutionBackwardFilterAlgorithmEx;
tcudnnGetConvolutionBackwardFilterAlgorithm_v7 *cudnnGetConvolutionBackwardFilterAlgorithm_v7;
tcudnnGetConvolutionBackwardFilterWorkspaceSize *cudnnGetConvolutionBackwardFilterWorkspaceSize;
tcudnnConvolutionBackwardFilter *cudnnConvolutionBackwardFilter;
tcudnnConvolutionBackwardBias *cudnnConvolutionBackwardBias;
/* Fused-ops parameter packs, plan and execution. */
tcudnnCreateFusedOpsConstParamPack *cudnnCreateFusedOpsConstParamPack;
tcudnnDestroyFusedOpsConstParamPack *cudnnDestroyFusedOpsConstParamPack;
tcudnnSetFusedOpsConstParamPackAttribute *cudnnSetFusedOpsConstParamPackAttribute;
tcudnnGetFusedOpsConstParamPackAttribute *cudnnGetFusedOpsConstParamPackAttribute;
tcudnnCreateFusedOpsVariantParamPack *cudnnCreateFusedOpsVariantParamPack;
tcudnnDestroyFusedOpsVariantParamPack *cudnnDestroyFusedOpsVariantParamPack;
tcudnnSetFusedOpsVariantParamPackAttribute *cudnnSetFusedOpsVariantParamPackAttribute;
tcudnnGetFusedOpsVariantParamPackAttribute *cudnnGetFusedOpsVariantParamPackAttribute;
tcudnnCreateFusedOpsPlan *cudnnCreateFusedOpsPlan;
tcudnnDestroyFusedOpsPlan *cudnnDestroyFusedOpsPlan;
tcudnnMakeFusedOpsPlan *cudnnMakeFusedOpsPlan;
tcudnnFusedOpsExecute *cudnnFusedOpsExecute;
tcudnnCnnTrainVersionCheck *cudnnCnnTrainVersionCheck;
/* Global pointers for the cudnnBackend* entry points. */
tcudnnBackendCreateDescriptor *cudnnBackendCreateDescriptor;
tcudnnBackendDestroyDescriptor *cudnnBackendDestroyDescriptor;
tcudnnBackendInitialize *cudnnBackendInitialize;
tcudnnBackendFinalize *cudnnBackendFinalize;
tcudnnBackendSetAttribute *cudnnBackendSetAttribute;
tcudnnBackendGetAttribute *cudnnBackendGetAttribute;
tcudnnBackendExecute *cudnnBackendExecute;
/* Global pointers for the cuGraphicsGL* and cuGL* entry points.
 * NOTE(review): the lookups for these are outside this part of the file;
 * presumably they are resolved from cuda_lib like the other cu* symbols -
 * confirm. */
tcuGraphicsGLRegisterBuffer *cuGraphicsGLRegisterBuffer;
tcuGraphicsGLRegisterImage *cuGraphicsGLRegisterImage;
tcuGLGetDevices_v2 *cuGLGetDevices_v2;
tcuGLCtxCreate_v2 *cuGLCtxCreate_v2;
tcuGLInit *cuGLInit;
tcuGLRegisterBufferObject *cuGLRegisterBufferObject;
tcuGLMapBufferObject_v2 *cuGLMapBufferObject_v2;
tcuGLUnmapBufferObject *cuGLUnmapBufferObject;
tcuGLUnregisterBufferObject *cuGLUnregisterBufferObject;
tcuGLSetBufferObjectMapFlags *cuGLSetBufferObjectMapFlags;
tcuGLMapBufferObjectAsync_v2 *cuGLMapBufferObjectAsync_v2;
tcuGLUnmapBufferObjectAsync *cuGLUnmapBufferObjectAsync;
/* Walk the NULL-terminated `paths` array in order and return the handle of
 * the first entry that dynamic_library_open() manages to load. Returns NULL
 * when the array is exhausted without a successful load. */
static DynamicLibrary dynamic_library_open_find(const char **paths) {
  const char **candidate;
  DynamicLibrary handle = NULL;
  /* Stop at the terminator or as soon as one candidate has been loaded. */
  for (candidate = paths; *candidate != NULL && handle == NULL; ++candidate) {
    handle = dynamic_library_open(*candidate);
  }
  return handle;
}
/* Implementation function. */
/* Exit handler registered by cuewCudaInit(): unloads the CUDA driver library
 * if it is currently open and clears the handle. */
static void cuewCudaExit(void) {
  if (cuda_lib == NULL) {
    return;
  }
  /* The status of the close call is intentionally not inspected. */
  dynamic_library_close(cuda_lib);
  cuda_lib = NULL;
}
static int cuewCudaInit(void) {
/* Library paths. */
#ifdef _WIN32
/* Expected in c:/windows/system or similar, no path needed. */
const char *cuda_paths[] = {"nvcuda.dll", NULL};
#elif defined(__APPLE__)
/* Default installation path. */
const char *cuda_paths[] = {"/usr/local/cuda/lib/libcuda.dylib", NULL};
#else
const char *cuda_paths[] = {"libcuda.so", "libcuda.so.1", NULL};
#endif
static int initialized = 0;
static int result = 0;
int error, driver_version;
if (initialized) {
return result;
}
initialized = 1;
error = atexit(cuewCudaExit);
if (error) {
result = CUEW_ERROR_ATEXIT_FAILED;
return result;
}
/* Load library. */
cuda_lib = dynamic_library_open_find(cuda_paths);
if (cuda_lib == NULL) {
result = CUEW_ERROR_OPEN_FAILED;
return result;
}
/* Detect driver version. */
driver_version = 1000;
CUDA_LIBRARY_FIND_CHECKED(cuDriverGetVersion);
if (cuDriverGetVersion) {
cuDriverGetVersion(&driver_version);
}
/* We require version 4.0. */
if (driver_version < 4000) {
result = CUEW_ERROR_OPEN_FAILED;
return result;
}
/* Fetch all function pointers. */
CUDA_LIBRARY_FIND(cuGetErrorString);
CUDA_LIBRARY_FIND(cuGetErrorName);
CUDA_LIBRARY_FIND(cuInit);
CUDA_LIBRARY_FIND(cuDriverGetVersion);
CUDA_LIBRARY_FIND(cuDeviceGet);
CUDA_LIBRARY_FIND(cuDeviceGetCount);
CUDA_LIBRARY_FIND(cuDeviceGetName);
CUDA_LIBRARY_FIND(cuDeviceGetUuid);
CUDA_LIBRARY_FIND(cuDeviceGetLuid);
CUDA_LIBRARY_FIND(cuDeviceTotalMem_v2);
CUDA_LIBRARY_FIND(cuDeviceGetTexture1DLinearMaxWidth);
CUDA_LIBRARY_FIND(cuDeviceGetAttribute);
CUDA_LIBRARY_FIND(cuDeviceGetNvSciSyncAttributes);
CUDA_LIBRARY_FIND(cuDeviceSetMemPool);
CUDA_LIBRARY_FIND(cuDeviceGetMemPool);
CUDA_LIBRARY_FIND(cuDeviceGetDefaultMemPool);
CUDA_LIBRARY_FIND(cuDeviceGetProperties);
CUDA_LIBRARY_FIND(cuDeviceComputeCapability);
CUDA_LIBRARY_FIND(cuDevicePrimaryCtxRetain);
CUDA_LIBRARY_FIND(cuDevicePrimaryCtxRelease_v2);
CUDA_LIBRARY_FIND(cuDevicePrimaryCtxSetFlags_v2);
CUDA_LIBRARY_FIND(cuDevicePrimaryCtxGetState);
CUDA_LIBRARY_FIND(cuDevicePrimaryCtxReset_v2);
CUDA_LIBRARY_FIND(cuCtxCreate_v2);
CUDA_LIBRARY_FIND(cuCtxDestroy_v2);
CUDA_LIBRARY_FIND(cuCtxPushCurrent_v2);
CUDA_LIBRARY_FIND(cuCtxPopCurrent_v2);
CUDA_LIBRARY_FIND(cuCtxSetCurrent);
CUDA_LIBRARY_FIND(cuCtxGetCurrent);
CUDA_LIBRARY_FIND(cuCtxGetDevice);
CUDA_LIBRARY_FIND(cuCtxGetFlags);
CUDA_LIBRARY_FIND(cuCtxSynchronize);
CUDA_LIBRARY_FIND(cuCtxSetLimit);
CUDA_LIBRARY_FIND(cuCtxGetLimit);
CUDA_LIBRARY_FIND(cuCtxGetCacheConfig);
CUDA_LIBRARY_FIND(cuCtxSetCacheConfig);
CUDA_LIBRARY_FIND(cuCtxGetSharedMemConfig);
CUDA_LIBRARY_FIND(cuCtxSetSharedMemConfig);
CUDA_LIBRARY_FIND(cuCtxGetApiVersion);
CUDA_LIBRARY_FIND(cuCtxGetStreamPriorityRange);
CUDA_LIBRARY_FIND(cuCtxResetPersistingL2Cache);
CUDA_LIBRARY_FIND(cuCtxAttach);
CUDA_LIBRARY_FIND(cuCtxDetach);
CUDA_LIBRARY_FIND(cuModuleLoad);
CUDA_LIBRARY_FIND(cuModuleLoadData);
CUDA_LIBRARY_FIND(cuModuleLoadDataEx);
CUDA_LIBRARY_FIND(cuModuleLoadFatBinary);
CUDA_LIBRARY_FIND(cuModuleUnload);
CUDA_LIBRARY_FIND(cuModuleGetFunction);
CUDA_LIBRARY_FIND(cuModuleGetGlobal_v2);
CUDA_LIBRARY_FIND(cuModuleGetTexRef);
CUDA_LIBRARY_FIND(cuModuleGetSurfRef);
CUDA_LIBRARY_FIND(cuLinkCreate_v2);
CUDA_LIBRARY_FIND(cuLinkAddData_v2);
CUDA_LIBRARY_FIND(cuLinkAddFile_v2);
CUDA_LIBRARY_FIND(cuLinkComplete);
CUDA_LIBRARY_FIND(cuLinkDestroy);
CUDA_LIBRARY_FIND(cuMemGetInfo_v2);
CUDA_LIBRARY_FIND(cuMemAlloc_v2);
CUDA_LIBRARY_FIND(cuMemAllocPitch_v2);
CUDA_LIBRARY_FIND(cuMemFree_v2);
CUDA_LIBRARY_FIND(cuMemGetAddressRange_v2);
CUDA_LIBRARY_FIND(cuMemAllocHost_v2);
CUDA_LIBRARY_FIND(cuMemFreeHost);
CUDA_LIBRARY_FIND(cuMemHostAlloc);
CUDA_LIBRARY_FIND(cuMemHostGetDevicePointer_v2);
CUDA_LIBRARY_FIND(cuMemHostGetFlags);
CUDA_LIBRARY_FIND(cuMemAllocManaged);
CUDA_LIBRARY_FIND(cuDeviceGetByPCIBusId);
CUDA_LIBRARY_FIND(cuDeviceGetPCIBusId);
CUDA_LIBRARY_FIND(cuIpcGetEventHandle);
CUDA_LIBRARY_FIND(cuIpcOpenEventHandle);
CUDA_LIBRARY_FIND(cuIpcGetMemHandle);
CUDA_LIBRARY_FIND(cuIpcOpenMemHandle_v2);
CUDA_LIBRARY_FIND(cuIpcCloseMemHandle);
CUDA_LIBRARY_FIND(cuMemHostRegister_v2);
CUDA_LIBRARY_FIND(cuMemHostUnregister);
CUDA_LIBRARY_FIND(cuMemcpy);
CUDA_LIBRARY_FIND(cuMemcpyPeer);
CUDA_LIBRARY_FIND(cuMemcpyHtoD_v2);
CUDA_LIBRARY_FIND(cuMemcpyDtoH_v2);
CUDA_LIBRARY_FIND(cuMemcpyDtoD_v2);
CUDA_LIBRARY_FIND(cuMemcpyDtoA_v2);
CUDA_LIBRARY_FIND(cuMemcpyAtoD_v2);
CUDA_LIBRARY_FIND(cuMemcpyHtoA_v2);
CUDA_LIBRARY_FIND(cuMemcpyAtoH_v2);
CUDA_LIBRARY_FIND(cuMemcpyAtoA_v2);
CUDA_LIBRARY_FIND(cuMemcpy2D_v2);
CUDA_LIBRARY_FIND(cuMemcpy2DUnaligned_v2);
CUDA_LIBRARY_FIND(cuMemcpy3D_v2);
CUDA_LIBRARY_FIND(cuMemcpy3DPeer);
CUDA_LIBRARY_FIND(cuMemcpyAsync);
CUDA_LIBRARY_FIND(cuMemcpyPeerAsync);
CUDA_LIBRARY_FIND(cuMemcpyHtoDAsync_v2);
CUDA_LIBRARY_FIND(cuMemcpyDtoHAsync_v2);
CUDA_LIBRARY_FIND(cuMemcpyDtoDAsync_v2);
CUDA_LIBRARY_FIND(cuMemcpyHtoAAsync_v2);
CUDA_LIBRARY_FIND(cuMemcpyAtoHAsync_v2);
CUDA_LIBRARY_FIND(cuMemcpy2DAsync_v2);
CUDA_LIBRARY_FIND(cuMemcpy3DAsync_v2);
CUDA_LIBRARY_FIND(cuMemcpy3DPeerAsync);
CUDA_LIBRARY_FIND(cuMemsetD8_v2);
CUDA_LIBRARY_FIND(cuMemsetD16_v2);
CUDA_LIBRARY_FIND(cuMemsetD32_v2);
CUDA_LIBRARY_FIND(cuMemsetD2D8_v2);
CUDA_LIBRARY_FIND(cuMemsetD2D16_v2);
CUDA_LIBRARY_FIND(cuMemsetD2D32_v2);
CUDA_LIBRARY_FIND(cuMemsetD8Async);
CUDA_LIBRARY_FIND(cuMemsetD16Async);
CUDA_LIBRARY_FIND(cuMemsetD32Async);
CUDA_LIBRARY_FIND(cuMemsetD2D8Async);
CUDA_LIBRARY_FIND(cuMemsetD2D16Async);
CUDA_LIBRARY_FIND(cuMemsetD2D32Async);
CUDA_LIBRARY_FIND(cuArrayCreate_v2);
CUDA_LIBRARY_FIND(cuArrayGetDescriptor_v2);
CUDA_LIBRARY_FIND(cuArrayGetSparseProperties);
CUDA_LIBRARY_FIND(cuMipmappedArrayGetSparseProperties);
CUDA_LIBRARY_FIND(cuArrayGetPlane);
CUDA_LIBRARY_FIND(cuArrayDestroy);
CUDA_LIBRARY_FIND(cuArray3DCreate_v2);
CUDA_LIBRARY_FIND(cuArray3DGetDescriptor_v2);
CUDA_LIBRARY_FIND(cuMipmappedArrayCreate);
CUDA_LIBRARY_FIND(cuMipmappedArrayGetLevel);
CUDA_LIBRARY_FIND(cuMipmappedArrayDestroy);
CUDA_LIBRARY_FIND(cuMemAddressReserve);
CUDA_LIBRARY_FIND(cuMemAddressFree);
CUDA_LIBRARY_FIND(cuMemCreate);
CUDA_LIBRARY_FIND(cuMemRelease);
CUDA_LIBRARY_FIND(cuMemMap);
CUDA_LIBRARY_FIND(cuMemMapArrayAsync);
CUDA_LIBRARY_FIND(cuMemUnmap);
CUDA_LIBRARY_FIND(cuMemSetAccess);
CUDA_LIBRARY_FIND(cuMemGetAccess);
CUDA_LIBRARY_FIND(cuMemExportToShareableHandle);
CUDA_LIBRARY_FIND(cuMemImportFromShareableHandle);
CUDA_LIBRARY_FIND(cuMemGetAllocationGranularity);
CUDA_LIBRARY_FIND(cuMemGetAllocationPropertiesFromHandle);
CUDA_LIBRARY_FIND(cuMemRetainAllocationHandle);
CUDA_LIBRARY_FIND(cuMemFreeAsync);
CUDA_LIBRARY_FIND(cuMemAllocAsync);
CUDA_LIBRARY_FIND(cuMemPoolTrimTo);
CUDA_LIBRARY_FIND(cuMemPoolSetAttribute);
CUDA_LIBRARY_FIND(cuMemPoolGetAttribute);
CUDA_LIBRARY_FIND(cuMemPoolSetAccess);
CUDA_LIBRARY_FIND(cuMemPoolGetAccess);
CUDA_LIBRARY_FIND(cuMemPoolCreate);
CUDA_LIBRARY_FIND(cuMemPoolDestroy);
CUDA_LIBRARY_FIND(cuMemAllocFromPoolAsync);
CUDA_LIBRARY_FIND(cuMemPoolExportToShareableHandle);
CUDA_LIBRARY_FIND(cuMemPoolImportFromShareableHandle);
CUDA_LIBRARY_FIND(cuMemPoolExportPointer);
CUDA_LIBRARY_FIND(cuMemPoolImportPointer);
CUDA_LIBRARY_FIND(cuPointerGetAttribute);
CUDA_LIBRARY_FIND(cuMemPrefetchAsync);
CUDA_LIBRARY_FIND(cuMemAdvise);
CUDA_LIBRARY_FIND(cuMemRangeGetAttribute);
CUDA_LIBRARY_FIND(cuMemRangeGetAttributes);
CUDA_LIBRARY_FIND(cuPointerSetAttribute);
CUDA_LIBRARY_FIND(cuPointerGetAttributes);
CUDA_LIBRARY_FIND(cuStreamCreate);
CUDA_LIBRARY_FIND(cuStreamCreateWithPriority);
CUDA_LIBRARY_FIND(cuStreamGetPriority);
CUDA_LIBRARY_FIND(cuStreamGetFlags);
CUDA_LIBRARY_FIND(cuStreamGetCtx);
CUDA_LIBRARY_FIND(cuStreamWaitEvent);
CUDA_LIBRARY_FIND(cuStreamAddCallback);
CUDA_LIBRARY_FIND(cuStreamBeginCapture_v2);
CUDA_LIBRARY_FIND(cuThreadExchangeStreamCaptureMode);
CUDA_LIBRARY_FIND(cuStreamEndCapture);
CUDA_LIBRARY_FIND(cuStreamIsCapturing);
CUDA_LIBRARY_FIND(cuStreamGetCaptureInfo);
CUDA_LIBRARY_FIND(cuStreamAttachMemAsync);
CUDA_LIBRARY_FIND(cuStreamQuery);
CUDA_LIBRARY_FIND(cuStreamSynchronize);
CUDA_LIBRARY_FIND(cuStreamDestroy_v2);
CUDA_LIBRARY_FIND(cuStreamCopyAttributes);
CUDA_LIBRARY_FIND(cuStreamGetAttribute);
CUDA_LIBRARY_FIND(cuStreamSetAttribute);
CUDA_LIBRARY_FIND(cuEventCreate);
CUDA_LIBRARY_FIND(cuEventRecord);
CUDA_LIBRARY_FIND(cuEventRecordWithFlags);
CUDA_LIBRARY_FIND(cuEventQuery);
CUDA_LIBRARY_FIND(cuEventSynchronize);
CUDA_LIBRARY_FIND(cuEventDestroy_v2);
CUDA_LIBRARY_FIND(cuEventElapsedTime);
CUDA_LIBRARY_FIND(cuImportExternalMemory);
CUDA_LIBRARY_FIND(cuExternalMemoryGetMappedBuffer);
CUDA_LIBRARY_FIND(cuExternalMemoryGetMappedMipmappedArray);
CUDA_LIBRARY_FIND(cuDestroyExternalMemory);
CUDA_LIBRARY_FIND(cuImportExternalSemaphore);
CUDA_LIBRARY_FIND(cuSignalExternalSemaphoresAsync);
CUDA_LIBRARY_FIND(cuWaitExternalSemaphoresAsync);
CUDA_LIBRARY_FIND(cuDestroyExternalSemaphore);
CUDA_LIBRARY_FIND(cuStreamWaitValue32);
CUDA_LIBRARY_FIND(cuStreamWaitValue64);
CUDA_LIBRARY_FIND(cuStreamWriteValue32);
CUDA_LIBRARY_FIND(cuStreamWriteValue64);
CUDA_LIBRARY_FIND(cuStreamBatchMemOp);
CUDA_LIBRARY_FIND(cuFuncGetAttribute);
CUDA_LIBRARY_FIND(cuFuncSetAttribute);
CUDA_LIBRARY_FIND(cuFuncSetCacheConfig);
CUDA_LIBRARY_FIND(cuFuncSetSharedMemConfig);
CUDA_LIBRARY_FIND(cuLaunchKernel);
CUDA_LIBRARY_FIND(cuLaunchCooperativeKernel);
CUDA_LIBRARY_FIND(cuLaunchCooperativeKernelMultiDevice);
CUDA_LIBRARY_FIND(cuLaunchHostFunc);
CUDA_LIBRARY_FIND(cuFuncSetBlockShape);
CUDA_LIBRARY_FIND(cuFuncSetSharedSize);
CUDA_LIBRARY_FIND(cuParamSetSize);
CUDA_LIBRARY_FIND(cuParamSeti);
CUDA_LIBRARY_FIND(cuParamSetf);
CUDA_LIBRARY_FIND(cuParamSetv);
CUDA_LIBRARY_FIND(cuLaunch);
CUDA_LIBRARY_FIND(cuLaunchGrid);
CUDA_LIBRARY_FIND(cuLaunchGridAsync);
CUDA_LIBRARY_FIND(cuParamSetTexRef);
CUDA_LIBRARY_FIND(cuGraphCreate);
CUDA_LIBRARY_FIND(cuGraphAddKernelNode);
CUDA_LIBRARY_FIND(cuGraphKernelNodeGetParams);
CUDA_LIBRARY_FIND(cuGraphKernelNodeSetParams);
CUDA_LIBRARY_FIND(cuGraphAddMemcpyNode);
CUDA_LIBRARY_FIND(cuGraphMemcpyNodeGetParams);
CUDA_LIBRARY_FIND(cuGraphMemcpyNodeSetParams);
CUDA_LIBRARY_FIND(cuGraphAddMemsetNode);
CUDA_LIBRARY_FIND(cuGraphMemsetNodeGetParams);
CUDA_LIBRARY_FIND(cuGraphMemsetNodeSetParams);
CUDA_LIBRARY_FIND(cuGraphAddHostNode);
CUDA_LIBRARY_FIND(cuGraphHostNodeGetParams);
CUDA_LIBRARY_FIND(cuGraphHostNodeSetParams);
CUDA_LIBRARY_FIND(cuGraphAddChildGraphNode);
CUDA_LIBRARY_FIND(cuGraphChildGraphNodeGetGraph);
CUDA_LIBRARY_FIND(cuGraphAddEmptyNode);
CUDA_LIBRARY_FIND(cuGraphAddEventRecordNode);
CUDA_LIBRARY_FIND(cuGraphEventRecordNodeGetEvent);
CUDA_LIBRARY_FIND(cuGraphEventRecordNodeSetEvent);
CUDA_LIBRARY_FIND(cuGraphAddEventWaitNode);
CUDA_LIBRARY_FIND(cuGraphEventWaitNodeGetEvent);
CUDA_LIBRARY_FIND(cuGraphEventWaitNodeSetEvent);
CUDA_LIBRARY_FIND(cuGraphAddExternalSemaphoresSignalNode);
CUDA_LIBRARY_FIND(cuGraphExternalSemaphoresSignalNodeGetParams);
CUDA_LIBRARY_FIND(cuGraphExternalSemaphoresSignalNodeSetParams);
CUDA_LIBRARY_FIND(cuGraphAddExternalSemaphoresWaitNode);
CUDA_LIBRARY_FIND(cuGraphExternalSemaphoresWaitNodeGetParams);
CUDA_LIBRARY_FIND(cuGraphExternalSemaphoresWaitNodeSetParams);
CUDA_LIBRARY_FIND(cuGraphClone);
CUDA_LIBRARY_FIND(cuGraphNodeFindInClone);
CUDA_LIBRARY_FIND(cuGraphNodeGetType);
CUDA_LIBRARY_FIND(cuGraphGetNodes);
CUDA_LIBRARY_FIND(cuGraphGetRootNodes);
CUDA_LIBRARY_FIND(cuGraphGetEdges);
CUDA_LIBRARY_FIND(cuGraphNodeGetDependencies);
CUDA_LIBRARY_FIND(cuGraphNodeGetDependentNodes);
CUDA_LIBRARY_FIND(cuGraphAddDependencies);
CUDA_LIBRARY_FIND(cuGraphRemoveDependencies);
CUDA_LIBRARY_FIND(cuGraphDestroyNode);
CUDA_LIBRARY_FIND(cuGraphInstantiate_v2);
CUDA_LIBRARY_FIND(cuGraphExecKernelNodeSetParams);
CUDA_LIBRARY_FIND(cuGraphExecMemcpyNodeSetParams);
CUDA_LIBRARY_FIND(cuGraphExecMemsetNodeSetParams);
CUDA_LIBRARY_FIND(cuGraphExecHostNodeSetParams);
CUDA_LIBRARY_FIND(cuGraphExecChildGraphNodeSetParams);
CUDA_LIBRARY_FIND(cuGraphExecEventRecordNodeSetEvent);
CUDA_LIBRARY_FIND(cuGraphExecEventWaitNodeSetEvent);
CUDA_LIBRARY_FIND(cuGraphExecExternalSemaphoresSignalNodeSetParams);
CUDA_LIBRARY_FIND(cuGraphExecExternalSemaphoresWaitNodeSetParams);
CUDA_LIBRARY_FIND(cuGraphUpload);
CUDA_LIBRARY_FIND(cuGraphLaunch);
CUDA_LIBRARY_FIND(cuGraphExecDestroy);
CUDA_LIBRARY_FIND(cuGraphDestroy);
CUDA_LIBRARY_FIND(cuGraphExecUpdate);
CUDA_LIBRARY_FIND(cuGraphKernelNodeCopyAttributes);
CUDA_LIBRARY_FIND(cuGraphKernelNodeGetAttribute);
CUDA_LIBRARY_FIND(cuGraphKernelNodeSetAttribute);
CUDA_LIBRARY_FIND(cuOccupancyMaxActiveBlocksPerMultiprocessor);
CUDA_LIBRARY_FIND(cuOccupancyMaxActiveBlocksPerMultiprocessorWithFlags);
CUDA_LIBRARY_FIND(cuOccupancyMaxPotentialBlockSize);
CUDA_LIBRARY_FIND(cuOccupancyMaxPotentialBlockSizeWithFlags);
CUDA_LIBRARY_FIND(cuOccupancyAvailableDynamicSMemPerBlock);
CUDA_LIBRARY_FIND(cuTexRefSetArray);
CUDA_LIBRARY_FIND(cuTexRefSetMipmappedArray);
CUDA_LIBRARY_FIND(cuTexRefSetAddress_v2);
CUDA_LIBRARY_FIND(cuTexRefSetAddress2D_v3);
CUDA_LIBRARY_FIND(cuTexRefSetFormat);
CUDA_LIBRARY_FIND(cuTexRefSetAddressMode);
CUDA_LIBRARY_FIND(cuTexRefSetFilterMode);
CUDA_LIBRARY_FIND(cuTexRefSetMipmapFilterMode);
CUDA_LIBRARY_FIND(cuTexRefSetMipmapLevelBias);
CUDA_LIBRARY_FIND(cuTexRefSetMipmapLevelClamp);
CUDA_LIBRARY_FIND(cuTexRefSetMaxAnisotropy);
CUDA_LIBRARY_FIND(cuTexRefSetBorderColor);
CUDA_LIBRARY_FIND(cuTexRefSetFlags);
CUDA_LIBRARY_FIND(cuTexRefGetAddress_v2);
CUDA_LIBRARY_FIND(cuTexRefGetArray);
CUDA_LIBRARY_FIND(cuTexRefGetMipmappedArray);
CUDA_LIBRARY_FIND(cuTexRefGetAddressMode);
CUDA_LIBRARY_FIND(cuTexRefGetFilterMode);
CUDA_LIBRARY_FIND(cuTexRefGetFormat);
CUDA_LIBRARY_FIND(cuTexRefGetMipmapFilterMode);
CUDA_LIBRARY_FIND(cuTexRefGetMipmapLevelBias);
CUDA_LIBRARY_FIND(cuTexRefGetMipmapLevelClamp);
CUDA_LIBRARY_FIND(cuTexRefGetMaxAnisotropy);
CUDA_LIBRARY_FIND(cuTexRefGetBorderColor);
CUDA_LIBRARY_FIND(cuTexRefGetFlags);
CUDA_LIBRARY_FIND(cuTexRefCreate);
CUDA_LIBRARY_FIND(cuTexRefDestroy);
CUDA_LIBRARY_FIND(cuSurfRefSetArray);
CUDA_LIBRARY_FIND(cuSurfRefGetArray);
CUDA_LIBRARY_FIND(cuTexObjectCreate);
CUDA_LIBRARY_FIND(cuTexObjectDestroy);
CUDA_LIBRARY_FIND(cuTexObjectGetResourceDesc);
CUDA_LIBRARY_FIND(cuTexObjectGetTextureDesc);
CUDA_LIBRARY_FIND(cuTexObjectGetResourceViewDesc);
CUDA_LIBRARY_FIND(cuSurfObjectCreate);
CUDA_LIBRARY_FIND(cuSurfObjectDestroy);
CUDA_LIBRARY_FIND(cuSurfObjectGetResourceDesc);
CUDA_LIBRARY_FIND(cuDeviceCanAccessPeer);
CUDA_LIBRARY_FIND(cuCtxEnablePeerAccess);
CUDA_LIBRARY_FIND(cuCtxDisablePeerAccess);
CUDA_LIBRARY_FIND(cuDeviceGetP2PAttribute);
CUDA_LIBRARY_FIND(cuGraphicsUnregisterResource);
CUDA_LIBRARY_FIND(cuGraphicsSubResourceGetMappedArray);
CUDA_LIBRARY_FIND(cuGraphicsResourceGetMappedMipmappedArray);
CUDA_LIBRARY_FIND(cuGraphicsResourceGetMappedPointer_v2);
CUDA_LIBRARY_FIND(cuGraphicsResourceSetMapFlags_v2);
CUDA_LIBRARY_FIND(cuGraphicsMapResources);
CUDA_LIBRARY_FIND(cuGraphicsUnmapResources);
CUDA_LIBRARY_FIND(cuGetExportTable);
CUDA_LIBRARY_FIND(cuFuncGetModule);
CUDA_LIBRARY_FIND(cuGraphicsGLRegisterBuffer);
CUDA_LIBRARY_FIND(cuGraphicsGLRegisterImage);
CUDA_LIBRARY_FIND(cuGLGetDevices_v2);
CUDA_LIBRARY_FIND(cuGLCtxCreate_v2);
CUDA_LIBRARY_FIND(cuGLInit);
CUDA_LIBRARY_FIND(cuGLRegisterBufferObject);
CUDA_LIBRARY_FIND(cuGLMapBufferObject_v2);
CUDA_LIBRARY_FIND(cuGLUnmapBufferObject);
CUDA_LIBRARY_FIND(cuGLUnregisterBufferObject);
CUDA_LIBRARY_FIND(cuGLSetBufferObjectMapFlags);
CUDA_LIBRARY_FIND(cuGLMapBufferObjectAsync_v2);
CUDA_LIBRARY_FIND(cuGLUnmapBufferObjectAsync);
result = CUEW_SUCCESS;
return result;
}
/* Exit handler registered by cuewNvrtcInit(): unloads the NVRTC library if
 * it is currently open and clears the handle. */
static void cuewExitNvrtc(void) {
  if (nvrtc_lib == NULL) {
    return;
  }
  /* The status of the close call is intentionally not inspected. */
  dynamic_library_close(nvrtc_lib);
  nvrtc_lib = NULL;
}
/* Open the NVRTC library and resolve its entry points.
 *
 * Only the first call does any work; every later call returns the status
 * recorded by that first attempt: CUEW_SUCCESS, CUEW_ERROR_ATEXIT_FAILED
 * (unload handler could not be registered) or CUEW_ERROR_OPEN_FAILED (none
 * of the candidate libraries could be opened). Symbol lookups themselves
 * are not verified. */
static int cuewNvrtcInit(void) {
  /* Candidate library names/paths, tried in the order listed. */
#ifdef _WIN32
  /* Expected in c:/windows/system or similar, no path needed. */
  const char *candidates[] = {"nvrtc64_110_0.dll",
                              "nvrtc64_102_0.dll",
                              "nvrtc64_101_0.dll",
                              "nvrtc64_100_0.dll",
                              "nvrtc64_91.dll",
                              "nvrtc64_90.dll",
                              "nvrtc64_80.dll",
                              NULL};
#elif defined(__APPLE__)
  /* Default installation path. */
  const char *candidates[] = {"/usr/local/cuda/lib/libnvrtc.dylib", NULL};
#else
  const char *candidates[] = {"libnvrtc.so",
#  if defined(__x86_64__) || defined(_M_X64)
                              "/usr/local/cuda/lib64/libnvrtc.so",
#  else
                              "/usr/local/cuda/lib/libnvrtc.so",
#  endif
                              NULL};
#endif
  static int attempted = 0;
  static int status = 0;

  if (attempted) {
    return status;
  }
  /* Recorded before doing anything else so a failure is never retried. */
  attempted = 1;

  if (atexit(cuewExitNvrtc) != 0) {
    status = CUEW_ERROR_ATEXIT_FAILED;
    return status;
  }

  /* Open the first candidate that loads. */
  nvrtc_lib = dynamic_library_open_find(candidates);
  if (nvrtc_lib == NULL) {
    status = CUEW_ERROR_OPEN_FAILED;
    return status;
  }

  /* Resolve every NVRTC entry point by name. */
  NVRTC_LIBRARY_FIND(nvrtcGetErrorString);
  NVRTC_LIBRARY_FIND(nvrtcVersion);
  NVRTC_LIBRARY_FIND(nvrtcGetNumSupportedArchs);
  NVRTC_LIBRARY_FIND(nvrtcGetSupportedArchs);
  NVRTC_LIBRARY_FIND(nvrtcCreateProgram);
  NVRTC_LIBRARY_FIND(nvrtcDestroyProgram);
  NVRTC_LIBRARY_FIND(nvrtcCompileProgram);
  NVRTC_LIBRARY_FIND(nvrtcGetPTXSize);
  NVRTC_LIBRARY_FIND(nvrtcGetPTX);
  NVRTC_LIBRARY_FIND(nvrtcGetCUBINSize);
  NVRTC_LIBRARY_FIND(nvrtcGetCUBIN);
  NVRTC_LIBRARY_FIND(nvrtcGetProgramLogSize);
  NVRTC_LIBRARY_FIND(nvrtcGetProgramLog);
  NVRTC_LIBRARY_FIND(nvrtcAddNameExpression);
  NVRTC_LIBRARY_FIND(nvrtcGetLoweredName);

  status = CUEW_SUCCESS;
  return status;
}
/* Exit handler registered by cuewCudnnInit(): unloads the cuDNN library if
 * it is currently open and clears the handle. */
static void cuewExitCudnn(void) {
  if (cudnn_lib == NULL) {
    return;
  }
  /* The status of the close call is intentionally not inspected. */
  dynamic_library_close(cudnn_lib);
  cudnn_lib = NULL;
}
/* Open the cuDNN library and resolve its entry points.
 *
 * Only the first call does any work; every later call returns the status
 * recorded by that first attempt: CUEW_SUCCESS, CUEW_ERROR_ATEXIT_FAILED
 * (unload handler could not be registered) or CUEW_ERROR_OPEN_FAILED (none
 * of the candidate libraries could be opened). Symbol lookups themselves
 * are not verified.
 *
 * NOTE(review): the group comments in the symbol list follow the
 * cudnn*VersionCheck symbol that closes each run; the mapping to cuDNN's
 * sub-libraries is presumed from those names -- confirm against cudnn.h. */
static int cuewCudnnInit(void) {
  /* Candidate library names/paths, tried in the order listed. */
#ifdef _WIN32
  /* Expected in c:/windows/system or similar, no path needed. */
  const char *candidates[] = {"cudnn.dll", NULL};
#elif defined(__APPLE__)
  /* Default installation path. */
  const char *candidates[] = {"/usr/local/cuda/lib/libcudnn.dylib", NULL};
#else
  const char *candidates[] = {"libcudnn.so",
#  if defined(__x86_64__) || defined(_M_X64)
                              "/usr/local/cuda/lib64/libcudnn.so",
#  else
                              "/usr/local/cuda/lib/libcudnn.so",
#  endif
                              NULL};
#endif
  static int attempted = 0;
  static int status = 0;

  if (attempted) {
    return status;
  }
  /* Recorded before doing anything else so a failure is never retried. */
  attempted = 1;

  if (atexit(cuewExitCudnn) != 0) {
    status = CUEW_ERROR_ATEXIT_FAILED;
    return status;
  }

  /* Open the first candidate that loads. */
  cudnn_lib = dynamic_library_open_find(candidates);
  if (cudnn_lib == NULL) {
    status = CUEW_ERROR_OPEN_FAILED;
    return status;
  }

  /* Symbols up to and including cudnnOpsInferVersionCheck. */
  CUDNN_LIBRARY_FIND(cudnnGetVersion);
  CUDNN_LIBRARY_FIND(cudnnGetCudartVersion);
  CUDNN_LIBRARY_FIND(cudnnGetErrorString);
  CUDNN_LIBRARY_FIND(cudnnQueryRuntimeError);
  CUDNN_LIBRARY_FIND(cudnnGetProperty);
  CUDNN_LIBRARY_FIND(cudnnCreate);
  CUDNN_LIBRARY_FIND(cudnnDestroy);
  CUDNN_LIBRARY_FIND(cudnnSetStream);
  CUDNN_LIBRARY_FIND(cudnnGetStream);
  CUDNN_LIBRARY_FIND(cudnnCreateTensorDescriptor);
  CUDNN_LIBRARY_FIND(cudnnSetTensor4dDescriptor);
  CUDNN_LIBRARY_FIND(cudnnSetTensor4dDescriptorEx);
  CUDNN_LIBRARY_FIND(cudnnGetTensor4dDescriptor);
  CUDNN_LIBRARY_FIND(cudnnSetTensorNdDescriptor);
  CUDNN_LIBRARY_FIND(cudnnSetTensorNdDescriptorEx);
  CUDNN_LIBRARY_FIND(cudnnGetTensorNdDescriptor);
  CUDNN_LIBRARY_FIND(cudnnGetTensorSizeInBytes);
  CUDNN_LIBRARY_FIND(cudnnDestroyTensorDescriptor);
  CUDNN_LIBRARY_FIND(cudnnInitTransformDest);
  CUDNN_LIBRARY_FIND(cudnnCreateTensorTransformDescriptor);
  CUDNN_LIBRARY_FIND(cudnnSetTensorTransformDescriptor);
  CUDNN_LIBRARY_FIND(cudnnGetTensorTransformDescriptor);
  CUDNN_LIBRARY_FIND(cudnnDestroyTensorTransformDescriptor);
  CUDNN_LIBRARY_FIND(cudnnTransformTensor);
  CUDNN_LIBRARY_FIND(cudnnTransformTensorEx);
  CUDNN_LIBRARY_FIND(cudnnAddTensor);
  CUDNN_LIBRARY_FIND(cudnnCreateOpTensorDescriptor);
  CUDNN_LIBRARY_FIND(cudnnSetOpTensorDescriptor);
  CUDNN_LIBRARY_FIND(cudnnGetOpTensorDescriptor);
  CUDNN_LIBRARY_FIND(cudnnDestroyOpTensorDescriptor);
  CUDNN_LIBRARY_FIND(cudnnOpTensor);
  CUDNN_LIBRARY_FIND(cudnnCreateReduceTensorDescriptor);
  CUDNN_LIBRARY_FIND(cudnnSetReduceTensorDescriptor);
  CUDNN_LIBRARY_FIND(cudnnGetReduceTensorDescriptor);
  CUDNN_LIBRARY_FIND(cudnnDestroyReduceTensorDescriptor);
  CUDNN_LIBRARY_FIND(cudnnGetReductionIndicesSize);
  CUDNN_LIBRARY_FIND(cudnnGetReductionWorkspaceSize);
  CUDNN_LIBRARY_FIND(cudnnReduceTensor);
  CUDNN_LIBRARY_FIND(cudnnSetTensor);
  CUDNN_LIBRARY_FIND(cudnnScaleTensor);
  CUDNN_LIBRARY_FIND(cudnnCreateFilterDescriptor);
  CUDNN_LIBRARY_FIND(cudnnSetFilter4dDescriptor);
  CUDNN_LIBRARY_FIND(cudnnGetFilter4dDescriptor);
  CUDNN_LIBRARY_FIND(cudnnSetFilterNdDescriptor);
  CUDNN_LIBRARY_FIND(cudnnGetFilterNdDescriptor);
  CUDNN_LIBRARY_FIND(cudnnGetFilterSizeInBytes);
  CUDNN_LIBRARY_FIND(cudnnTransformFilter);
  CUDNN_LIBRARY_FIND(cudnnDestroyFilterDescriptor);
  CUDNN_LIBRARY_FIND(cudnnSoftmaxForward);
  CUDNN_LIBRARY_FIND(cudnnCreatePoolingDescriptor);
  CUDNN_LIBRARY_FIND(cudnnSetPooling2dDescriptor);
  CUDNN_LIBRARY_FIND(cudnnGetPooling2dDescriptor);
  CUDNN_LIBRARY_FIND(cudnnSetPoolingNdDescriptor);
  CUDNN_LIBRARY_FIND(cudnnGetPoolingNdDescriptor);
  CUDNN_LIBRARY_FIND(cudnnGetPoolingNdForwardOutputDim);
  CUDNN_LIBRARY_FIND(cudnnGetPooling2dForwardOutputDim);
  CUDNN_LIBRARY_FIND(cudnnDestroyPoolingDescriptor);
  CUDNN_LIBRARY_FIND(cudnnPoolingForward);
  CUDNN_LIBRARY_FIND(cudnnCreateActivationDescriptor);
  CUDNN_LIBRARY_FIND(cudnnSetActivationDescriptor);
  CUDNN_LIBRARY_FIND(cudnnGetActivationDescriptor);
  CUDNN_LIBRARY_FIND(cudnnDestroyActivationDescriptor);
  CUDNN_LIBRARY_FIND(cudnnActivationForward);
  CUDNN_LIBRARY_FIND(cudnnCreateLRNDescriptor);
  CUDNN_LIBRARY_FIND(cudnnSetLRNDescriptor);
  CUDNN_LIBRARY_FIND(cudnnGetLRNDescriptor);
  CUDNN_LIBRARY_FIND(cudnnDestroyLRNDescriptor);
  CUDNN_LIBRARY_FIND(cudnnLRNCrossChannelForward);
  CUDNN_LIBRARY_FIND(cudnnDivisiveNormalizationForward);
  CUDNN_LIBRARY_FIND(cudnnDeriveBNTensorDescriptor);
  CUDNN_LIBRARY_FIND(cudnnBatchNormalizationForwardInference);
  CUDNN_LIBRARY_FIND(cudnnDeriveNormTensorDescriptor);
  CUDNN_LIBRARY_FIND(cudnnNormalizationForwardInference);
  CUDNN_LIBRARY_FIND(cudnnCreateSpatialTransformerDescriptor);
  CUDNN_LIBRARY_FIND(cudnnSetSpatialTransformerNdDescriptor);
  CUDNN_LIBRARY_FIND(cudnnDestroySpatialTransformerDescriptor);
  CUDNN_LIBRARY_FIND(cudnnSpatialTfGridGeneratorForward);
  CUDNN_LIBRARY_FIND(cudnnSpatialTfSamplerForward);
  CUDNN_LIBRARY_FIND(cudnnCreateDropoutDescriptor);
  CUDNN_LIBRARY_FIND(cudnnDestroyDropoutDescriptor);
  CUDNN_LIBRARY_FIND(cudnnDropoutGetStatesSize);
  CUDNN_LIBRARY_FIND(cudnnDropoutGetReserveSpaceSize);
  CUDNN_LIBRARY_FIND(cudnnSetDropoutDescriptor);
  CUDNN_LIBRARY_FIND(cudnnRestoreDropoutDescriptor);
  CUDNN_LIBRARY_FIND(cudnnGetDropoutDescriptor);
  CUDNN_LIBRARY_FIND(cudnnDropoutForward);
  CUDNN_LIBRARY_FIND(cudnnCreateAlgorithmDescriptor);
  CUDNN_LIBRARY_FIND(cudnnSetAlgorithmDescriptor);
  CUDNN_LIBRARY_FIND(cudnnGetAlgorithmDescriptor);
  CUDNN_LIBRARY_FIND(cudnnCopyAlgorithmDescriptor);
  CUDNN_LIBRARY_FIND(cudnnDestroyAlgorithmDescriptor);
  CUDNN_LIBRARY_FIND(cudnnCreateAlgorithmPerformance);
  CUDNN_LIBRARY_FIND(cudnnSetAlgorithmPerformance);
  CUDNN_LIBRARY_FIND(cudnnGetAlgorithmPerformance);
  CUDNN_LIBRARY_FIND(cudnnDestroyAlgorithmPerformance);
  CUDNN_LIBRARY_FIND(cudnnGetAlgorithmSpaceSize);
  CUDNN_LIBRARY_FIND(cudnnSaveAlgorithm);
  CUDNN_LIBRARY_FIND(cudnnRestoreAlgorithm);
  CUDNN_LIBRARY_FIND(cudnnSetCallback);
  CUDNN_LIBRARY_FIND(cudnnGetCallback);
  CUDNN_LIBRARY_FIND(cudnnOpsInferVersionCheck);

  /* Symbols up to and including cudnnOpsTrainVersionCheck. */
  CUDNN_LIBRARY_FIND(cudnnSoftmaxBackward);
  CUDNN_LIBRARY_FIND(cudnnPoolingBackward);
  CUDNN_LIBRARY_FIND(cudnnActivationBackward);
  CUDNN_LIBRARY_FIND(cudnnLRNCrossChannelBackward);
  CUDNN_LIBRARY_FIND(cudnnDivisiveNormalizationBackward);
  CUDNN_LIBRARY_FIND(cudnnGetBatchNormalizationForwardTrainingExWorkspaceSize);
  CUDNN_LIBRARY_FIND(cudnnGetBatchNormalizationBackwardExWorkspaceSize);
  CUDNN_LIBRARY_FIND(cudnnGetBatchNormalizationTrainingExReserveSpaceSize);
  CUDNN_LIBRARY_FIND(cudnnBatchNormalizationForwardTraining);
  CUDNN_LIBRARY_FIND(cudnnBatchNormalizationForwardTrainingEx);
  CUDNN_LIBRARY_FIND(cudnnBatchNormalizationBackward);
  CUDNN_LIBRARY_FIND(cudnnBatchNormalizationBackwardEx);
  CUDNN_LIBRARY_FIND(cudnnGetNormalizationForwardTrainingWorkspaceSize);
  CUDNN_LIBRARY_FIND(cudnnGetNormalizationBackwardWorkspaceSize);
  CUDNN_LIBRARY_FIND(cudnnGetNormalizationTrainingReserveSpaceSize);
  CUDNN_LIBRARY_FIND(cudnnNormalizationForwardTraining);
  CUDNN_LIBRARY_FIND(cudnnNormalizationBackward);
  CUDNN_LIBRARY_FIND(cudnnSpatialTfGridGeneratorBackward);
  CUDNN_LIBRARY_FIND(cudnnSpatialTfSamplerBackward);
  CUDNN_LIBRARY_FIND(cudnnDropoutBackward);
  CUDNN_LIBRARY_FIND(cudnnOpsTrainVersionCheck);

  /* Symbols up to and including cudnnAdvInferVersionCheck. */
  CUDNN_LIBRARY_FIND(cudnnCreateRNNDescriptor);
  CUDNN_LIBRARY_FIND(cudnnDestroyRNNDescriptor);
  CUDNN_LIBRARY_FIND(cudnnSetRNNDescriptor_v8);
  CUDNN_LIBRARY_FIND(cudnnGetRNNDescriptor_v8);
  CUDNN_LIBRARY_FIND(cudnnSetRNNDescriptor_v6);
  CUDNN_LIBRARY_FIND(cudnnGetRNNDescriptor_v6);
  CUDNN_LIBRARY_FIND(cudnnSetRNNMatrixMathType);
  CUDNN_LIBRARY_FIND(cudnnGetRNNMatrixMathType);
  CUDNN_LIBRARY_FIND(cudnnSetRNNBiasMode);
  CUDNN_LIBRARY_FIND(cudnnGetRNNBiasMode);
  CUDNN_LIBRARY_FIND(cudnnRNNSetClip_v8);
  CUDNN_LIBRARY_FIND(cudnnRNNGetClip_v8);
  CUDNN_LIBRARY_FIND(cudnnRNNSetClip);
  CUDNN_LIBRARY_FIND(cudnnRNNGetClip);
  CUDNN_LIBRARY_FIND(cudnnSetRNNProjectionLayers);
  CUDNN_LIBRARY_FIND(cudnnGetRNNProjectionLayers);
  CUDNN_LIBRARY_FIND(cudnnCreatePersistentRNNPlan);
  CUDNN_LIBRARY_FIND(cudnnDestroyPersistentRNNPlan);
  CUDNN_LIBRARY_FIND(cudnnSetPersistentRNNPlan);
  CUDNN_LIBRARY_FIND(cudnnBuildRNNDynamic);
  CUDNN_LIBRARY_FIND(cudnnGetRNNWorkspaceSize);
  CUDNN_LIBRARY_FIND(cudnnGetRNNTrainingReserveSize);
  CUDNN_LIBRARY_FIND(cudnnGetRNNTempSpaceSizes);
  CUDNN_LIBRARY_FIND(cudnnGetRNNParamsSize);
  CUDNN_LIBRARY_FIND(cudnnGetRNNWeightSpaceSize);
  CUDNN_LIBRARY_FIND(cudnnGetRNNLinLayerMatrixParams);
  CUDNN_LIBRARY_FIND(cudnnGetRNNLinLayerBiasParams);
  CUDNN_LIBRARY_FIND(cudnnGetRNNWeightParams);
  CUDNN_LIBRARY_FIND(cudnnRNNForwardInference);
  CUDNN_LIBRARY_FIND(cudnnSetRNNPaddingMode);
  CUDNN_LIBRARY_FIND(cudnnGetRNNPaddingMode);
  CUDNN_LIBRARY_FIND(cudnnCreateRNNDataDescriptor);
  CUDNN_LIBRARY_FIND(cudnnDestroyRNNDataDescriptor);
  CUDNN_LIBRARY_FIND(cudnnSetRNNDataDescriptor);
  CUDNN_LIBRARY_FIND(cudnnGetRNNDataDescriptor);
  CUDNN_LIBRARY_FIND(cudnnRNNForwardInferenceEx);
  CUDNN_LIBRARY_FIND(cudnnRNNForward);
  CUDNN_LIBRARY_FIND(cudnnSetRNNAlgorithmDescriptor);
  CUDNN_LIBRARY_FIND(cudnnGetRNNForwardInferenceAlgorithmMaxCount);
  CUDNN_LIBRARY_FIND(cudnnFindRNNForwardInferenceAlgorithmEx);
  CUDNN_LIBRARY_FIND(cudnnCreateSeqDataDescriptor);
  CUDNN_LIBRARY_FIND(cudnnDestroySeqDataDescriptor);
  CUDNN_LIBRARY_FIND(cudnnSetSeqDataDescriptor);
  CUDNN_LIBRARY_FIND(cudnnGetSeqDataDescriptor);
  CUDNN_LIBRARY_FIND(cudnnCreateAttnDescriptor);
  CUDNN_LIBRARY_FIND(cudnnDestroyAttnDescriptor);
  CUDNN_LIBRARY_FIND(cudnnSetAttnDescriptor);
  CUDNN_LIBRARY_FIND(cudnnGetAttnDescriptor);
  CUDNN_LIBRARY_FIND(cudnnGetMultiHeadAttnBuffers);
  CUDNN_LIBRARY_FIND(cudnnGetMultiHeadAttnWeights);
  CUDNN_LIBRARY_FIND(cudnnMultiHeadAttnForward);
  CUDNN_LIBRARY_FIND(cudnnAdvInferVersionCheck);

  /* Symbols up to and including cudnnAdvTrainVersionCheck. */
  CUDNN_LIBRARY_FIND(cudnnRNNForwardTraining);
  CUDNN_LIBRARY_FIND(cudnnRNNBackwardData);
  CUDNN_LIBRARY_FIND(cudnnRNNBackwardData_v8);
  CUDNN_LIBRARY_FIND(cudnnRNNBackwardWeights);
  CUDNN_LIBRARY_FIND(cudnnRNNBackwardWeights_v8);
  CUDNN_LIBRARY_FIND(cudnnRNNForwardTrainingEx);
  CUDNN_LIBRARY_FIND(cudnnRNNBackwardDataEx);
  CUDNN_LIBRARY_FIND(cudnnRNNBackwardWeightsEx);
  CUDNN_LIBRARY_FIND(cudnnGetRNNForwardTrainingAlgorithmMaxCount);
  CUDNN_LIBRARY_FIND(cudnnFindRNNForwardTrainingAlgorithmEx);
  CUDNN_LIBRARY_FIND(cudnnGetRNNBackwardDataAlgorithmMaxCount);
  CUDNN_LIBRARY_FIND(cudnnFindRNNBackwardDataAlgorithmEx);
  CUDNN_LIBRARY_FIND(cudnnGetRNNBackwardWeightsAlgorithmMaxCount);
  CUDNN_LIBRARY_FIND(cudnnFindRNNBackwardWeightsAlgorithmEx);
  CUDNN_LIBRARY_FIND(cudnnMultiHeadAttnBackwardData);
  CUDNN_LIBRARY_FIND(cudnnMultiHeadAttnBackwardWeights);
  CUDNN_LIBRARY_FIND(cudnnCreateCTCLossDescriptor);
  CUDNN_LIBRARY_FIND(cudnnSetCTCLossDescriptor);
  CUDNN_LIBRARY_FIND(cudnnSetCTCLossDescriptorEx);
  CUDNN_LIBRARY_FIND(cudnnSetCTCLossDescriptor_v8);
  CUDNN_LIBRARY_FIND(cudnnGetCTCLossDescriptor);
  CUDNN_LIBRARY_FIND(cudnnGetCTCLossDescriptorEx);
  CUDNN_LIBRARY_FIND(cudnnGetCTCLossDescriptor_v8);
  CUDNN_LIBRARY_FIND(cudnnDestroyCTCLossDescriptor);
  CUDNN_LIBRARY_FIND(cudnnCTCLoss);
  CUDNN_LIBRARY_FIND(cudnnCTCLoss_v8);
  CUDNN_LIBRARY_FIND(cudnnGetCTCLossWorkspaceSize);
  CUDNN_LIBRARY_FIND(cudnnGetCTCLossWorkspaceSize_v8);
  CUDNN_LIBRARY_FIND(cudnnAdvTrainVersionCheck);

  /* Symbols up to and including cudnnCnnInferVersionCheck. */
  CUDNN_LIBRARY_FIND(cudnnCreateConvolutionDescriptor);
  CUDNN_LIBRARY_FIND(cudnnDestroyConvolutionDescriptor);
  CUDNN_LIBRARY_FIND(cudnnSetConvolutionMathType);
  CUDNN_LIBRARY_FIND(cudnnGetConvolutionMathType);
  CUDNN_LIBRARY_FIND(cudnnSetConvolutionGroupCount);
  CUDNN_LIBRARY_FIND(cudnnGetConvolutionGroupCount);
  CUDNN_LIBRARY_FIND(cudnnSetConvolutionReorderType);
  CUDNN_LIBRARY_FIND(cudnnGetConvolutionReorderType);
  CUDNN_LIBRARY_FIND(cudnnSetConvolution2dDescriptor);
  CUDNN_LIBRARY_FIND(cudnnGetConvolution2dDescriptor);
  CUDNN_LIBRARY_FIND(cudnnSetConvolutionNdDescriptor);
  CUDNN_LIBRARY_FIND(cudnnGetConvolutionNdDescriptor);
  CUDNN_LIBRARY_FIND(cudnnGetConvolution2dForwardOutputDim);
  CUDNN_LIBRARY_FIND(cudnnGetConvolutionNdForwardOutputDim);
  CUDNN_LIBRARY_FIND(cudnnGetConvolutionForwardAlgorithmMaxCount);
  CUDNN_LIBRARY_FIND(cudnnGetConvolutionForwardAlgorithm_v7);
  CUDNN_LIBRARY_FIND(cudnnFindConvolutionForwardAlgorithm);
  CUDNN_LIBRARY_FIND(cudnnFindConvolutionForwardAlgorithmEx);
  CUDNN_LIBRARY_FIND(cudnnIm2Col);
  CUDNN_LIBRARY_FIND(cudnnReorderFilterAndBias);
  CUDNN_LIBRARY_FIND(cudnnGetConvolutionForwardWorkspaceSize);
  CUDNN_LIBRARY_FIND(cudnnConvolutionForward);
  CUDNN_LIBRARY_FIND(cudnnConvolutionBiasActivationForward);
  CUDNN_LIBRARY_FIND(cudnnGetConvolutionBackwardDataAlgorithmMaxCount);
  CUDNN_LIBRARY_FIND(cudnnFindConvolutionBackwardDataAlgorithm);
  CUDNN_LIBRARY_FIND(cudnnFindConvolutionBackwardDataAlgorithmEx);
  CUDNN_LIBRARY_FIND(cudnnGetConvolutionBackwardDataAlgorithm_v7);
  CUDNN_LIBRARY_FIND(cudnnGetConvolutionBackwardDataWorkspaceSize);
  CUDNN_LIBRARY_FIND(cudnnConvolutionBackwardData);
  CUDNN_LIBRARY_FIND(cudnnGetFoldedConvBackwardDataDescriptors);
  CUDNN_LIBRARY_FIND(cudnnCnnInferVersionCheck);

  /* Symbols up to and including cudnnCnnTrainVersionCheck. */
  CUDNN_LIBRARY_FIND(cudnnGetConvolutionBackwardFilterAlgorithmMaxCount);
  CUDNN_LIBRARY_FIND(cudnnFindConvolutionBackwardFilterAlgorithm);
  CUDNN_LIBRARY_FIND(cudnnFindConvolutionBackwardFilterAlgorithmEx);
  CUDNN_LIBRARY_FIND(cudnnGetConvolutionBackwardFilterAlgorithm_v7);
  CUDNN_LIBRARY_FIND(cudnnGetConvolutionBackwardFilterWorkspaceSize);
  CUDNN_LIBRARY_FIND(cudnnConvolutionBackwardFilter);
  CUDNN_LIBRARY_FIND(cudnnConvolutionBackwardBias);
  CUDNN_LIBRARY_FIND(cudnnCreateFusedOpsConstParamPack);
  CUDNN_LIBRARY_FIND(cudnnDestroyFusedOpsConstParamPack);
  CUDNN_LIBRARY_FIND(cudnnSetFusedOpsConstParamPackAttribute);
  CUDNN_LIBRARY_FIND(cudnnGetFusedOpsConstParamPackAttribute);
  CUDNN_LIBRARY_FIND(cudnnCreateFusedOpsVariantParamPack);
  CUDNN_LIBRARY_FIND(cudnnDestroyFusedOpsVariantParamPack);
  CUDNN_LIBRARY_FIND(cudnnSetFusedOpsVariantParamPackAttribute);
  CUDNN_LIBRARY_FIND(cudnnGetFusedOpsVariantParamPackAttribute);
  CUDNN_LIBRARY_FIND(cudnnCreateFusedOpsPlan);
  CUDNN_LIBRARY_FIND(cudnnDestroyFusedOpsPlan);
  CUDNN_LIBRARY_FIND(cudnnMakeFusedOpsPlan);
  CUDNN_LIBRARY_FIND(cudnnFusedOpsExecute);
  CUDNN_LIBRARY_FIND(cudnnCnnTrainVersionCheck);

  /* cudnnBackend* entry points. */
  CUDNN_LIBRARY_FIND(cudnnBackendCreateDescriptor);
  CUDNN_LIBRARY_FIND(cudnnBackendDestroyDescriptor);
  CUDNN_LIBRARY_FIND(cudnnBackendInitialize);
  CUDNN_LIBRARY_FIND(cudnnBackendFinalize);
  CUDNN_LIBRARY_FIND(cudnnBackendSetAttribute);
  CUDNN_LIBRARY_FIND(cudnnBackendGetAttribute);
  CUDNN_LIBRARY_FIND(cudnnBackendExecute);

  status = CUEW_SUCCESS;
  return status;
}
/* Initialize the libraries selected by `flags` (any combination of
 * CUEW_INIT_CUDA, CUEW_INIT_NVRTC and CUEW_INIT_CUDNN), in that order.
 *
 * Stops at the first initializer that does not return CUEW_SUCCESS and
 * returns its status; later libraries are then not attempted. Returns
 * CUEW_SUCCESS when every requested initializer succeeded, including the
 * case where no flag is set. */
int cuewInit(cuuint32_t flags) {
  int status = CUEW_SUCCESS;

  if ((flags & CUEW_INIT_CUDA) != 0) {
    status = cuewCudaInit();
  }
  /* Each following step runs only while everything before it succeeded. */
  if (status == CUEW_SUCCESS && (flags & CUEW_INIT_NVRTC) != 0) {
    status = cuewNvrtcInit();
  }
  if (status == CUEW_SUCCESS && (flags & CUEW_INIT_CUDNN) != 0) {
    status = cuewCudnnInit();
  }
  return status;
}
/*
 * Maps a CUresult code to a short, static, human-readable description.
 *
 * The returned pointer refers to a string literal and must not be freed or
 * modified. Codes that are not listed in the table yield
 * "Unknown CUDA error value".
 */
const char *cuewErrorString(CUresult result) {
  static const struct {
    CUresult code;
    const char *text;
  } descriptions[] = {
    {CUDA_SUCCESS, "No errors"},
    {CUDA_ERROR_INVALID_VALUE, "Invalid value"},
    {CUDA_ERROR_OUT_OF_MEMORY, "Out of memory"},
    {CUDA_ERROR_NOT_INITIALIZED, "Driver not initialized"},
    {CUDA_ERROR_DEINITIALIZED, "Driver deinitialized"},
    {CUDA_ERROR_PROFILER_DISABLED, "Profiler disabled"},
    {CUDA_ERROR_PROFILER_NOT_INITIALIZED, "Profiler not initialized"},
    {CUDA_ERROR_PROFILER_ALREADY_STARTED, "Profiler already started"},
    {CUDA_ERROR_PROFILER_ALREADY_STOPPED, "Profiler already stopped"},
    {CUDA_ERROR_STUB_LIBRARY, "Stub library"},
    {CUDA_ERROR_NO_DEVICE, "No CUDA-capable device available"},
    {CUDA_ERROR_INVALID_DEVICE, "Invalid device"},
    {CUDA_ERROR_DEVICE_NOT_LICENSED, "Device not licensed"},
    {CUDA_ERROR_INVALID_IMAGE, "Invalid kernel image"},
    {CUDA_ERROR_INVALID_CONTEXT, "Invalid context"},
    {CUDA_ERROR_CONTEXT_ALREADY_CURRENT, "Context already current"},
    {CUDA_ERROR_MAP_FAILED, "Map failed"},
    {CUDA_ERROR_UNMAP_FAILED, "Unmap failed"},
    {CUDA_ERROR_ARRAY_IS_MAPPED, "Array is mapped"},
    {CUDA_ERROR_ALREADY_MAPPED, "Already mapped"},
    {CUDA_ERROR_NO_BINARY_FOR_GPU, "No binary for GPU"},
    {CUDA_ERROR_ALREADY_ACQUIRED, "Already acquired"},
    {CUDA_ERROR_NOT_MAPPED, "Not mapped"},
    {CUDA_ERROR_NOT_MAPPED_AS_ARRAY, "Mapped resource not available for access as an array"},
    {CUDA_ERROR_NOT_MAPPED_AS_POINTER, "Mapped resource not available for access as a pointer"},
    {CUDA_ERROR_ECC_UNCORRECTABLE, "Uncorrectable ECC error detected"},
    {CUDA_ERROR_UNSUPPORTED_LIMIT, "CUlimit not supported by device"},
    {CUDA_ERROR_CONTEXT_ALREADY_IN_USE, "Context already in use"},
    {CUDA_ERROR_PEER_ACCESS_UNSUPPORTED, "Peer access unsupported"},
    {CUDA_ERROR_INVALID_PTX, "Invalid ptx"},
    {CUDA_ERROR_INVALID_GRAPHICS_CONTEXT, "Invalid graphics context"},
    {CUDA_ERROR_NVLINK_UNCORRECTABLE, "Nvlink uncorrectable"},
    {CUDA_ERROR_JIT_COMPILER_NOT_FOUND, "Jit compiler not found"},
    {CUDA_ERROR_UNSUPPORTED_PTX_VERSION, "Unsupported ptx version"},
    {CUDA_ERROR_JIT_COMPILATION_DISABLED, "Jit compilation disabled"},
    {CUDA_ERROR_INVALID_SOURCE, "Invalid source"},
    {CUDA_ERROR_FILE_NOT_FOUND, "File not found"},
    {CUDA_ERROR_SHARED_OBJECT_SYMBOL_NOT_FOUND, "Link to a shared object failed to resolve"},
    {CUDA_ERROR_SHARED_OBJECT_INIT_FAILED, "Shared object initialization failed"},
    {CUDA_ERROR_OPERATING_SYSTEM, "Operating system"},
    {CUDA_ERROR_INVALID_HANDLE, "Invalid handle"},
    {CUDA_ERROR_ILLEGAL_STATE, "Illegal state"},
    {CUDA_ERROR_NOT_FOUND, "Not found"},
    {CUDA_ERROR_NOT_READY, "CUDA not ready"},
    {CUDA_ERROR_ILLEGAL_ADDRESS, "Illegal address"},
    {CUDA_ERROR_LAUNCH_OUT_OF_RESOURCES, "Launch exceeded resources"},
    {CUDA_ERROR_LAUNCH_TIMEOUT, "Launch exceeded timeout"},
    {CUDA_ERROR_LAUNCH_INCOMPATIBLE_TEXTURING, "Launch with incompatible texturing"},
    {CUDA_ERROR_PEER_ACCESS_ALREADY_ENABLED, "Peer access already enabled"},
    {CUDA_ERROR_PEER_ACCESS_NOT_ENABLED, "Peer access not enabled"},
    {CUDA_ERROR_PRIMARY_CONTEXT_ACTIVE, "Primary context active"},
    {CUDA_ERROR_CONTEXT_IS_DESTROYED, "Context is destroyed"},
    {CUDA_ERROR_ASSERT, "Assert"},
    {CUDA_ERROR_TOO_MANY_PEERS, "Too many peers"},
    {CUDA_ERROR_HOST_MEMORY_ALREADY_REGISTERED, "Host memory already registered"},
    {CUDA_ERROR_HOST_MEMORY_NOT_REGISTERED, "Host memory not registered"},
    {CUDA_ERROR_HARDWARE_STACK_ERROR, "Hardware stack error"},
    {CUDA_ERROR_ILLEGAL_INSTRUCTION, "Illegal instruction"},
    {CUDA_ERROR_MISALIGNED_ADDRESS, "Misaligned address"},
    {CUDA_ERROR_INVALID_ADDRESS_SPACE, "Invalid address space"},
    {CUDA_ERROR_INVALID_PC, "Invalid pc"},
    {CUDA_ERROR_LAUNCH_FAILED, "Launch failed"},
    {CUDA_ERROR_COOPERATIVE_LAUNCH_TOO_LARGE, "Cooperative launch too large"},
    {CUDA_ERROR_NOT_PERMITTED, "Not permitted"},
    {CUDA_ERROR_NOT_SUPPORTED, "Not supported"},
    {CUDA_ERROR_SYSTEM_NOT_READY, "System not ready"},
    {CUDA_ERROR_SYSTEM_DRIVER_MISMATCH, "System driver mismatch"},
    {CUDA_ERROR_COMPAT_NOT_SUPPORTED_ON_DEVICE, "Compat not supported on device"},
    {CUDA_ERROR_STREAM_CAPTURE_UNSUPPORTED, "Stream capture unsupported"},
    {CUDA_ERROR_STREAM_CAPTURE_INVALIDATED, "Stream capture invalidated"},
    {CUDA_ERROR_STREAM_CAPTURE_MERGE, "Stream capture merge"},
    {CUDA_ERROR_STREAM_CAPTURE_UNMATCHED, "Stream capture unmatched"},
    {CUDA_ERROR_STREAM_CAPTURE_UNJOINED, "Stream capture unjoined"},
    {CUDA_ERROR_STREAM_CAPTURE_ISOLATION, "Stream capture isolation"},
    {CUDA_ERROR_STREAM_CAPTURE_IMPLICIT, "Stream capture implicit"},
    {CUDA_ERROR_CAPTURED_EVENT, "Captured event"},
    {CUDA_ERROR_STREAM_CAPTURE_WRONG_THREAD, "Stream capture wrong thread"},
    {CUDA_ERROR_TIMEOUT, "Timeout"},
    {CUDA_ERROR_GRAPH_EXEC_UPDATE_FAILURE, "Graph exec update failure"},
    {CUDA_ERROR_UNKNOWN, "Unknown error"},
  };
  const size_t count = sizeof(descriptions) / sizeof(descriptions[0]);
  size_t i;

  for (i = 0; i < count; ++i) {
    if (descriptions[i].code == result) {
      return descriptions[i].text;
    }
  }

  /* Code is not present in the table above. */
  return "Unknown CUDA error value";
}
/*
 * Joins two path components with the platform separator ('\\' on Windows,
 * '/' elsewhere) and stores "<path1><sep><path2>" in `result`.
 *
 * path1, path2: NUL-terminated components; neither may be NULL.
 * maxlen:       capacity of `result` in bytes, including the terminator.
 *               When it is zero or negative nothing is written.
 * result:       destination buffer of at least `maxlen` bytes.
 *
 * For a positive `maxlen` the output is always NUL-terminated; a joined path
 * that does not fit is silently truncated to `maxlen - 1` characters.
 */
static void path_join(const char *path1,
                      const char *path2,
                      int maxlen,
                      char *result) {
#if defined(WIN32) || defined(_WIN32)
  const char separator = '\\';
#else
  const char separator = '/';
#endif
  int n;

  /* Without room for even the terminator there is no valid index to write;
   * result[maxlen - 1] would land before the start of the buffer. */
  if (maxlen <= 0) {
    return;
  }

  n = snprintf(result, maxlen, "%s%c%s", path1, separator, path2);

  /* A negative return is an error, and n >= maxlen means truncation. The
   * legacy MSVC _snprintf that snprintf may be mapped to does not terminate
   * the buffer on truncation, so terminate explicitly in both cases. */
  if (n < 0 || n >= maxlen) {
    result[maxlen - 1] = '\0';
  }
}
/*
 * Reports whether `path` can be stat()'ed.
 *
 * Returns 1 when stat() succeeds on `path` and 0 when it fails for any
 * reason; the kind of filesystem entry is not examined.
 */
static int path_exists(const char *path) {
  struct stat info;
  return (stat(path, &info) == 0) ? 1 : 0;
}
/*
 * Locates the nvcc compiler.
 *
 * Candidates are tried in this order and the first existing one wins:
 *   1. the directory named by the CUDA_BIN_PATH environment variable,
 *   2. a built-in list of default installation directories,
 *   3. (non-Windows only) whatever `which nvcc` reports.
 *
 * Returns a pointer to a function-static buffer holding the full path for
 * cases 1 and 2 (overwritten by the next call), the literal "nvcc" for
 * case 3, or NULL when no compiler was found.
 */
const char *cuewCompilerPath(void) {
#ifdef _WIN32
  const char *executable = "nvcc.exe";
  const char *defaultpaths[] = {"C:/CUDA/bin", NULL};
#else
  const char *executable = "nvcc";
  const char *defaultpaths[] = {
    "/Developer/NVIDIA/CUDA-5.0/bin",
    "/usr/local/cuda-5.0/bin",
    "/usr/local/cuda/bin",
    "/Developer/NVIDIA/CUDA-6.0/bin",
    "/usr/local/cuda-6.0/bin",
    "/Developer/NVIDIA/CUDA-5.5/bin",
    "/usr/local/cuda-5.5/bin",
    NULL};
#endif
  static char nvcc[65536];
  const char **candidate;
  const char *env_dir = getenv("CUDA_BIN_PATH");

  /* An explicit user override takes precedence over the defaults. */
  if (env_dir != NULL) {
    path_join(env_dir, executable, sizeof(nvcc), nvcc);
    if (path_exists(nvcc)) {
      return nvcc;
    }
  }

  /* Walk the NULL-terminated list of well-known install locations. */
  for (candidate = defaultpaths; *candidate != NULL; ++candidate) {
    path_join(*candidate, executable, sizeof(nvcc), nvcc);
    if (path_exists(nvcc)) {
      return nvcc;
    }
  }

#ifndef _WIN32
  {
    /* Last resort: ask the shell whether nvcc is reachable. Only the
     * presence of output matters; the bare command name is returned. */
    FILE *which = popen("which nvcc", "r");
    if (which != NULL) {
      char found[4096] = {0};
      int nread = fread(found, 1, sizeof(found) - 1, which);
      found[nread] = '\0';
      pclose(which);
      if (found[0] != '\0') {
        return "nvcc";
      }
    }
  }
#endif

  return NULL;
}
/*
 * Returns the NVRTC version encoded as 10 * major + minor, or 0 when the
 * nvrtcVersion entry point is not available (pointer is null).
 */
int cuewNvrtcVersion(void) {
  int major, minor;

  if (!nvrtcVersion) {
    return 0;
  }

  nvrtcVersion(&major, &minor);
  return 10 * major + minor;
}
/*
 * Determines the version of the CUDA compiler found by cuewCompilerPath().
 *
 * Runs "<compiler> --version", captures its standard output and parses the
 * number that follows "Cuda compilation tools, release ".
 *
 * Returns 10 * major + minor on success. Returns 0 when no compiler was
 * found, the command could not be built or started, or the version could not
 * be located/parsed in the output (a diagnostic is printed to stderr in all
 * but the first case).
 *
 * NOTE(review): the compiler path is passed to popen() unquoted, so a path
 * containing spaces is presumably split by the shell; confirm whether such
 * installs need to be supported.
 */
int cuewCompilerVersion(void) {
  const char *path = cuewCompilerPath();
  const char *marker = "Cuda compilation tools, release ";
  FILE *pipe;
  int major, minor;
  int written;
  size_t used = 0;
  char *versionstr;
  char buf[128];
  char output[65536] = "\0";
  char command[65536] = "\0";

  if (path == NULL) {
    return 0;
  }

  /* get --version output */
  /* Build the command with a single bounded format call; a negative result
   * or one that does not fit means the command line is unusable. */
  written = snprintf(command, sizeof(command), "%s --version", path);
  if (written < 0 || (size_t)written >= sizeof(command)) {
    fprintf(stderr, "CUDA: compiler path is too long to retrieve version\n");
    return 0;
  }

  pipe = popen(command, "r");
  if (!pipe) {
    fprintf(stderr, "CUDA: failed to run compiler to retrieve version");
    return 0;
  }

  /* Loop on the fgets() result rather than feof(): a read error makes
   * fgets() return NULL without setting the EOF indicator, which would
   * otherwise spin forever. The stream is drained even once `output` is
   * full; excess data is dropped. */
  while (fgets(buf, sizeof(buf), pipe) != NULL) {
    size_t chunk = strlen(buf);
    const size_t room = sizeof(output) - 1 - used;
    if (chunk > room) {
      chunk = room;
    }
    memcpy(output + used, buf, chunk);
    used += chunk;
    output[used] = '\0';
  }
  pclose(pipe);

  /* parse version number */
  versionstr = strstr(output, marker);
  if (versionstr == NULL) {
    fprintf(stderr, "CUDA: failed to find version number in:\n\n%s\n", output);
    return 0;
  }

  versionstr += strlen(marker);
  if (sscanf(versionstr, "%d.%d", &major, &minor) < 2) {
    fprintf(stderr, "CUDA: failed to parse version number from:\n\n%s\n", output);
    return 0;
  }

  return 10 * major + minor;
}