[Bf-blender-cvs] [12834fe0f00] master: Update CUDA wrangler to latest version

Sergey Sharybin noreply at git.blender.org
Tue Aug 8 13:58:47 CEST 2017


Commit: 12834fe0f00daf766e515102a3ff071423b66abe
Author: Sergey Sharybin
Date:   Tue Aug 8 13:58:07 2017 +0200
Branches: master
https://developer.blender.org/rB12834fe0f00daf766e515102a3ff071423b66abe

Update CUDA wrangler to latest version

Brings in the new declarations from CUDA toolkit version 8.0 and fixes
several pointer types used in function declarations.

===================================================================

M	extern/cuew/README.blender
M	extern/cuew/include/cuew.h
M	extern/cuew/src/cuew.c

===================================================================

diff --git a/extern/cuew/README.blender b/extern/cuew/README.blender
index 7b77935d750..ef36c110e3f 100644
--- a/extern/cuew/README.blender
+++ b/extern/cuew/README.blender
@@ -1,5 +1,5 @@
 Project: Cuda Wrangler
 URL: https://github.com/CudaWrangler/cuew
 License: Apache 2.0
-Upstream version: 63d2a0f
+Upstream version: 3dd0b01
 Local modifications: None
diff --git a/extern/cuew/include/cuew.h b/extern/cuew/include/cuew.h
index 4cce29d38ab..c90ab39601a 100644
--- a/extern/cuew/include/cuew.h
+++ b/extern/cuew/include/cuew.h
@@ -27,7 +27,7 @@ extern "C" {
 #define CUEW_VERSION_MAJOR 1
 #define CUEW_VERSION_MINOR 2
 
-#define CUDA_VERSION 7050
+#define CUDA_VERSION 8000
 #define CU_IPC_HANDLE_SIZE 64
 #define CU_STREAM_LEGACY ((CUstream)0x1)
 #define CU_STREAM_PER_THREAD ((CUstream)0x2)
@@ -51,6 +51,8 @@ extern "C" {
 #define CU_LAUNCH_PARAM_BUFFER_POINTER ((void*)0x01)
 #define CU_LAUNCH_PARAM_BUFFER_SIZE ((void*)0x02)
 #define CU_PARAM_TR_DEFAULT -1
+#define CU_DEVICE_CPU ((CUdevice)-1)
+#define CU_DEVICE_INVALID ((CUdevice)-2)
 
 /* Functions which changed 3.1 -> 3.2 for 64 bit stuff,
  * the cuda library has both the old ones for compatibility and new
@@ -120,6 +122,45 @@ typedef unsigned long long CUdeviceptr;
 typedef unsigned int CUdeviceptr;
 #endif
 
+
+#ifdef _WIN32
+#  define CUDAAPI __stdcall
+#  define CUDA_CB __stdcall
+#else
+#  define CUDAAPI
+#  define CUDA_CB
+#endif
+
+typedef signed char int8_t;
+typedef short int int16_t;
+typedef int int32_t;
+typedef long int int64_t;
+typedef unsigned char uint8_t;
+typedef unsigned short int uint16_t;
+typedef unsigned int uint32_t;
+typedef unsigned long int uint64_t;
+typedef signed char int_least8_t;
+typedef short int int_least16_t;
+typedef int int_least32_t;
+typedef long int int_least64_t;
+typedef unsigned char uint_least8_t;
+typedef unsigned short int uint_least16_t;
+typedef unsigned int uint_least32_t;
+typedef unsigned long int uint_least64_t;
+typedef signed char int_fast8_t;
+typedef long int int_fast16_t;
+typedef long int int_fast32_t;
+typedef long int int_fast64_t;
+typedef unsigned char uint_fast8_t;
+typedef unsigned long int uint_fast16_t;
+typedef unsigned long int uint_fast32_t;
+typedef unsigned long int uint_fast64_t;
+typedef long int intptr_t;
+typedef unsigned long int uintptr_t;
+typedef long int intmax_t;
+typedef unsigned long int uintmax_t;
+typedef uint32_t cuuint32_t;
+typedef uint64_t cuuint64_t;
 typedef int CUdevice;
 typedef struct CUctx_st* CUcontext;
 typedef struct CUmod_st* CUmodule;
@@ -180,6 +221,53 @@ typedef enum CUevent_flags_enum {
   CU_EVENT_INTERPROCESS = 0x4,
 } CUevent_flags;
 
+typedef enum CUstreamWaitValue_flags_enum {
+  CU_STREAM_WAIT_VALUE_GEQ = 0x0,
+  CU_STREAM_WAIT_VALUE_EQ = 0x1,
+  CU_STREAM_WAIT_VALUE_AND = 0x2,
+  CU_STREAM_WAIT_VALUE_FLUSH = (1 << 30),
+} CUstreamWaitValue_flags;
+
+typedef enum CUstreamWriteValue_flags_enum {
+  CU_STREAM_WRITE_VALUE_DEFAULT = 0x0,
+  CU_STREAM_WRITE_VALUE_NO_MEMORY_BARRIER = 0x1,
+} CUstreamWriteValue_flags;
+
+typedef enum CUstreamBatchMemOpType_enum {
+  CU_STREAM_MEM_OP_WAIT_VALUE_32 = 1,
+  CU_STREAM_MEM_OP_WRITE_VALUE_32 = 2,
+  CU_STREAM_MEM_OP_FLUSH_REMOTE_WRITES = 3,
+} CUstreamBatchMemOpType;
+
+typedef union CUstreamBatchMemOpParams_union {
+  CUstreamBatchMemOpType operation;
+  struct CUstreamMemOpWaitValueParams_st {
+    CUstreamBatchMemOpType operation;
+    CUdeviceptr address;
+    union {
+      cuuint32_t value;
+      cuuint64_t pad;
+    };
+    unsigned int flags;
+    CUdeviceptr alias;
+  } waitValue;
+  struct CUstreamMemOpWriteValueParams_st {
+    CUstreamBatchMemOpType operation;
+    CUdeviceptr address;
+    union {
+      cuuint32_t value;
+      cuuint64_t pad;
+    };
+    unsigned int flags;
+    CUdeviceptr alias;
+  } writeValue;
+  struct CUstreamMemOpFlushRemoteWritesParams_st {
+    CUstreamBatchMemOpType operation;
+    unsigned int flags;
+  } flushRemoteWrites;
+  cuuint64_t pad[6];
+} CUstreamBatchMemOpParams;
+
 typedef enum CUoccupancy_flags_enum {
   CU_OCCUPANCY_DEFAULT = 0x0,
   CU_OCCUPANCY_DISABLE_CACHING_OVERRIDE = 0x1,
@@ -299,6 +387,12 @@ typedef enum CUdevice_attribute_enum {
   CU_DEVICE_ATTRIBUTE_MANAGED_MEMORY = 83,
   CU_DEVICE_ATTRIBUTE_MULTI_GPU_BOARD = 84,
   CU_DEVICE_ATTRIBUTE_MULTI_GPU_BOARD_GROUP_ID = 85,
+  CU_DEVICE_ATTRIBUTE_HOST_NATIVE_ATOMIC_SUPPORTED = 86,
+  CU_DEVICE_ATTRIBUTE_SINGLE_TO_DOUBLE_PRECISION_PERF_RATIO = 87,
+  CU_DEVICE_ATTRIBUTE_PAGEABLE_MEMORY_ACCESS = 88,
+  CU_DEVICE_ATTRIBUTE_CONCURRENT_MANAGED_ACCESS = 89,
+  CU_DEVICE_ATTRIBUTE_COMPUTE_PREEMPTION_SUPPORTED = 90,
+  CU_DEVICE_ATTRIBUTE_CAN_USE_HOST_POINTER_FOR_REGISTERED_MEM = 91,
   CU_DEVICE_ATTRIBUTE_MAX,
 } CUdevice_attribute;
 
@@ -360,11 +454,26 @@ typedef enum CUmemorytype_enum {
 
 typedef enum CUcomputemode_enum {
   CU_COMPUTEMODE_DEFAULT = 0,
-  CU_COMPUTEMODE_EXCLUSIVE = 1,
   CU_COMPUTEMODE_PROHIBITED = 2,
   CU_COMPUTEMODE_EXCLUSIVE_PROCESS = 3,
 } CUcomputemode;
 
+typedef enum CUmem_advise_enum {
+  CU_MEM_ADVISE_SET_READ_MOSTLY = 1,
+  CU_MEM_ADVISE_UNSET_READ_MOSTLY = 2,
+  CU_MEM_ADVISE_SET_PREFERRED_LOCATION = 3,
+  CU_MEM_ADVISE_UNSET_PREFERRED_LOCATION = 4,
+  CU_MEM_ADVISE_SET_ACCESSED_BY = 5,
+  CU_MEM_ADVISE_UNSET_ACCESSED_BY = 6,
+} CUmem_advise;
+
+typedef enum CUmem_range_attribute_enum {
+  CU_MEM_RANGE_ATTRIBUTE_READ_MOSTLY = 1,
+  CU_MEM_RANGE_ATTRIBUTE_PREFERRED_LOCATION = 2,
+  CU_MEM_RANGE_ATTRIBUTE_ACCESSED_BY = 3,
+  CU_MEM_RANGE_ATTRIBUTE_LAST_PREFETCH_LOCATION = 4,
+} CUmem_range_attribute;
+
 typedef enum CUjit_option_enum {
   CU_JIT_MAX_REGISTERS = 0,
   CU_JIT_THREADS_PER_BLOCK,
@@ -381,6 +490,8 @@ typedef enum CUjit_option_enum {
   CU_JIT_LOG_VERBOSE,
   CU_JIT_GENERATE_LINE_INFO,
   CU_JIT_CACHE_MODE,
+  CU_JIT_NEW_SM3X_OPT,
+  CU_JIT_FAST_COMPILE,
   CU_JIT_NUM_OPTIONS,
 } CUjit_option;
 
@@ -397,6 +508,10 @@ typedef enum CUjit_target_enum {
   CU_TARGET_COMPUTE_37 = 37,
   CU_TARGET_COMPUTE_50 = 50,
   CU_TARGET_COMPUTE_52 = 52,
+  CU_TARGET_COMPUTE_53 = 53,
+  CU_TARGET_COMPUTE_60 = 60,
+  CU_TARGET_COMPUTE_61 = 61,
+  CU_TARGET_COMPUTE_62 = 62,
 } CUjit_target;
 
 typedef enum CUjit_fallback_enum {
@@ -490,6 +605,7 @@ typedef enum cudaError_enum {
   CUDA_ERROR_PEER_ACCESS_UNSUPPORTED = 217,
   CUDA_ERROR_INVALID_PTX = 218,
   CUDA_ERROR_INVALID_GRAPHICS_CONTEXT = 219,
+  CUDA_ERROR_NVLINK_UNCORRECTABLE = 220,
   CUDA_ERROR_INVALID_SOURCE = 300,
   CUDA_ERROR_FILE_NOT_FOUND = 301,
   CUDA_ERROR_SHARED_OBJECT_SYMBOL_NOT_FOUND = 302,
@@ -521,8 +637,14 @@ typedef enum cudaError_enum {
   CUDA_ERROR_UNKNOWN = 999,
 } CUresult;
 
-typedef void* CUstreamCallback;
-typedef size_t* CUoccupancyB2DSize;
+typedef enum CUdevice_P2PAttribute_enum {
+  CU_DEVICE_P2P_ATTRIBUTE_PERFORMANCE_RANK = 0x01,
+  CU_DEVICE_P2P_ATTRIBUTE_ACCESS_SUPPORTED = 0x02,
+  CU_DEVICE_P2P_ATTRIBUTE_NATIVE_ATOMIC_SUPPORTED = 0x03,
+} CUdevice_P2PAttribute;
+
+typedef void (CUDA_CB *CUstreamCallback)(CUstream hStream, CUresult status, void* userData);
+typedef size_t (CUDA_CB *CUoccupancyB2DSize)(int blockSize);
 
 typedef struct CUDA_MEMCPY2D_st {
   size_t srcXInBytes;
@@ -654,7 +776,8 @@ typedef struct CUDA_TEXTURE_DESC_st {
   float mipmapLevelBias;
   float minMipmapLevelClamp;
   float maxMipmapLevelClamp;
-  int reserved[16];
+  float borderColor[4];
+  int reserved[12];
 } CUDA_TEXTURE_DESC;
 
 typedef enum CUresourceViewFormat_enum {
@@ -736,21 +859,16 @@ typedef enum  {
   NVRTC_ERROR_INVALID_OPTION = 5,
   NVRTC_ERROR_COMPILATION = 6,
   NVRTC_ERROR_BUILTIN_OPERATION_FAILURE = 7,
+  NVRTC_ERROR_NO_NAME_EXPRESSIONS_AFTER_COMPILATION = 8,
+  NVRTC_ERROR_NO_LOWERED_NAMES_BEFORE_COMPILATION = 9,
+  NVRTC_ERROR_NAME_EXPRESSION_NOT_VALID = 10,
+  NVRTC_ERROR_INTERNAL_ERROR = 11,
 } nvrtcResult;
 
 typedef struct _nvrtcProgram* nvrtcProgram;
-
-#ifdef _WIN32
-#  define CUDAAPI __stdcall
-#  define CUDA_CB __stdcall
-#else
-#  define CUDAAPI
-#  define CUDA_CB
-#endif
-
 /* Function types. */
-typedef CUresult CUDAAPI tcuGetErrorString(CUresult error, const char* pStr);
-typedef CUresult CUDAAPI tcuGetErrorName(CUresult error, const char* pStr);
+typedef CUresult CUDAAPI tcuGetErrorString(CUresult error, const char** pStr);
+typedef CUresult CUDAAPI tcuGetErrorName(CUresult error, const char** pStr);
 typedef CUresult CUDAAPI tcuInit(unsigned int Flags);
 typedef CUresult CUDAAPI tcuDriverGetVersion(int* driverVersion);
 typedef CUresult CUDAAPI tcuDeviceGet(CUdevice* device, int ordinal);
@@ -786,26 +904,26 @@ typedef CUresult CUDAAPI tcuCtxAttach(CUcontext* pctx, unsigned int flags);
 typedef CUresult CUDAAPI tcuCtxDetach(CUcontext ctx);
 typedef CUresult CUDAAPI tcuModuleLoad(CUmodule* module, const char* fname);
 typedef CUresult CUDAAPI tcuModuleLoadData(CUmodule* module, const void* image);
-typedef CUresult CUDAAPI tcuModuleLoadDataEx(CUmodule* module, const void* image, unsigned int numOptions, CUjit_option* options, void* optionValues);
+typedef CUresult CUDAAPI tcuModuleLoadDataEx(CUmodule* module, const void* image, unsigned int numOptions, CUjit_option* options, void** optionValues);
 typedef CUresult CUDAAPI tcuModuleLoadFatBinary(CUmodule* module, const void* fatCubin);
 typedef CUresult CUDAAPI tcuModuleUnload(CUmodule hmod);
 typedef CUresult CUDAAPI tcuModuleGetFunction(CUfunction* hfunc, CUmodule hmod, const char* name);
 typedef CUresult CUDAAPI tcuModuleGetGlobal_v2(CUdeviceptr* dptr, size_t* bytes, CUmodule hmod, const char* name);
 typedef CUresult CUDAAPI tcuModuleGetTexRef(CUtexref* pTexRef, CUmodule hmod, const char* name);
 typedef CUresult CUDAAPI tcuModuleGetSurfRef(CUsurfref* pSurfRef, CUmodule hmod, const char* name);
-typedef CUresult CUDAAPI tcuLinkCreate_v2(unsigned int numOptions, CUjit_option* options, void* optionValues, CUlinkState* stateOut);
-typedef CUresult CUDAAPI tcuLinkAddData_v2(CUlinkState state, CUjitInputType type, void* data, size_t size, const char* name, unsigned int numOptions, CUjit_option* options, void* optionValues);
-typedef CUresult CUDAAPI tcuLinkAddFile_v2(CUlinkState state, CUjitInputType type, const char* path, unsigned int numOptions, CUjit_option* options, void* optionValues);
-typedef CUresult CUDAAPI tcuLinkComplete(CUlinkState state, void* cubinOut, size_t* sizeOut

@@ Diff output truncated at 10240 characters. @@




More information about the Bf-blender-cvs mailing list