[Bf-blender-cvs] [96868a39419] master: Fix T50888: Numeric overflow in split kernel state buffer size calculation

Mai Lavelle noreply at git.blender.org
Sat Mar 11 12:24:38 CET 2017


Commit: 96868a39419f1c9a8962c56e02480fabbf1e5156
Author: Mai Lavelle
Date:   Sat Mar 11 05:23:11 2017 -0500
Branches: master
https://developer.blender.org/rB96868a39419f1c9a8962c56e02480fabbf1e5156

Fix T50888: Numeric overflow in split kernel state buffer size calculation

Overflow led to the state buffer being too small and the split kernel to
get stuck doing nothing forever.

===================================================================

M	intern/cycles/device/device_cpu.cpp
M	intern/cycles/device/device_cuda.cpp
M	intern/cycles/device/device_memory.h
M	intern/cycles/device/device_split_kernel.cpp
M	intern/cycles/device/device_split_kernel.h
M	intern/cycles/device/opencl/opencl_base.cpp
M	intern/cycles/device/opencl/opencl_split.cpp
M	intern/cycles/kernel/kernels/cuda/kernel_split.cu
M	intern/cycles/kernel/kernels/opencl/kernel_state_buffer_size.cl
M	intern/cycles/kernel/split/kernel_split_data.h
M	intern/cycles/util/util_types.h

===================================================================

diff --git a/intern/cycles/device/device_cpu.cpp b/intern/cycles/device/device_cpu.cpp
index 06a1568b4d6..273c3b48936 100644
--- a/intern/cycles/device/device_cpu.cpp
+++ b/intern/cycles/device/device_cpu.cpp
@@ -72,7 +72,7 @@ public:
 	virtual SplitKernelFunction* get_split_kernel_function(string kernel_name, const DeviceRequestedFeatures&);
 	virtual int2 split_kernel_local_size();
 	virtual int2 split_kernel_global_size(device_memory& kg, device_memory& data, DeviceTask *task);
-	virtual size_t state_buffer_size(device_memory& kg, device_memory& data, size_t num_threads);
+	virtual uint64_t state_buffer_size(device_memory& kg, device_memory& data, size_t num_threads);
 };
 
 class CPUDevice : public Device
@@ -860,7 +860,7 @@ int2 CPUSplitKernel::split_kernel_global_size(device_memory& /*kg*/, device_memo
 	return task->requested_tile_size;
 }
 
-size_t CPUSplitKernel::state_buffer_size(device_memory& kernel_globals, device_memory& /*data*/, size_t num_threads) {
+uint64_t CPUSplitKernel::state_buffer_size(device_memory& kernel_globals, device_memory& /*data*/, size_t num_threads) {
 	KernelGlobals *kg = (KernelGlobals*)kernel_globals.device_pointer;
 
 	return split_data_buffer_size(kg, num_threads);
diff --git a/intern/cycles/device/device_cuda.cpp b/intern/cycles/device/device_cuda.cpp
index a630a3d1183..58471ba67c2 100644
--- a/intern/cycles/device/device_cuda.cpp
+++ b/intern/cycles/device/device_cuda.cpp
@@ -89,7 +89,7 @@ class CUDASplitKernel : public DeviceSplitKernel {
 public:
 	explicit CUDASplitKernel(CUDADevice *device);
 
-	virtual size_t state_buffer_size(device_memory& kg, device_memory& data, size_t num_threads);
+	virtual uint64_t state_buffer_size(device_memory& kg, device_memory& data, size_t num_threads);
 
 	virtual bool enqueue_split_kernel_data_init(const KernelDimensions& dim,
 	                                            RenderTile& rtile,
@@ -1473,9 +1473,9 @@ CUDASplitKernel::CUDASplitKernel(CUDADevice *device) : DeviceSplitKernel(device)
 {
 }
 
-size_t CUDASplitKernel::state_buffer_size(device_memory& /*kg*/, device_memory& /*data*/, size_t num_threads)
+uint64_t CUDASplitKernel::state_buffer_size(device_memory& /*kg*/, device_memory& /*data*/, size_t num_threads)
 {
-	device_vector<uint> size_buffer;
+	device_vector<uint64_t> size_buffer;
 	size_buffer.resize(1);
 	device->mem_alloc(NULL, size_buffer, MEM_READ_WRITE);
 
@@ -1504,7 +1504,7 @@ size_t CUDASplitKernel::state_buffer_size(device_memory& /*kg*/, device_memory&
 
 	device->cuda_pop_context();
 
-	device->mem_copy_from(size_buffer, 0, 1, 1, sizeof(uint));
+	device->mem_copy_from(size_buffer, 0, 1, 1, sizeof(uint64_t));
 	device->mem_free(size_buffer);
 
 	return *size_buffer.get_data();
diff --git a/intern/cycles/device/device_memory.h b/intern/cycles/device/device_memory.h
index b69c3dad604..60d166b43ba 100644
--- a/intern/cycles/device/device_memory.h
+++ b/intern/cycles/device/device_memory.h
@@ -48,7 +48,8 @@ enum DataType {
 	TYPE_UINT,
 	TYPE_INT,
 	TYPE_FLOAT,
-	TYPE_HALF
+	TYPE_HALF,
+	TYPE_UINT64,
 };
 
 static inline size_t datatype_size(DataType datatype) 
@@ -59,6 +60,7 @@ static inline size_t datatype_size(DataType datatype)
 		case TYPE_UINT: return sizeof(uint);
 		case TYPE_INT: return sizeof(int);
 		case TYPE_HALF: return sizeof(half);
+		case TYPE_UINT64: return sizeof(uint64_t);
 		default: return 0;
 	}
 }
@@ -160,6 +162,11 @@ template<> struct device_type_traits<half4> {
 	static const int num_elements = 4;
 };
 
+template<> struct device_type_traits<uint64_t> {
+	static const DataType data_type = TYPE_UINT64;
+	static const int num_elements = 1;
+};
+
 /* Device Memory */
 
 class device_memory
diff --git a/intern/cycles/device/device_split_kernel.cpp b/intern/cycles/device/device_split_kernel.cpp
index 10a642ed4d0..5b892038ebb 100644
--- a/intern/cycles/device/device_split_kernel.cpp
+++ b/intern/cycles/device/device_split_kernel.cpp
@@ -105,9 +105,9 @@ bool DeviceSplitKernel::load_kernels(const DeviceRequestedFeatures& requested_fe
 	return true;
 }
 
-size_t DeviceSplitKernel::max_elements_for_max_buffer_size(device_memory& kg, device_memory& data, size_t max_buffer_size)
+size_t DeviceSplitKernel::max_elements_for_max_buffer_size(device_memory& kg, device_memory& data, uint64_t max_buffer_size)
 {
-	size_t size_per_element = state_buffer_size(kg, data, 1024) / 1024;
+	uint64_t size_per_element = state_buffer_size(kg, data, 1024) / 1024;
 	return max_buffer_size / size_per_element;
 }
 
diff --git a/intern/cycles/device/device_split_kernel.h b/intern/cycles/device/device_split_kernel.h
index ae61f9e38c1..6739e754862 100644
--- a/intern/cycles/device/device_split_kernel.h
+++ b/intern/cycles/device/device_split_kernel.h
@@ -105,8 +105,8 @@ public:
 	                device_memory& kgbuffer,
 	                device_memory& kernel_data);
 
-	virtual size_t state_buffer_size(device_memory& kg, device_memory& data, size_t num_threads) = 0;
-	size_t max_elements_for_max_buffer_size(device_memory& kg, device_memory& data, size_t max_buffer_size);
+	virtual uint64_t state_buffer_size(device_memory& kg, device_memory& data, size_t num_threads) = 0;
+	size_t max_elements_for_max_buffer_size(device_memory& kg, device_memory& data, uint64_t max_buffer_size);
 
 	virtual bool enqueue_split_kernel_data_init(const KernelDimensions& dim,
 	                                            RenderTile& rtile,
diff --git a/intern/cycles/device/opencl/opencl_base.cpp b/intern/cycles/device/opencl/opencl_base.cpp
index c5f44f84e8c..51ff39f0ad3 100644
--- a/intern/cycles/device/opencl/opencl_base.cpp
+++ b/intern/cycles/device/opencl/opencl_base.cpp
@@ -334,11 +334,11 @@ void OpenCLDeviceBase::mem_zero(device_memory& mem)
 			size_t num_threads = global_size[0] * global_size[1];
 
 			cl_mem d_buffer = CL_MEM_PTR(mem.device_pointer);
-			unsigned long long d_offset = 0;
-			unsigned long long d_size = 0;
+			cl_ulong d_offset = 0;
+			cl_ulong d_size = 0;
 
 			while(d_offset < mem.memory_size()) {
-				d_size = std::min<unsigned long long>(num_threads*sizeof(float4), mem.memory_size() - d_offset);
+				d_size = std::min<cl_ulong>(num_threads*sizeof(float4), mem.memory_size() - d_offset);
 
 				kernel_set_args(ckZeroBuffer, 0, d_buffer, d_size, d_offset);
 
diff --git a/intern/cycles/device/opencl/opencl_split.cpp b/intern/cycles/device/opencl/opencl_split.cpp
index 89ab19ca93b..a09d93c625e 100644
--- a/intern/cycles/device/opencl/opencl_split.cpp
+++ b/intern/cycles/device/opencl/opencl_split.cpp
@@ -227,9 +227,9 @@ public:
 		return kernel;
 	}
 
-	virtual size_t state_buffer_size(device_memory& kg, device_memory& data, size_t num_threads)
+	virtual uint64_t state_buffer_size(device_memory& kg, device_memory& data, size_t num_threads)
 	{
-		device_vector<uint> size_buffer;
+		device_vector<uint64_t> size_buffer;
 		size_buffer.resize(1);
 		device->mem_alloc(NULL, size_buffer, MEM_READ_WRITE);
 
@@ -249,7 +249,7 @@ public:
 
 		device->opencl_assert_err(device->ciErr, "clEnqueueNDRangeKernel");
 
-		device->mem_copy_from(size_buffer, 0, 1, 1, sizeof(uint));
+		device->mem_copy_from(size_buffer, 0, 1, 1, sizeof(uint64_t));
 		device->mem_free(size_buffer);
 
 		if(device->ciErr != CL_SUCCESS) {
@@ -346,8 +346,8 @@ public:
 
 	virtual int2 split_kernel_global_size(device_memory& kg, device_memory& data, DeviceTask */*task*/)
 	{
-		size_t max_buffer_size;
-		clGetDeviceInfo(device->cdDevice, CL_DEVICE_MAX_MEM_ALLOC_SIZE, sizeof(size_t), &max_buffer_size, NULL);
+		cl_ulong max_buffer_size;
+		clGetDeviceInfo(device->cdDevice, CL_DEVICE_MAX_MEM_ALLOC_SIZE, sizeof(cl_ulong), &max_buffer_size, NULL);
 		VLOG(1) << "Maximum device allocation side: "
 		        << string_human_readable_number(max_buffer_size) << " bytes. ("
 		        << string_human_readable_size(max_buffer_size) << ").";
diff --git a/intern/cycles/kernel/kernels/cuda/kernel_split.cu b/intern/cycles/kernel/kernels/cuda/kernel_split.cu
index 6c508c2cd79..fbdf79697d5 100644
--- a/intern/cycles/kernel/kernels/cuda/kernel_split.cu
+++ b/intern/cycles/kernel/kernels/cuda/kernel_split.cu
@@ -46,7 +46,7 @@
 /* kernels */
 extern "C" __global__ void
 CUDA_LAUNCH_BOUNDS(CUDA_THREADS_BLOCK_WIDTH, CUDA_KERNEL_MAX_REGISTERS)
-kernel_cuda_state_buffer_size(uint num_threads, uint *size)
+kernel_cuda_state_buffer_size(uint num_threads, uint64_t *size)
 {
 	*size = split_data_buffer_size(NULL, num_threads);
 }
diff --git a/intern/cycles/kernel/kernels/opencl/kernel_state_buffer_size.cl b/intern/cycles/kernel/kernels/opencl/kernel_state_buffer_size.cl
index 0a1843ff8bd..4c9bf63ef51 100644
--- a/intern/cycles/kernel/kernels/opencl/kernel_state_buffer_size.cl
+++ b/intern/cycles/kernel/kernels/opencl/kernel_state_buffer_size.cl
@@ -21,7 +21,7 @@ __kernel void kernel_ocl_path_trace_state_buffer_size(
         KernelGlobals *kg,
         ccl_constant KernelData *data,
         uint num_threads,
-        ccl_global uint *size)
+        ccl_global uint64_t *size)
 {
 	kg->data = data;
 	*size = split_data_buffer_size(kg, num_threads);
diff --git a/intern/cycles/kernel/split/kernel_split_data.h b/intern/cycles/kernel/split/kernel_split_data.h
index 81dcdbaedde..d319514c190 100644
--- a/intern/cycles/kernel/split/kernel_split_data.h
+++ b/intern/cycles/kernel/split/kernel_split_data.h
@@ -22,11 +22,11 @@
 
 CCL_NAMESPACE_BEGIN
 
-ccl_device_inline size_t split_data_buffer_size(KernelGlobals *kg, size_t num_elements)
+ccl_device_inline uint64_t split_data_buffer_size(KernelGlobals *kg, size_t num_elements)
 {
 	(void)kg;  /* Unused on CPU. */
 
-	size_t size = 0;
+	uint64_t size = 0;
 #define SPLIT_DATA_ENTRY(type, name, num) + align_up(num_elements * num * sizeof(type), 16)
 	size = size SPLIT_DATA_ENTRIES;
 #undef SPLIT_DATA_ENTRY
diff --git a/intern/cycles/util/util_types.h b/intern/cycles/util/util_types.h
index 36d2f1053c7..dcd0b78e4a4 100644
--- a/intern/cycles/util/util_types.h
+++ b/intern/cycles/util/util_types.h
@@ -106,10 +106,16 @@ typedef unsigned int uint;
 
 #endif
 
-#ifndef __KERNEL_GPU__
-
 /* Fixed Bits Types */
 
+#ifdef __KERNEL_OPENCL__
+
+typedef ulong uint64_t;
+
+#endif
+
+#ifndef __KERNEL_GPU__
+
 #ifdef _WIN32
 
 typedef signed char int8_t;
@@ -474,17 +480,17 @@ ccl_device_inline int4 make

@@ Diff output truncated at 10240 characters. @@




More information about the Bf-blender-cvs mailing list