[Bf-blender-cvs] [dd610cb] openvdb: Improved thread data handling for the OpenVDB Cycles node.

Lukas Tönne noreply at git.blender.org
Sun Nov 13 21:44:46 CET 2016


Commit: dd610cb6e97bbc435f3b437c832283d54144a4dd
Author: Lukas Tönne
Date:   Sat Nov 12 13:47:35 2016 +0100
Branches: openvdb
https://developer.blender.org/rBdd610cb6e97bbc435f3b437c832283d54144a4dd

Improved thread data handling for the OpenVDB Cycles node.

OpenVDB grid accessors, samplers, and ray intersectors must be stored per thread.
Previously this data was stored in a per-thread map in each grid. This caused problems
because the pthread_id keys can become invalid, and it also created a lot of unused
accessors.

The patch switches thread data storage around, so that thread-local data is now passed
along in KernelGlobals for every work task as needed (each task only runs in one thread).
This system is much more reliable and is similar to other cases of thread-local data,
such as OSL shaders.

The patch also removes the virtual base class for "volumes", which might give a minor
performance improvement. If a generic volume type for Cycles is needed it can be
abstracted from scratch.

Removed the unused thread_id list in Cycles CPU tasks, which was only needed
for the now-deprecated OpenVDB grid accessor lookup.

===================================================================

M	intern/cycles/device/CMakeLists.txt
M	intern/cycles/device/device.h
M	intern/cycles/device/device_cpu.cpp
M	intern/cycles/kernel/CMakeLists.txt
M	intern/cycles/kernel/bvh/bvh_volume.h
M	intern/cycles/kernel/bvh/bvh_volume_all.h
M	intern/cycles/kernel/bvh/qbvh_volume.h
M	intern/cycles/kernel/bvh/qbvh_volume_all.h
M	intern/cycles/kernel/kernel_compat_cpu.h
M	intern/cycles/kernel/kernel_globals.h
M	intern/cycles/kernel/kernel_volume.h
M	intern/cycles/kernel/kernels/cpu/kernel.cpp
A	intern/cycles/kernel/openvdb/vdb_globals.h
A	intern/cycles/kernel/openvdb/vdb_intern.h
A	intern/cycles/kernel/openvdb/vdb_thread.cpp
A	intern/cycles/kernel/openvdb/vdb_thread.h
M	intern/cycles/render/attribute.cpp
M	intern/cycles/render/volume.cpp
M	intern/cycles/render/volume.h
M	intern/cycles/util/util_task.cpp
M	intern/cycles/util/util_task.h
M	intern/cycles/util/util_thread.h
M	intern/cycles/util/util_volume.cpp
M	intern/cycles/util/util_volume.h

===================================================================

diff --git a/intern/cycles/device/CMakeLists.txt b/intern/cycles/device/CMakeLists.txt
index 966ff5e..091bb76 100644
--- a/intern/cycles/device/CMakeLists.txt
+++ b/intern/cycles/device/CMakeLists.txt
@@ -4,6 +4,7 @@ set(INC
 	../graph
 	../kernel
 	../kernel/svm
+	../kernel/openvdb
 	../kernel/osl
 	../util
 	../render
diff --git a/intern/cycles/device/device.h b/intern/cycles/device/device.h
index 77dc1fa..d990218 100644
--- a/intern/cycles/device/device.h
+++ b/intern/cycles/device/device.h
@@ -34,6 +34,8 @@ CCL_NAMESPACE_BEGIN
 class Progress;
 class RenderTile;
 
+struct OpenVDBGlobals;
+
 /* Device Types */
 
 enum DeviceType {
@@ -249,6 +251,9 @@ public:
 	/* open shading language, only for CPU device */
 	virtual void *osl_memory() { return NULL; }
 
+	/* OpenVDB data */
+	virtual OpenVDBGlobals *vdb_memory() { return NULL; }
+
 	/* load/compile kernels, must be called before adding tasks */ 
 	virtual bool load_kernels(
 	        const DeviceRequestedFeatures& /*requested_features*/)
diff --git a/intern/cycles/device/device_cpu.cpp b/intern/cycles/device/device_cpu.cpp
index aed86d8..fe870b1 100644
--- a/intern/cycles/device/device_cpu.cpp
+++ b/intern/cycles/device/device_cpu.cpp
@@ -35,6 +35,11 @@
 #include "osl_shader.h"
 #include "osl_globals.h"
 
+#ifdef WITH_OPENVDB
+#include "vdb_globals.h"
+#include "vdb_thread.h"
+#endif
+
 #include "buffers.h"
 
 #include "util_debug.h"
@@ -57,7 +62,11 @@ public:
 #ifdef WITH_OSL
 	OSLGlobals osl_globals;
 #endif
-	
+
+#ifdef WITH_OPENVDB
+	OpenVDBGlobals vdb_globals;
+#endif
+
 	CPUDevice(DeviceInfo& info, Stats &stats, bool background)
 	: Device(info, stats, background)
 	{
@@ -189,6 +198,15 @@ public:
 #endif
 	}
 
+	OpenVDBGlobals *vdb_memory()
+	{
+#ifdef WITH_OPENVDB
+		return &vdb_globals;
+#else
+		return NULL;
+#endif
+	}
+
 	void thread_run(DeviceTask *task)
 	{
 		if(task->type == DeviceTask::PATH_TRACE)
@@ -385,6 +403,10 @@ public:
 #ifdef WITH_OSL
 		OSLShader::thread_init(&kg, &kernel_globals, &osl_globals);
 #endif
+#ifdef WITH_OPENVDB
+		vdb_thread_init(&kg, &kernel_globals, &vdb_globals);
+#endif
+
 		void(*shader_kernel)(KernelGlobals*, uint4*, float4*, float*, int, int, int, int, int);
 
 #ifdef WITH_CYCLES_OPTIMIZED_KERNEL_AVX2
@@ -443,6 +465,9 @@ public:
 #ifdef WITH_OSL
 		OSLShader::thread_free(&kg);
 #endif
+#ifdef WITH_OPENVDB
+		vdb_thread_free(&kg);
+#endif
 	}
 
 	int get_split_task_count(DeviceTask& task)
@@ -491,6 +516,9 @@ protected:
 #ifdef WITH_OSL
 		OSLShader::thread_init(&kg, &kernel_globals, &osl_globals);
 #endif
+#ifdef WITH_OPENVDB
+		vdb_thread_init(&kg, &kernel_globals, &vdb_globals);
+#endif
 		return kg;
 	}
 
@@ -509,6 +537,9 @@ protected:
 #ifdef WITH_OSL
 		OSLShader::thread_free(kg);
 #endif
+#ifdef WITH_OPENVDB
+		vdb_thread_free(kg);
+#endif
 	}
 };
 
diff --git a/intern/cycles/kernel/CMakeLists.txt b/intern/cycles/kernel/CMakeLists.txt
index 7a9b828..2c95825 100644
--- a/intern/cycles/kernel/CMakeLists.txt
+++ b/intern/cycles/kernel/CMakeLists.txt
@@ -302,6 +302,18 @@ if(WITH_CYCLES_OSL)
 	add_subdirectory(shaders)
 endif()
 
+# OpenVDB module
+
+list(APPEND SRC
+	openvdb/vdb_thread.cpp
+	)
+
+list(APPEND SRC_HEADERS
+	openvdb/vdb_globals.h
+	openvdb/vdb_thread.h
+	openvdb/vdb_intern.h
+	)
+
 # CPU module
 
 include_directories(${INC})
diff --git a/intern/cycles/kernel/bvh/bvh_volume.h b/intern/cycles/kernel/bvh/bvh_volume.h
index 300f813..f6db399 100644
--- a/intern/cycles/kernel/bvh/bvh_volume.h
+++ b/intern/cycles/kernel/bvh/bvh_volume.h
@@ -107,7 +107,7 @@ bool BVH_FUNCTION_FULL_NAME(BVH)(KernelGlobals *kg,
 	for(int i = 0; i < num_volumes; i++) {
 		float t;
 
-		if(kg->float_volumes[i]->intersect(ray, &t)) {
+		if(vdb_volume_intersect(kg->vdb_tdata, i, ray, &t)) {
 			isect->type = PRIMITIVE_VOLUME;
 			isect->prim = i;
 			isect->t = t;
diff --git a/intern/cycles/kernel/bvh/bvh_volume_all.h b/intern/cycles/kernel/bvh/bvh_volume_all.h
index c5a3bf3..04fe6e0 100644
--- a/intern/cycles/kernel/bvh/bvh_volume_all.h
+++ b/intern/cycles/kernel/bvh/bvh_volume_all.h
@@ -108,7 +108,7 @@ uint BVH_FUNCTION_FULL_NAME(BVH)(KernelGlobals *kg,
 	for(int i = 0; i < num_volumes; i++) {
 		float t;
 
-		if(kg->float_volumes[i]->intersect(ray, &t)) {
+		if(vdb_volume_intersect(kg->vdb_tdata, i, ray, &t)) {
 			isect_array->type = PRIMITIVE_VOLUME;
 			isect_array->prim = i;
 			isect_array->t = t;
diff --git a/intern/cycles/kernel/bvh/qbvh_volume.h b/intern/cycles/kernel/bvh/qbvh_volume.h
index 3728a02..989873b 100644
--- a/intern/cycles/kernel/bvh/qbvh_volume.h
+++ b/intern/cycles/kernel/bvh/qbvh_volume.h
@@ -101,7 +101,7 @@ ccl_device bool BVH_FUNCTION_FULL_NAME(QBVH)(KernelGlobals *kg,
 	for(int i = 0; i < num_volumes; i++) {
 		float t;
 
-		if(kg->float_volumes[i]->intersect(ray, &t)) {
+		if(vdb_volume_intersect(kg->vdb_tdata, i, ray, &t)) {
 			isect->type = PRIMITIVE_VOLUME;
 			isect->prim = i;
 			isect->t = t;
diff --git a/intern/cycles/kernel/bvh/qbvh_volume_all.h b/intern/cycles/kernel/bvh/qbvh_volume_all.h
index 8d75b81..87bbca5 100644
--- a/intern/cycles/kernel/bvh/qbvh_volume_all.h
+++ b/intern/cycles/kernel/bvh/qbvh_volume_all.h
@@ -105,7 +105,7 @@ ccl_device uint BVH_FUNCTION_FULL_NAME(QBVH)(KernelGlobals *kg,
 	for(int i = 0; i < num_volumes; i++) {
 		float t;
 
-		if(kg->float_volumes[i]->intersect(ray, &t)) {
+		if(vdb_volume_intersect(kg->vdb_tdata, i, ray, &t)) {
 			isect_array->type = PRIMITIVE_VOLUME;
 			isect_array->prim = i;
 			isect_array->t = t;
diff --git a/intern/cycles/kernel/kernel_compat_cpu.h b/intern/cycles/kernel/kernel_compat_cpu.h
index 317af4c..d51a0c5 100644
--- a/intern/cycles/kernel/kernel_compat_cpu.h
+++ b/intern/cycles/kernel/kernel_compat_cpu.h
@@ -528,8 +528,8 @@ typedef texture_image<half4> texture_image_half4;
 #define kernel_tex_image_interp(tex,x,y) kernel_tex_image_interp_impl(kg,tex,x,y)
 #define kernel_tex_image_interp_3d(tex, x, y, z) kernel_tex_image_interp_3d_impl(kg,tex,x,y,z)
 #define kernel_tex_image_interp_3d_ex(tex, x, y, z, interpolation) kernel_tex_image_interp_3d_ex_impl(kg,tex, x, y, z, interpolation)
-#define kernel_tex_voxel_float(tex, x, y, z, sampling) (kg->float_volumes[tex]->sample(x, y, z, sampling))
-#define kernel_tex_voxel_float3(tex, x, y, z, sampling) (kg->float3_volumes[tex]->sample(x, y, z, sampling))
+#define kernel_tex_voxel_float(tex, x, y, z, sampling) (vdb_volume_sample_scalar(kg->vdb, kg->vdb_tdata, tex, x, y, z, sampling))
+#define kernel_tex_voxel_float3(tex, x, y, z, sampling) (vdb_volume_sample_vector(kg->vdb, kg->vdb_tdata, tex, x, y, z, sampling))
 
 #define kernel_data (kg->__data)
 
diff --git a/intern/cycles/kernel/kernel_globals.h b/intern/cycles/kernel/kernel_globals.h
index d8c4b7c..74357bd 100644
--- a/intern/cycles/kernel/kernel_globals.h
+++ b/intern/cycles/kernel/kernel_globals.h
@@ -31,6 +31,11 @@ struct OSLThreadData;
 struct OSLShadingSystem;
 #  endif
 
+#  ifdef WITH_OPENVDB
+struct OpenVDBGlobals;
+struct OpenVDBThreadData;
+#  endif
+
 struct Intersection;
 struct VolumeStep;
 
@@ -44,9 +49,6 @@ typedef struct KernelGlobals {
 	texture_image_uchar texture_byte_images[TEX_NUM_BYTE_CPU];
 	texture_image_half texture_half_images[TEX_NUM_HALF_CPU];
 
-	float_volume *float_volumes[MAX_VOLUME];
-	float3_volume *float3_volumes[MAX_VOLUME];
-
 #  define KERNEL_TEX(type, ttype, name) ttype name;
 #  define KERNEL_IMAGE_TEX(type, ttype, name)
 #  include "kernel_textures.h"
@@ -69,6 +71,11 @@ typedef struct KernelGlobals {
 	/* Storage for decoupled volume steps. */
 	VolumeStep *decoupled_volume_steps[2];
 	int decoupled_volume_steps_index;
+
+#  ifdef WITH_OPENVDB
+	OpenVDBGlobals *vdb;
+	OpenVDBThreadData *vdb_tdata;
+#  endif
 } KernelGlobals;
 
 #endif  /* __KERNEL_CPU__ */
diff --git a/intern/cycles/kernel/kernel_volume.h b/intern/cycles/kernel/kernel_volume.h
index 107d0d5..e973afe 100644
--- a/intern/cycles/kernel/kernel_volume.h
+++ b/intern/cycles/kernel/kernel_volume.h
@@ -14,6 +14,8 @@
  * limitations under the License.
  */
 
+#include "openvdb/vdb_thread.h"
+
 CCL_NAMESPACE_BEGIN
 
 /* Events for probalistic scattering */
@@ -220,26 +222,25 @@ ccl_device void kernel_volume_shadow_heterogeneous(KernelGlobals *kg, PathState
 	float3 sum = make_float3(0.0f, 0.0f, 0.0f);
 
 #ifdef __OPENVDB__
-	//int vdb_index = kernel_data.tables.density_index;
+//	int density_index = kernel_data.tables.density_index;
 	int num_volumes = kernel_data.tables.num_volumes;
 	bool has_vdb_volume = num_volumes > 0;
 	float t1 = 0.0f;
 	int v = 0;
-	float_volume *volume = kg->float_volumes[0];
 
 	float isec_t = 0.0f;
-	for(; v < num_volumes; v++, volume++) {
-		if(volume->intersect(ray, &isec_t)) {
+	for(; v < num_volumes; v++) {
+		if(vdb_volume_intersect(kg->vdb_tdata, v, ray, &isec_t)) {
 			break;
 		}
 	}
 
-	if(has_vdb_volume && v < num_volumes && kg->float_volumes[v]->has_uniform_voxels()) {
+	if(has_vdb_volume && v < num_volumes && vdb_volume_scalar_has_uniform_voxels(kg->vdb, v)) {
 		/* TODO(kevin): this call should be moved out of here, all it does is
 		 * checking if we have an intersection with the boundbox of the volumue
 		 * which in most cases corresponds to the boundbox of the object that has
 		 * this volume. Also it initializes the rays for the ray marching. */
-		//if(!kg->float_volumes[vdb_index]->intersect(ray, NULL)) {
+		//if(!vdb_volume_intersect(kg->vdb_tdata, density_index, ray, NULL)) {
 		//	return;
 		//}
 
@@ -247,7 +248,7 @@ ccl_device void kernel_volume_shadow_heterogeneous(KernelGlobals *kg, PathState
 		 * containing active voxels. If we don't have any active node in the current
 		 * ray path (i.e. empty space) the ray march loop is not executed,
 		 * otherwise we loop through all leaves until the end of the volume. */
-		while(kg->float_volumes[v]->march(&t, &t1)) {
+		while(vdb_volume_march(kg->vdb_tdata, v, &t, &t1)) {
 			int i = 0;
 
 			/* Perform small steps through the current leaf or tile. */
@@ -629,7 +630,7 @@ ccl_device VolumeIntegrateResult kernel_volume_integrate_heterogeneous_distance(
 	bool path_missed = true;
 
 #ifdef __OPENVDB__
-//	int vdb_index = kernel_data.tables.density_index;
+//	int density_index = kernel_data.tables.density_index;
 	int num_volumes = kernel_data.tables.num_volumes;
 	bool has_vdb_

@@ Diff output truncated at 10240 characters. @@




More information about the Bf-blender-cvs mailing list