[Bf-blender-cvs] [ff1883307f1] master: Cleanup: renaming and consistency for kernel data

Brecht Van Lommel noreply at git.blender.org
Mon Jun 20 12:37:58 CEST 2022


Commit: ff1883307f12a8b734bfcf87b01743dc73afae75
Author: Brecht Van Lommel
Date:   Fri Jun 17 17:16:37 2022 +0200
Branches: master
https://developer.blender.org/rBff1883307f12a8b734bfcf87b01743dc73afae75

Cleanup: renaming and consistency for kernel data

* Rename "texture" to "data array". This has not used textures for a long time,
  there are just global memory arrays now. (On old CUDA GPUs there was a cache
  for textures but not global memory, so we used to put all data in textures.)
* For CUDA and HIP, put globals in KernelParams struct like other devices.
* Drop __ prefix for data array names, no possibility for naming conflict now that
  these are in a struct.

===================================================================

M	intern/cycles/bvh/embree.cpp
M	intern/cycles/device/cpu/device_impl.cpp
M	intern/cycles/device/cuda/device_impl.cpp
M	intern/cycles/device/hip/device_impl.cpp
M	intern/cycles/device/memory.h
M	intern/cycles/device/metal/device_impl.mm
M	intern/cycles/device/metal/queue.mm
M	intern/cycles/device/optix/device_impl.cpp
M	intern/cycles/integrator/path_trace_work_gpu.cpp
M	intern/cycles/kernel/CMakeLists.txt
M	intern/cycles/kernel/bvh/bvh.h
M	intern/cycles/kernel/bvh/embree.h
M	intern/cycles/kernel/bvh/local.h
M	intern/cycles/kernel/bvh/nodes.h
M	intern/cycles/kernel/bvh/shadow_all.h
M	intern/cycles/kernel/bvh/traversal.h
M	intern/cycles/kernel/bvh/util.h
M	intern/cycles/kernel/bvh/volume.h
M	intern/cycles/kernel/bvh/volume_all.h
M	intern/cycles/kernel/camera/camera.h
M	intern/cycles/kernel/closure/bsdf.h
A	intern/cycles/kernel/data_arrays.h
M	intern/cycles/kernel/device/cpu/compat.h
M	intern/cycles/kernel/device/cpu/globals.h
M	intern/cycles/kernel/device/cpu/image.h
M	intern/cycles/kernel/device/cpu/kernel.cpp
M	intern/cycles/kernel/device/cuda/globals.h
M	intern/cycles/kernel/device/gpu/image.h
M	intern/cycles/kernel/device/hip/globals.h
M	intern/cycles/kernel/device/metal/context_end.h
M	intern/cycles/kernel/device/metal/globals.h
M	intern/cycles/kernel/device/metal/kernel.metal
M	intern/cycles/kernel/device/optix/globals.h
M	intern/cycles/kernel/device/optix/kernel.cu
M	intern/cycles/kernel/device/optix/kernel_shader_raytrace.cu
M	intern/cycles/kernel/geom/attribute.h
M	intern/cycles/kernel/geom/curve.h
M	intern/cycles/kernel/geom/curve_intersect.h
M	intern/cycles/kernel/geom/motion_curve.h
M	intern/cycles/kernel/geom/motion_point.h
M	intern/cycles/kernel/geom/motion_triangle.h
M	intern/cycles/kernel/geom/motion_triangle_intersect.h
M	intern/cycles/kernel/geom/motion_triangle_shader.h
M	intern/cycles/kernel/geom/object.h
M	intern/cycles/kernel/geom/patch.h
M	intern/cycles/kernel/geom/point.h
M	intern/cycles/kernel/geom/point_intersect.h
M	intern/cycles/kernel/geom/shader_data.h
M	intern/cycles/kernel/geom/subd_triangle.h
M	intern/cycles/kernel/geom/triangle.h
M	intern/cycles/kernel/geom/triangle_intersect.h
M	intern/cycles/kernel/geom/volume.h
M	intern/cycles/kernel/integrator/init_from_bake.h
M	intern/cycles/kernel/integrator/intersect_closest.h
M	intern/cycles/kernel/integrator/mnee.h
M	intern/cycles/kernel/integrator/shade_background.h
M	intern/cycles/kernel/integrator/shade_surface.h
M	intern/cycles/kernel/integrator/shader_eval.h
M	intern/cycles/kernel/integrator/subsurface.h
M	intern/cycles/kernel/integrator/subsurface_disk.h
M	intern/cycles/kernel/integrator/volume_stack.h
M	intern/cycles/kernel/light/background.h
M	intern/cycles/kernel/light/light.h
M	intern/cycles/kernel/light/sample.h
M	intern/cycles/kernel/sample/jitter.h
M	intern/cycles/kernel/sample/pattern.h
M	intern/cycles/kernel/svm/bevel.h
M	intern/cycles/kernel/svm/ies.h
M	intern/cycles/kernel/svm/ramp.h
M	intern/cycles/kernel/svm/svm.h
D	intern/cycles/kernel/textures.h
M	intern/cycles/kernel/util/lookup_table.h
M	intern/cycles/scene/film.cpp
M	intern/cycles/scene/geometry.cpp
M	intern/cycles/scene/image.cpp
M	intern/cycles/scene/light.cpp
M	intern/cycles/scene/scene.cpp

===================================================================

diff --git a/intern/cycles/bvh/embree.cpp b/intern/cycles/bvh/embree.cpp
index ea7480bd193..eed7ae19965 100644
--- a/intern/cycles/bvh/embree.cpp
+++ b/intern/cycles/bvh/embree.cpp
@@ -250,7 +250,7 @@ static void rtc_filter_occluded_func(const RTCFilterFunctionNArguments *args)
         *isect = current_isect;
         /* Only primitives from volume object. */
         uint tri_object = isect->object;
-        int object_flag = kernel_tex_fetch(__object_flag, tri_object);
+        int object_flag = kernel_data_fetch(object_flag, tri_object);
         if ((object_flag & SD_OBJECT_HAS_VOLUME) == 0) {
           --ctx->num_hits;
         }
diff --git a/intern/cycles/device/cpu/device_impl.cpp b/intern/cycles/device/cpu/device_impl.cpp
index 0a4eb089037..d4f0532aa5e 100644
--- a/intern/cycles/device/cpu/device_impl.cpp
+++ b/intern/cycles/device/cpu/device_impl.cpp
@@ -51,7 +51,7 @@
 CCL_NAMESPACE_BEGIN
 
 CPUDevice::CPUDevice(const DeviceInfo &info_, Stats &stats_, Profiler &profiler_)
-    : Device(info_, stats_, profiler_), texture_info(this, "__texture_info", MEM_GLOBAL)
+    : Device(info_, stats_, profiler_), texture_info(this, "texture_info", MEM_GLOBAL)
 {
   /* Pick any kernel, all of them are supposed to have same level of microarchitecture
    * optimization. */
@@ -192,7 +192,7 @@ device_ptr CPUDevice::mem_alloc_sub_ptr(device_memory &mem, size_t offset, size_
 void CPUDevice::const_copy_to(const char *name, void *host, size_t size)
 {
 #ifdef WITH_EMBREE
-  if (strcmp(name, "__data") == 0) {
+  if (strcmp(name, "data") == 0) {
     assert(size <= sizeof(KernelData));
 
     // Update scene handle (since it is different for each device on multi devices)
diff --git a/intern/cycles/device/cuda/device_impl.cpp b/intern/cycles/device/cuda/device_impl.cpp
index e75224abe90..00851a8e91c 100644
--- a/intern/cycles/device/cuda/device_impl.cpp
+++ b/intern/cycles/device/cuda/device_impl.cpp
@@ -23,6 +23,8 @@
 #  include "util/types.h"
 #  include "util/windows.h"
 
+#  include "kernel/device/cuda/globals.h"
+
 CCL_NAMESPACE_BEGIN
 
 class CUDADevice;
@@ -51,7 +53,7 @@ void CUDADevice::set_error(const string &error)
 }
 
 CUDADevice::CUDADevice(const DeviceInfo &info, Stats &stats, Profiler &profiler)
-    : Device(info, stats, profiler), texture_info(this, "__texture_info", MEM_GLOBAL)
+    : Device(info, stats, profiler), texture_info(this, "texture_info", MEM_GLOBAL)
 {
   first_error = true;
 
@@ -900,9 +902,19 @@ void CUDADevice::const_copy_to(const char *name, void *host, size_t size)
   CUdeviceptr mem;
   size_t bytes;
 
-  cuda_assert(cuModuleGetGlobal(&mem, &bytes, cuModule, name));
-  // assert(bytes == size);
-  cuda_assert(cuMemcpyHtoD(mem, host, size));
+  cuda_assert(cuModuleGetGlobal(&mem, &bytes, cuModule, "kernel_params"));
+  assert(bytes == sizeof(KernelParamsCUDA));
+
+  /* Update data storage pointers in launch parameters. */
+#  define KERNEL_DATA_ARRAY(data_type, data_name) \
+    if (strcmp(name, #data_name) == 0) { \
+      cuda_assert(cuMemcpyHtoD(mem + offsetof(KernelParamsCUDA, data_name), host, size)); \
+      return; \
+    }
+  KERNEL_DATA_ARRAY(KernelData, data)
+  KERNEL_DATA_ARRAY(IntegratorStateGPU, integrator_state)
+#  include "kernel/data_arrays.h"
+#  undef KERNEL_DATA_ARRAY
 }
 
 void CUDADevice::global_alloc(device_memory &mem)
@@ -926,7 +938,6 @@ void CUDADevice::tex_alloc(device_texture &mem)
 {
   CUDAContextScope scope(this);
 
-  string bind_name = mem.name;
   size_t dsize = datatype_size(mem.data_type);
   size_t size = mem.memory_size();
 
diff --git a/intern/cycles/device/hip/device_impl.cpp b/intern/cycles/device/hip/device_impl.cpp
index 652c1001f85..82db55ea715 100644
--- a/intern/cycles/device/hip/device_impl.cpp
+++ b/intern/cycles/device/hip/device_impl.cpp
@@ -24,6 +24,8 @@
 #  include "util/types.h"
 #  include "util/windows.h"
 
+#  include "kernel/device/hip/globals.h"
+
 CCL_NAMESPACE_BEGIN
 
 class HIPDevice;
@@ -52,7 +54,7 @@ void HIPDevice::set_error(const string &error)
 }
 
 HIPDevice::HIPDevice(const DeviceInfo &info, Stats &stats, Profiler &profiler)
-    : Device(info, stats, profiler), texture_info(this, "__texture_info", MEM_GLOBAL)
+    : Device(info, stats, profiler), texture_info(this, "texture_info", MEM_GLOBAL)
 {
   first_error = true;
 
@@ -856,8 +858,19 @@ void HIPDevice::const_copy_to(const char *name, void *host, size_t size)
   hipDeviceptr_t mem;
   size_t bytes;
 
-  hip_assert(hipModuleGetGlobal(&mem, &bytes, hipModule, name));
-  hip_assert(hipMemcpyHtoD(mem, host, size));
+  hip_assert(hipModuleGetGlobal(&mem, &bytes, hipModule, "kernel_params"));
+  assert(bytes == sizeof(KernelParamsHIP));
+
+  /* Update data storage pointers in launch parameters. */
+#  define KERNEL_DATA_ARRAY(data_type, data_name) \
+    if (strcmp(name, #data_name) == 0) { \
+      hip_assert(hipMemcpyHtoD(mem + offsetof(KernelParamsHIP, data_name), host, size)); \
+      return; \
+    }
+  KERNEL_DATA_ARRAY(KernelData, data)
+  KERNEL_DATA_ARRAY(IntegratorStateGPU, integrator_state)
+#  include "kernel/data_arrays.h"
+#  undef KERNEL_DATA_ARRAY
 }
 
 void HIPDevice::global_alloc(device_memory &mem)
@@ -881,7 +894,6 @@ void HIPDevice::tex_alloc(device_texture &mem)
 {
   HIPContextScope scope(this);
 
-  string bind_name = mem.name;
   size_t dsize = datatype_size(mem.data_type);
   size_t size = mem.memory_size();
 
diff --git a/intern/cycles/device/memory.h b/intern/cycles/device/memory.h
index 55d6d39cef8..5f44475077e 100644
--- a/intern/cycles/device/memory.h
+++ b/intern/cycles/device/memory.h
@@ -350,7 +350,7 @@ template<typename T> class device_only_memory : public device_memory {
  *
  * When using memory type MEM_GLOBAL, a pointer to this memory will be
  * automatically attached to kernel globals, using the provided name
- * matching an entry in kernel_textures.h. */
+ * matching an entry in kernel/data_arrays.h. */
 
 template<typename T> class device_vector : public device_memory {
  public:
diff --git a/intern/cycles/device/metal/device_impl.mm b/intern/cycles/device/metal/device_impl.mm
index a0ac677beda..0a89055af34 100644
--- a/intern/cycles/device/metal/device_impl.mm
+++ b/intern/cycles/device/metal/device_impl.mm
@@ -35,7 +35,7 @@ void MetalDevice::set_error(const string &error)
 }
 
 MetalDevice::MetalDevice(const DeviceInfo &info, Stats &stats, Profiler &profiler)
-    : Device(info, stats, profiler), texture_info(this, "__texture_info", MEM_GLOBAL)
+    : Device(info, stats, profiler), texture_info(this, "texture_info", MEM_GLOBAL)
 {
   mtlDevId = info.num;
 
@@ -625,7 +625,7 @@ device_ptr MetalDevice::mem_alloc_sub_ptr(device_memory &mem, size_t offset, siz
 
 void MetalDevice::const_copy_to(const char *name, void *host, size_t size)
 {
-  if (strcmp(name, "__data") == 0) {
+  if (strcmp(name, "data") == 0) {
     assert(size == sizeof(KernelData));
     memcpy((uint8_t *)&launch_params + offsetof(KernelParamsMetal, data), host, size);
     return;
@@ -646,19 +646,19 @@ void MetalDevice::const_copy_to(const char *name, void *host, size_t size)
       };
 
   /* Update data storage pointers in launch parameters. */
-  if (strcmp(name, "__integrator_state") == 0) {
+  if (strcmp(name, "integrator_state") == 0) {
     /* IntegratorStateGPU is contiguous pointers */
     const size_t pointer_block_size = sizeof(IntegratorStateGPU);
     update_launch_pointers(
-        offsetof(KernelParamsMetal, __integrator_state), host, size, pointer_block_size);
+        offsetof(KernelParamsMetal, integrator_state), host, size, pointer_block_size);
   }
-#  define KERNEL_TEX(data_type, tex_name) \
+#  define KERNEL_DATA_ARRAY(data_type, tex_name) \
     else if (strcmp(name, #tex_name) == 0) \
     { \
       update_launch_pointers(offsetof(KernelParamsMetal, tex_name), host, size, size); \
     }
-#  include "kernel/textures.h"
-#  undef KERNEL_TEX
+#  include "kernel/data_arrays.h"
+#  undef KERNEL_DATA_ARRAY
 }
 
 void MetalDevice::global_alloc(device_memory &mem)
diff --git a/intern/cycles/device/metal/queue.mm b/intern/cycles/device/metal/queue.mm
index 55db7c5afce..da5408373bb 100644
--- a/intern/cycles/device/metal/queue.mm
+++ b/intern/cycles/device/metal/queue.mm
@@ -358,7 +358,7 @@ bool MetalDeviceQueue::enqueue(DeviceKernel kernel,
 
   /* Prepare any non-pointer (i.e. plain-old-data) KernelParamsMetal data */
   /* The plain-old-data is contiguous, continuing to the end of KernelParamsMetal */
-  size_t plain_old_launch_data_offset = offsetof(KernelParamsMetal, __integrator_state) +
+  size_t plain_old_launch_data_offset = offsetof(KernelParamsMetal, integrator_state) +
                                         sizeof(IntegratorStateGPU);
   size_t plain_old_launch_data_size = sizeof(KernelParamsMetal) - plain_old_launch_data_offset;
   memcpy(init_arg_buffer + globals_offsets + plain_old_launch_data_offset,
@@ -415,7 +415,7 @@ bool MetalDeviceQueue::enqueue(DeviceKernel kernel,
   }
 
   /* this relies on IntegratorStateGPU layout being contiguous device_ptrs  */
-  const size_t pointer_block_end = offsetof(KernelParamsMetal, __integrator_state) +
+  const size_t pointer_block_end = offsetof(KernelParamsMetal, integrator_state) +
                                    sizeof(IntegratorStateGPU);
   for (size_t offset = 0; offset < pointer_block_end; offset += sizeof(device_ptr)) {
     int pointer_index = int(offset / sizeof(device_ptr));
diff --git a/intern/cycles/device/optix/device_impl.cpp b/intern/cycles/device/optix/device_impl.cpp
index 53697db5c04..e7dcc29a2da 100644
--- a/intern/cycles/device/optix/device_impl.cpp
+++ b/intern/cycles/device/optix/device_impl.cpp
@@ -246,7 +246,7 @@ OptiXDevice::Denoiser::Denoiser(OptiXDevice *device)
 OptiXDevice::OptiXDevice(const DeviceInfo &info, Stats &stats, Profiler &profiler)
     : CUDADevice(info, stats, profiler),
       sbt_data(this, "__sbt", MEM_READ_ONLY),
-      launch_params(this, "__params", false),
+      launch_params(this, "kernel_params", false),
       denoiser_(this)
 {
   /* Make the CUDA context current. */
@@ -421,7 +421,7 @@ bool OptiXDevice::load_kernels(const uint kernel_features)
   pipeline_options.numPayloadValues = 8;
   pipeline_options.numAttributeValues = 2; /* u, v */
   pipeline_options.ex

@@ Diff output truncated at 10240 characters. @@



More information about the Bf-blender-cvs mailing list