[Bf-blender-cvs] [8c577cd4040] cycles-x: Cycles X: restore CPU sampling profiler

Brecht Van Lommel noreply at git.blender.org
Thu Jul 29 16:43:04 CEST 2021


Commit: 8c577cd4040a2e8d643d573db87288eb597e0922
Author: Brecht Van Lommel
Date:   Tue Jul 27 15:54:40 2021 +0200
Branches: cycles-x
https://developer.blender.org/rB8c577cd4040a2e8d643d573db87288eb597e0922

Cycles X: restore CPU sampling profiler

Stats now reflect the new kernels. Also change the implementation to unset the
active shader and object; previously there were some hard-to-verify assumptions
about which kernels could rely on them being correctly set.

Differential Revision: https://developer.blender.org/D12042

===================================================================

M	intern/cycles/device/cpu/device_impl.cpp
M	intern/cycles/device/cpu/kernel_thread_globals.cpp
M	intern/cycles/device/cpu/kernel_thread_globals.h
M	intern/cycles/integrator/path_trace_work_cpu.cpp
M	intern/cycles/kernel/bvh/bvh.h
M	intern/cycles/kernel/geom/geom_shader_data.h
M	intern/cycles/kernel/integrator/integrator_init_from_bake.h
M	intern/cycles/kernel/integrator/integrator_init_from_camera.h
M	intern/cycles/kernel/integrator/integrator_intersect_closest.h
M	intern/cycles/kernel/integrator/integrator_intersect_shadow.h
M	intern/cycles/kernel/integrator/integrator_intersect_subsurface.h
M	intern/cycles/kernel/integrator/integrator_intersect_volume_stack.h
M	intern/cycles/kernel/integrator/integrator_shade_background.h
M	intern/cycles/kernel/integrator/integrator_shade_light.h
M	intern/cycles/kernel/integrator/integrator_shade_shadow.h
M	intern/cycles/kernel/integrator/integrator_shade_surface.h
M	intern/cycles/kernel/integrator/integrator_shade_volume.h
M	intern/cycles/kernel/kernel_emission.h
M	intern/cycles/kernel/kernel_profiling.h
M	intern/cycles/kernel/kernel_shader.h
M	intern/cycles/render/stats.cpp
M	intern/cycles/util/util_profiling.cpp
M	intern/cycles/util/util_profiling.h

===================================================================

diff --git a/intern/cycles/device/cpu/device_impl.cpp b/intern/cycles/device/cpu/device_impl.cpp
index 5b54d99303b..4d262db90d1 100644
--- a/intern/cycles/device/cpu/device_impl.cpp
+++ b/intern/cycles/device/cpu/device_impl.cpp
@@ -461,7 +461,7 @@ void CPUDevice::get_cpu_kernel_thread_globals(
   kernel_thread_globals.clear();
   void *osl_memory = get_cpu_osl_memory();
   for (int i = 0; i < info.cpu_threads; i++) {
-    kernel_thread_globals.emplace_back(kernel_globals, osl_memory);
+    kernel_thread_globals.emplace_back(kernel_globals, osl_memory, profiler);
   }
 }
 
diff --git a/intern/cycles/device/cpu/kernel_thread_globals.cpp b/intern/cycles/device/cpu/kernel_thread_globals.cpp
index f0089e34a7a..e2d0c4a3400 100644
--- a/intern/cycles/device/cpu/kernel_thread_globals.cpp
+++ b/intern/cycles/device/cpu/kernel_thread_globals.cpp
@@ -21,16 +21,14 @@
 #include "kernel/osl/osl_globals.h"
 // clang-format on
 
-CCL_NAMESPACE_BEGIN
+#include "util/util_profiling.h"
 
-CPUKernelThreadGlobals::CPUKernelThreadGlobals()
-{
-  reset_runtime_memory();
-}
+CCL_NAMESPACE_BEGIN
 
 CPUKernelThreadGlobals::CPUKernelThreadGlobals(const KernelGlobals &kernel_globals,
-                                               void *osl_globals_memory)
-    : KernelGlobals(kernel_globals)
+                                               void *osl_globals_memory,
+                                               Profiler &cpu_profiler)
+    : KernelGlobals(kernel_globals), cpu_profiler_(cpu_profiler)
 {
   reset_runtime_memory();
 
@@ -46,7 +44,7 @@ CPUKernelThreadGlobals::CPUKernelThreadGlobals(const KernelGlobals &kernel_globa
 }
 
 CPUKernelThreadGlobals::CPUKernelThreadGlobals(CPUKernelThreadGlobals &&other) noexcept
-    : KernelGlobals(std::move(other))
+    : KernelGlobals(std::move(other)), cpu_profiler_(other.cpu_profiler_)
 {
   other.reset_runtime_memory();
 }
@@ -78,4 +76,14 @@ void CPUKernelThreadGlobals::reset_runtime_memory()
 #endif
 }
 
+void CPUKernelThreadGlobals::start_profiling()
+{
+  cpu_profiler_.add_state(&profiler);
+}
+
+void CPUKernelThreadGlobals::stop_profiling()
+{
+  cpu_profiler_.remove_state(&profiler);
+}
+
 CCL_NAMESPACE_END
diff --git a/intern/cycles/device/cpu/kernel_thread_globals.h b/intern/cycles/device/cpu/kernel_thread_globals.h
index 6af31dd8e48..d005c3bb56c 100644
--- a/intern/cycles/device/cpu/kernel_thread_globals.h
+++ b/intern/cycles/device/cpu/kernel_thread_globals.h
@@ -21,6 +21,8 @@
 
 CCL_NAMESPACE_BEGIN
 
+class Profiler;
+
 /* A special class which extends memory ownership of the `KernelGlobals` decoupling any resource
  * which is not thread-safe for access. Every worker thread which needs to operate on
  * `KernelGlobals` needs to initialize its own copy of this object.
@@ -29,11 +31,11 @@ CCL_NAMESPACE_BEGIN
  * there is no unnecessary data duplication happening when using this object. */
 class CPUKernelThreadGlobals : public KernelGlobals {
  public:
-  CPUKernelThreadGlobals();
-
   /* TODO(sergey): Would be nice to have properly typed OSLGlobals even in the case when building
    * without OSL support. Will avoid need to those unnamed pointers and casts. */
-  CPUKernelThreadGlobals(const KernelGlobals &kernel_globals, void *osl_globals_memory);
+  CPUKernelThreadGlobals(const KernelGlobals &kernel_globals,
+                         void *osl_globals_memory,
+                         Profiler &cpu_profiler);
 
   ~CPUKernelThreadGlobals();
 
@@ -43,8 +45,13 @@ class CPUKernelThreadGlobals : public KernelGlobals {
   CPUKernelThreadGlobals &operator=(const CPUKernelThreadGlobals &other) = delete;
   CPUKernelThreadGlobals &operator=(CPUKernelThreadGlobals &&other);
 
+  void start_profiling();
+  void stop_profiling();
+
  protected:
   void reset_runtime_memory();
+
+  Profiler &cpu_profiler_;
 };
 
 CCL_NAMESPACE_END
diff --git a/intern/cycles/integrator/path_trace_work_cpu.cpp b/intern/cycles/integrator/path_trace_work_cpu.cpp
index 12baf7cf94a..fe26b66ca79 100644
--- a/intern/cycles/integrator/path_trace_work_cpu.cpp
+++ b/intern/cycles/integrator/path_trace_work_cpu.cpp
@@ -72,6 +72,10 @@ void PathTraceWorkCPU::render_samples(int start_sample, int samples_num)
   const int64_t image_height = effective_buffer_params_.height;
   const int64_t total_pixels_num = image_width * image_height;
 
+  for (CPUKernelThreadGlobals &kernel_globals : kernel_thread_globals_) {
+    kernel_globals.start_profiling();
+  }
+
   tbb::task_arena local_arena = local_tbb_arena_create(device_);
   local_arena.execute([&]() {
     tbb::parallel_for(int64_t(0), total_pixels_num, [&](int64_t work_index) {
@@ -97,6 +101,10 @@ void PathTraceWorkCPU::render_samples(int start_sample, int samples_num)
       render_samples_full_pipeline(kernel_globals, work_tile, samples_num);
     });
   });
+
+  for (CPUKernelThreadGlobals &kernel_globals : kernel_thread_globals_) {
+    kernel_globals.stop_profiling();
+  }
 }
 
 void PathTraceWorkCPU::render_samples_full_pipeline(KernelGlobals *kernel_globals,
diff --git a/intern/cycles/kernel/bvh/bvh.h b/intern/cycles/kernel/bvh/bvh.h
index 0b517577167..539e9fd05fb 100644
--- a/intern/cycles/kernel/bvh/bvh.h
+++ b/intern/cycles/kernel/bvh/bvh.h
@@ -159,8 +159,6 @@ ccl_device_intersect bool scene_intersect(const KernelGlobals *kg,
                                           const uint visibility,
                                           Intersection *isect)
 {
-  PROFILING_INIT(kg, PROFILING_INTERSECT);
-
 #ifdef __KERNEL_OPTIX__
   uint p0 = 0;
   uint p1 = 0;
@@ -247,8 +245,6 @@ ccl_device_intersect bool scene_intersect_local(const KernelGlobals *kg,
                                                 uint *lcg_state,
                                                 int max_hits)
 {
-  PROFILING_INIT(kg, PROFILING_INTERSECT_LOCAL);
-
 #  ifdef __KERNEL_OPTIX__
   uint p0 = ((uint64_t)lcg_state) & 0xFFFFFFFF;
   uint p1 = (((uint64_t)lcg_state) >> 32) & 0xFFFFFFFF;
@@ -362,8 +358,6 @@ ccl_device_intersect bool scene_intersect_shadow_all(const KernelGlobals *kg,
                                                      uint max_hits,
                                                      uint *num_hits)
 {
-  PROFILING_INIT(kg, PROFILING_INTERSECT_SHADOW_ALL);
-
 #  ifdef __KERNEL_OPTIX__
   uint p0 = ((uint64_t)isect) & 0xFFFFFFFF;
   uint p1 = (((uint64_t)isect) >> 32) & 0xFFFFFFFF;
@@ -442,8 +436,6 @@ ccl_device_intersect bool scene_intersect_volume(const KernelGlobals *kg,
                                                  Intersection *isect,
                                                  const uint visibility)
 {
-  PROFILING_INIT(kg, PROFILING_INTERSECT_VOLUME);
-
 #  ifdef __KERNEL_OPTIX__
   uint p0 = 0;
   uint p1 = 0;
@@ -502,8 +494,6 @@ ccl_device_intersect uint scene_intersect_volume_all(const KernelGlobals *kg,
                                                      const uint max_hits,
                                                      const uint visibility)
 {
-  PROFILING_INIT(kg, PROFILING_INTERSECT_VOLUME_ALL);
-
   if (!scene_intersect_valid(ray)) {
     return false;
   }
diff --git a/intern/cycles/kernel/geom/geom_shader_data.h b/intern/cycles/kernel/geom/geom_shader_data.h
index 68670992ed7..fb2cb5cb1ea 100644
--- a/intern/cycles/kernel/geom/geom_shader_data.h
+++ b/intern/cycles/kernel/geom/geom_shader_data.h
@@ -43,8 +43,6 @@ ccl_device_inline void shader_setup_from_ray(const KernelGlobals *ccl_restrict k
                                              const Ray *ccl_restrict ray,
                                              const Intersection *ccl_restrict isect)
 {
-  PROFILING_INIT(kg, PROFILING_SHADER_SETUP);
-
   /* Read intersection data into shader globals.
    *
    * TODO: this is redundant, could potentially remove some of this from
@@ -134,8 +132,6 @@ ccl_device_inline void shader_setup_from_ray(const KernelGlobals *ccl_restrict k
   differential_incoming_compact(&sd->dI, ray->D, ray->dD);
   differential_dudv(&sd->du, &sd->dv, sd->dPdu, sd->dPdv, sd->dP, sd->Ng);
 #endif
-  PROFILING_SHADER(sd->shader);
-  PROFILING_OBJECT(sd->object);
 }
 
 /* ShaderData setup from position sampled on mesh */
@@ -155,8 +151,6 @@ ccl_device_inline void shader_setup_from_sample(const KernelGlobals *ccl_restric
                                                 bool object_space,
                                                 int lamp)
 {
-  PROFILING_INIT(kg, PROFILING_SHADER_SETUP);
-
   /* vectors */
   sd->P = P;
   sd->N = Ng;
@@ -250,9 +244,6 @@ ccl_device_inline void shader_setup_from_sample(const KernelGlobals *ccl_restric
   sd->du = differential_zero();
   sd->dv = differential_zero();
 #endif
-
-  PROFILING_SHADER(sd->shader);
-  PROFILING_OBJECT(sd->object);
 }
 
 /* ShaderData setup for displacement */
@@ -297,8 +288,6 @@ ccl_device_inline void shader_setup_from_background(const KernelGlobals *ccl_res
                                                     const float3 ray_D,
                                                     const float ray_time)
 {
-  PROFILING_INIT(kg, PROFILING_SHADER_SETUP);
-
   /* for NDC coordinates */
   sd->ray_P = ray_P;
 
@@ -332,9 +321,6 @@ ccl_device_inline void shader_setup_from_background(const KernelGlobals *ccl_res
   sd->du = differential_zero();
   sd->dv = differential_zero();
 #endif
-
-  PROFILING_SHADER(sd->shader);
-  PROFILING_OBJECT(sd->object);
 }
 
 /* ShaderData setup from point inside volume */
@@ -344,7 +330,6 @@ ccl_device_inline void shader_setup_from_volume(const KernelGlobals *ccl_restric
                                                 ShaderData *ccl_restrict sd,
                                                 const Ray *ccl_restrict ray)
 {
-  PROFILING_INIT(kg, PROFILING_SHADER_SETUP);
 
   /* vectors */
   sd->P = ray->P;
@@ -382,9 +367,6 @@ ccl_device_inline void shader_setup_from_volume(const KernelGlobals *ccl_restric
   /* for NDC coordinates */
   sd->ray_P = ray->P;
   sd->ray_dP = ray->dP;
-
-  PROFILING_SHADER(sd->shader);
-  PROFILING_OBJECT(sd->object);
 }
 #endif /* __VOLUME__ */
 
diff --git a/intern/cycles/kernel/integrator/integrator_init_from_bake.h b/intern/cycles/kernel/integrator/integrator_init_from_bake.h
index 98ba0708e60..4898ff936c6 100644
--- a/intern/cycles/kernel/integrator/integrator_init_f

@@ Diff output truncated at 10240 characters. @@



More information about the Bf-blender-cvs mailing list