[Bf-blender-cvs] [2430f752797] master: Cycles: reduce GPU state memory a little

Brecht Van Lommel noreply at git.blender.org
Mon Oct 18 19:03:54 CEST 2021


Commit: 2430f752797b83cd43892f656f5297fd6e0bb619
Author: Brecht Van Lommel
Date:   Mon Oct 18 17:53:32 2021 +0200
Branches: master
https://developer.blender.org/rB2430f752797b83cd43892f656f5297fd6e0bb619

Cycles: reduce GPU state memory a little

* isect Ng is no longer stored for shadow intersections; on the main path it
  is needed for SSS only, so it is now kept in the subsurface state instead
* Reduce rng_offset and queued_kernel to 16 bits

Ref D12889

===================================================================

M	intern/cycles/kernel/bvh/bvh_embree.h
M	intern/cycles/kernel/integrator/integrator_init_from_bake.h
M	intern/cycles/kernel/integrator/integrator_state_template.h
M	intern/cycles/kernel/integrator/integrator_state_util.h
M	intern/cycles/kernel/integrator/integrator_subsurface.h
M	intern/cycles/kernel/integrator/integrator_subsurface_disk.h
M	intern/cycles/kernel/integrator/integrator_subsurface_random_walk.h
M	intern/cycles/kernel/kernel_types.h

===================================================================

diff --git a/intern/cycles/kernel/bvh/bvh_embree.h b/intern/cycles/kernel/bvh/bvh_embree.h
index 7fa0cfdc510..4f85e8bee4b 100644
--- a/intern/cycles/kernel/bvh/bvh_embree.h
+++ b/intern/cycles/kernel/bvh/bvh_embree.h
@@ -107,7 +107,6 @@ ccl_device_inline void kernel_embree_convert_hit(KernelGlobals kg,
                                                  Intersection *isect)
 {
   isect->t = ray->tfar;
-  isect->Ng = make_float3(hit->Ng_x, hit->Ng_y, hit->Ng_z);
   if (hit->instID[0] != RTC_INVALID_GEOMETRY_ID) {
     RTCScene inst_scene = (RTCScene)rtcGetGeometryUserData(
         rtcGetGeometry(kernel_data.bvh.scene, hit->instID[0]));
@@ -142,7 +141,6 @@ ccl_device_inline void kernel_embree_convert_sss_hit(
   isect->u = 1.0f - hit->v - hit->u;
   isect->v = hit->u;
   isect->t = ray->tfar;
-  isect->Ng = make_float3(hit->Ng_x, hit->Ng_y, hit->Ng_z);
   RTCScene inst_scene = (RTCScene)rtcGetGeometryUserData(
       rtcGetGeometry(kernel_data.bvh.scene, object * 2));
   isect->prim = hit->primID +
diff --git a/intern/cycles/kernel/integrator/integrator_init_from_bake.h b/intern/cycles/kernel/integrator/integrator_init_from_bake.h
index df3c2103c5b..9bc115150ff 100644
--- a/intern/cycles/kernel/integrator/integrator_init_from_bake.h
+++ b/intern/cycles/kernel/integrator/integrator_init_from_bake.h
@@ -180,9 +180,6 @@ ccl_device bool integrator_init_from_bake(KernelGlobals kg,
     isect.v = v;
     isect.t = 1.0f;
     isect.type = PRIMITIVE_TRIANGLE;
-#ifdef __EMBREE__
-    isect.Ng = Ng;
-#endif
     integrator_state_write_isect(kg, state, &isect);
 
     /* Setup next kernel to execute. */
diff --git a/intern/cycles/kernel/integrator/integrator_state_template.h b/intern/cycles/kernel/integrator/integrator_state_template.h
index 0fe47cf13bc..d9801574d4f 100644
--- a/intern/cycles/kernel/integrator/integrator_state_template.h
+++ b/intern/cycles/kernel/integrator/integrator_state_template.h
@@ -40,13 +40,12 @@ KERNEL_STRUCT_MEMBER(path, uint16_t, volume_bounce, KERNEL_FEATURE_PATH_TRACING)
 KERNEL_STRUCT_MEMBER(path, uint16_t, volume_bounds_bounce, KERNEL_FEATURE_PATH_TRACING)
 /* Current transparent ray bounce depth. */
 KERNEL_STRUCT_MEMBER(path, uint16_t, transparent_bounce, KERNEL_FEATURE_PATH_TRACING)
-/* DeviceKernel bit indicating queued kernels.
- * TODO: reduce size? */
-KERNEL_STRUCT_MEMBER(path, uint32_t, queued_kernel, KERNEL_FEATURE_PATH_TRACING)
+/* DeviceKernel bit indicating queued kernels. */
+KERNEL_STRUCT_MEMBER(path, uint16_t, queued_kernel, KERNEL_FEATURE_PATH_TRACING)
 /* Random number generator seed. */
 KERNEL_STRUCT_MEMBER(path, uint32_t, rng_hash, KERNEL_FEATURE_PATH_TRACING)
 /* Random number dimension offset. */
-KERNEL_STRUCT_MEMBER(path, uint32_t, rng_offset, KERNEL_FEATURE_PATH_TRACING)
+KERNEL_STRUCT_MEMBER(path, uint16_t, rng_offset, KERNEL_FEATURE_PATH_TRACING)
 /* enum PathRayFlag */
 KERNEL_STRUCT_MEMBER(path, uint32_t, flag, KERNEL_FEATURE_PATH_TRACING)
 /* Multiple importance sampling
@@ -89,8 +88,6 @@ KERNEL_STRUCT_MEMBER(isect, float, v, KERNEL_FEATURE_PATH_TRACING)
 KERNEL_STRUCT_MEMBER(isect, int, prim, KERNEL_FEATURE_PATH_TRACING)
 KERNEL_STRUCT_MEMBER(isect, int, object, KERNEL_FEATURE_PATH_TRACING)
 KERNEL_STRUCT_MEMBER(isect, int, type, KERNEL_FEATURE_PATH_TRACING)
-/* TODO: exclude for GPU. */
-KERNEL_STRUCT_MEMBER(isect, float3, Ng, KERNEL_FEATURE_PATH_TRACING)
 KERNEL_STRUCT_END(isect)
 
 /*************** Subsurface closure state for subsurface kernel ***************/
@@ -99,6 +96,7 @@ KERNEL_STRUCT_BEGIN(subsurface)
 KERNEL_STRUCT_MEMBER(subsurface, float3, albedo, KERNEL_FEATURE_SUBSURFACE)
 KERNEL_STRUCT_MEMBER(subsurface, float3, radius, KERNEL_FEATURE_SUBSURFACE)
 KERNEL_STRUCT_MEMBER(subsurface, float, anisotropy, KERNEL_FEATURE_SUBSURFACE)
+KERNEL_STRUCT_MEMBER(subsurface, float3, Ng, KERNEL_FEATURE_SUBSURFACE)
 KERNEL_STRUCT_END(subsurface)
 
 /********************************** Volume Stack ******************************/
@@ -117,9 +115,8 @@ KERNEL_STRUCT_BEGIN(shadow_path)
 KERNEL_STRUCT_MEMBER(shadow_path, uint16_t, bounce, KERNEL_FEATURE_PATH_TRACING)
 /* Current transparent ray bounce depth. */
 KERNEL_STRUCT_MEMBER(shadow_path, uint16_t, transparent_bounce, KERNEL_FEATURE_PATH_TRACING)
-/* DeviceKernel bit indicating queued kernels.
- * TODO: reduce size? */
-KERNEL_STRUCT_MEMBER(shadow_path, uint32_t, queued_kernel, KERNEL_FEATURE_PATH_TRACING)
+/* DeviceKernel bit indicating queued kernels. */
+KERNEL_STRUCT_MEMBER(shadow_path, uint16_t, queued_kernel, KERNEL_FEATURE_PATH_TRACING)
 /* enum PathRayFlag */
 KERNEL_STRUCT_MEMBER(shadow_path, uint32_t, flag, KERNEL_FEATURE_PATH_TRACING)
 /* Throughput. */
@@ -152,8 +149,6 @@ KERNEL_STRUCT_ARRAY_MEMBER(shadow_isect, float, v, KERNEL_FEATURE_PATH_TRACING)
 KERNEL_STRUCT_ARRAY_MEMBER(shadow_isect, int, prim, KERNEL_FEATURE_PATH_TRACING)
 KERNEL_STRUCT_ARRAY_MEMBER(shadow_isect, int, object, KERNEL_FEATURE_PATH_TRACING)
 KERNEL_STRUCT_ARRAY_MEMBER(shadow_isect, int, type, KERNEL_FEATURE_PATH_TRACING)
-/* TODO: exclude for GPU. */
-KERNEL_STRUCT_ARRAY_MEMBER(shadow_isect, float3, Ng, KERNEL_FEATURE_PATH_TRACING)
 KERNEL_STRUCT_END_ARRAY(shadow_isect,
                         INTEGRATOR_SHADOW_ISECT_SIZE_CPU,
                         INTEGRATOR_SHADOW_ISECT_SIZE_GPU)
diff --git a/intern/cycles/kernel/integrator/integrator_state_util.h b/intern/cycles/kernel/integrator/integrator_state_util.h
index bb372f9e984..18dcdff12ad 100644
--- a/intern/cycles/kernel/integrator/integrator_state_util.h
+++ b/intern/cycles/kernel/integrator/integrator_state_util.h
@@ -82,9 +82,6 @@ ccl_device_forceinline void integrator_state_write_isect(
   INTEGRATOR_STATE_WRITE(state, isect, object) = isect->object;
   INTEGRATOR_STATE_WRITE(state, isect, prim) = isect->prim;
   INTEGRATOR_STATE_WRITE(state, isect, type) = isect->type;
-#ifdef __EMBREE__
-  INTEGRATOR_STATE_WRITE(state, isect, Ng) = isect->Ng;
-#endif
 }
 
 ccl_device_forceinline void integrator_state_read_isect(
@@ -96,9 +93,6 @@ ccl_device_forceinline void integrator_state_read_isect(
   isect->u = INTEGRATOR_STATE(state, isect, u);
   isect->v = INTEGRATOR_STATE(state, isect, v);
   isect->t = INTEGRATOR_STATE(state, isect, t);
-#ifdef __EMBREE__
-  isect->Ng = INTEGRATOR_STATE(state, isect, Ng);
-#endif
 }
 
 ccl_device_forceinline VolumeStack integrator_state_read_volume_stack(ConstIntegratorState state,
@@ -136,9 +130,6 @@ ccl_device_forceinline void integrator_state_write_shadow_isect(
   INTEGRATOR_STATE_ARRAY_WRITE(state, shadow_isect, index, object) = isect->object;
   INTEGRATOR_STATE_ARRAY_WRITE(state, shadow_isect, index, prim) = isect->prim;
   INTEGRATOR_STATE_ARRAY_WRITE(state, shadow_isect, index, type) = isect->type;
-#ifdef __EMBREE__
-  INTEGRATOR_STATE_ARRAY_WRITE(state, shadow_isect, index, Ng) = isect->Ng;
-#endif
 }
 
 ccl_device_forceinline void integrator_state_read_shadow_isect(
@@ -150,9 +141,6 @@ ccl_device_forceinline void integrator_state_read_shadow_isect(
   isect->u = INTEGRATOR_STATE_ARRAY(state, shadow_isect, index, u);
   isect->v = INTEGRATOR_STATE_ARRAY(state, shadow_isect, index, v);
   isect->t = INTEGRATOR_STATE_ARRAY(state, shadow_isect, index, t);
-#ifdef __EMBREE__
-  isect->Ng = INTEGRATOR_STATE_ARRAY(state, shadow_isect, index, Ng);
-#endif
 }
 
 ccl_device_forceinline void integrator_state_copy_volume_stack_to_shadow(KernelGlobals kg,
diff --git a/intern/cycles/kernel/integrator/integrator_subsurface.h b/intern/cycles/kernel/integrator/integrator_subsurface.h
index 448c99765e3..e9517a82453 100644
--- a/intern/cycles/kernel/integrator/integrator_subsurface.h
+++ b/intern/cycles/kernel/integrator/integrator_subsurface.h
@@ -56,7 +56,7 @@ ccl_device int subsurface_bounce(KernelGlobals kg,
   INTEGRATOR_STATE_WRITE(state, ray, dD) = differential_zero_compact();
 
   /* Pass along object info, reusing isect to save memory. */
-  INTEGRATOR_STATE_WRITE(state, isect, Ng) = sd->Ng;
+  INTEGRATOR_STATE_WRITE(state, subsurface, Ng) = sd->Ng;
   INTEGRATOR_STATE_WRITE(state, isect, object) = sd->object;
 
   uint32_t path_flag = (INTEGRATOR_STATE(state, path, flag) & ~PATH_RAY_CAMERA) |
@@ -160,7 +160,7 @@ ccl_device_inline bool subsurface_scatter(KernelGlobals kg, IntegratorState stat
 
     if (object_flag & SD_OBJECT_INTERSECTS_VOLUME) {
       float3 P = INTEGRATOR_STATE(state, ray, P);
-      const float3 Ng = INTEGRATOR_STATE(state, isect, Ng);
+      const float3 Ng = INTEGRATOR_STATE(state, subsurface, Ng);
       const float3 offset_P = ray_offset(P, -Ng);
 
       integrator_volume_stack_update_for_subsurface(kg, state, offset_P, ray.P);
diff --git a/intern/cycles/kernel/integrator/integrator_subsurface_disk.h b/intern/cycles/kernel/integrator/integrator_subsurface_disk.h
index 1de05ea2696..e1cce13fb30 100644
--- a/intern/cycles/kernel/integrator/integrator_subsurface_disk.h
+++ b/intern/cycles/kernel/integrator/integrator_subsurface_disk.h
@@ -45,7 +45,7 @@ ccl_device_inline bool subsurface_disk(KernelGlobals kg,
   const float3 P = INTEGRATOR_STATE(state, ray, P);
   const float ray_dP = INTEGRATOR_STATE(state, ray, dP);
   const float time = INTEGRATOR_STATE(state, ray, time);
-  const float3 Ng = INTEGRATOR_STATE(state, isect, Ng);
+  const float3 Ng = INTEGRATOR_STATE(state, subsurface, Ng);
   const int object = INTEGRATOR_STATE(state, isect, object);
 
   /* Read subsurface scattering parameters. */
diff --git a/intern/cycles/kernel/integrator/integrator_subsurface_random_walk.h b/intern/cycles/kernel/integrator/integrator_subsurface_random_walk.h
index 5365093decf..2ab6d0961e3 100644
--- a/intern/cycles/kernel/integrator/integrator_subsurface_random_walk.h
+++ b/intern/cycles/kernel/integrator/integrator_subsurface_random_walk.h
@@ -193,7 +193,7 @@ ccl_device_inline bool subsurface_random_walk(KernelGlobals kg,
   const float3 N = INTEGRATOR_STATE(state, ray, D);
   const float ray_dP = INTEGRATOR_STATE(state, ray, dP);
   const float time = INTEGRATOR_STATE(state, ray, time);
-  const float3 Ng = INTEGRATOR_STATE(state, isect, Ng);
+  const float3 Ng = INTEGRATOR_STATE(state, subsurface, Ng);
   const int object = INTEGRATOR_STATE(state, isect, object);
 
   /* Sample diffuse surface scatter into the object. */
diff --git a/intern/cycles/kernel/ker

@@ Diff output truncated at 10240 characters. @@



More information about the Bf-blender-cvs mailing list