[Bf-blender-cvs] [0e411172cd] cycles_split_kernel: Cycles: Split path initialization into own kernel

Mai Lavelle noreply at git.blender.org
Fri Mar 3 12:02:29 CET 2017


Commit: 0e411172cd0d1f5a6bdbe2d76a8b2a5486af2ee5
Author: Mai Lavelle
Date:   Wed Mar 1 01:05:55 2017 -0500
Branches: cycles_split_kernel
https://developer.blender.org/rB0e411172cd0d1f5a6bdbe2d76a8b2a5486af2ee5

Cycles: Split path initialization into own kernel

This makes it easier to initialize state correctly in the data_init kernel
before it is needed by path tracing.

===================================================================

M	intern/cycles/device/device_split_kernel.cpp
M	intern/cycles/device/device_split_kernel.h
M	intern/cycles/kernel/CMakeLists.txt
M	intern/cycles/kernel/kernels/cpu/kernel_cpu.h
M	intern/cycles/kernel/kernels/cpu/kernel_cpu_impl.h
A	intern/cycles/kernel/kernels/opencl/kernel_path_init.cl
M	intern/cycles/kernel/split/kernel_data_init.h
A	intern/cycles/kernel/split/kernel_path_init.h

===================================================================

diff --git a/intern/cycles/device/device_split_kernel.cpp b/intern/cycles/device/device_split_kernel.cpp
index c50afe85da..85da7024a2 100644
--- a/intern/cycles/device/device_split_kernel.cpp
+++ b/intern/cycles/device/device_split_kernel.cpp
@@ -41,6 +41,7 @@ DeviceSplitKernel::~DeviceSplitKernel()
 	device->mem_free(queue_index);
 	device->mem_free(work_pool_wgs);
 
+	delete kernel_path_init;
 	delete kernel_scene_intersect;
 	delete kernel_lamp_emission;
 	delete kernel_queue_enqueue;
@@ -61,6 +62,7 @@ bool DeviceSplitKernel::load_kernels(const DeviceRequestedFeatures& requested_fe
 			return false; \
 		}
 
+	LOAD_KERNEL(path_init);
 	LOAD_KERNEL(scene_intersect);
 	LOAD_KERNEL(lamp_emission);
 	LOAD_KERNEL(queue_enqueue);
@@ -200,6 +202,8 @@ bool DeviceSplitKernel::path_trace(DeviceTask *task,
 			return false;
 		}
 
+		ENQUEUE_SPLIT_KERNEL(path_init, global_size, local_size);
+
 		bool activeRaysAvailable = true;
 
 		while(activeRaysAvailable) {
diff --git a/intern/cycles/device/device_split_kernel.h b/intern/cycles/device/device_split_kernel.h
index b3106fd563..1903574f0b 100644
--- a/intern/cycles/device/device_split_kernel.h
+++ b/intern/cycles/device/device_split_kernel.h
@@ -55,6 +55,7 @@ class DeviceSplitKernel {
 private:
 	Device *device;
 
+	SplitKernelFunction *kernel_path_init;
 	SplitKernelFunction *kernel_scene_intersect;
 	SplitKernelFunction *kernel_lamp_emission;
 	SplitKernelFunction *kernel_queue_enqueue;
diff --git a/intern/cycles/kernel/CMakeLists.txt b/intern/cycles/kernel/CMakeLists.txt
index 685955170b..d467e40b3e 100644
--- a/intern/cycles/kernel/CMakeLists.txt
+++ b/intern/cycles/kernel/CMakeLists.txt
@@ -16,6 +16,7 @@ set(SRC
 	kernels/cpu/kernel_split.cpp
 	kernels/opencl/kernel.cl
 	kernels/opencl/kernel_data_init.cl
+	kernels/opencl/kernel_path_init.cl
 	kernels/opencl/kernel_queue_enqueue.cl
 	kernels/opencl/kernel_scene_intersect.cl
 	kernels/opencl/kernel_lamp_emission.cl
@@ -201,6 +202,7 @@ set(SRC_SPLIT_HEADERS
 	split/kernel_holdout_emission_blurring_pathtermination_ao.h
 	split/kernel_lamp_emission.h
 	split/kernel_next_iteration_setup.h
+	split/kernel_path_init.h
 	split/kernel_queue_enqueue.h
 	split/kernel_scene_intersect.h
 	split/kernel_shader_eval.h
@@ -400,6 +402,7 @@ endif()
 
 delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "kernels/opencl/kernel.cl" ${CYCLES_INSTALL_PATH}/kernel/kernels/opencl)
 delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "kernels/opencl/kernel_data_init.cl" ${CYCLES_INSTALL_PATH}/kernel/kernels/opencl)
+delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "kernels/opencl/kernel_path_init.cl" ${CYCLES_INSTALL_PATH}/kernel/kernels/opencl)
 delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "kernels/opencl/kernel_queue_enqueue.cl" ${CYCLES_INSTALL_PATH}/kernel/kernels/opencl)
 delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "kernels/opencl/kernel_scene_intersect.cl" ${CYCLES_INSTALL_PATH}/kernel/kernels/opencl)
 delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "kernels/opencl/kernel_lamp_emission.cl" ${CYCLES_INSTALL_PATH}/kernel/kernels/opencl)
diff --git a/intern/cycles/kernel/kernels/cpu/kernel_cpu.h b/intern/cycles/kernel/kernels/cpu/kernel_cpu.h
index 1d71015781..8c1675665c 100644
--- a/intern/cycles/kernel/kernels/cpu/kernel_cpu.h
+++ b/intern/cycles/kernel/kernels/cpu/kernel_cpu.h
@@ -71,6 +71,7 @@ void KERNEL_FUNCTION_FULL_NAME(data_init)(
 #define DECLARE_SPLIT_KERNEL_FUNCTION(name) \
 	void KERNEL_FUNCTION_FULL_NAME(name)(KernelGlobals *kg, KernelData *data);
 
+DECLARE_SPLIT_KERNEL_FUNCTION(path_init)
 DECLARE_SPLIT_KERNEL_FUNCTION(scene_intersect)
 DECLARE_SPLIT_KERNEL_FUNCTION(lamp_emission)
 DECLARE_SPLIT_KERNEL_FUNCTION(queue_enqueue)
diff --git a/intern/cycles/kernel/kernels/cpu/kernel_cpu_impl.h b/intern/cycles/kernel/kernels/cpu/kernel_cpu_impl.h
index c59f489254..f6e0591ef2 100644
--- a/intern/cycles/kernel/kernels/cpu/kernel_cpu_impl.h
+++ b/intern/cycles/kernel/kernels/cpu/kernel_cpu_impl.h
@@ -38,6 +38,7 @@
 #  include "split/kernel_split_common.h"
 
 #  include "split/kernel_data_init.h"
+#  include "split/kernel_path_init.h"
 #  include "split/kernel_scene_intersect.h"
 #  include "split/kernel_lamp_emission.h"
 #  include "split/kernel_queue_enqueue.h"
@@ -163,6 +164,7 @@ void KERNEL_FUNCTION_FULL_NAME(shader)(KernelGlobals *kg,
 		kernel_##name(kg); \
 	}
 
+DEFINE_SPLIT_KERNEL_FUNCTION(path_init)
 DEFINE_SPLIT_KERNEL_FUNCTION(scene_intersect)
 DEFINE_SPLIT_KERNEL_FUNCTION(lamp_emission)
 DEFINE_SPLIT_KERNEL_FUNCTION(queue_enqueue)
@@ -186,6 +188,7 @@ void KERNEL_FUNCTION_FULL_NAME(register_functions)(void(*reg)(const char* name,
 	REGISTER(shader);
 
 	REGISTER(data_init);
+	REGISTER(path_init);
 	REGISTER(scene_intersect);
 	REGISTER(lamp_emission);
 	REGISTER(queue_enqueue);
diff --git a/intern/cycles/kernel/kernels/opencl/kernel_path_init.cl b/intern/cycles/kernel/kernels/opencl/kernel_path_init.cl
new file mode 100644
index 0000000000..7e9e4a0252
--- /dev/null
+++ b/intern/cycles/kernel/kernels/opencl/kernel_path_init.cl
@@ -0,0 +1,26 @@
+/*
+ * Copyright 2011-2017 Blender Foundation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernel_compat_opencl.h"
+#include "split/kernel_split_common.h"
+#include "split/kernel_path_init.h"
+
+__kernel void kernel_ocl_path_trace_path_init(
+        KernelGlobals *kg,
+        ccl_constant KernelData *data)
+{
+	kernel_path_init(kg);
+}
diff --git a/intern/cycles/kernel/split/kernel_data_init.h b/intern/cycles/kernel/split/kernel_data_init.h
index 5604363dcd..982c7be200 100644
--- a/intern/cycles/kernel/split/kernel_data_init.h
+++ b/intern/cycles/kernel/split/kernel_data_init.h
@@ -18,33 +18,6 @@ CCL_NAMESPACE_BEGIN
 
 /* Note on kernel_data_initialization kernel
  * This kernel Initializes structures needed in path-iteration kernels.
- * This is the first kernel in ray-tracing logic.
- *
- * Ray state of rays outside the tile-boundary will be marked RAY_INACTIVE
- *
- * Its input and output are as follows,
- *
- * Un-initialized rng---------------|--- kernel_data_initialization ---|--- Initialized rng
- * Un-initialized throughput -------|                                  |--- Initialized throughput
- * Un-initialized L_transparent ----|                                  |--- Initialized L_transparent
- * Un-initialized PathRadiance -----|                                  |--- Initialized PathRadiance
- * Un-initialized Ray --------------|                                  |--- Initialized Ray
- * Un-initialized PathState --------|                                  |--- Initialized PathState
- * Un-initialized QueueData --------|                                  |--- Initialized QueueData (to QUEUE_EMPTY_SLOT)
- * Un-initialized QueueIndex -------|                                  |--- Initialized QueueIndex (to 0)
- * Un-initialized use_queues_flag---|                                  |--- Initialized use_queues_flag (to false)
- * Un-initialized ray_state --------|                                  |--- Initialized ray_state
- * parallel_samples --------------- |                                  |--- Initialized per_sample_output_buffers
- * rng_state -----------------------|                                  |--- Initialized work_array
- * data ----------------------------|                                  |--- Initialized work_pool_wgs
- * start_sample --------------------|                                  |
- * sx ------------------------------|                                  |
- * sy ------------------------------|                                  |
- * sw ------------------------------|                                  |
- * sh ------------------------------|                                  |
- * stride --------------------------|                                  |
- * queuesize -----------------------|                                  |
- * num_samples ---------------------|                                  |
  *
  * Note on Queues :
  * All slots in queues are initialized to queue empty slot;
@@ -137,80 +110,6 @@ void KERNEL_FUNCTION_FULL_NAME(data_init)(
 		 */
 		*use_queues_flag = 0;
 	}
-
-	int ray_index = ccl_global_id(0) + ccl_global_id(1) * ccl_global_size(0);
-
-	/* This is the first assignment to ray_state;
-	 * So we dont use ASSIGN_RAY_STATE macro.
-	 */
-	kernel_split_state.ray_state[ray_index] = RAY_ACTIVE;
-
-	unsigned int my_sample;
-	unsigned int pixel_x;
-	unsigned int pixel_y;
-	unsigned int tile_x;
-	unsigned int tile_y;
-	unsigned int my_sample_tile;
-
-	unsigned int work_index = 0;
-	/* Get work. */
-	if(!get_next_work(kg, &work_index, ray_index)) {
-		/* No more work, mark ray as inactive */
-		kernel_split_state.ray_state[ray_index] = RAY_INACTIVE;
-
-		return;
-	}
-
-	/* Get the sample associated with the work. */
-	my_sample = get_work_sample(kg, work_index, ray_index) + start_sample;
-
-	my_sample_tile = 0;
-
-	/* Get pixel and tile position associated with the work. */
-	get_work_pixel_tile_position(kg, &pixel_x, &pixel_y,
-	                        &tile_x, &tile_y,
-	                        work_index,
-	                        ray_index);
-	kernel_split_state.work_array[ray_index] = work_index;
-
-	rng_state += kernel_split_params.offset + pixel_x + pixel_y*stride;
-
-	ccl_global float *per_sample_output_buffers = kernel_split_state.per_sample_output_buffers;
-	per_sample_output_buffers += ((tile_x + (tile_y * stride)) + (my_sample_tile)) * kernel_data.film.pass_stride;
-
-	/* Initialize random numbers and ray. */
-	kernel_path_trace_setup(kg,
-	                        rng_state,
-	                        my_sample,
-	                        pixel_x, pixel_y,
-	                        &kernel_split_state.rng[ray_index],
-	                        &kernel_split_state.ray[ray_

@@ Diff output truncated at 10240 characters. @@




More information about the Bf-blender-cvs mailing list