[Bf-blender-cvs] [4b33667] master: Deduplicate some code by using a function pointer to the real kernel. This has no performance impact whatsoever and is already used in the adaptive sampling patch

Martijn Berger noreply at git.blender.org
Thu Oct 30 10:23:50 CET 2014


Commit: 4b33667b93a3c3cf28478ce06e5200cb2ac2493e
Author: Martijn Berger
Date:   Thu Oct 30 10:17:42 2014 +0100
Branches: master
https://developer.blender.org/rB4b33667b93a3c3cf28478ce06e5200cb2ac2493e

Deduplicate some code by using a function pointer to the real kernel
This has no performance impact whatsoever and is already used in the adaptive sampling patch.

===================================================================

M	intern/cycles/device/device_cpu.cpp

===================================================================

diff --git a/intern/cycles/device/device_cpu.cpp b/intern/cycles/device/device_cpu.cpp
index c9b8a5b..242cc65 100644
--- a/intern/cycles/device/device_cpu.cpp
+++ b/intern/cycles/device/device_cpu.cpp
@@ -170,124 +170,42 @@ public:
 #endif
 
 		RenderTile tile;
-		
-		while(task.acquire_tile(this, tile)) {
-			float *render_buffer = (float*)tile.buffer;
-			uint *rng_state = (uint*)tile.rng_state;
-			int start_sample = tile.start_sample;
-			int end_sample = tile.start_sample + tile.num_samples;
-
-#ifdef WITH_CYCLES_OPTIMIZED_KERNEL_AVX2
-			if(system_cpu_support_avx2()) {
-				for(int sample = start_sample; sample < end_sample; sample++) {
-					if (task.get_cancel() || task_pool.canceled()) {
-						if(task.need_finish_queue == false)
-							break;
-					}
 
-					for(int y = tile.y; y < tile.y + tile.h; y++) {
-						for(int x = tile.x; x < tile.x + tile.w; x++) {
-							kernel_cpu_avx2_path_trace(&kg, render_buffer, rng_state,
-													  sample, x, y, tile.offset, tile.stride);
-						}
-					}
+		void(*path_trace_kernel)(KernelGlobals*, float*, unsigned int*, int, int, int, int, int);
 
-					tile.sample = sample + 1;
-
-					task.update_progress(&tile);
-				}
-			}
-			else
+#ifdef WITH_CYCLES_OPTIMIZED_KERNEL_AVX2
+		if(system_cpu_support_avx2())
+			path_trace_kernel = kernel_cpu_avx2_path_trace;
+		else
 #endif
 #ifdef WITH_CYCLES_OPTIMIZED_KERNEL_AVX
-			if(system_cpu_support_avx()) {
-				for(int sample = start_sample; sample < end_sample; sample++) {
-					if (task.get_cancel() || task_pool.canceled()) {
-						if(task.need_finish_queue == false)
-							break;
-					}
-
-					for(int y = tile.y; y < tile.y + tile.h; y++) {
-						for(int x = tile.x; x < tile.x + tile.w; x++) {
-							kernel_cpu_avx_path_trace(&kg, render_buffer, rng_state,
-								sample, x, y, tile.offset, tile.stride);
-						}
-					}
-
-					tile.sample = sample + 1;
-
-					task.update_progress(&tile);
-				}
-			}
-			else
+		if(system_cpu_support_avx())
+			path_trace_kernel = kernel_cpu_avx_path_trace;
+		else
 #endif
-#ifdef WITH_CYCLES_OPTIMIZED_KERNEL_SSE41			
-			if(system_cpu_support_sse41()) {
-				for(int sample = start_sample; sample < end_sample; sample++) {
-					if (task.get_cancel() || task_pool.canceled()) {
-						if(task.need_finish_queue == false)
-							break;
-					}
-
-					for(int y = tile.y; y < tile.y + tile.h; y++) {
-						for(int x = tile.x; x < tile.x + tile.w; x++) {
-							kernel_cpu_sse41_path_trace(&kg, render_buffer, rng_state,
-								sample, x, y, tile.offset, tile.stride);
-						}
-					}
-
-					tile.sample = sample + 1;
-
-					task.update_progress(&tile);
-				}
-			}
-			else
+#ifdef WITH_CYCLES_OPTIMIZED_KERNEL_SSE41
+		if(system_cpu_support_sse41())
+			path_trace_kernel = kernel_cpu_sse41_path_trace;
+		else
 #endif
 #ifdef WITH_CYCLES_OPTIMIZED_KERNEL_SSE3
-			if(system_cpu_support_sse3()) {
-				for(int sample = start_sample; sample < end_sample; sample++) {
-					if (task.get_cancel() || task_pool.canceled()) {
-						if(task.need_finish_queue == false)
-							break;
-					}
-
-					for(int y = tile.y; y < tile.y + tile.h; y++) {
-						for(int x = tile.x; x < tile.x + tile.w; x++) {
-							kernel_cpu_sse3_path_trace(&kg, render_buffer, rng_state,
-								sample, x, y, tile.offset, tile.stride);
-						}
-					}
-
-					tile.sample = sample + 1;
-
-					task.update_progress(&tile);
-				}
-			}
-			else
+		if(system_cpu_support_sse3())
+			path_trace_kernel = kernel_cpu_sse3_path_trace;
+		else
 #endif
 #ifdef WITH_CYCLES_OPTIMIZED_KERNEL_SSE2
-			if(system_cpu_support_sse2()) {
-				for(int sample = start_sample; sample < end_sample; sample++) {
-					if (task.get_cancel() || task_pool.canceled()) {
-						if(task.need_finish_queue == false)
-							break;
-					}
-
-					for(int y = tile.y; y < tile.y + tile.h; y++) {
-						for(int x = tile.x; x < tile.x + tile.w; x++) {
-							kernel_cpu_sse2_path_trace(&kg, render_buffer, rng_state,
-								sample, x, y, tile.offset, tile.stride);
-						}
-					}
-
-					tile.sample = sample + 1;
-
-					task.update_progress(&tile);
-				}
-			}
-			else
+		if(system_cpu_support_sse2())
+			path_trace_kernel = kernel_cpu_sse2_path_trace;
+		else
 #endif
-			{
+			path_trace_kernel = kernel_cpu_path_trace;
+		
+		while(task.acquire_tile(this, tile)) {
+			float *render_buffer = (float*)tile.buffer;
+			uint *rng_state = (uint*)tile.rng_state;
+			int start_sample = tile.start_sample;
+			int end_sample = tile.start_sample + tile.num_samples;
+
 				for(int sample = start_sample; sample < end_sample; sample++) {
 					if (task.get_cancel() || task_pool.canceled()) {
 						if(task.need_finish_queue == false)
@@ -296,7 +214,7 @@ public:
 
 					for(int y = tile.y; y < tile.y + tile.h; y++) {
 						for(int x = tile.x; x < tile.x + tile.w; x++) {
-							kernel_cpu_path_trace(&kg, render_buffer, rng_state,
+							path_trace_kernel(&kg, render_buffer, rng_state,
 								sample, x, y, tile.offset, tile.stride);
 						}
 					}
@@ -305,7 +223,7 @@ public:
 
 					task.update_progress(&tile);
 				}
-			}
+
 
 			task.release_tile(tile);
 
@@ -325,110 +243,74 @@ public:
 		float sample_scale = 1.0f/(task.sample + 1);
 
 		if(task.rgba_half) {
+			void(*convert_to_half_float_kernel)(KernelGlobals *, uchar4 *, float *, float, int, int, int, int);
 #ifdef WITH_CYCLES_OPTIMIZED_KERNEL_AVX2
-			if(system_cpu_support_avx2()) {
-				for(int y = task.y; y < task.y + task.h; y++)
-					for(int x = task.x; x < task.x + task.w; x++)
-						kernel_cpu_avx2_convert_to_half_float(&kernel_globals, (uchar4*)task.rgba_half, (float*)task.buffer,
-															 sample_scale, x, y, task.offset, task.stride);
-			}
+			if(system_cpu_support_avx2())
+				convert_to_half_float_kernel = kernel_cpu_avx2_convert_to_half_float;
 			else
 #endif
 #ifdef WITH_CYCLES_OPTIMIZED_KERNEL_AVX
-			if(system_cpu_support_avx()) {
+			if(system_cpu_support_avx())
 				for(int y = task.y; y < task.y + task.h; y++)
-					for(int x = task.x; x < task.x + task.w; x++)
-						kernel_cpu_avx_convert_to_half_float(&kernel_globals, (uchar4*)task.rgba_half, (float*)task.buffer,
-							sample_scale, x, y, task.offset, task.stride);
-			}
+				convert_to_half_float_kernel = kernel_cpu_avx_convert_to_half_float;
 			else
 #endif	
 #ifdef WITH_CYCLES_OPTIMIZED_KERNEL_SSE41			
-			if(system_cpu_support_sse41()) {
-				for(int y = task.y; y < task.y + task.h; y++)
-					for(int x = task.x; x < task.x + task.w; x++)
-						kernel_cpu_sse41_convert_to_half_float(&kernel_globals, (uchar4*)task.rgba_half, (float*)task.buffer,
-							sample_scale, x, y, task.offset, task.stride);
-			}
+			if(system_cpu_support_sse41())
+				convert_to_half_float_kernel = kernel_cpu_sse41_convert_to_half_float;
 			else
 #endif		
 #ifdef WITH_CYCLES_OPTIMIZED_KERNEL_SSE3		
-			if(system_cpu_support_sse3()) {
-				for(int y = task.y; y < task.y + task.h; y++)
-					for(int x = task.x; x < task.x + task.w; x++)
-						kernel_cpu_sse3_convert_to_half_float(&kernel_globals, (uchar4*)task.rgba_half, (float*)task.buffer,
-							sample_scale, x, y, task.offset, task.stride);
-			}
+			if(system_cpu_support_sse3())
+				convert_to_half_float_kernel = kernel_cpu_sse3_convert_to_half_float;
 			else
 #endif
 #ifdef WITH_CYCLES_OPTIMIZED_KERNEL_SSE2
 			if(system_cpu_support_sse2()) {
-				for(int y = task.y; y < task.y + task.h; y++)
-					for(int x = task.x; x < task.x + task.w; x++)
-						kernel_cpu_sse2_convert_to_half_float(&kernel_globals, (uchar4*)task.rgba_half, (float*)task.buffer,
-							sample_scale, x, y, task.offset, task.stride);
-			}
+				convert_to_half_float_kernel = kernel_cpu_sse2_convert_to_half_float;
 			else
 #endif
-			{
-				for(int y = task.y; y < task.y + task.h; y++)
-					for(int x = task.x; x < task.x + task.w; x++)
-						kernel_cpu_convert_to_half_float(&kernel_globals, (uchar4*)task.rgba_half, (float*)task.buffer,
-							sample_scale, x, y, task.offset, task.stride);
-			}
+				convert_to_half_float_kernel = kernel_cpu_convert_to_half_float;
+
+			for(int y = task.y; y < task.y + task.h; y++)
+				for(int x = task.x; x < task.x + task.w; x++)
+					convert_to_half_float_kernel(&kernel_globals, (uchar4*)task.rgba_half, (float*)task.buffer,
+						sample_scale, x, y, task.offset, task.stride);
 		}
 		else {
+			void(*convert_to_byte_kernel)(KernelGlobals *, uchar4 *, float *, float, int, int, int, int);
 #ifdef WITH_CYCLES_OPTIMIZED_KERNEL_AVX2
-			if(system_cpu_support_avx2()) {
-				for(int y = task.y; y < task.y + task.h; y++)
-					for(int x = task.x; x < task.x + task.w; x++)
-						kernel_cpu_avx2_convert_to_byte(&kernel_globals, (uchar4*)task.rgba_byte, (float*)task.buffer,
-													   sample_scale, x, y, task.offset, task.stride);
-			}
+			if(system_cpu_support_avx2())
+				convert_to_byte_kernel = kernel_cpu_avx2_convert_to_byte;
 			else
 #endif
 #ifdef WITH_CYCLES_OPTIMIZED_KERNEL_AVX
-			if(system_cpu_support_avx()) {
-				for(int y = task.y; y < task.y + task.h; y++)
-					for(int x = task.x; x < task.x + task.w; x++)
-						kernel_cpu_avx_convert_to_byte(&kernel_globals, (uchar4*)task.rgba_byte, (float*)task.buffer,
-							sample_scale, x, y, task.offset, task.stride);
-			}
+			if(system_cpu_support_avx())
+				convert_to_byte_kernel = kernel_cpu_avx_convert_to_byte;
 			else
 #endif		
 #ifdef WITH_CYCLES_OPTIMIZED_KERNEL_SSE41			
-			if(system_cpu_support_sse41()) {
-				for(int y = task.y; y < task.y + task.h; y++)
-					for(int x = task.x; x < task.x + task.w; x++)
-						kernel_cpu_sse41_convert_to_byte(&kernel_globals, (uchar4*)task.rgba_byte, (float*)task.buffer,
-							sample_scale, x, y, task.offset, task.stride);
-			}
+			if(system_cpu_support_sse41())
+				convert_to_byte_kernel = kernel_cpu_sse41_convert_to_byte;
 			else
 #endif			
 #ifdef WITH_CYCLES_OPTIMIZED_KERNEL_SSE3
-			if(system_cpu_support_sse3()) {
-				for(int y = task.y; y < task.y + task.h; y++)
-					for(int x = task.x; x < task.x + task.w; x++)
-						kernel_cpu_sse3_convert_to_byte(&kernel_globals, (uchar4*)task.rgba_byte, (float*)task.buffer,
-							sample_scale, x, y, task.offset, task.stride);
-			}
+			if(system_cpu_support_sse3())
+				convert_to_byte_kernel = kernel_cpu_sse3_convert_to_byte;
 			

@@ Diff output truncated at 10240 characters. @@




More information about the Bf-blender-cvs mailing list