[Bf-blender-cvs] [11f8238315] soc-2016-cycles_denoising: Cycles: Clean up passing of int4 to CPU kernels

Lukas Stockner noreply at git.blender.org
Thu Jan 12 05:14:12 CET 2017


Commit: 11f8238315a3b451b589770adbeda4cfa1b79f2f
Author: Lukas Stockner
Date:   Wed Jan 11 07:13:33 2017 +0100
Branches: soc-2016-cycles_denoising
https://developer.blender.org/rB11f8238315a3b451b589770adbeda4cfa1b79f2f

Cycles: Clean up passing of int4 to CPU kernels

Since the type of int4 depends on whether SSE is enabled, the SSE kernels expect a different type than the device code.
Therefore, the content must be passed as a pointer...

===================================================================

M	intern/cycles/kernel/kernels/cpu/kernel_cpu_impl.h
M	intern/cycles/util/util_math.h

===================================================================

diff --git a/intern/cycles/kernel/kernels/cpu/kernel_cpu_impl.h b/intern/cycles/kernel/kernels/cpu/kernel_cpu_impl.h
index dcc8cef1ab..465cb241f7 100644
--- a/intern/cycles/kernel/kernels/cpu/kernel_cpu_impl.h
+++ b/intern/cycles/kernel/kernels/cpu/kernel_cpu_impl.h
@@ -172,8 +172,7 @@ void KERNEL_FUNCTION_FULL_NAME(filter_divide_shadow)(KernelGlobals *kg,
 #ifdef KERNEL_STUB
 	STUB_ASSERT(KERNEL_ARCH, filter_divide_shadow);
 #else
-	int4 rect = make_int4(prefilter_rect[0], prefilter_rect[1], prefilter_rect[2], prefilter_rect[3]);
-	kernel_filter_divide_shadow(kg, sample, buffers, x, y, tile_x, tile_y, offset, stride, unfiltered, sampleVariance, sampleVarianceV, bufferVariance, rect);
+	kernel_filter_divide_shadow(kg, sample, buffers, x, y, tile_x, tile_y, offset, stride, unfiltered, sampleVariance, sampleVarianceV, bufferVariance, load_int4(prefilter_rect));
 #endif
 }
 
@@ -194,8 +193,7 @@ void KERNEL_FUNCTION_FULL_NAME(filter_get_feature)(KernelGlobals *kg,
 #ifdef KERNEL_STUB
 	STUB_ASSERT(KERNEL_ARCH, filter_get_feature);
 #else
-	int4 rect = make_int4(prefilter_rect[0], prefilter_rect[1], prefilter_rect[2], prefilter_rect[3]);
-	kernel_filter_get_feature(kg, sample, buffers, m_offset, v_offset, x, y, tile_x, tile_y, offset, stride, mean, variance, rect);
+	kernel_filter_get_feature(kg, sample, buffers, m_offset, v_offset, x, y, tile_x, tile_y, offset, stride, mean, variance, load_int4(prefilter_rect));
 #endif
 }
 
@@ -211,8 +209,7 @@ void KERNEL_FUNCTION_FULL_NAME(filter_non_local_means)(int x, int y,
 #ifdef KERNEL_STUB
 	STUB_ASSERT(KERNEL_ARCH, filter_non_local_means);
 #else
-	int4 rect = make_int4(filter_rect[0], filter_rect[1], filter_rect[2], filter_rect[3]);
-	kernel_filter_non_local_means(x, y, noisyImage, weightImage, variance, filteredImage, rect, r, f, a, k_2);
+	kernel_filter_non_local_means(x, y, noisyImage, weightImage, variance, filteredImage, load_int4(filter_rect), r, f, a, k_2);
 #endif
 }
 
@@ -228,8 +225,7 @@ void KERNEL_FUNCTION_FULL_NAME(filter_combine_halves)(int x, int y,
 #ifdef KERNEL_STUB
 	STUB_ASSERT(KERNEL_ARCH, filter_combine_halves);
 #else
-	int4 rect = make_int4(prefilter_rect[0], prefilter_rect[1], prefilter_rect[2], prefilter_rect[3]);
-	kernel_filter_combine_halves(x, y, mean, variance, a, b, rect, r);
+	kernel_filter_combine_halves(x, y, mean, variance, a, b, load_int4(prefilter_rect), r);
 #endif
 }
 
@@ -244,8 +240,7 @@ void KERNEL_FUNCTION_FULL_NAME(filter_construct_transform)(KernelGlobals *kg,
 #ifdef KERNEL_STUB
 	STUB_ASSERT(KERNEL_ARCH, filter_construct_transform);
 #else
-	int4 rect = make_int4(prefilter_rect[0], prefilter_rect[1], prefilter_rect[2], prefilter_rect[3]);
-	kernel_filter_construct_transform(kg, sample, buffer, x, y, (FilterStorage*) storage, rect);
+	kernel_filter_construct_transform(kg, sample, buffer, x, y, (FilterStorage*) storage, load_int4(prefilter_rect));
 #endif
 }
 
@@ -265,8 +260,8 @@ void KERNEL_FUNCTION_FULL_NAME(filter_reconstruct)(KernelGlobals *kg,
 #ifdef KERNEL_STUB
 	STUB_ASSERT(KERNEL_ARCH, filter_reconstruct);
 #else
-	int4 rect = make_int4(prefilter_rect[0], prefilter_rect[1], prefilter_rect[2], prefilter_rect[3]);
-	int4 area = make_int4(filter_area[0], filter_area[1], filter_area[2], filter_area[3]);
+	int4 rect = load_int4(prefilter_rect);
+	int4 area = load_int4(filter_area);
 	FilterStorage *storage = (FilterStorage*) storage_ptr;
 	if(kernel_data.film.denoise_cross) {
 		kernel_filter_reconstruct(kg, sample, buffer, x, y, offset, stride, buffers, 0, make_int2(0, 6), storage, weight_cache, storage->transform, 1, area, rect);
diff --git a/intern/cycles/util/util_math.h b/intern/cycles/util/util_math.h
index 76666159ae..db4946fe5b 100644
--- a/intern/cycles/util/util_math.h
+++ b/intern/cycles/util/util_math.h
@@ -1198,6 +1198,15 @@ ccl_device_inline void print_int4(const char *label, const int4& a)
 	printf("%s: %d %d %d %d\n", label, a.x, a.y, a.z, a.w);
 }
 
+ccl_device_inline int4 load_int4(const int *v) /* Build an int4 from the four consecutive ints v[0]..v[3]. */
+{
+#ifdef __KERNEL_SSE__
+	return _mm_loadu_si128((const __m128i*)v); /* Unaligned 128-bit load; the cast keeps v's const qualifier. */
+#else
+	return make_int4(v[0], v[1], v[2], v[3]); /* Non-SSE build: assemble the vector component by component. */
+#endif
+}
+
 #endif
 
 /* Int/Float conversion */




More information about the Bf-blender-cvs mailing list