[Bf-blender-cvs] [5c0a67b] master: Cycles: Add single channel texture support for OpenCL.

Thomas Dinges noreply at git.blender.org
Sun Aug 14 20:22:18 CEST 2016


Commit: 5c0a67b325b2c07574ef303947d5c026ab3f55d5
Author: Thomas Dinges
Date:   Sun Aug 14 20:21:08 2016 +0200
Branches: master
https://developer.blender.org/rB5c0a67b325b2c07574ef303947d5c026ab3f55d5

Cycles: Add single channel texture support for OpenCL.

This way, OpenCL devices can also benefit from a smaller memory footprint when using single-channel (greyscale) textures, e.g. bump maps.

This is an additional target of my GSoC 2016 project.

===================================================================

M	intern/cycles/kernel/kernel_compat_cpu.h
M	intern/cycles/kernel/kernel_textures.h
M	intern/cycles/kernel/svm/svm_image.h
M	intern/cycles/render/image.cpp
M	intern/cycles/render/scene.h
M	intern/cycles/util/util_texture.h

===================================================================

diff --git a/intern/cycles/kernel/kernel_compat_cpu.h b/intern/cycles/kernel/kernel_compat_cpu.h
index c882b47..3775934 100644
--- a/intern/cycles/kernel/kernel_compat_cpu.h
+++ b/intern/cycles/kernel/kernel_compat_cpu.h
@@ -495,6 +495,7 @@ typedef texture<uint> texture_uint;
 typedef texture<int> texture_int;
 typedef texture<uint4> texture_uint4;
 typedef texture<uchar4> texture_uchar4;
+typedef texture<uchar> texture_uchar;
 typedef texture_image<float> texture_image_float;
 typedef texture_image<uchar> texture_image_uchar;
 typedef texture_image<half> texture_image_half;
diff --git a/intern/cycles/kernel/kernel_textures.h b/intern/cycles/kernel/kernel_textures.h
index 7d6fec0..8d5bb75 100644
--- a/intern/cycles/kernel/kernel_textures.h
+++ b/intern/cycles/kernel/kernel_textures.h
@@ -188,6 +188,8 @@ KERNEL_TEX(uint, texture_uint, __bindless_mapping)
 /* packed image (opencl) */
 KERNEL_TEX(uchar4, texture_uchar4, __tex_image_byte4_packed)
 KERNEL_TEX(float4, texture_float4, __tex_image_float4_packed)
+KERNEL_TEX(uchar, texture_uchar, __tex_image_byte_packed)
+KERNEL_TEX(float, texture_float, __tex_image_float_packed)
 KERNEL_TEX(uint4, texture_uint4, __tex_image_packed_info)
 
 #undef KERNEL_TEX
diff --git a/intern/cycles/kernel/svm/svm_image.h b/intern/cycles/kernel/svm/svm_image.h
index 9050ce9..5d02be1 100644
--- a/intern/cycles/kernel/svm/svm_image.h
+++ b/intern/cycles/kernel/svm/svm_image.h
@@ -36,13 +36,26 @@ CCL_NAMESPACE_BEGIN
 
 ccl_device_inline float4 svm_image_texture_read(KernelGlobals *kg, int id, int offset)
 {
-	if(id >= TEX_NUM_FLOAT4_IMAGES) {
+	/* Float4 */
+	if(id < TEX_START_BYTE4_OPENCL) {
+		return kernel_tex_fetch(__tex_image_float4_packed, offset);
+	}
+	/* Byte4 */
+	else if(id < TEX_START_FLOAT_OPENCL) {
 		uchar4 r = kernel_tex_fetch(__tex_image_byte4_packed, offset);
 		float f = 1.0f/255.0f;
 		return make_float4(r.x*f, r.y*f, r.z*f, r.w*f);
 	}
+	/* Float */
+	else if(id < TEX_START_BYTE_OPENCL) {
+		float f = kernel_tex_fetch(__tex_image_float_packed, offset);
+		return make_float4(f, f, f, 1.0f);
+	}
+	/* Byte */
 	else {
-		return kernel_tex_fetch(__tex_image_float4_packed, offset);
+		uchar r = kernel_tex_fetch(__tex_image_byte_packed, offset);
+		float f = r * (1.0f/255.0f);
+		return make_float4(f, f, f, 1.0f);
 	}
 }
 
diff --git a/intern/cycles/render/image.cpp b/intern/cycles/render/image.cpp
index 284af5f..2454360 100644
--- a/intern/cycles/render/image.cpp
+++ b/intern/cycles/render/image.cpp
@@ -284,7 +284,7 @@ int ImageManager::add_image(const string& filename,
 	if(type == IMAGE_DATA_TYPE_FLOAT || type == IMAGE_DATA_TYPE_FLOAT4)
 		is_float = true;
 
-	/* No single channel and half textures on CUDA (Fermi) and OpenCL, use available slots */
+	/* No single channel and half textures on CUDA (Fermi) and no half on OpenCL, use available slots */
 	if((type == IMAGE_DATA_TYPE_FLOAT ||
 	    type == IMAGE_DATA_TYPE_HALF4 ||
 	    type == IMAGE_DATA_TYPE_HALF) &&
@@ -1105,10 +1105,11 @@ void ImageManager::device_pack_images(Device *device,
 	size_t size = 0, offset = 0;
 	ImageDataType type;
 
-	int info_size = tex_num_images[IMAGE_DATA_TYPE_FLOAT4] + tex_num_images[IMAGE_DATA_TYPE_BYTE4];
+	int info_size = tex_num_images[IMAGE_DATA_TYPE_FLOAT4] + tex_num_images[IMAGE_DATA_TYPE_BYTE4]
+	                + tex_num_images[IMAGE_DATA_TYPE_FLOAT] + tex_num_images[IMAGE_DATA_TYPE_BYTE];
 	uint4 *info = dscene->tex_image_packed_info.resize(info_size);
 
-	/* Byte Textures*/
+	/* Byte4 Textures*/
 	type = IMAGE_DATA_TYPE_BYTE4;
 
 	for(size_t slot = 0; slot < images[type].size(); slot++) {
@@ -1119,7 +1120,7 @@ void ImageManager::device_pack_images(Device *device,
 		size += tex_img.size();
 	}
 
-	uchar4 *pixels_byte = dscene->tex_image_byte4_packed.resize(size);
+	uchar4 *pixels_byte4 = dscene->tex_image_byte4_packed.resize(size);
 
 	for(size_t slot = 0; slot < images[type].size(); slot++) {
 		if(!images[type][slot])
@@ -1131,11 +1132,11 @@ void ImageManager::device_pack_images(Device *device,
 
 		info[type_index_to_flattened_slot(slot, type)] = make_uint4(tex_img.data_width, tex_img.data_height, offset, options);
 
-		memcpy(pixels_byte+offset, (void*)tex_img.data_pointer, tex_img.memory_size());
+		memcpy(pixels_byte4+offset, (void*)tex_img.data_pointer, tex_img.memory_size());
 		offset += tex_img.size();
 	}
 
-	/* Float Textures*/
+	/* Float4 Textures*/
 	type = IMAGE_DATA_TYPE_FLOAT4;
 	size = 0, offset = 0;
 
@@ -1147,7 +1148,7 @@ void ImageManager::device_pack_images(Device *device,
 		size += tex_img.size();
 	}
 
-	float4 *pixels_float = dscene->tex_image_float4_packed.resize(size);
+	float4 *pixels_float4 = dscene->tex_image_float4_packed.resize(size);
 
 	for(size_t slot = 0; slot < images[type].size(); slot++) {
 		if(!images[type][slot])
@@ -1160,6 +1161,63 @@ void ImageManager::device_pack_images(Device *device,
 		uint8_t options = pack_image_options(type, slot);
 		info[type_index_to_flattened_slot(slot, type)] = make_uint4(tex_img.data_width, tex_img.data_height, offset, options);
 
+		memcpy(pixels_float4+offset, (void*)tex_img.data_pointer, tex_img.memory_size());
+		offset += tex_img.size();
+	}
+
+	/* Byte Textures*/
+	type = IMAGE_DATA_TYPE_BYTE;
+	size = 0, offset = 0;
+
+	for(size_t slot = 0; slot < images[type].size(); slot++) {
+		if(!images[type][slot])
+			continue;
+
+		device_vector<uchar>& tex_img = dscene->tex_byte_image[slot];
+		size += tex_img.size();
+	}
+
+	uchar *pixels_byte = dscene->tex_image_byte_packed.resize(size);
+
+	for(size_t slot = 0; slot < images[type].size(); slot++) {
+		if(!images[type][slot])
+			continue;
+
+		device_vector<uchar>& tex_img = dscene->tex_byte_image[slot];
+
+		uint8_t options = pack_image_options(type, slot);
+
+		info[type_index_to_flattened_slot(slot, type)] = make_uint4(tex_img.data_width, tex_img.data_height, offset, options);
+
+		memcpy(pixels_byte+offset, (void*)tex_img.data_pointer, tex_img.memory_size());
+		offset += tex_img.size();
+	}
+
+	/* Float Textures*/
+	type = IMAGE_DATA_TYPE_FLOAT;
+	size = 0, offset = 0;
+
+	for(size_t slot = 0; slot < images[type].size(); slot++) {
+		if(!images[type][slot])
+			continue;
+
+		device_vector<float>& tex_img = dscene->tex_float_image[slot];
+		size += tex_img.size();
+	}
+
+	float *pixels_float = dscene->tex_image_float_packed.resize(size);
+
+	for(size_t slot = 0; slot < images[type].size(); slot++) {
+		if(!images[type][slot])
+			continue;
+
+		device_vector<float>& tex_img = dscene->tex_float_image[slot];
+
+		/* todo: support 3D textures, only CPU for now */
+
+		uint8_t options = pack_image_options(type, slot);
+		info[type_index_to_flattened_slot(slot, type)] = make_uint4(tex_img.data_width, tex_img.data_height, offset, options);
+
 		memcpy(pixels_float+offset, (void*)tex_img.data_pointer, tex_img.memory_size());
 		offset += tex_img.size();
 	}
@@ -1178,6 +1236,20 @@ void ImageManager::device_pack_images(Device *device,
 		}
 		device->tex_alloc("__tex_image_float4_packed", dscene->tex_image_float4_packed);
 	}
+	if(dscene->tex_image_byte_packed.size()) {
+		if(dscene->tex_image_byte_packed.device_pointer) {
+			thread_scoped_lock device_lock(device_mutex);
+			device->tex_free(dscene->tex_image_byte_packed);
+		}
+		device->tex_alloc("__tex_image_byte_packed", dscene->tex_image_byte_packed);
+	}
+	if(dscene->tex_image_float_packed.size()) {
+		if(dscene->tex_image_float_packed.device_pointer) {
+			thread_scoped_lock device_lock(device_mutex);
+			device->tex_free(dscene->tex_image_float_packed);
+		}
+		device->tex_alloc("__tex_image_float_packed", dscene->tex_image_float_packed);
+	}
 	if(dscene->tex_image_packed_info.size()) {
 		if(dscene->tex_image_packed_info.device_pointer) {
 			thread_scoped_lock device_lock(device_mutex);
@@ -1208,10 +1280,14 @@ void ImageManager::device_free(Device *device, DeviceScene *dscene)
 
 	device->tex_free(dscene->tex_image_byte4_packed);
 	device->tex_free(dscene->tex_image_float4_packed);
+	device->tex_free(dscene->tex_image_byte_packed);
+	device->tex_free(dscene->tex_image_float_packed);
 	device->tex_free(dscene->tex_image_packed_info);
 
 	dscene->tex_image_byte4_packed.clear();
 	dscene->tex_image_float4_packed.clear();
+	dscene->tex_image_byte_packed.clear();
+	dscene->tex_image_float_packed.clear();
 	dscene->tex_image_packed_info.clear();
 }
 
diff --git a/intern/cycles/render/scene.h b/intern/cycles/render/scene.h
index 9e72f19..8fec171 100644
--- a/intern/cycles/render/scene.h
+++ b/intern/cycles/render/scene.h
@@ -123,6 +123,8 @@ public:
 	/* opencl images */
 	device_vector<uchar4> tex_image_byte4_packed;
 	device_vector<float4> tex_image_float4_packed;
+	device_vector<uchar> tex_image_byte_packed;
+	device_vector<float> tex_image_float_packed;
 	device_vector<uint4> tex_image_packed_info;
 
 	KernelData data;
diff --git a/intern/cycles/util/util_texture.h b/intern/cycles/util/util_texture.h
index be1177d..aff928e 100644
--- a/intern/cycles/util/util_texture.h
+++ b/intern/cycles/util/util_texture.h
@@ -67,8 +67,8 @@ CCL_NAMESPACE_BEGIN
 #define TEX_NUM_FLOAT4_OPENCL	1024
 #define TEX_NUM_BYTE4_OPENCL	1024
 #define TEX_NUM_HALF4_OPENCL	0
-#define TEX_NUM_FLOAT_OPENCL	0
-#define TEX_NUM_BYTE_OPENCL		0
+#define TEX_NUM_FLOAT_OPENCL	1024
+#define TEX_NUM_BYTE_OPENCL		1024
 #define TEX_NUM_HALF_OPENCL		0
 #define TEX_START_FLOAT4_OPENCL	0
 #define TEX_START_BYTE4_OPENCL	TEX_NUM_FLOAT4_OPENCL




More information about the Bf-blender-cvs mailing list