[Bf-blender-cvs] [76481ea] master: Cycles: Add support for float4 textures on OpenCL.

Thomas Dinges noreply at git.blender.org
Tue May 10 02:53:51 CEST 2016


Commit: 76481eaeff77e46555f8a0458d860911a9a57a9c
Author: Thomas Dinges
Date:   Mon May 9 17:06:22 2016 +0200
Branches: master
https://developer.blender.org/rB76481eaeff77e46555f8a0458d860911a9a57a9c

Cycles: Add support for float4 textures on OpenCL.

The title says it all: this adds OpenCL float4 texture support.

There is still a bug in the code: I get an "Out of resources" error on NVIDIA hardware here, and I am not sure what's wrong yet.
I will investigate further, but maybe someone else has an idea. :)

Reviewers: #cycles, brecht

Subscribers: brecht, candreacchio

Differential Revision: https://developer.blender.org/D1983

===================================================================

M	intern/cycles/kernel/kernel_textures.h
M	intern/cycles/kernel/svm/svm_image.h
M	intern/cycles/render/image.cpp
M	intern/cycles/render/scene.h
M	intern/cycles/util/util_texture.h

===================================================================

diff --git a/intern/cycles/kernel/kernel_textures.h b/intern/cycles/kernel/kernel_textures.h
index 86979d8..62b0a6f 100644
--- a/intern/cycles/kernel/kernel_textures.h
+++ b/intern/cycles/kernel/kernel_textures.h
@@ -236,7 +236,8 @@ KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_byte4_149)
 KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_byte4_150)
 
 /* packed image (opencl) */
-KERNEL_TEX(uchar4, texture_uchar4, __tex_image_packed)
+KERNEL_TEX(uchar4, texture_uchar4, __tex_image_byte4_packed)
+KERNEL_TEX(float4, texture_float4, __tex_image_float4_packed)
 KERNEL_TEX(uint4, texture_uint4, __tex_image_packed_info)
 
 #undef KERNEL_TEX
diff --git a/intern/cycles/kernel/svm/svm_image.h b/intern/cycles/kernel/svm/svm_image.h
index 1f5ea8c..faff4ce 100644
--- a/intern/cycles/kernel/svm/svm_image.h
+++ b/intern/cycles/kernel/svm/svm_image.h
@@ -30,11 +30,16 @@ CCL_NAMESPACE_BEGIN
 /* For OpenCL all images are packed in a single array, and we do manual lookup
  * and interpolation. */
 
-ccl_device_inline float4 svm_image_texture_read(KernelGlobals *kg, int offset)
+ccl_device_inline float4 svm_image_texture_read(KernelGlobals *kg, int id, int offset)
 {
-	uchar4 r = kernel_tex_fetch(__tex_image_packed, offset);
-	float f = 1.0f/255.0f;
-	return make_float4(r.x*f, r.y*f, r.z*f, r.w*f);
+	if(id >= TEX_NUM_FLOAT4_IMAGES) {
+		uchar4 r = kernel_tex_fetch(__tex_image_byte4_packed, offset);
+		float f = 1.0f/255.0f;
+		return make_float4(r.x*f, r.y*f, r.z*f, r.w*f);
+	}
+	else {
+		return kernel_tex_fetch(__tex_image_float4_packed, offset);
+	}
 }
 
 ccl_device_inline int svm_image_texture_wrap_periodic(int x, int width)
@@ -81,7 +86,7 @@ ccl_device float4 svm_image_texture(KernelGlobals *kg, int id, float x, float y,
 			iy = svm_image_texture_wrap_clamp(iy, height);
 
 		}
-		r = svm_image_texture_read(kg, offset + ix + iy*width);
+		r = svm_image_texture_read(kg, id, offset + ix + iy*width);
 	}
 	else { /* We default to linear interpolation if it is not closest */
 		float tx = svm_image_texture_frac(x*width, &ix);
@@ -103,10 +108,10 @@ ccl_device float4 svm_image_texture(KernelGlobals *kg, int id, float x, float y,
 		}
 
 
-		r = (1.0f - ty)*(1.0f - tx)*svm_image_texture_read(kg, offset + ix + iy*width);
-		r += (1.0f - ty)*tx*svm_image_texture_read(kg, offset + nix + iy*width);
-		r += ty*(1.0f - tx)*svm_image_texture_read(kg, offset + ix + niy*width);
-		r += ty*tx*svm_image_texture_read(kg, offset + nix + niy*width);
+		r = (1.0f - ty)*(1.0f - tx)*svm_image_texture_read(kg, id, offset + ix + iy*width);
+		r += (1.0f - ty)*tx*svm_image_texture_read(kg, id, offset + nix + iy*width);
+		r += ty*(1.0f - tx)*svm_image_texture_read(kg, id, offset + ix + niy*width);
+		r += ty*tx*svm_image_texture_read(kg, id, offset + nix + niy*width);
 	}
 
 	if(use_alpha && r.w != 1.0f && r.w != 0.0f) {
diff --git a/intern/cycles/render/image.cpp b/intern/cycles/render/image.cpp
index 4a1b06e..10a5ca4 100644
--- a/intern/cycles/render/image.cpp
+++ b/intern/cycles/render/image.cpp
@@ -223,7 +223,7 @@ int ImageManager::add_image(const string& filename,
 	size_t slot;
 
 	/* Load image info and find out if we need a float texture. */
-	is_float = (pack_images)? false: is_float_image(filename, builtin_data, is_linear);
+	is_float = is_float_image(filename, builtin_data, is_linear);
 
 	ImageDataType type = is_float? IMAGE_DATA_TYPE_FLOAT4 : IMAGE_DATA_TYPE_BYTE4;
 
@@ -803,12 +803,16 @@ void ImageManager::device_pack_images(Device *device,
                                       DeviceScene *dscene,
                                       Progress& /*progess*/)
 {
-	/* for OpenCL, we pack all image textures inside a single big texture, and
-	 * will do our own interpolation in the kernel */
-	size_t size = 0;
+	/* For OpenCL, we pack all image textures into a single large texture, and
+	 * do our own interpolation in the kernel. */
+	size_t size = 0, offset = 0;
+	ImageDataType type;
+
+	int info_size = tex_num_images[IMAGE_DATA_TYPE_FLOAT4] + tex_num_images[IMAGE_DATA_TYPE_BYTE4];
+	uint4 *info = dscene->tex_image_packed_info.resize(info_size);
 
-	/* Only byte textures are supported atm */
-	ImageDataType type = IMAGE_DATA_TYPE_BYTE4;
+	/* Byte Textures*/
+	type = IMAGE_DATA_TYPE_BYTE4;
 
 	for(size_t slot = 0; slot < images[type].size(); slot++) {
 		if(!images[type][slot])
@@ -818,10 +822,7 @@ void ImageManager::device_pack_images(Device *device,
 		size += tex_img.size();
 	}
 
-	uint4 *info = dscene->tex_image_packed_info.resize(images[type].size());
-	uchar4 *pixels = dscene->tex_image_packed.resize(size);
-
-	size_t offset = 0;
+	uchar4 *pixels_byte = dscene->tex_image_byte4_packed.resize(size);
 
 	for(size_t slot = 0; slot < images[type].size(); slot++) {
 		if(!images[type][slot])
@@ -829,24 +830,61 @@ void ImageManager::device_pack_images(Device *device,
 
 		device_vector<uchar4>& tex_img = dscene->tex_byte4_image[slot];
 
+		/* The image options are packed
+		   bit 0 -> periodic
+		   bit 1 + 2 -> interpolation type */
+		uint8_t interpolation = (images[type][slot]->interpolation << 1) + 1;
+		info[type_index_to_flattened_slot(slot, type)] = make_uint4(tex_img.data_width, tex_img.data_height, offset, interpolation);
+
+		memcpy(pixels_byte+offset, (void*)tex_img.data_pointer, tex_img.memory_size());
+		offset += tex_img.size();
+	}
+
+	/* Float Textures*/
+	type = IMAGE_DATA_TYPE_FLOAT4;
+	size = 0, offset = 0;
+
+	for(size_t slot = 0; slot < images[type].size(); slot++) {
+		if(!images[type][slot])
+			continue;
+
+		device_vector<float4>& tex_img = dscene->tex_float4_image[slot];
+		size += tex_img.size();
+	}
+
+	float4 *pixels_float = dscene->tex_image_float4_packed.resize(size);
+
+	for(size_t slot = 0; slot < images[type].size(); slot++) {
+		if(!images[type][slot])
+			continue;
+
+		device_vector<float4>& tex_img = dscene->tex_float4_image[slot];
+
 		/* todo: support 3D textures, only CPU for now */
 
 		/* The image options are packed
 		   bit 0 -> periodic
 		   bit 1 + 2 -> interpolation type */
 		uint8_t interpolation = (images[type][slot]->interpolation << 1) + 1;
-		info[slot] = make_uint4(tex_img.data_width, tex_img.data_height, offset, interpolation);
+		info[type_index_to_flattened_slot(slot, type)] = make_uint4(tex_img.data_width, tex_img.data_height, offset, interpolation);
 
-		memcpy(pixels+offset, (void*)tex_img.data_pointer, tex_img.memory_size());
+		memcpy(pixels_float+offset, (void*)tex_img.data_pointer, tex_img.memory_size());
 		offset += tex_img.size();
 	}
 
-	if(dscene->tex_image_packed.size()) {
-		if(dscene->tex_image_packed.device_pointer) {
+	if(dscene->tex_image_byte4_packed.size()) {
+		if(dscene->tex_image_byte4_packed.device_pointer) {
+			thread_scoped_lock device_lock(device_mutex);
+			device->tex_free(dscene->tex_image_byte4_packed);
+		}
+		device->tex_alloc("__tex_image_byte4_packed", dscene->tex_image_byte4_packed);
+	}
+	if(dscene->tex_image_float4_packed.size()) {
+		if(dscene->tex_image_float4_packed.device_pointer) {
 			thread_scoped_lock device_lock(device_mutex);
-			device->tex_free(dscene->tex_image_packed);
+			device->tex_free(dscene->tex_image_float4_packed);
 		}
-		device->tex_alloc("__tex_image_packed", dscene->tex_image_packed);
+		device->tex_alloc("__tex_image_float4_packed", dscene->tex_image_float4_packed);
 	}
 	if(dscene->tex_image_packed_info.size()) {
 		if(dscene->tex_image_packed_info.device_pointer) {
@@ -876,10 +914,12 @@ void ImageManager::device_free(Device *device, DeviceScene *dscene)
 		images[type].clear();
 	}
 
-	device->tex_free(dscene->tex_image_packed);
+	device->tex_free(dscene->tex_image_byte4_packed);
+	device->tex_free(dscene->tex_image_float4_packed);
 	device->tex_free(dscene->tex_image_packed_info);
 
-	dscene->tex_image_packed.clear();
+	dscene->tex_image_byte4_packed.clear();
+	dscene->tex_image_float4_packed.clear();
 	dscene->tex_image_packed_info.clear();
 }
 
diff --git a/intern/cycles/render/scene.h b/intern/cycles/render/scene.h
index b05e5a6..455053f 100644
--- a/intern/cycles/render/scene.h
+++ b/intern/cycles/render/scene.h
@@ -113,7 +113,8 @@ public:
 	device_vector<float4> tex_float4_image[TEX_NUM_FLOAT4_IMAGES_CPU];
 
 	/* opencl images */
-	device_vector<uchar4> tex_image_packed;
+	device_vector<uchar4> tex_image_byte4_packed;
+	device_vector<float4> tex_image_float4_packed;
 	device_vector<uint4> tex_image_packed_info;
 
 	KernelData data;
diff --git a/intern/cycles/util/util_texture.h b/intern/cycles/util/util_texture.h
index 837858d..346ccd8 100644
--- a/intern/cycles/util/util_texture.h
+++ b/intern/cycles/util/util_texture.h
@@ -38,8 +38,8 @@ CCL_NAMESPACE_BEGIN
 
 /* OpenCL */
 #define TEX_NUM_BYTE4_IMAGES_OPENCL		1024
-#define TEX_NUM_FLOAT4_IMAGES_OPENCL		0
-#define TEX_IMAGE_BYTE4_START_OPENCL		TEX_NUM_FLOAT4_IMAGES_OPENCL
+#define TEX_NUM_FLOAT4_IMAGES_OPENCL	1024
+#define TEX_IMAGE_BYTE4_START_OPENCL	TEX_NUM_FLOAT4_IMAGES_OPENCL
 
 
 /* Color to use when textures are not found. */




More information about the Bf-blender-cvs mailing list