[Bf-blender-cvs] [4a4f043] master: Cycles: Add support for single channel float textures on CPU.

Thomas Dinges noreply at git.blender.org
Wed May 11 21:58:52 CEST 2016


Commit: 4a4f043bc4235c046d2b58e00f2b80665ded11bf
Author: Thomas Dinges
Date:   Mon May 9 12:51:42 2016 +0200
Branches: master
https://developer.blender.org/rB4a4f043bc4235c046d2b58e00f2b80665ded11bf

Cycles: Add support for single channel float textures on CPU.

Until now, single channel textures were packed into a float4, wasting 3 floats per pixel. Memory usage of such textures is now reduced by 3/4.
Voxel attributes such as density, flame and heat benefit from this, as do bump maps with a single channel.
This commit also includes some cleanup and code deduplication for image loading.

Example Smoke render from Cosmos Laundromat: http://www.pasteall.org/pic/show.php?id=102972
Memory usage for this render went down from ~600MB to ~300MB.

Reviewers: #cycles, brecht

Differential Revision: https://developer.blender.org/D1981

===================================================================

M	intern/cycles/device/device_cpu.cpp
M	intern/cycles/kernel/kernel_compat_cpu.h
M	intern/cycles/kernel/kernel_globals.h
M	intern/cycles/kernel/kernels/cpu/kernel.cpp
M	intern/cycles/render/image.cpp
M	intern/cycles/render/image.h
M	intern/cycles/render/nodes.cpp
M	intern/cycles/render/scene.h
M	intern/cycles/util/util_texture.h

===================================================================

diff --git a/intern/cycles/device/device_cpu.cpp b/intern/cycles/device/device_cpu.cpp
index 4d0a57d..676b1279 100644
--- a/intern/cycles/device/device_cpu.cpp
+++ b/intern/cycles/device/device_cpu.cpp
@@ -27,9 +27,6 @@
 #include "device.h"
 #include "device_intern.h"
 
-/* Texture limits and slot info */
-#include "util_texture.h"
-
 #include "kernel.h"
 #include "kernel_compat_cpu.h"
 #include "kernel_types.h"
diff --git a/intern/cycles/kernel/kernel_compat_cpu.h b/intern/cycles/kernel/kernel_compat_cpu.h
index ee8cee0..7fc8d2b 100644
--- a/intern/cycles/kernel/kernel_compat_cpu.h
+++ b/intern/cycles/kernel/kernel_compat_cpu.h
@@ -40,6 +40,7 @@
 #include "util_simd.h"
 #include "util_half.h"
 #include "util_types.h"
+#include "util_texture.h"
 
 #define ccl_addr_space
 
@@ -108,6 +109,13 @@ template<typename T> struct texture_image  {
 		return make_float4(r.x*f, r.y*f, r.z*f, r.w*f);
 	}
 
+	ccl_always_inline float4 read(float r)
+	{
+		/* TODO(dingto): Optimize this, so interpolation
+		 * happens on float instead of float4 */
+		return make_float4(r, r, r, 1.0f);
+	}
+
 	ccl_always_inline int wrap_periodic(int x, int width)
 	{
 		x %= width;
@@ -470,6 +478,7 @@ typedef texture<uint> texture_uint;
 typedef texture<int> texture_int;
 typedef texture<uint4> texture_uint4;
 typedef texture<uchar4> texture_uchar4;
+typedef texture_image<float> texture_image_float;
 typedef texture_image<float4> texture_image_float4;
 typedef texture_image<uchar4> texture_image_uchar4;
 
@@ -479,9 +488,21 @@ typedef texture_image<uchar4> texture_image_uchar4;
 #define kernel_tex_fetch_ssef(tex, index) (kg->tex.fetch_ssef(index))
 #define kernel_tex_fetch_ssei(tex, index) (kg->tex.fetch_ssei(index))
 #define kernel_tex_lookup(tex, t, offset, size) (kg->tex.lookup(t, offset, size))
-#define kernel_tex_image_interp(tex, x, y) ((tex < TEX_NUM_FLOAT4_IMAGES_CPU) ? kg->texture_float4_images[tex].interp(x, y) : kg->texture_byte4_images[tex - TEX_NUM_FLOAT4_IMAGES_CPU].interp(x, y))
-#define kernel_tex_image_interp_3d(tex, x, y, z) ((tex < TEX_NUM_FLOAT4_IMAGES_CPU) ? kg->texture_float4_images[tex].interp_3d(x, y, z) : kg->texture_byte4_images[tex - TEX_NUM_FLOAT4_IMAGES_CPU].interp_3d(x, y, z))
-#define kernel_tex_image_interp_3d_ex(tex, x, y, z, interpolation) ((tex < TEX_NUM_FLOAT4_IMAGES_CPU) ? kg->texture_float4_images[tex].interp_3d_ex(x, y, z, interpolation) : kg->texture_byte4_images[tex - TEX_NUM_FLOAT4_IMAGES_CPU].interp_3d_ex(x, y, z, interpolation))
+
+#define kernel_tex_image_interp(tex, x, y) \
+	((tex >= TEX_IMAGE_FLOAT_START_CPU) ? kg->texture_float_images[tex - TEX_IMAGE_FLOAT_START_CPU].interp(x, y) : \
+	(tex >= TEX_IMAGE_BYTE4_START_CPU) ? kg->texture_byte4_images[tex - TEX_IMAGE_BYTE4_START_CPU].interp(x, y) : \
+	kg->texture_float4_images[tex].interp(x, y))
+
+#define kernel_tex_image_interp_3d(tex, x, y, z) \
+	((tex >= TEX_IMAGE_FLOAT_START_CPU) ? kg->texture_float_images[tex - TEX_IMAGE_FLOAT_START_CPU].interp_3d(x, y, z) : \
+	(tex >= TEX_IMAGE_BYTE4_START_CPU) ? kg->texture_byte4_images[tex - TEX_IMAGE_BYTE4_START_CPU].interp_3d(x, y, z) : \
+	kg->texture_float4_images[tex].interp_3d(x, y, z))
+
+#define kernel_tex_image_interp_3d_ex(tex, x, y, z, interpolation) \
+	((tex >= TEX_IMAGE_FLOAT_START_CPU) ? kg->texture_float_images[tex - TEX_IMAGE_FLOAT_START_CPU].interp_3d_ex(x, y, z, interpolation) : \
+	(tex >= TEX_IMAGE_BYTE4_START_CPU) ? kg->texture_byte4_images[tex - TEX_IMAGE_BYTE4_START_CPU].interp_3d_ex(x, y, z, interpolation) : \
+	kg->texture_float4_images[tex].interp_3d_ex(x, y, z, interpolation))
 
 #define kernel_data (kg->__data)
 
diff --git a/intern/cycles/kernel/kernel_globals.h b/intern/cycles/kernel/kernel_globals.h
index b08d892..3af44e0 100644
--- a/intern/cycles/kernel/kernel_globals.h
+++ b/intern/cycles/kernel/kernel_globals.h
@@ -34,6 +34,7 @@ struct OSLShadingSystem;
 typedef struct KernelGlobals {
 	texture_image_uchar4 texture_byte4_images[TEX_NUM_BYTE4_IMAGES_CPU];
 	texture_image_float4 texture_float4_images[TEX_NUM_FLOAT4_IMAGES_CPU];
+	texture_image_float texture_float_images[TEX_NUM_FLOAT_IMAGES_CPU];
 
 #  define KERNEL_TEX(type, ttype, name) ttype name;
 #  define KERNEL_IMAGE_TEX(type, ttype, name)
diff --git a/intern/cycles/kernel/kernels/cpu/kernel.cpp b/intern/cycles/kernel/kernels/cpu/kernel.cpp
index e7d0d8a..960012e 100644
--- a/intern/cycles/kernel/kernels/cpu/kernel.cpp
+++ b/intern/cycles/kernel/kernels/cpu/kernel.cpp
@@ -106,10 +106,26 @@ void kernel_tex_copy(KernelGlobals *kg,
 			tex->extension = extension;
 		}
 	}
+	else if(strstr(name, "__tex_image_float")) {
+		texture_image_float *tex = NULL;
+		int id = atoi(name + strlen("__tex_image_float_"));
+		int array_index = id - TEX_IMAGE_FLOAT_START_CPU;
+
+		if(array_index >= 0 && array_index < TEX_NUM_FLOAT_IMAGES_CPU) {
+			tex = &kg->texture_float_images[array_index];
+		}
+
+		if(tex) {
+			tex->data = (float*)mem;
+			tex->dimensions_set(width, height, depth);
+			tex->interpolation = interpolation;
+			tex->extension = extension;
+		}
+	}
 	else if(strstr(name, "__tex_image_byte4")) {
 		texture_image_uchar4 *tex = NULL;
 		int id = atoi(name + strlen("__tex_image_byte4_"));
-		int array_index = id - TEX_NUM_FLOAT4_IMAGES_CPU;
+		int array_index = id - TEX_IMAGE_BYTE4_START_CPU;
 
 		if(array_index >= 0 && array_index < TEX_NUM_BYTE4_IMAGES_CPU) {
 			tex = &kg->texture_byte4_images[array_index];
diff --git a/intern/cycles/render/image.cpp b/intern/cycles/render/image.cpp
index 10a5ca4..102c610 100644
--- a/intern/cycles/render/image.cpp
+++ b/intern/cycles/render/image.cpp
@@ -19,7 +19,6 @@
 #include "scene.h"
 
 #include "util_foreach.h"
-#include "util_image.h"
 #include "util_path.h"
 #include "util_progress.h"
 #include "util_texture.h"
@@ -43,31 +42,41 @@ ImageManager::ImageManager(const DeviceInfo& info)
 	if(info.type == DEVICE_CPU) {
 		tex_num_images[IMAGE_DATA_TYPE_BYTE4] = TEX_NUM_BYTE4_IMAGES_CPU;
 		tex_num_images[IMAGE_DATA_TYPE_FLOAT4] = TEX_NUM_FLOAT4_IMAGES_CPU;
+		tex_num_images[IMAGE_DATA_TYPE_FLOAT] = TEX_NUM_FLOAT_IMAGES_CPU;
 		tex_image_byte4_start = TEX_IMAGE_BYTE4_START_CPU;
+		tex_image_float_start = TEX_IMAGE_FLOAT_START_CPU;
 	}
 	/* CUDA (Fermi) */
 	else if((info.type == DEVICE_CUDA || info.type == DEVICE_MULTI) && !info.extended_images) {
 		tex_num_images[IMAGE_DATA_TYPE_BYTE4] = TEX_NUM_BYTE4_IMAGES_CUDA;
 		tex_num_images[IMAGE_DATA_TYPE_FLOAT4] = TEX_NUM_FLOAT4_IMAGES_CUDA;
+		tex_num_images[IMAGE_DATA_TYPE_FLOAT] = TEX_NUM_FLOAT_IMAGES_CUDA;
 		tex_image_byte4_start = TEX_IMAGE_BYTE4_START_CUDA;
+		tex_image_float_start = TEX_IMAGE_FLOAT_START_CUDA;
 	}
 	/* CUDA (Kepler and above) */
 	else if((info.type == DEVICE_CUDA || info.type == DEVICE_MULTI) && info.extended_images) {
 		tex_num_images[IMAGE_DATA_TYPE_BYTE4] = TEX_NUM_BYTE4_IMAGES_CUDA_KEPLER;
 		tex_num_images[IMAGE_DATA_TYPE_FLOAT4] = TEX_NUM_FLOAT4_IMAGES_CUDA_KEPLER;
+		tex_num_images[IMAGE_DATA_TYPE_FLOAT] = TEX_NUM_FLOAT_IMAGES_CUDA_KEPLER;
 		tex_image_byte4_start = TEX_IMAGE_BYTE4_START_CUDA_KELPER;
+		tex_image_float_start = TEX_IMAGE_FLOAT_START_CUDA_KELPER;
 	}
 	/* OpenCL */
 	else if(info.pack_images) {
 		tex_num_images[IMAGE_DATA_TYPE_BYTE4] = TEX_NUM_BYTE4_IMAGES_OPENCL;
 		tex_num_images[IMAGE_DATA_TYPE_FLOAT4] = TEX_NUM_FLOAT4_IMAGES_OPENCL;
+		tex_num_images[IMAGE_DATA_TYPE_FLOAT] = TEX_NUM_FLOAT_IMAGES_OPENCL;
 		tex_image_byte4_start = TEX_IMAGE_BYTE4_START_OPENCL;
+		tex_image_float_start = TEX_IMAGE_FLOAT_START_OPENCL;
 	}
 	/* Should never happen */
 	else {
 		tex_num_images[IMAGE_DATA_TYPE_BYTE4] = 0;
 		tex_num_images[IMAGE_DATA_TYPE_FLOAT4] = 0;
+		tex_num_images[IMAGE_DATA_TYPE_FLOAT] = 0;
 		tex_image_byte4_start = 0;
+		tex_image_float_start = 0;
 		assert(0);
 	}
 }
@@ -106,21 +115,30 @@ bool ImageManager::set_animation_frame_update(int frame)
 	return false;
 }
 
-bool ImageManager::is_float_image(const string& filename, void *builtin_data, bool& is_linear)
+ImageManager::ImageDataType ImageManager::get_image_metadata(const string& filename,
+                                                             void *builtin_data,
+                                                             bool& is_linear)
 {
 	bool is_float = false;
 	is_linear = false;
+	int channels = 4;
 
 	if(builtin_data) {
 		if(builtin_image_info_cb) {
-			int width, height, depth, channels;
+			int width, height, depth;
 			builtin_image_info_cb(filename, builtin_data, is_float, width, height, depth, channels);
 		}
 
-		if(is_float)
+		if(is_float) {
 			is_linear = true;
 
-		return is_float;
+			if(channels > 1)
+				return IMAGE_DATA_TYPE_FLOAT4;
+			else
+				return IMAGE_DATA_TYPE_FLOAT;
+		}
+		else
+			return IMAGE_DATA_TYPE_BYTE4;
 	}
 
 	ImageInput *in = ImageInput::create(filename);
@@ -143,6 +161,8 @@ bool ImageManager::is_float_image(const string& filename, void *builtin_data, bo
 				}
 			}
 
+			channels = spec.nchannels;
+
 			/* basic color space detection, not great but better than nothing
 			 * before we do OpenColorIO integration */
 			if(is_float) {
@@ -166,20 +186,34 @@ bool ImageManager::is_float_image(const string& filename, void *builtin_data, bo
 		delete in;
 	}
 
-	return is_float;
+	if(is_float) {
+		if(channels > 1)
+			return IMAGE_DATA_TYPE_FLOAT4;
+		else
+			return IMAGE_DATA_TYPE_FLOAT;
+	}
+	else
+		return IMAGE_DATA_TYPE_BYTE4;
 }
 
 int ImageManager::type_index_to_flattened_slot(int slot, ImageDataType type)
 {
 	if(type == IMAGE_DATA_TYPE_BYTE4)
 		return slot + tex_image_byte4_start;
+	else if(type == IMAGE_DATA_TYPE_FLOAT)
+		return slot + tex_image_float_start;
 	else
 		return slot;
 }
 
 int ImageManager::flattened_slot_to_type_index(int flat_slot, ImageDataType *type)
 {
-	if(flat_slot >= tex_image_byte4_start) {
+	if(flat_slot >= tex_image_float_start)
+	{
+		*type = IMAGE_DATA_TYPE_FLOAT;
+		return flat_slot - tex_image_float_start;
+	}
+	else if(flat_slot >= tex_image_byte4_start) {
 		*type = IMAGE_DATA_TYPE_BYTE4;
 		return flat_slot - tex_image_byte4_start;
 	}
@@ -192,9 +226,11 @@ int ImageManager::flattened_slot_to_type_index(int flat_slot, ImageDataType *typ
 string ImageManager::name_from_type(int type)
 {
 	if(type == IMAGE_DATA_TYPE_FLOAT4)
+		return "float4";
+	else if(type == IMAGE_DATA

@@ Diff output truncated at 10240 characters. @@




More information about the Bf-blender-cvs mailing list