[Bf-blender-cvs] [fd64e214cbc] soc-2018-cycles-volumes: Remove SparseTile; support CPU tricubic interp.

Thu Jun 7 17:18:58 CEST 2018

Commit: fd64e214cbceefd09c863e6ee0c3aac1db2428b0
Author: Geraldine Chua
Date:   Thu Jun 7 17:03:24 2018 +0800
Branches: soc-2018-cycles-volumes
https://developer.blender.org/rBfd64e214cbceefd09c863e6ee0c3aac1db2428b0

Remove SparseTile; support CPU tricubic interp.

Sparse grids are now stored using the grid's regular element type instead
of the dedicated SparseTile struct. Also adds support for tricubic
interpolation of sparse grids in CPU rendering.

===================================================================

M	intern/cycles/kernel/kernels/cpu/kernel_cpu_image.h
M	intern/cycles/render/image.cpp
M	intern/cycles/render/mesh_volume.cpp
M	intern/cycles/util/util_sparse_grid.h

===================================================================

diff --git a/intern/cycles/kernel/kernels/cpu/kernel_cpu_image.h b/intern/cycles/kernel/kernels/cpu/kernel_cpu_image.h
index 3435bcab70a..93aa6117f58 100644
--- a/intern/cycles/kernel/kernels/cpu/kernel_cpu_image.h
+++ b/intern/cycles/kernel/kernels/cpu/kernel_cpu_image.h
@@ -75,6 +75,21 @@ template<typename T> struct TextureInterpolator  {
 		return read(data[y * width + x]);
 	}
 
+	static ccl_always_inline float4 read(const T *data, const int *offsets,
+	                                     int x, int y, int z,
+	                                     int tiw, int tih, int tid)
+	{
+		int index = compute_index(offsets, x, y, z, tiw, tih, tid);
+		return index < 0 ? make_float4(0.0f) : read(data[index]);
+	}
+
+	static ccl_always_inline float4 read(const T *data, const int *offsets,
+	                                     int idx, int width, int height, int depth)
+	{
+		int index = compute_index(offsets, idx, width, height, depth);
+		return index < 0 ? make_float4(0.0f) : read(data[index]);
+	}
+
 	static ccl_always_inline int wrap_periodic(int x, int width)
 	{
 		x %= width;
@@ -281,18 +296,14 @@ template<typename T> struct TextureInterpolator  {
 				return make_float4(0.0f);
 		}
 
+		const T *data = (const T*)info.data;
 		const int *ofs = (const int*)info.offsets;
+
 		if(ofs) {
-			const SparseTile<T> *data = (const SparseTile<T>*)info.data;
-			return read(get_value<T>(data, ofs, ix, iy, iz,
-			                         compute_tile_resolution(width),
-			                         compute_tile_resolution(height),
-			                         compute_tile_resolution(depth)));
-		}
-		else {
-			const T *data = (const T*)info.data;
-			return read(data[compute_index(ix, iy, iz, width, height, depth)]);
+			return read(data, ofs, ix, iy, iz, get_tile_res(width),
+			            get_tile_res(height), get_tile_res(depth));
 		}
+		return read(data[compute_index(ix, iy, iz, width, height, depth)]);
 	}
 
 	static ccl_always_inline float4 interp_3d_linear(const TextureInfo& info,
@@ -340,29 +351,26 @@ template<typename T> struct TextureInterpolator  {
 		}
 
 		float4 r;
+		const T *data = (const T*)info.data;
 		const int *ofs = (const int*)info.offsets;
 
 		if(ofs) {
-			const SparseTile<T> *data = (const SparseTile<T>*)info.data;
-			int tiw = compute_tile_resolution(width);
-			int tih = compute_tile_resolution(height);
-			int tid = compute_tile_resolution(depth);
+			int tiw = get_tile_res(width), tih = get_tile_res(height), tid = get_tile_res(depth);
 			/* Initial check if either voxel is in an active tile. */
 			if(!tile_is_active(ofs, ix, iy, iz, tiw, tih, tid) &&
 			   !tile_is_active(ofs, nix, niy, niz, tiw, tih, tid)) {
 				return make_float4(0.0f);
 			}
-			r  = (1.0f - tz)*(1.0f - ty)*(1.0f - tx) * read(get_value<T>(data, ofs, ix,  iy,  iz,  tiw, tih, tid));
-			r += (1.0f - tz)*(1.0f - ty)*tx			 * read(get_value<T>(data, ofs, nix, iy,  iz,  tiw, tih, tid));
-			r += (1.0f - tz)*ty*(1.0f - tx)			 * read(get_value<T>(data, ofs, ix,  niy, iz,  tiw, tih, tid));
-			r += (1.0f - tz)*ty*tx					 * read(get_value<T>(data, ofs, nix, niy, iz,  tiw, tih, tid));
-			r += tz*(1.0f - ty)*(1.0f - tx)			 * read(get_value<T>(data, ofs, ix,  iy,  niz, tiw, tih, tid));
-			r += tz*(1.0f - ty)*tx					 * read(get_value<T>(data, ofs, nix, iy,  niz, tiw, tih, tid));
-			r += tz*ty*(1.0f - tx)					 * read(get_value<T>(data, ofs, ix,  niy, niz, tiw, tih, tid));
-			r += tz*ty*tx							 * read(get_value<T>(data, ofs, nix, niy, niz, tiw, tih, tid));
+			r  = (1.0f - tz)*(1.0f - ty)*(1.0f - tx) * read(data, ofs, ix,  iy,  iz,  tiw, tih, tid);
+			r += (1.0f - tz)*(1.0f - ty)*tx			 * read(data, ofs, nix, iy,  iz,  tiw, tih, tid);
+			r += (1.0f - tz)*ty*(1.0f - tx)			 * read(data, ofs, ix,  niy, iz,  tiw, tih, tid);
+			r += (1.0f - tz)*ty*tx					 * read(data, ofs, nix, niy, iz,  tiw, tih, tid);
+			r += tz*(1.0f - ty)*(1.0f - tx)			 * read(data, ofs, ix,  iy,  niz, tiw, tih, tid);
+			r += tz*(1.0f - ty)*tx					 * read(data, ofs, nix, iy,  niz, tiw, tih, tid);
+			r += tz*ty*(1.0f - tx)					 * read(data, ofs, ix,  niy, niz, tiw, tih, tid);
+			r += tz*ty*tx							 * read(data, ofs, nix, niy, niz, tiw, tih, tid);
 		}
 		else {
-			const T *data = (const T*)info.data;
 			r  = (1.0f - tz)*(1.0f - ty)*(1.0f - tx) * read(data[compute_index(ix,  iy,  iz,  width, height, depth)]);
 			r += (1.0f - tz)*(1.0f - ty)*tx			 * read(data[compute_index(nix, iy,  iz,  width, height, depth)]);
 			r += (1.0f - tz)*ty*(1.0f - tx)			 * read(data[compute_index(ix,  niy, iz,  width, height, depth)]);
@@ -461,7 +469,9 @@ template<typename T> struct TextureInterpolator  {
 		/* Some helper macro to keep code reasonable size,
 		 * let compiler to inline all the matrix multiplications.
 		 */
-#define DATA(x, y, z) (read(data[xc[x] + yc[y] + zc[z]]))
+#define DATA(x, y, z) (ofs ? \
+	    read(data, ofs, xc[x] + yc[y] + zc[z], width, height, depth) : \
+	    read(data[xc[x] + yc[y] + zc[z]]))
 #define COL_TERM(col, row) \
 		(v[col] * (u[0] * DATA(0, col, row) + \
 		           u[1] * DATA(1, col, row) + \
@@ -479,6 +489,7 @@ template<typename T> struct TextureInterpolator  {
 
 		/* Actual interpolation. */
 		const T *data = (const T*)info.data;
+		const int *ofs = (const int*)info.offsets;
 		return ROW_TERM(0) + ROW_TERM(1) + ROW_TERM(2) + ROW_TERM(3);
 
 #undef COL_TERM
diff --git a/intern/cycles/render/image.cpp b/intern/cycles/render/image.cpp
index c1caea34619..bef1419d178 100644
--- a/intern/cycles/render/image.cpp
+++ b/intern/cycles/render/image.cpp
@@ -730,52 +730,49 @@ void ImageManager::file_load_failed(device_vector<DeviceType> *tex_img,
 template<typename DeviceType>
 bool ImageManager::file_make_image_sparse(Device *device,
                                           Image *img,
-                                          device_vector<DeviceType> *tex_dense)
+                                          device_vector<DeviceType> *tex_img)
 {
-	device_vector<SparseTile<DeviceType>> *tex_sparse
-	        = new device_vector<SparseTile<DeviceType>>(device,
-	                                                    (img->mem_name).c_str(),
-	                                                    MEM_TEXTURE);
 	device_vector<int> *tex_offsets
 	        = new device_vector<int>(device,
 	                                 (img->mem_name + "_offsets").c_str(),
 	                                 MEM_TEXTURE);
 
-	vector<SparseTile<DeviceType>> sparse_grid;
+	vector<DeviceType> sparse_grid;
 	vector<int> offsets;
-	int active_tile_count = create_sparse_grid<DeviceType>(
-	                            tex_dense->data(),
-	                            tex_dense->data_width,
-	                            tex_dense->data_height,
-	                            tex_dense->data_depth,
-	                            &sparse_grid,
-	                            &offsets);
-
-	if(active_tile_count < 1) {
+	int voxel_count = create_sparse_grid<DeviceType>(tex_img->data(),
+	                                                 tex_img->data_width,
+	                                                 tex_img->data_height,
+	                                                 tex_img->data_depth,
+	                                                 &sparse_grid,
+	                                                 &offsets);
+
+	if(voxel_count < 1) {
 		VLOG(1) << "Could not make sparse grid for "
 		        << path_filename(img->filename) << " (" << img->mem_name << ")"
 		        << ", no active tiles";
-		delete tex_sparse;
 		delete tex_offsets;
-		tex_sparse = NULL;
 		tex_offsets = NULL;
 		return false;
 	}
 
-	SparseTile<DeviceType> *texture_pixels;
+	VLOG(1) << "Original memory usage of '"
+	        << path_filename(img->filename) << "' (" << img->mem_name << "): "
+	        << string_human_readable_size(tex_img->memory_size());
+
+	DeviceType *texture_pixels;
 	int *texture_offsets;
-	int tiw = compute_tile_resolution(tex_dense->data_width);
-	int tih = compute_tile_resolution(tex_dense->data_height);
-	int tid = compute_tile_resolution(tex_dense->data_depth);
+	int tiw = get_tile_res(tex_img->data_width);
+	int tih = get_tile_res(tex_img->data_height);
+	int tid = get_tile_res(tex_img->data_depth);
 
 	{
-		/* Since only active tiles are stored in tex_sparse, its
+		/* Since only active tiles are stored in tex_img, its
 		 * allocated memory will be <= the actual resolution
 		 * of the volume. We store the true resolution (in tiles) in the
 		 * tex_offsets instead, since it needs to be allocated enough
 		 * space to track all tiles anyway. */
 		thread_scoped_lock device_lock(device_mutex);
-		texture_pixels = (SparseTile<DeviceType>*)tex_sparse->alloc(active_tile_count);
+		texture_pixels = (DeviceType*)tex_img->alloc(voxel_count);
 		texture_offsets = (int*)tex_offsets->alloc(tiw, tih, tid);
 	}
 
@@ -784,20 +781,9 @@ bool ImageManager::file_make_image_sparse(Device *device,
 		   offsets.size() * sizeof(int));
 	memcpy(&texture_pixels[0],
 		   &sparse_grid[0],
-		   active_tile_count * sizeof(SparseTile<DeviceType>));
-
-	img->mem = tex_sparse;
-	img->mem->interpolation = img->interpolation;
-	img->mem->extension = img->extension;
-	img->mem->offsets = tex_offsets;
-
-	thread_scoped_lock device_lock(device_mutex);
-	tex_sparse->copy_to_device();
-
-	VLOG(1) << "Original memory usage of '"
-	        << path_filename(img->filename) << "' (" << img->mem_name << "): "
-	        << string_human_readable_size(tex_dense->memory_size());
+		   voxel_count * sizeof(DeviceType));
 
+	tex_img->offsets = tex_offsets;
 	return true;
 }
 
@@ -824,19 +810,17 @@ void ImageManager::load_image(Device *device,
 	}
 
 	if(img->make_sparse) {
-		if(file_make_image_sparse<DeviceType>(device, img, tex_img)) {
-			delete tex_img;
-			tex_img = NULL;
+		if(!file_make_image_sparse<DeviceType>(device, img, tex_img)) {
+			file_load_failed<StorageType, DeviceType>(tex_img, type);
 		}
 	}
 
-	if(tex_img) {
-		img->mem = tex_img;
-		img->mem->interpolation = img->interpolation;
-		img->mem->extension = img->extension;
-		thread_scoped_lock device_lock(device_mutex);
-		tex_img->copy_to_device();
-	}
+	img->mem = tex_img;
+	img->mem->interpolation = img->interpolation;
+	img->mem->extension = img->extension;
+
+	thread_s

@@ Diff output truncated at 10240 characters. @@