[Bf-blender-cvs] [22f243d379f] soc-2018-cycles-volumes: Reduce sparse grid memory usage and minor fixes.

Geraldine Chua noreply at git.blender.org
Sun Jun 10 17:16:19 CEST 2018


Commit: 22f243d379ffd1bd7101dd1955b2efe0062ed651
Author: Geraldine Chua
Date:   Fri Jun 8 18:11:03 2018 +0800
Branches: soc-2018-cycles-volumes
https://developer.blender.org/rB22f243d379ffd1bd7101dd1955b2efe0062ed651

Reduce sparse grid memory usage and minor fixes.

Sparse grids originally padded out an image to dimensions divisible by
TILE_SIZE, which resulted in many empty voxels in large volumes. Now,
border tiles are taken into account when calculating voxel indexes.

Aside from that, this commit makes some other minor fixes throughout the files.

===================================================================

M	intern/cycles/device/device_cpu.cpp
M	intern/cycles/device/device_memory.h
M	intern/cycles/kernel/kernels/cpu/kernel_cpu_image.h
M	intern/cycles/render/image.cpp
M	intern/cycles/render/image.h
M	intern/cycles/render/mesh_volume.cpp
M	intern/cycles/util/util_sparse_grid.h

===================================================================

diff --git a/intern/cycles/device/device_cpu.cpp b/intern/cycles/device/device_cpu.cpp
index a21884de6d6..4e1fa05e168 100644
--- a/intern/cycles/device/device_cpu.cpp
+++ b/intern/cycles/device/device_cpu.cpp
@@ -378,8 +378,9 @@ public:
 	{
 		size_t total_memory = mem.memory_size();
 		device_memory *offsets = mem.offsets;
-		if(offsets != NULL)
+		if(offsets) {
 			total_memory += offsets->memory_size();
+		}
 
 		VLOG(1) << "Texture allocate: " << mem.name << ", "
 		        << string_human_readable_number(total_memory) << " bytes. ("
@@ -414,21 +415,10 @@ public:
 			info.cl_buffer = 0;
 			info.interpolation = mem.interpolation;
 			info.extension = mem.extension;
-			if(offsets != NULL) {
-				/* If mem is a sparse volume, its real (tile)
-				 * dimensions are stored in the offsets texture.
-				 * Here, we store the pixel resolution. */
-				info.width = offsets->data_width * TILE_SIZE;
-				info.height = offsets->data_height * TILE_SIZE;
-				info.depth = offsets->data_depth * TILE_SIZE;
-				info.offsets = (uint64_t)offsets->host_pointer;
-			}
-			else {
-				info.width = mem.data_width;
-				info.height = mem.data_height;
-				info.depth = mem.data_depth;
-				info.offsets = (uint64_t)0;
-			}
+			info.width = mem.real_width;
+			info.height = mem.real_height;
+			info.depth = mem.real_depth;
+			info.offsets = (uint64_t)(offsets ? offsets->host_pointer : 0);
 			need_texture_info = true;
 		}
 
@@ -436,7 +426,7 @@ public:
 		mem.device_size = mem.memory_size();
 		stats.mem_alloc(mem.device_size);
 
-		if(offsets != NULL) {
+		if(offsets) {
 			offsets->device_pointer = (device_ptr)offsets->host_pointer;
 			offsets->device_size = offsets->memory_size();
 			stats.mem_alloc(offsets->device_size);
@@ -447,6 +437,9 @@ public:
 	void tex_free(device_memory& mem)
 	{
 		if(mem.device_pointer) {
+			if(mem.offsets) {
+				tex_free(*mem.offsets);
+			}
 			mem.device_pointer = 0;
 			stats.mem_free(mem.device_size);
 			mem.device_size = 0;
diff --git a/intern/cycles/device/device_memory.h b/intern/cycles/device/device_memory.h
index dd7f72ce102..a230d3928ca 100644
--- a/intern/cycles/device/device_memory.h
+++ b/intern/cycles/device/device_memory.h
@@ -187,6 +187,12 @@ public:
 	size_t data_width;
 	size_t data_height;
 	size_t data_depth;
+	/* For normal images, data_* = real_*. For sparse images,
+	 * real_* refers to the real voxel resolution of the image,
+	 * since sparse images are stored as a long 1D array. */
+	size_t real_width;
+	size_t real_height;
+	size_t real_depth;
 	MemoryType type;
 	const char *name;
 	InterpolationType interpolation;
@@ -318,9 +324,9 @@ public:
 		}
 
 		data_size = new_size;
-		data_width = width;
-		data_height = height;
-		data_depth = depth;
+		data_width = real_width = width;
+		data_height = real_height = height;
+		data_depth = real_depth = depth;
 
 		return data();
 	}
diff --git a/intern/cycles/kernel/kernels/cpu/kernel_cpu_image.h b/intern/cycles/kernel/kernels/cpu/kernel_cpu_image.h
index 93aa6117f58..7513efc6b15 100644
--- a/intern/cycles/kernel/kernels/cpu/kernel_cpu_image.h
+++ b/intern/cycles/kernel/kernels/cpu/kernel_cpu_image.h
@@ -77,16 +77,23 @@ template<typename T> struct TextureInterpolator  {
 
 	static ccl_always_inline float4 read(const T *data, const int *offsets,
 	                                     int x, int y, int z,
+	                                     int width, int height, int depth,
 	                                     int tiw, int tih, int tid)
 	{
-		int index = compute_index(offsets, x, y, z, tiw, tih, tid);
+		int index = compute_index(offsets, x, y, z,
+		                          width, height, depth, tiw, tih, tid);
 		return index < 0 ? make_float4(0.0f) : read(data[index]);
 	}
 
 	static ccl_always_inline float4 read(const T *data, const int *offsets,
 	                                     int idx, int width, int height, int depth)
 	{
-		int index = compute_index(offsets, idx, width, height, depth);
+		int3 c = compute_coordinates(idx, width, height, depth);
+		int index = compute_index(offsets, c.x, c.y, c.z,
+		                          width, height, depth,
+		                          get_tile_res(width),
+		                          get_tile_res(height),
+		                          get_tile_res(depth));
 		return index < 0 ? make_float4(0.0f) : read(data[index]);
 	}
 
@@ -300,8 +307,8 @@ template<typename T> struct TextureInterpolator  {
 		const int *ofs = (const int*)info.offsets;
 
 		if(ofs) {
-			return read(data, ofs, ix, iy, iz, get_tile_res(width),
-			            get_tile_res(height), get_tile_res(depth));
+			return read(data, ofs, ix, iy, iz, width, height, depth,
+			            get_tile_res(width), get_tile_res(height), get_tile_res(depth));
 		}
 		return read(data[compute_index(ix, iy, iz, width, height, depth)]);
 	}
@@ -361,14 +368,14 @@ template<typename T> struct TextureInterpolator  {
 			   !tile_is_active(ofs, nix, niy, niz, tiw, tih, tid)) {
 				return make_float4(0.0f);
 			}
-			r  = (1.0f - tz)*(1.0f - ty)*(1.0f - tx) * read(data, ofs, ix,  iy,  iz,  tiw, tih, tid);
-			r += (1.0f - tz)*(1.0f - ty)*tx			 * read(data, ofs, nix, iy,  iz,  tiw, tih, tid);
-			r += (1.0f - tz)*ty*(1.0f - tx)			 * read(data, ofs, ix,  niy, iz,  tiw, tih, tid);
-			r += (1.0f - tz)*ty*tx					 * read(data, ofs, nix, niy, iz,  tiw, tih, tid);
-			r += tz*(1.0f - ty)*(1.0f - tx)			 * read(data, ofs, ix,  iy,  niz, tiw, tih, tid);
-			r += tz*(1.0f - ty)*tx					 * read(data, ofs, nix, iy,  niz, tiw, tih, tid);
-			r += tz*ty*(1.0f - tx)					 * read(data, ofs, ix,  niy, niz, tiw, tih, tid);
-			r += tz*ty*tx							 * read(data, ofs, nix, niy, niz, tiw, tih, tid);
+			r  = (1.0f - tz)*(1.0f - ty)*(1.0f - tx) * read(data, ofs, ix,  iy,  iz,  width, height, depth, tiw, tih, tid);
+			r += (1.0f - tz)*(1.0f - ty)*tx			 * read(data, ofs, nix, iy,  iz,  width, height, depth, tiw, tih, tid);
+			r += (1.0f - tz)*ty*(1.0f - tx)			 * read(data, ofs, ix,  niy, iz,  width, height, depth, tiw, tih, tid);
+			r += (1.0f - tz)*ty*tx					 * read(data, ofs, nix, niy, iz,  width, height, depth, tiw, tih, tid);
+			r += tz*(1.0f - ty)*(1.0f - tx)			 * read(data, ofs, ix,  iy,  niz, width, height, depth, tiw, tih, tid);
+			r += tz*(1.0f - ty)*tx					 * read(data, ofs, nix, iy,  niz, width, height, depth, tiw, tih, tid);
+			r += tz*ty*(1.0f - tx)					 * read(data, ofs, ix,  niy, niz, width, height, depth, tiw, tih, tid);
+			r += tz*ty*tx							 * read(data, ofs, nix, niy, niz, width, height, depth, tiw, tih, tid);
 		}
 		else {
 			r  = (1.0f - tz)*(1.0f - ty)*(1.0f - tx) * read(data[compute_index(ix,  iy,  iz,  width, height, depth)]);
diff --git a/intern/cycles/render/image.cpp b/intern/cycles/render/image.cpp
index 61394700886..8ce96a5810a 100644
--- a/intern/cycles/render/image.cpp
+++ b/intern/cycles/render/image.cpp
@@ -730,53 +730,52 @@ void ImageManager::file_load_failed(device_vector<DeviceType> *tex_img,
 }
 
 template<typename DeviceType>
-bool ImageManager::file_make_image_sparse(Device *device,
+void ImageManager::file_make_image_sparse(Device *device,
                                           Image *img,
                                           device_vector<DeviceType> *tex_img)
 {
-	device_vector<int> *tex_offsets
-	        = new device_vector<int>(device,
-	                                 (img->mem_name + "_offsets").c_str(),
-	                                 MEM_TEXTURE);
+	int real_width = tex_img->data_width;
+	int real_height = tex_img->data_height;
+	int real_depth = tex_img->data_depth;
 
 	vector<DeviceType> sparse_grid;
 	vector<int> offsets;
+
 	int voxel_count = create_sparse_grid<DeviceType>(tex_img->data(),
-	                                                 tex_img->data_width,
-	                                                 tex_img->data_height,
-	                                                 tex_img->data_depth,
+	                                                 real_width,
+	                                                 real_height,
+	                                                 real_depth,
 	                                                 img->isovalue,
 	                                                 &sparse_grid,
 	                                                 &offsets);
 
+	size_t memory_usage = offsets.size() * sizeof(int) +
+	                   voxel_count * sizeof(DeviceType);
+
 	if(voxel_count < 1) {
 		VLOG(1) << "Could not make sparse grid for "
 		        << path_filename(img->filename) << " (" << img->mem_name << ")"
 		        << ", no active tiles";
-		delete tex_offsets;
-		tex_offsets = NULL;
-		return false;
+		return;
 	}
 
-	VLOG(1) << "Original memory usage of '"
-	        << path_filename(img->filename) << "' (" << img->mem_name << "): "
-	        << string_human_readable_size(tex_img->memory_size());
+	VLOG(1) << "Memory usage of '"
+	        << path_filename(img->filename) << "' (" << img->mem_name
+	        << ") reduced from "
+	        << string_human_readable_size(tex_img->memory_size()) << " to "
+	        << string_human_readable_size(memory_usage);
 
 	DeviceType *texture_pixels;
 	int *texture_offsets;
-	int tiw = get_tile_res(tex_img->data_width);
-	int tih = get_tile_res(tex_img->data_height);
-	int tid = get_tile_res(tex_img->data_depth);
+	device_vector<int> *tex_offsets
+	        = new device_vector<int>(device,
+	                                 (img->mem_name + "_offsets").c_str(),
+	                                 MEM_TEXTURE);
 
 	{
-		/* Since only active tiles are stored in tex_img, its
-		 * allocated memory will be <= the actual resolution
-		 * of the volume. We store the true resolution (in tiles) in the
-		 * tex_offsets instead, since it needs to be allocated enough
-		 * space to track all tiles anyway. */
 		thread_scoped_lock device_lock(device_mutex);
 		texture_pixels = (DeviceType*)tex_img->alloc(voxel_count);
-		texture_offsets = (int*)tex_offsets->alloc(tiw, tih, tid);
+		texture_offsets = (int*)tex_offsets->alloc(offsets.size());
 	}
 
 	memcpy(&texture_offsets[0],
@@ -787,7 +786,9 @@ bool ImageManager::file_make_image_sparse(Device *device,
 		   voxel_count * sizeof(DeviceType));
 
 	tex_img->offsets = tex_offsets;
-	return true;
+	tex_img->real_

@@ Diff output truncated at 10240 characters. @@



More information about the Bf-blender-cvs mailing list