[Bf-blender-cvs] [4e94a6f576d] soc-2018-cycles-volumes: Minor sparse tile fixes.

Mon Jun 11 17:00:24 CEST 2018

Commit: 4e94a6f576de76d670b5ed3dd69500fd8c539e67
Author: Geraldine Chua
Date:   Mon Jun 11 22:57:59 2018 +0800
Branches: soc-2018-cycles-volumes
https://developer.blender.org/rB4e94a6f576de76d670b5ed3dd69500fd8c539e67

Minor sparse tile fixes.

Fixed some really obvious errors with CUDA sampling, and moved the
coordinates-to-index calculation back into the kernel, saving a small
percentage of time.

===================================================================

M	intern/cycles/kernel/kernels/cpu/kernel_cpu_image.h
M	intern/cycles/kernel/kernels/cuda/kernel_cuda_image.h
M	intern/cycles/kernel/kernels/opencl/kernel_opencl_image.h
M	intern/cycles/render/image.cpp
M	intern/cycles/render/mesh_volume.cpp
M	intern/cycles/util/util_sparse_grid.h

===================================================================

diff --git a/intern/cycles/kernel/kernels/cpu/kernel_cpu_image.h b/intern/cycles/kernel/kernels/cpu/kernel_cpu_image.h
index c43b94db7e0..a0d6b54245b 100644
--- a/intern/cycles/kernel/kernels/cpu/kernel_cpu_image.h
+++ b/intern/cycles/kernel/kernels/cpu/kernel_cpu_image.h
@@ -30,6 +30,11 @@ template<typename T> struct TextureInterpolator  {
 		u[3] = (1.0f / 6.0f) * t * t * t; \
 	} (void)0
 
+	static ccl_always_inline int flatten(int x, int y, int z, int width, int height)
+	{
+		return x + width * (y + z * height);
+	}
+
 	static ccl_always_inline float4 read(float4 r)
 	{
 		return r;
@@ -82,7 +87,7 @@ template<typename T> struct TextureInterpolator  {
 		int tix = x / TILE_SIZE, itix = x % TILE_SIZE,
 		    tiy = y / TILE_SIZE, itiy = y % TILE_SIZE,
 		    tiz = z / TILE_SIZE, itiz = z % TILE_SIZE;
-		int dense_index = compute_index_fast(tix, tiy, tiz, tiw, tih) * 2;
+		int dense_index = flatten(tix, tiy, tiz, tiw, tih) * 2;
 		int sparse_index = grid_info[dense_index];
 		int dims = grid_info[dense_index + 1];
 		if(sparse_index < 0) {
@@ -90,7 +95,7 @@ template<typename T> struct TextureInterpolator  {
 		}
 		int itiw = dims & (1 << ST_SHIFT_TRUNCATE_WIDTH) ? ltw : TILE_SIZE;
 		int itih = dims & (1 << ST_SHIFT_TRUNCATE_HEIGHT) ? lth : TILE_SIZE;
-		int in_tile_index = compute_index_fast(itix, itiy, itiz, itiw, itih);
+		int in_tile_index = flatten(itix, itiy, itiz, itiw, itih);
 		return read(data[sparse_index + in_tile_index]);
 	}
 
@@ -318,7 +323,7 @@ template<typename T> struct TextureInterpolator  {
 			            info.tiled_width, info.tiled_height,
 			            info.last_tile_width, info.last_tile_height);
 		}
-		return read(data[compute_index_fast(ix, iy, iz, width, height)]);
+		return read(data[flatten(ix, iy, iz, width, height)]);
 	}
 
 	static ccl_always_inline float4 interp_3d_linear(const TextureInfo& info,
@@ -384,14 +389,14 @@ template<typename T> struct TextureInterpolator  {
 			r += tz*ty*tx                            * read(data, gi, nix, niy, niz, tiw, tih, ltw, lth);
 		}
 		else {
-			r  = (1.0f - tz)*(1.0f - ty)*(1.0f - tx) * read(data[compute_index_fast(ix,  iy,  iz,  width, height)]);
-			r += (1.0f - tz)*(1.0f - ty)*tx			 * read(data[compute_index_fast(nix, iy,  iz,  width, height)]);
-			r += (1.0f - tz)*ty*(1.0f - tx)			 * read(data[compute_index_fast(ix,  niy, iz,  width, height)]);
-			r += (1.0f - tz)*ty*tx					 * read(data[compute_index_fast(nix, niy, iz,  width, height)]);
-			r += tz*(1.0f - ty)*(1.0f - tx)			 * read(data[compute_index_fast(ix,  iy,  niz, width, height)]);
-			r += tz*(1.0f - ty)*tx					 * read(data[compute_index_fast(nix, iy,  niz, width, height)]);
-			r += tz*ty*(1.0f - tx)					 * read(data[compute_index_fast(ix,  niy, niz, width, height)]);
-			r += tz*ty*tx							 * read(data[compute_index_fast(nix, niy, niz, width, height)]);
+			r  = (1.0f - tz)*(1.0f - ty)*(1.0f - tx) * read(data[flatten(ix,  iy,  iz,  width, height)]);
+			r += (1.0f - tz)*(1.0f - ty)*tx			 * read(data[flatten(nix, iy,  iz,  width, height)]);
+			r += (1.0f - tz)*ty*(1.0f - tx)			 * read(data[flatten(ix,  niy, iz,  width, height)]);
+			r += (1.0f - tz)*ty*tx					 * read(data[flatten(nix, niy, iz,  width, height)]);
+			r += tz*(1.0f - ty)*(1.0f - tx)			 * read(data[flatten(ix,  iy,  niz, width, height)]);
+			r += tz*(1.0f - ty)*tx					 * read(data[flatten(nix, iy,  niz, width, height)]);
+			r += tz*ty*(1.0f - tx)					 * read(data[flatten(ix,  niy, niz, width, height)]);
+			r += tz*ty*tx							 * read(data[flatten(nix, niy, niz, width, height)]);
 		}
 
 		return r;
diff --git a/intern/cycles/kernel/kernels/cuda/kernel_cuda_image.h b/intern/cycles/kernel/kernels/cuda/kernel_cuda_image.h
index dcd1b33722a..f73f5cdbb3a 100644
--- a/intern/cycles/kernel/kernels/cuda/kernel_cuda_image.h
+++ b/intern/cycles/kernel/kernels/cuda/kernel_cuda_image.h
@@ -58,7 +58,8 @@ ccl_device float cubic_h1(float a)
 	return 1.0f + cubic_w3(a) / (cubic_w2(a) + cubic_w3(a)) + 0.5f;
 }
 
-ccl_device bool compute_sparse_coordinates(const TextureInfo *info, float fx, float fy, float fz)
+/* Converts coordinates from dense (normal) volume textures to sparse ones. */
+ccl_device bool compute_sparse_coordinates(const TextureInfo *info, float &fx, float &fy, float &fz)
 {
 	float *ix, *iy, *iz;
 	modff(fx, *ix);
@@ -76,9 +77,9 @@ ccl_device bool compute_sparse_coordinates(const TextureInfo *info, float fx, fl
 	int tile_y = info->grid_info[dense_index + 1];
 	int tile_z = info->grid_info[dense_index + 2];
 	int dims = info->grid_info[dense_index + 3];
-	fx += tile_x + itix + (dims & (1 << ST_SHIFT_X_LHS_PAD));
-	fy += tile_y + itiy + (dims & (1 << ST_SHIFT_Y_LHS_PAD));
-	fz += tile_z + itiz + (dims & (1 << ST_SHIFT_Z_LHS_PAD));
+	fx += tile_x + itix + (dims & (1 << ST_SHIFT_X_PAD));
+	fy += tile_y + itiy + (dims & (1 << ST_SHIFT_Y_PAD));
+	fz += tile_z + itiz + (dims & (1 << ST_SHIFT_Z_PAD));
 	return true;
 }
 
diff --git a/intern/cycles/kernel/kernels/opencl/kernel_opencl_image.h b/intern/cycles/kernel/kernels/opencl/kernel_opencl_image.h
index 2e0db6609f7..be1996af3a0 100644
--- a/intern/cycles/kernel/kernels/opencl/kernel_opencl_image.h
+++ b/intern/cycles/kernel/kernels/opencl/kernel_opencl_image.h
@@ -67,6 +67,7 @@ ccl_device_inline float4 svm_image_texture_read(KernelGlobals *kg, const ccl_glo
 	}
 }
 
+/* Calculates the index for sparse volume textures. */
 ccl_device_inline float4 svm_image_texture_read(KernelGlobals *kg,
                                                 const ccl_global TextureInfo *info,
                                                 int id, int x, int y, int z)
diff --git a/intern/cycles/render/image.cpp b/intern/cycles/render/image.cpp
index 3789821e7d1..e0337bfab42 100644
--- a/intern/cycles/render/image.cpp
+++ b/intern/cycles/render/image.cpp
@@ -739,14 +739,26 @@ void ImageManager::file_make_image_sparse(Device *device,
 	int real_depth = tex_img->data_depth;
 	vector<DeviceType> sparse_grid;
 	vector<int> grid_info;
+	int voxel_count;
 
-	int voxel_count = create_sparse_grid<DeviceType>(tex_img->data(),
+	if(device->info.type != DEVICE_CUDA) {
+		voxel_count = create_sparse_grid<DeviceType>(tex_img->data(),
 	                                                 real_width,
 	                                                 real_height,
 	                                                 real_depth,
 	                                                 img->isovalue,
 	                                                 &sparse_grid,
 	                                                 &grid_info);
+	}
+	else {
+		voxel_count = create_sparse_grid_cuda<DeviceType>(tex_img->data(),
+		                                                  real_width,
+		                                                  real_height,
+		                                                  real_depth,
+		                                                  img->isovalue,
+		                                                  &sparse_grid,
+		                                                  &grid_info);
+	}
 
 	if(voxel_count < 1) {
 		VLOG(1) << "Could not make sparse grid for "
diff --git a/intern/cycles/render/mesh_volume.cpp b/intern/cycles/render/mesh_volume.cpp
index 8e91105afdf..5ce8198b9d6 100644
--- a/intern/cycles/render/mesh_volume.cpp
+++ b/intern/cycles/render/mesh_volume.cpp
@@ -467,6 +467,7 @@ void MeshManager::create_volume_mesh(Scene *scene,
 	const int3 last_tile_res = make_int3(resolution.x % TILE_SIZE,
 	                                     resolution.y % TILE_SIZE,
 	                                     resolution.z % TILE_SIZE);
+	const bool using_cuda = (scene->device->info.type == DEVICE_CUDA);
 
 	if(attr) {
 		const Transform *tfm = attr->data_transform();
@@ -493,12 +494,24 @@ void MeshManager::create_volume_mesh(Scene *scene,
 					int voxel_index;
 
 					if(grid_info) {
-						voxel_index = compute_index(grid_info, x, y, z,
-						                            tiled_res.x,
-						                            tiled_res.y,
-						                            tiled_res.z,
-						                            last_tile_res.x,
-						                            last_tile_res.y);
+						if(!using_cuda) {
+							voxel_index = compute_index(grid_info, x, y, z,
+														tiled_res.x,
+														tiled_res.y,
+														tiled_res.z,
+														last_tile_res.x,
+														last_tile_res.y);
+						}
+						else {
+							voxel_index = compute_index_cuda(grid_info,
+							                                 x, y, z,
+							                                 resolution.x,
+							                                 resolution.y,
+							                                 resolution.z,
+							                                 tiled_res.x,
+							                                 tiled_res.y,
+							                                 tiled_res.z);
+						}
 						if(voxel_index < 0) {
 							continue;
 						}
diff --git a/intern/cycles/util/util_sparse_grid.h b/intern/cycles/util/util_sparse_grid.h
index dff9c3949ed..d63cee25fda 100644
--- a/intern/cycles/util/util_sparse_grid.h
+++ b/intern/cycles/util/util_sparse_grid.h
@@ -69,8 +69,8 @@ const inline int compute_index(const size_t x, const size_t y, const size_t z,
 	return x + width * (y + z * height);
 }
 
-const inline int compute_index_fast(const size_t x, const size_t y, const size_t z,
-                                    const size_t width, const size_t height)
+const inline int compute_index(const size_t x, const size_t y, const size_t z,
+                               const size_t width, const size_t height)
 {
 	return x + width * (y + z * height);
 }
@@ -114,11 +114,10 @@ const inline int compute_index(const int *grid_info,
 	int itih = dims & (1 << ST_SHIFT_TRUNCATE_HEIGHT) ? lth : TILE_SIZE;
 	/* Look up voxel in the tile.
 	 * Need to check whether or not a tile is padded on any of its 6 faces. */
-	int in_tile_index = compute_index_fast(itix, itiy, itiz, itiw, itih);
+	int in_tile_index = compute_index(itix, itiy, itiz, itiw, itih);
 	return sparse_index + in_tile_index;
 }
 
-
 /* Do not call this function in the kernel. */
 const inline int compute_index_cuda(const int *grid_info,
                                     int x, int y, int z,
@@ -176,7 +175,7 

@@ Diff output truncated at 10240 characters. @@