[Bf-blender-cvs] [c08c931] master: Cycles / CUDA: Increase maximum image textures on GPU. Instead of 95, we can use 145 images now. This only affects Kepler and above (sm_30, sm_35 and sm_50).
Thomas Dinges
noreply at git.blender.org
Sun May 11 03:39:08 CEST 2014
Commit: c08c931fb6f57bdca7865d48ac09a0775590f3ce
Author: Thomas Dinges
Date: Sun May 11 03:38:39 2014 +0200
https://developer.blender.org/rBc08c931fb6f57bdca7865d48ac09a0775590f3ce
Cycles / CUDA: Increase maximum image textures on GPU.
Instead of 95, we can use 145 images now. This only affects Kepler and above (sm_30, sm_35 and sm_50).
This can be increased further if needed, but let's first verify that this does not come with a performance impact.
Originally developed during my GSoC 2013.
===================================================================
M intern/cycles/device/device.h
M intern/cycles/device/device_cuda.cpp
M intern/cycles/device/device_multi.cpp
M intern/cycles/kernel/kernel_textures.h
M intern/cycles/kernel/svm/svm_image.h
M intern/cycles/render/image.cpp
M intern/cycles/render/image.h
M intern/cycles/render/scene.cpp
M intern/cycles/render/scene.h
===================================================================
diff --git a/intern/cycles/device/device.h b/intern/cycles/device/device.h
index cbabcb1..bcddd4f 100644
--- a/intern/cycles/device/device.h
+++ b/intern/cycles/device/device.h
@@ -54,6 +54,7 @@ public:
bool display_device;
bool advanced_shading;
bool pack_images;
+ bool extended_images; /* flag for GPU and Multi device */
vector<DeviceInfo> multi_devices;
DeviceInfo()
@@ -64,6 +65,7 @@ public:
display_device = false;
advanced_shading = true;
pack_images = false;
+ extended_images = false;
}
};
diff --git a/intern/cycles/device/device_cuda.cpp b/intern/cycles/device/device_cuda.cpp
index 9139a75..6895521 100644
--- a/intern/cycles/device/device_cuda.cpp
+++ b/intern/cycles/device/device_cuda.cpp
@@ -1150,6 +1150,7 @@ void device_cuda_info(vector<DeviceInfo>& devices)
int major, minor;
cuDeviceComputeCapability(&major, &minor, num);
info.advanced_shading = (major >= 2);
+ info.extended_images = (major >= 3);
info.pack_images = false;
/* if device has a kernel timeout, assume it is used for display */
diff --git a/intern/cycles/device/device_multi.cpp b/intern/cycles/device/device_multi.cpp
index 59bbf3b..c866eba 100644
--- a/intern/cycles/device/device_multi.cpp
+++ b/intern/cycles/device/device_multi.cpp
@@ -328,6 +328,7 @@ static bool device_multi_add(vector<DeviceInfo>& devices, DeviceType type, bool
info.advanced_shading = with_advanced_shading;
info.pack_images = false;
+ info.extended_images = true;
foreach(DeviceInfo& subinfo, devices) {
if(subinfo.type == type) {
@@ -351,6 +352,7 @@ static bool device_multi_add(vector<DeviceInfo>& devices, DeviceType type, bool
if(subinfo.display_device)
info.display_device = true;
info.pack_images = info.pack_images || subinfo.pack_images;
+ info.extended_images = info.extended_images && subinfo.extended_images;
num_added++;
}
}
diff --git a/intern/cycles/kernel/kernel_textures.h b/intern/cycles/kernel/kernel_textures.h
index c8734d6..94115fd 100644
--- a/intern/cycles/kernel/kernel_textures.h
+++ b/intern/cycles/kernel/kernel_textures.h
@@ -174,6 +174,61 @@ KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_097)
KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_098)
KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_099)
+/* Kepler and above */
+#if __CUDA_ARCH__ >= 300
+KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_100)
+KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_101)
+KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_102)
+KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_103)
+KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_104)
+KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_105)
+KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_106)
+KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_107)
+KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_108)
+KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_109)
+KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_110)
+KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_111)
+KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_112)
+KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_113)
+KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_114)
+KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_115)
+KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_116)
+KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_117)
+KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_118)
+KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_119)
+KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_120)
+KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_121)
+KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_122)
+KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_123)
+KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_124)
+KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_125)
+KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_126)
+KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_127)
+KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_128)
+KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_129)
+KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_130)
+KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_131)
+KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_132)
+KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_133)
+KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_134)
+KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_135)
+KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_136)
+KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_137)
+KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_138)
+KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_139)
+KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_140)
+KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_141)
+KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_142)
+KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_143)
+KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_144)
+KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_145)
+KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_146)
+KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_147)
+KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_148)
+KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_149)
+KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_150)
+#endif
+
/* packed image (opencl) */
KERNEL_TEX(uchar4, texture_uchar4, __tex_image_packed)
KERNEL_TEX(uint4, texture_uint4, __tex_image_packed_info)
diff --git a/intern/cycles/kernel/svm/svm_image.h b/intern/cycles/kernel/svm/svm_image.h
index 6627786..6c658af 100644
--- a/intern/cycles/kernel/svm/svm_image.h
+++ b/intern/cycles/kernel/svm/svm_image.h
@@ -149,8 +149,8 @@ ccl_device float4 svm_image_texture(KernelGlobals *kg, int id, float x, float y,
* - group by size and use a 3d texture, performance impact
* - group into larger texture with some padding for correct lerp
*
- * also note that cuda has 128 textures limit, we use 100 now, since
- * we still need some for other storage */
+ * also note that cuda has a textures limit (128 for Fermi, 256 for Kepler),
+ * and we cannot use all since we still need some for other storage */
switch(id) {
case 0: r = kernel_tex_image_interp(__tex_image_float_000, x, y); break;
@@ -253,7 +253,62 @@ ccl_device float4 svm_image_texture(KernelGlobals *kg, int id, float x, float y,
case 97: r = kernel_tex_image_interp(__tex_image_097, x, y); break;
case 98: r = kernel_tex_image_interp(__tex_image_098, x, y); break;
case 99: r = kernel_tex_image_interp(__tex_image_099, x, y); break;
- default:
+
+#if __CUDA_ARCH__ >= 300
+ case 100: r = kernel_tex_image_interp(__tex_image_100, x, y); break;
+ case 101: r = kernel_tex_image_interp(__tex_image_101, x, y); break;
+ case 102: r = kernel_tex_image_interp(__tex_image_102, x, y); break;
+ case 103: r = kernel_tex_image_interp(__tex_image_103, x, y); break;
+ case 104: r = kernel_tex_image_interp(__tex_image_104, x, y); break;
+ case 105: r = kernel_tex_image_interp(__tex_image_105, x, y); break;
+ case 106: r = kernel_tex_image_interp(__tex_image_106, x, y); break;
+ case 107: r = kernel_tex_image_interp(__tex_image_107, x, y); break;
+ case 108: r = kernel_tex_image_interp(__tex_image_108, x, y); break;
+ case 109: r = kernel_tex_image_interp(__tex_image_109, x, y); break;
+ case 110: r = kernel_tex_image_interp(__tex_image_110, x, y); break;
+ case 111: r = kernel_tex_image_interp(__tex_image_111, x, y); break;
+ case 112: r = kernel_tex_image_interp(__tex_image_112, x, y); break;
+ case 113: r = kernel_tex_image_interp(__tex_image_113, x, y); break;
+ case 114: r = kernel_tex_image_interp(__tex_image_114, x, y); break;
+ case 115: r = kernel_tex_image_interp(__tex_image_115, x, y); break;
+ case 116: r = kernel_tex_image_interp(__tex_image_116, x, y); break;
+ case 117: r = kernel_tex_image_interp(__tex_image_117, x, y); break;
+ case 118: r = kernel_tex_image_interp(__tex_image_118, x, y); break;
+ case 119: r = kernel_tex_image_interp(__tex_image_119, x, y); break;
+ case 120: r = kernel_tex_image_interp(__tex_image_120, x, y); break;
+ case 121: r = kernel_tex_image_interp(__tex_image_121, x, y); break;
+ case 122: r = kernel_tex_image_interp(__tex_image_122, x, y); break;
+ case 123: r = kernel_tex_image_interp(__tex_image_123, x, y); break;
+ case 124: r = kernel_tex_image_interp(__tex_image_124, x, y); break;
+ case 125: r = kernel_tex_image_interp(__tex_image_125, x, y); break;
+ case 126: r = kernel_tex_image_interp(__tex_image_126, x, y); break;
+ case 127: r = kernel_tex_image_interp(__tex_image_127, x, y); break;
+ case 128: r = kernel_tex_image_interp(__tex_image_128, x, y); break;
+ case 129: r = kernel_tex_image_interp(__tex_image_129, x, y); break;
+ case 130: r = kernel_tex_image_interp(__tex_image_130, x, y); break;
+ case 131: r = kernel_tex_image_interp(__tex_image_131, x, y); break;
+ case 132: r = kernel_tex_image_interp(__tex_image_132, x, y); break;
+ case 133: r = kernel_tex_image_interp(__tex_image_133, x, y); break;
+ case 134: r = kernel_tex_image_interp(__tex_image_134, x, y); break;
+ case 135: r = kernel_tex_image_interp(__tex_image_135, x, y); break;
+ case 136: r = kernel_tex_image_interp(__tex_image_136, x, y); break;
+ case 137: r = kernel_tex_image_interp(__tex_image_137, x, y); break;
+ case 138: r = kernel_tex_image_interp(__tex_image_138, x, y); break;
+ case 139: r = kernel_tex_image_interp(__tex_image_139, x, y); break;
+ case 140: r = kernel_tex_image_interp(__tex_image_140, x, y); break;
+ case 141: r = kernel_tex_image_interp(__tex_image_141, x, y); break;
+ case 142: r = kernel_tex_image_interp(__tex_image_142, x, y); break;
+ case 143: r = kernel_tex_image_interp(__tex_image_143, x, y); break;
+ case 144: r = kernel_tex_image_interp(__tex_image_144, x, y); break;
+ case 14
@@ Diff output truncated at 10240 characters. @@
More information about the Bf-blender-cvs mailing list