[Bf-blender-cvs] [f9f834068e3] blender-v3.3-release: Cycles: Allow Intel GPUs under Metal
Morteza Mostajab
noreply at git.blender.org
Mon Nov 28 19:27:06 CET 2022
Commit: f9f834068e38f562f3590c12af35d3033db98995
Author: Morteza Mostajab
Date: Wed Oct 19 17:09:23 2022 +0100
Branches: blender-v3.3-release
https://developer.blender.org/rBf9f834068e38f562f3590c12af35d3033db98995
Cycles: Allow Intel GPUs under Metal
Known Issues:
- Command buffer failures when using binary archives (binary archives are disabled for Intel GPUs as a workaround)
- Wrong texture sampler being applied (to be addressed in the future)
Ref T92212
Differential Revision: https://developer.blender.org/D16253
===================================================================
M intern/cycles/device/metal/device_impl.mm
M intern/cycles/device/metal/kernel.mm
M intern/cycles/device/metal/util.mm
M intern/cycles/kernel/device/metal/context_begin.h
===================================================================
diff --git a/intern/cycles/device/metal/device_impl.mm b/intern/cycles/device/metal/device_impl.mm
index 6feeaa0707c..f40b056606f 100644
--- a/intern/cycles/device/metal/device_impl.mm
+++ b/intern/cycles/device/metal/device_impl.mm
@@ -338,6 +338,12 @@ bool MetalDevice::compile_and_load(MetalPipelineType pso_type)
MTLCompileOptions *options = [[MTLCompileOptions alloc] init];
+ if (@available(macos 13.0, *)) {
+ if (device_vendor == METAL_GPU_INTEL) {
+ [options setOptimizationLevel:MTLLibraryOptimizationLevelSize];
+ }
+ }
+
options.fastMathEnabled = YES;
if (@available(macOS 12.0, *)) {
options.languageVersion = MTLLanguageVersion2_4;
diff --git a/intern/cycles/device/metal/kernel.mm b/intern/cycles/device/metal/kernel.mm
index e22b0159108..e06858db3ce 100644
--- a/intern/cycles/device/metal/kernel.mm
+++ b/intern/cycles/device/metal/kernel.mm
@@ -325,6 +325,12 @@ bool MetalKernelPipeline::should_use_binary_archive() const
}
}
+ /* Workaround for Intel GPU having issue using Binary Archives */
+ MetalGPUVendor gpu_vendor = MetalInfo::get_device_vendor(mtlDevice);
+ if (gpu_vendor == METAL_GPU_INTEL) {
+ return false;
+ }
+
if (pso_type == PSO_GENERIC) {
/* Archive the generic kernels. */
return true;
diff --git a/intern/cycles/device/metal/util.mm b/intern/cycles/device/metal/util.mm
index 65c67c400fe..eb77aeb6a54 100644
--- a/intern/cycles/device/metal/util.mm
+++ b/intern/cycles/device/metal/util.mm
@@ -110,6 +110,10 @@ vector<id<MTLDevice>> const &MetalInfo::get_usable_devices()
usable |= (vendor == METAL_GPU_AMD);
}
+ if (@available(macos 13.0, *)) {
+ usable |= (vendor == METAL_GPU_INTEL);
+ }
+
if (usable) {
metal_printf("- %s\n", device_name.c_str());
[device retain];
diff --git a/intern/cycles/kernel/device/metal/context_begin.h b/intern/cycles/kernel/device/metal/context_begin.h
index 99cb1e3826e..e75ec9cadec 100644
--- a/intern/cycles/kernel/device/metal/context_begin.h
+++ b/intern/cycles/kernel/device/metal/context_begin.h
@@ -34,21 +34,48 @@ class MetalKernelContext {
kernel_assert(0);
return 0;
}
-
+
+#ifdef __KERNEL_METAL_INTEL__
+ template<typename TextureType, typename CoordsType>
+ inline __attribute__((__always_inline__))
+ auto ccl_gpu_tex_object_read_intel_workaround(TextureType texture_array,
+ const uint tid, const uint sid,
+ CoordsType coords) const
+ {
+ switch(sid) {
+ default:
+ case 0: return texture_array[tid].tex.sample(sampler(address::repeat, filter::nearest), coords);
+ case 1: return texture_array[tid].tex.sample(sampler(address::clamp_to_edge, filter::nearest), coords);
+ case 2: return texture_array[tid].tex.sample(sampler(address::clamp_to_zero, filter::nearest), coords);
+ case 3: return texture_array[tid].tex.sample(sampler(address::repeat, filter::linear), coords);
+ case 4: return texture_array[tid].tex.sample(sampler(address::clamp_to_edge, filter::linear), coords);
+ case 5: return texture_array[tid].tex.sample(sampler(address::clamp_to_zero, filter::linear), coords);
+ }
+ }
+#endif
+
// texture2d
template<>
inline __attribute__((__always_inline__))
float4 ccl_gpu_tex_object_read_2D(ccl_gpu_tex_object_2D tex, float x, float y) const {
const uint tid(tex);
const uint sid(tex >> 32);
+#ifndef __KERNEL_METAL_INTEL__
return metal_ancillaries->textures_2d[tid].tex.sample(metal_samplers[sid], float2(x, y));
+#else
+ return ccl_gpu_tex_object_read_intel_workaround(metal_ancillaries->textures_2d, tid, sid, float2(x, y));
+#endif
}
template<>
inline __attribute__((__always_inline__))
float ccl_gpu_tex_object_read_2D(ccl_gpu_tex_object_2D tex, float x, float y) const {
const uint tid(tex);
const uint sid(tex >> 32);
+#ifndef __KERNEL_METAL_INTEL__
return metal_ancillaries->textures_2d[tid].tex.sample(metal_samplers[sid], float2(x, y)).x;
+#else
+ return ccl_gpu_tex_object_read_intel_workaround(metal_ancillaries->textures_2d, tid, sid, float2(x, y)).x;
+#endif
}
// texture3d
@@ -57,14 +84,22 @@ class MetalKernelContext {
float4 ccl_gpu_tex_object_read_3D(ccl_gpu_tex_object_3D tex, float x, float y, float z) const {
const uint tid(tex);
const uint sid(tex >> 32);
+#ifndef __KERNEL_METAL_INTEL__
return metal_ancillaries->textures_3d[tid].tex.sample(metal_samplers[sid], float3(x, y, z));
+#else
+ return ccl_gpu_tex_object_read_intel_workaround(metal_ancillaries->textures_3d, tid, sid, float3(x, y, z));
+#endif
}
template<>
inline __attribute__((__always_inline__))
float ccl_gpu_tex_object_read_3D(ccl_gpu_tex_object_3D tex, float x, float y, float z) const {
const uint tid(tex);
const uint sid(tex >> 32);
+#ifndef __KERNEL_METAL_INTEL__
return metal_ancillaries->textures_3d[tid].tex.sample(metal_samplers[sid], float3(x, y, z)).x;
+#else
+ return ccl_gpu_tex_object_read_intel_workaround(metal_ancillaries->textures_3d, tid, sid, float3(x, y, z)).x;
+#endif
}
# include "kernel/device/gpu/image.h"
More information about the Bf-blender-cvs
mailing list