[Bf-blender-cvs] [ac07fb38a1b] master: Metal: Minimum per-vertex stride, 3D texture size + Transform feedback GPUCapabilities expansion.

Jason Fielder noreply at git.blender.org
Thu Sep 1 22:19:09 CEST 2022


Commit: ac07fb38a1b35fa156b2d0901eb35cd65ed73903
Author: Jason Fielder
Date:   Thu Sep 1 22:14:18 2022 +0200
Branches: master
https://developer.blender.org/rBac07fb38a1b35fa156b2d0901eb35cd65ed73903

Metal: Minimum per-vertex stride, 3D texture size + Transform feedback GPUCapabilities expansion.

- Adding in compatibility paths to support minimum per-vertex strides for vertex formats. OpenGL supports a minimum stride of 1 byte; in Metal, this minimum stride is 4 bytes, meaning a vertex format must be at least 4 bytes in size.

- Replacing the transform feedback compile-time check with a conditional look-up, given TF is supported on macOS with Metal.

- 3D texture size safety check added as a general capability, rather than being in the gl backend only. Also required for Metal.

Authored by Apple: Michael Parkin-White

Ref T96261

Reviewed By: fclem

Maniphest Tasks: T96261

Differential Revision: https://developer.blender.org/D14510

===================================================================

M	source/blender/draw/engines/eevee/eevee_volumes.c
M	source/blender/draw/intern/draw_cache.c
M	source/blender/draw/intern/draw_cache_impl_curves.cc
M	source/blender/draw/intern/draw_cache_impl_particles.c
M	source/blender/draw/intern/draw_curves.cc
M	source/blender/draw/intern/draw_hair.cc
M	source/blender/draw/intern/mesh_extractors/extract_mesh_vbo_edge_fac.cc
M	source/blender/gpu/GPU_capabilities.h
M	source/blender/gpu/GPU_vertex_buffer.h
M	source/blender/gpu/intern/gpu_capabilities.cc
M	source/blender/gpu/intern/gpu_texture.cc
M	source/blender/gpu/intern/gpu_vertex_buffer.cc
M	source/blender/gpu/intern/gpu_vertex_buffer_private.hh
M	source/blender/gpu/intern/gpu_vertex_format.cc
M	source/blender/gpu/intern/gpu_vertex_format_private.h
M	source/blender/gpu/opengl/gl_backend.cc
M	source/blender/gpu/opengl/gl_context.hh
M	source/blender/gpu/opengl/gl_texture.cc

===================================================================

diff --git a/source/blender/draw/engines/eevee/eevee_volumes.c b/source/blender/draw/engines/eevee/eevee_volumes.c
index 533e71b9b32..2d96cffb4ba 100644
--- a/source/blender/draw/engines/eevee/eevee_volumes.c
+++ b/source/blender/draw/engines/eevee/eevee_volumes.c
@@ -30,6 +30,7 @@
 #include "DEG_depsgraph_query.h"
 
 #include "GPU_capabilities.h"
+#include "GPU_context.h"
 #include "GPU_material.h"
 #include "GPU_texture.h"
 #include "eevee_private.h"
@@ -82,6 +83,13 @@ void EEVEE_volumes_init(EEVEE_ViewLayerData *sldata, EEVEE_Data *vedata)
   tex_size[1] = (int)ceilf(fmaxf(1.0f, viewport_size[1] / (float)tile_size));
   tex_size[2] = max_ii(scene_eval->eevee.volumetric_samples, 1);
 
+  /* Clamp 3D texture size based on device maximum. */
+  int maxSize = GPU_max_texture_3d_size();
+  BLI_assert(tex_size[0] <= maxSize);
+  tex_size[0] = tex_size[0] > maxSize ? maxSize : tex_size[0];
+  tex_size[1] = tex_size[1] > maxSize ? maxSize : tex_size[1];
+  tex_size[2] = tex_size[2] > maxSize ? maxSize : tex_size[2];
+
   common_data->vol_coord_scale[0] = viewport_size[0] / (float)(tile_size * tex_size[0]);
   common_data->vol_coord_scale[1] = viewport_size[1] / (float)(tile_size * tex_size[1]);
   common_data->vol_coord_scale[2] = 1.0f / viewport_size[0];
diff --git a/source/blender/draw/intern/draw_cache.c b/source/blender/draw/intern/draw_cache.c
index 4ff5745fc86..6537490c06c 100644
--- a/source/blender/draw/intern/draw_cache.c
+++ b/source/blender/draw/intern/draw_cache.c
@@ -826,7 +826,8 @@ GPUBatch *DRW_gpencil_dummy_buffer_get(void)
 {
   if (SHC.drw_gpencil_dummy_quad == NULL) {
     GPUVertFormat format = {0};
-    GPU_vertformat_attr_add(&format, "dummy", GPU_COMP_U8, 1, GPU_FETCH_INT);
+    /* NOTE: Use GPU_COMP_U32 to satisfy minimum 4-byte vertex stride for Metal backend. */
+    GPU_vertformat_attr_add(&format, "dummy", GPU_COMP_U32, 1, GPU_FETCH_INT);
     GPUVertBuf *vbo = GPU_vertbuf_create_with_format(&format);
     GPU_vertbuf_data_alloc(vbo, 4);
 
diff --git a/source/blender/draw/intern/draw_cache_impl_curves.cc b/source/blender/draw/intern/draw_cache_impl_curves.cc
index 4f0072ec657..3bca17d9c56 100644
--- a/source/blender/draw/intern/draw_cache_impl_curves.cc
+++ b/source/blender/draw/intern/draw_cache_impl_curves.cc
@@ -269,7 +269,8 @@ static void curves_batch_cache_ensure_procedural_pos(const Curves &curves,
     GPU_vertformat_attr_add(&format, "posTime", GPU_COMP_F32, 4, GPU_FETCH_FLOAT);
     GPU_vertformat_alias_add(&format, "pos");
 
-    cache.proc_point_buf = GPU_vertbuf_create_with_format(&format);
+    cache.proc_point_buf = GPU_vertbuf_create_with_format_ex(
+        &format, GPU_USAGE_STATIC | GPU_USAGE_FLAG_BUFFER_TEXTURE_ONLY);
     GPU_vertbuf_data_alloc(cache.proc_point_buf, cache.point_len);
 
     MutableSpan posTime_data{
@@ -279,7 +280,8 @@ static void curves_batch_cache_ensure_procedural_pos(const Curves &curves,
     GPUVertFormat length_format = {0};
     GPU_vertformat_attr_add(&length_format, "hairLength", GPU_COMP_F32, 1, GPU_FETCH_FLOAT);
 
-    cache.proc_length_buf = GPU_vertbuf_create_with_format(&length_format);
+    cache.proc_length_buf = GPU_vertbuf_create_with_format_ex(
+        &length_format, GPU_USAGE_STATIC | GPU_USAGE_FLAG_BUFFER_TEXTURE_ONLY);
     GPU_vertbuf_data_alloc(cache.proc_length_buf, cache.strands_len);
 
     MutableSpan hairLength_data{
@@ -319,8 +321,8 @@ static void curves_batch_cache_ensure_procedural_final_attr(CurvesEvalCache &cac
                                                             const char *name)
 {
   CurvesEvalFinalCache &final_cache = cache.final[subdiv];
-  final_cache.attributes_buf[index] = GPU_vertbuf_create_with_format_ex(format,
-                                                                        GPU_USAGE_DEVICE_ONLY);
+  final_cache.attributes_buf[index] = GPU_vertbuf_create_with_format_ex(
+      format, GPU_USAGE_DEVICE_ONLY | GPU_USAGE_FLAG_BUFFER_TEXTURE_ONLY);
 
   /* Create a destination buffer for the transform feedback. Sized appropriately */
   /* Those are points! not line segments. */
@@ -351,7 +353,8 @@ static void curves_batch_ensure_attribute(const Curves &curves,
   /* All attributes use vec4, see comment below. */
   GPU_vertformat_attr_add(&format, sampler_name, GPU_COMP_F32, 4, GPU_FETCH_FLOAT);
 
-  cache.proc_attributes_buf[index] = GPU_vertbuf_create_with_format(&format);
+  cache.proc_attributes_buf[index] = GPU_vertbuf_create_with_format_ex(
+      &format, GPU_USAGE_STATIC | GPU_USAGE_FLAG_BUFFER_TEXTURE_ONLY);
   GPUVertBuf *attr_vbo = cache.proc_attributes_buf[index];
 
   GPU_vertbuf_data_alloc(attr_vbo,
@@ -416,11 +419,13 @@ static void curves_batch_cache_ensure_procedural_strand_data(Curves &curves,
   uint seg_id = GPU_vertformat_attr_add(&format_seg, "data", GPU_COMP_U16, 1, GPU_FETCH_INT);
 
   /* Curve Data. */
-  cache.proc_strand_buf = GPU_vertbuf_create_with_format(&format_data);
+  cache.proc_strand_buf = GPU_vertbuf_create_with_format_ex(
+      &format_data, GPU_USAGE_STATIC | GPU_USAGE_FLAG_BUFFER_TEXTURE_ONLY);
   GPU_vertbuf_data_alloc(cache.proc_strand_buf, cache.strands_len);
   GPU_vertbuf_attr_get_raw_data(cache.proc_strand_buf, data_id, &data_step);
 
-  cache.proc_strand_seg_buf = GPU_vertbuf_create_with_format(&format_seg);
+  cache.proc_strand_seg_buf = GPU_vertbuf_create_with_format_ex(
+      &format_seg, GPU_USAGE_STATIC | GPU_USAGE_FLAG_BUFFER_TEXTURE_ONLY);
   GPU_vertbuf_data_alloc(cache.proc_strand_seg_buf, cache.strands_len);
   GPU_vertbuf_attr_get_raw_data(cache.proc_strand_seg_buf, seg_id, &seg_step);
 
@@ -441,7 +446,8 @@ static void curves_batch_cache_ensure_procedural_final_points(CurvesEvalCache &c
   GPUVertFormat format = {0};
   GPU_vertformat_attr_add(&format, "pos", GPU_COMP_F32, 4, GPU_FETCH_FLOAT);
 
-  cache.final[subdiv].proc_buf = GPU_vertbuf_create_with_format_ex(&format, GPU_USAGE_DEVICE_ONLY);
+  cache.final[subdiv].proc_buf = GPU_vertbuf_create_with_format_ex(
+      &format, GPU_USAGE_DEVICE_ONLY | GPU_USAGE_FLAG_BUFFER_TEXTURE_ONLY);
 
   /* Create a destination buffer for the transform feedback. Sized appropriately */
   /* Those are points! not line segments. */
diff --git a/source/blender/draw/intern/draw_cache_impl_particles.c b/source/blender/draw/intern/draw_cache_impl_particles.c
index 02afbab6899..4fdc46ea18b 100644
--- a/source/blender/draw/intern/draw_cache_impl_particles.c
+++ b/source/blender/draw/intern/draw_cache_impl_particles.c
@@ -32,6 +32,8 @@
 #include "ED_particle.h"
 
 #include "GPU_batch.h"
+#include "GPU_capabilities.h"
+#include "GPU_context.h"
 #include "GPU_material.h"
 
 #include "DEG_depsgraph_query.h"
@@ -808,7 +810,10 @@ static void particle_batch_cache_ensure_procedural_final_points(ParticleHairCach
   GPUVertFormat format = {0};
   GPU_vertformat_attr_add(&format, "pos", GPU_COMP_F32, 4, GPU_FETCH_FLOAT);
 
-  cache->final[subdiv].proc_buf = GPU_vertbuf_create_with_format(&format);
+  /* Transform feedback buffer only needs to be resident in device memory. */
+  GPUUsageType type = GPU_transform_feedback_support() ? GPU_USAGE_DEVICE_ONLY : GPU_USAGE_STATIC;
+  cache->final[subdiv].proc_buf = GPU_vertbuf_create_with_format_ex(
+      &format, type | GPU_USAGE_FLAG_BUFFER_TEXTURE_ONLY);
 
   /* Create a destination buffer for the transform feedback. Sized appropriately */
   /* Those are points! not line segments. */
@@ -873,17 +878,20 @@ static void particle_batch_cache_ensure_procedural_strand_data(PTCacheEdit *edit
   memset(cache->uv_layer_names, 0, sizeof(cache->uv_layer_names));
 
   /* Strand Data */
-  cache->proc_strand_buf = GPU_vertbuf_create_with_format(&format_data);
+  cache->proc_strand_buf = GPU_vertbuf_create_with_format_ex(
+      &format_data, GPU_USAGE_STATIC | GPU_USAGE_FLAG_BUFFER_TEXTURE_ONLY);
   GPU_vertbuf_data_alloc(cache->proc_strand_buf, cache->strands_len);
   GPU_vertbuf_attr_get_raw_data(cache->proc_strand_buf, data_id, &data_step);
 
-  cache->proc_strand_seg_buf = GPU_vertbuf_create_with_format(&format_seg);
+  cache->proc_strand_seg_buf = GPU_vertbuf_create_with_format_ex(
+      &format_seg, GPU_USAGE_STATIC | GPU_USAGE_FLAG_BUFFER_TEXTURE_ONLY);
   GPU_vertbuf_data_alloc(cache->proc_strand_seg_buf, cache->strands_len);
   GPU_vertbuf_attr_get_raw_data(cache->proc_strand_seg_buf, seg_id, &seg_step);
 
   /* UV layers */
   for (int i = 0; i < cache->num_uv_layers; i++) {
-    cache->proc_uv_buf[i] = GPU_vertbuf_create_with_format(&format_uv);
+    cache->proc_uv_buf[i] = GPU_vertbuf_create_with_format_ex(
+        &format_uv, GPU_USAGE_STATIC | GPU_USAGE_FLAG_BUFFER_TEXTURE_ONLY);
     GPU_vertbuf_data_alloc(cache->proc_uv_buf[i], cache->strands_len);
     GPU_vertbuf_attr_get_raw_data(cache->proc_uv_buf[i], uv_id, &uv_step[i]);
 
@@ -913,7 +921,8 @@ static void particle_batch_cache_ensure_procedural_strand_data(PTCacheEdit *edit
 
   /* Vertex colors */
   for (int i = 0; i < cache->num_col_layers; i++) {
-    cache->proc_col_buf[i] = GPU_vertbuf_create_with_format(&format_col);
+    cache->proc_col_buf[i] = GPU_vertbuf_create_with_format_ex(
+        &format_col, GPU_USAGE_STATIC | GPU_USAGE_FLAG_BUFFER_TEXTURE_ONLY);
     GPU_vertbuf_data_alloc(cache->proc_col_buf[i], cache->strands_len);
     GPU_vertbuf_attr_get_raw_data(cache->proc_col_buf[i], col_id, &col_step[i]);
 
@@ -1059,8 +1068,9 @@ static void particle_batch_cache_ensure_procedural_indices(PTCacheEdit *edit,
   static GPUVertFormat format = {0};
   GPU_vertformat_clear(&format);
 
-  /* initialize vertex format */
-  GPU_vertformat_attr_add(&format, "dummy", GPU_COMP_U8, 1, GPU_FETCH_INT_TO_FLOAT_UNIT);
+  /* NOTE: initialize vertex format. Using GPU_COMP_U32 to satisfy Metal's 4-byte minimum
+   * stride requirement. */
+  GPU_vertformat_attr_add(&format, "dummy", GPU_COMP_U32, 1, GPU_FETCH_INT_TO_FLOAT_UNIT);
 
   GPUVertBuf *vbo = GPU_vertbuf_create_with_format(&format);
   GPU_vertbuf_data_alloc(vbo, 1);
@@ -1101,7 +1111,8 @@ static void particle_batch_cache_ensure_procedural_pos(PTCacheEdit *edit,
     uint pos_id = GPU_vertformat_attr_add(
         &pos_format, "posTime", GPU_COMP_F32, 4, GPU_FETCH_FLOAT);
 
-    cache->proc_point_buf = GPU_vertbuf_create_with_format(&pos_format);
+    cache->proc_point_buf = GPU_vertbuf_create_

@@ Diff output truncated at 10240 characters. @@



More information about the Bf-blender-cvs mailing list