[Bf-blender-cvs] [ac07fb38a1b] master: Metal: Minimum per-vertex stride, 3D texture size + Transform feedback GPUCapabilities expansion.
Jason Fielder
noreply at git.blender.org
Thu Sep 1 22:19:09 CEST 2022
Commit: ac07fb38a1b35fa156b2d0901eb35cd65ed73903
Author: Jason Fielder
Date: Thu Sep 1 22:14:18 2022 +0200
Branches: master
https://developer.blender.org/rBac07fb38a1b35fa156b2d0901eb35cd65ed73903
Metal: Minimum per-vertex stride, 3D texture size + Transform feedback GPUCapabilities expansion.
- Adding in compatibility paths to support minimum per-vertex strides for vertex formats. OpenGL supports a minimum stride of 1 byte, in Metal, this minimum stride is 4 bytes. Meaing a vertex format must be atleast 4-bytes in size.
- Replacing transform feedback compile-time check to conditional look-up, given TF is supported on macOS with Metal.
- 3D texture size safety check added as a general capability, rather than being in the gl backend only. Also required for Metal.
Authored by Apple: Michael Parkin-White
Ref T96261
Reviewed By: fclem
Maniphest Tasks: T96261
Differential Revision: https://developer.blender.org/D14510
===================================================================
M source/blender/draw/engines/eevee/eevee_volumes.c
M source/blender/draw/intern/draw_cache.c
M source/blender/draw/intern/draw_cache_impl_curves.cc
M source/blender/draw/intern/draw_cache_impl_particles.c
M source/blender/draw/intern/draw_curves.cc
M source/blender/draw/intern/draw_hair.cc
M source/blender/draw/intern/mesh_extractors/extract_mesh_vbo_edge_fac.cc
M source/blender/gpu/GPU_capabilities.h
M source/blender/gpu/GPU_vertex_buffer.h
M source/blender/gpu/intern/gpu_capabilities.cc
M source/blender/gpu/intern/gpu_texture.cc
M source/blender/gpu/intern/gpu_vertex_buffer.cc
M source/blender/gpu/intern/gpu_vertex_buffer_private.hh
M source/blender/gpu/intern/gpu_vertex_format.cc
M source/blender/gpu/intern/gpu_vertex_format_private.h
M source/blender/gpu/opengl/gl_backend.cc
M source/blender/gpu/opengl/gl_context.hh
M source/blender/gpu/opengl/gl_texture.cc
===================================================================
diff --git a/source/blender/draw/engines/eevee/eevee_volumes.c b/source/blender/draw/engines/eevee/eevee_volumes.c
index 533e71b9b32..2d96cffb4ba 100644
--- a/source/blender/draw/engines/eevee/eevee_volumes.c
+++ b/source/blender/draw/engines/eevee/eevee_volumes.c
@@ -30,6 +30,7 @@
#include "DEG_depsgraph_query.h"
#include "GPU_capabilities.h"
+#include "GPU_context.h"
#include "GPU_material.h"
#include "GPU_texture.h"
#include "eevee_private.h"
@@ -82,6 +83,13 @@ void EEVEE_volumes_init(EEVEE_ViewLayerData *sldata, EEVEE_Data *vedata)
tex_size[1] = (int)ceilf(fmaxf(1.0f, viewport_size[1] / (float)tile_size));
tex_size[2] = max_ii(scene_eval->eevee.volumetric_samples, 1);
+ /* Clamp 3D texture size based on device maximum. */
+ int maxSize = GPU_max_texture_3d_size();
+ BLI_assert(tex_size[0] <= maxSize);
+ tex_size[0] = tex_size[0] > maxSize ? maxSize : tex_size[0];
+ tex_size[1] = tex_size[1] > maxSize ? maxSize : tex_size[1];
+ tex_size[2] = tex_size[2] > maxSize ? maxSize : tex_size[2];
+
common_data->vol_coord_scale[0] = viewport_size[0] / (float)(tile_size * tex_size[0]);
common_data->vol_coord_scale[1] = viewport_size[1] / (float)(tile_size * tex_size[1]);
common_data->vol_coord_scale[2] = 1.0f / viewport_size[0];
diff --git a/source/blender/draw/intern/draw_cache.c b/source/blender/draw/intern/draw_cache.c
index 4ff5745fc86..6537490c06c 100644
--- a/source/blender/draw/intern/draw_cache.c
+++ b/source/blender/draw/intern/draw_cache.c
@@ -826,7 +826,8 @@ GPUBatch *DRW_gpencil_dummy_buffer_get(void)
{
if (SHC.drw_gpencil_dummy_quad == NULL) {
GPUVertFormat format = {0};
- GPU_vertformat_attr_add(&format, "dummy", GPU_COMP_U8, 1, GPU_FETCH_INT);
+ /* NOTE: Use GPU_COMP_U32 to satisfy minimum 4-byte vertex stride for Metal backend. */
+ GPU_vertformat_attr_add(&format, "dummy", GPU_COMP_U32, 1, GPU_FETCH_INT);
GPUVertBuf *vbo = GPU_vertbuf_create_with_format(&format);
GPU_vertbuf_data_alloc(vbo, 4);
diff --git a/source/blender/draw/intern/draw_cache_impl_curves.cc b/source/blender/draw/intern/draw_cache_impl_curves.cc
index 4f0072ec657..3bca17d9c56 100644
--- a/source/blender/draw/intern/draw_cache_impl_curves.cc
+++ b/source/blender/draw/intern/draw_cache_impl_curves.cc
@@ -269,7 +269,8 @@ static void curves_batch_cache_ensure_procedural_pos(const Curves &curves,
GPU_vertformat_attr_add(&format, "posTime", GPU_COMP_F32, 4, GPU_FETCH_FLOAT);
GPU_vertformat_alias_add(&format, "pos");
- cache.proc_point_buf = GPU_vertbuf_create_with_format(&format);
+ cache.proc_point_buf = GPU_vertbuf_create_with_format_ex(
+ &format, GPU_USAGE_STATIC | GPU_USAGE_FLAG_BUFFER_TEXTURE_ONLY);
GPU_vertbuf_data_alloc(cache.proc_point_buf, cache.point_len);
MutableSpan posTime_data{
@@ -279,7 +280,8 @@ static void curves_batch_cache_ensure_procedural_pos(const Curves &curves,
GPUVertFormat length_format = {0};
GPU_vertformat_attr_add(&length_format, "hairLength", GPU_COMP_F32, 1, GPU_FETCH_FLOAT);
- cache.proc_length_buf = GPU_vertbuf_create_with_format(&length_format);
+ cache.proc_length_buf = GPU_vertbuf_create_with_format_ex(
+ &length_format, GPU_USAGE_STATIC | GPU_USAGE_FLAG_BUFFER_TEXTURE_ONLY);
GPU_vertbuf_data_alloc(cache.proc_length_buf, cache.strands_len);
MutableSpan hairLength_data{
@@ -319,8 +321,8 @@ static void curves_batch_cache_ensure_procedural_final_attr(CurvesEvalCache &cac
const char *name)
{
CurvesEvalFinalCache &final_cache = cache.final[subdiv];
- final_cache.attributes_buf[index] = GPU_vertbuf_create_with_format_ex(format,
- GPU_USAGE_DEVICE_ONLY);
+ final_cache.attributes_buf[index] = GPU_vertbuf_create_with_format_ex(
+ format, GPU_USAGE_DEVICE_ONLY | GPU_USAGE_FLAG_BUFFER_TEXTURE_ONLY);
/* Create a destination buffer for the transform feedback. Sized appropriately */
/* Those are points! not line segments. */
@@ -351,7 +353,8 @@ static void curves_batch_ensure_attribute(const Curves &curves,
/* All attributes use vec4, see comment below. */
GPU_vertformat_attr_add(&format, sampler_name, GPU_COMP_F32, 4, GPU_FETCH_FLOAT);
- cache.proc_attributes_buf[index] = GPU_vertbuf_create_with_format(&format);
+ cache.proc_attributes_buf[index] = GPU_vertbuf_create_with_format_ex(
+ &format, GPU_USAGE_STATIC | GPU_USAGE_FLAG_BUFFER_TEXTURE_ONLY);
GPUVertBuf *attr_vbo = cache.proc_attributes_buf[index];
GPU_vertbuf_data_alloc(attr_vbo,
@@ -416,11 +419,13 @@ static void curves_batch_cache_ensure_procedural_strand_data(Curves &curves,
uint seg_id = GPU_vertformat_attr_add(&format_seg, "data", GPU_COMP_U16, 1, GPU_FETCH_INT);
/* Curve Data. */
- cache.proc_strand_buf = GPU_vertbuf_create_with_format(&format_data);
+ cache.proc_strand_buf = GPU_vertbuf_create_with_format_ex(
+ &format_data, GPU_USAGE_STATIC | GPU_USAGE_FLAG_BUFFER_TEXTURE_ONLY);
GPU_vertbuf_data_alloc(cache.proc_strand_buf, cache.strands_len);
GPU_vertbuf_attr_get_raw_data(cache.proc_strand_buf, data_id, &data_step);
- cache.proc_strand_seg_buf = GPU_vertbuf_create_with_format(&format_seg);
+ cache.proc_strand_seg_buf = GPU_vertbuf_create_with_format_ex(
+ &format_seg, GPU_USAGE_STATIC | GPU_USAGE_FLAG_BUFFER_TEXTURE_ONLY);
GPU_vertbuf_data_alloc(cache.proc_strand_seg_buf, cache.strands_len);
GPU_vertbuf_attr_get_raw_data(cache.proc_strand_seg_buf, seg_id, &seg_step);
@@ -441,7 +446,8 @@ static void curves_batch_cache_ensure_procedural_final_points(CurvesEvalCache &c
GPUVertFormat format = {0};
GPU_vertformat_attr_add(&format, "pos", GPU_COMP_F32, 4, GPU_FETCH_FLOAT);
- cache.final[subdiv].proc_buf = GPU_vertbuf_create_with_format_ex(&format, GPU_USAGE_DEVICE_ONLY);
+ cache.final[subdiv].proc_buf = GPU_vertbuf_create_with_format_ex(
+ &format, GPU_USAGE_DEVICE_ONLY | GPU_USAGE_FLAG_BUFFER_TEXTURE_ONLY);
/* Create a destination buffer for the transform feedback. Sized appropriately */
/* Those are points! not line segments. */
diff --git a/source/blender/draw/intern/draw_cache_impl_particles.c b/source/blender/draw/intern/draw_cache_impl_particles.c
index 02afbab6899..4fdc46ea18b 100644
--- a/source/blender/draw/intern/draw_cache_impl_particles.c
+++ b/source/blender/draw/intern/draw_cache_impl_particles.c
@@ -32,6 +32,8 @@
#include "ED_particle.h"
#include "GPU_batch.h"
+#include "GPU_capabilities.h"
+#include "GPU_context.h"
#include "GPU_material.h"
#include "DEG_depsgraph_query.h"
@@ -808,7 +810,10 @@ static void particle_batch_cache_ensure_procedural_final_points(ParticleHairCach
GPUVertFormat format = {0};
GPU_vertformat_attr_add(&format, "pos", GPU_COMP_F32, 4, GPU_FETCH_FLOAT);
- cache->final[subdiv].proc_buf = GPU_vertbuf_create_with_format(&format);
+ /* Transform feedback buffer only needs to be resident in device memory. */
+ GPUUsageType type = GPU_transform_feedback_support() ? GPU_USAGE_DEVICE_ONLY : GPU_USAGE_STATIC;
+ cache->final[subdiv].proc_buf = GPU_vertbuf_create_with_format_ex(
+ &format, type | GPU_USAGE_FLAG_BUFFER_TEXTURE_ONLY);
/* Create a destination buffer for the transform feedback. Sized appropriately */
/* Those are points! not line segments. */
@@ -873,17 +878,20 @@ static void particle_batch_cache_ensure_procedural_strand_data(PTCacheEdit *edit
memset(cache->uv_layer_names, 0, sizeof(cache->uv_layer_names));
/* Strand Data */
- cache->proc_strand_buf = GPU_vertbuf_create_with_format(&format_data);
+ cache->proc_strand_buf = GPU_vertbuf_create_with_format_ex(
+ &format_data, GPU_USAGE_STATIC | GPU_USAGE_FLAG_BUFFER_TEXTURE_ONLY);
GPU_vertbuf_data_alloc(cache->proc_strand_buf, cache->strands_len);
GPU_vertbuf_attr_get_raw_data(cache->proc_strand_buf, data_id, &data_step);
- cache->proc_strand_seg_buf = GPU_vertbuf_create_with_format(&format_seg);
+ cache->proc_strand_seg_buf = GPU_vertbuf_create_with_format_ex(
+ &format_seg, GPU_USAGE_STATIC | GPU_USAGE_FLAG_BUFFER_TEXTURE_ONLY);
GPU_vertbuf_data_alloc(cache->proc_strand_seg_buf, cache->strands_len);
GPU_vertbuf_attr_get_raw_data(cache->proc_strand_seg_buf, seg_id, &seg_step);
/* UV layers */
for (int i = 0; i < cache->num_uv_layers; i++) {
- cache->proc_uv_buf[i] = GPU_vertbuf_create_with_format(&format_uv);
+ cache->proc_uv_buf[i] = GPU_vertbuf_create_with_format_ex(
+ &format_uv, GPU_USAGE_STATIC | GPU_USAGE_FLAG_BUFFER_TEXTURE_ONLY);
GPU_vertbuf_data_alloc(cache->proc_uv_buf[i], cache->strands_len);
GPU_vertbuf_attr_get_raw_data(cache->proc_uv_buf[i], uv_id, &uv_step[i]);
@@ -913,7 +921,8 @@ static void particle_batch_cache_ensure_procedural_strand_data(PTCacheEdit *edit
/* Vertex colors */
for (int i = 0; i < cache->num_col_layers; i++) {
- cache->proc_col_buf[i] = GPU_vertbuf_create_with_format(&format_col);
+ cache->proc_col_buf[i] = GPU_vertbuf_create_with_format_ex(
+ &format_col, GPU_USAGE_STATIC | GPU_USAGE_FLAG_BUFFER_TEXTURE_ONLY);
GPU_vertbuf_data_alloc(cache->proc_col_buf[i], cache->strands_len);
GPU_vertbuf_attr_get_raw_data(cache->proc_col_buf[i], col_id, &col_step[i]);
@@ -1059,8 +1068,9 @@ static void particle_batch_cache_ensure_procedural_indices(PTCacheEdit *edit,
static GPUVertFormat format = {0};
GPU_vertformat_clear(&format);
- /* initialize vertex format */
- GPU_vertformat_attr_add(&format, "dummy", GPU_COMP_U8, 1, GPU_FETCH_INT_TO_FLOAT_UNIT);
+ /* NOTE: initialize vertex format. Using GPU_COMP_U32 to satisfy Metal's 4-byte minimum
+ * stride requirement. */
+ GPU_vertformat_attr_add(&format, "dummy", GPU_COMP_U32, 1, GPU_FETCH_INT_TO_FLOAT_UNIT);
GPUVertBuf *vbo = GPU_vertbuf_create_with_format(&format);
GPU_vertbuf_data_alloc(vbo, 1);
@@ -1101,7 +1111,8 @@ static void particle_batch_cache_ensure_procedural_pos(PTCacheEdit *edit,
uint pos_id = GPU_vertformat_attr_add(
&pos_format, "posTime", GPU_COMP_F32, 4, GPU_FETCH_FLOAT);
- cache->proc_point_buf = GPU_vertbuf_create_with_format(&pos_format);
+ cache->proc_point_buf = GPU_vertbuf_create_
@@ Diff output truncated at 10240 characters. @@
More information about the Bf-blender-cvs
mailing list