[Bf-blender-cvs] [396fe9a98af] temp-texture-painting-gpu: Researching bottlenecks.
Jeroen Bakker
noreply at git.blender.org
Fri Oct 14 14:51:26 CEST 2022
Commit: 396fe9a98af41805ade2e10a1a36f08e1e59bda6
Author: Jeroen Bakker
Date: Fri Oct 14 14:51:10 2022 +0200
Branches: temp-texture-painting-gpu
https://developer.blender.org/rB396fe9a98af41805ade2e10a1a36f08e1e59bda6
Researching bottlenecks.
===================================================================
M source/blender/blenkernel/BKE_pbvh_pixels.hh
M source/blender/blenkernel/intern/pbvh_pixels.cc
M source/blender/editors/sculpt_paint/sculpt_paint_image.cc
M source/blender/gpu/GPU_sculpt_shader_shared.h
M source/blender/gpu/shaders/sculpt_paint/infos/sculpt_paint_image_info.hh
M source/blender/gpu/shaders/sculpt_paint/sculpt_paint_image_comp.glsl
M source/blender/gpu/shaders/sculpt_paint/sculpt_paint_image_merge_comp.glsl
M source/blender/gpu/shaders/sculpt_paint/sculpt_paint_tile_lib.glsl
===================================================================
diff --git a/source/blender/blenkernel/BKE_pbvh_pixels.hh b/source/blender/blenkernel/BKE_pbvh_pixels.hh
index bff6bee41e4..1eed7aab423 100644
--- a/source/blender/blenkernel/BKE_pbvh_pixels.hh
+++ b/source/blender/blenkernel/BKE_pbvh_pixels.hh
@@ -106,14 +106,12 @@ struct UDIMTilePixels {
Vector<PackedPixelRow> pixel_rows;
int64_t gpu_buffer_offset;
/* Region of the tile that can be painted on by this node. Size of a subtile is determined by */
- /* TODO: use list of sub_tile_ids to not overcommit texture usage. */
- rcti gpu_sub_tiles;
+ Vector<int2> gpu_sub_tiles;
UDIMTilePixels()
{
flags.dirty = false;
BLI_rcti_init_minmax(&dirty_region);
- BLI_rcti_init_minmax(&gpu_sub_tiles);
}
void mark_dirty(const PackedPixelRow &pixel_row)
diff --git a/source/blender/blenkernel/intern/pbvh_pixels.cc b/source/blender/blenkernel/intern/pbvh_pixels.cc
index a87a29019b3..38fae311d04 100644
--- a/source/blender/blenkernel/intern/pbvh_pixels.cc
+++ b/source/blender/blenkernel/intern/pbvh_pixels.cc
@@ -15,6 +15,8 @@
#include "BLI_math.h"
#include "BLI_task.h"
+#include "PIL_time_utildefines.h"
+
#include "BKE_image_wrappers.hh"
#include "bmesh.h"
@@ -79,19 +81,37 @@ void NodeData::build_pixels_gpu_buffer()
void UDIMTilePixels::init_gpu_sub_tiles()
{
- BLI_rcti_init_minmax(&gpu_sub_tiles);
+ BLI_assert(gpu_sub_tiles.is_empty());
+ const int max_sub_tiles = 16;
+ bool sub_tiles_hit[max_sub_tiles][max_sub_tiles];
+ for (int x = 0; x < max_sub_tiles; x++) {
+ for (int y = 0; y < max_sub_tiles; y++) {
+ sub_tiles_hit[x][y] = false;
+ }
+ }
+
+ int2 max_sub_tile_len(0, 0);
for (const PackedPixelRow &elements : pixel_rows) {
int2 subtile_from = int2(elements.start_image_coordinate / TEXTURE_STREAMING_TILE_SIZE);
int2 coord_to = int2(elements.start_image_coordinate) + int2(elements.num_pixels + 1, 1);
int2 subtile_to = int2(coord_to / TEXTURE_STREAMING_TILE_SIZE);
+ for (int x = subtile_from.x; x < subtile_to.x; x++) {
+ sub_tiles_hit[x][subtile_from.y] = true;
+ }
+ }
- BLI_rcti_do_minmax_v(&gpu_sub_tiles, subtile_from);
- BLI_rcti_do_minmax_v(&gpu_sub_tiles, subtile_to);
+ for (int x = 0; x < max_sub_tiles; x++) {
+ for (int y = 0; y < max_sub_tiles; y++) {
+ if (sub_tiles_hit[x][y]) {
+ gpu_sub_tiles.append(int2(x, y));
+ }
+ }
}
}
void NodeData::init_gpu_sub_tiles()
{
+ printf("%s\n", __func__);
for (UDIMTilePixels &tile : tiles) {
tile.init_gpu_sub_tiles();
}
diff --git a/source/blender/editors/sculpt_paint/sculpt_paint_image.cc b/source/blender/editors/sculpt_paint/sculpt_paint_image.cc
index ddcf91d76e4..88f4aca67d8 100644
--- a/source/blender/editors/sculpt_paint/sculpt_paint_image.cc
+++ b/source/blender/editors/sculpt_paint/sculpt_paint_image.cc
@@ -553,7 +553,7 @@ static void init_paint_brush(const SculptSession &ss,
* - Only tiles that are painted on are loaded in memory, painted on and merged back to the actual
* texture.
*/
-template<int32_t Size, int32_t Depth = 512> class GPUSubTileTexture {
+template<int32_t Size, int32_t Depth = 16> class GPUSubTileTexture {
struct Info {
struct {
bool in_use_stroke : 1;
@@ -572,7 +572,7 @@ template<int32_t Size, int32_t Depth = 512> class GPUSubTileTexture {
std::array<int32_t, Depth> layer_lookup_;
GPUTexture *gpu_texture_ = nullptr;
- GPUStorageBuf *tile_buf_ = nullptr;
+ GPUStorageBuf *paint_tile_buf_ = nullptr;
int64_t tile_buf_size_ = 0;
public:
@@ -593,17 +593,9 @@ template<int32_t Size, int32_t Depth = 512> class GPUSubTileTexture {
gpu_texture_ = nullptr;
}
- if (tile_buf_) {
- GPU_storagebuf_free(tile_buf_);
- tile_buf_ = nullptr;
- }
- }
-
- void reset_usage()
- {
- printf("%s\n", __func__);
- for (Info &info : infos_) {
- info.flags.in_use = false;
+ if (paint_tile_buf_) {
+ GPU_storagebuf_free(paint_tile_buf_);
+ paint_tile_buf_ = nullptr;
}
}
@@ -766,6 +758,7 @@ template<int32_t Size, int32_t Depth = 512> class GPUSubTileTexture {
{
BLI_assert(gpu_texture_);
float *buffer = nullptr;
+ bool tiles_updated = false;
for (int64_t index : infos_.index_range()) {
Info &info = infos_[index];
PaintTileData &tile = paint_tiles_[index];
@@ -781,11 +774,22 @@ template<int32_t Size, int32_t Depth = 512> class GPUSubTileTexture {
buffer = static_cast<float *>(MEM_callocN(Size * Size * 4 * sizeof(float), __func__));
}
+ printf("%s: initializing tile {tile:%d, sub_tile:%d,%d, layer_id:%d}\n",
+ __func__,
+ tile.tile_number,
+ UNPACK2(tile.sub_tile_id),
+ tile.layer_id);
+
/* TODO: Copy correct data from ImBuf.*/
- // GPU_texture_update_sub(
- // gpu_texture_, GPU_DATA_FLOAT, buffer, 0, 0, tile.layer_id, Size, Size, 1);
+ GPU_texture_update_sub(
+ gpu_texture_, GPU_DATA_FLOAT, buffer, 0, 0, tile.layer_id, Size, Size, 1);
info.flags.needs_update = false;
+ tiles_updated = true;
+ }
+
+ if (tiles_updated) {
+ GPU_memory_barrier(GPU_BARRIER_TEXTURE_UPDATE);
}
if (buffer) {
@@ -798,32 +802,42 @@ template<int32_t Size, int32_t Depth = 512> class GPUSubTileTexture {
return gpu_texture_;
}
- void ensure_tile_buf()
+ void ensure_paint_tile_buf()
{
int64_t needed_size = paint_tiles_.capacity() * sizeof(PaintTileData);
/* Reuse previous buffer only when exact size, due to potentional read out of bound errors.*/
- if (tile_buf_ && tile_buf_size_ == needed_size) {
+ if (paint_tile_buf_ && tile_buf_size_ == needed_size) {
return;
}
- if (tile_buf_) {
- GPU_storagebuf_free(tile_buf_);
- tile_buf_ = nullptr;
+ if (paint_tile_buf_) {
+ GPU_storagebuf_free(paint_tile_buf_);
+ paint_tile_buf_ = nullptr;
+ }
+ paint_tile_buf_ = GPU_storagebuf_create(needed_size);
+ }
+
+ void update_paint_tile_buf()
+ {
+ BLI_assert(paint_tile_buf_);
+ for (PaintTileData &tile : paint_tiles_) {
+ tile.in_use_frame = false;
}
- tile_buf_ = GPU_storagebuf_create(needed_size);
+ GPU_storagebuf_update(paint_tile_buf_, paint_tiles_.data());
}
- void update_tile_buf()
+ void read_back_paint_tile_buf()
{
- BLI_assert(tile_buf_);
- GPU_storagebuf_update(tile_buf_, paint_tiles_.data());
+ BLI_assert(paint_tile_buf_);
+ // GPU_memory_barrier(GPU_BARRIER_SHADER_STORAGE);
+ GPU_storagebuf_read(paint_tile_buf_, paint_tiles_.data());
}
- GPUStorageBuf *tile_buf_get()
+ GPUStorageBuf *paint_tile_buf_get()
{
- BLI_assert(tile_buf_);
- return tile_buf_;
+ BLI_assert(paint_tile_buf_);
+ return paint_tile_buf_;
}
int32_t paint_tiles_len()
@@ -835,7 +849,7 @@ template<int32_t Size, int32_t Depth = 512> class GPUSubTileTexture {
{
GPU_texture_image_bind(gpu_texture_get(),
GPU_shader_get_texture_binding(shader, "paint_tiles_img"));
- GPU_storagebuf_bind(tile_buf_get(), GPU_shader_get_ssbo(shader, "paint_tile_buf"));
+ GPU_storagebuf_bind(paint_tile_buf_get(), GPU_shader_get_ssbo(shader, "paint_tile_buf"));
GPU_shader_uniform_1i(shader, "paint_tile_buf_len", paint_tiles_len());
}
@@ -847,6 +861,11 @@ template<int32_t Size, int32_t Depth = 512> class GPUSubTileTexture {
if (!info.flags.in_use_frame) {
continue;
}
+ /*
+ PaintTileData &paint_tile = paint_tiles_[index];
+ if (!paint_tile.in_use_frame) {
+ continue;
+ }*/
predicate(paint_tiles_[index]);
}
}
@@ -1029,19 +1048,15 @@ static void gpu_painting_image_merge(GPUSculptPaintData &batches,
ImageUser &image_user,
ImBuf &image_buffer)
{
+ GPU_memory_barrier(GPU_BARRIER_SHADER_IMAGE_ACCESS);
GPUTexture *canvas_tex = BKE_image_get_gpu_texture(&image, &image_user, &image_buffer);
GPUShader *shader = SCULPT_shader_paint_image_merge_get();
GPU_shader_bind(shader);
batches.tile_texture.bind(shader);
GPU_texture_image_bind(canvas_tex, GPU_shader_get_texture_binding(shader, "texture_img"));
batches.tile_texture.foreach_in_frame([shader](PaintTileData &paint_tile) {
- printf("%s: merging tile stored on layer %d {tile:%d sub_tile:%d,%d} \n",
- __func__,
- paint_tile.layer_id,
- paint_tile.tile_number,
- UNPACK2(paint_tile.sub_tile_id));
GPU_shader_uniform_1i(shader, "layer_id", paint_tile.layer_id);
- GPU_compute_dispatch(shader, TEXTURE_STREAMING_TILE_SIZE, TEXTURE_STREAMING_TILE_SIZE, 1);
+ GPU_compute_dispatch(shader, TEXTURE_STREAMING_TILE_SIZE / 32, TEXTURE_STREAMING_TILE_SIZE, 1);
});
}
@@ -1064,7 +1079,7 @@ static void init_paint_step(const SculptSession &ss,
}
}
-static void dispatch_gpu_painting(TexturePaintingUserData &data)
+static void add_paint_step(TexturePaintingUserData &data)
{
SculptSession &ss = *data.ob->sculpt;
@@ -1073,6 +1088,7 @@ static void dispatch_gpu_painting(TexturePaintingUserData &data)
PaintStepData paint_step;
init_paint_step(ss, *data.brush, paint_step);
batches.steps.append(paint_step);
+ PIL_sleep_ms(1);
}
/* This should be done based on the frame_selection nodes, otherwise we might be over
@@ -1086,11 +1102,8 @@ static void paint_tiles_mark_used(TexturePaintingUserData &data)
for (PBVHNode *node : MutableSpan<PBVHNode *>(data.nodes, data.nodes_len)) {
NodeData &node_data = BKE_pbvh_pixels_node_data_get(*node);
for (UDIMTilePixels &tile : node_data.tiles) {
- for (int x = tile.gpu_sub_tiles.xmin; x <= tile.gpu_sub_tiles.xmax; x++) {
- for (int y = tile.gpu_sub_tiles.ymin; y <= tile.gpu_sub_tiles.ymax; y++) {
- int2 sub_tile_id(x, y);
- batches.tile_texture.mark_usage(tile.tile_number, sub_tile_id);
- }
+ for (int2 &sub_tile_id : tile.gpu_sub_tiles) {
+ batches.tile_texture.mark_usage(tile.tile_number, sub_tile_id);
}
}
}
@@ -1120,7 +1133,7 @@ static TileNumbers collect_active_tile_numbers(const TexturePaintingUserData &da
return result;
}
-sta
@@ Diff output truncated at 10240 characters. @@
More information about the Bf-blender-cvs mailing list