[Bf-blender-cvs] [82d9617dab0] temp-texture-painting-gpu: Paint Tile streaming.
Jeroen Bakker
noreply at git.blender.org
Tue Oct 11 13:45:22 CEST 2022
Commit: 82d9617dab040400a13f142949b55c55a8491ce9
Author: Jeroen Bakker
Date: Tue Oct 11 13:45:19 2022 +0200
Branches: temp-texture-painting-gpu
https://developer.blender.org/rB82d9617dab040400a13f142949b55c55a8491ce9
Paint Tile streaming.
===================================================================
M source/blender/blenkernel/BKE_pbvh_pixels.hh
M source/blender/blenkernel/intern/paint.cc
M source/blender/blenkernel/intern/pbvh_pixels.cc
M source/blender/editors/sculpt_paint/sculpt_intern.h
M source/blender/editors/sculpt_paint/sculpt_paint_image.cc
M source/blender/gpu/CMakeLists.txt
M source/blender/gpu/GPU_sculpt_shader_shared.h
M source/blender/gpu/shaders/sculpt_paint/infos/sculpt_paint_image_info.hh
M source/blender/gpu/shaders/sculpt_paint/sculpt_paint_image_comp.glsl
M source/blender/gpu/shaders/sculpt_paint/sculpt_paint_image_merge_comp.glsl
A source/blender/gpu/shaders/sculpt_paint/sculpt_paint_tile_lib.glsl
===================================================================
diff --git a/source/blender/blenkernel/BKE_pbvh_pixels.hh b/source/blender/blenkernel/BKE_pbvh_pixels.hh
index 282aebcbcdc..bff6bee41e4 100644
--- a/source/blender/blenkernel/BKE_pbvh_pixels.hh
+++ b/source/blender/blenkernel/BKE_pbvh_pixels.hh
@@ -21,6 +21,10 @@
namespace blender::bke::pbvh::pixels {
+/* During GPU painting the texture is spliced into sub-tiles. This constant contains the size of
+ * sub-tiles (width and height). */
+const int32_t TEXTURE_STREAMING_TILE_SIZE = 1024;
+
/**
* Data shared between pixels that belong to the same triangle.
*
@@ -101,11 +105,15 @@ struct UDIMTilePixels {
Vector<PackedPixelRow> pixel_rows;
int64_t gpu_buffer_offset;
+ /* Region of the tile that can be painted on by this node. The size of a sub-tile is
+ * determined by TEXTURE_STREAMING_TILE_SIZE. */
+ /* TODO: use list of sub_tile_ids to not overcommit texture usage. */
+ rcti gpu_sub_tiles;
UDIMTilePixels()
{
flags.dirty = false;
BLI_rcti_init_minmax(&dirty_region);
+ BLI_rcti_init_minmax(&gpu_sub_tiles);
}
void mark_dirty(const PackedPixelRow &pixel_row)
@@ -121,6 +129,8 @@ struct UDIMTilePixels {
BLI_rcti_init_minmax(&dirty_region);
flags.dirty = false;
}
+
+ void init_gpu_sub_tiles();
};
struct UDIMTileUndo {
@@ -215,6 +225,7 @@ struct NodeData {
triangles.ensure_gpu_buffer();
if (gpu_buffers.pixels == nullptr) {
build_pixels_gpu_buffer();
+ init_gpu_sub_tiles();
}
}
@@ -226,6 +237,7 @@ struct NodeData {
private:
void build_pixels_gpu_buffer();
+ void init_gpu_sub_tiles();
};
NodeData &BKE_pbvh_pixels_node_data_get(PBVHNode &node);
diff --git a/source/blender/blenkernel/intern/paint.cc b/source/blender/blenkernel/intern/paint.cc
index f7f5f7e411a..642fd9fa00d 100644
--- a/source/blender/blenkernel/intern/paint.cc
+++ b/source/blender/blenkernel/intern/paint.cc
@@ -1393,6 +1393,13 @@ void BKE_sculptsession_free_vwpaint_data(SculptSession *ss)
MEM_SAFE_FREE(gmap->poly_map_mem);
}
+/* TODO: We should move the gpu batches to BKE. */
+void SCULPT_paint_image_sculpt_data_free(SculptSession *ss);
+static void bke_sculptsession_free_texture_paint_data(SculptSession *UNUSED(ss))
+{
+ // SCULPT_paint_image_sculpt_data_free(ss);
+}
+
/**
* Write out the sculpt dynamic-topology #BMesh to the #Mesh.
*/
@@ -1533,6 +1540,7 @@ void BKE_sculptsession_free(Object *ob)
}
BKE_sculptsession_free_vwpaint_data(ob->sculpt);
+ bke_sculptsession_free_texture_paint_data(ob->sculpt);
MEM_SAFE_FREE(ss->last_paint_canvas_key);
diff --git a/source/blender/blenkernel/intern/pbvh_pixels.cc b/source/blender/blenkernel/intern/pbvh_pixels.cc
index db1a7514b15..a87a29019b3 100644
--- a/source/blender/blenkernel/intern/pbvh_pixels.cc
+++ b/source/blender/blenkernel/intern/pbvh_pixels.cc
@@ -77,6 +77,26 @@ void NodeData::build_pixels_gpu_buffer()
elem_len * sizeof(PackedPixelRow), elements.data(), GPU_USAGE_STATIC, __func__);
}
+void UDIMTilePixels::init_gpu_sub_tiles()
+{
+ BLI_rcti_init_minmax(&gpu_sub_tiles);
+ for (const PackedPixelRow &elements : pixel_rows) {
+ int2 subtile_from = int2(elements.start_image_coordinate / TEXTURE_STREAMING_TILE_SIZE);
+ int2 coord_to = int2(elements.start_image_coordinate) + int2(elements.num_pixels + 1, 1);
+ int2 subtile_to = int2(coord_to / TEXTURE_STREAMING_TILE_SIZE);
+
+ BLI_rcti_do_minmax_v(&gpu_sub_tiles, subtile_from);
+ BLI_rcti_do_minmax_v(&gpu_sub_tiles, subtile_to);
+ }
+}
+
+void NodeData::init_gpu_sub_tiles()
+{
+ for (UDIMTilePixels &tile : tiles) {
+ tile.init_gpu_sub_tiles();
+ }
+}
+
/**
* During debugging this check could be enabled.
* It will write to each image pixel that is covered by the PBVH.
diff --git a/source/blender/editors/sculpt_paint/sculpt_intern.h b/source/blender/editors/sculpt_paint/sculpt_intern.h
index ac7a43e7fc2..b3b8a30f2aa 100644
--- a/source/blender/editors/sculpt_paint/sculpt_intern.h
+++ b/source/blender/editors/sculpt_paint/sculpt_intern.h
@@ -1770,6 +1770,7 @@ void SCULPT_paint_image_batches_flush(struct PaintModeSettings *paint_mode_setti
void SCULPT_paint_image_batches_finalize(struct PaintModeSettings *paint_mode_settings,
struct Sculpt *sd,
struct Object *ob);
+void SCULPT_paint_image_sculpt_data_free(SculptSession *ss);
/* Smear Brush. */
void SCULPT_do_smear_brush(Sculpt *sd, Object *ob, PBVHNode **nodes, int totnode);
diff --git a/source/blender/editors/sculpt_paint/sculpt_paint_image.cc b/source/blender/editors/sculpt_paint/sculpt_paint_image.cc
index 99353b934fd..a44b9141578 100644
--- a/source/blender/editors/sculpt_paint/sculpt_paint_image.cc
+++ b/source/blender/editors/sculpt_paint/sculpt_paint_image.cc
@@ -526,7 +526,6 @@ static void init_paint_brush_alpha(const Brush &brush, PaintBrushData &r_paint_b
r_paint_brush.alpha = brush.alpha;
}
-/* TODO: Currently only spherical is supported. */
static void init_paint_brush_test(const SculptSession &ss, PaintBrushData &r_paint_brush)
{
r_paint_brush.test.symm_rot_mat_inv = ss.cache->symm_rot_mat_inv;
@@ -547,6 +546,237 @@ static void init_paint_brush(const SculptSession &ss,
init_paint_brush_falloff(brush, r_paint_brush);
}
+/**
+ * Tiles are split on the GPU in sub-tiles.
+ *
+ * Sub tiles are used to reduce the needed memory on the GPU.
+ * - Only tiles that are painted on are loaded in memory, painted on and merged back to the actual
+ * texture.
+ */
+
+template<int32_t Size, int32_t Depth = 512> class GPUSubTileTexture {
+ struct Info {
+ struct {
+ bool in_use : 1;
+ /* Does this sub tile need to be updated (CPU->GPU transfer)? */
+ bool needs_update : 1;
+ bool should_be_removed : 1;
+ } flags;
+ };
+ const int32_t LayerIdUnused = -1;
+ const int32_t LayerIdMarkRemoval = -2;
+
+ Vector<PaintTileData> paint_tiles_;
+ Vector<Info> infos_;
+
+ std::array<int32_t, Depth> layer_lookup_;
+
+ GPUTexture *gpu_texture_ = nullptr;
+ GPUStorageBuf *tile_buf_ = nullptr;
+ int64_t tile_buf_size_ = 0;
+
+ public:
+ GPUSubTileTexture()
+ {
+ for (int i = 0; i < Depth; i++) {
+ layer_lookup_[i] = LayerIdUnused;
+ }
+ }
+ ~GPUSubTileTexture()
+ {
+ if (gpu_texture_) {
+ GPU_texture_free(gpu_texture_);
+ gpu_texture_ = nullptr;
+ }
+
+ if (tile_buf_) {
+ GPU_storagebuf_free(tile_buf_);
+ tile_buf_ = nullptr;
+ }
+ }
+
+ void reset_usage()
+ {
+ printf("%s\n", __func__);
+ for (Info &info : infos_) {
+ info.flags.in_use = false;
+ }
+ }
+
+ void mark_usage(TileNumber tile_number, int2 sub_tile_id)
+ {
+ for (int index : paint_tiles_.index_range()) {
+ PaintTileData &tile = paint_tiles_[index];
+ if (tile.tile_number == tile_number && tile.sub_tile_id == sub_tile_id) {
+ Info &info = infos_[index];
+ if (!info.flags.in_use) {
+ printf("%s: mark existing {tile:%d, sub_tile:%d,%d}\n",
+ __func__,
+ tile_number,
+ UNPACK2(sub_tile_id));
+ }
+ info.flags.in_use = true;
+ return;
+ }
+ }
+
+ /* Tile not yet added, add a new one. */
+ Info info;
+ info.flags.in_use = true;
+ info.flags.needs_update = true;
+ info.flags.should_be_removed = false;
+ infos_.append(info);
+
+ PaintTileData tile;
+ tile.tile_number = tile_number;
+ tile.sub_tile_id = sub_tile_id;
+ tile.layer_id = LayerIdUnused;
+ paint_tiles_.append(tile);
+
+ printf(
+ "%s: mark new {tile:%d, sub_tile:%d,%d}\n", __func__, tile_number, UNPACK2(sub_tile_id));
+ }
+
+ /** Remove all sub tiles that are currently flagged not to be used (flags.in_use = false). */
+ void remove_unused()
+ {
+ for (int i = 0; i < layer_lookup_.size(); i++) {
+ int index = layer_lookup_[i];
+ if (index == -1) {
+ continue;
+ }
+ infos_[index].flags.should_be_removed = false;
+ if (infos_[index].flags.in_use == false) {
+ infos_[index].flags.should_be_removed = true;
+ paint_tiles_[index].layer_id = LayerIdMarkRemoval;
+ printf("%s: remove sub tile at layer %d\n", __func__, i);
+ layer_lookup_[i] = -1;
+ }
+ }
+
+ infos_.remove_if([&](Info &info) { return info.flags.should_be_removed; });
+ paint_tiles_.remove_if(
+ [&](PaintTileData &tile) { return tile.layer_id == LayerIdMarkRemoval; });
+ }
+
+ void assign_layer_ids()
+ {
+ for (int64_t index : paint_tiles_.index_range()) {
+ PaintTileData &tile = paint_tiles_[index];
+
+ if (tile.layer_id != LayerIdUnused) {
+ continue;
+ }
+
+ tile.layer_id = first_empty_layer_id();
+ layer_lookup_[tile.layer_id] = index;
+ printf("%s: assign {tile:%d, sub_tile:%d,%d} to layer %d\n",
+ __func__,
+ tile.tile_number,
+ UNPACK2(tile.sub_tile_id),
+ tile.layer_id);
+ }
+ }
+
+ int first_empty_layer_id() const
+ {
+ for (int i = 0; i < Depth; i++) {
+ if (layer_lookup_[i] == LayerIdUnused) {
+ return i;
+ }
+ }
+
+ BLI_assert_unreachable();
+ return LayerIdUnused;
+ }
+
+ void ensure_gpu_texture()
+ {
+ if (gpu_texture_ != nullptr) {
+ return;
+ }
+ gpu_texture_ = GPU_texture_create_3d(
+ "GPUSubTileTexture", Size, Size, Depth, 1, GPU_RGBA16F, GPU_DATA_FLOAT, nullptr);
+ }
+
+ void update_gpu_texture(TileNumber tile_number, ImBuf &UNUSED(image_buffer))
+ {
+ BLI_assert(gpu_texture_);
+ float *buffer = nullptr;
+ for (int64_t index : infos_.index_range()) {
+ Info &info = infos_[index];
+ PaintTileData &tile = paint_tiles_[index];
+ if (!info.flags.needs_update) {
+ continue;
+ }
+
+ if (tile.tile_number != tile_number) {
+ continue;
+ }
+
+ if (buffer == nullptr) {
+ buffer = static_cast<float *>(MEM_callocN(Size * Size * 4 * sizeof(float), __func__));
+ }
+
+ /* TODO: Copy correct data from ImBuf.*/
+
+ GPU_texture_update_sub(
+ gpu_texture_, GPU_DATA_FLOAT, buffer, 0, 0, tile.layer_id, Size, Size, 1);
+ info.flags.needs_update = false;
+ }
+
+ if (buffe
@@ Diff output truncated at 10240 characters. @@
More information about the Bf-blender-cvs
mailing list