[Bf-blender-cvs] [a127025f1d8] cycles-x: Fix deadlock due to lock order inversion in rendering after GPU display changes

Tue Sep 14 15:13:45 CEST 2021

Commit: a127025f1d876ac8a6352607e9b560b275554f07
Author: Brecht Van Lommel
Date:   Mon Sep 13 14:28:58 2021 +0200
Branches: cycles-x
https://developer.blender.org/rBa127025f1d876ac8a6352607e9b560b275554f07

Fix deadlock due to lock order inversion in rendering after GPU display changes

Main thread:
* Acquires DST.gl_context_mutex lock for drawing image editor
* Acquires mutex_ in GPUDisplay::draw()

Render thread:
* Acquires mutex_ in GPUDisplay::update_begin()
* Acquires DST.gl_context_mutex for updating texture

Ideally DST.gl_context_mutex would not be needed at all, for best performance.
But this requires deep changes to the draw manager since it relies on global
variables in many places.

Instead rely on DST.gl_context_mutex for mutual exclusion between update and
draw operations in the subclass, and only use mutex_ for thread safe access to
the parameters.

Differential Revision: https://developer.blender.org/D12468

===================================================================

M	intern/cycles/blender/blender_gpu_display.cpp
M	intern/cycles/blender/blender_gpu_display.h
M	intern/cycles/render/gpu_display.cpp
M	intern/cycles/render/gpu_display.h

===================================================================

diff --git a/intern/cycles/blender/blender_gpu_display.cpp b/intern/cycles/blender/blender_gpu_display.cpp
index 23175436281..eca11c061ca 100644
--- a/intern/cycles/blender/blender_gpu_display.cpp
+++ b/intern/cycles/blender/blender_gpu_display.cpp
@@ -292,8 +292,19 @@ BlenderGPUDisplay::~BlenderGPUDisplay()
  * Update procedure.
  */
 
-bool BlenderGPUDisplay::do_update_begin(int texture_width, int texture_height)
+bool BlenderGPUDisplay::do_update_begin(const GPUDisplayParams &params,
+                                        int texture_width,
+                                        int texture_height)
 {
+  /* Note that it's the responsibility of BlenderGPUDisplay to ensure updating and drawing
+   * the texture does not happen at the same time. This is achieved indirectly.
+   *
+   * When enabling the OpenGL context, it uses an internal mutex lock DST.gl_context_lock.
+   * This same lock is also held when do_draw() is called, which together ensure mutual
+   * exclusion.
+   *
+   * This locking is not performed at the GPU display level, because that would cause lock
+   * inversion. */
   if (!gl_context_enable()) {
     return false;
   }
@@ -325,8 +336,8 @@ bool BlenderGPUDisplay::do_update_begin(int texture_width, int texture_height)
    * too much data to GPU when resolution divider is not 1. */
   /* TODO(sergey): Investigate whether keeping the PBO exact size of the texute makes non-interop
    * mode faster. */
-  const int buffer_width = params_.full_size.x;
-  const int buffer_height = params_.full_size.y;
+  const int buffer_width = params.full_size.x;
+  const int buffer_height = params.full_size.y;
   if (texture_.buffer_width != buffer_width || texture_.buffer_height != buffer_height) {
     const size_t size_in_bytes = sizeof(half4) * buffer_width * buffer_height;
     glBindBuffer(GL_PIXEL_UNPACK_BUFFER, texture_.gl_pbo_id_);
@@ -441,8 +452,9 @@ void BlenderGPUDisplay::graphics_interop_deactivate()
  * Drawing.
  */
 
-void BlenderGPUDisplay::do_draw()
+void BlenderGPUDisplay::do_draw(const GPUDisplayParams &params)
 {
+  /* See do_update_begin() for why no locking is required here. */
   const bool transparent = true;  // TODO(sergey): Derive this from Film.
 
   if (!gl_draw_resources_ensure()) {
@@ -456,7 +468,7 @@ void BlenderGPUDisplay::do_draw()
     glBlendFunc(GL_ONE, GL_ONE_MINUS_SRC_ALPHA);
   }
 
-  display_shader_->bind(params_.full_size.x, params_.full_size.y);
+  display_shader_->bind(params.full_size.x, params.full_size.y);
 
   glActiveTexture(GL_TEXTURE0);
   glBindTexture(GL_TEXTURE_2D, texture_.gl_id_);
@@ -464,7 +476,7 @@ void BlenderGPUDisplay::do_draw()
   glBindBuffer(GL_ARRAY_BUFFER, vertex_buffer_);
 
   texture_update_if_needed();
-  vertex_buffer_update();
+  vertex_buffer_update(params);
 
   /* TODO(sergey): Does it make sense/possible to cache/reuse the VAO? */
   GLuint vertex_array_object;
@@ -671,7 +683,7 @@ void BlenderGPUDisplay::texture_update_if_needed()
   texture_.need_update = false;
 }
 
-void BlenderGPUDisplay::vertex_buffer_update()
+void BlenderGPUDisplay::vertex_buffer_update(const GPUDisplayParams &params)
 {
   /* Invalidate old contents - avoids stalling if the buffer is still waiting in queue to be
    * rendered. */
@@ -684,23 +696,23 @@ void BlenderGPUDisplay::vertex_buffer_update()
 
   vpointer[0] = 0.0f;
   vpointer[1] = 0.0f;
-  vpointer[2] = params_.offset.x;
-  vpointer[3] = params_.offset.y;
+  vpointer[2] = params.offset.x;
+  vpointer[3] = params.offset.y;
 
   vpointer[4] = 1.0f;
   vpointer[5] = 0.0f;
-  vpointer[6] = (float)params_.size.x + params_.offset.x;
-  vpointer[7] = params_.offset.y;
+  vpointer[6] = (float)params.size.x + params.offset.x;
+  vpointer[7] = params.offset.y;
 
   vpointer[8] = 1.0f;
   vpointer[9] = 1.0f;
-  vpointer[10] = (float)params_.size.x + params_.offset.x;
-  vpointer[11] = (float)params_.size.y + params_.offset.y;
+  vpointer[10] = (float)params.size.x + params.offset.x;
+  vpointer[11] = (float)params.size.y + params.offset.y;
 
   vpointer[12] = 0.0f;
   vpointer[13] = 1.0f;
-  vpointer[14] = params_.offset.x;
-  vpointer[15] = (float)params_.size.y + params_.offset.y;
+  vpointer[14] = params.offset.x;
+  vpointer[15] = (float)params.size.y + params.offset.y;
 
   glUnmapBuffer(GL_ARRAY_BUFFER);
 }
diff --git a/intern/cycles/blender/blender_gpu_display.h b/intern/cycles/blender/blender_gpu_display.h
index 32ad245eeaa..0fc985c7dd9 100644
--- a/intern/cycles/blender/blender_gpu_display.h
+++ b/intern/cycles/blender/blender_gpu_display.h
@@ -104,7 +104,9 @@ class BlenderGPUDisplay : public GPUDisplay {
   virtual void graphics_interop_deactivate() override;
 
  protected:
-  virtual bool do_update_begin(int texture_width, int texture_height) override;
+  virtual bool do_update_begin(const GPUDisplayParams &params,
+                               int texture_width,
+                               int texture_height) override;
   virtual void do_update_end() override;
 
   virtual void do_copy_pixels_to_texture(const half4 *rgba_pixels,
@@ -112,7 +114,7 @@ class BlenderGPUDisplay : public GPUDisplay {
                                          int texture_y,
                                          int pixels_width,
                                          int pixels_height) override;
-  virtual void do_draw() override;
+  virtual void do_draw(const GPUDisplayParams &params) override;
 
   virtual half4 *do_map_texture_buffer() override;
   virtual void do_unmap_texture_buffer() override;
@@ -144,7 +146,7 @@ class BlenderGPUDisplay : public GPUDisplay {
    * This buffer is used to render texture in the viewport.
    *
    * NOTE: The buffer needs to be bound. */
-  void vertex_buffer_update();
+  void vertex_buffer_update(const GPUDisplayParams &params);
 
   BL::RenderEngine b_engine_;
 
diff --git a/intern/cycles/render/gpu_display.cpp b/intern/cycles/render/gpu_display.cpp
index 61ef69a82d2..a8f0cc50583 100644
--- a/intern/cycles/render/gpu_display.cpp
+++ b/intern/cycles/render/gpu_display.cpp
@@ -63,13 +63,18 @@ bool GPUDisplay::update_begin(int texture_width, int texture_height)
     return false;
   }
 
-  mutex_.lock();
-
-  texture_state_.size = make_int2(texture_width, texture_height);
+  /* Get parameters within a mutex lock, to avoid reset() modifying them at the same time.
+   * The update itself is non-blocking however, for better performance and to avoid
+   * potential deadlocks due to locks held by the subclass. */
+  GPUDisplayParams params;
+  {
+    thread_scoped_lock lock(mutex_);
+    params = params_;
+    texture_state_.size = make_int2(texture_width, texture_height);
+  }
 
-  if (!do_update_begin(texture_width, texture_height)) {
+  if (!do_update_begin(params, texture_width, texture_height)) {
     LOG(ERROR) << "GPUDisplay implementation could not begin update.";
-    mutex_.unlock();
     return false;
   }
 
@@ -90,7 +95,6 @@ void GPUDisplay::update_end()
   do_update_end();
 
   update_state_.is_active = false;
-  mutex_.unlock();
 }
 
 int2 GPUDisplay::get_texture_size() const
@@ -199,13 +203,25 @@ void GPUDisplay::graphics_interop_deactivate()
 
 bool GPUDisplay::draw()
 {
-  thread_scoped_lock lock(mutex_);
+  /* Get parameters within a mutex lock, to avoid reset() modifying them at the same time.
+   * The drawing itself is non-blocking however, for better performance and to avoid
+   * potential deadlocks due to locks held by the subclass. */
+  GPUDisplayParams params;
+  bool is_usable;
+  bool is_outdated;
+
+  {
+    thread_scoped_lock lock(mutex_);
+    params = params_;
+    is_usable = texture_state_.is_usable;
+    is_outdated = texture_state_.is_outdated;
+  }
 
-  if (texture_state_.is_usable) {
-    do_draw();
+  if (is_usable) {
+    do_draw(params);
   }
 
-  return !texture_state_.is_outdated;
+  return !is_outdated;
 }
 
 CCL_NAMESPACE_END
diff --git a/intern/cycles/render/gpu_display.h b/intern/cycles/render/gpu_display.h
index c9ba89a0665..9b5ae591455 100644
--- a/intern/cycles/render/gpu_display.h
+++ b/intern/cycles/render/gpu_display.h
@@ -77,11 +77,9 @@ class GPUDisplay {
   /* --------------------------------------------------------------------
    * Update procedure.
    *
-   * These callsi indicates a desire of the caller to update content of the displayed texture. They
-   * will perform proper mutex locks, avoiding re-draws while update is in process. They will also
-   * perform proper context activation in the particular GPUDisplay implementation. */
+   * These calls indicates a desire of the caller to update content of the displayed texture. */
 
-  /* Returns truth when update is ready. Update should be finished with update_end().
+  /* Returns true when update is ready. Update should be finished with update_end().
    *
    * If false is returned then no update is possible, and no update_end() call is needed.
    *
@@ -162,15 +160,16 @@ class GPUDisplay {
 
   /* Draw the current state of the texture.
    *
-   * Returns truth if this call did draw an updated state of the texture. */
+   * Returns true if this call did draw an updated state of the texture. */
   bool draw();
 
  protected:
   /* Implementation-specific calls which subclasses are to implement.
    * These `do_foo()` method corresponds to their `foo()` calls, but they are purely virtual to
    * simplify their particular implementation. */
-
-  virtual bool do_update_begin(int texture_width, int texture_height) = 0;
+  virtual bool do_update_begin(const GPUDisplayParams &params,
+                               int texture_width,
+                               int texture_height) = 0;
   virtual void do_update_end() = 0;
 
   virtual void do_copy_pixels_to_texture(const half4 *rgba_pixels,
@@ -178,17 +177,21 @@ class GPUDisplay {
                                          int texture_y,
                                          int pixels_width,
                                          int pixels_height) = 0;
-  virtual void do_draw() = 0;
 
   virtual half4 *do_map_texture_buffer() = 0;
   virtual void do_unmap_texture_buffer() = 0;
 
+  /* Note that this might be called in parallel to do_update_begin() and do_update_end(),
+   * the subclass is responsible for appropriate mutex locks to avoid 

@@ Diff output truncated at 10240 characters. @@