[Bf-blender-cvs] [ce66b22c427] master: Fix crash when editing shaders on Intel HD 4000.

Wed Jun 5 18:51:50 CEST 2019

Commit: ce66b22c427defa3db498d2d69ee615b3c913c5f
Author: mano-wii
Date:   Wed Jun 5 13:06:11 2019 -0300
Branches: master
https://developer.blender.org/rBce66b22c427defa3db498d2d69ee615b3c913c5f

Fix crash when editing shaders on Intel HD 4000.

In the Intel HD 4000 driver a shader has to be deleted in the same context in which it is created.
However, because the same rendering context can't be used on different threads, and in order to keep the compilation multithreaded, the solution was to use `GL_ARB_get_program_binary`: the binary generated for the shader is copied out of the compilation context and then used to create an equivalent shader in the main context.
This solution is limited to the Intel HD 4000 on Windows.

Reviewers: fclem

Reviewed By: fclem

Differential Revision: https://developer.blender.org/D5019

===================================================================

M	source/blender/draw/intern/draw_manager_shader.c
M	source/blender/gpu/GPU_extensions.h
M	source/blender/gpu/GPU_shader.h
M	source/blender/gpu/intern/gpu_codegen.c
M	source/blender/gpu/intern/gpu_codegen.h
M	source/blender/gpu/intern/gpu_extensions.c
M	source/blender/gpu/intern/gpu_material.c
M	source/blender/gpu/intern/gpu_shader.c

===================================================================

diff --git a/source/blender/draw/intern/draw_manager_shader.c b/source/blender/draw/intern/draw_manager_shader.c
index 186bbae5cad..bdb3d5958d6 100644
--- a/source/blender/draw/intern/draw_manager_shader.c
+++ b/source/blender/draw/intern/draw_manager_shader.c
@@ -63,7 +63,8 @@ typedef struct DRWDeferredShader {
 } DRWDeferredShader;
 
 typedef struct DRWShaderCompiler {
-  ListBase queue; /* DRWDeferredShader */
+  ListBase queue;          /* DRWDeferredShader */
+  ListBase queue_conclude; /* DRWDeferredShader */
   SpinLock list_lock;
 
   DRWDeferredShader *mat_compiling;
@@ -134,7 +135,12 @@ static void drw_deferred_shader_compilation_exec(void *custom_data,
     BLI_mutex_unlock(&comp->compilation_lock);
 
     BLI_spin_lock(&comp->list_lock);
-    drw_deferred_shader_free(comp->mat_compiling);
+    if (GPU_material_status(comp->mat_compiling->mat) == GPU_MAT_QUEUED) {
+      BLI_addtail(&comp->queue_conclude, comp->mat_compiling);
+    }
+    else {
+      drw_deferred_shader_free(comp->mat_compiling);
+    }
     comp->mat_compiling = NULL;
     BLI_spin_unlock(&comp->list_lock);
   }
@@ -148,6 +154,17 @@ static void drw_deferred_shader_compilation_free(void *custom_data)
 
   drw_deferred_shader_queue_free(&comp->queue);
 
+  if (!BLI_listbase_is_empty(&comp->queue_conclude)) {
+    /* Compile the shaders in the context they will be deleted. */
+    DRW_opengl_context_enable_ex(false);
+    DRWDeferredShader *mat_conclude;
+    while (mat_conclude = BLI_poptail(&comp->queue_conclude)) {
+      GPU_material_compile(mat_conclude->mat);
+      drw_deferred_shader_free(mat_conclude);
+    }
+    DRW_opengl_context_disable_ex(true);
+  }
+
   BLI_spin_end(&comp->list_lock);
   BLI_mutex_end(&comp->compilation_lock);
 
diff --git a/source/blender/gpu/GPU_extensions.h b/source/blender/gpu/GPU_extensions.h
index 8f602bf1398..d0abf671fcd 100644
--- a/source/blender/gpu/GPU_extensions.h
+++ b/source/blender/gpu/GPU_extensions.h
@@ -46,6 +46,7 @@ void GPU_get_dfdy_factors(float fac[2]);
 bool GPU_mip_render_workaround(void);
 bool GPU_depth_blitting_workaround(void);
 bool GPU_unused_fb_slot_workaround(void);
+bool GPU_context_local_shaders_workaround(void);
 bool GPU_crappy_amd_driver(void);
 
 bool GPU_mem_stats_supported(void);
diff --git a/source/blender/gpu/GPU_shader.h b/source/blender/gpu/GPU_shader.h
index 0fe5427019a..dca66817666 100644
--- a/source/blender/gpu/GPU_shader.h
+++ b/source/blender/gpu/GPU_shader.h
@@ -58,6 +58,10 @@ GPUShader *GPU_shader_create_ex(const char *vertexcode,
                                 const char **tf_names,
                                 const int tf_count,
                                 const char *shader_name);
+GPUShader *GPU_shader_load_from_binary(const char *binary,
+                                       const int binary_format,
+                                       const int binary_len,
+                                       const char *shname);
 struct GPU_ShaderCreateFromArray_Params {
   const char **vert, **geom, **frag, **defs;
 };
@@ -95,6 +99,8 @@ void GPU_shader_uniform_int(GPUShader *shader, int location, int value);
 
 int GPU_shader_get_attribute(GPUShader *shader, const char *name);
 
+char *GPU_shader_get_binary(GPUShader *shader, int *r_binary_format, int *r_binary_len);
+
 /* Builtin/Non-generated shaders */
 typedef enum eGPUBuiltinShader {
   /* specialized drawing */
diff --git a/source/blender/gpu/intern/gpu_codegen.c b/source/blender/gpu/intern/gpu_codegen.c
index 12c35d76ac4..0c751808489 100644
--- a/source/blender/gpu/intern/gpu_codegen.c
+++ b/source/blender/gpu/intern/gpu_codegen.c
@@ -2105,17 +2105,17 @@ static int count_active_texture_sampler(GPUShader *shader, char *source)
   return sampler_len;
 }
 
-static bool gpu_pass_shader_validate(GPUPass *pass)
+static bool gpu_pass_shader_validate(GPUPass *pass, GPUShader *shader)
 {
-  if (pass->shader == NULL) {
+  if (shader == NULL) {
     return false;
   }
 
   /* NOTE: The only drawback of this method is that it will count a sampler
    * used in the fragment shader and only declared (but not used) in the vertex
    * shader as used by both. But this corner case is not happening for now. */
-  int vert_samplers_len = count_active_texture_sampler(pass->shader, pass->vertexcode);
-  int frag_samplers_len = count_active_texture_sampler(pass->shader, pass->fragmentcode);
+  int vert_samplers_len = count_active_texture_sampler(shader, pass->vertexcode);
+  int frag_samplers_len = count_active_texture_sampler(shader, pass->fragmentcode);
 
   int total_samplers_len = vert_samplers_len + frag_samplers_len;
 
@@ -2126,7 +2126,7 @@ static bool gpu_pass_shader_validate(GPUPass *pass)
   }
 
   if (pass->geometrycode) {
-    int geom_samplers_len = count_active_texture_sampler(pass->shader, pass->geometrycode);
+    int geom_samplers_len = count_active_texture_sampler(shader, pass->geometrycode);
     total_samplers_len += geom_samplers_len;
     if (geom_samplers_len > GPU_max_textures_geom()) {
       return false;
@@ -2136,30 +2136,40 @@ static bool gpu_pass_shader_validate(GPUPass *pass)
   return (total_samplers_len <= GPU_max_textures());
 }
 
-void GPU_pass_compile(GPUPass *pass, const char *shname)
+bool GPU_pass_compile(GPUPass *pass, const char *shname)
 {
+  bool sucess = true;
   if (!pass->compiled) {
-    pass->shader = GPU_shader_create(
+    GPUShader *shader = GPU_shader_create(
         pass->vertexcode, pass->fragmentcode, pass->geometrycode, NULL, pass->defines, shname);
 
     /* NOTE: Some drivers / gpu allows more active samplers than the opengl limit.
      * We need to make sure to count active samplers to avoid undefined behavior. */
-    if (!gpu_pass_shader_validate(pass)) {
-      if (pass->shader != NULL) {
+    if (!gpu_pass_shader_validate(pass, shader)) {
+      sucess = false;
+      if (shader != NULL) {
         fprintf(stderr, "GPUShader: error: too many samplers in shader.\n");
-        GPU_shader_free(pass->shader);
+        GPU_shader_free(shader);
+        shader = NULL;
       }
-      pass->shader = NULL;
     }
-    else if (!BLI_thread_is_main()) {
-      /* For some Intel drivers, you must use the program at least once
-       * in the rendering context that it is linked. */
-      glUseProgram(GPU_shader_get_program(pass->shader));
-      glUseProgram(0);
+    else if (!BLI_thread_is_main() && GPU_context_local_shaders_workaround()) {
+      pass->binary.content = GPU_shader_get_binary(
+          shader, &pass->binary.format, &pass->binary.len);
+      GPU_shader_free(shader);
+      shader = NULL;
     }
 
+    pass->shader = shader;
     pass->compiled = true;
   }
+  else if (pass->binary.content && BLI_thread_is_main()) {
+    pass->shader = GPU_shader_load_from_binary(
+        pass->binary.content, pass->binary.format, pass->binary.len, shname);
+    MEM_SAFE_FREE(pass->binary.content);
+  }
+
+  return sucess;
 }
 
 void GPU_pass_release(GPUPass *pass)
@@ -2178,6 +2188,9 @@ static void gpu_pass_free(GPUPass *pass)
   MEM_SAFE_FREE(pass->geometrycode);
   MEM_SAFE_FREE(pass->vertexcode);
   MEM_SAFE_FREE(pass->defines);
+  if (pass->binary.content) {
+    MEM_freeN(pass->binary.content);
+  }
   MEM_freeN(pass);
 }
 
diff --git a/source/blender/gpu/intern/gpu_codegen.h b/source/blender/gpu/intern/gpu_codegen.h
index d1bb3f26920..eac5df7e348 100644
--- a/source/blender/gpu/intern/gpu_codegen.h
+++ b/source/blender/gpu/intern/gpu_codegen.h
@@ -164,6 +164,11 @@ struct GPUPass {
   char *defines;
   uint refcount; /* Orphaned GPUPasses gets freed by the garbage collector. */
   uint32_t hash; /* Identity hash generated from all GLSL code. */
+  struct {
+    char *content;
+    int format;
+    int len;
+  } binary;
   bool compiled; /* Did we already tried to compile the attached GPUShader. */
 };
 
@@ -185,7 +190,7 @@ void GPU_nodes_extract_dynamic_inputs(struct GPUShader *shader, ListBase *inputs
 void GPU_nodes_get_vertex_attrs(ListBase *nodes, struct GPUVertAttrLayers *attrs);
 void GPU_nodes_prune(ListBase *nodes, struct GPUNodeLink *outlink);
 
-void GPU_pass_compile(GPUPass *pass, const char *shname);
+bool GPU_pass_compile(GPUPass *pass, const char *shname);
 void GPU_pass_release(GPUPass *pass);
 void GPU_pass_free_nodes(ListBase *nodes);
 
diff --git a/source/blender/gpu/intern/gpu_extensions.c b/source/blender/gpu/intern/gpu_extensions.c
index 8cd554cc9d5..58efe3dc5c4 100644
--- a/source/blender/gpu/intern/gpu_extensions.c
+++ b/source/blender/gpu/intern/gpu_extensions.c
@@ -89,6 +89,9 @@ static struct GPUGlobal {
   /* Crappy driver don't know how to map framebuffer slot to output vars...
    * We need to have no "holes" in the output buffer slots. */
   bool unused_fb_slot_workaround;
+  /* Some crappy Intel drivers don't work well with shaders created in different
+   * rendering contexts. */
+  bool context_local_shaders_workaround;
 } GG = {1, 0};
 
 static void gpu_detect_mip_render_workaround(void)
@@ -209,6 +212,11 @@ bool GPU_unused_fb_slot_workaround(void)
   return GG.unused_fb_slot_workaround;
 }
 
+bool GPU_context_local_shaders_workaround(void)
+{
+  return GG.context_local_shaders_workaround;
+}
+
 bool GPU_crappy_amd_driver(void)
 {
   /* Currently are the same drivers with the `unused_fb_slot` problem. */
@@ -347,6 +355,7 @@ void gpu_extensions_init(void)
     GG.mip_render_workaround = true;
     GG.depth_blitting_workaround = true;
     GG.unused_fb_slot_workaround = true;
+    GG.context_local_shaders_workaround = true;
   }
 
   /* df/dy calculation factors, those are dependent on driver */
@@ -354,19 +363,24 @@ void gpu_extensions_init(void)
     GG.dfdyfactors[0] = 1.0;
     GG.dfdyfactors[1] = -1.0;
   }
-  else if ((GG.device == GPU_DEVICE_INTEL) && (GG.os == GPU_OS_WIN) &&
-           (strstr(version, "4.0.0 - Build 10.18.10.3308") ||
-            strstr(version, "4.0.0 - Build 9.18.10.3186") ||
-            strstr(version, "4.0.0 - Build 9.18.10.3165") ||
-            strstr(version, "3.1.0 - Build 9.17.10.3347") ||
-            strstr(version, "3.1.0 - Build 9.17.10.4101") ||
-            strstr(version, "3.3.0 - Build 8.15.10.2618"))) {
-    GG.dfdyfactors[0] = -1.0;
-    GG.dfdyfactors[1] = 1.0;
-  }
-  else {
-    GG.dfdyfactors[0] =

@@ Diff output truncated at 10240 characters. @@