[Bf-blender-cvs] [f48d15a8618] master: Cycles: limit number of processes compiling OpenCL kernel based on memory

Wed Mar 25 16:46:00 CET 2020

Commit: f48d15a86189756ae7dd9b7ba45fb3af24e0846b
Author: Brecht Van Lommel
Date:   Wed Mar 25 13:11:09 2020 +0100
Branches: master
https://developer.blender.org/rBf48d15a86189756ae7dd9b7ba45fb3af24e0846b

Cycles: limit number of processes compiling OpenCL kernel based on memory

The numbers here can probably be tweaked to be better, but it's hard to
predict and this should at least avoid excessive memory swapping.

Fixes T57064.

===================================================================

M	intern/cycles/device/opencl/device_opencl_impl.cpp
M	intern/cycles/device/opencl/opencl_util.cpp
M	intern/cycles/util/CMakeLists.txt
A	intern/cycles/util/util_semaphore.h

===================================================================

diff --git a/intern/cycles/device/opencl/device_opencl_impl.cpp b/intern/cycles/device/opencl/device_opencl_impl.cpp
index b7a2be79804..2766f85d17c 100644
--- a/intern/cycles/device/opencl/device_opencl_impl.cpp
+++ b/intern/cycles/device/opencl/device_opencl_impl.cpp
@@ -257,16 +257,16 @@ void OpenCLDevice::OpenCLSplitPrograms::load_kernels(
 
     /* Ordered with most complex kernels first, to reduce overall compile time. */
     ADD_SPLIT_KERNEL_PROGRAM(subsurface_scatter);
+    ADD_SPLIT_KERNEL_PROGRAM(direct_lighting);
+    ADD_SPLIT_KERNEL_PROGRAM(indirect_background);
     if (requested_features.use_volume || is_preview) {
       ADD_SPLIT_KERNEL_PROGRAM(do_volume);
     }
+    ADD_SPLIT_KERNEL_PROGRAM(shader_eval);
+    ADD_SPLIT_KERNEL_PROGRAM(lamp_emission);
+    ADD_SPLIT_KERNEL_PROGRAM(holdout_emission_blurring_pathtermination_ao);
     ADD_SPLIT_KERNEL_PROGRAM(shadow_blocked_dl);
     ADD_SPLIT_KERNEL_PROGRAM(shadow_blocked_ao);
-    ADD_SPLIT_KERNEL_PROGRAM(holdout_emission_blurring_pathtermination_ao);
-    ADD_SPLIT_KERNEL_PROGRAM(lamp_emission);
-    ADD_SPLIT_KERNEL_PROGRAM(direct_lighting);
-    ADD_SPLIT_KERNEL_PROGRAM(indirect_background);
-    ADD_SPLIT_KERNEL_PROGRAM(shader_eval);
 
     /* Quick kernels bundled in a single program to reduce overhead of starting
      * Blender processes. */
diff --git a/intern/cycles/device/opencl/opencl_util.cpp b/intern/cycles/device/opencl/opencl_util.cpp
index 978c75d2e2c..b8b07cf2947 100644
--- a/intern/cycles/device/opencl/opencl_util.cpp
+++ b/intern/cycles/device/opencl/opencl_util.cpp
@@ -23,6 +23,7 @@
 #  include "util/util_logging.h"
 #  include "util/util_md5.h"
 #  include "util/util_path.h"
+#  include "util/util_semaphore.h"
 #  include "util/util_system.h"
 #  include "util/util_time.h"
 
@@ -390,8 +391,27 @@ static void escape_python_string(string &str)
   string_replace(str, "'", "\'");
 }
 
+static int opencl_compile_process_limit()
+{
+  /* Limit number of concurrent processes compiling, with a heuristic based
+   * on total physical RAM and estimate of memory usage needed when compiling
+   * with all Cycles features enabled.
+   *
+   * This is somewhat arbitrary as we don't know the actual available RAM or
+   * how much the kernel compilation will need depending on the features, but
+   * better than not limiting at all. */
+  static const int64_t GB = 1024LL * 1024LL * 1024LL;
+  static const int64_t process_memory = 2 * GB; /* Estimated RAM per compile process. */
+  static const int64_t base_memory = 2 * GB; /* Subtracted from total RAM before dividing. */
+  static const int64_t system_memory = system_physical_ram();
+  static const int64_t process_limit = (system_memory - base_memory) / process_memory;
+
+  return max((int)process_limit, 1); /* Never report fewer than one process. */
+}
+
 bool OpenCLDevice::OpenCLProgram::compile_separate(const string &clbin)
 {
+  /* Construct arguments. */
   vector<string> args;
   args.push_back("--background");
   args.push_back("--factory-startup");
@@ -419,14 +439,23 @@ bool OpenCLDevice::OpenCLProgram::compile_separate(const string &clbin)
       kernel_file_escaped.c_str(),
       clbin_escaped.c_str()));
 
-  double starttime = time_dt();
+  /* Limit number of concurrent processes compiling. */
+  static thread_counting_semaphore semaphore(opencl_compile_process_limit());
+  semaphore.acquire();
+
+  /* Compile. */
+  const double starttime = time_dt();
   add_log(string("Cycles: compiling OpenCL program ") + program_name + "...", false);
   add_log(string("Build flags: ") + kernel_build_options, true);
-  if (!system_call_self(args) || !path_exists(clbin)) {
+  const bool success = system_call_self(args);
+  const double elapsed = time_dt() - starttime;
+
+  semaphore.release();
+
+  if (!success || !path_exists(clbin)) {
     return false;
   }
 
-  double elapsed = time_dt() - starttime;
   add_log(
       string_printf("Kernel compilation of %s finished in %.2lfs.", program_name.c_str(), elapsed),
       false);
diff --git a/intern/cycles/util/CMakeLists.txt b/intern/cycles/util/CMakeLists.txt
index ef100c12453..c1f71461dfd 100644
--- a/intern/cycles/util/CMakeLists.txt
+++ b/intern/cycles/util/CMakeLists.txt
@@ -102,6 +102,7 @@ set(SRC_HEADERS
   util_sky_model_data.h
   util_avxf.h
   util_avxb.h
+  util_semaphore.h
   util_sseb.h
   util_ssef.h
   util_ssei.h
diff --git a/intern/cycles/util/util_semaphore.h b/intern/cycles/util/util_semaphore.h
new file mode 100644
index 00000000000..d995b0732b8
--- /dev/null
+++ b/intern/cycles/util/util_semaphore.h
@@ -0,0 +1,61 @@
+/*
+ * Copyright 2011-2020 Blender Foundation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __UTIL_SEMAPHORE_H__
+#define __UTIL_SEMAPHORE_H__
+
+#include "util/util_thread.h"
+
+CCL_NAMESPACE_BEGIN
+
+/* Counting Semaphore
+ *
+ * To restrict concurrent access to a resource to a specified number
+ * of threads. Similar to std::counting_semaphore from C++20. */
+
+class thread_counting_semaphore {
+ public:
+  explicit thread_counting_semaphore(const int count) : count(count) /* Initial free slots. */
+  {
+  }
+
+  thread_counting_semaphore(const thread_counting_semaphore &) = delete; /* Non-copyable. */
+
+  void acquire() /* Blocks while count is zero, then decrements it. */
+  {
+    thread_scoped_lock lock(mutex);
+    while (count == 0) { /* Re-check after every wakeup. */
+      condition.wait(lock);
+    }
+    count--;
+  }
+
+  void release() /* Increments count and notifies one waiter. */
+  {
+    thread_scoped_lock lock(mutex);
+    count++;
+    condition.notify_one();
+  }
+
+ protected:
+  thread_mutex mutex; /* Locked around every access to count in acquire()/release(). */
+  thread_condition_variable condition; /* Waited on in acquire(), notified in release(). */
+  int count; /* Decremented by acquire(), incremented by release(). */
+};
+
+CCL_NAMESPACE_END
+
+#endif /* __UTIL_SEMAPHORE_H__ */