[Bf-blender-cvs] [f64fe321fa6] cycles-x: Merge branch 'master' into cycles-x
Sergey Sharybin
noreply at git.blender.org
Tue Jul 27 12:13:55 CEST 2021
Commit: f64fe321fa65f7c36ebf7104b84868d1bb65a2fc
Author: Sergey Sharybin
Date: Tue Jul 27 12:13:46 2021 +0200
Branches: cycles-x
https://developer.blender.org/rBf64fe321fa65f7c36ebf7104b84868d1bb65a2fc
Merge branch 'master' into cycles-x
===================================================================
===================================================================
diff --cc intern/cycles/device/cuda/device_impl.cpp
index 0eaf787dbd7,00000000000..37fab8f8293
mode 100644,000000..100644
--- a/intern/cycles/device/cuda/device_impl.cpp
+++ b/intern/cycles/device/cuda/device_impl.cpp
@@@ -1,1370 -1,0 +1,1370 @@@
+/*
+ * Copyright 2011-2013 Blender Foundation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifdef WITH_CUDA
+
+# include <climits>
+# include <limits.h>
+# include <stdio.h>
+# include <stdlib.h>
+# include <string.h>
+
+# include "device/cuda/device_impl.h"
+
+# include "render/buffers.h"
+
+# include "util/util_debug.h"
+# include "util/util_foreach.h"
+# include "util/util_logging.h"
+# include "util/util_map.h"
+# include "util/util_md5.h"
+# include "util/util_opengl.h"
+# include "util/util_path.h"
+# include "util/util_string.h"
+# include "util/util_system.h"
+# include "util/util_time.h"
+# include "util/util_types.h"
+# include "util/util_windows.h"
+
+CCL_NAMESPACE_BEGIN
+
+class CUDADevice;
+
+bool CUDADevice::have_precompiled_kernels()
+{
+ string cubins_path = path_get("lib");
+ return path_exists(cubins_path);
+}
+
+bool CUDADevice::show_samples() const
+{
+ /* The CUDADevice only processes one tile at a time, so showing samples is fine. */
+ return true;
+}
+
+BVHLayoutMask CUDADevice::get_bvh_layout_mask() const
+{
+ return BVH_LAYOUT_BVH2;
+}
+
+void CUDADevice::set_error(const string &error)
+{
+ Device::set_error(error);
+
+ if (first_error) {
+ fprintf(stderr, "\nRefer to the Cycles GPU rendering documentation for possible solutions:\n");
+ fprintf(stderr,
+ "https://docs.blender.org/manual/en/latest/render/cycles/gpu_rendering.html\n\n");
+ first_error = false;
+ }
+}
+
+CUDADevice::CUDADevice(const DeviceInfo &info, Stats &stats, Profiler &profiler)
+ : Device(info, stats, profiler), texture_info(this, "__texture_info", MEM_GLOBAL)
+{
+ first_error = true;
+
+ cuDevId = info.num;
+ cuDevice = 0;
+ cuContext = 0;
+
+ cuModule = 0;
+
+ need_texture_info = false;
+
+ device_texture_headroom = 0;
+ device_working_headroom = 0;
+ move_texture_to_host = false;
+ map_host_limit = 0;
+ map_host_used = 0;
+ can_map_host = 0;
+ pitch_alignment = 0;
+
+ /* Initialize CUDA. */
+ CUresult result = cuInit(0);
+ if (result != CUDA_SUCCESS) {
+ set_error(string_printf("Failed to initialize CUDA runtime (%s)", cuewErrorString(result)));
+ return;
+ }
+
+ /* Setup device and context. */
+ result = cuDeviceGet(&cuDevice, cuDevId);
+ if (result != CUDA_SUCCESS) {
+ set_error(string_printf("Failed to get CUDA device handle from ordinal (%s)",
+ cuewErrorString(result)));
+ return;
+ }
+
+ /* CU_CTX_MAP_HOST for mapping host memory when out of device memory.
+ * CU_CTX_LMEM_RESIZE_TO_MAX for reserving local memory ahead of render,
+ * so we can predict which memory to map to host. */
+ cuda_assert(
+ cuDeviceGetAttribute(&can_map_host, CU_DEVICE_ATTRIBUTE_CAN_MAP_HOST_MEMORY, cuDevice));
+
+ cuda_assert(cuDeviceGetAttribute(
+ &pitch_alignment, CU_DEVICE_ATTRIBUTE_TEXTURE_PITCH_ALIGNMENT, cuDevice));
+
+ unsigned int ctx_flags = CU_CTX_LMEM_RESIZE_TO_MAX;
+ if (can_map_host) {
+ ctx_flags |= CU_CTX_MAP_HOST;
+ init_host_memory();
+ }
+
+ /* Create context. */
+ result = cuCtxCreate(&cuContext, ctx_flags, cuDevice);
+
+ if (result != CUDA_SUCCESS) {
+ set_error(string_printf("Failed to create CUDA context (%s)", cuewErrorString(result)));
+ return;
+ }
+
+ int major, minor;
+ cuDeviceGetAttribute(&major, CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MAJOR, cuDevId);
+ cuDeviceGetAttribute(&minor, CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MINOR, cuDevId);
+ cuDevArchitecture = major * 100 + minor * 10;
+
+ /* Pop context set by cuCtxCreate. */
+ cuCtxPopCurrent(NULL);
+}
+
+CUDADevice::~CUDADevice()
+{
+ texture_info.free();
+
+ cuda_assert(cuCtxDestroy(cuContext));
+}
+
+bool CUDADevice::support_device(const uint /*kernel_features*/)
+{
+ int major, minor;
+ cuDeviceGetAttribute(&major, CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MAJOR, cuDevId);
+ cuDeviceGetAttribute(&minor, CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MINOR, cuDevId);
+
+ /* We only support sm_30 and above */
+ if (major < 3) {
+ set_error(string_printf(
+ "CUDA backend requires compute capability 3.0 or up, but found %d.%d.", major, minor));
+ return false;
+ }
+
+ return true;
+}
+
+bool CUDADevice::check_peer_access(Device *peer_device)
+{
+ if (peer_device == this) {
+ return false;
+ }
+ if (peer_device->info.type != DEVICE_CUDA && peer_device->info.type != DEVICE_OPTIX) {
+ return false;
+ }
+
+ CUDADevice *const peer_device_cuda = static_cast<CUDADevice *>(peer_device);
+
+ int can_access = 0;
+ cuda_assert(cuDeviceCanAccessPeer(&can_access, cuDevice, peer_device_cuda->cuDevice));
+ if (can_access == 0) {
+ return false;
+ }
+
+ // Ensure array access over the link is possible as well (for 3D textures)
+ cuda_assert(cuDeviceGetP2PAttribute(&can_access,
+ CU_DEVICE_P2P_ATTRIBUTE_CUDA_ARRAY_ACCESS_SUPPORTED,
+ cuDevice,
+ peer_device_cuda->cuDevice));
+ if (can_access == 0) {
+ return false;
+ }
+
+ // Enable peer access in both directions
+ {
+ const CUDAContextScope scope(this);
+ CUresult result = cuCtxEnablePeerAccess(peer_device_cuda->cuContext, 0);
+ if (result != CUDA_SUCCESS) {
+ set_error(string_printf("Failed to enable peer access on CUDA context (%s)",
+ cuewErrorString(result)));
+ return false;
+ }
+ }
+ {
+ const CUDAContextScope scope(peer_device_cuda);
+ CUresult result = cuCtxEnablePeerAccess(cuContext, 0);
+ if (result != CUDA_SUCCESS) {
+ set_error(string_printf("Failed to enable peer access on CUDA context (%s)",
+ cuewErrorString(result)));
+ return false;
+ }
+ }
+
+ return true;
+}
+
+bool CUDADevice::use_adaptive_compilation()
+{
+ return DebugFlags().cuda.adaptive_compile;
+}
+
+/* Common NVCC flags which stays the same regardless of shading model,
+ * kernel sources md5 and only depends on compiler or compilation settings.
+ */
+string CUDADevice::compile_kernel_get_common_cflags(const uint kernel_features)
+{
+ const int machine = system_cpu_bits();
+ const string source_path = path_get("source");
+ const string include_path = source_path;
+ string cflags = string_printf(
+ "-m%d "
+ "--ptxas-options=\"-v\" "
+ "--use_fast_math "
+ "-DNVCC "
+ "-I\"%s\"",
+ machine,
+ include_path.c_str());
+ if (use_adaptive_compilation()) {
+ cflags += " -D__KERNEL_FEATURES__=" + to_string(kernel_features);
+ }
+ const char *extra_cflags = getenv("CYCLES_CUDA_EXTRA_CFLAGS");
+ if (extra_cflags) {
+ cflags += string(" ") + string(extra_cflags);
+ }
+
+# ifdef WITH_NANOVDB
+ cflags += " -DWITH_NANOVDB";
+# endif
+
+ return cflags;
+}
+
+string CUDADevice::compile_kernel(const uint kernel_features,
+ const char *name,
+ const char *base,
+ bool force_ptx)
+{
+ /* Compute kernel name. */
+ int major, minor;
+ cuDeviceGetAttribute(&major, CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MAJOR, cuDevId);
+ cuDeviceGetAttribute(&minor, CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MINOR, cuDevId);
+
+ /* Attempt to use kernel provided with Blender. */
+ if (!use_adaptive_compilation()) {
+ if (!force_ptx) {
+ const string cubin = path_get(string_printf("lib/%s_sm_%d%d.cubin", name, major, minor));
+ VLOG(1) << "Testing for pre-compiled kernel " << cubin << ".";
+ if (path_exists(cubin)) {
+ VLOG(1) << "Using precompiled kernel.";
+ return cubin;
+ }
+ }
+
+ /* The driver can JIT-compile PTX generated for older generations, so find the closest one. */
+ int ptx_major = major, ptx_minor = minor;
+ while (ptx_major >= 3) {
+ const string ptx = path_get(
+ string_printf("lib/%s_compute_%d%d.ptx", name, ptx_major, ptx_minor));
+ VLOG(1) << "Testing for pre-compiled kernel " << ptx << ".";
+ if (path_exists(ptx)) {
+ VLOG(1) << "Using precompiled kernel.";
+ return ptx;
+ }
+
+ if (ptx_minor > 0) {
+ ptx_minor--;
+ }
+ else {
+ ptx_major--;
+ ptx_minor = 9;
+ }
+ }
+ }
+
+ /* Try to use locally compiled kernel. */
+ string source_path = path_get("source");
+ const string source_md5 = path_files_md5_hash(source_path);
+
+ /* We include cflags into md5 so changing cuda toolkit or changing other
+ * compiler command line arguments makes sure cubin gets re-built.
+ */
+ string common_cflags = compile_kernel_get_common_cflags(kernel_features);
+ const string kernel_md5 = util_md5_string(source_md5 + common_cflags);
+
+ const char *const kernel_ext = force_ptx ? "ptx" : "cubin";
+ const char *const kernel_arch = force_ptx ? "compute" : "sm";
+ const string cubin_file = string_printf(
+ "cycles_%s_%s_%d%d_%s.%s", name, kernel_arch, major, minor, kernel_md5.c_str(), kernel_ext);
+ const string cubin = path_cache_get(path_join("kernels", cubin_file));
+ VLOG(1) << "Testing for locally compiled kernel " << cubin << ".";
+ if (path_
@@ Diff output truncated at 10240 characters. @@
More information about the Bf-blender-cvs
mailing list