[Bf-blender-cvs] [153e001c743] master: Cleanup: Move common CUDA/OptiX Cycles device code into separate file

Wed Feb 12 13:19:29 CET 2020

Commit: 153e001c743bf0f6bc259966418446441e00e200
Author: Patrick Mours
Date:   Tue Feb 11 18:54:50 2020 +0100
Branches: master
https://developer.blender.org/rB153e001c743bf0f6bc259966418446441e00e200

Cleanup: Move common CUDA/OptiX Cycles device code into separate file

This reduces code duplication between the CUDA and OptiX device implementations. The CUDA device
class is now split into a declaration and a definition (similar to the OpenCL device). The OptiX device
class derives from the CUDA device class and overrides only the functions it actually has to change,
while using the CUDA implementation for everything else.

Reviewed By: brecht

Differential Revision: https://developer.blender.org/D6814

===================================================================

M	intern/cycles/device/CMakeLists.txt
A	intern/cycles/device/cuda/device_cuda.h
A	intern/cycles/device/cuda/device_cuda_impl.cpp
M	intern/cycles/device/device_cuda.cpp
M	intern/cycles/device/device_opencl.cpp
M	intern/cycles/device/device_optix.cpp
R100	intern/cycles/device/opencl/opencl.h	intern/cycles/device/opencl/device_opencl.h
R099	intern/cycles/device/opencl/opencl_split.cpp	intern/cycles/device/opencl/device_opencl_impl.cpp
M	intern/cycles/device/opencl/memory_manager.cpp
M	intern/cycles/device/opencl/opencl_util.cpp

===================================================================

diff --git a/intern/cycles/device/CMakeLists.txt b/intern/cycles/device/CMakeLists.txt
index 35a79356957..aa5b65a2b73 100644
--- a/intern/cycles/device/CMakeLists.txt
+++ b/intern/cycles/device/CMakeLists.txt
@@ -34,13 +34,17 @@ set(SRC
   device_task.cpp
 )
 
+set(SRC_CUDA
+  cuda/device_cuda.h
+  cuda/device_cuda_impl.cpp
+)
+
 set(SRC_OPENCL
-  opencl/opencl.h
+  opencl/device_opencl.h
+  opencl/device_opencl_impl.cpp
   opencl/memory_manager.h
-
-  opencl/opencl_split.cpp
-  opencl/opencl_util.cpp
   opencl/memory_manager.cpp
+  opencl/opencl_util.cpp
 )
 
 if(WITH_CYCLES_NETWORK)
@@ -98,4 +102,4 @@ endif()
 include_directories(${INC})
 include_directories(SYSTEM ${INC_SYS})
 
-cycles_add_library(cycles_device "${LIB}" ${SRC} ${SRC_OPENCL} ${SRC_HEADERS})
+cycles_add_library(cycles_device "${LIB}" ${SRC} ${SRC_CUDA} ${SRC_OPENCL} ${SRC_HEADERS})
diff --git a/intern/cycles/device/cuda/device_cuda.h b/intern/cycles/device/cuda/device_cuda.h
new file mode 100644
index 00000000000..3d29d13a781
--- /dev/null
+++ b/intern/cycles/device/cuda/device_cuda.h
@@ -0,0 +1,248 @@
+/*
+ * Copyright 2011-2013 Blender Foundation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifdef WITH_CUDA
+
+#  include "device/device.h"
+#  include "device/device_denoising.h"
+#  include "device/device_split_kernel.h"
+
+#  include "util/util_map.h"
+
+#  ifdef WITH_CUDA_DYNLOAD
+#    include "cuew.h"
+#  else
+#    include "util/util_opengl.h"
+#    include <cuda.h>
+#    include <cudaGL.h>
+#  endif
+
+CCL_NAMESPACE_BEGIN
+
+class CUDADevice : public Device {
+
+  friend class CUDASplitKernelFunction;
+  friend class CUDASplitKernel;
+  friend class CUDAContextScope;
+
+ public:
+  DedicatedTaskPool task_pool;
+  CUdevice cuDevice;
+  CUcontext cuContext;
+  CUmodule cuModule, cuFilterModule;
+  size_t device_texture_headroom;
+  size_t device_working_headroom;
+  bool move_texture_to_host;
+  size_t map_host_used;
+  size_t map_host_limit;
+  int can_map_host;
+  int cuDevId;
+  int cuDevArchitecture;
+  bool first_error;
+  CUDASplitKernel *split_kernel;
+
+  struct CUDAMem {
+    CUDAMem() : texobject(0), array(0), use_mapped_host(false)
+    {
+    }
+
+    CUtexObject texobject;
+    CUarray array;
+
+    /* If true, a mapped host memory in shared_pointer is being used. */
+    bool use_mapped_host;
+  };
+  typedef map<device_memory *, CUDAMem> CUDAMemMap;
+  CUDAMemMap cuda_mem_map;
+
+  struct PixelMem {
+    GLuint cuPBO;
+    CUgraphicsResource cuPBOresource;
+    GLuint cuTexId;
+    int w, h;
+  };
+  map<device_ptr, PixelMem> pixel_mem_map;
+
+  /* Bindless Textures */
+  device_vector<TextureInfo> texture_info;
+  bool need_texture_info;
+
+  CUdeviceptr cuda_device_ptr(device_ptr mem)
+  {
+    return (CUdeviceptr)mem;
+  }
+
+  static bool have_precompiled_kernels();
+
+  virtual bool show_samples() const;
+
+  virtual BVHLayoutMask get_bvh_layout_mask() const;
+
+  void cuda_error_documentation();
+
+  bool cuda_error_(CUresult result, const string &stmt);
+
+  void cuda_error_message(const string &message);
+
+  CUDADevice(DeviceInfo &info, Stats &stats, Profiler &profiler, bool background_);
+
+  virtual ~CUDADevice();
+
+  bool support_device(const DeviceRequestedFeatures & /*requested_features*/);
+
+  bool use_adaptive_compilation();
+
+  bool use_split_kernel();
+
+  string compile_kernel_get_common_cflags(const DeviceRequestedFeatures &requested_features,
+                                          bool filter = false,
+                                          bool split = false);
+
+  bool compile_check_compiler();
+
+  string compile_kernel(const DeviceRequestedFeatures &requested_features,
+                        bool filter = false,
+                        bool split = false);
+
+  virtual bool load_kernels(const DeviceRequestedFeatures &requested_features);
+
+  void reserve_local_memory(const DeviceRequestedFeatures &requested_features);
+
+  void init_host_memory();
+
+  void load_texture_info();
+
+  void move_textures_to_host(size_t size, bool for_texture);
+
+  CUDAMem *generic_alloc(device_memory &mem, size_t pitch_padding = 0);
+
+  void generic_copy_to(device_memory &mem);
+
+  void generic_free(device_memory &mem);
+
+  void mem_alloc(device_memory &mem);
+
+  void mem_copy_to(device_memory &mem);
+
+  void mem_copy_from(device_memory &mem, int y, int w, int h, int elem);
+
+  void mem_zero(device_memory &mem);
+
+  void mem_free(device_memory &mem);
+
+  device_ptr mem_alloc_sub_ptr(device_memory &mem, int offset, int /*size*/);
+
+  virtual void const_copy_to(const char *name, void *host, size_t size);
+
+  void tex_alloc(device_memory &mem);
+
+  void tex_free(device_memory &mem);
+
+  bool denoising_non_local_means(device_ptr image_ptr,
+                                 device_ptr guide_ptr,
+                                 device_ptr variance_ptr,
+                                 device_ptr out_ptr,
+                                 DenoisingTask *task);
+
+  bool denoising_construct_transform(DenoisingTask *task);
+
+  bool denoising_accumulate(device_ptr color_ptr,
+                            device_ptr color_variance_ptr,
+                            device_ptr scale_ptr,
+                            int frame,
+                            DenoisingTask *task);
+
+  bool denoising_solve(device_ptr output_ptr, DenoisingTask *task);
+
+  bool denoising_combine_halves(device_ptr a_ptr,
+                                device_ptr b_ptr,
+                                device_ptr mean_ptr,
+                                device_ptr variance_ptr,
+                                int r,
+                                int4 rect,
+                                DenoisingTask *task);
+
+  bool denoising_divide_shadow(device_ptr a_ptr,
+                               device_ptr b_ptr,
+                               device_ptr sample_variance_ptr,
+                               device_ptr sv_variance_ptr,
+                               device_ptr buffer_variance_ptr,
+                               DenoisingTask *task);
+
+  bool denoising_get_feature(int mean_offset,
+                             int variance_offset,
+                             device_ptr mean_ptr,
+                             device_ptr variance_ptr,
+                             float scale,
+                             DenoisingTask *task);
+
+  bool denoising_write_feature(int out_offset,
+                               device_ptr from_ptr,
+                               device_ptr buffer_ptr,
+                               DenoisingTask *task);
+
+  bool denoising_detect_outliers(device_ptr image_ptr,
+                                 device_ptr variance_ptr,
+                                 device_ptr depth_ptr,
+                                 device_ptr output_ptr,
+                                 DenoisingTask *task);
+
+  void denoise(RenderTile &rtile, DenoisingTask &denoising);
+
+  void path_trace(DeviceTask &task, RenderTile &rtile, device_vector<WorkTile> &work_tiles);
+
+  void film_convert(DeviceTask &task,
+                    device_ptr buffer,
+                    device_ptr rgba_byte,
+                    device_ptr rgba_half);
+
+  void shader(DeviceTask &task);
+
+  CUdeviceptr map_pixels(device_ptr mem);
+
+  void unmap_pixels(device_ptr mem);
+
+  void pixels_alloc(device_memory &mem);
+
+  void pixels_copy_from(device_memory &mem, int y, int w, int h);
+
+  void pixels_free(device_memory &mem);
+
+  void draw_pixels(device_memory &mem,
+                   int y,
+                   int w,
+                   int h,
+                   int width,
+                   int height,
+                   int dx,
+                   int dy,
+                   int dw,
+                   int dh,
+                   bool transparent,
+                   const DeviceDrawParams &draw_params);
+
+  void thread_run(DeviceTask *task);
+
+  virtual void task_add(DeviceTask &task);
+
+  virtual void task_wait();
+
+  virtual void task_cancel();
+};
+
+CCL_NAMESPACE_END
+
+#endif
diff --git a/intern/cycles/device/cuda/device_cuda_impl.cpp b/intern/cycles/device/cuda/device_cuda_impl.cpp
new file mode 100644
index 00000000000..a4e1c026263
--- /dev/null
+++ b/intern/cycles/device/cuda/device_cuda_impl.cpp
@@ -0,0 +1,2502 @@
+/*
+ * Copyright 2011-2013 Blender Foundation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifdef WITH_CUDA
+
+#  include <climits>
+#  include <limits.h>
+#  include <stdio.h>
+#  include <stdlib.h>
+#  include <string.h>
+
+#  include "device/cuda/device_cuda.h"
+#  include "device/device_intern.h"
+#  include "device/device_split_kernel.h"
+
+#  include "render/buffers.h"
+
+#  include "kernel/filter/filter_defines.h"
+
+#  include "util/util_debug.h"
+#  include "util/util_foreach.h"
+#  include "util/util_logging.h"
+#  include "util/util_map.h"
+#  include "util/util_md5.h"
+#  include "util/util_opengl.h"
+#  include "util/util_path.h"
+#  include "util/util_string.h"
+#  include "util/util_system.h"
+#  include "util/util_types.h"
+#  include "util/util_time.h"
+#  include "util/util_windows.h"
+
+#  include "kernel/split/kernel_split_data_types.h"
+
+CCL_NAMESPACE_BEGIN
+
+# 

@@ Diff output truncated at 10240 characters. @@