[Bf-blender-cvs] [a93043153a3] master: Cleanup: Remove superfluous "cuda_device_ptr" function

Tue Feb 25 17:14:47 CET 2020

Commit: a93043153a35200b62513b4749ab1018b354918a
Author: Patrick Mours
Date:   Tue Feb 25 17:10:39 2020 +0100
Branches: master
https://developer.blender.org/rBa93043153a35200b62513b4749ab1018b354918a

Cleanup: Remove superfluous "cuda_device_ptr" function

===================================================================

M	intern/cycles/device/cuda/device_cuda.h
M	intern/cycles/device/cuda/device_cuda_impl.cpp

===================================================================

diff --git a/intern/cycles/device/cuda/device_cuda.h b/intern/cycles/device/cuda/device_cuda.h
index 5820b525fd6..a825bd6b128 100644
--- a/intern/cycles/device/cuda/device_cuda.h
+++ b/intern/cycles/device/cuda/device_cuda.h
@@ -82,11 +82,6 @@ class CUDADevice : public Device {
   device_vector<TextureInfo> texture_info;
   bool need_texture_info;
 
-  CUdeviceptr cuda_device_ptr(device_ptr mem)
-  {
-    return (CUdeviceptr)mem;
-  }
-
   static bool have_precompiled_kernels();
 
   virtual bool show_samples() const;
diff --git a/intern/cycles/device/cuda/device_cuda_impl.cpp b/intern/cycles/device/cuda/device_cuda_impl.cpp
index 5c25d7b0208..f6a4f93a690 100644
--- a/intern/cycles/device/cuda/device_cuda_impl.cpp
+++ b/intern/cycles/device/cuda/device_cuda_impl.cpp
@@ -829,18 +829,17 @@ CUDADevice::CUDAMem *CUDADevice::generic_alloc(device_memory &mem, size_t pitch_
 
 void CUDADevice::generic_copy_to(device_memory &mem)
 {
-  if (mem.host_pointer && mem.device_pointer) {
-    CUDAContextScope scope(this);
-
-    /* If use_mapped_host of mem is false, the current device only
-     * uses device memory allocated by cuMemAlloc regardless of
-     * mem.host_pointer and mem.shared_pointer, and should copy
-     * data from mem.host_pointer. */
+  if (!mem.host_pointer || !mem.device_pointer) {
+    return;
+  }
 
-    if (cuda_mem_map[&mem].use_mapped_host == false || mem.host_pointer != mem.shared_pointer) {
-      cuda_assert(
-          cuMemcpyHtoD(cuda_device_ptr(mem.device_pointer), mem.host_pointer, mem.memory_size()));
-    }
+  /* If use_mapped_host of mem is false, the current device only uses device memory allocated by
+   * cuMemAlloc regardless of mem.host_pointer and mem.shared_pointer, and should copy data from
+   * mem.host_pointer. */
+  if (!cuda_mem_map[&mem].use_mapped_host || mem.host_pointer != mem.shared_pointer) {
+    const CUDAContextScope scope(this);
+    cuda_assert(
+        cuMemcpyHtoD((CUdeviceptr)mem.device_pointer, mem.host_pointer, mem.memory_size()));
   }
 }
 
@@ -947,7 +946,7 @@ void CUDADevice::mem_zero(device_memory &mem)
    * regardless of mem.host_pointer and mem.shared_pointer. */
   if (!cuda_mem_map[&mem].use_mapped_host || mem.host_pointer != mem.shared_pointer) {
     const CUDAContextScope scope(this);
-    cuda_assert(cuMemsetD8(cuda_device_ptr(mem.device_pointer), 0, mem.memory_size()));
+    cuda_assert(cuMemsetD8((CUdeviceptr)mem.device_pointer, 0, mem.memory_size()));
   }
   else if (mem.host_pointer) {
     memset(mem.host_pointer, 0, mem.memory_size());
@@ -1273,7 +1272,7 @@ bool CUDADevice::denoising_non_local_means(device_ptr image_ptr,
   if (have_error())
     return false;
 
-  CUdeviceptr difference = cuda_device_ptr(task->buffer.temporary_mem.device_pointer);
+  CUdeviceptr difference = (CUdeviceptr)task->buffer.temporary_mem.device_pointer;
   CUdeviceptr blurDifference = difference + sizeof(float) * pass_stride * num_shifts;
   CUdeviceptr weightAccum = difference + 2 * sizeof(float) * pass_stride * num_shifts;
   CUdeviceptr scale_ptr = 0;
@@ -1405,7 +1404,7 @@ bool CUDADevice::denoising_accumulate(device_ptr color_ptr,
   if (have_error())
     return false;
 
-  CUdeviceptr difference = cuda_device_ptr(task->buffer.temporary_mem.device_pointer);
+  CUdeviceptr difference = (CUdeviceptr)task->buffer.temporary_mem.device_pointer;
   CUdeviceptr blurDifference = difference + sizeof(float) * pass_stride * num_shifts;
 
   CUfunction cuNLMCalcDifference, cuNLMBlur, cuNLMCalcWeight, cuNLMConstructGramian;
@@ -1703,7 +1702,7 @@ void CUDADevice::path_trace(DeviceTask &task,
   wtile->h = rtile.h;
   wtile->offset = rtile.offset;
   wtile->stride = rtile.stride;
-  wtile->buffer = (float *)cuda_device_ptr(rtile.buffer);
+  wtile->buffer = (float *)(CUdeviceptr)rtile.buffer;
 
   /* Prepare work size. More step samples render faster, but for now we
    * remain conservative for GPUs connected to a display to avoid driver
@@ -1727,7 +1726,7 @@ void CUDADevice::path_trace(DeviceTask &task,
     wtile->num_samples = min(step_samples, end_sample - sample);
     work_tiles.copy_to_device();
 
-    CUdeviceptr d_work_tiles = cuda_device_ptr(work_tiles.device_pointer);
+    CUdeviceptr d_work_tiles = (CUdeviceptr)work_tiles.device_pointer;
     uint total_work_size = wtile->w * wtile->h * wtile->num_samples;
     uint num_blocks = divide_up(total_work_size, num_threads_per_block);
 
@@ -1762,7 +1761,7 @@ void CUDADevice::film_convert(DeviceTask &task,
 
   CUfunction cuFilmConvert;
   CUdeviceptr d_rgba = map_pixels((rgba_byte) ? rgba_byte : rgba_half);
-  CUdeviceptr d_buffer = cuda_device_ptr(buffer);
+  CUdeviceptr d_buffer = (CUdeviceptr)buffer;
 
   /* get kernel function */
   if (rgba_half) {
@@ -1823,8 +1822,8 @@ void CUDADevice::shader(DeviceTask &task)
   CUDAContextScope scope(this);
 
   CUfunction cuShader;
-  CUdeviceptr d_input = cuda_device_ptr(task.shader_input);
-  CUdeviceptr d_output = cuda_device_ptr(task.shader_output);
+  CUdeviceptr d_input = (CUdeviceptr)task.shader_input;
+  CUdeviceptr d_output = (CUdeviceptr)task.shader_output;
 
   /* get kernel function */
   if (task.shader_eval_type >= SHADER_EVAL_BAKE) {
@@ -1907,7 +1906,7 @@ CUdeviceptr CUDADevice::map_pixels(device_ptr mem)
     return buffer;
   }
 
-  return cuda_device_ptr(mem);
+  return (CUdeviceptr)mem;
 }
 
 void CUDADevice::unmap_pixels(device_ptr mem)
@@ -2347,7 +2346,7 @@ uint64_t CUDASplitKernel::state_buffer_size(device_memory & /*kg*/,
   size_buffer.zero_to_device();
 
   uint threads = num_threads;
-  CUdeviceptr d_size = device->cuda_device_ptr(size_buffer.device_pointer);
+  CUdeviceptr d_size = (CUdeviceptr)size_buffer.device_pointer;
 
   struct args_t {
     uint *num_threads;
@@ -2382,13 +2381,13 @@ bool CUDASplitKernel::enqueue_split_kernel_data_init(const KernelDimensions &dim
 {
   CUDAContextScope scope(device);
 
-  CUdeviceptr d_split_data = device->cuda_device_ptr(split_data.device_pointer);
-  CUdeviceptr d_ray_state = device->cuda_device_ptr(ray_state.device_pointer);
-  CUdeviceptr d_queue_index = device->cuda_device_ptr(queue_index.device_pointer);
-  CUdeviceptr d_use_queues_flag = device->cuda_device_ptr(use_queues_flag.device_pointer);
-  CUdeviceptr d_work_pool_wgs = device->cuda_device_ptr(work_pool_wgs.device_pointer);
+  CUdeviceptr d_split_data = (CUdeviceptr)split_data.device_pointer;
+  CUdeviceptr d_ray_state = (CUdeviceptr)ray_state.device_pointer;
+  CUdeviceptr d_queue_index = (CUdeviceptr)queue_index.device_pointer;
+  CUdeviceptr d_use_queues_flag = (CUdeviceptr)use_queues_flag.device_pointer;
+  CUdeviceptr d_work_pool_wgs = (CUdeviceptr)work_pool_wgs.device_pointer;
 
-  CUdeviceptr d_buffer = device->cuda_device_ptr(rtile.buffer);
+  CUdeviceptr d_buffer = (CUdeviceptr)rtile.buffer;
 
   int end_sample = rtile.start_sample + rtile.num_samples;
   int queue_size = dim.global_size[0] * dim.global_size[1];