[Bf-blender-cvs] [044a77352f8] master: Cycles: add HIP device support for AMD GPUs

Brian Savery noreply at git.blender.org
Tue Sep 28 19:23:14 CEST 2021


Commit: 044a77352f8a8a0e1f60190369d69ef26587b65f
Author: Brian Savery
Date:   Tue Sep 28 16:51:14 2021 +0200
Branches: master
https://developer.blender.org/rB044a77352f8a8a0e1f60190369d69ef26587b65f

Cycles: add HIP device support for AMD GPUs

NOTE: this feature is not ready for user testing, and not yet enabled in daily
builds. It is being merged now for easier collaboration on development.

HIP is a heterogeneous compute interface allowing C++ code to be executed on
GPUs similar to CUDA. It is intended to bring back AMD GPU rendering support
on Windows and Linux.

https://github.com/ROCm-Developer-Tools/HIP

As of the time of writing, it should compile and run on Linux with existing
HIP compilers and driver runtimes. Publicly available compilers and drivers
for Windows will come later.

See task T91571 for more details on the current status and work remaining
to be done.

Credits:

Sayak Biswas (AMD)
Arya Rafii (AMD)
Brian Savery (AMD)

Differential Revision: https://developer.blender.org/D12578

===================================================================

M	CMakeLists.txt
M	extern/CMakeLists.txt
A	extern/hipew/CMakeLists.txt
A	extern/hipew/include/hipew.h
A	extern/hipew/src/hipew.c
M	intern/cycles/CMakeLists.txt
M	intern/cycles/blender/CMakeLists.txt
M	intern/cycles/blender/addon/engine.py
M	intern/cycles/blender/addon/properties.py
M	intern/cycles/blender/addon/ui.py
M	intern/cycles/blender/blender_device.cpp
M	intern/cycles/blender/blender_python.cpp
M	intern/cycles/cmake/external_libs.cmake
M	intern/cycles/cmake/macros.cmake
M	intern/cycles/device/CMakeLists.txt
M	intern/cycles/device/device.cpp
M	intern/cycles/device/device.h
M	intern/cycles/device/device_memory.h
A	intern/cycles/device/hip/device.cpp
A	intern/cycles/device/hip/device.h
A	intern/cycles/device/hip/device_impl.cpp
A	intern/cycles/device/hip/device_impl.h
A	intern/cycles/device/hip/graphics_interop.cpp
A	intern/cycles/device/hip/graphics_interop.h
A	intern/cycles/device/hip/kernel.cpp
A	intern/cycles/device/hip/kernel.h
A	intern/cycles/device/hip/queue.cpp
A	intern/cycles/device/hip/queue.h
A	intern/cycles/device/hip/util.cpp
A	intern/cycles/device/hip/util.h
M	intern/cycles/integrator/path_trace.cpp
M	intern/cycles/kernel/CMakeLists.txt
M	intern/cycles/kernel/device/gpu/parallel_active_index.h
M	intern/cycles/kernel/device/gpu/parallel_prefix_sum.h
M	intern/cycles/kernel/device/gpu/parallel_reduce.h
M	intern/cycles/kernel/device/gpu/parallel_sorted_index.h
A	intern/cycles/kernel/device/hip/compat.h
A	intern/cycles/kernel/device/hip/config.h
A	intern/cycles/kernel/device/hip/globals.h
A	intern/cycles/kernel/device/hip/kernel.cpp
M	intern/cycles/util/util_atomic.h
M	intern/cycles/util/util_debug.cpp
M	intern/cycles/util/util_debug.h
M	intern/cycles/util/util_half.h
M	intern/cycles/util/util_math.h

===================================================================

diff --git a/CMakeLists.txt b/CMakeLists.txt
index 8e807b84e22..c4b8bf6dcd4 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -419,6 +419,8 @@ mark_as_advanced(WITH_CYCLES_NATIVE_ONLY)
 
 option(WITH_CYCLES_DEVICE_CUDA              "Enable Cycles CUDA compute support" ON)
 option(WITH_CYCLES_DEVICE_OPTIX             "Enable Cycles OptiX support" ON)
+option(WITH_CYCLES_DEVICE_HIP               "Enable Cycles HIP support" OFF)
+mark_as_advanced(WITH_CYCLES_DEVICE_HIP)
 mark_as_advanced(WITH_CYCLES_DEVICE_CUDA)
 
 option(WITH_CUDA_DYNLOAD "Dynamically load CUDA libraries at runtime" ON)
@@ -821,6 +823,11 @@ if(NOT WITH_CUDA_DYNLOAD)
   endif()
 endif()
 
+if(WITH_CYCLES_DEVICE_HIP)
+  # Currently HIP must be dynamically loaded, this may change in future toolkits
+  set(WITH_HIP_DYNLOAD ON)
+endif()
+
 #-----------------------------------------------------------------------------
 # Check check if submodules are cloned
 
@@ -1850,6 +1857,9 @@ elseif(WITH_CYCLES_STANDALONE)
   if(WITH_CUDA_DYNLOAD)
     add_subdirectory(extern/cuew)
   endif()
+  if(WITH_HIP_DYNLOAD)
+    add_subdirectory(extern/hipew)
+  endif()
   if(NOT WITH_SYSTEM_GLEW)
     add_subdirectory(extern/glew)
   endif()
diff --git a/extern/CMakeLists.txt b/extern/CMakeLists.txt
index 7f7d91f0765..2b2cca04503 100644
--- a/extern/CMakeLists.txt
+++ b/extern/CMakeLists.txt
@@ -70,6 +70,9 @@ if(WITH_CYCLES OR WITH_COMPOSITOR OR WITH_OPENSUBDIV)
   if(WITH_CUDA_DYNLOAD)
     add_subdirectory(cuew)
   endif()
+  if(WITH_HIP_DYNLOAD)
+    add_subdirectory(hipew)
+  endif()
 endif()
 
 if(WITH_GHOST_X11 AND WITH_GHOST_XDND)
diff --git a/extern/hipew/CMakeLists.txt b/extern/hipew/CMakeLists.txt
new file mode 100644
index 00000000000..d215ea8c691
--- /dev/null
+++ b/extern/hipew/CMakeLists.txt
@@ -0,0 +1,39 @@
+# ***** BEGIN GPL LICENSE BLOCK *****
+#
+# This program is free software; you can redistribute it and/or
+# modify it under the terms of the GNU General Public License
+# as published by the Free Software Foundation; either version 2
+# of the License, or (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software Foundation,
+# Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+#
+# The Original Code is Copyright (C) 2021, Blender Foundation
+# All rights reserved.
+# ***** END GPL LICENSE BLOCK *****
+
+set(INC
+  .
+  include
+)
+
+set(INC_SYS
+
+)
+
+set(SRC
+  src/hipew.c
+
+  include/hipew.h
+)
+
+set(LIB
+)
+
+blender_add_lib(extern_hipew "${SRC}" "${INC}" "${INC_SYS}" "${LIB}")
diff --git a/extern/hipew/include/hipew.h b/extern/hipew/include/hipew.h
new file mode 100644
index 00000000000..02fffc331bf
--- /dev/null
+++ b/extern/hipew/include/hipew.h
@@ -0,0 +1,1207 @@
+/*
+ * Copyright 2011-2021 Blender Foundation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License
+ */
+
+#ifndef __HIPEW_H__
+#define __HIPEW_H__
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include <stdlib.h>
+
+#define HIP_IPC_HANDLE_SIZE 64
+#define hipHostMallocPortable 0x01
+#define hipHostMallocMapped 0x02
+#define hipHostMallocWriteCombined 0x04
+#define hipHostRegisterPortable 0x01
+#define hipHostRegisterMapped 0x02
+#define hipHostRegisterIoMemory 0x04
+#define hipCooperativeLaunchMultiDeviceNoPreSync 0x01
+#define hipCooperativeLaunchMultiDeviceNoPostSync 0x02
+#define hipArrayLayered 0x01
+#define hipArraySurfaceLoadStore 0x02
+#define hipArrayCubemap 0x04
+#define hipArrayTextureGather 0x08
+#define HIP_TRSA_OVERRIDE_FORMAT 0x01
+#define HIP_TRSF_READ_AS_INTEGER 0x01
+#define HIP_TRSF_NORMALIZED_COORDINATES 0x02
+#define HIP_LAUNCH_PARAM_END ((void*)0x00)
+#define HIP_LAUNCH_PARAM_BUFFER_POINTER ((void*)0x01)
+#define HIP_LAUNCH_PARAM_BUFFER_SIZE ((void*)0x02)
+
+/* Functions which changed 3.1 -> 3.2 for 64 bit stuff:
+ * the CUDA library has both the old ones for compatibility and new
+ * ones with a _v2 postfix. Here each name is simply defined as itself.
+ */
+#define hipModuleGetGlobal hipModuleGetGlobal
+#define hipMemGetInfo hipMemGetInfo
+#define hipMemAllocPitch hipMemAllocPitch
+#define hipMemGetAddressRange hipMemGetAddressRange
+#define hipMemcpyHtoD hipMemcpyHtoD
+#define hipMemcpyDtoH hipMemcpyDtoH
+#define hipMemcpyDtoD hipMemcpyDtoD
+#define hipMemcpyHtoA hipMemcpyHtoA
+#define hipMemcpyAtoH hipMemcpyAtoH
+#define hipMemcpyHtoDAsync hipMemcpyHtoDAsync
+#define hipMemcpyDtoHAsync hipMemcpyDtoHAsync
+#define hipMemcpyDtoDAsync hipMemcpyDtoDAsync
+#define hipMemsetD8 hipMemsetD8
+#define hipMemsetD16 hipMemsetD16
+#define hipMemsetD32 hipMemsetD32
+#define hipArrayCreate hipArrayCreate
+#define hipArray3DCreate hipArray3DCreate
+#define hipTexRefSetAddress hipTexRefSetAddress
+#define hipTexRefGetAddress hipTexRefGetAddress
+#define hipStreamDestroy hipStreamDestroy
+#define hipEventDestroy hipEventDestroy
+#define hipTexRefSetAddress2D hipTexRefSetAddress2D
+
+/* Types. */
+#ifdef _MSC_VER
+typedef unsigned __int32 hipuint32_t;
+typedef unsigned __int64 hipuint64_t;
+#else
+#include <stdint.h>
+typedef uint32_t hipuint32_t;
+typedef uint64_t hipuint64_t;
+#endif
+
+#if defined(__x86_64) || defined(AMD64) || defined(_M_AMD64) || defined (__aarch64__)
+typedef unsigned long long hipDeviceptr_t;
+#else
+typedef unsigned int hipDeviceptr_t;
+#endif
+
+
+#ifdef _WIN32
+#  define HIPAPI __stdcall
+#  define HIP_CB __stdcall
+#else
+#  define HIPAPI
+#  define HIP_CB
+#endif
+
+typedef int hipDevice_t;
+typedef struct ihipCtx_t* hipCtx_t;
+typedef struct ihipModule_t* hipModule_t;
+typedef struct ihipModuleSymbol_t* hipFunction_t;
+typedef struct hipArray* hArray;
+typedef struct hipMipmappedArray_st* hipMipmappedArray_t;
+typedef struct ihipEvent_t* hipEvent_t;
+typedef struct ihipStream_t* hipStream_t;
+typedef unsigned long long hipTextureObject_t;
+
+typedef struct HIPuuid_st {
+  char bytes[16];
+} HIPuuid;
+
+typedef enum hipChannelFormatKind {
+    hipChannelFormatKindSigned = 0,
+    hipChannelFormatKindUnsigned = 1,
+    hipChannelFormatKindFloat = 2,
+    hipChannelFormatKindNone = 3,
+}hipChannelFormatKind;
+
+typedef struct hipChannelFormatDesc {
+    int x;
+    int y;
+    int z;
+    int w;
+    enum hipChannelFormatKind f;
+}hipChannelFormatDesc;
+
+typedef enum hipTextureFilterMode {
+  hipFilterModePoint = 0,
+  hipFilterModeLinear = 1,
+} hipTextureFilterMode;
+
+typedef enum hipArray_Format {
+  HIP_AD_FORMAT_UNSIGNED_INT8 = 0x01,
+  HIP_AD_FORMAT_SIGNED_INT8 = 0x08,
+  HIP_AD_FORMAT_UNSIGNED_INT16 = 0x02,
+  HIP_AD_FORMAT_SIGNED_INT16 = 0x09,
+  HIP_AD_FORMAT_UNSIGNED_INT32 = 0x03,
+  HIP_AD_FORMAT_SIGNED_INT32 = 0x0a,
+  HIP_AD_FORMAT_HALF = 0x10,
+  HIP_AD_FORMAT_FLOAT = 0x20,
+} hipArray_Format;
+
+typedef enum hipTextureAddressMode {
+  hipAddressModeWrap = 0,
+  hipAddressModeClamp = 1,
+  hipAddressModeMirror = 2,
+  hipAddressModeBorder = 3,
+} hipTextureAddressMode;
+
+/**
+ * hip texture reference
+ */
+typedef struct textureReference {
+    int normalized;
+    //enum hipTextureReadMode readMode;// used only for driver API's
+    enum hipTextureFilterMode filterMode;
+    enum hipTextureAddressMode addressMode[3];  // Texture address mode for up to 3 dimensions
+    struct hipChannelFormatDesc channelDesc;
+    int sRGB;                    // Perform sRGB->linear conversion during texture read
+    unsigned int maxAnisotropy;  // Limit to the anisotropy ratio
+    enum hipTextureFilterMode mipmapFilterMode;
+    float mipmapLevelBias;
+    float minMipmapLevelClamp;
+    float maxMipmapLevelClamp;
+
+    hipTextureObject_t textureObject;
+    int numChannels;
+    enum hipArray_Format format;
+}textureReference;
+
+typedef textureReference* hipTexRef;
+
+typedef enum hipMemoryType {
+  hipMemoryTypeHost = 0x00,
+  hipMemoryTypeDevice = 0x01,
+  hipMemoryTypeArray = 0x02,
+  hipMemoryTypeUnified = 0x03,
+} hipMemoryType;
+
+/**
+ * Pointer attributes
+ */
+typedef struct hipPointerAttribute_t {
+    enum hipMemoryType memoryType;
+    int device;
+    void* devicePointer;
+    void* hostPointer;
+    int isManaged;
+    unsigned allocationFlags; /* flags specified when memory was allocated*/
+    /* peers? */
+} hipPointerAttribute_t;
+
+typedef struct ihipIpcEventHandle_t {
+  char reserved[HIP_IPC_HANDLE_SIZE];
+} ihipIpcEventHandle_t;
+
+typedef struct hipIpcMemHandle_st {
+  char reserved[HIP_IPC_HANDLE_SIZE];
+} hipIpcMemHandle_t;
+
+typedef enum HIPipcMem_flags_enum {
+  hipIpcMemLazyEnablePeerAccess = 0x1,
+} HIPipcMem_flags;
+
+typedef enum HIPmemAttach_flags_enum {
+  hipMemAttachGlobal = 0x1,
+  hipMemAttachHost = 0x2,
+  HIP_MEM_ATTACH_SINGLE = 0x4,
+} HIPmemAttach_flags;
+
+typedef enum HIPctx_flags_enum {
+  hipDeviceScheduleAuto = 0x00,
+  hipDeviceScheduleSpin = 0x01,
+  hipDeviceScheduleYield = 0x02,
+  hipDeviceScheduleBlockingSync = 0x04,
+  hipDeviceScheduleMask = 0x07,
+  hipDeviceMapHost = 0x08,
+  hipDeviceLmemResizeToMax = 0x10,
+} HIPctx_flags;
+
+typedef enum HIPstream_flags_enum {
+  hipStreamDefault = 0x0,
+  hipStreamNonBlocking = 0x1,
+} HIPstream_flags;
+
+typedef enum HIPevent_flags_enum {
+  hipEventDefault = 0x0,
+  hipEventBlockingSync = 0x1,
+  hipEventDisableTiming = 0x2,
+  hipEventInterprocess = 0x4,
+} HIPevent_flags;
+
+typedef enum HIPstreamWaitValue_flags_enum {
+  HIP_STREAM_WAIT_VALUE_GEQ = 0x0,
+  HIP_STREAM_WAIT_VALUE_EQ = 0x1,
+  HIP_STREAM_WAIT_VALUE_AND = 0x2,
+  HIP_STREAM_WAIT_VALUE_NOR = 0x3,
+  HIP_STREAM_WAIT_VALUE_FLUSH = (1 << 30),
+} HIPstreamWaitValue_flags;
+
+typedef enum 

@@ Diff output truncated at 10240 characters. @@



More information about the Bf-blender-cvs mailing list