[Bf-blender-cvs] [6532a2e6952] soc-2019-embree-gpu: Initial implementation for Embree GPU

MATILLAT Quentin noreply at git.blender.org
Fri May 31 16:12:31 CEST 2019


Commit: 6532a2e6952641344ac077c3250d9706b5b1c0ae
Author: MATILLAT Quentin
Date:   Fri May 31 15:52:54 2019 +0200
Branches: soc-2019-embree-gpu
https://developer.blender.org/rB6532a2e6952641344ac077c3250d9706b5b1c0ae

Initial implementation for Embree GPU

===================================================================

M	intern/cycles/blender/addon/properties.py
M	intern/cycles/blender/addon/ui.py
M	intern/cycles/blender/blender_sync.cpp
M	intern/cycles/bvh/CMakeLists.txt
M	intern/cycles/bvh/bvh.cpp
A	intern/cycles/bvh/bvh_embree_gpu.cpp
A	intern/cycles/bvh/bvh_embree_gpu.h
M	intern/cycles/device/device_cuda.cpp
M	intern/cycles/kernel/kernel_types.h
M	intern/cycles/render/mesh.cpp

===================================================================

diff --git a/intern/cycles/blender/addon/properties.py b/intern/cycles/blender/addon/properties.py
index d9e145c8b75..ba0339cf3d0 100644
--- a/intern/cycles/blender/addon/properties.py
+++ b/intern/cycles/blender/addon/properties.py
@@ -527,6 +527,11 @@ class CyclesRenderSettings(bpy.types.PropertyGroup):
         description="Use Embree as ray accelerator",
         default=False,
     )
+    use_bvh_embree_gpu: BoolProperty(
+        name="Use Embree on GPU (experimental)",
+        description="Use Embree as ray accelerator",
+        default=False,
+    )
     debug_use_spatial_splits: BoolProperty(
         name="Use Spatial Splits",
         description="Use BVH spatial splits: longer builder time, faster render",
diff --git a/intern/cycles/blender/addon/ui.py b/intern/cycles/blender/addon/ui.py
index a49efb3567f..463cf78b6e6 100644
--- a/intern/cycles/blender/addon/ui.py
+++ b/intern/cycles/blender/addon/ui.py
@@ -650,6 +650,10 @@ class CYCLES_RENDER_PT_performance_acceleration_structure(CyclesButtonsPanel, Pa
             row = col.row()
             row.active = use_cpu(context)
             row.prop(cscene, "use_bvh_embree")
+        if _cycles.with_embree:
+            row = col.row()
+            row.active = use_cuda(context)
+            row.prop(cscene, "use_bvh_embree_gpu")
         col.prop(cscene, "debug_use_spatial_splits")
         sub = col.column()
         sub.active = not cscene.use_bvh_embree or not _cycles.with_embree
diff --git a/intern/cycles/blender/blender_sync.cpp b/intern/cycles/blender/blender_sync.cpp
index 8d93d517d4e..6ab5294427c 100644
--- a/intern/cycles/blender/blender_sync.cpp
+++ b/intern/cycles/blender/blender_sync.cpp
@@ -697,6 +697,8 @@ SceneParams BlenderSync::get_scene_params(BL::Scene &b_scene, bool background)
 #ifdef WITH_EMBREE
   params.bvh_layout = RNA_boolean_get(&cscene, "use_bvh_embree") ? BVH_LAYOUT_EMBREE :
                                                                    params.bvh_layout;
+  params.bvh_layout = RNA_boolean_get(&cscene, "use_bvh_embree_gpu") ? BVH_LAYOUT_EMBREE_GPU :
+                                                                       params.bvh_layout;
 #endif
   return params;
 }
diff --git a/intern/cycles/bvh/CMakeLists.txt b/intern/cycles/bvh/CMakeLists.txt
index 36bbd937e1a..3952b307096 100644
--- a/intern/cycles/bvh/CMakeLists.txt
+++ b/intern/cycles/bvh/CMakeLists.txt
@@ -14,6 +14,7 @@ set(SRC
   bvh_binning.cpp
   bvh_build.cpp
   bvh_embree.cpp
+  bvh_embree_gpu.cpp
   bvh_node.cpp
   bvh_sort.cpp
   bvh_split.cpp
@@ -28,6 +29,7 @@ set(SRC_HEADERS
   bvh_binning.h
   bvh_build.h
   bvh_embree.h
+  bvh_embree_gpu.h
   bvh_node.h
   bvh_params.h
   bvh_sort.h
diff --git a/intern/cycles/bvh/bvh.cpp b/intern/cycles/bvh/bvh.cpp
index 53c66777928..b51d6b280d9 100644
--- a/intern/cycles/bvh/bvh.cpp
+++ b/intern/cycles/bvh/bvh.cpp
@@ -28,6 +28,7 @@
 
 #ifdef WITH_EMBREE
 #  include "bvh/bvh_embree.h"
+#  include "bvh/bvh_embree_gpu.h"
 #endif
 
 #include "util/util_foreach.h"
@@ -107,6 +108,10 @@ BVH *BVH::create(const BVHParams &params, const vector<Object *> &objects)
     case BVH_LAYOUT_EMBREE:
 #ifdef WITH_EMBREE
       return new BVHEmbree(params, objects);
+#endif
+    case BVH_LAYOUT_EMBREE_GPU:
+#ifdef WITH_EMBREE
+      return new BVHEmbreeGPU(params, objects);
 #endif
     case BVH_LAYOUT_NONE:
     case BVH_LAYOUT_ALL:
diff --git a/intern/cycles/bvh/bvh_embree_gpu.cpp b/intern/cycles/bvh/bvh_embree_gpu.cpp
new file mode 100644
index 00000000000..52367089ce4
--- /dev/null
+++ b/intern/cycles/bvh/bvh_embree_gpu.cpp
@@ -0,0 +1,458 @@
+/*
+ * Copyright 2018, Blender Foundation.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifdef WITH_EMBREE
+
+#include "bvh/bvh_embree_gpu.h"
+
+#include "render/mesh.h"
+#include "render/object.h"
+#include "util/util_foreach.h"
+#include "util/util_logging.h"
+#include "util/util_progress.h"
+
+CCL_NAMESPACE_BEGIN
+
+/* User data handed to the Embree BVH builder callbacks through their
+ * opaque user pointer: the BVH object being built and a Progress pointer. */
+struct UserParams {
+    BVHEmbreeGPU *bvhBldr;
+    Progress *p;
+};
+
+
+/* Construct the BVH wrapper and create the Embree device used to build it.
+ *
+ * Flush-to-zero and denormals-are-zero floating-point modes are switched on
+ * before the device is created, Embree error messages are routed to verbose
+ * logging, and the packed root index is initialised to -1. */
+BVHEmbreeGPU::BVHEmbreeGPU(const BVHParams& params_, const vector<Object*>& objects_)
+    : BVH(params_, objects_), stats(nullptr)
+{
+    _MM_SET_FLUSH_ZERO_MODE(_MM_FLUSH_ZERO_ON);
+    _MM_SET_DENORMALS_ZERO_MODE(_MM_DENORMALS_ZERO_ON);
+
+    rtc_device = rtcNewDevice("verbose=1");
+
+    /* Only the message text is logged; the user pointer and error code are unused. */
+    const auto log_rtc_error = [](void* /*user_ptr*/, enum RTCError /*code*/, const char* message) {
+        VLOG(1) << message;
+    };
+    rtcSetDeviceErrorFunction(rtc_device, log_rtc_error, nullptr);
+
+    pack.root_index = -1;
+}
+
+/* Release the Embree device held in rtc_device. */
+BVHEmbreeGPU::~BVHEmbreeGPU()
+{
+    rtcReleaseDevice(rtc_device);
+}
+
+/* Build a Cycles BoundBox from the lower/upper corners of an Embree RTCBounds.
+ * The pointer is dereferenced unconditionally, so it must not be null. */
+ccl::BoundBox RTCBoundBoxToCCL(const RTCBounds *bound) {
+    const auto lower_corner = make_float3(bound->lower_x, bound->lower_y, bound->lower_z);
+    const auto upper_corner = make_float3(bound->upper_x, bound->upper_y, bound->upper_z);
+    return ccl::BoundBox(lower_corner, upper_corner);
+}
+/* Build a Cycles BoundBox from the lower/upper corners stored in an Embree
+ * build primitive; the primitive's geomID/primID are not used. */
+ccl::BoundBox RTCBuildPrimToCCL(const RTCBuildPrimitive &bound) {
+    const auto lower_corner = make_float3(bound.lower_x, bound.lower_y, bound.lower_z);
+    const auto upper_corner = make_float3(bound.upper_x, bound.upper_y, bound.upper_z);
+    return ccl::BoundBox(lower_corner, upper_corner);
+}
+
+/* Copy the min/max corners of a Cycles BoundBox into the lower/upper fields
+ * of an Embree RTCBounds. Only those six fields of *bound are written. */
+void CCLBoundBoxToRTC(const ccl::BoundBox &bb, RTCBounds *bound) {
+    const auto &lo = bb.min;
+    const auto &hi = bb.max;
+
+    bound->lower_x = lo.x;
+    bound->lower_y = lo.y;
+    bound->lower_z = lo.z;
+
+    bound->upper_x = hi.x;
+    bound->upper_y = hi.y;
+    bound->upper_z = hi.z;
+}
+
+void BVHEmbreeGPU::build(Progress& progress, Stats *stats_)
+{
+    this->stats = stats_;
+    rtcSetDeviceMemoryMonitorFunction(this->rtc_device, [](void* userPtr, const ssize_t bytes, const bool) -> bool {
+        Stats *stats = static_cast<Stats*>(userPtr);
+        if(stats == NULL) return true;
+
+        if(bytes > 0) {
+            stats->mem_alloc(static_cast<size_t>(bytes));
+        }
+        else {
+            stats->mem_free(static_cast<size_t>(-bytes));
+        }
+        return true;
+    }, stats);
+
+    progress.set_substatus("Building BVH");
+
+
+    struct RTCBuildArguments args = rtcDefaultBuildArguments();
+    args.byteSize = sizeof(args);
+
+    const bool dynamic = params.bvh_type == SceneParams::BVH_DYNAMIC;
+
+    args.buildFlags = (dynamic ? RTC_BUILD_FLAG_DYNAMIC : RTC_BUILD_FLAG_NONE);
+    args.buildQuality = dynamic ? RTC_BUILD_QUALITY_LOW :
+                                  (params.use_spatial_split ? RTC_BUILD_QUALITY_HIGH : RTC_BUILD_QUALITY_MEDIUM);
+
+    /* Count triangles first so we can reserve arrays once. */
+    size_t prim_count = 0;
+
+    foreach(Object *ob, objects) {
+        prim_count += ob->mesh->num_triangles();
+    }
+
+    pack.prim_object.reserve(prim_count);
+    pack.prim_type.reserve(prim_count);
+    pack.prim_index.reserve(prim_count);
+    pack.prim_tri_index.reserve(prim_count);
+
+    this->offset.resize(objects.size());
+    unsigned int i = 0;
+
+    pack.object_node.clear();
+
+    vector<RTCBuildPrimitive> prims;
+    prims.reserve(objects.size() * 3);
+    foreach(Object *ob, objects) {
+        add_object(ob, i);
+
+        const float3 *mesh_verts = ob->mesh->verts.data();
+        for(size_t tri = 0; tri < ob->mesh->num_triangles(); ++tri) {
+            BoundBox bb = BoundBox::empty;
+            ob->mesh->get_triangle(tri).bounds_grow(mesh_verts, bb);
+            RTCBuildPrimitive prim;
+            prim.lower_x = bb.min.x;
+            prim.lower_y = bb.min.y;
+            prim.lower_z = bb.min.z;
+            prim.upper_x = bb.max.x;
+            prim.upper_y = bb.max.y;
+            prim.upper_z = bb.max.z;
+            prim.geomID = i;
+            prim.primID = tri;
+
+            prims.push_back(prim);
+        }
+
+        ++i;
+        if(progress.get_cancel()) return;
+    }
+
+    if(progress.get_cancel()) {
+        stats = nullptr;
+        return;
+    }
+
+    args.bvh = rtcNewBVH(this->rtc_device);
+    args.maxBranchingFactor = 2;
+
+    args.primitives = prims.data();
+    args.primitiveCount = prims.size();
+    args.primitiveArrayCapacity = prims.capacity();
+
+    args.sahBlockSize = 1;
+    args.maxDepth = BVHParams::MAX_DEPTH;
+    args.traversalCost = this->params.sah_node_cost;
+    // 2 is a corrective factor for Embree (may depend on the scene for optimal results)
+    args.intersectionCost = this->params.sah_primitive_cost * 2;
+
+    args.createNode = [](RTCThreadLocalAllocator alloc, unsigned int numChildren, void*) -> void* {
+        CHECK_EQ(numChildren, 2) << "Should only have two children";
+        void* ptr = rtcThreadLocalAlloc(alloc,sizeof(InnerNode),16);
+        return new (ptr) InnerNode(BoundBox::empty);
+    };
+    args.setNodeBounds = [](void* nodePtr, const RTCBounds** bounds, unsigned int numChildren, void*) {
+        InnerNode *node = static_cast<InnerNode*>(nodePtr);
+        node->num_children_ = static_cast<int>(numChildren);
+        for (size_t i=0; i < numChildren; i++) {
+            node->bounds.grow(RTCBoundBoxToCCL(bounds[i]));
+        }
+    };
+    args.setNodeChildren = [](void* nodePtr, void** childPtr, unsigned int numChildren, void*) {
+        InnerNode *node = static_cast<InnerNode*>(nodePtr);
+        node->num_children_ = static_cast<int>(numChildren);
+        for (size_t i=0; i < numChildren; i++) {
+            node->children[i] = static_cast<BVHNode*>(childPtr[i]);
+        }
+    };
+    args.createLeaf = [](RTCThreadLocalAllocator alloc, const RTCBuildPrimitive* prims, size_t numPrims, void *user_ptr) -> void* {
+        UserParams *userParams = static_cast<UserParams*>(user_ptr);
+        void* ptr = rtcThreadLocalAlloc(alloc, sizeof(LeafNode), 16);
+
+        int min = 999999,
+                max = 0;
+        uint visibility = 0;
+        BoundBox bounds = BoundBox::empty;
+
+        for(size_t i = 0; i < numPrims; i++) {
+            const Object *ob = userParams->bvhB

@@ Diff output truncated at 10240 characters. @@



More information about the Bf-blender-cvs mailing list