[Bf-blender-cvs] [6532a2e6952] soc-2019-embree-gpu: Initial implementation for Embree GPU
MATILLAT Quentin
noreply at git.blender.org
Fri May 31 16:12:31 CEST 2019
Commit: 6532a2e6952641344ac077c3250d9706b5b1c0ae
Author: MATILLAT Quentin
Date: Fri May 31 15:52:54 2019 +0200
Branches: soc-2019-embree-gpu
https://developer.blender.org/rB6532a2e6952641344ac077c3250d9706b5b1c0ae
Initial implementation for Embree GPU
===================================================================
M intern/cycles/blender/addon/properties.py
M intern/cycles/blender/addon/ui.py
M intern/cycles/blender/blender_sync.cpp
M intern/cycles/bvh/CMakeLists.txt
M intern/cycles/bvh/bvh.cpp
A intern/cycles/bvh/bvh_embree_gpu.cpp
A intern/cycles/bvh/bvh_embree_gpu.h
M intern/cycles/device/device_cuda.cpp
M intern/cycles/kernel/kernel_types.h
M intern/cycles/render/mesh.cpp
===================================================================
diff --git a/intern/cycles/blender/addon/properties.py b/intern/cycles/blender/addon/properties.py
index d9e145c8b75..ba0339cf3d0 100644
--- a/intern/cycles/blender/addon/properties.py
+++ b/intern/cycles/blender/addon/properties.py
@@ -527,6 +527,11 @@ class CyclesRenderSettings(bpy.types.PropertyGroup):
description="Use Embree as ray accelerator",
default=False,
)
+ use_bvh_embree_gpu: BoolProperty(
+ name="Use Embree on GPU (experimental)",
+ description="Use Embree as ray accelerator",
+ default=False,
+ )
debug_use_spatial_splits: BoolProperty(
name="Use Spatial Splits",
description="Use BVH spatial splits: longer builder time, faster render",
diff --git a/intern/cycles/blender/addon/ui.py b/intern/cycles/blender/addon/ui.py
index a49efb3567f..463cf78b6e6 100644
--- a/intern/cycles/blender/addon/ui.py
+++ b/intern/cycles/blender/addon/ui.py
@@ -650,6 +650,10 @@ class CYCLES_RENDER_PT_performance_acceleration_structure(CyclesButtonsPanel, Pa
row = col.row()
row.active = use_cpu(context)
row.prop(cscene, "use_bvh_embree")
+ if _cycles.with_embree:
+ row = col.row()
+ row.active = use_cuda(context)
+ row.prop(cscene, "use_bvh_embree_gpu")
col.prop(cscene, "debug_use_spatial_splits")
sub = col.column()
sub.active = not cscene.use_bvh_embree or not _cycles.with_embree
diff --git a/intern/cycles/blender/blender_sync.cpp b/intern/cycles/blender/blender_sync.cpp
index 8d93d517d4e..6ab5294427c 100644
--- a/intern/cycles/blender/blender_sync.cpp
+++ b/intern/cycles/blender/blender_sync.cpp
@@ -697,6 +697,8 @@ SceneParams BlenderSync::get_scene_params(BL::Scene &b_scene, bool background)
#ifdef WITH_EMBREE
params.bvh_layout = RNA_boolean_get(&cscene, "use_bvh_embree") ? BVH_LAYOUT_EMBREE :
params.bvh_layout;
+ params.bvh_layout = RNA_boolean_get(&cscene, "use_bvh_embree_gpu") ? BVH_LAYOUT_EMBREE_GPU :
+ params.bvh_layout;
#endif
return params;
}
diff --git a/intern/cycles/bvh/CMakeLists.txt b/intern/cycles/bvh/CMakeLists.txt
index 36bbd937e1a..3952b307096 100644
--- a/intern/cycles/bvh/CMakeLists.txt
+++ b/intern/cycles/bvh/CMakeLists.txt
@@ -14,6 +14,7 @@ set(SRC
bvh_binning.cpp
bvh_build.cpp
bvh_embree.cpp
+ bvh_embree_gpu.cpp
bvh_node.cpp
bvh_sort.cpp
bvh_split.cpp
@@ -28,6 +29,7 @@ set(SRC_HEADERS
bvh_binning.h
bvh_build.h
bvh_embree.h
+ bvh_embree_gpu.h
bvh_node.h
bvh_params.h
bvh_sort.h
diff --git a/intern/cycles/bvh/bvh.cpp b/intern/cycles/bvh/bvh.cpp
index 53c66777928..b51d6b280d9 100644
--- a/intern/cycles/bvh/bvh.cpp
+++ b/intern/cycles/bvh/bvh.cpp
@@ -28,6 +28,7 @@
#ifdef WITH_EMBREE
# include "bvh/bvh_embree.h"
+# include "bvh/bvh_embree_gpu.h"
#endif
#include "util/util_foreach.h"
@@ -107,6 +108,10 @@ BVH *BVH::create(const BVHParams &params, const vector<Object *> &objects)
case BVH_LAYOUT_EMBREE:
#ifdef WITH_EMBREE
return new BVHEmbree(params, objects);
+#endif
+ case BVH_LAYOUT_EMBREE_GPU:
+#ifdef WITH_EMBREE
+ return new BVHEmbreeGPU(params, objects);
#endif
case BVH_LAYOUT_NONE:
case BVH_LAYOUT_ALL:
diff --git a/intern/cycles/bvh/bvh_embree_gpu.cpp b/intern/cycles/bvh/bvh_embree_gpu.cpp
new file mode 100644
index 00000000000..52367089ce4
--- /dev/null
+++ b/intern/cycles/bvh/bvh_embree_gpu.cpp
@@ -0,0 +1,458 @@
+/*
+ * Copyright 2018, Blender Foundation.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifdef WITH_EMBREE
+
+#include "bvh/bvh_embree_gpu.h"
+
+#include "render/mesh.h"
+#include "render/object.h"
+#include "util/util_foreach.h"
+#include "util/util_logging.h"
+#include "util/util_progress.h"
+
+CCL_NAMESPACE_BEGIN
+
+typedef struct {
+ BVHEmbreeGPU *bvhBldr;
+ Progress *p;
+} UserParams;
+
+
+BVHEmbreeGPU::BVHEmbreeGPU(const BVHParams& params_, const vector<Object*>& objects_)
+ : BVH(params_, objects_), stats(nullptr)
+{
+ _MM_SET_FLUSH_ZERO_MODE(_MM_FLUSH_ZERO_ON);
+ _MM_SET_DENORMALS_ZERO_MODE(_MM_DENORMALS_ZERO_ON);
+ this->rtc_device = rtcNewDevice("verbose=1");
+
+ rtcSetDeviceErrorFunction(this->rtc_device, [](void*, enum RTCError, const char* str) {
+ VLOG(1) << str;
+ }, nullptr);
+
+ pack.root_index = -1;
+}
+
+BVHEmbreeGPU::~BVHEmbreeGPU()
+{
+ rtcReleaseDevice(this->rtc_device);
+}
+
+ccl::BoundBox RTCBoundBoxToCCL(const RTCBounds *bound) {
+ return ccl::BoundBox(
+ make_float3(bound->lower_x, bound->lower_y, bound->lower_z),
+ make_float3(bound->upper_x, bound->upper_y, bound->upper_z));
+
+}
+ccl::BoundBox RTCBuildPrimToCCL(const RTCBuildPrimitive &bound) {
+ return ccl::BoundBox(
+ make_float3(bound.lower_x, bound.lower_y, bound.lower_z),
+ make_float3(bound.upper_x, bound.upper_y, bound.upper_z));
+
+}
+
+void CCLBoundBoxToRTC(const ccl::BoundBox &bb, RTCBounds *bound) {
+ bound->lower_x = bb.min.x;
+ bound->lower_y = bb.min.y;
+ bound->lower_z = bb.min.z;
+
+ bound->upper_x = bb.max.x;
+ bound->upper_y = bb.max.y;
+ bound->upper_z = bb.max.z;
+}
+
+void BVHEmbreeGPU::build(Progress& progress, Stats *stats_)
+{
+ this->stats = stats_;
+ rtcSetDeviceMemoryMonitorFunction(this->rtc_device, [](void* userPtr, const ssize_t bytes, const bool) -> bool {
+ Stats *stats = static_cast<Stats*>(userPtr);
+ if(stats == NULL) return true;
+
+ if(bytes > 0) {
+ stats->mem_alloc(static_cast<size_t>(bytes));
+ }
+ else {
+ stats->mem_free(static_cast<size_t>(-bytes));
+ }
+ return true;
+ }, stats);
+
+ progress.set_substatus("Building BVH");
+
+
+ struct RTCBuildArguments args = rtcDefaultBuildArguments();
+ args.byteSize = sizeof(args);
+
+ const bool dynamic = params.bvh_type == SceneParams::BVH_DYNAMIC;
+
+ args.buildFlags = (dynamic ? RTC_BUILD_FLAG_DYNAMIC : RTC_BUILD_FLAG_NONE);
+ args.buildQuality = dynamic ? RTC_BUILD_QUALITY_LOW :
+ (params.use_spatial_split ? RTC_BUILD_QUALITY_HIGH : RTC_BUILD_QUALITY_MEDIUM);
+
+ /* Count triangles first so we can reserve arrays once. */
+ size_t prim_count = 0;
+
+ foreach(Object *ob, objects) {
+ prim_count += ob->mesh->num_triangles();
+ }
+
+ pack.prim_object.reserve(prim_count);
+ pack.prim_type.reserve(prim_count);
+ pack.prim_index.reserve(prim_count);
+ pack.prim_tri_index.reserve(prim_count);
+
+ this->offset.resize(objects.size());
+ unsigned int i = 0;
+
+ pack.object_node.clear();
+
+ vector<RTCBuildPrimitive> prims;
+ prims.reserve(objects.size() * 3);
+ foreach(Object *ob, objects) {
+ add_object(ob, i);
+
+ const float3 *mesh_verts = ob->mesh->verts.data();
+ for(size_t tri = 0; tri < ob->mesh->num_triangles(); ++tri) {
+ BoundBox bb = BoundBox::empty;
+ ob->mesh->get_triangle(tri).bounds_grow(mesh_verts, bb);
+ RTCBuildPrimitive prim;
+ prim.lower_x = bb.min.x;
+ prim.lower_y = bb.min.y;
+ prim.lower_z = bb.min.z;
+ prim.upper_x = bb.max.x;
+ prim.upper_y = bb.max.y;
+ prim.upper_z = bb.max.z;
+ prim.geomID = i;
+ prim.primID = tri;
+
+ prims.push_back(prim);
+ }
+
+ ++i;
+ if(progress.get_cancel()) return;
+ }
+
+ if(progress.get_cancel()) {
+ stats = nullptr;
+ return;
+ }
+
+ args.bvh = rtcNewBVH(this->rtc_device);
+ args.maxBranchingFactor = 2;
+
+ args.primitives = prims.data();
+ args.primitiveCount = prims.size();
+ args.primitiveArrayCapacity = prims.capacity();
+
+ args.sahBlockSize = 1;
+ args.maxDepth = BVHParams::MAX_DEPTH;
+ args.traversalCost = this->params.sah_node_cost;
+ // 2 is a corrective factor for Embree (may depend on the scene for optimal results)
+ args.intersectionCost = this->params.sah_primitive_cost * 2;
+
+ args.createNode = [](RTCThreadLocalAllocator alloc, unsigned int numChildren, void*) -> void* {
+ CHECK_EQ(numChildren, 2) << "Should only have two children";
+ void* ptr = rtcThreadLocalAlloc(alloc,sizeof(InnerNode),16);
+ return new (ptr) InnerNode(BoundBox::empty);
+ };
+ args.setNodeBounds = [](void* nodePtr, const RTCBounds** bounds, unsigned int numChildren, void*) {
+ InnerNode *node = static_cast<InnerNode*>(nodePtr);
+ node->num_children_ = static_cast<int>(numChildren);
+ for (size_t i=0; i < numChildren; i++) {
+ node->bounds.grow(RTCBoundBoxToCCL(bounds[i]));
+ }
+ };
+ args.setNodeChildren = [](void* nodePtr, void** childPtr, unsigned int numChildren, void*) {
+ InnerNode *node = static_cast<InnerNode*>(nodePtr);
+ node->num_children_ = static_cast<int>(numChildren);
+ for (size_t i=0; i < numChildren; i++) {
+ node->children[i] = static_cast<BVHNode*>(childPtr[i]);
+ }
+ };
+ args.createLeaf = [](RTCThreadLocalAllocator alloc, const RTCBuildPrimitive* prims, size_t numPrims, void *user_ptr) -> void* {
+ UserParams *userParams = static_cast<UserParams*>(user_ptr);
+ void* ptr = rtcThreadLocalAlloc(alloc, sizeof(LeafNode), 16);
+
+ int min = 999999,
+ max = 0;
+ uint visibility = 0;
+ BoundBox bounds = BoundBox::empty;
+
+ for(size_t i = 0; i < numPrims; i++) {
+ const Object *ob = userParams->bvhB
@@ Diff output truncated at 10240 characters. @@
More information about the Bf-blender-cvs
mailing list