[Bf-blender-cvs] [cf41b5b] opensubdiv-modifier: Move towards GPU-side tessellation

Mon May 12 20:08:57 CEST 2014

Commit: cf41b5b0fd1aab6d17a3e243a0dffb774ba168d1
Author: Sergey Sharybin
Date:   Sun May 11 12:12:11 2014 +0200
https://developer.blender.org/rBcf41b5b0fd1aab6d17a3e243a0dffb774ba168d1

Move towards GPU-side tessellation

Currently this only lands the basics which are needed to
support using the GPU tessellator from the OpenSubdiv library.

CPU evaluation is disabled for the purpose of testing the GPU side,
which means the bounding box is wrong, edges are not displayed, and
textured mode is likely broken as well for now.

The data structure layout and API are not final at all; this is
just a cleaned-up version of the patch which I had last week.
All the drawing should be moved out of the CCGSubSurf
structure and become more generic functions in bf_gpu.

Also, tessellation is currently hardcoded to use the CUDA backend;
this will become either a user preference or some automatic mode
in the future.

I wouldn't recommend using this state in production; it's
still just a basis for further development and doing some
tests.

===================================================================

M	build_files/cmake/Modules/FindOpenSubdiv.cmake
M	intern/CMakeLists.txt
A	intern/opensubdiv/CMakeLists.txt
A	intern/opensubdiv/SConscript
A	intern/opensubdiv/cudaInit.h
A	intern/opensubdiv/gpu_shader_opensubd_display.glsl
A	intern/opensubdiv/opensubdiv_capi.cc
A	intern/opensubdiv/opensubdiv_capi.h
A	intern/opensubdiv/opensubdiv_gpu_capi.cc
M	source/blender/blenkernel/CMakeLists.txt
M	source/blender/blenkernel/SConscript
M	source/blender/blenkernel/intern/CCGSubSurf.c
M	source/blender/blenkernel/intern/CCGSubSurf.h
M	source/blender/blenkernel/intern/DerivedMesh.c
M	source/blender/blenkernel/intern/subsurf_ccg.c
M	source/blenderplayer/CMakeLists.txt
M	source/creator/CMakeLists.txt

===================================================================

diff --git a/build_files/cmake/Modules/FindOpenSubdiv.cmake b/build_files/cmake/Modules/FindOpenSubdiv.cmake
index 91ef74a..0119969 100644
--- a/build_files/cmake/Modules/FindOpenSubdiv.cmake
+++ b/build_files/cmake/Modules/FindOpenSubdiv.cmake
@@ -75,6 +75,16 @@ FIND_PACKAGE_HANDLE_STANDARD_ARGS(OpenSubdiv DEFAULT_MSG
 IF(OPENSUBDIV_FOUND)
   SET(OPENSUBDIV_LIBRARIES ${_opensubdiv_LIBRARIES})
   SET(OPENSUBDIV_INCLUDE_DIRS ${OPENSUBDIV_INCLUDE_DIR})
+
+  # TODO(sergey): Ideally we would link to CUDA at runtime, not compile time,
+  # so that Blender can run on systems which don't have an NVidia GPU or
+  # don't have the CUDA runtime libraries installed.
+  #
+  # Or we'll just use the GLSL backend on all systems.
+  FIND_PACKAGE(CUDA)
+  IF(CUDA_FOUND)
+    LIST(APPEND OPENSUBDIV_LIBRARIES ${CUDA_CUDART_LIBRARY})
+  ENDIF()
 ENDIF(OPENSUBDIV_FOUND)
 
 MARK_AS_ADVANCED(
diff --git a/intern/CMakeLists.txt b/intern/CMakeLists.txt
index 7f54ccc..7d3b920 100644
--- a/intern/CMakeLists.txt
+++ b/intern/CMakeLists.txt
@@ -80,6 +80,10 @@ if(WITH_OPENNL)
 	add_subdirectory(opennl)
 endif()
 
+if(WITH_OPENSUBDIV)
+	add_subdirectory(opensubdiv)
+endif()
+
 # only windows needs utf16 converter
 if(WIN32)
 	add_subdirectory(utfconv)
diff --git a/intern/opensubdiv/CMakeLists.txt b/intern/opensubdiv/CMakeLists.txt
new file mode 100644
index 0000000..f6beaa0
--- /dev/null
+++ b/intern/opensubdiv/CMakeLists.txt
@@ -0,0 +1,43 @@
+# ***** BEGIN GPL LICENSE BLOCK *****
+#
+# This program is free software; you can redistribute it and/or
+# modify it under the terms of the GNU General Public License
+# as published by the Free Software Foundation; either version 2
+# of the License, or (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software Foundation,
+# Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+#
+# The Original Code is Copyright (C) 2013, Blender Foundation
+# All rights reserved.
+#
+# The Original Code is: all of this file.
+#
+# Contributor(s): Sergey Sharybin.
+#
+# ***** END GPL LICENSE BLOCK *****
+
+set(INC  # include directories local to the source tree
+	.
+	../guardedalloc
+)
+
+set(INC_SYS
+	${OPENSUBDIV_INCLUDE_DIRS}  # result variable set by FindOpenSubdiv.cmake
+)
+
+set(SRC
+	opensubdiv_capi.cc
+	opensubdiv_gpu_capi.cc
+	opensubdiv_capi.h
+)
+
+data_to_c_simple(gpu_shader_opensubd_display.glsl SRC)
+
+blender_add_lib(bf_intern_opensubdiv "${SRC}" "${INC}" "${INC_SYS}")
diff --git a/intern/opensubdiv/SConscript b/intern/opensubdiv/SConscript
new file mode 100644
index 0000000..fddc41b
--- /dev/null
+++ b/intern/opensubdiv/SConscript
@@ -0,0 +1,43 @@
+#!/usr/bin/env python
+#
+# ***** BEGIN GPL LICENSE BLOCK *****
+#
+# This program is free software; you can redistribute it and/or
+# modify it under the terms of the GNU General Public License
+# as published by the Free Software Foundation; either version 2
+# of the License, or (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software Foundation,
+# Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+#
+# The Original Code is Copyright (C) 2013, Blender Foundation
+# All rights reserved.
+#
+# The Original Code is: all of this file.
+#
+# Contributor(s): Sergey Sharybin.
+#
+# ***** END GPL LICENSE BLOCK *****
+
+Import('env')
+
+sources = env.Glob('*.cc')
+
+defs = []
+
+incs = '. ../guardedalloc'
+incs += ' ' + env['BF_OPENSUBDIV_INC']
+
+# generated data files
+import os
+sources.extend((
+    os.path.join(env['DATA_SOURCES'], "gpu_shader_opensubd_display.glsl.c"),
+))
+
+env.BlenderLib('bf_intern_opensubdiv', sources, Split(incs), defs, libtype=['extern','player'], priority=[10, 185])
diff --git a/intern/opensubdiv/cudaInit.h b/intern/opensubdiv/cudaInit.h
new file mode 100644
index 0000000..2eae7fb
--- /dev/null
+++ b/intern/opensubdiv/cudaInit.h
@@ -0,0 +1,111 @@
+//
+//   Copyright 2013 Pixar
+//
+//   Licensed under the Apache License, Version 2.0 (the "Apache License")
+//   with the following modification; you may not use this file except in
+//   compliance with the Apache License and the following modification to it:
+//   Section 6. Trademarks. is deleted and replaced with:
+//
+//   6. Trademarks. This License does not grant permission to use the trade
+//      names, trademarks, service marks, or product names of the Licensor
+//      and its affiliates, except as required to comply with Section 4(c) of
+//      the License and to reproduce the content of the NOTICE file.
+//
+//   You may obtain a copy of the Apache License at
+//
+//       http://www.apache.org/licenses/LICENSE-2.0
+//
+//   Unless required by applicable law or agreed to in writing, software
+//   distributed under the Apache License with the above modification is
+//   distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+//   KIND, either express or implied. See the Apache License for the specific
+//   language governing permissions and limitations under the Apache License.
+//
+
+#ifndef OSD_CUDA_INIT_H
+#define OSD_CUDA_INIT_H
+
+#include <algorithm>
+#include <cstdio>
+
+// From "NVIDIA GPU Computing SDK 4.2/C/common/inc/cutil_inline_runtime.h":
+
+// Beginning of GPU Architecture definitions
+inline int _ConvertSMVer2Cores_local(int major, int minor)
+{
+    // Table mapping a GPU architecture (SM version) to its number of cores per SM.
+    typedef struct {
+        int SM; // 0xMm (hexadecimal notation), M = SM Major version, and m = SM minor version
+        int Cores;
+    } sSMtoCores;
+
+    sSMtoCores nGpuArchCoresPerSM[] =
+    { { 0x10,  8 }, // Tesla Generation (SM 1.0) G80 class
+      { 0x11,  8 }, // Tesla Generation (SM 1.1) G8x class
+      { 0x12,  8 }, // Tesla Generation (SM 1.2) G9x class
+      { 0x13,  8 }, // Tesla Generation (SM 1.3) GT200 class
+      { 0x20, 32 }, // Fermi Generation (SM 2.0) GF100 class
+      { 0x21, 48 }, // Fermi Generation (SM 2.1) GF10x class
+      { 0x30, 192}, // Kepler Generation (SM 3.0) GK10x class
+      {   -1, -1 }  // sentinel: terminates the lookup loop below
+    };
+
+    int index = 0;
+    while (nGpuArchCoresPerSM[index].SM != -1) {
+        if (nGpuArchCoresPerSM[index].SM == ((major << 4) + minor) ) { // key packs major.minor as 0xMm
+            return nGpuArchCoresPerSM[index].Cores;
+        }
+        index++;
+    }
+    printf("MapSMtoCores undefined SMversion %d.%d!\n", major, minor);
+    return -1; // no table entry for this SM version
+}
+// end of GPU Architecture definitions
+
+// Returns the ordinal of the CUDA device with the highest estimated
+// throughput (multiprocessors * cores per multiprocessor * clock rate).
+// If any device has SM major version > 2, only devices of the highest major
+// version are candidates. Returns 0 when no device qualifies.
+inline int cutGetMaxGflopsDeviceId()
+{
+    int device_count = 0, best_SM_arch = 0, max_perf_device = 0;
+    // 64-bit accumulator: the product overflows int on large GPUs, e.g.
+    // 15 multiprocessors * 192 cores * 928000 kHz is about 2.7e9.
+    long long max_compute_perf = 0;
+    cudaDeviceProp deviceProp;
+
+    if (cudaGetDeviceCount( &device_count ) != cudaSuccess) {
+        device_count = 0;
+    }
+    // Find the best major SM Architecture GPU device
+    for (int device = 0; device < device_count; ++device) {
+        if (cudaGetDeviceProperties( &deviceProp, device ) != cudaSuccess) {
+            continue;  // deviceProp was not filled in for this device
+        }
+        if (deviceProp.major > 0 && deviceProp.major < 9999) {
+            best_SM_arch = std::max(best_SM_arch, deviceProp.major);
+        }
+    }
+
+    // Find the best CUDA capable GPU device
+    for (int device = 0; device < device_count; ++device) {
+        if (cudaGetDeviceProperties( &deviceProp, device ) != cudaSuccess) {
+            continue;
+        }
+        // A device reporting version 9999.9999 counts as one core per multiprocessor.
+        const bool placeholder = (deviceProp.major == 9999 && deviceProp.minor == 9999);
+        const int sm_per_multiproc = placeholder ? 1 :
+            _ConvertSMVer2Cores_local(deviceProp.major, deviceProp.minor);
+        const long long compute_perf = (long long)deviceProp.multiProcessorCount *
+            sm_per_multiproc * deviceProp.clockRate;
+        // If we find GPU with SM major > 2, search only these
+        const bool arch_ok = (best_SM_arch <= 2) || (deviceProp.major == best_SM_arch);
+        if (arch_ok && compute_perf > max_compute_perf) {
+            max_compute_perf = compute_perf;
+            max_perf_device  = device;
+        }
+    }
+    return max_perf_device;
+}
+
+#endif //OSD_CUDA_INIT_H
diff --git a/intern/opensubdiv/gpu_shader_opensubd_display.glsl b/intern/opensubdiv/gpu_shader_opensubd_display.glsl
new file mode 100644
index 0000000..0b437ac
--- /dev/null
+++ b/intern/opensubdiv/gpu_shader_opensubd_display.glsl
@@ -0,0 +1,398 @@
+//
+//   Copyright 2013 Pixar
+//
+//   Licensed under the Apache License, Version 2.0 (the "Apache License")
+//   with the following modification; you may not use this file except in
+//   compliance with the Apache License and the following modification to it:
+//   Section 6. Trademarks. is deleted and replaced with:
+//
+//   6. Trademarks. This License does not grant permission to use the trade
+//      names, trademarks, service marks, or product names of the Licensor
+//      and its affiliates, except as required to comply with Section 4(c) of
+//      the License and to reproduce the content of the NOTICE file.
+//
+//   You may obtain a copy of the Apache License at
+//
+//       http://www.apache.org/licenses/LICENSE-2.0
+//
+//   Unless required by applicable law or agreed to in writing, software
+//   distributed under the Apache License with the above modification is
+//   distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+//   KIND, either express or implied. See the Apache License for the specific
+//   language governing permissions and limitations under the Apache Lice

@@ Diff output truncated at 10240 characters. @@