[Bf-blender-cvs] [471a896] cycles_split_kernel: Cycles: Replace utility macros with functions from util_types.h
Mai Lavelle
noreply at git.blender.org
Wed Dec 7 04:27:02 CET 2016
Commit: 471a896731bfdf2db28214681f45d6d33738b5b4
Author: Mai Lavelle
Date: Mon Dec 5 18:19:58 2016 -0500
Branches: cycles_split_kernel
https://developer.blender.org/rB471a896731bfdf2db28214681f45d6d33738b5b4
Cycles: Replace utility macros with functions from util_types.h
===================================================================
M intern/cycles/device/device_split_kernel.cpp
M intern/cycles/kernel/split/kernel_split_data.h
M intern/cycles/util/util_types.h
===================================================================
diff --git a/intern/cycles/device/device_split_kernel.cpp b/intern/cycles/device/device_split_kernel.cpp
index 4bb3c5d..db1e79d 100644
--- a/intern/cycles/device/device_split_kernel.cpp
+++ b/intern/cycles/device/device_split_kernel.cpp
@@ -21,8 +21,6 @@
CCL_NAMESPACE_BEGIN
-#define ROUND_UP(x, multiple) (((((x) - 1 ) / (multiple)) + 1) * (multiple))
-
DeviceSplitKernel::DeviceSplitKernel(Device *device) : device(device)
{
path_iteration_times = PATH_ITER_INC_FACTOR;
@@ -98,8 +96,8 @@ bool DeviceSplitKernel::path_trace(DeviceTask *task,
*/
int2 max_render_feasible_tile_size;
const int2 tile_size = task->requested_tile_size;
- max_render_feasible_tile_size.x = ROUND_UP(tile_size.x, local_size[0]);
- max_render_feasible_tile_size.y = ROUND_UP(tile_size.y, local_size[1]);
+ max_render_feasible_tile_size.x = round_up(tile_size.x, local_size[0]);
+ max_render_feasible_tile_size.y = round_up(tile_size.y, local_size[1]);
/* Calculate per_thread_output_buffer_size. */
size_t per_thread_output_buffer_size;
diff --git a/intern/cycles/kernel/split/kernel_split_data.h b/intern/cycles/kernel/split/kernel_split_data.h
index bab5718..387e395 100644
--- a/intern/cycles/kernel/split/kernel_split_data.h
+++ b/intern/cycles/kernel/split/kernel_split_data.h
@@ -108,23 +108,22 @@ typedef struct SplitData {
} SplitData;
#define SIZEOF_SD(max_closure) (sizeof(ShaderData) - (sizeof(ShaderClosure) * (MAX_CLOSURE - (max_closure))))
-#define ALIGN_16(num) (((num) + 15) & ~15)
ccl_device_inline size_t split_data_buffer_size(size_t num_elements,
size_t max_closure,
size_t per_thread_output_buffer_size)
{
size_t size = 0;
-#define SPLIT_DATA_ENTRY(type, name, num) + ALIGN_16(num_elements * num * sizeof(type))
+#define SPLIT_DATA_ENTRY(type, name, num) + align_up(num_elements * num * sizeof(type), 16)
size = size SPLIT_DATA_ENTRIES;
#undef SPLIT_DATA_ENTRY
/* TODO(sergey): This will actually over-allocate if
* particular kernel does not support multiclosure.
*/
- size += ALIGN_16(num_elements * SIZEOF_SD(max_closure)); /* sd */
- size += ALIGN_16(2 * num_elements * SIZEOF_SD(max_closure)); /* sd_DL_shadow */
- size += ALIGN_16(num_elements * per_thread_output_buffer_size); /* per_sample_output_buffers */
+ size += align_up(num_elements * SIZEOF_SD(max_closure), 16); /* sd */
+ size += align_up(2 * num_elements * SIZEOF_SD(max_closure), 16); /* sd_DL_shadow */
+ size += align_up(num_elements * per_thread_output_buffer_size, 16); /* per_sample_output_buffers */
return size;
}
@@ -137,18 +136,18 @@ ccl_device_inline void split_data_init(ccl_global SplitData *split_data,
ccl_global char *p = (ccl_global char*)data;
#define SPLIT_DATA_ENTRY(type, name, num) \
- split_data->name = (type*)p; p += ALIGN_16(num_elements * num * sizeof(type));
+ split_data->name = (type*)p; p += align_up(num_elements * num * sizeof(type), 16);
SPLIT_DATA_ENTRIES
#undef SPLIT_DATA_ENTRY
split_data->sd = (ShaderData*)p;
- p += ALIGN_16(num_elements * SIZEOF_SD(MAX_CLOSURE));
+ p += align_up(num_elements * SIZEOF_SD(MAX_CLOSURE), 16);
split_data->sd_DL_shadow = (ShaderData*)p;
- p += ALIGN_16(2 * num_elements * SIZEOF_SD(MAX_CLOSURE));
+ p += align_up(2 * num_elements * SIZEOF_SD(MAX_CLOSURE), 16);
split_data->per_sample_output_buffers = (ccl_global float*)p;
- //p += ALIGN_16(num_elements * per_thread_output_buffer_size);
+ //p += align_up(num_elements * per_thread_output_buffer_size, 16);
split_data->ray_state = ray_state;
}
diff --git a/intern/cycles/util/util_types.h b/intern/cycles/util/util_types.h
index 165f831..96e108b 100644
--- a/intern/cycles/util/util_types.h
+++ b/intern/cycles/util/util_types.h
@@ -399,11 +399,6 @@ ccl_device_inline float4 make_float4(float x, float y, float z, float w)
return a;
}
-ccl_device_inline int align_up(int offset, int alignment)
-{
- return (offset + alignment - 1) & ~(alignment - 1);
-}
-
ccl_device_inline int3 make_int3(int i)
{
#ifdef __KERNEL_SSE__
@@ -478,6 +473,16 @@ ccl_device_inline int4 make_int4(const float3& f)
#endif
+ccl_device_inline int align_up(int offset, int alignment)
+{
+ return (offset + alignment - 1) & ~(alignment - 1);
+}
+
+ccl_device_inline int round_up(int x, int multiple)
+{
+ return ((x + multiple - 1) / multiple) * multiple;
+}
+
/* Interpolation types for textures
* cuda also use texture space to store other objects */
enum InterpolationType {
More information about the Bf-blender-cvs mailing list