Skip to content

Commit 4d08152

Browse files
committed
add OpenCL kernels as string literals
1 parent cbef453 commit 4d08152

File tree

4 files changed

+56
-5
lines changed

4 files changed

+56
-5
lines changed

src/treelearner/gpu_tree_learner.cpp

+6-5
Original file line numberDiff line numberDiff line change
@@ -295,12 +295,12 @@ void GPUTreeLearner::InitGPU(int platform_id, int device_id) {
295295
std::string kernel_source;
296296
std::string kernel_name;
297297
if (max_num_bin_ <= 64) {
298-
kernel_source = "histogram64.cl";
298+
kernel_source = kernel64_src_;
299299
kernel_name = "histogram64";
300300
device_bin_size_ = 64;
301301
}
302302
else if ( max_num_bin_ <= 256) {
303-
kernel_source = "histogram256.cl";
303+
kernel_source = kernel256_src_;
304304
kernel_name = "histogram256";
305305
device_bin_size_ = 256;
306306
}
@@ -332,9 +332,10 @@ void GPUTreeLearner::InitGPU(int platform_id, int device_id) {
332332
device_histogram_outputs_ = boost::compute::buffer(ctx_, num_dense_feature4_ * 4 * device_bin_size_ * sizeof(GPUHistogramBinEntry),
333333
boost::compute::memory_object::write_only, nullptr);
334334
Log::Info("Using GPU Device: %s, Vendor: %s", dev_.name().c_str(), dev_.vendor().c_str());
335-
Log::Info("Compiling OpenCL Kernel from %s...", kernel_source.c_str());
335+
Log::Info("Compiling OpenCL Kernel with %d bins...", device_bin_size_);
336+
// Log::Info("Compiling OpenCL Kernel:\n%s", kernel_source.c_str());
336337
for (int i = 0; i <= max_exp_workgroups_per_feature_; ++i) {
337-
auto program = boost::compute::program::create_with_source_file(kernel_source, ctx_);
338+
auto program = boost::compute::program::create_with_source(kernel_source, ctx_);
338339
std::ostringstream opts;
339340
// FIXME: sparse data
340341
opts << "-D FEATURE_SIZE=" << num_data_ << " -D POWER_FEATURE_WORKGROUPS=" << i
@@ -358,7 +359,7 @@ void GPUTreeLearner::InitGPU(int platform_id, int device_id) {
358359
}
359360
// create the OpenCL kernel for the root node (all data)
360361
int full_exp_workgroups_per_feature = GetNumWorkgroupsPerFeature(num_data_);
361-
auto program = boost::compute::program::create_with_source_file(kernel_source, ctx_);
362+
auto program = boost::compute::program::create_with_source(kernel_source, ctx_);
362363
std::ostringstream opts;
363364
// FIXME: sparse data
364365
opts << "-D FEATURE_SIZE=" << num_data_ << " -D POWER_FEATURE_WORKGROUPS=" << full_exp_workgroups_per_feature

src/treelearner/gpu_tree_learner.h

+10
Original file line numberDiff line numberDiff line change
@@ -195,6 +195,16 @@ class GPUTreeLearner: public TreeLearner {
195195
boost::compute::device dev_;
196196
boost::compute::context ctx_;
197197
boost::compute::command_queue queue_;
198+
/*! \brief GPU kernel for 256 bins */
199+
const char *kernel256_src_ =
200+
#include "ocl/histogram256.cl"
201+
;
202+
/*! \brief GPU kernel for 64 bins */
203+
const char *kernel64_src_ =
204+
#include "ocl/histogram64.cl"
205+
;
206+
/*! \brief GPU kernel for 64 bins */
207+
198208
/*! \brief a array of histogram kernels with different number
199209
of workgroups per feature */
200210
std::vector<boost::compute::kernel> histogram_kernels_;

histogram256.cl src/treelearner/ocl/histogram256.cl

+20
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,18 @@
1+
// this file can either be read and passed to an OpenCL compiler directly,
2+
// or included in a C++11 source file as a string literal
3+
#ifndef __OPENCL_VERSION__
4+
// If we are including this file in C++,
5+
// the entire source file following (except the last #endif) will become
6+
// a raw string literal. The extra ")" is just for matching parentheses
7+
// to make the editor happy. The extra ")" and extra endif will be skipped.
8+
// DO NOT add anything between here and the next #ifndef, otherwise you need
9+
// to modify the skip count at the end of this file.
10+
R""()
11+
#endif
12+
13+
#ifndef _HISTOGRAM_256_KERNEL_
14+
#define _HISTOGRAM_256_KERNEL_
15+
116
#pragma OPENCL EXTENSION cl_khr_local_int32_base_atomics : enable
217
#pragma OPENCL EXTENSION cl_khr_local_int32_extended_atomics : enable
318

@@ -551,3 +566,8 @@ __kernel void histogram256(__global const uchar4* feature_data_base,
551566
}
552567
}
553568

569+
// The following line ends the string literal, adds an extra #endif at the end
570+
// the +9 skips extra characters ")", newline, "#endif" and newline at the beginning
571+
// )"" "\n#endif" + 9
572+
#endif
573+

histogram64.cl src/treelearner/ocl/histogram64.cl

+20
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,18 @@
1+
// this file can either be read and passed to an OpenCL compiler directly,
2+
// or included in a C++11 source file as a string literal
3+
#ifndef __OPENCL_VERSION__
4+
// If we are including this file in C++,
5+
// the entire source file following (except the last #endif) will become
6+
// a raw string literal. The extra ")" is just for matching parentheses
7+
// to make the editor happy. The extra ")" and extra endif will be skipped.
8+
// DO NOT add anything between here and the next #ifndef, otherwise you need
9+
// to modify the skip count at the end of this file.
10+
R""()
11+
#endif
12+
13+
#ifndef _HISTOGRAM_64_KERNEL_
14+
#define _HISTOGRAM_64_KERNEL_
15+
116
#pragma OPENCL EXTENSION cl_khr_local_int32_base_atomics : enable
217
#pragma OPENCL EXTENSION cl_khr_local_int32_extended_atomics : enable
318

@@ -503,3 +518,8 @@ __kernel void histogram64(__global const uchar4* feature_data_base,
503518
}
504519
}
505520

521+
// The following line ends the string literal, adds an extra #endif at the end
522+
// the +9 skips extra characters ")", newline, "#endif" and newline at the beginning
523+
// )"" "\n#endif" + 9
524+
#endif
525+

0 commit comments

Comments
 (0)