@@ -295,12 +295,12 @@ void GPUTreeLearner::InitGPU(int platform_id, int device_id) {
295
295
std::string kernel_source;
296
296
std::string kernel_name;
297
297
if (max_num_bin_ <= 64 ) {
298
- kernel_source = " histogram64.cl " ;
298
+ kernel_source = kernel64_src_ ;
299
299
kernel_name = " histogram64" ;
300
300
device_bin_size_ = 64 ;
301
301
}
302
302
else if ( max_num_bin_ <= 256 ) {
303
- kernel_source = " histogram256.cl " ;
303
+ kernel_source = kernel256_src_ ;
304
304
kernel_name = " histogram256" ;
305
305
device_bin_size_ = 256 ;
306
306
}
@@ -332,9 +332,10 @@ void GPUTreeLearner::InitGPU(int platform_id, int device_id) {
332
332
device_histogram_outputs_ = boost::compute::buffer (ctx_, num_dense_feature4_ * 4 * device_bin_size_ * sizeof (GPUHistogramBinEntry),
333
333
boost::compute::memory_object::write_only, nullptr );
334
334
Log::Info (" Using GPU Device: %s, Vendor: %s" , dev_.name ().c_str (), dev_.vendor ().c_str ());
335
- Log::Info (" Compiling OpenCL Kernel from %s..." , kernel_source.c_str ());
335
+ Log::Info (" Compiling OpenCL Kernel with %d bins..." , device_bin_size_);
336
+ // Log::Info("Compiling OpenCL Kernel:\n%s", kernel_source.c_str());
336
337
for (int i = 0 ; i <= max_exp_workgroups_per_feature_; ++i) {
337
- auto program = boost::compute::program::create_with_source_file (kernel_source, ctx_);
338
+ auto program = boost::compute::program::create_with_source (kernel_source, ctx_);
338
339
std::ostringstream opts;
339
340
// FIXME: sparse data
340
341
opts << " -D FEATURE_SIZE=" << num_data_ << " -D POWER_FEATURE_WORKGROUPS=" << i
@@ -358,7 +359,7 @@ void GPUTreeLearner::InitGPU(int platform_id, int device_id) {
358
359
}
359
360
// create the OpenCL kernel for the root node (all data)
360
361
int full_exp_workgroups_per_feature = GetNumWorkgroupsPerFeature (num_data_);
361
- auto program = boost::compute::program::create_with_source_file (kernel_source, ctx_);
362
+ auto program = boost::compute::program::create_with_source (kernel_source, ctx_);
362
363
std::ostringstream opts;
363
364
// FIXME: sparse data
364
365
opts << " -D FEATURE_SIZE=" << num_data_ << " -D POWER_FEATURE_WORKGROUPS=" << full_exp_workgroups_per_feature
0 commit comments