Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
18 changes: 12 additions & 6 deletions clang/lib/DPCT/AnalysisInfo.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -5710,12 +5710,6 @@ void KernelCallExpr::printSubmit(KernelPrinter &Printer) {
Printer << "*/" << getNL();
Printer.indent();
}
if (DpctGlobalInfo::useRootGroup()) {
Printer << "auto exp_props = "
"sycl::ext::oneapi::experimental::properties{sycl::ext::oneapi::"
"experimental::use_root_sync};\n";
ExecutionConfig.Properties = "exp_props";
}
if (!getEvent().empty()) {
Printer << "*" << getEvent() << " = ";
}
Expand Down Expand Up @@ -5993,6 +5987,7 @@ int KernelCallExpr::calculateOriginArgsSize() const {
return Size;
}
std::string KernelCallExpr::getReplacement() {
addPropertiesStmt();
addDevCapCheckStmt();
addAccessorDecl();
addStreamDecl();
Expand Down Expand Up @@ -6293,6 +6288,17 @@ void KernelCallExpr::removeExtraIndent() {
LocInfo.Indent.length(), "", nullptr));
}

void KernelCallExpr::addPropertiesStmt() {
if (DpctGlobalInfo::useRootGroup()) {
std::string Str;
llvm::raw_string_ostream OS(Str);
OS << "auto exp_props = "
"sycl::ext::oneapi::experimental::properties{sycl::ext::oneapi::"
"experimental::use_root_sync};";
ExecutionConfig.Properties = "exp_props";
OuterStmts.OthersList.emplace_back(Str);
}
}
void KernelCallExpr::addDevCapCheckStmt() {
llvm::SmallVector<std::string> AspectList;
if (getVarMap().hasBF64()) {
Expand Down
1 change: 1 addition & 0 deletions clang/lib/DPCT/AnalysisInfo.h
Original file line number Diff line number Diff line change
Expand Up @@ -2883,6 +2883,7 @@ class KernelCallExpr : public CallFunctionExpr {

void removeExtraIndent();
void addDevCapCheckStmt();
void addPropertiesStmt();
void addAccessorDecl(MemVarInfo::VarScope Scope);
void addAccessorDecl(std::shared_ptr<MemVarInfo> VI);
void addStreamDecl();
Expand Down
35 changes: 35 additions & 0 deletions clang/test/dpct/root_group_braces.cu
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
// UNSUPPORTED: cuda-8.0
// UNSUPPORTED: v8.0
// RUN: dpct --format-range=none -out-root %T/root_group_braces %s --cuda-include-path="%cuda-path/include" --use-experimental-features=root-group -- -x cuda --cuda-host-only -std=c++14
// RUN: FileCheck %s --match-full-lines --input-file %T/root_group_braces/root_group_braces.dp.cpp
// RUN: %if build_lit %{icpx -c -fsycl %T/root_group_braces/root_group_braces.dp.cpp -o %T/root_group_braces/root_group_braces.dp.o %}

#include <cuda_runtime.h>

__global__ void kernel1() {}
__global__ void kernel2() {}

int main() {
int a = 0;
// CHECK: case 1:
// CHECK-NEXT: {
// CHECK-NEXT: auto exp_props = sycl::ext::oneapi::experimental::properties{sycl::ext::oneapi::experimental::use_root_sync};
// CHECK: }
// CHECK-NEXT break;
// CHECK: case 2:
// CHECK-NEXT: {
// CHECK-NEXT: auto exp_props = sycl::ext::oneapi::experimental::properties{sycl::ext::oneapi::experimental::use_root_sync};
// CHECK: }
// CHECK-NEXT break;
switch (a) {
case 1:
kernel1<<<1, 1>>>();
break;
case 2:
kernel2<<<1, 1>>>();
break;
default:
break;
};
return 0;
}
21 changes: 14 additions & 7 deletions clang/test/dpct/sync_api.cu
Original file line number Diff line number Diff line change
Expand Up @@ -106,12 +106,15 @@ __global__ void kernel() {
}

int main() {
// CHECK: auto exp_props = sycl::ext::oneapi::experimental::properties{sycl::ext::oneapi::experimental::use_root_sync};
// CHECK: {
// CHECK-NEXT: auto exp_props = sycl::ext::oneapi::experimental::properties{sycl::ext::oneapi::experimental::use_root_sync};
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

can we inline the exp_props in parallel_for?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

How about insert the exp_pros when on demand? In this case, it is not required.

// CHECK-EMPTY:
// CHECK-NEXT: dpct::get_in_order_queue().parallel_for(
// CHECK-NEXT: sycl::nd_range<3>(sycl::range<3>(1, 1, 2) * sycl::range<3>(1, 1, 2), sycl::range<3>(1, 1, 2)),
// CHECK-NEXT: exp_props, [=](sycl::nd_item<3> item_ct1) {
// CHECK-NEXT: kernel(item_ct1);
// CHECK-NEXT: });
// CHECK-NEXT: }
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

if there is only one kernel in the code block, then no need to insert {}.

// CHECK-NEXT: dpct::get_current_device().queues_wait_and_throw();
kernel<<<2, 2>>>();
cudaDeviceSynchronize();
Expand Down Expand Up @@ -193,12 +196,16 @@ __global__ void foo_tile32() {
}

int foo3() {
//CHECK: auto exp_props = sycl::ext::oneapi::experimental::properties{sycl::ext::oneapi::experimental::use_root_sync};
//CHECK-NEXT: dpct::get_in_order_queue().parallel_for(
//CHECK-NEXT: sycl::nd_range<3>(sycl::range<3>(1, 1, 1), sycl::range<3>(1, 1, 1)),
//CHECK-NEXT: exp_props, [=](sycl::nd_item<3> item_ct1) {{\[\[}}intel::reqd_sub_group_size(32){{\]\]}} {
//CHECK-NEXT: foo2(item_ct1);
//CHECK-NEXT: });
// CHECK: {
// CHECK-NEXT: auto exp_props = sycl::ext::oneapi::experimental::properties{sycl::ext::oneapi::experimental::use_root_sync};
// CHECK-NEXT: dpct::get_device(dpct::get_device_id(dpct::get_in_order_queue().get_device())).has_capability_or_fail({sycl::aspect::fp64});
// CHECK-EMPTY:
// CHECK-NEXT: dpct::get_in_order_queue().parallel_for(
// CHECK-NEXT: sycl::nd_range<3>(sycl::range<3>(1, 1, 1), sycl::range<3>(1, 1, 1)),
// CHECK-NEXT: exp_props, [=](sycl::nd_item<3> item_ct1) {{\[\[}}intel::reqd_sub_group_size(32){{\]\]}} {
// CHECK-NEXT: foo2(item_ct1);
// CHECK-NEXT: });
// CHECK-NEXT: }
foo2<<<1,1>>>();
return 0;
}
Expand Down
17 changes: 10 additions & 7 deletions clang/test/dpct/sync_api_noneusm.cu
Original file line number Diff line number Diff line change
Expand Up @@ -102,13 +102,16 @@ __global__ void kernel() {
}

int main() {
// CHECK: auto exp_props = sycl::ext::oneapi::experimental::properties{sycl::ext::oneapi::experimental::use_root_sync};
// CHECK-NEXT: dpct::get_out_of_order_queue().parallel_for(
// CHECK-NEXT: sycl::nd_range<3>(sycl::range<3>(1, 1, 2) * sycl::range<3>(1, 1, 2), sycl::range<3>(1, 1, 2)),
// CHECK-NEXT: exp_props, [=](sycl::nd_item<3> item_ct1) {
// CHECK-NEXT: kernel(item_ct1);
// CHECK-NEXT: });
// CHECK-NEXT: dpct::get_current_device().queues_wait_and_throw();
// CHECK: {
// CHECK-NEXT: auto exp_props = sycl::ext::oneapi::experimental::properties{sycl::ext::oneapi::experimental::use_root_sync};
// CHECK-EMPTY:
// CHECK-NEXT: dpct::get_out_of_order_queue().parallel_for(
// CHECK-NEXT: sycl::nd_range<3>(sycl::range<3>(1, 1, 2) * sycl::range<3>(1, 1, 2), sycl::range<3>(1, 1, 2)),
// CHECK-NEXT: exp_props, [=](sycl::nd_item<3> item_ct1) {
// CHECK-NEXT: kernel(item_ct1);
// CHECK-NEXT: });
// CHECK-NEXT: }
// CHECK-NEXT: dpct::get_current_device().queues_wait_and_throw();
kernel<<<2, 2>>>();
cudaDeviceSynchronize();
return 0;
Expand Down