diff --git a/clang/lib/DPCT/AnalysisInfo.cpp b/clang/lib/DPCT/AnalysisInfo.cpp index b17f7f389999..ffc6e24b3dbb 100644 --- a/clang/lib/DPCT/AnalysisInfo.cpp +++ b/clang/lib/DPCT/AnalysisInfo.cpp @@ -5710,12 +5710,6 @@ void KernelCallExpr::printSubmit(KernelPrinter &Printer) { Printer << "*/" << getNL(); Printer.indent(); } - if (DpctGlobalInfo::useRootGroup()) { - Printer << "auto exp_props = " - "sycl::ext::oneapi::experimental::properties{sycl::ext::oneapi::" - "experimental::use_root_sync};\n"; - ExecutionConfig.Properties = "exp_props"; - } if (!getEvent().empty()) { Printer << "*" << getEvent() << " = "; } @@ -5993,6 +5987,7 @@ int KernelCallExpr::calculateOriginArgsSize() const { return Size; } std::string KernelCallExpr::getReplacement() { + addPropertiesStmt(); addDevCapCheckStmt(); addAccessorDecl(); addStreamDecl(); @@ -6293,6 +6288,17 @@ void KernelCallExpr::removeExtraIndent() { LocInfo.Indent.length(), "", nullptr)); } +void KernelCallExpr::addPropertiesStmt() { + if (DpctGlobalInfo::useRootGroup()) { + std::string Str; + llvm::raw_string_ostream OS(Str); + OS << "auto exp_props = " + "sycl::ext::oneapi::experimental::properties{sycl::ext::oneapi::" + "experimental::use_root_sync};"; + ExecutionConfig.Properties = "exp_props"; + OuterStmts.OthersList.emplace_back(Str); + } +} void KernelCallExpr::addDevCapCheckStmt() { llvm::SmallVector AspectList; if (getVarMap().hasBF64()) { diff --git a/clang/lib/DPCT/AnalysisInfo.h b/clang/lib/DPCT/AnalysisInfo.h index d06a07782b53..304142bad2bc 100644 --- a/clang/lib/DPCT/AnalysisInfo.h +++ b/clang/lib/DPCT/AnalysisInfo.h @@ -2883,6 +2883,7 @@ class KernelCallExpr : public CallFunctionExpr { void removeExtraIndent(); void addDevCapCheckStmt(); + void addPropertiesStmt(); void addAccessorDecl(MemVarInfo::VarScope Scope); void addAccessorDecl(std::shared_ptr VI); void addStreamDecl(); diff --git a/clang/test/dpct/root_group_braces.cu b/clang/test/dpct/root_group_braces.cu new file mode 100644 index 000000000000..d4b25a4c4930 --- /dev/null +++ b/clang/test/dpct/root_group_braces.cu @@ -0,0 +1,35 @@ +// UNSUPPORTED: cuda-8.0 +// UNSUPPORTED: v8.0 +// RUN: dpct --format-range=none -out-root %T/root_group_braces %s --cuda-include-path="%cuda-path/include" --use-experimental-features=root-group -- -x cuda --cuda-host-only -std=c++14 +// RUN: FileCheck %s --match-full-lines --input-file %T/root_group_braces/root_group_braces.dp.cpp +// RUN: %if build_lit %{icpx -c -fsycl %T/root_group_braces/root_group_braces.dp.cpp -o %T/root_group_braces/root_group_braces.dp.o %} + +#include + +__global__ void kernel1() {} +__global__ void kernel2() {} + +int main() { + int a = 0; + // CHECK: case 1: + // CHECK-NEXT: { + // CHECK-NEXT: auto exp_props = sycl::ext::oneapi::experimental::properties{sycl::ext::oneapi::experimental::use_root_sync}; + // CHECK: } + // CHECK-NEXT break; + // CHECK: case 2: + // CHECK-NEXT: { + // CHECK-NEXT: auto exp_props = sycl::ext::oneapi::experimental::properties{sycl::ext::oneapi::experimental::use_root_sync}; + // CHECK: } + // CHECK-NEXT break; + switch (a) { + case 1: + kernel1<<<1, 1>>>(); + break; + case 2: + kernel2<<<1, 1>>>(); + break; + default: + break; + }; + return 0; +} \ No newline at end of file diff --git a/clang/test/dpct/sync_api.cu b/clang/test/dpct/sync_api.cu index 67185586217c..00807ea54c5d 100644 --- a/clang/test/dpct/sync_api.cu +++ b/clang/test/dpct/sync_api.cu @@ -106,12 +106,15 @@ __global__ void kernel() { } int main() { - // CHECK: auto exp_props = sycl::ext::oneapi::experimental::properties{sycl::ext::oneapi::experimental::use_root_sync}; + // CHECK: { + // CHECK-NEXT: auto exp_props = sycl::ext::oneapi::experimental::properties{sycl::ext::oneapi::experimental::use_root_sync}; + // CHECK-EMPTY: // CHECK-NEXT: dpct::get_in_order_queue().parallel_for( // CHECK-NEXT: sycl::nd_range<3>(sycl::range<3>(1, 1, 2) * sycl::range<3>(1, 1, 2), sycl::range<3>(1, 1, 2)), // CHECK-NEXT: exp_props, [=](sycl::nd_item<3> item_ct1) { // CHECK-NEXT: kernel(item_ct1); // CHECK-NEXT: }); + // CHECK-NEXT: } // CHECK-NEXT: dpct::get_current_device().queues_wait_and_throw(); kernel<<<2, 2>>>(); cudaDeviceSynchronize(); @@ -193,12 +196,16 @@ __global__ void foo_tile32() { } int foo3() { -//CHECK: auto exp_props = sycl::ext::oneapi::experimental::properties{sycl::ext::oneapi::experimental::use_root_sync}; -//CHECK-NEXT: dpct::get_in_order_queue().parallel_for( -//CHECK-NEXT: sycl::nd_range<3>(sycl::range<3>(1, 1, 1), sycl::range<3>(1, 1, 1)), -//CHECK-NEXT: exp_props, [=](sycl::nd_item<3> item_ct1) {{\[\[}}intel::reqd_sub_group_size(32){{\]\]}} { -//CHECK-NEXT: foo2(item_ct1); -//CHECK-NEXT: }); + // CHECK: { + // CHECK-NEXT: auto exp_props = sycl::ext::oneapi::experimental::properties{sycl::ext::oneapi::experimental::use_root_sync}; + // CHECK-NEXT: dpct::get_device(dpct::get_device_id(dpct::get_in_order_queue().get_device())).has_capability_or_fail({sycl::aspect::fp64}); + // CHECK-EMPTY: + // CHECK-NEXT: dpct::get_in_order_queue().parallel_for( + // CHECK-NEXT: sycl::nd_range<3>(sycl::range<3>(1, 1, 1), sycl::range<3>(1, 1, 1)), + // CHECK-NEXT: exp_props, [=](sycl::nd_item<3> item_ct1) {{\[\[}}intel::reqd_sub_group_size(32){{\]\]}} { + // CHECK-NEXT: foo2(item_ct1); + // CHECK-NEXT: }); + // CHECK-NEXT: } foo2<<<1,1>>>(); return 0; } diff --git a/clang/test/dpct/sync_api_noneusm.cu b/clang/test/dpct/sync_api_noneusm.cu index 35ae23cf470d..8e277d8737c3 100644 --- a/clang/test/dpct/sync_api_noneusm.cu +++ b/clang/test/dpct/sync_api_noneusm.cu @@ -102,13 +102,16 @@ __global__ void kernel() { } int main() { -// CHECK: auto exp_props = sycl::ext::oneapi::experimental::properties{sycl::ext::oneapi::experimental::use_root_sync}; -// CHECK-NEXT: dpct::get_out_of_order_queue().parallel_for( -// CHECK-NEXT: sycl::nd_range<3>(sycl::range<3>(1, 1, 2) * sycl::range<3>(1, 1, 2), sycl::range<3>(1, 1, 2)), -// CHECK-NEXT: exp_props, [=](sycl::nd_item<3> item_ct1) { -// CHECK-NEXT: kernel(item_ct1); -// CHECK-NEXT: }); -// CHECK-NEXT: dpct::get_current_device().queues_wait_and_throw(); + // CHECK: { + // CHECK-NEXT: auto exp_props = sycl::ext::oneapi::experimental::properties{sycl::ext::oneapi::experimental::use_root_sync}; + // CHECK-EMPTY: + // CHECK-NEXT: dpct::get_out_of_order_queue().parallel_for( + // CHECK-NEXT: sycl::nd_range<3>(sycl::range<3>(1, 1, 2) * sycl::range<3>(1, 1, 2), sycl::range<3>(1, 1, 2)), + // CHECK-NEXT: exp_props, [=](sycl::nd_item<3> item_ct1) { + // CHECK-NEXT: kernel(item_ct1); + // CHECK-NEXT: }); + // CHECK-NEXT: } + // CHECK-NEXT: dpct::get_current_device().queues_wait_and_throw(); kernel<<<2, 2>>>(); cudaDeviceSynchronize(); return 0;