diff --git a/clang/lib/DPCT/RulesLang/APINamesCooperativeGroups.inc b/clang/lib/DPCT/RulesLang/APINamesCooperativeGroups.inc index d1ed20618a70..539f658f44da 100644 --- a/clang/lib/DPCT/RulesLang/APINamesCooperativeGroups.inc +++ b/clang/lib/DPCT/RulesLang/APINamesCooperativeGroups.inc @@ -8,7 +8,6 @@ #ifdef FUNCTION_CALL - /* --- cg::sync(X) --- cg::sync(cg::thread_block) @@ -146,30 +145,31 @@ CASE_FACTORY_ENTRY( ARG(0).get_local_linear_id() */ CONDITIONAL_FACTORY_ENTRY( - CheckParamType(0, "const class cooperative_groups::__v1::thread_block &"), - MEMBER_CALL_FACTORY_ENTRY("thread_rank", NDITEM, - false, "get_local_linear_id"), + CheckParamType(0, "const class cooperative_groups::__v1::thread_block &"), + MEMBER_CALL_FACTORY_ENTRY("thread_rank", NDITEM, false, + "get_local_linear_id"), + CONDITIONAL_FACTORY_ENTRY( + argHasThreadBlockTileType(0, 32), + MEMBER_CALL_FACTORY_ENTRY("thread_rank", SUBGROUP, false, + "get_local_linear_id"), CONDITIONAL_FACTORY_ENTRY( - argHasThreadBlockTileType(0, 32), - MEMBER_CALL_FACTORY_ENTRY("thread_rank", SUBGROUP, - false, "get_local_linear_id"), + UseLogicalGroup, CONDITIONAL_FACTORY_ENTRY( - UseLogicalGroup, - CONDITIONAL_FACTORY_ENTRY( - makeCheckOr( - argHasThreadBlockTileType(0, 16), - argHasThreadBlockTileType(0, 8), - argHasThreadBlockTileType(0, 4), - argHasThreadBlockTileType(0, 2), - argHasThreadBlockTileType(0, 1)), - FEATURE_REQUEST_FACTORY( - HelperFeatureEnum::device_ext, - MEMBER_CALL_FACTORY_ENTRY("thread_rank", - ARG(0), false, "get_local_linear_id")), - UNSUPPORT_FACTORY_ENTRY("thread_rank", - Diagnostics::API_NOT_MIGRATED, ARG("thread_rank"))), + makeCheckOr(argHasThreadBlockTileType(0, 16), + argHasThreadBlockTileType(0, 8), + argHasThreadBlockTileType(0, 4), + argHasThreadBlockTileType(0, 2), + argHasThreadBlockTileType(0, 1)), + FEATURE_REQUEST_FACTORY( + HelperFeatureEnum::device_ext, + MEMBER_CALL_FACTORY_ENTRY("thread_rank", ARG(0), false, + "get_local_linear_id")), UNSUPPORT_FACTORY_ENTRY("thread_rank", - Diagnostics::API_NOT_MIGRATED, ARG("thread_rank"))))) + Diagnostics::API_NOT_MIGRATED, + ARG("thread_rank"))), + UNSUPPORT_FACTORY_ENTRY("thread_rank", + Diagnostics::API_NOT_MIGRATED, + ARG("thread_rank"))))) CONDITIONAL_FACTORY_ENTRY( makeCheckAnd(CheckParamType( @@ -201,8 +201,7 @@ CONDITIONAL_FACTORY_ENTRY( MapNames::getClNamespace() + "ext::oneapi::experimental::this_kernel::get_opportunistic_group")), UNSUPPORT_FACTORY_ENTRY( - "coalesced_threads", - Diagnostics::TRY_EXPERIMENTAL_FEATURE, + "coalesced_threads", Diagnostics::TRY_EXPERIMENTAL_FEATURE, ARG("coalesced_threads"), ARG("--use-experimental-features=non-uniform-groups"))) #endif @@ -226,37 +225,38 @@ CONDITIONAL_FACTORY_ENTRY( */ WARNING_FACTORY_ENTRY( "cooperative_groups::__v1::thread_block.sync", - MEMBER_CALL_FACTORY_ENTRY("cooperative_groups::__v1::thread_block.sync", NDITEM, - false, "barrier"), + MEMBER_CALL_FACTORY_ENTRY("cooperative_groups::__v1::thread_block.sync", + NDITEM, false, "barrier"), Diagnostics::BARRIER_PERFORMANCE_TUNNING, ARG("nd_item")) WARNING_FACTORY_ENTRY( "cooperative_groups::__v1::coalesced_group.sync", - MEMBER_CALL_FACTORY_ENTRY("cooperative_groups::__v1::coalesced_group.sync", NDITEM, - false, "barrier"), + MEMBER_CALL_FACTORY_ENTRY("cooperative_groups::__v1::coalesced_group.sync", + NDITEM, false, "barrier"), Diagnostics::BARRIER_PERFORMANCE_TUNNING, ARG("nd_item")) // for cuda <= 10.2 CONDITIONAL_FACTORY_ENTRY( baseHasThreadBlockTileType(32), WARNING_FACTORY_ENTRY( "cooperative_groups::__v1::__thread_block_tile_base.sync", - MEMBER_CALL_FACTORY_ENTRY("cooperative_groups::__v1::__thread_block_tile_base.sync", - SUBGROUP, false, "barrier"), + MEMBER_CALL_FACTORY_ENTRY( + "cooperative_groups::__v1::__thread_block_tile_base.sync", SUBGROUP, + false, "barrier"), Diagnostics::BARRIER_PERFORMANCE_TUNNING, ARG("sub_group")), - UNSUPPORT_FACTORY_ENTRY("cooperative_groups::__v1::__thread_block_tile_base.sync", - Diagnostics::API_NOT_MIGRATED, - ARG("sync"))) + UNSUPPORT_FACTORY_ENTRY( + "cooperative_groups::__v1::__thread_block_tile_base.sync", + Diagnostics::API_NOT_MIGRATED, ARG("sync"))) // for cuda >= 11.0 CONDITIONAL_FACTORY_ENTRY( baseHasThreadBlockTileType(32), WARNING_FACTORY_ENTRY( "cooperative_groups::__v1::thread_block_tile.sync", - MEMBER_CALL_FACTORY_ENTRY("cooperative_groups::__v1::thread_block_tile.sync", - SUBGROUP, false, "barrier"), + MEMBER_CALL_FACTORY_ENTRY( + "cooperative_groups::__v1::thread_block_tile.sync", SUBGROUP, false, + "barrier"), Diagnostics::BARRIER_PERFORMANCE_TUNNING, ARG("sub_group")), UNSUPPORT_FACTORY_ENTRY("cooperative_groups::__v1::thread_block_tile.sync", - Diagnostics::API_NOT_MIGRATED, - ARG("sync"))) + Diagnostics::API_NOT_MIGRATED, ARG("sync"))) /* --- X.size() --- @@ -278,39 +278,43 @@ MEMBER_CALL_FACTORY_ENTRY("cooperative_groups::__v1::thread_block::size", GROUP, // for cuda <= 10.2 CASE_FACTORY_ENTRY( CASE(baseHasThreadBlockTileType(32), - MEMBER_CALL_FACTORY_ENTRY("cooperative_groups::__v1::__thread_block_tile_base.size", - SUBGROUP, false, "get_local_linear_range")), - CASE(makeCheckAnd(UseLogicalGroup, makeCheckOr( - baseHasThreadBlockTileType(16), - baseHasThreadBlockTileType(8), - baseHasThreadBlockTileType(4), - baseHasThreadBlockTileType(2), - baseHasThreadBlockTileType(1))), + MEMBER_CALL_FACTORY_ENTRY( + "cooperative_groups::__v1::__thread_block_tile_base.size", + SUBGROUP, false, "get_local_linear_range")), + CASE(makeCheckAnd(UseLogicalGroup, + makeCheckOr(baseHasThreadBlockTileType(16), + baseHasThreadBlockTileType(8), + baseHasThreadBlockTileType(4), + baseHasThreadBlockTileType(2), + baseHasThreadBlockTileType(1))), FEATURE_REQUEST_FACTORY( HelperFeatureEnum::device_ext, - MEMBER_CALL_FACTORY_ENTRY("cooperative_groups::__v1::__thread_block_tile_base.size", - MemberExprBase(), false, "get_local_linear_range"))), - OTHERWISE( - UNSUPPORT_FACTORY_ENTRY("cooperative_groups::__v1::__thread_block_tile_base.size", - Diagnostics::API_NOT_MIGRATED, ARG("size")))) + MEMBER_CALL_FACTORY_ENTRY( + "cooperative_groups::__v1::__thread_block_tile_base.size", + MemberExprBase(), false, "get_local_linear_range"))), + OTHERWISE(UNSUPPORT_FACTORY_ENTRY( + "cooperative_groups::__v1::__thread_block_tile_base.size", + Diagnostics::API_NOT_MIGRATED, ARG("size")))) // for cuda >= 11.0 CASE_FACTORY_ENTRY( CASE(baseHasThreadBlockTileType(32), - MEMBER_CALL_FACTORY_ENTRY("cooperative_groups::__v1::thread_block_tile.size", - SUBGROUP, false, "get_local_linear_range")), - CASE(makeCheckAnd(UseLogicalGroup, makeCheckOr( - baseHasThreadBlockTileType(16), - baseHasThreadBlockTileType(8), - baseHasThreadBlockTileType(4), - baseHasThreadBlockTileType(2), - baseHasThreadBlockTileType(1))), + MEMBER_CALL_FACTORY_ENTRY( + "cooperative_groups::__v1::thread_block_tile.size", SUBGROUP, + false, "get_local_linear_range")), + CASE(makeCheckAnd(UseLogicalGroup, + makeCheckOr(baseHasThreadBlockTileType(16), + baseHasThreadBlockTileType(8), + baseHasThreadBlockTileType(4), + baseHasThreadBlockTileType(2), + baseHasThreadBlockTileType(1))), FEATURE_REQUEST_FACTORY( HelperFeatureEnum::device_ext, - MEMBER_CALL_FACTORY_ENTRY("cooperative_groups::__v1::thread_block_tile.size", - MemberExprBase(), false, "get_local_linear_range"))), - OTHERWISE( - UNSUPPORT_FACTORY_ENTRY("cooperative_groups::__v1::thread_block_tile.size", - Diagnostics::API_NOT_MIGRATED, ARG("size")))) + MEMBER_CALL_FACTORY_ENTRY( + "cooperative_groups::__v1::thread_block_tile.size", + MemberExprBase(), false, "get_local_linear_range"))), + OTHERWISE(UNSUPPORT_FACTORY_ENTRY( + "cooperative_groups::__v1::thread_block_tile.size", + Diagnostics::API_NOT_MIGRATED, ARG("size")))) /* --- X.num_threads() --- @@ -324,47 +328,52 @@ CASE_FACTORY_ENTRY( => if UseLogicalGroup then MemberBase().get_local_linear_id() */ -MEMBER_CALL_FACTORY_ENTRY("cooperative_groups::__v1::thread_block.num_threads", GROUP, - false, "get_local_linear_range") +MEMBER_CALL_FACTORY_ENTRY("cooperative_groups::__v1::thread_block.num_threads", + GROUP, false, "get_local_linear_range") MEMBER_CALL_FACTORY_ENTRY("cooperative_groups::__v1::thread_block::num_threads", - GROUP, false, "get_local_linear_range") + GROUP, false, "get_local_linear_range") // for cuda <= 10.2 CASE_FACTORY_ENTRY( CASE(baseHasThreadBlockTileType(32), - MEMBER_CALL_FACTORY_ENTRY("cooperative_groups::__v1::__thread_block_tile_base.num_threads", - SUBGROUP, false, "get_local_linear_range")), - CASE(makeCheckAnd(UseLogicalGroup, makeCheckOr( - baseHasThreadBlockTileType(16), - baseHasThreadBlockTileType(8), - baseHasThreadBlockTileType(4), - baseHasThreadBlockTileType(2), - baseHasThreadBlockTileType(1))), + MEMBER_CALL_FACTORY_ENTRY( + "cooperative_groups::__v1::__thread_block_tile_base.num_threads", + SUBGROUP, false, "get_local_linear_range")), + CASE(makeCheckAnd(UseLogicalGroup, + makeCheckOr(baseHasThreadBlockTileType(16), + baseHasThreadBlockTileType(8), + baseHasThreadBlockTileType(4), + baseHasThreadBlockTileType(2), + baseHasThreadBlockTileType(1))), FEATURE_REQUEST_FACTORY( HelperFeatureEnum::device_ext, - MEMBER_CALL_FACTORY_ENTRY("cooperative_groups::__v1::__thread_block_tile_base.num_threads", - MemberExprBase(), false, "get_local_linear_range"))), - OTHERWISE( - UNSUPPORT_FACTORY_ENTRY("cooperative_groups::__v1::__thread_block_tile_base.num_threads", - Diagnostics::API_NOT_MIGRATED, ARG("num_threads")))) + MEMBER_CALL_FACTORY_ENTRY("cooperative_groups::__v1::__thread_" + "block_tile_base.num_threads", + MemberExprBase(), false, + "get_local_linear_range"))), + OTHERWISE(UNSUPPORT_FACTORY_ENTRY( + "cooperative_groups::__v1::__thread_block_tile_base.num_threads", + Diagnostics::API_NOT_MIGRATED, ARG("num_threads")))) // for cuda >= 11.0 CASE_FACTORY_ENTRY( CASE(baseHasThreadBlockTileType(32), - MEMBER_CALL_FACTORY_ENTRY("cooperative_groups::__v1::thread_block_tile.num_threads", - SUBGROUP, false, "get_local_linear_range")), - CASE(makeCheckAnd(UseLogicalGroup, makeCheckOr( - baseHasThreadBlockTileType(16), - baseHasThreadBlockTileType(8), - baseHasThreadBlockTileType(4), - baseHasThreadBlockTileType(2), - baseHasThreadBlockTileType(1))), + MEMBER_CALL_FACTORY_ENTRY( + "cooperative_groups::__v1::thread_block_tile.num_threads", + SUBGROUP, false, "get_local_linear_range")), + CASE(makeCheckAnd(UseLogicalGroup, + makeCheckOr(baseHasThreadBlockTileType(16), + baseHasThreadBlockTileType(8), + baseHasThreadBlockTileType(4), + baseHasThreadBlockTileType(2), + baseHasThreadBlockTileType(1))), FEATURE_REQUEST_FACTORY( HelperFeatureEnum::device_ext, - MEMBER_CALL_FACTORY_ENTRY("cooperative_groups::__v1::thread_block_tile.num_threads", - MemberExprBase(), false, "get_local_linear_range"))), - OTHERWISE( - UNSUPPORT_FACTORY_ENTRY("cooperative_groups::__v1::thread_block_tile.num_threads", - Diagnostics::API_NOT_MIGRATED, ARG("num_threads")))) + MEMBER_CALL_FACTORY_ENTRY( + "cooperative_groups::__v1::thread_block_tile.num_threads", + MemberExprBase(), false, "get_local_linear_range"))), + OTHERWISE(UNSUPPORT_FACTORY_ENTRY( + "cooperative_groups::__v1::thread_block_tile.num_threads", + Diagnostics::API_NOT_MIGRATED, ARG("num_threads")))) CONDITIONAL_FACTORY_ENTRY( UseRootGroup, @@ -430,14 +439,13 @@ CONDITIONAL_FACTORY_ENTRY( CONDITIONAL_FACTORY_ENTRY( UseRootGroup, - MEMBER_CALL_FACTORY_ENTRY( - "cooperative_groups::__v1::grid_group.size", MemberExprBase(), - false, "get_local_linear_range"), - UNSUPPORT_FACTORY_ENTRY( - "cooperative_groups::__v1::grid_group.size", - Diagnostics::TRY_EXPERIMENTAL_FEATURE, - ARG("cooperative_groups::__v1::grid_group.size"), - ARG("--use-experimental-features=root-group"))) + MEMBER_CALL_FACTORY_ENTRY("cooperative_groups::__v1::grid_group.size", + MemberExprBase(), false, + "get_local_linear_range"), + UNSUPPORT_FACTORY_ENTRY("cooperative_groups::__v1::grid_group.size", + Diagnostics::TRY_EXPERIMENTAL_FEATURE, + ARG("cooperative_groups::__v1::grid_group.size"), + ARG("--use-experimental-features=root-group"))) CONDITIONAL_FACTORY_ENTRY( UseRootGroup, @@ -468,40 +476,45 @@ MEMBER_CALL_FACTORY_ENTRY("cooperative_groups::__v1::thread_block.thread_rank", // for cuda <= 10.2 CASE_FACTORY_ENTRY( CASE(baseHasThreadBlockTileType(32), - MEMBER_CALL_FACTORY_ENTRY("cooperative_groups::__v1::__thread_block_tile_base.thread_rank", - SUBGROUP, false, "get_local_linear_id")), - CASE(makeCheckAnd(UseLogicalGroup, makeCheckOr( - baseHasThreadBlockTileType(16), - baseHasThreadBlockTileType(8), - baseHasThreadBlockTileType(4), - baseHasThreadBlockTileType(2), - baseHasThreadBlockTileType(1))), + MEMBER_CALL_FACTORY_ENTRY( + "cooperative_groups::__v1::__thread_block_tile_base.thread_rank", + SUBGROUP, false, "get_local_linear_id")), + CASE(makeCheckAnd(UseLogicalGroup, + makeCheckOr(baseHasThreadBlockTileType(16), + baseHasThreadBlockTileType(8), + baseHasThreadBlockTileType(4), + baseHasThreadBlockTileType(2), + baseHasThreadBlockTileType(1))), FEATURE_REQUEST_FACTORY( HelperFeatureEnum::device_ext, - MEMBER_CALL_FACTORY_ENTRY("cooperative_groups::__v1::__thread_block_tile_base.thread_rank", - MemberExprBase(), false, "get_local_linear_id"))), - OTHERWISE( - UNSUPPORT_FACTORY_ENTRY("cooperative_groups::__v1::__thread_block_tile_base.thread_rank", - Diagnostics::API_NOT_MIGRATED, ARG("thread_rank")))) + MEMBER_CALL_FACTORY_ENTRY("cooperative_groups::__v1::__thread_" + "block_tile_base.thread_rank", + MemberExprBase(), false, + "get_local_linear_id"))), + OTHERWISE(UNSUPPORT_FACTORY_ENTRY( + "cooperative_groups::__v1::__thread_block_tile_base.thread_rank", + Diagnostics::API_NOT_MIGRATED, ARG("thread_rank")))) // for cuda >= 11.0 CASE_FACTORY_ENTRY( CASE(baseHasThreadBlockTileType(32), - MEMBER_CALL_FACTORY_ENTRY("cooperative_groups::__v1::thread_block_tile.thread_rank", - SUBGROUP, false, "get_local_linear_id")), - CASE(makeCheckAnd(UseLogicalGroup, makeCheckOr( - baseHasThreadBlockTileType(16), - baseHasThreadBlockTileType(8), - baseHasThreadBlockTileType(4), - baseHasThreadBlockTileType(2), - baseHasThreadBlockTileType(1))), + MEMBER_CALL_FACTORY_ENTRY( + "cooperative_groups::__v1::thread_block_tile.thread_rank", + SUBGROUP, false, "get_local_linear_id")), + CASE(makeCheckAnd(UseLogicalGroup, + makeCheckOr(baseHasThreadBlockTileType(16), + baseHasThreadBlockTileType(8), + baseHasThreadBlockTileType(4), + baseHasThreadBlockTileType(2), + baseHasThreadBlockTileType(1))), FEATURE_REQUEST_FACTORY( HelperFeatureEnum::device_ext, - MEMBER_CALL_FACTORY_ENTRY("cooperative_groups::__v1::thread_block_tile.thread_rank", - MemberExprBase(), false, "get_local_linear_id"))), - OTHERWISE( - UNSUPPORT_FACTORY_ENTRY("cooperative_groups::__v1::thread_block_tile.thread_rank", - Diagnostics::API_NOT_MIGRATED, ARG("thread_rank")))) + MEMBER_CALL_FACTORY_ENTRY( + "cooperative_groups::__v1::thread_block_tile.thread_rank", + MemberExprBase(), false, "get_local_linear_id"))), + OTHERWISE(UNSUPPORT_FACTORY_ENTRY( + "cooperative_groups::__v1::thread_block_tile.thread_rank", + Diagnostics::API_NOT_MIGRATED, ARG("thread_rank")))) /* --- X.meta_group_rank() --- @@ -514,39 +527,96 @@ CASE_FACTORY_ENTRY( */ CASE_FACTORY_ENTRY( CASE(baseHasThreadBlockTileType(32), - MEMBER_CALL_FACTORY_ENTRY("cooperative_groups::__v1::thread_block_tile.meta_group_rank", - SUBGROUP, false, "get_group_linear_id")), - CASE(makeCheckAnd(UseLogicalGroup, makeCheckOr( - baseHasThreadBlockTileType(16), - baseHasThreadBlockTileType(8), - baseHasThreadBlockTileType(4), - baseHasThreadBlockTileType(2), - baseHasThreadBlockTileType(1))), + MEMBER_CALL_FACTORY_ENTRY( + "cooperative_groups::__v1::thread_block_tile.meta_group_rank", + SUBGROUP, false, "get_group_linear_id")), + CASE(makeCheckAnd(UseLogicalGroup, + makeCheckOr(baseHasThreadBlockTileType(16), + baseHasThreadBlockTileType(8), + baseHasThreadBlockTileType(4), + baseHasThreadBlockTileType(2), + baseHasThreadBlockTileType(1))), FEATURE_REQUEST_FACTORY( HelperFeatureEnum::device_ext, - MEMBER_CALL_FACTORY_ENTRY("cooperative_groups::__v1::thread_block_tile.meta_group_rank", - MemberExprBase(), false, "get_group_linear_id"))), - OTHERWISE( - UNSUPPORT_FACTORY_ENTRY("cooperative_groups::__v1::thread_block_tile.meta_group_rank", - Diagnostics::API_NOT_MIGRATED, ARG("meta_group_rank")))) + MEMBER_CALL_FACTORY_ENTRY( + "cooperative_groups::__v1::thread_block_tile.meta_group_rank", + MemberExprBase(), false, "get_group_linear_id"))), + OTHERWISE(UNSUPPORT_FACTORY_ENTRY( + "cooperative_groups::__v1::thread_block_tile.meta_group_rank", + Diagnostics::API_NOT_MIGRATED, ARG("meta_group_rank")))) CASE_FACTORY_ENTRY( CASE(baseHasThreadBlockTileType(32), - MEMBER_CALL_FACTORY_ENTRY("cooperative_groups::__v1::__single_warp_thread_block_tile.meta_group_rank", + MEMBER_CALL_FACTORY_ENTRY("cooperative_groups::__v1::__single_warp_" + "thread_block_tile.meta_group_rank", SUBGROUP, false, "get_group_linear_id")), - CASE(makeCheckAnd(UseLogicalGroup, makeCheckOr( - baseHasThreadBlockTileType(16), - baseHasThreadBlockTileType(8), - baseHasThreadBlockTileType(4), - baseHasThreadBlockTileType(2), - baseHasThreadBlockTileType(1))), + CASE(makeCheckAnd(UseLogicalGroup, + makeCheckOr(baseHasThreadBlockTileType(16), + baseHasThreadBlockTileType(8), + baseHasThreadBlockTileType(4), + baseHasThreadBlockTileType(2), + baseHasThreadBlockTileType(1))), + FEATURE_REQUEST_FACTORY( + HelperFeatureEnum::device_ext, + MEMBER_CALL_FACTORY_ENTRY("cooperative_groups::__v1::__single_" + "warp_thread_block_tile.meta_group_rank", + MemberExprBase(), false, + "get_group_linear_id"))), + OTHERWISE(UNSUPPORT_FACTORY_ENTRY("cooperative_groups::__v1::__single_warp_" + "thread_block_tile.meta_group_rank", + Diagnostics::API_NOT_MIGRATED, + ARG("meta_group_rank")))) + +/* + --- X.meta_group_size() --- + cg::thread_block_tile<32>::meta_group_size() + => item_ct1.get_sub_group().get_group_linear_range() + + cg::thread_block_tile::meta_group_size() if n in {1,2,4,8,16} + => if UseLogicalGroup then + MemberBase().get_group_linear_range() +*/ +CASE_FACTORY_ENTRY( + CASE(baseHasThreadBlockTileType(32), + MEMBER_CALL_FACTORY_ENTRY( + "cooperative_groups::__v1::thread_block_tile.meta_group_size", + SUBGROUP, false, "get_group_linear_range")), + CASE(makeCheckAnd(UseLogicalGroup, + makeCheckOr(baseHasThreadBlockTileType(16), + baseHasThreadBlockTileType(8), + baseHasThreadBlockTileType(4), + baseHasThreadBlockTileType(2), + baseHasThreadBlockTileType(1))), FEATURE_REQUEST_FACTORY( HelperFeatureEnum::device_ext, - MEMBER_CALL_FACTORY_ENTRY("cooperative_groups::__v1::__single_warp_thread_block_tile.meta_group_rank", - MemberExprBase(), false, "get_group_linear_id"))), - OTHERWISE( - UNSUPPORT_FACTORY_ENTRY("cooperative_groups::__v1::__single_warp_thread_block_tile.meta_group_rank", - Diagnostics::API_NOT_MIGRATED, ARG("meta_group_rank")))) + MEMBER_CALL_FACTORY_ENTRY( + "cooperative_groups::__v1::thread_block_tile.meta_group_size", + MemberExprBase(), false, "get_group_linear_range"))), + OTHERWISE(UNSUPPORT_FACTORY_ENTRY( + "cooperative_groups::__v1::thread_block_tile.meta_group_size", + Diagnostics::API_NOT_MIGRATED, ARG("meta_group_size")))) + +CASE_FACTORY_ENTRY( + CASE(baseHasThreadBlockTileType(32), + MEMBER_CALL_FACTORY_ENTRY("cooperative_groups::__v1::__single_warp_" + "thread_block_tile.meta_group_size", + SUBGROUP, false, "get_group_linear_range")), + CASE(makeCheckAnd(UseLogicalGroup, + makeCheckOr(baseHasThreadBlockTileType(16), + baseHasThreadBlockTileType(8), + baseHasThreadBlockTileType(4), + baseHasThreadBlockTileType(2), + baseHasThreadBlockTileType(1))), + FEATURE_REQUEST_FACTORY( + HelperFeatureEnum::device_ext, + MEMBER_CALL_FACTORY_ENTRY("cooperative_groups::__v1::__single_" + "warp_thread_block_tile.meta_group_size", + MemberExprBase(), false, + "get_group_linear_range"))), + OTHERWISE(UNSUPPORT_FACTORY_ENTRY("cooperative_groups::__v1::__single_warp_" + "thread_block_tile.meta_group_size", + Diagnostics::API_NOT_MIGRATED, + ARG("meta_group_size")))) /* --- X.shfl_down() --- @@ -1082,9 +1152,8 @@ CONDITIONAL_FACTORY_ENTRY( CONDITIONAL_FACTORY_ENTRY( UseNonUniformGroups, CALL_FACTORY_ENTRY("cooperative_groups::__v1::coalesced_group.shfl", - CALL(MapNames::getClNamespace() + - "select_from_group", - MemberExprBase(), ARG(0), ARG(1))), + CALL(MapNames::getClNamespace() + "select_from_group", + MemberExprBase(), ARG(0), ARG(1))), UNSUPPORT_FACTORY_ENTRY( "cooperative_groups::__v1::coalesced_group.shfl", Diagnostics::TRY_EXPERIMENTAL_FEATURE, diff --git a/clang/lib/DPCT/RulesLang/MapNamesLang.cpp b/clang/lib/DPCT/RulesLang/MapNamesLang.cpp index 82326b8e818c..585fdb65a444 100644 --- a/clang/lib/DPCT/RulesLang/MapNamesLang.cpp +++ b/clang/lib/DPCT/RulesLang/MapNamesLang.cpp @@ -306,6 +306,7 @@ const std::unordered_set MapNamesLang::CooperativeGroupsAPISet{ "shfl", "shfl_xor", "meta_group_rank", + "meta_group_size", "block_tile_memory", "thread_index", "group_index", diff --git a/clang/lib/DPCT/RulesLang/RulesLangCooperativeGroups.cpp b/clang/lib/DPCT/RulesLang/RulesLangCooperativeGroups.cpp index 466bd278d2e3..af72a85876d7 100644 --- a/clang/lib/DPCT/RulesLang/RulesLangCooperativeGroups.cpp +++ b/clang/lib/DPCT/RulesLang/RulesLangCooperativeGroups.cpp @@ -115,11 +115,12 @@ void CooperativeGroupsFunctionRule::runRule( if (FuncName == "sync" || FuncName == "thread_rank" || FuncName == "size" || FuncName == "shfl_down" || FuncName == "shfl_up" || FuncName == "shfl" || FuncName == "shfl_xor" || FuncName == "meta_group_rank" || - FuncName == "reduce" || FuncName == "thread_index" || - FuncName == "group_index" || FuncName == "num_threads" || - FuncName == "inclusive_scan" || FuncName == "exclusive_scan" || - FuncName == "coalesced_threads" || FuncName == "this_grid" || - FuncName == "num_blocks" || FuncName == "block_rank") { + FuncName == "meta_group_size" || FuncName == "reduce" || + FuncName == "thread_index" || FuncName == "group_index" || + FuncName == "num_threads" || FuncName == "inclusive_scan" || + FuncName == "exclusive_scan" || FuncName == "coalesced_threads" || + FuncName == "this_grid" || FuncName == "num_blocks" || + FuncName == "block_rank") { // There are 3 usages of cooperative groups APIs. // 1. cg::thread_block tb; tb.sync(); // member function // 2. cg::thread_block tb; cg::sync(tb); // free function @@ -134,6 +135,7 @@ void CooperativeGroupsFunctionRule::runRule( // shfl_up 1/1 0/0 0/0 // shfl_xor 1/1 0/0 0/0 // meta_group_rank 1/1 0/0 0/0 + // meta_group_size 1/1 0/0 0/0 ExprAnalysis EA(CE); emplaceTransformation(EA.getReplacement()); diff --git a/clang/lib/DPCT/SrcAPI/APINames.inc b/clang/lib/DPCT/SrcAPI/APINames.inc index c750e4de889f..742f52b6c129 100644 --- a/clang/lib/DPCT/SrcAPI/APINames.inc +++ b/clang/lib/DPCT/SrcAPI/APINames.inc @@ -2226,7 +2226,7 @@ ENTRY_MEMBER_FUNCTION(cooperative_groups::multi_grid_group, cooperative_groups:: ENTRY_MEMBER_FUNCTION(cooperative_groups::thread_block_tile, cooperative_groups::__v1::thread_block_tile, sync, sync, true, NO_FLAG, P4, "Comment") ENTRY_MEMBER_FUNCTION(cooperative_groups::thread_block_tile, cooperative_groups::__v1::thread_block_tile, num_threads, num_threads, true, NO_FLAG, P4, "Successful") ENTRY_MEMBER_FUNCTION(cooperative_groups::thread_block_tile, cooperative_groups::__v1::thread_block_tile, thread_rank, thread_rank, true, NO_FLAG, P4, "Comment") -ENTRY_MEMBER_FUNCTION(cooperative_groups::thread_block_tile, cooperative_groups::__v1::thread_block_tile, meta_group_size, meta_group_size, false, NO_FLAG, P4, "Comment") +ENTRY_MEMBER_FUNCTION(cooperative_groups::thread_block_tile, cooperative_groups::__v1::thread_block_tile, meta_group_size, meta_group_size, true, NO_FLAG, P4, "Comment") ENTRY_MEMBER_FUNCTION(cooperative_groups::thread_block_tile, cooperative_groups::__v1::thread_block_tile, meta_group_rank, meta_group_rank, true, NO_FLAG, P4, "Comment") ENTRY_MEMBER_FUNCTION(cooperative_groups::thread_block_tile, cooperative_groups::__v1::thread_block_tile, shfl, shfl, true, NO_FLAG, P4, "DPCT1119") ENTRY_MEMBER_FUNCTION(cooperative_groups::thread_block_tile, cooperative_groups::__v1::thread_block_tile, shfl_up, shfl_up, true, NO_FLAG, P4, "Comment") diff --git a/clang/test/dpct/cooperative_groups2.cu b/clang/test/dpct/cooperative_groups2.cu index 851c56b2d688..3d0f63d5f7ed 100644 --- a/clang/test/dpct/cooperative_groups2.cu +++ b/clang/test/dpct/cooperative_groups2.cu @@ -39,6 +39,14 @@ __device__ void foo() { ctile32.meta_group_rank(); tile32.meta_group_rank(); + // X.meta_group_size() + // CHECK-COUNT-5: item_ct1.get_sub_group().get_group_linear_range(); + cg::tiled_partition<32>(block).meta_group_size(); + catile32.meta_group_size(); + atile32.meta_group_size(); + ctile32.meta_group_size(); + tile32.meta_group_size(); + // CHECK: const auto catile16 = dpct::experimental::logical_group(item_ct1, item_ct1.get_group(), 16); const auto catile16 = cg::tiled_partition<16>(block); // CHECK: auto atile16 = dpct::experimental::logical_group(item_ct1, item_ct1.get_group(), 16); @@ -53,6 +61,12 @@ __device__ void foo() { catile16.meta_group_rank(); // CHECK: atile16.get_group_linear_id(); atile16.meta_group_rank(); + // CHECK: dpct::experimental::logical_group(item_ct1, item_ct1.get_group(), 16).get_group_linear_range(); + cg::tiled_partition<16>(block).meta_group_size(); + // CHECK: catile16.get_group_linear_range(); + catile16.meta_group_size(); + // CHECK: atile16.get_group_linear_range(); + atile16.meta_group_size(); // CHECK: const auto catile8 = dpct::experimental::logical_group(item_ct1, item_ct1.get_group(), 8); const auto catile8 = cg::tiled_partition<8>(block); @@ -68,6 +82,12 @@ __device__ void foo() { catile8.meta_group_rank(); // CHECK: atile8.get_group_linear_id(); atile8.meta_group_rank(); + // CHECK: dpct::experimental::logical_group(item_ct1, item_ct1.get_group(), 8).get_group_linear_range(); + cg::tiled_partition<8>(block).meta_group_size(); + // CHECK: catile8.get_group_linear_range(); + catile8.meta_group_size(); + // CHECK: atile8.get_group_linear_range(); + atile8.meta_group_size(); // CHECK: const auto catile4 = dpct::experimental::logical_group(item_ct1, item_ct1.get_group(), 4); const auto catile4 = cg::tiled_partition<4>(block); @@ -83,6 +103,12 @@ __device__ void foo() { catile4.meta_group_rank(); // CHECK: atile4.get_group_linear_id(); atile4.meta_group_rank(); + // CHECK: dpct::experimental::logical_group(item_ct1, item_ct1.get_group(), 4).get_group_linear_range(); + cg::tiled_partition<4>(block).meta_group_size(); + // CHECK: catile4.get_group_linear_range(); + catile4.meta_group_size(); + // CHECK: atile4.get_group_linear_range(); + atile4.meta_group_size(); // CHECK: const auto catile2 = dpct::experimental::logical_group(item_ct1, item_ct1.get_group(), 2); const auto catile2 = cg::tiled_partition<2>(block); @@ -98,6 +124,12 @@ __device__ void foo() { catile2.meta_group_rank(); // CHECK: atile2.get_group_linear_id(); atile2.meta_group_rank(); + // CHECK: dpct::experimental::logical_group(item_ct1, item_ct1.get_group(), 2).get_group_linear_range(); + cg::tiled_partition<2>(block).meta_group_size(); + // CHECK: catile2.get_group_linear_range(); + catile2.meta_group_size(); + // CHECK: atile2.get_group_linear_range(); + atile2.meta_group_size(); // CHECK: const auto catile1 = dpct::experimental::logical_group(item_ct1, item_ct1.get_group(), 1); const auto catile1 = cg::tiled_partition<1>(block); @@ -113,6 +145,12 @@ __device__ void foo() { catile1.meta_group_rank(); // CHECK: atile1.get_group_linear_id(); atile1.meta_group_rank(); + // CHECK: dpct::experimental::logical_group(item_ct1, item_ct1.get_group(), 1).get_group_linear_range(); + cg::tiled_partition<1>(block).meta_group_size(); + // CHECK: catile1.get_group_linear_range(); + catile1.meta_group_size(); + // CHECK: atile1.get_group_linear_range(); + atile1.meta_group_size(); // CHECK: item_ct1.get_group().get_local_linear_range(); cg::thread_block::size();