Skip to content

Commit

Permalink
OpenMPTarget: Kernel mode implementation for collapse clause trials.
Browse files Browse the repository at this point in the history
  • Loading branch information
Rahulkumar Gayatri committed Aug 22, 2024
1 parent b3540ca commit 8d81455
Show file tree
Hide file tree
Showing 3 changed files with 49 additions and 4 deletions.
2 changes: 1 addition & 1 deletion containers/src/Kokkos_Bitset.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -271,7 +271,7 @@ class Bitset {
offset = !(scan_direction & BIT_SCAN_REVERSE)
? offset
: (offset + block_mask) & block_mask;
block = Impl::rotate_right(block, offset);
block = Impl::rotate_right(block, offset);
return (((!(scan_direction & BIT_SCAN_REVERSE)
? Impl::bit_scan_forward(block)
: Impl::int_log2(block)) +
Expand Down
45 changes: 45 additions & 0 deletions core/src/OpenMPTarget/Kokkos_OpenMPTarget_ParallelFor_MDRange.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -246,6 +246,49 @@ class ParallelFor<FunctorType, Kokkos::MDRangePolicy<Traits...>,
const Index end_1 = policy.m_upper[1];
const Index end_2 = policy.m_upper[2];


#if defined(KOKKOS_IMPL_OPENMPTARGET_KERNEL_MODE)
const Index tot = (end_2-begin_2) * (end_1-begin_1) * (end_0-begin_0);

auto tot_inner = (end_1 - begin_1) * (end_0 - begin_0);
auto tot_outer = (end_2 - begin_2) * tot_inner;

//#pragma omp target teams distribute parallel for map(to : functor)
//for (auto iter2 = 0; iter2 < tot_outer; ++iter2) {
// if(omp_get_team_num() == 0 && omp_get_num_threads() == 0)
// printf("num_teams = %d, team_size = %d, thread)id = %d\n", omp_get_num_teams(), omp_get_num_threads(), omp_get_num_threads());

constexpr const int team_size = 1;
const int num_teams = (tot_outer + team_size - 1) / team_size * team_size;

printf("tot_outer = %d\n", tot_outer);

for (auto tmp = 0; tmp < tot_outer; ++tmp) {
#pragma omp target teams ompx_bare thread_limit(1) num_teams(1) map(to:functor) firstprivate(tot_outer)
{
const Index blockDimx = ompx::block_dim(ompx::dim_x);
const Index blockIdx = ompx::block_id(ompx::dim_x);
const Index threadIdx = ompx::thread_id(ompx::dim_x);

auto iter2 = tmp; //+ blockDimx * blockIdx + threadIdx;
if (iter2 < tot_outer) {
auto i2 = iter2 / tot_inner;
auto iter = iter2 % tot_inner;

auto i1 = iter / (end_0 - begin_0);
auto i0 = iter % (end_0 - begin_0);

// printf("(i0,i1,i2) = (%d,%d,%d)\n", i0,i1,i2);
// printf("blockIdx = %d, iter2 = %d\n", blockIdx, iter2);

if constexpr (std::is_void<typename Policy::work_tag>::value)
functor(i0, i1, i2);
else
functor(typename Policy::work_tag(), i0, i1, i2);
}
}
}
#else
#pragma omp target teams distribute parallel for collapse(3) map(to : functor)
for (auto i2 = begin_2; i2 < end_2; ++i2) {
for (auto i1 = begin_1; i1 < end_1; ++i1) {
Expand All @@ -257,6 +300,8 @@ class ParallelFor<FunctorType, Kokkos::MDRangePolicy<Traits...>,
}
}
}
#endif

}

template <int Rank>
Expand Down
6 changes: 3 additions & 3 deletions core/src/impl/Kokkos_HostThreadTeam.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -130,10 +130,10 @@ int HostThreadTeamData::organize_team(const int team_size) {
// zombie team around (for example m_pool_size = 5 and team_size = 2)
// (ii) if team_alloc > team_size then the last team might have fewer
// threads than the others
m_team_rank = (team_base_rank + team_size <= m_pool_size) &&
m_team_rank = (team_base_rank + team_size <= m_pool_size) &&
(team_alloc_rank < team_size)
? team_alloc_rank
: -1;
? team_alloc_rank
: -1;
m_team_size = team_size;
m_team_alloc = team_alloc_size;
m_league_rank = league_rank;
Expand Down

0 comments on commit 8d81455

Please sign in to comment.