Skip to content

Commit

Permalink
Some kernel perf updates.
Browse files (browse the repository at this point in the history)
Co-authored-by: Yuhsiang Tsai <[email protected]>
  • Loading branch information
pratikvn and yhmtsai committed Mar 7, 2022
1 parent 1926b88 commit a690dba
Show file tree
Hide file tree
Showing 6 changed files with 107 additions and 86 deletions.
3 changes: 2 additions & 1 deletion omp/base/index_set_kernels.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -165,7 +165,8 @@ void global_to_local(std::shared_ptr<const DefaultExecutor> exec,
subset_begin,
std::upper_bound(subset_begin, subset_begin + num_subsets, index));
auto shifted_bucket = bucket == 0 ? 0 : (bucket - 1);
if (subset_end[shifted_bucket] <= index) {
if (subset_end[shifted_bucket] <= index ||
index < subset_begin[shifted_bucket]) {
local_indices[i] = invalid_index<IndexType>();
} else {
local_indices[i] = index - subset_begin[shifted_bucket] +
Expand Down
89 changes: 47 additions & 42 deletions omp/matrix/csr_kernels.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -752,32 +752,35 @@ void calculate_nonzeros_per_row_in_index_set(
const IndexSet<IndexType>& col_index_set, Array<IndexType>* row_nnz)
{
auto num_row_subsets = row_index_set.get_num_subsets();
auto num_col_subsets = col_index_set.get_num_subsets();
auto row_superset_indices = row_index_set.get_superset_indices();
auto row_subset_begin = row_index_set.get_subsets_begin();
auto row_subset_end = row_index_set.get_subsets_end();
auto col_subset_begin = col_index_set.get_subsets_begin();
auto col_subset_end = col_index_set.get_subsets_end();
auto src_ptrs = source->get_const_row_ptrs();
size_type res_row = 0;
size_type max_row_nnz = 0;
for (size_type i = 1; i < source->get_size()[0] + 1; i++) {
max_row_nnz =
std::max<size_type>(max_row_nnz, src_ptrs[i] - src_ptrs[i - 1]);
}
Array<IndexType> l_idxs(exec, max_row_nnz);

#pragma omp parallel for
for (size_type set = 0; set < num_row_subsets; ++set) {
size_type res_row = row_superset_indices[set];
for (auto row = row_subset_begin[set]; row < row_subset_end[set];
++row) {
row_nnz->get_data()[res_row] = zero<IndexType>();
gko::kernels::omp::index_set::global_to_local(
exec, col_index_set.get_size(), col_index_set.get_num_subsets(),
col_index_set.get_subsets_begin(),
col_index_set.get_subsets_end(),
col_index_set.get_superset_indices(),
static_cast<IndexType>(l_idxs.get_num_elems()),
source->get_const_col_idxs() + src_ptrs[row], l_idxs.get_data(),
false);
for (size_type nnz = 0; nnz < (src_ptrs[row + 1] - src_ptrs[row]);
++nnz) {
auto l_idx = l_idxs.get_const_data()[nnz];
if (l_idx != invalid_index<IndexType>()) {
for (size_type i = src_ptrs[row]; i < src_ptrs[row + 1]; ++i) {
auto index = source->get_const_col_idxs()[i];
if (index >= col_index_set.get_size()) {
continue;
}
const auto bucket = std::distance(
col_subset_begin,
std::upper_bound(col_subset_begin,
col_subset_begin + num_col_subsets,
index));
auto shifted_bucket = bucket == 0 ? 0 : (bucket - 1);
if (col_subset_end[shifted_bucket] <= index ||
(index < col_subset_begin[shifted_bucket])) {
continue;
} else {
row_nnz->get_data()[res_row]++;
}
}
Expand Down Expand Up @@ -839,35 +842,37 @@ void compute_submatrix_from_index_set(
auto res_row_ptrs = result->get_row_ptrs();
auto res_col_idxs = result->get_col_idxs();
auto res_values = result->get_values();
const auto src_row_ptrs = source->get_const_row_ptrs();
auto num_col_subsets = col_index_set.get_num_subsets();
auto col_subset_begin = col_index_set.get_subsets_begin();
auto col_subset_end = col_index_set.get_subsets_end();
auto col_superset_indices = col_index_set.get_superset_indices();
const auto src_ptrs = source->get_const_row_ptrs();
const auto src_col_idxs = source->get_const_col_idxs();
const auto src_values = source->get_const_values();
size_type max_row_nnz = 0;
for (size_type i = 1; i < source->get_size()[0] + 1; i++) {
max_row_nnz = std::max<size_type>(
max_row_nnz, src_row_ptrs[i] - src_row_ptrs[i - 1]);
}
Array<IndexType> l_idxs(exec, max_row_nnz);

#pragma omp parallel for
size_type res_nnz = 0;
for (size_type set = 0; set < num_row_subsets; ++set) {
for (auto row = row_subset_begin[set]; row < row_subset_end[set];
++row) {
size_type res_nnz = res_row_ptrs[row - row_subset_begin[set]];
gko::kernels::omp::index_set::global_to_local(
exec, col_index_set.get_size(), col_index_set.get_num_subsets(),
col_index_set.get_subsets_begin(),
col_index_set.get_subsets_end(),
col_index_set.get_superset_indices(),
static_cast<IndexType>(l_idxs.get_num_elems()),
source->get_const_col_idxs() + src_row_ptrs[row],
l_idxs.get_data(), false);
for (size_type nnz = 0;
nnz < (src_row_ptrs[row + 1] - src_row_ptrs[row]); ++nnz) {
auto l_idx = l_idxs.get_const_data()[nnz];
if (l_idx != invalid_index<IndexType>()) {
res_col_idxs[res_nnz] = l_idx;
res_values[res_nnz] = src_values[nnz + src_row_ptrs[row]];
for (size_type i = src_ptrs[row]; i < src_ptrs[row + 1]; ++i) {
auto index = src_col_idxs[i];
if (index >= col_index_set.get_size()) {
continue;
}
const auto bucket = std::distance(
col_subset_begin,
std::upper_bound(col_subset_begin,
col_subset_begin + num_col_subsets,
index));
auto shifted_bucket = bucket == 0 ? 0 : (bucket - 1);
if (col_subset_end[shifted_bucket] <= index ||
(index < col_subset_begin[shifted_bucket])) {
continue;
} else {
res_col_idxs[res_nnz] =
index - col_subset_begin[shifted_bucket] +
col_superset_indices[shifted_bucket];
res_values[res_nnz] = src_values[i];
res_nnz++;
}
}
Expand Down
2 changes: 0 additions & 2 deletions omp/test/matrix/csr_kernels.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -776,7 +776,6 @@ TEST_F(Csr, ComputeSubmatrixFromIndexSetIsEquivalentToRef)
this->ref, row_nnz.get_data(), row_nnz.get_num_elems());
auto num_nnz = row_nnz.get_data()[rset.get_num_elems()];
auto drow_nnz = gko::Array<int>(this->omp, row_nnz);
drow_nnz.fill(gko::one<int>());
auto smat1 = Mtx::create(
this->ref, gko::dim<2>(rset.get_num_elems(), cset.get_num_elems()),
std::move(gko::Array<ValueType>(this->ref, num_nnz)),
Expand All @@ -788,7 +787,6 @@ TEST_F(Csr, ComputeSubmatrixFromIndexSetIsEquivalentToRef)
std::move(gko::Array<IndexType>(this->omp, num_nnz)),
std::move(drow_nnz));


gko::kernels::reference::csr::compute_submatrix_from_index_set(
this->ref, this->mtx2.get(), rset, cset, smat1.get());
gko::kernels::omp::csr::compute_submatrix_from_index_set(
Expand Down
3 changes: 2 additions & 1 deletion reference/base/index_set_kernels.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -190,7 +190,8 @@ void global_to_local(std::shared_ptr<const DefaultExecutor> exec,
subset_begin, std::upper_bound(shifted_subset,
subset_begin + num_subsets, index));
shifted_bucket = bucket == 0 ? 0 : (bucket - 1);
if (subset_end[shifted_bucket] <= index) {
if (subset_end[shifted_bucket] <= index ||
index < subset_begin[shifted_bucket]) {
local_indices[i] = invalid_index<IndexType>();
} else {
local_indices[i] = index - subset_begin[shifted_bucket] +
Expand Down
84 changes: 44 additions & 40 deletions reference/matrix/csr_kernels.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -632,33 +632,34 @@ void calculate_nonzeros_per_row_in_index_set(
const IndexSet<IndexType>& row_index_set,
const IndexSet<IndexType>& col_index_set, Array<IndexType>* row_nnz)
{
size_type res_row = 0;
auto num_row_subsets = row_index_set.get_num_subsets();
auto row_subset_begin = row_index_set.get_subsets_begin();
auto row_subset_end = row_index_set.get_subsets_end();
auto row_superset_indices = row_index_set.get_superset_indices();
auto num_col_subsets = col_index_set.get_num_subsets();
auto col_subset_begin = col_index_set.get_subsets_begin();
auto col_subset_end = col_index_set.get_subsets_end();
auto src_ptrs = source->get_const_row_ptrs();
size_type max_row_nnz = 0;
for (size_type i = 1; i < source->get_size()[0] + 1; i++) {
max_row_nnz =
std::max<size_type>(max_row_nnz, src_ptrs[i] - src_ptrs[i - 1]);
}
Array<IndexType> l_idxs(exec, max_row_nnz);
for (size_type set = 0; set < num_row_subsets; ++set) {
size_type res_row = row_superset_indices[set];
for (auto row = row_subset_begin[set]; row < row_subset_end[set];
++row) {
row_nnz->get_data()[res_row] = zero<IndexType>();
gko::kernels::reference::index_set::global_to_local(
exec, col_index_set.get_size(), col_index_set.get_num_subsets(),
col_index_set.get_subsets_begin(),
col_index_set.get_subsets_end(),
col_index_set.get_superset_indices(),
static_cast<IndexType>(l_idxs.get_num_elems()),
source->get_const_col_idxs() + src_ptrs[row], l_idxs.get_data(),
false);
for (IndexType nnz = 0; nnz < (src_ptrs[row + 1] - src_ptrs[row]);
++nnz) {
auto l_idx = l_idxs.get_const_data()[nnz];
if (l_idx != invalid_index<IndexType>()) {
for (size_type i = src_ptrs[row]; i < src_ptrs[row + 1]; ++i) {
auto index = source->get_const_col_idxs()[i];
if (index >= col_index_set.get_size()) {
continue;
}
const auto bucket = std::distance(
col_subset_begin,
std::upper_bound(col_subset_begin,
col_subset_begin + num_col_subsets,
index));
auto shifted_bucket = bucket == 0 ? 0 : (bucket - 1);
if (col_subset_end[shifted_bucket] <= index ||
(index < col_subset_begin[shifted_bucket])) {
continue;
} else {
row_nnz->get_data()[res_row]++;
}
}
Expand Down Expand Up @@ -721,34 +722,37 @@ void compute_submatrix_from_index_set(
auto res_row_ptrs = result->get_row_ptrs();
auto res_col_idxs = result->get_col_idxs();
auto res_values = result->get_values();
const auto src_row_ptrs = source->get_const_row_ptrs();
auto num_col_subsets = col_index_set.get_num_subsets();
auto col_subset_begin = col_index_set.get_subsets_begin();
auto col_subset_end = col_index_set.get_subsets_end();
auto col_superset_indices = col_index_set.get_superset_indices();
const auto src_ptrs = source->get_const_row_ptrs();
const auto src_col_idxs = source->get_const_col_idxs();
const auto src_values = source->get_const_values();

size_type res_nnz = 0;
size_type max_row_nnz = 0;
for (size_type i = 1; i < source->get_size()[0] + 1; i++) {
max_row_nnz = std::max<size_type>(
max_row_nnz, src_row_ptrs[i] - src_row_ptrs[i - 1]);
}
Array<IndexType> l_idxs(exec, max_row_nnz);
for (size_type set = 0; set < num_row_subsets; ++set) {
for (auto row = row_subset_begin[set]; row < row_subset_end[set];
++row) {
gko::kernels::reference::index_set::global_to_local(
exec, col_index_set.get_size(), col_index_set.get_num_subsets(),
col_index_set.get_subsets_begin(),
col_index_set.get_subsets_end(),
col_index_set.get_superset_indices(),
static_cast<IndexType>(l_idxs.get_num_elems()),
source->get_const_col_idxs() + src_row_ptrs[row],
l_idxs.get_data(), false);
for (auto nnz = 0;
nnz < (src_row_ptrs[row + 1] - src_row_ptrs[row]); ++nnz) {
auto l_idx = l_idxs.get_const_data()[nnz];
if (l_idx != invalid_index<IndexType>()) {
res_col_idxs[res_nnz] = l_idx;
res_values[res_nnz] = src_values[nnz + src_row_ptrs[row]];
for (size_type i = src_ptrs[row]; i < src_ptrs[row + 1]; ++i) {
auto index = source->get_const_col_idxs()[i];
if (index >= col_index_set.get_size()) {
continue;
}
const auto bucket = std::distance(
col_subset_begin,
std::upper_bound(col_subset_begin,
col_subset_begin + num_col_subsets,
index));
auto shifted_bucket = bucket == 0 ? 0 : (bucket - 1);
if (col_subset_end[shifted_bucket] <= index ||
(index < col_subset_begin[shifted_bucket])) {
continue;
} else {
res_col_idxs[res_nnz] =
index - col_subset_begin[shifted_bucket] +
col_superset_indices[shifted_bucket];
res_values[res_nnz] = src_values[i];
res_nnz++;
}
}
Expand Down
12 changes: 12 additions & 0 deletions reference/test/matrix/csr_kernels.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1830,6 +1830,18 @@ TYPED_TEST(Csr, CanGetSubmatrixWithIndexSet)

GKO_EXPECT_MTX_NEAR(sub_mat1.get(), ref1.get(), 0.0);
}

{
SCOPED_TRACE("Non Square 2x4");
auto row_set = gko::IndexSet<index_type>(this->exec, {5, 6});
auto col_set = gko::IndexSet<index_type>(this->exec, {4, 3, 0, 1});
auto sub_mat1 = mat->create_submatrix(row_set, col_set);
auto ref1 = gko::initialize<Mtx>({I<T>{0.0, 1.0, 0.0, 2.0}, // 5
I<T>{0.0, 3.0, 7.5, 1.0}}, // 6
this->exec);

GKO_EXPECT_MTX_NEAR(sub_mat1.get(), ref1.get(), 0.0);
}
}


Expand Down

0 comments on commit a690dba

Please sign in to comment.