Skip to content

Commit

Permalink
3D integer operations
Browse files: browse the repository at this point in the history
  • Loading branch information
DiamonDinoia committed Jul 18, 2024
1 parent e1ad9bb commit 366295d
Show file tree
Hide file tree
Showing 13 changed files with 549 additions and 545 deletions.
6 changes: 4 additions & 2 deletions perftest/cuda/bench.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,14 +37,14 @@ def build_args(args):
# example command to run:
# nsys profile -o cuperftest_profile ./cuperftest --prec f --n_runs 10 --method 1 --N1 256 --N2 256 --N3 256 --M 1E8 --tol 1E-6
# example arguments
args = {"--prec": "d",
args = {"--prec": "f",
"--n_runs": "5",
"--method": "0",
"--sort": "1",
# "--N1": "16777216",
"--N1": "256",
"--N2": "256",
# "--N3": "256",
"--N3": "256",
"--kerevalmethod": "1",
"--M": "1E8",
"--tol": "1E-6"}
Expand Down Expand Up @@ -82,6 +82,8 @@ def build_args(args):
data['method'].append('GM')
elif method == '2':
data['method'].append('SM')
elif method == '4':
data['method'].append('BLOCK')
print("Method " + data['method'][-1])
cmd = ["profile", "--force-overwrite", "true", "-o", "cuperftest_profile", cwd + "/cuperftest"] + build_args(args)
stdout, stderr = run_command("nsys", cmd)
Expand Down
3 changes: 0 additions & 3 deletions src/cuda/1d/cufinufft1d.cu
Original file line number Diff line number Diff line change
@@ -1,9 +1,6 @@
#include <cmath>
#include <complex>
#include <cufinufft/contrib/helper_cuda.h>
#include <iomanip>
#include <iostream>
#include <type_traits>

#include <cassert>
#include <cufft.h>
Expand Down
4 changes: 0 additions & 4 deletions src/cuda/1d/interp1d_wrapper.cu
Original file line number Diff line number Diff line change
@@ -1,14 +1,10 @@
#include <cuComplex.h>
#include <cufinufft/contrib/helper_cuda.h>
#include <iomanip>
#include <iostream>

#include <cufinufft/memtransfer.h>
#include <cufinufft/spreadinterp.h>
#include <cufinufft/types.h>

using namespace cufinufft::memtransfer;

#include "spreadinterp1d.cuh"

namespace cufinufft {
Expand Down
1 change: 0 additions & 1 deletion src/cuda/1d/spread1d_wrapper.cu
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
#include <cassert>
#include <cufinufft/contrib/helper_cuda.h>
#include <iomanip>
#include <iostream>

#include <cuComplex.h>
Expand Down
1 change: 0 additions & 1 deletion src/cuda/1d/spreadinterp1d.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,6 @@
#include <cufinufft/contrib/helper_cuda.h>
#include <thrust/extrema.h>

#include <cuda/std/complex>
#include <cufinufft/defs.h>
#include <cufinufft/spreadinterp.h>
#include <cufinufft/utils.h>
Expand Down
6 changes: 1 addition & 5 deletions src/cuda/2d/cufinufft2d.cu
Original file line number Diff line number Diff line change
@@ -1,14 +1,10 @@
#include <assert.h>
#include <cassert>
#include <cmath>
#include <complex>
#include <iomanip>
#include <iostream>

#include <cufft.h>
#include <cufinufft/contrib/helper_cuda.h>

#include <cufinufft/cudeconvolve.h>
#include <cufinufft/memtransfer.h>
#include <cufinufft/spreadinterp.h>

using namespace cufinufft::deconvolve;
Expand Down
3 changes: 0 additions & 3 deletions src/cuda/2d/interp2d_wrapper.cu
Original file line number Diff line number Diff line change
@@ -1,14 +1,11 @@
#include <iomanip>
#include <iostream>

#include <cuComplex.h>
#include <cufinufft/contrib/helper_cuda.h>

#include <cufinufft/common.h>
#include <cufinufft/memtransfer.h>
#include <cufinufft/spreadinterp.h>

using namespace cufinufft::memtransfer;
using namespace cufinufft::common;

#include "spreadinterp2d.cuh"
Expand Down
3 changes: 0 additions & 3 deletions src/cuda/2d/spread2d_wrapper.cu
Original file line number Diff line number Diff line change
@@ -1,5 +1,4 @@
#include <cassert>
#include <iomanip>
#include <iostream>

#include <cuComplex.h>
Expand All @@ -8,14 +7,12 @@
#include <thrust/scan.h>

#include <cufinufft/common.h>
#include <cufinufft/memtransfer.h>
#include <cufinufft/precision_independent.h>
#include <cufinufft/spreadinterp.h>

#include "spreadinterp2d.cuh"

using namespace cufinufft::common;
using namespace cufinufft::memtransfer;

namespace cufinufft {
namespace spreadinterp {
Expand Down
3 changes: 0 additions & 3 deletions src/cuda/3d/cufinufft3d.cu
Original file line number Diff line number Diff line change
@@ -1,13 +1,10 @@
#include <cmath>
#include <complex>
#include <iomanip>
#include <iostream>

#include <cufft.h>
#include <cufinufft/contrib/helper_cuda.h>

#include <cufinufft/cudeconvolve.h>
#include <cufinufft/memtransfer.h>
#include <cufinufft/spreadinterp.h>
#include <cufinufft/types.h>

Expand Down
24 changes: 11 additions & 13 deletions src/cuda/3d/interp3d_wrapper.cu
Original file line number Diff line number Diff line change
@@ -1,15 +1,15 @@
#include <iomanip>
#include <iostream>

#include <cuComplex.h>
#include <cufinufft/contrib/helper_cuda.h>

#include "spreadinterp3d.cuh"
#include <cufinufft/common.h>
#include <cufinufft/memtransfer.h>
#include <cufinufft/spreadinterp.h>

#include "spreadinterp3d.cuh"

using namespace cufinufft::memtransfer;
using namespace cufinufft::common;

namespace cufinufft {
namespace spreadinterp {
Expand Down Expand Up @@ -123,26 +123,24 @@ int cuinterp3d_subprob(int nf1, int nf2, int nf3, int M, cufinufft_plan_t<T> *d_
int *d_subprob_to_bin = d_plan->subprob_to_bin;
int totalnumsubprob = d_plan->totalnumsubprob;

T sigma = d_plan->spopts.upsampfac;
T es_c = d_plan->spopts.ES_c;
T es_beta = d_plan->spopts.ES_beta;
size_t sharedplanorysize = (bin_size_x + 2 * ceil(ns / 2.0)) *
(bin_size_y + 2 * ceil(ns / 2.0)) *
(bin_size_z + 2 * ceil(ns / 2.0)) * sizeof(cuda_complex<T>);
if (sharedplanorysize > 49152) {
std::cerr << "[cuinterp3d_subprob] error: not enough shared memory\n";
return FINUFFT_ERR_INSUFFICIENT_SHMEM;
}
T sigma = d_plan->spopts.upsampfac;
T es_c = d_plan->spopts.ES_c;
T es_beta = d_plan->spopts.ES_beta;
const auto sharedplanorysize =
shared_memory_required<T>(3, d_plan->spopts.nspread, d_plan->opts.gpu_binsizex,
d_plan->opts.gpu_binsizey, d_plan->opts.gpu_binsizez);

for (int t = 0; t < blksize; t++) {
if (d_plan->opts.gpu_kerevalmeth == 1) {
cufinufft_set_shared_memory(interp_3d_subprob<T, 1>, 3, *d_plan);
interp_3d_subprob<T, 1><<<totalnumsubprob, 256, sharedplanorysize, stream>>>(
d_kx, d_ky, d_kz, d_c + t * M, d_fw + t * nf1 * nf2 * nf3, M, ns, nf1, nf2, nf3,
es_c, es_beta, sigma, d_binstartpts, d_binsize, bin_size_x, bin_size_y,
bin_size_z, d_subprob_to_bin, d_subprobstartpts, d_numsubprob, maxsubprobsize,
numbins[0], numbins[1], numbins[2], d_idxnupts);
RETURN_IF_CUDA_ERROR
} else {
cufinufft_set_shared_memory(interp_3d_subprob<T, 0>, 3, *d_plan);
interp_3d_subprob<T, 0><<<totalnumsubprob, 256, sharedplanorysize, stream>>>(
d_kx, d_ky, d_kz, d_c + t * M, d_fw + t * nf1 * nf2 * nf3, M, ns, nf1, nf2, nf3,
es_c, es_beta, sigma, d_binstartpts, d_binsize, bin_size_x, bin_size_y,
Expand Down
15 changes: 6 additions & 9 deletions src/cuda/3d/spread3d_wrapper.cu
Original file line number Diff line number Diff line change
@@ -1,5 +1,4 @@
#include <cassert>
#include <iomanip>
#include <iostream>

#include <cuComplex.h>
Expand All @@ -8,12 +7,10 @@
#include <thrust/scan.h>

#include <cufinufft/common.h>
#include <cufinufft/memtransfer.h>
#include <cufinufft/precision_independent.h>
#include <cufinufft/spreadinterp.h>

using namespace cufinufft::common;
using namespace cufinufft::memtransfer;

#include "spreadinterp3d.cuh"

Expand Down Expand Up @@ -532,12 +529,12 @@ int cuspread3d_subprob(int nf1, int nf2, int nf3, int M, cufinufft_plan_t<T> *d_
int totalnumsubprob = d_plan->totalnumsubprob;
int *d_subprob_to_bin = d_plan->subprob_to_bin;

T sigma = d_plan->spopts.upsampfac;
T es_c = d_plan->spopts.ES_c;
T es_beta = d_plan->spopts.ES_beta;
size_t sharedplanorysize = (bin_size_x + 2 * ceil(ns / 2.0)) *
(bin_size_y + 2 * ceil(ns / 2.0)) *
(bin_size_z + 2 * ceil(ns / 2.0)) * sizeof(cuda_complex<T>);
T sigma = d_plan->spopts.upsampfac;
T es_c = d_plan->spopts.ES_c;
T es_beta = d_plan->spopts.ES_beta;
const auto sharedplanorysize =
shared_memory_required<T>(3, d_plan->spopts.nspread, d_plan->opts.gpu_binsizex,
d_plan->opts.gpu_binsizey, d_plan->opts.gpu_binsizez);
for (int t = 0; t < blksize; t++) {
if (d_plan->opts.gpu_kerevalmeth) {
cufinufft_set_shared_memory(spread_3d_subprob<T, 1>, 3, *d_plan);
Expand Down
Loading

0 comments on commit 366295d

Please sign in to comment.