Skip to content

Commit

Permalink
cuda: update interface to take 64-bit M
Browse files Browse the repository at this point in the history
For compatibility with FINUFFT.
  • Loading branch information
janden committed Dec 27, 2023
1 parent e602697 commit 338b5e2
Show file tree
Hide file tree
Showing 13 changed files with 89 additions and 89 deletions.
4 changes: 2 additions & 2 deletions include/cufinufft.h
Original file line number Diff line number Diff line change
Expand Up @@ -19,9 +19,9 @@ int cufinufft_makeplan(int type, int dim, const int64_t *n_modes, int iflag, int
int cufinufftf_makeplan(int type, int dim, const int64_t *n_modes, int iflag, int ntr, float eps,
cufinufftf_plan *d_plan_ptr, cufinufft_opts *opts);

int cufinufft_setpts(cufinufft_plan d_plan, int M, double *d_x, double *d_y, double *d_z, int N, double *d_s,
int cufinufft_setpts(cufinufft_plan d_plan, int64_t M, double *d_x, double *d_y, double *d_z, int N, double *d_s,
double *d_t, double *d_u);
int cufinufftf_setpts(cufinufftf_plan d_plan, int M, float *d_x, float *d_y, float *d_z, int N, float *d_s,
int cufinufftf_setpts(cufinufftf_plan d_plan, int64_t M, float *d_x, float *d_y, float *d_z, int N, float *d_s,
float *d_t, float *d_u);

int cufinufft_execute(cufinufft_plan d_plan, cuDoubleComplex *d_c, cuDoubleComplex *d_fk);
Expand Down
2 changes: 1 addition & 1 deletion include/cufinufft/impl.h
Original file line number Diff line number Diff line change
Expand Up @@ -264,7 +264,7 @@ int cufinufft_makeplan_impl(int type, int dim, int *nmodes, int iflag, int ntran
}

template <typename T>
int cufinufft_setpts_impl(int M, T *d_kx, T *d_ky, T *d_kz, int N, T *d_s, T *d_t, T *d_u, cufinufft_plan_t<T> *d_plan)
int cufinufft_setpts_impl(int64_t M, T *d_kx, T *d_ky, T *d_kz, int N, T *d_s, T *d_t, T *d_u, cufinufft_plan_t<T> *d_plan)
/*
"setNUpts" stage (in single or double precision).
Expand Down
38 changes: 19 additions & 19 deletions include/cufinufft/spreadinterp.h
Original file line number Diff line number Diff line change
Expand Up @@ -88,46 +88,46 @@ int cuinterp3d(cufinufft_plan_t<T> *d_plan, int blksize);

// Wrappers for methods of spreading
template <typename T>
int cuspread1d_nuptsdriven_prop(int nf1, int M, cufinufft_plan_t<T> *d_plan);
int cuspread1d_nuptsdriven_prop(int nf1, int64_t M, cufinufft_plan_t<T> *d_plan);
template <typename T>
int cuspread1d_nuptsdriven(int nf1, int M, cufinufft_plan_t<T> *d_plan, int blksize);
int cuspread1d_nuptsdriven(int nf1, int64_t M, cufinufft_plan_t<T> *d_plan, int blksize);
template <typename T>
int cuspread1d_subprob_prop(int nf1, int M, cufinufft_plan_t<T> *d_plan);
int cuspread1d_subprob_prop(int nf1, int64_t M, cufinufft_plan_t<T> *d_plan);
template <typename T>
int cuspread1d_subprob(int nf1, int M, cufinufft_plan_t<T> *d_plan, int blksize);
int cuspread1d_subprob(int nf1, int64_t M, cufinufft_plan_t<T> *d_plan, int blksize);

template <typename T>
int cuspread2d_nuptsdriven_prop(int nf1, int nf2, int M, cufinufft_plan_t<T> *d_plan);
int cuspread2d_nuptsdriven_prop(int nf1, int nf2, int64_t M, cufinufft_plan_t<T> *d_plan);
template <typename T>
int cuspread2d_nuptsdriven(int nf1, int nf2, int M, cufinufft_plan_t<T> *d_plan, int blksize);
int cuspread2d_nuptsdriven(int nf1, int nf2, int64_t M, cufinufft_plan_t<T> *d_plan, int blksize);
template <typename T>
int cuspread2d_subprob_prop(int nf1, int nf2, int M, cufinufft_plan_t<T> *d_plan);
int cuspread2d_subprob_prop(int nf1, int nf2, int64_t M, cufinufft_plan_t<T> *d_plan);
template <typename T>
int cuspread2d_subprob(int nf1, int nf2, int m, cufinufft_plan_t<T> *d_plan, int blksize);
int cuspread2d_subprob(int nf1, int nf2, int64_t m, cufinufft_plan_t<T> *d_plan, int blksize);
template <typename T>
int cuspread3d_nuptsdriven_prop(int nf1, int nf2, int nf3, int M, cufinufft_plan_t<T> *d_plan);
int cuspread3d_nuptsdriven_prop(int nf1, int nf2, int nf3, int64_t M, cufinufft_plan_t<T> *d_plan);
template <typename T>
int cuspread3d_nuptsdriven(int nf1, int nf2, int nf3, int M, cufinufft_plan_t<T> *d_plan, int blksize);
int cuspread3d_nuptsdriven(int nf1, int nf2, int nf3, int64_t M, cufinufft_plan_t<T> *d_plan, int blksize);
template <typename T>
int cuspread3d_blockgather_prop(int nf1, int nf2, int nf3, int M, cufinufft_plan_t<T> *d_plan);
int cuspread3d_blockgather_prop(int nf1, int nf2, int nf3, int64_t M, cufinufft_plan_t<T> *d_plan);
template <typename T>
int cuspread3d_blockgather(int nf1, int nf2, int nf3, int M, cufinufft_plan_t<T> *d_plan, int blksize);
int cuspread3d_blockgather(int nf1, int nf2, int nf3, int64_t M, cufinufft_plan_t<T> *d_plan, int blksize);
template <typename T>
int cuspread3d_subprob_prop(int nf1, int nf2, int nf3, int M, cufinufft_plan_t<T> *d_plan);
int cuspread3d_subprob_prop(int nf1, int nf2, int nf3, int64_t M, cufinufft_plan_t<T> *d_plan);
template <typename T>
int cuspread3d_subprob(int nf1, int nf2, int nf3, int M, cufinufft_plan_t<T> *d_plan, int blksize);
int cuspread3d_subprob(int nf1, int nf2, int nf3, int64_t M, cufinufft_plan_t<T> *d_plan, int blksize);

// Wrappers for methods of interpolation
template <typename T>
int cuinterp1d_nuptsdriven(int nf1, int M, cufinufft_plan_t<T> *d_plan, int blksize);
int cuinterp1d_nuptsdriven(int nf1, int64_t M, cufinufft_plan_t<T> *d_plan, int blksize);
template <typename T>
int cuinterp2d_nuptsdriven(int nf1, int nf2, int M, cufinufft_plan_t<T> *d_plan, int blksize);
int cuinterp2d_nuptsdriven(int nf1, int nf2, int64_t M, cufinufft_plan_t<T> *d_plan, int blksize);
template <typename T>
int cuinterp2d_subprob(int nf1, int nf2, int M, cufinufft_plan_t<T> *d_plan, int blksize);
int cuinterp2d_subprob(int nf1, int nf2, int64_t M, cufinufft_plan_t<T> *d_plan, int blksize);
template <typename T>
int cuinterp3d_nuptsdriven(int nf1, int nf2, int nf3, int M, cufinufft_plan_t<T> *d_plan, int blksize);
int cuinterp3d_nuptsdriven(int nf1, int nf2, int nf3, int64_t M, cufinufft_plan_t<T> *d_plan, int blksize);
template <typename T>
int cuinterp3d_subprob(int nf1, int nf2, int nf3, int M, cufinufft_plan_t<T> *d_plan, int blksize);
int cuinterp3d_subprob(int nf1, int nf2, int nf3, int64_t M, cufinufft_plan_t<T> *d_plan, int blksize);

} // namespace spreadinterp
} // namespace cufinufft
Expand Down
2 changes: 1 addition & 1 deletion include/cufinufft/types.h
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@ struct cufinufft_plan_t {

int type;
int dim;
CUFINUFFT_BIGINT M;
int64_t M;
CUFINUFFT_BIGINT nf1;
CUFINUFFT_BIGINT nf2;
CUFINUFFT_BIGINT nf3;
Expand Down
4 changes: 2 additions & 2 deletions src/cuda/1d/interp1d_wrapper.cu
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@ int cuinterp1d(cufinufft_plan_t<T> *d_plan, int blksize)
*/
{
int nf1 = d_plan->nf1;
int M = d_plan->M;
int64_t M = d_plan->M;

int ier;
switch (d_plan->opts.gpu_method) {
Expand All @@ -43,7 +43,7 @@ int cuinterp1d(cufinufft_plan_t<T> *d_plan, int blksize)
}

template <typename T>
int cuinterp1d_nuptsdriven(int nf1, int M, cufinufft_plan_t<T> *d_plan, int blksize) {
int cuinterp1d_nuptsdriven(int nf1, int64_t M, cufinufft_plan_t<T> *d_plan, int blksize) {
auto &stream = d_plan->stream;
dim3 threadsPerBlock;
dim3 blocks;
Expand Down
18 changes: 9 additions & 9 deletions src/cuda/1d/spread1d_wrapper.cu
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@ int cuspread1d(cufinufft_plan_t<T> *d_plan, int blksize)
*/
{
int nf1 = d_plan->nf1;
int M = d_plan->M;
int64_t M = d_plan->M;

int ier;
switch (d_plan->opts.gpu_method) {
Expand All @@ -51,7 +51,7 @@ int cuspread1d(cufinufft_plan_t<T> *d_plan, int blksize)
}

template <typename T>
int cuspread1d_nuptsdriven_prop(int nf1, int M, cufinufft_plan_t<T> *d_plan) {
int cuspread1d_nuptsdriven_prop(int nf1, int64_t M, cufinufft_plan_t<T> *d_plan) {
auto &stream = d_plan->stream;

if (d_plan->opts.gpu_sort) {
Expand Down Expand Up @@ -96,7 +96,7 @@ int cuspread1d_nuptsdriven_prop(int nf1, int M, cufinufft_plan_t<T> *d_plan) {
}

template <typename T>
int cuspread1d_nuptsdriven(int nf1, int M, cufinufft_plan_t<T> *d_plan, int blksize) {
int cuspread1d_nuptsdriven(int nf1, int64_t M, cufinufft_plan_t<T> *d_plan, int blksize) {
auto &stream = d_plan->stream;
dim3 threadsPerBlock;
dim3 blocks;
Expand Down Expand Up @@ -135,7 +135,7 @@ int cuspread1d_nuptsdriven(int nf1, int M, cufinufft_plan_t<T> *d_plan, int blks
}

template <typename T>
int cuspread1d_subprob_prop(int nf1, int M, cufinufft_plan_t<T> *d_plan)
int cuspread1d_subprob_prop(int nf1, int64_t M, cufinufft_plan_t<T> *d_plan)
/*
This function determines the properties for spreading that are independent
of the strength of the nodes, only relates to the locations of the nodes,
Expand Down Expand Up @@ -217,7 +217,7 @@ int cuspread1d_subprob_prop(int nf1, int M, cufinufft_plan_t<T> *d_plan)
}

template <typename T>
int cuspread1d_subprob(int nf1, int M, cufinufft_plan_t<T> *d_plan, int blksize) {
int cuspread1d_subprob(int nf1, int64_t M, cufinufft_plan_t<T> *d_plan, int blksize) {
auto &stream = d_plan->stream;

int ns = d_plan->spopts.nspread; // psi's support in terms of number of cells
Expand Down Expand Up @@ -275,10 +275,10 @@ int cuspread1d_subprob(int nf1, int M, cufinufft_plan_t<T> *d_plan, int blksize)

template int cuspread1d<float>(cufinufft_plan_t<float> *d_plan, int blksize);
template int cuspread1d<double>(cufinufft_plan_t<double> *d_plan, int blksize);
template int cuspread1d_nuptsdriven_prop<float>(int nf1, int M, cufinufft_plan_t<float> *d_plan);
template int cuspread1d_nuptsdriven_prop<double>(int nf1, int M, cufinufft_plan_t<double> *d_plan);
template int cuspread1d_subprob_prop<float>(int nf1, int M, cufinufft_plan_t<float> *d_plan);
template int cuspread1d_subprob_prop<double>(int nf1, int M, cufinufft_plan_t<double> *d_plan);
template int cuspread1d_nuptsdriven_prop<float>(int nf1, int64_t M, cufinufft_plan_t<float> *d_plan);
template int cuspread1d_nuptsdriven_prop<double>(int nf1, int64_t M, cufinufft_plan_t<double> *d_plan);
template int cuspread1d_subprob_prop<float>(int nf1, int64_t M, cufinufft_plan_t<float> *d_plan);
template int cuspread1d_subprob_prop<double>(int nf1, int64_t M, cufinufft_plan_t<double> *d_plan);

} // namespace spreadinterp
} // namespace cufinufft
18 changes: 9 additions & 9 deletions src/cuda/1d/spreadinterp1d.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -16,14 +16,14 @@ namespace spreadinterp {
/* Kernels for NUptsdriven Method */

template <typename T, int KEREVALMETH>
__global__ void spread_1d_nuptsdriven(const T *x, const cuda_complex<T> *c, cuda_complex<T> *fw, int M, int ns, int nf1,
__global__ void spread_1d_nuptsdriven(const T *x, const cuda_complex<T> *c, cuda_complex<T> *fw, int64_t M, int ns, int nf1,
T es_c, T es_beta, T sigma, const int *idxnupts, int pirange) {
int xx, ix;
T ker1[MAX_NSPREAD];

T x_rescaled;
cuda_complex<T> cnow;
for (int i = blockDim.x * blockIdx.x + threadIdx.x; i < M; i += blockDim.x * gridDim.x) {
for (int64_t i = blockDim.x * blockIdx.x + threadIdx.x; i < M; i += blockDim.x * gridDim.x) {
x_rescaled = RESCALE(x[idxnupts[i]], nf1, pirange);
cnow = c[idxnupts[i]];
int xstart = ceil(x_rescaled - ns / 2.0);
Expand All @@ -47,12 +47,12 @@ __global__ void spread_1d_nuptsdriven(const T *x, const cuda_complex<T> *c, cuda
/* Kernels for SubProb Method */
// SubProb properties
template <typename T>
__global__ void calc_bin_size_noghost_1d(int M, int nf1, int bin_size_x, int nbinx, int *bin_size, const T *x,
__global__ void calc_bin_size_noghost_1d(int64_t M, int nf1, int bin_size_x, int nbinx, int *bin_size, const T *x,
int *sortidx, int pirange) {
int binx;
int oldidx;
T x_rescaled;
for (int i = threadIdx.x + blockIdx.x * blockDim.x; i < M; i += gridDim.x * blockDim.x) {
for (int64_t i = threadIdx.x + blockIdx.x * blockDim.x; i < M; i += gridDim.x * blockDim.x) {
x_rescaled = RESCALE(x[i], nf1, pirange);
binx = floor(x_rescaled / bin_size_x);
binx = binx >= nbinx ? binx - 1 : binx;
Expand All @@ -66,11 +66,11 @@ __global__ void calc_bin_size_noghost_1d(int M, int nf1, int bin_size_x, int nbi
}

template <typename T>
__global__ void calc_inverse_of_global_sort_idx_1d(int M, int bin_size_x, int nbinx, const int *bin_startpts,
__global__ void calc_inverse_of_global_sort_idx_1d(int64_t M, int bin_size_x, int nbinx, const int *bin_startpts,
const int *sortidx, const T *x, int *index, int pirange, int nf1) {
int binx;
T x_rescaled;
for (int i = threadIdx.x + blockIdx.x * blockDim.x; i < M; i += gridDim.x * blockDim.x) {
for (int64_t i = threadIdx.x + blockIdx.x * blockDim.x; i < M; i += gridDim.x * blockDim.x) {
x_rescaled = RESCALE(x[i], nf1, pirange);
binx = floor(x_rescaled / bin_size_x);
binx = binx >= nbinx ? binx - 1 : binx;
Expand All @@ -81,7 +81,7 @@ __global__ void calc_inverse_of_global_sort_idx_1d(int M, int bin_size_x, int nb
}

template <typename T, int KEREVALMETH>
__global__ void spread_1d_subprob(const T *x, const cuda_complex<T> *c, cuda_complex<T> *fw, int M, int ns, int nf1,
__global__ void spread_1d_subprob(const T *x, const cuda_complex<T> *c, cuda_complex<T> *fw, int64_t M, int ns, int nf1,
T es_c, T es_beta, T sigma, const int *binstartpts, const int *bin_size,
int bin_size_x, const int *subprob_to_bin, const int *subprobstartpts,
const int *numsubprob, int maxsubprobsize, int nbinx, const int *idxnupts,
Expand Down Expand Up @@ -147,10 +147,10 @@ __global__ void spread_1d_subprob(const T *x, const cuda_complex<T> *c, cuda_com
/* --------------------- 1d Interpolation Kernels ----------------------------*/
/* Kernels for NUptsdriven Method */
template <typename T, int KEREVALMETH>
__global__ void interp_1d_nuptsdriven(const T *x, cuda_complex<T> *c, const cuda_complex<T> *fw, int M, int ns, int nf1,
__global__ void interp_1d_nuptsdriven(const T *x, cuda_complex<T> *c, const cuda_complex<T> *fw, int64_t M, int ns, int nf1,
T es_c, T es_beta, T sigma, const int *idxnupts, int pirange) {
T ker1[MAX_NSPREAD];
for (int i = blockDim.x * blockIdx.x + threadIdx.x; i < M; i += blockDim.x * gridDim.x) {
for (int64_t i = blockDim.x * blockIdx.x + threadIdx.x; i < M; i += blockDim.x * gridDim.x) {
T x_rescaled = RESCALE(x[idxnupts[i]], nf1, pirange);

int xstart = ceil(x_rescaled - ns / 2.0);
Expand Down
6 changes: 3 additions & 3 deletions src/cuda/2d/interp2d_wrapper.cu
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@ int cuinterp2d(cufinufft_plan_t<T> *d_plan, int blksize)
{
int nf1 = d_plan->nf1;
int nf2 = d_plan->nf2;
int M = d_plan->M;
int64_t M = d_plan->M;

int ier;
switch (d_plan->opts.gpu_method) {
Expand All @@ -47,7 +47,7 @@ int cuinterp2d(cufinufft_plan_t<T> *d_plan, int blksize)
}

template <typename T>
int cuinterp2d_nuptsdriven(int nf1, int nf2, int M, cufinufft_plan_t<T> *d_plan, int blksize) {
int cuinterp2d_nuptsdriven(int nf1, int nf2, int64_t M, cufinufft_plan_t<T> *d_plan, int blksize) {
auto &stream = d_plan->stream;

dim3 threadsPerBlock;
Expand Down Expand Up @@ -90,7 +90,7 @@ int cuinterp2d_nuptsdriven(int nf1, int nf2, int M, cufinufft_plan_t<T> *d_plan,
}

template <typename T>
int cuinterp2d_subprob(int nf1, int nf2, int M, cufinufft_plan_t<T> *d_plan, int blksize) {
int cuinterp2d_subprob(int nf1, int nf2, int64_t M, cufinufft_plan_t<T> *d_plan, int blksize) {
auto &stream = d_plan->stream;

int ns = d_plan->spopts.nspread; // psi's support in terms of number of cells
Expand Down
18 changes: 9 additions & 9 deletions src/cuda/2d/spread2d_wrapper.cu
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@ int cuspread2d(cufinufft_plan_t<T> *d_plan, int blksize)
{
int nf1 = d_plan->nf1;
int nf2 = d_plan->nf2;
int M = d_plan->M;
int64_t M = d_plan->M;

int ier;
switch (d_plan->opts.gpu_method) {
Expand All @@ -52,7 +52,7 @@ int cuspread2d(cufinufft_plan_t<T> *d_plan, int blksize)
}

template <typename T>
int cuspread2d_nuptsdriven_prop(int nf1, int nf2, int M, cufinufft_plan_t<T> *d_plan) {
int cuspread2d_nuptsdriven_prop(int nf1, int nf2, int64_t M, cufinufft_plan_t<T> *d_plan) {
auto &stream = d_plan->stream;

if (d_plan->opts.gpu_sort) {
Expand Down Expand Up @@ -105,7 +105,7 @@ int cuspread2d_nuptsdriven_prop(int nf1, int nf2, int M, cufinufft_plan_t<T> *d_
}

template <typename T>
int cuspread2d_nuptsdriven(int nf1, int nf2, int M, cufinufft_plan_t<T> *d_plan, int blksize) {
int cuspread2d_nuptsdriven(int nf1, int nf2, int64_t M, cufinufft_plan_t<T> *d_plan, int blksize) {
auto &stream = d_plan->stream;
dim3 threadsPerBlock;
dim3 blocks;
Expand Down Expand Up @@ -146,7 +146,7 @@ int cuspread2d_nuptsdriven(int nf1, int nf2, int M, cufinufft_plan_t<T> *d_plan,
}

template <typename T>
int cuspread2d_subprob_prop(int nf1, int nf2, int M, cufinufft_plan_t<T> *d_plan)
int cuspread2d_subprob_prop(int nf1, int nf2, int64_t M, cufinufft_plan_t<T> *d_plan)
/*
This function determines the properties for spreading that are independent
of the strength of the nodes, only relates to the locations of the nodes,
Expand Down Expand Up @@ -233,7 +233,7 @@ int cuspread2d_subprob_prop(int nf1, int nf2, int M, cufinufft_plan_t<T> *d_plan
}

template <typename T>
int cuspread2d_subprob(int nf1, int nf2, int M, cufinufft_plan_t<T> *d_plan, int blksize) {
int cuspread2d_subprob(int nf1, int nf2, int64_t M, cufinufft_plan_t<T> *d_plan, int blksize) {
auto &stream = d_plan->stream;

int ns = d_plan->spopts.nspread; // psi's support in terms of number of cells
Expand Down Expand Up @@ -296,10 +296,10 @@ int cuspread2d_subprob(int nf1, int nf2, int M, cufinufft_plan_t<T> *d_plan, int

template int cuspread2d<float>(cufinufft_plan_t<float> *d_plan, int blksize);
template int cuspread2d<double>(cufinufft_plan_t<double> *d_plan, int blksize);
template int cuspread2d_subprob_prop<float>(int nf1, int nf2, int M, cufinufft_plan_t<float> *d_plan);
template int cuspread2d_subprob_prop<double>(int nf1, int nf2, int M, cufinufft_plan_t<double> *d_plan);
template int cuspread2d_nuptsdriven_prop<float>(int nf1, int nf2, int M, cufinufft_plan_t<float> *d_plan);
template int cuspread2d_nuptsdriven_prop<double>(int nf1, int nf2, int M, cufinufft_plan_t<double> *d_plan);
template int cuspread2d_subprob_prop<float>(int nf1, int nf2, int64_t M, cufinufft_plan_t<float> *d_plan);
template int cuspread2d_subprob_prop<double>(int nf1, int nf2, int64_t M, cufinufft_plan_t<double> *d_plan);
template int cuspread2d_nuptsdriven_prop<float>(int nf1, int nf2, int64_t M, cufinufft_plan_t<float> *d_plan);
template int cuspread2d_nuptsdriven_prop<double>(int nf1, int nf2, int64_t M, cufinufft_plan_t<double> *d_plan);

} // namespace spreadinterp
} // namespace cufinufft
Loading

0 comments on commit 338b5e2

Please sign in to comment.