diff --git a/include/finufft/finufft_core.h b/include/finufft/finufft_core.h index afc6ef86..4f81728d 100644 --- a/include/finufft/finufft_core.h +++ b/include/finufft/finufft_core.h @@ -145,36 +145,36 @@ template struct FINUFFT_PLAN_T { // the main plan object, fully C++ FINUFFT_PLAN_T &operator=(const FINUFFT_PLAN_T &) = delete; ~FINUFFT_PLAN_T(); - int type; // transform type (Rokhlin naming): 1,2 or 3 - int dim; // overall dimension: 1,2 or 3 - int ntrans; // how many transforms to do at once (vector or "many" mode) - BIGINT nj; // num of NU pts in type 1,2 (for type 3, num input x pts) - BIGINT nk; // number of NU freq pts (type 3 only) - TF tol; // relative user tolerance - int batchSize; // # strength vectors to group together for FFTW, etc - int nbatch; // how many batches done to cover all ntrans vectors + int type; // transform type (Rokhlin naming): 1,2 or 3 + int dim; // overall dimension: 1,2 or 3 + int ntrans; // how many transforms to do at once (vector or "many" mode) + BIGINT nj; // num of NU pts in type 1,2 (for type 3, num input x pts) + BIGINT nk; // number of NU freq pts (type 3 only) + TF tol; // relative user tolerance + int batchSize; // # strength vectors to group together for FFTW, etc + int nbatch; // how many batches done to cover all ntrans vectors - BIGINT ms; // number of modes in x (1) dir (historical CMCL name) = N1 - BIGINT mt; // number of modes in y (2) direction = N2 - BIGINT mu; // number of modes in z (3) direction = N3 - BIGINT N; // total # modes (prod of above three) + BIGINT ms; // number of modes in x (1) dir (historical CMCL name) = N1 + BIGINT mt; // number of modes in y (2) direction = N2 + BIGINT mu; // number of modes in z (3) direction = N3 + BIGINT N; // total # modes (prod of above three) - BIGINT nf1; // size of internal fine grid in x (1) direction - BIGINT nf2; // " y (2) - BIGINT nf3; // " z (3) - BIGINT nf; // total # fine grid points (product of the above three) + BIGINT nf1; // size of internal fine grid in x (1) direction + BIGINT nf2; // " y (2) + BIGINT nf3; // " z (3) + BIGINT nf; // total # fine grid points (product of the above three) - int fftSign; // sign in exponential for NUFFT defn, guaranteed to be +-1 + int fftSign; // sign in exponential for NUFFT defn, guaranteed to be +-1 - TF *phiHat1 = nullptr; // FT of kernel in t1,2, on x-axis mode grid - TF *phiHat2 = nullptr; // " y-axis. - TF *phiHat3 = nullptr; // " z-axis. + std::vector phiHat1; // FT of kernel in t1,2, on x-axis mode grid + std::vector phiHat2; // " y-axis. + std::vector phiHat3; // " z-axis. - TC *fwBatch = nullptr; // (batches of) fine grid(s) for FFTW to plan - // & act on. Usually the largest working array + TC *fwBatch = nullptr; // (batches of) fine grid(s) for FFTW to plan + // & act on. Usually the largest working array - BIGINT *sortIndices = nullptr; // precomputed NU pt permutation, speeds spread/interp - bool didSort; // whether binsorting used (false: identity perm used) + std::vector sortIndices; // precomputed NU pt permutation, speeds spread/interp + bool didSort; // whether binsorting used (false: identity perm used) TF *X = nullptr, *Y = nullptr, *Z = nullptr; // for t1,2: ptr to user-supplied NU pts // (no new allocs). for t3: allocated as @@ -183,9 +183,9 @@ template struct FINUFFT_PLAN_T { // the main plan object, fully C++ // type 3 specific TF *S = nullptr, *T = nullptr, *U = nullptr; // pointers to user's target NU pts arrays // (no new allocs) - TC *prephase = nullptr; // pre-phase, for all input NU pts - TC *deconv = nullptr; // reciprocal of kernel FT, phase, all output NU pts - TC *CpBatch = nullptr; // working array of prephased strengths + std::vector prephase; // pre-phase, for all input NU pts + std::vector deconv; // reciprocal of kernel FT, phase, all output NU pts + std::vector CpBatch; // working array of prephased strengths TF *Sp = nullptr, *Tp = nullptr, *Up = nullptr; // internal primed targs (s'_k, etc), // allocated type3params t3P; // groups together type 3 shift, scale, phase, parameters diff --git a/include/finufft/spreadinterp.h b/include/finufft/spreadinterp.h index 779101be..8a83af3c 100644 --- a/include/finufft/spreadinterp.h +++ b/include/finufft/spreadinterp.h @@ -39,14 +39,14 @@ FINUFFT_EXPORT int FINUFFT_CDECL spreadcheck(UBIGINT N1, UBIGINT N2, UBIGINT N3, UBIGINT N, T *kx, T *ky, T *kz, const finufft_spread_opts &opts); template -FINUFFT_EXPORT int FINUFFT_CDECL indexSort(BIGINT *sort_indices, UBIGINT N1, UBIGINT N2, - UBIGINT N3, UBIGINT N, T *kx, T *ky, T *kz, - const finufft_spread_opts &opts); +FINUFFT_EXPORT int FINUFFT_CDECL indexSort(std::vector &sort_indices, UBIGINT N1, + UBIGINT N2, UBIGINT N3, UBIGINT N, T *kx, + T *ky, T *kz, const finufft_spread_opts &opts); template FINUFFT_EXPORT int FINUFFT_CDECL spreadinterpSorted( - const BIGINT *sort_indices, const UBIGINT N1, const UBIGINT N2, const UBIGINT N3, - T *data_uniform, const UBIGINT M, T *FINUFFT_RESTRICT kx, T *FINUFFT_RESTRICT ky, - T *FINUFFT_RESTRICT kz, T *FINUFFT_RESTRICT data_nonuniform, + const std::vector &sort_indices, const UBIGINT N1, const UBIGINT N2, + const UBIGINT N3, T *data_uniform, const UBIGINT M, T *FINUFFT_RESTRICT kx, + T *FINUFFT_RESTRICT ky, T *FINUFFT_RESTRICT kz, T *FINUFFT_RESTRICT data_nonuniform, const finufft_spread_opts &opts, int did_sort); template FINUFFT_EXPORT T FINUFFT_CDECL evaluate_kernel(T x, const finufft_spread_opts &opts); diff --git a/src/finufft_core.cpp b/src/finufft_core.cpp index 5d6fe3a1..8987a37a 100644 --- a/src/finufft_core.cpp +++ b/src/finufft_core.cpp @@ -167,7 +167,8 @@ static void set_nhg_type3(T S, T X, finufft_opts opts, finufft_spread_opts spopt } template -static void onedim_fseries_kernel(BIGINT nf, T *fwkerhalf, finufft_spread_opts opts) +static void onedim_fseries_kernel(BIGINT nf, std::vector &fwkerhalf, + finufft_spread_opts opts) /* Approximates exact Fourier series coeffs of cnufftspread's real symmetric kernel, directly via q-node quadrature on Euler-Fourier formula, exploiting @@ -231,7 +232,8 @@ static void onedim_fseries_kernel(BIGINT nf, T *fwkerhalf, finufft_spread_opts o } template -static void onedim_nuft_kernel(BIGINT nk, T *k, T *phihat, finufft_spread_opts opts) +static void onedim_nuft_kernel(BIGINT nk, T *k, std::vector &phihat, + finufft_spread_opts opts) /* Approximates exact 1D Fourier transform of cnufftspread's real symmetric kernel, directly via q-node quadrature on Euler-Fourier formula, exploiting @@ -273,8 +275,8 @@ static void onedim_nuft_kernel(BIGINT nk, T *k, T *phihat, finufft_spread_opts o } template -static void deconvolveshuffle1d(int dir, T prefac, T *ker, BIGINT ms, T *fk, BIGINT nf1, - std::complex *fw, int modeord) +static void deconvolveshuffle1d(int dir, T prefac, const std::vector &ker, BIGINT ms, + T *fk, BIGINT nf1, std::complex *fw, int modeord) /* if dir==1: copies fw to fk with amplification by prefac/ker if dir==2: copies fk to fw (and zero pads rest of it), same amplification. @@ -332,9 +334,9 @@ static void deconvolveshuffle1d(int dir, T prefac, T *ker, BIGINT ms, T *fk, BIG } template -static void deconvolveshuffle2d(int dir, T prefac, T *ker1, T *ker2, BIGINT ms, BIGINT mt, - T *fk, BIGINT nf1, BIGINT nf2, std::complex *fw, - int modeord) +static void deconvolveshuffle2d(int dir, T prefac, const std::vector &ker1, + const std::vector &ker2, BIGINT ms, BIGINT mt, T *fk, + BIGINT nf1, BIGINT nf2, std::complex *fw, int modeord) /* 2D version of deconvolveshuffle1d, calls it on each x-line using 1/ker2 fac. @@ -376,7 +378,8 @@ static void deconvolveshuffle2d(int dir, T prefac, T *ker1, T *ker2, BIGINT ms, } template -static void deconvolveshuffle3d(int dir, T prefac, T *ker1, T *ker2, T *ker3, BIGINT ms, +static void deconvolveshuffle3d(int dir, T prefac, std::vector &ker1, + std::vector &ker2, std::vector &ker3, BIGINT ms, BIGINT mt, BIGINT mu, T *fk, BIGINT nf1, BIGINT nf2, BIGINT nf3, std::complex *fw, int modeord) /* @@ -651,16 +654,12 @@ int finufft_makeplan_t(int type, int dim, const BIGINT *n_modes, int iflag, int return ier; // set others as defaults (or unallocated for arrays)... - p->X = NULL; - p->Y = NULL; - p->Z = NULL; - p->phiHat1 = NULL; - p->phiHat2 = NULL; - p->phiHat3 = NULL; - p->nf1 = 1; - p->nf2 = 1; - p->nf3 = 1; // crucial to leave as 1 for unused dims - p->sortIndices = NULL; // used in all three types + p->X = NULL; + p->Y = NULL; + p->Z = NULL; + p->nf1 = 1; + p->nf2 = 1; + p->nf3 = 1; // crucial to leave as 1 for unused dims // ------------------------ types 1,2: planning needed --------------------- if (type == 1 || type == 2) { @@ -686,16 +685,16 @@ int finufft_makeplan_t(int type, int dim, const BIGINT *n_modes, int iflag, int // determine fine grid sizes, sanity check.. int nfier = set_nf_type12(p->ms, p->opts, p->spopts, &(p->nf1)); if (nfier) return nfier; // nf too big; we're done - p->phiHat1 = (TF *)malloc(sizeof(TF) * (p->nf1 / 2 + 1)); + p->phiHat1.resize(p->nf1 / 2 + 1); if (dim > 1) { nfier = set_nf_type12(p->mt, p->opts, p->spopts, &(p->nf2)); if (nfier) return nfier; - p->phiHat2 = (TF *)malloc(sizeof(TF) * (p->nf2 / 2 + 1)); + p->phiHat2.resize(p->nf2 / 2 + 1); } if (dim > 2) { nfier = set_nf_type12(p->mu, p->opts, p->spopts, &(p->nf3)); if (nfier) return nfier; - p->phiHat3 = (TF *)malloc(sizeof(TF) * (p->nf3 / 2 + 1)); + p->phiHat3.resize(p->nf3 / 2 + 1); } if (p->opts.debug) { // "long long" here is to avoid warnings with printf... @@ -739,9 +738,6 @@ int finufft_makeplan_t(int type, int dim, const BIGINT *n_modes, int iflag, int if (!p->fwBatch) { // we don't catch all such mallocs, just this big one fprintf(stderr, "[%s] FFTW malloc failed for fwBatch (working fine grids)!\n", __func__); - free(p->phiHat1); - free(p->phiHat2); - free(p->phiHat3); return FINUFFT_ERR_ALLOC; } @@ -756,13 +752,10 @@ int finufft_makeplan_t(int type, int dim, const BIGINT *n_modes, int iflag, int if (p->opts.debug) printf("[%s] %dd%d: ntrans=%d\n", __func__, dim, type, ntrans); // in case destroy occurs before setpts, need safe dummy ptrs/plans... - p->CpBatch = NULL; p->fwBatch = NULL; p->Sp = NULL; p->Tp = NULL; p->Up = NULL; - p->prephase = NULL; - p->deconv = NULL; p->innerT2plan = NULL; // Type 3 will call finufft_makeplan for type 2; no need to init FFTW // Note we don't even know nj or nk yet, so can't do anything else! @@ -810,15 +803,7 @@ int finufft_setpts_t(FINUFFT_PLAN_T *p, BIGINT nj, TF *xj, TF *yj, TF *zj, B if (ier) // no warnings allowed here return ier; timer.restart(); - // Free sortIndices if it has been allocated before in case of repeated setpts - // calls causing memory leak. We don't know it is the same size as before, so we - // have to malloc each time. - if (p->sortIndices) free(p->sortIndices); - p->sortIndices = (BIGINT *)malloc(sizeof(BIGINT) * p->nj); - if (!p->sortIndices) { - fprintf(stderr, "[%s] failed to allocate sortIndices!\n", __func__); - return FINUFFT_ERR_SPREAD_ALLOC; - } + p->sortIndices.resize(p->nj); p->didSort = indexSort(p->sortIndices, p->nf1, p->nf2, p->nf3, p->nj, xj, yj, zj, p->spopts); if (p->opts.debug) @@ -884,18 +869,13 @@ int finufft_setpts_t(FINUFFT_PLAN_T *p, BIGINT nj, TF *xj, TF *yj, TF *zj, B p->fftPlan->free(p->fwBatch); p->fwBatch = p->fftPlan->alloc_complex(p->nf * p->batchSize); // maybe big workspace - // (note FFTW_ALLOC is not needed over malloc, but matches its type) - if (p->CpBatch) free(p->CpBatch); - p->CpBatch = - (std::complex *)malloc(sizeof(std::complex) * nj * p->batchSize); // batch - // c' - // work + p->CpBatch.resize(nj * p->batchSize); // batch c' work if (p->opts.debug) printf("[%s t3] widcen, batch %.2fGB alloc:\t%.3g s\n", __func__, (double)1E-09 * sizeof(std::complex) * (p->nf + nj) * p->batchSize, timer.elapsedsec()); - if (!p->fwBatch || !p->CpBatch) { + if (!p->fwBatch) { fprintf(stderr, "[%s t3] malloc fail for fwBatch or CpBatch!\n", __func__); return FINUFFT_ERR_ALLOC; } @@ -935,8 +915,7 @@ int finufft_setpts_t(FINUFFT_PLAN_T *p, BIGINT nj, TF *xj, TF *yj, TF *zj, B // set up prephase array... std::complex imasign = (p->fftSign >= 0) ? std::complex(0, 1) : std::complex(0, -1); // +-i - if (p->prephase) free(p->prephase); - p->prephase = (std::complex *)malloc(sizeof(std::complex) * nj); + p->prephase.resize(nj); if (p->t3P.D1 != 0.0 || p->t3P.D2 != 0.0 || p->t3P.D3 != 0.0) { #pragma omp parallel for num_threads(p->opts.nthreads) schedule(static) for (BIGINT j = 0; j < nj; ++j) { // ... loop over src NU locs @@ -963,17 +942,16 @@ int finufft_setpts_t(FINUFFT_PLAN_T *p, BIGINT nj, TF *xj, TF *yj, TF *zj, B } // (old STEP 3a) Compute deconvolution post-factors array (per targ pt)... // (exploits that FT separates because kernel is prod of 1D funcs) - if (p->deconv) free(p->deconv); - p->deconv = (std::complex *)malloc(sizeof(std::complex) * nk); - TF *phiHatk1 = (TF *)malloc(sizeof(TF) * nk); // don't confuse w/ p->phiHat + p->deconv.resize(nk); + std::vector phiHatk1(nk); // don't confuse w/ p->phiHat onedim_nuft_kernel(nk, p->Sp, phiHatk1, p->spopts); // fill phiHat1 - TF *phiHatk2 = NULL, *phiHatk3 = NULL; + std::vector phiHatk2, phiHatk3; if (d > 1) { - phiHatk2 = (TF *)malloc(sizeof(TF) * nk); + phiHatk2.resize(nk); onedim_nuft_kernel(nk, p->Tp, phiHatk2, p->spopts); // fill phiHat2 } if (d > 2) { - phiHatk3 = (TF *)malloc(sizeof(TF) * nk); + phiHatk3.resize(nk); onedim_nuft_kernel(nk, p->Up, phiHatk3, p->spopts); // fill phiHat3 } int Cfinite = @@ -995,23 +973,12 @@ int finufft_setpts_t(FINUFFT_PLAN_T *p, BIGINT nj, TF *xj, TF *yj, TF *zj, B p->deconv[k] *= cos(phase) + imasign * sin(phase); // Euler e^{+-i.phase} } } - free(phiHatk1); - free(phiHatk2); - free(phiHatk3); // done w/ deconv fill if (p->opts.debug) printf("[%s t3] phase & deconv factors:\t%.3g s\n", __func__, timer.elapsedsec()); // Set up sort for spreading Cp (from primed NU src pts X, Y, Z) to fw... timer.restart(); - // Free sortIndices if it has been allocated before in case of repeated setpts - // calls causing memory leak. We don't know it is the same size as before, so we - // have to malloc each time. - if (p->sortIndices) free(p->sortIndices); - p->sortIndices = (BIGINT *)malloc(sizeof(BIGINT) * p->nj); - if (!p->sortIndices) { - fprintf(stderr, "[%s t3] failed to allocate sortIndices!\n", __func__); - return FINUFFT_ERR_SPREAD_ALLOC; - } + p->sortIndices.resize(p->nj); p->didSort = indexSort(p->sortIndices, p->nf1, p->nf2, p->nf3, p->nj, p->X, p->Y, p->Z, p->spopts); if (p->opts.debug) @@ -1167,8 +1134,8 @@ int finufft_execute_t(FINUFFT_PLAN_T *p, std::complex *cj, std::complex< // STEP 1: spread c'_j batch (x'_j NU pts) into fw batch grid... timer.restart(); - p->spopts.spread_direction = 1; // spread - spreadinterpSortedBatch(thisBatchSize, p, p->CpBatch); // p->X are primed + p->spopts.spread_direction = 1; // spread + spreadinterpSortedBatch(thisBatchSize, p, p->CpBatch.data()); // p->X are primed t_spr += timer.elapsedsec(); // STEP 2: type 2 NUFFT from fw batch to user output fk array batch... @@ -1213,23 +1180,16 @@ template FINUFFT_PLAN_T::~FINUFFT_PLAN_T() { // Thus either each thing free'd here is guaranteed to be NULL or correctly // allocated. if (fftPlan) fftPlan->free(fwBatch); // free the big FFTW (or t3 spread) working array - free(sortIndices); if (type == 1 || type == 2) { - free(phiHat1); - free(phiHat2); - free(phiHat3); - } else { // free the stuff alloc for type 3 only + } else { // free the stuff alloc for type 3 only delete innerT2plan; - innerT2plan = nullptr; // if NULL, ignore its error code - free(CpBatch); + innerT2plan = nullptr; // if NULL, ignore its error code free(Sp); free(Tp); free(Up); free(X); free(Y); free(Z); - free(prephase); - free(deconv); } } template FINUFFT_PLAN_T::~FINUFFT_PLAN_T(); diff --git a/src/spreadinterp.cpp b/src/spreadinterp.cpp index a7a9db46..7c7309de 100644 --- a/src/spreadinterp.cpp +++ b/src/spreadinterp.cpp @@ -1401,9 +1401,10 @@ static void add_wrapped_subgrid(BIGINT offset1, BIGINT offset2, BIGINT offset3, } template -static void bin_sort_singlethread( - BIGINT *ret, UBIGINT M, const T *kx, const T *ky, const T *kz, UBIGINT N1, UBIGINT N2, - UBIGINT N3, double bin_size_x, double bin_size_y, double bin_size_z, int debug) +static void bin_sort_singlethread(std::vector &ret, UBIGINT M, const T *kx, + const T *ky, const T *kz, UBIGINT N1, UBIGINT N2, + UBIGINT N3, double bin_size_x, double bin_size_y, + double bin_size_z, int debug) /* Returns permutation of all nonuniform points with good RAM access, * ie less cache misses for spreading, in 1D, 2D, or 3D. Single-threaded version * @@ -1475,9 +1476,10 @@ static void bin_sort_singlethread( } template -static void bin_sort_multithread( - BIGINT *ret, UBIGINT M, T *kx, T *ky, T *kz, UBIGINT N1, UBIGINT N2, UBIGINT N3, - double bin_size_x, double bin_size_y, double bin_size_z, int debug, int nthr) +static void bin_sort_multithread(std::vector &ret, UBIGINT M, T *kx, T *ky, T *kz, + UBIGINT N1, UBIGINT N2, UBIGINT N3, double bin_size_x, + double bin_size_y, double bin_size_z, int debug, + int nthr) /* Mostly-OpenMP'ed version of bin_sort. For documentation see: bin_sort_singlethread. Caution: when M (# NU pts) << N (# U pts), is SLOWER than single-thread. @@ -1690,15 +1692,10 @@ FINUFFT_EXPORT int FINUFFT_CDECL spreadinterp( { int ier = spreadcheck(N1, N2, N3, M, kx, ky, kz, opts); if (ier) return ier; - BIGINT *sort_indices = (BIGINT *)malloc(sizeof(BIGINT) * M); - if (!sort_indices) { - fprintf(stderr, "%s failed to allocate sort_indices!\n", __func__); - return FINUFFT_ERR_SPREAD_ALLOC; - } + std::vector sort_indices(M); int did_sort = indexSort(sort_indices, N1, N2, N3, M, kx, ky, kz, opts); spreadinterpSorted(sort_indices, N1, N2, N3, data_uniform, M, kx, ky, kz, data_nonuniform, opts, did_sort); - free(sort_indices); return 0; } @@ -1749,8 +1746,8 @@ template int spreadcheck(UBIGINT N1, UBIGINT N2, UBIGINT N3, UBIGINT M, const finufft_spread_opts &opts); template -int indexSort(BIGINT *sort_indices, UBIGINT N1, UBIGINT N2, UBIGINT N3, UBIGINT M, T *kx, - T *ky, T *kz, const finufft_spread_opts &opts) +int indexSort(std::vector &sort_indices, UBIGINT N1, UBIGINT N2, UBIGINT N3, + UBIGINT M, T *kx, T *ky, T *kz, const finufft_spread_opts &opts) /* This makes a decision whether or not to sort the NU pts (influenced by opts.sort), and if yes, calls either single- or multi-threaded bin sort, writing reordered index list to sort_indices. If decided not to sort, the @@ -1825,17 +1822,17 @@ int indexSort(BIGINT *sort_indices, UBIGINT N1, UBIGINT N2, UBIGINT N3, UBIGINT } return did_sort; } -template int indexSort(BIGINT *sort_indices, UBIGINT N1, UBIGINT N2, UBIGINT N3, - UBIGINT M, float *kx, float *ky, float *kz, +template int indexSort(std::vector &sort_indices, UBIGINT N1, UBIGINT N2, + UBIGINT N3, UBIGINT M, float *kx, float *ky, float *kz, const finufft_spread_opts &opts); -template int indexSort(BIGINT *sort_indices, UBIGINT N1, UBIGINT N2, UBIGINT N3, - UBIGINT M, double *kx, double *ky, double *kz, +template int indexSort(std::vector &sort_indices, UBIGINT N1, UBIGINT N2, + UBIGINT N3, UBIGINT M, double *kx, double *ky, double *kz, const finufft_spread_opts &opts); // -------------------------------------------------------------------------- template -static int spreadSorted(const BIGINT *sort_indices, UBIGINT N1, UBIGINT N2, UBIGINT N3, - T *FINUFFT_RESTRICT data_uniform, UBIGINT M, +static int spreadSorted(const std::vector &sort_indices, UBIGINT N1, UBIGINT N2, + UBIGINT N3, T *FINUFFT_RESTRICT data_uniform, UBIGINT M, T *FINUFFT_RESTRICT kx, T *FINUFFT_RESTRICT ky, T *FINUFFT_RESTRICT kz, const T *data_nonuniform, const finufft_spread_opts &opts, int did_sort) @@ -1961,8 +1958,8 @@ static int spreadSorted(const BIGINT *sort_indices, UBIGINT N1, UBIGINT N2, UBIG // -------------------------------------------------------------------------- template FINUFFT_NEVER_INLINE static int interpSorted_kernel( - const BIGINT *sort_indices, const UBIGINT N1, const UBIGINT N2, const UBIGINT N3, - const T *data_uniform, const UBIGINT M, T *FINUFFT_RESTRICT kx, + const std::vector &sort_indices, const UBIGINT N1, const UBIGINT N2, + const UBIGINT N3, const T *data_uniform, const UBIGINT M, T *FINUFFT_RESTRICT kx, T *FINUFFT_RESTRICT ky, T *FINUFFT_RESTRICT kz, T *FINUFFT_RESTRICT data_nonuniform, const finufft_spread_opts &opts) // Interpolate to NU pts in sorted order from a uniform grid. @@ -2069,10 +2066,10 @@ FINUFFT_NEVER_INLINE static int interpSorted_kernel( template static int interpSorted_dispatch( - const BIGINT *sort_indices, const UBIGINT N1, const UBIGINT N2, const UBIGINT N3, - T *FINUFFT_RESTRICT data_uniform, const UBIGINT M, T *FINUFFT_RESTRICT kx, - T *FINUFFT_RESTRICT ky, T *FINUFFT_RESTRICT kz, T *FINUFFT_RESTRICT data_nonuniform, - const finufft_spread_opts &opts) { + const std::vector &sort_indices, const UBIGINT N1, const UBIGINT N2, + const UBIGINT N3, T *FINUFFT_RESTRICT data_uniform, const UBIGINT M, + T *FINUFFT_RESTRICT kx, T *FINUFFT_RESTRICT ky, T *FINUFFT_RESTRICT kz, + T *FINUFFT_RESTRICT data_nonuniform, const finufft_spread_opts &opts) { static_assert(MIN_NSPREAD <= NS && NS <= MAX_NSPREAD, "NS must be in the range (MIN_NSPREAD, MAX_NSPREAD)"); if constexpr (NS == MIN_NSPREAD) { // Base case @@ -2100,19 +2097,19 @@ static int interpSorted_dispatch( } template -static int interpSorted(const BIGINT *sort_indices, const UBIGINT N1, const UBIGINT N2, - const UBIGINT N3, T *FINUFFT_RESTRICT data_uniform, - const UBIGINT M, T *FINUFFT_RESTRICT kx, T *FINUFFT_RESTRICT ky, - T *FINUFFT_RESTRICT kz, T *FINUFFT_RESTRICT data_nonuniform, - const finufft_spread_opts &opts) { +static int interpSorted( + const std::vector &sort_indices, const UBIGINT N1, const UBIGINT N2, + const UBIGINT N3, T *FINUFFT_RESTRICT data_uniform, const UBIGINT M, + T *FINUFFT_RESTRICT kx, T *FINUFFT_RESTRICT ky, T *FINUFFT_RESTRICT kz, + T *FINUFFT_RESTRICT data_nonuniform, const finufft_spread_opts &opts) { return interpSorted_dispatch(sort_indices, N1, N2, N3, data_uniform, M, kx, ky, kz, data_nonuniform, opts); } template -int spreadinterpSorted(const BIGINT *sort_indices, const UBIGINT N1, const UBIGINT N2, - const UBIGINT N3, T *data_uniform, const UBIGINT M, - T *FINUFFT_RESTRICT kx, T *FINUFFT_RESTRICT ky, +int spreadinterpSorted(const std::vector &sort_indices, const UBIGINT N1, + const UBIGINT N2, const UBIGINT N3, T *data_uniform, + const UBIGINT M, T *FINUFFT_RESTRICT kx, T *FINUFFT_RESTRICT ky, T *FINUFFT_RESTRICT kz, T *FINUFFT_RESTRICT data_nonuniform, const finufft_spread_opts &opts, int did_sort) /* Logic to select the main spreading (dir=1) vs interpolation (dir=2) routine. @@ -2132,14 +2129,14 @@ int spreadinterpSorted(const BIGINT *sort_indices, const UBIGINT N1, const UBIGI return 0; } template int spreadinterpSorted( - const BIGINT *sort_indices, const UBIGINT N1, const UBIGINT N2, const UBIGINT N3, - float *data_uniform, const UBIGINT M, float *FINUFFT_RESTRICT kx, + const std::vector &sort_indices, const UBIGINT N1, const UBIGINT N2, + const UBIGINT N3, float *data_uniform, const UBIGINT M, float *FINUFFT_RESTRICT kx, float *FINUFFT_RESTRICT ky, float *FINUFFT_RESTRICT kz, float *FINUFFT_RESTRICT data_nonuniform, const finufft_spread_opts &opts, int did_sort); template int spreadinterpSorted( - const BIGINT *sort_indices, const UBIGINT N1, const UBIGINT N2, const UBIGINT N3, - double *data_uniform, const UBIGINT M, double *FINUFFT_RESTRICT kx, + const std::vector &sort_indices, const UBIGINT N1, const UBIGINT N2, + const UBIGINT N3, double *data_uniform, const UBIGINT M, double *FINUFFT_RESTRICT kx, double *FINUFFT_RESTRICT ky, double *FINUFFT_RESTRICT kz, double *FINUFFT_RESTRICT data_nonuniform, const finufft_spread_opts &opts, int did_sort);