diff --git a/include/finufft/finufft_core.h b/include/finufft/finufft_core.h
index afc6ef86..4f81728d 100644
--- a/include/finufft/finufft_core.h
+++ b/include/finufft/finufft_core.h
@@ -145,36 +145,36 @@ template<typename TF> struct FINUFFT_PLAN_T { // the main plan object, fully C++
   FINUFFT_PLAN_T &operator=(const FINUFFT_PLAN_T &) = delete;
   ~FINUFFT_PLAN_T();
 
-  int type;              // transform type (Rokhlin naming): 1,2 or 3
-  int dim;               // overall dimension: 1,2 or 3
-  int ntrans;            // how many transforms to do at once (vector or "many" mode)
-  BIGINT nj;             // num of NU pts in type 1,2 (for type 3, num input x pts)
-  BIGINT nk;             // number of NU freq pts (type 3 only)
-  TF tol;                // relative user tolerance
-  int batchSize;         // # strength vectors to group together for FFTW, etc
-  int nbatch;            // how many batches done to cover all ntrans vectors
+  int type;                // transform type (Rokhlin naming): 1,2 or 3
+  int dim;                 // overall dimension: 1,2 or 3
+  int ntrans;              // how many transforms to do at once (vector or "many" mode)
+  BIGINT nj;               // num of NU pts in type 1,2 (for type 3, num input x pts)
+  BIGINT nk;               // number of NU freq pts (type 3 only)
+  TF tol;                  // relative user tolerance
+  int batchSize;           // # strength vectors to group together for FFTW, etc
+  int nbatch;              // how many batches done to cover all ntrans vectors
 
-  BIGINT ms;             // number of modes in x (1) dir (historical CMCL name) = N1
-  BIGINT mt;             // number of modes in y (2) direction = N2
-  BIGINT mu;             // number of modes in z (3) direction = N3
-  BIGINT N;              // total # modes (prod of above three)
+  BIGINT ms;               // number of modes in x (1) dir (historical CMCL name) = N1
+  BIGINT mt;               // number of modes in y (2) direction = N2
+  BIGINT mu;               // number of modes in z (3) direction = N3
+  BIGINT N;                // total # modes (prod of above three)
 
-  BIGINT nf1;            // size of internal fine grid in x (1) direction
-  BIGINT nf2;            // " y (2)
-  BIGINT nf3;            // " z (3)
-  BIGINT nf;             // total # fine grid points (product of the above three)
+  BIGINT nf1;              // size of internal fine grid in x (1) direction
+  BIGINT nf2;              // " y (2)
+  BIGINT nf3;              // " z (3)
+  BIGINT nf;               // total # fine grid points (product of the above three)
 
-  int fftSign;           // sign in exponential for NUFFT defn, guaranteed to be +-1
+  int fftSign;             // sign in exponential for NUFFT defn, guaranteed to be +-1
 
-  TF *phiHat1 = nullptr; // FT of kernel in t1,2, on x-axis mode grid
-  TF *phiHat2 = nullptr; // " y-axis.
-  TF *phiHat3 = nullptr; // " z-axis.
+  std::vector<TF> phiHat1; // FT of kernel in t1,2, on x-axis mode grid
+  std::vector<TF> phiHat2; // " y-axis.
+  std::vector<TF> phiHat3; // " z-axis.
 
-  TC *fwBatch = nullptr; // (batches of) fine grid(s) for FFTW to plan
-                         // & act on. Usually the largest working array
+  TC *fwBatch = nullptr;   // (batches of) fine grid(s) for FFTW to plan
+                           // & act on. Usually the largest working array
 
-  BIGINT *sortIndices = nullptr; // precomputed NU pt permutation, speeds spread/interp
-  bool didSort;                  // whether binsorting used (false: identity perm used)
+  std::vector<BIGINT> sortIndices; // precomputed NU pt permutation, speeds spread/interp
+  bool didSort;                    // whether binsorting used (false: identity perm used)
 
   TF *X = nullptr, *Y = nullptr, *Z = nullptr; // for t1,2: ptr to user-supplied NU pts
                                                // (no new allocs). for t3: allocated as
@@ -183,9 +183,9 @@ template<typename TF> struct FINUFFT_PLAN_T { // the main plan object, fully C++
   // type 3 specific
   TF *S = nullptr, *T = nullptr, *U = nullptr; // pointers to user's target NU pts arrays
                                                // (no new allocs)
-  TC *prephase = nullptr;                      // pre-phase, for all input NU pts
-  TC *deconv   = nullptr; // reciprocal of kernel FT, phase, all output NU pts
-  TC *CpBatch  = nullptr; // working array of prephased strengths
+  std::vector<TC> prephase;                    // pre-phase, for all input NU pts
+  std::vector<TC> deconv;  // reciprocal of kernel FT, phase, all output NU pts
+  std::vector<TC> CpBatch; // working array of prephased strengths
   TF *Sp = nullptr, *Tp = nullptr, *Up = nullptr; // internal primed targs (s'_k, etc),
                                                   // allocated
   type3params<TF> t3P; // groups together type 3 shift, scale, phase, parameters
diff --git a/include/finufft/spreadinterp.h b/include/finufft/spreadinterp.h
index 779101be..8a83af3c 100644
--- a/include/finufft/spreadinterp.h
+++ b/include/finufft/spreadinterp.h
@@ -39,14 +39,14 @@ FINUFFT_EXPORT int FINUFFT_CDECL spreadcheck(UBIGINT N1, UBIGINT N2, UBIGINT N3,
                                              UBIGINT N, T *kx, T *ky, T *kz,
                                              const finufft_spread_opts &opts);
 template<typename T>
-FINUFFT_EXPORT int FINUFFT_CDECL indexSort(BIGINT *sort_indices, UBIGINT N1, UBIGINT N2,
-                                           UBIGINT N3, UBIGINT N, T *kx, T *ky, T *kz,
-                                           const finufft_spread_opts &opts);
+FINUFFT_EXPORT int FINUFFT_CDECL indexSort(std::vector<BIGINT> &sort_indices, UBIGINT N1,
+                                           UBIGINT N2, UBIGINT N3, UBIGINT N, T *kx,
+                                           T *ky, T *kz, const finufft_spread_opts &opts);
 template<typename T>
 FINUFFT_EXPORT int FINUFFT_CDECL spreadinterpSorted(
-    const BIGINT *sort_indices, const UBIGINT N1, const UBIGINT N2, const UBIGINT N3,
-    T *data_uniform, const UBIGINT M, T *FINUFFT_RESTRICT kx, T *FINUFFT_RESTRICT ky,
-    T *FINUFFT_RESTRICT kz, T *FINUFFT_RESTRICT data_nonuniform,
+    const std::vector<BIGINT> &sort_indices, const UBIGINT N1, const UBIGINT N2,
+    const UBIGINT N3, T *data_uniform, const UBIGINT M, T *FINUFFT_RESTRICT kx,
+    T *FINUFFT_RESTRICT ky, T *FINUFFT_RESTRICT kz, T *FINUFFT_RESTRICT data_nonuniform,
     const finufft_spread_opts &opts, int did_sort);
 template<typename T>
 FINUFFT_EXPORT T FINUFFT_CDECL evaluate_kernel(T x, const finufft_spread_opts &opts);
diff --git a/src/finufft_core.cpp b/src/finufft_core.cpp
index 5d6fe3a1..8987a37a 100644
--- a/src/finufft_core.cpp
+++ b/src/finufft_core.cpp
@@ -167,7 +167,8 @@ static void set_nhg_type3(T S, T X, finufft_opts opts, finufft_spread_opts spopt
 }
 
 template<typename T>
-static void onedim_fseries_kernel(BIGINT nf, T *fwkerhalf, finufft_spread_opts opts)
+static void onedim_fseries_kernel(BIGINT nf, std::vector<T> &fwkerhalf,
+                                  finufft_spread_opts opts)
 /*
   Approximates exact Fourier series coeffs of cnufftspread's real symmetric
   kernel, directly via q-node quadrature on Euler-Fourier formula, exploiting
@@ -231,7 +232,8 @@ static void onedim_fseries_kernel(BIGINT nf, T *fwkerhalf, finufft_spread_opts o
 }
 
 template<typename T>
-static void onedim_nuft_kernel(BIGINT nk, T *k, T *phihat, finufft_spread_opts opts)
+static void onedim_nuft_kernel(BIGINT nk, T *k, std::vector<T> &phihat,
+                               finufft_spread_opts opts)
 /*
   Approximates exact 1D Fourier transform of cnufftspread's real symmetric
   kernel, directly via q-node quadrature on Euler-Fourier formula, exploiting
@@ -273,8 +275,8 @@ static void onedim_nuft_kernel(BIGINT nk, T *k, T *phihat, finufft_spread_opts o
 }
 
 template<typename T>
-static void deconvolveshuffle1d(int dir, T prefac, T *ker, BIGINT ms, T *fk, BIGINT nf1,
-                                std::complex<T> *fw, int modeord)
+static void deconvolveshuffle1d(int dir, T prefac, const std::vector<T> &ker, BIGINT ms,
+                                T *fk, BIGINT nf1, std::complex<T> *fw, int modeord)
 /*
   if dir==1: copies fw to fk with amplification by prefac/ker
   if dir==2: copies fk to fw (and zero pads rest of it), same amplification.
@@ -332,9 +334,9 @@ static void deconvolveshuffle1d(int dir, T prefac, T *ker, BIGINT ms, T *fk, BIG
 }
 
 template<typename T>
-static void deconvolveshuffle2d(int dir, T prefac, T *ker1, T *ker2, BIGINT ms, BIGINT mt,
-                                T *fk, BIGINT nf1, BIGINT nf2, std::complex<T> *fw,
-                                int modeord)
+static void deconvolveshuffle2d(int dir, T prefac, const std::vector<T> &ker1,
+                                const std::vector<T> &ker2, BIGINT ms, BIGINT mt, T *fk,
+                                BIGINT nf1, BIGINT nf2, std::complex<T> *fw, int modeord)
 /*
   2D version of deconvolveshuffle1d, calls it on each x-line using 1/ker2 fac.
 
@@ -376,7 +378,8 @@ static void deconvolveshuffle2d(int dir, T prefac, T *ker1, T *ker2, BIGINT ms,
 }
 
 template<typename T>
-static void deconvolveshuffle3d(int dir, T prefac, T *ker1, T *ker2, T *ker3, BIGINT ms,
+static void deconvolveshuffle3d(int dir, T prefac, std::vector<T> &ker1,
+                                std::vector<T> &ker2, std::vector<T> &ker3, BIGINT ms,
                                 BIGINT mt, BIGINT mu, T *fk, BIGINT nf1, BIGINT nf2,
                                 BIGINT nf3, std::complex<T> *fw, int modeord)
 /*
@@ -651,16 +654,12 @@ int finufft_makeplan_t(int type, int dim, const BIGINT *n_modes, int iflag, int
     return ier;
 
   // set others as defaults (or unallocated for arrays)...
-  p->X           = NULL;
-  p->Y           = NULL;
-  p->Z           = NULL;
-  p->phiHat1     = NULL;
-  p->phiHat2     = NULL;
-  p->phiHat3     = NULL;
-  p->nf1         = 1;
-  p->nf2         = 1;
-  p->nf3         = 1;    // crucial to leave as 1 for unused dims
-  p->sortIndices = NULL; // used in all three types
+  p->X   = NULL;
+  p->Y   = NULL;
+  p->Z   = NULL;
+  p->nf1 = 1;
+  p->nf2 = 1;
+  p->nf3 = 1; // crucial to leave as 1 for unused dims
 
   //  ------------------------ types 1,2: planning needed ---------------------
   if (type == 1 || type == 2) {
@@ -686,16 +685,16 @@ int finufft_makeplan_t(int type, int dim, const BIGINT *n_modes, int iflag, int
     // determine fine grid sizes, sanity check..
     int nfier = set_nf_type12(p->ms, p->opts, p->spopts, &(p->nf1));
     if (nfier) return nfier; // nf too big; we're done
-    p->phiHat1 = (TF *)malloc(sizeof(TF) * (p->nf1 / 2 + 1));
+    p->phiHat1.resize(p->nf1 / 2 + 1);
     if (dim > 1) {
       nfier = set_nf_type12(p->mt, p->opts, p->spopts, &(p->nf2));
       if (nfier) return nfier;
-      p->phiHat2 = (TF *)malloc(sizeof(TF) * (p->nf2 / 2 + 1));
+      p->phiHat2.resize(p->nf2 / 2 + 1);
     }
     if (dim > 2) {
       nfier = set_nf_type12(p->mu, p->opts, p->spopts, &(p->nf3));
       if (nfier) return nfier;
-      p->phiHat3 = (TF *)malloc(sizeof(TF) * (p->nf3 / 2 + 1));
+      p->phiHat3.resize(p->nf3 / 2 + 1);
     }
 
     if (p->opts.debug) { // "long long" here is to avoid warnings with printf...
@@ -739,9 +738,6 @@ int finufft_makeplan_t(int type, int dim, const BIGINT *n_modes, int iflag, int
     if (!p->fwBatch) { // we don't catch all such mallocs, just this big one
       fprintf(stderr, "[%s] FFTW malloc failed for fwBatch (working fine grids)!\n",
               __func__);
-      free(p->phiHat1);
-      free(p->phiHat2);
-      free(p->phiHat3);
       return FINUFFT_ERR_ALLOC;
     }
 
@@ -756,13 +752,10 @@ int finufft_makeplan_t(int type, int dim, const BIGINT *n_modes, int iflag, int
 
     if (p->opts.debug) printf("[%s] %dd%d: ntrans=%d\n", __func__, dim, type, ntrans);
     // in case destroy occurs before setpts, need safe dummy ptrs/plans...
-    p->CpBatch     = NULL;
     p->fwBatch     = NULL;
     p->Sp          = NULL;
     p->Tp          = NULL;
     p->Up          = NULL;
-    p->prephase    = NULL;
-    p->deconv      = NULL;
     p->innerT2plan = NULL;
     // Type 3 will call finufft_makeplan for type 2; no need to init FFTW
     // Note we don't even know nj or nk yet, so can't do anything else!
@@ -810,15 +803,7 @@ int finufft_setpts_t(FINUFFT_PLAN_T<TF> *p, BIGINT nj, TF *xj, TF *yj, TF *zj, B
     if (ier) // no warnings allowed here
       return ier;
     timer.restart();
-    // Free sortIndices if it has been allocated before in case of repeated setpts
-    // calls causing memory leak. We don't know it is the same size as before, so we
-    // have to malloc each time.
-    if (p->sortIndices) free(p->sortIndices);
-    p->sortIndices = (BIGINT *)malloc(sizeof(BIGINT) * p->nj);
-    if (!p->sortIndices) {
-      fprintf(stderr, "[%s] failed to allocate sortIndices!\n", __func__);
-      return FINUFFT_ERR_SPREAD_ALLOC;
-    }
+    p->sortIndices.resize(p->nj);
     p->didSort =
         indexSort(p->sortIndices, p->nf1, p->nf2, p->nf3, p->nj, xj, yj, zj, p->spopts);
     if (p->opts.debug)
@@ -884,18 +869,13 @@ int finufft_setpts_t(FINUFFT_PLAN_T<TF> *p, BIGINT nj, TF *xj, TF *yj, TF *zj, B
     p->fftPlan->free(p->fwBatch);
     p->fwBatch = p->fftPlan->alloc_complex(p->nf * p->batchSize); // maybe big workspace
 
-    // (note FFTW_ALLOC is not needed over malloc, but matches its type)
-    if (p->CpBatch) free(p->CpBatch);
-    p->CpBatch =
-        (std::complex<TF> *)malloc(sizeof(std::complex<TF>) * nj * p->batchSize); // batch
-                                                                                  // c'
-                                                                                  // work
+    p->CpBatch.resize(nj * p->batchSize);                         // batch c' work
 
     if (p->opts.debug)
       printf("[%s t3] widcen, batch %.2fGB alloc:\t%.3g s\n", __func__,
              (double)1E-09 * sizeof(std::complex<TF>) * (p->nf + nj) * p->batchSize,
              timer.elapsedsec());
-    if (!p->fwBatch || !p->CpBatch) {
+    if (!p->fwBatch) {
       fprintf(stderr, "[%s t3] malloc fail for fwBatch or CpBatch!\n", __func__);
       return FINUFFT_ERR_ALLOC;
     }
@@ -935,8 +915,7 @@ int finufft_setpts_t(FINUFFT_PLAN_T<TF> *p, BIGINT nj, TF *xj, TF *yj, TF *zj, B
     // set up prephase array...
     std::complex<TF> imasign =
         (p->fftSign >= 0) ? std::complex<TF>(0, 1) : std::complex<TF>(0, -1); // +-i
-    if (p->prephase) free(p->prephase);
-    p->prephase = (std::complex<TF> *)malloc(sizeof(std::complex<TF>) * nj);
+    p->prephase.resize(nj);
     if (p->t3P.D1 != 0.0 || p->t3P.D2 != 0.0 || p->t3P.D3 != 0.0) {
 #pragma omp parallel for num_threads(p->opts.nthreads) schedule(static)
       for (BIGINT j = 0; j < nj; ++j) { // ... loop over src NU locs
@@ -963,17 +942,16 @@ int finufft_setpts_t(FINUFFT_PLAN_T<TF> *p, BIGINT nj, TF *xj, TF *yj, TF *zj, B
     }
     // (old STEP 3a) Compute deconvolution post-factors array (per targ pt)...
     // (exploits that FT separates because kernel is prod of 1D funcs)
-    if (p->deconv) free(p->deconv);
-    p->deconv    = (std::complex<TF> *)malloc(sizeof(std::complex<TF>) * nk);
-    TF *phiHatk1 = (TF *)malloc(sizeof(TF) * nk);       // don't confuse w/ p->phiHat
+    p->deconv.resize(nk);
+    std::vector<TF> phiHatk1(nk);                       // don't confuse w/ p->phiHat
     onedim_nuft_kernel(nk, p->Sp, phiHatk1, p->spopts); // fill phiHat1
-    TF *phiHatk2 = NULL, *phiHatk3 = NULL;
+    std::vector<TF> phiHatk2, phiHatk3;
     if (d > 1) {
-      phiHatk2 = (TF *)malloc(sizeof(TF) * nk);
+      phiHatk2.resize(nk);
       onedim_nuft_kernel(nk, p->Tp, phiHatk2, p->spopts); // fill phiHat2
     }
     if (d > 2) {
-      phiHatk3 = (TF *)malloc(sizeof(TF) * nk);
+      phiHatk3.resize(nk);
       onedim_nuft_kernel(nk, p->Up, phiHatk3, p->spopts); // fill phiHat3
     }
     int Cfinite =
@@ -995,23 +973,12 @@ int finufft_setpts_t(FINUFFT_PLAN_T<TF> *p, BIGINT nj, TF *xj, TF *yj, TF *zj, B
         p->deconv[k] *= cos(phase) + imasign * sin(phase); // Euler e^{+-i.phase}
       }
     }
-    free(phiHatk1);
-    free(phiHatk2);
-    free(phiHatk3); // done w/ deconv fill
     if (p->opts.debug)
       printf("[%s t3] phase & deconv factors:\t%.3g s\n", __func__, timer.elapsedsec());
 
     // Set up sort for spreading Cp (from primed NU src pts X, Y, Z) to fw...
     timer.restart();
-    // Free sortIndices if it has been allocated before in case of repeated setpts
-    // calls causing memory leak. We don't know it is the same size as before, so we
-    // have to malloc each time.
-    if (p->sortIndices) free(p->sortIndices);
-    p->sortIndices = (BIGINT *)malloc(sizeof(BIGINT) * p->nj);
-    if (!p->sortIndices) {
-      fprintf(stderr, "[%s t3] failed to allocate sortIndices!\n", __func__);
-      return FINUFFT_ERR_SPREAD_ALLOC;
-    }
+    p->sortIndices.resize(p->nj);
     p->didSort = indexSort(p->sortIndices, p->nf1, p->nf2, p->nf3, p->nj, p->X, p->Y,
                            p->Z, p->spopts);
     if (p->opts.debug)
@@ -1167,8 +1134,8 @@ int finufft_execute_t(FINUFFT_PLAN_T<TF> *p, std::complex<TF> *cj, std::complex<
 
       // STEP 1: spread c'_j batch (x'_j NU pts) into fw batch grid...
       timer.restart();
-      p->spopts.spread_direction = 1;                            // spread
-      spreadinterpSortedBatch<TF>(thisBatchSize, p, p->CpBatch); // p->X are primed
+      p->spopts.spread_direction = 1;                                   // spread
+      spreadinterpSortedBatch<TF>(thisBatchSize, p, p->CpBatch.data()); // p->X are primed
       t_spr += timer.elapsedsec();
 
       // STEP 2: type 2 NUFFT from fw batch to user output fk array batch...
@@ -1213,23 +1180,16 @@ template<typename TF> FINUFFT_PLAN_T<TF>::~FINUFFT_PLAN_T() {
   // Thus either each thing free'd here is guaranteed to be NULL or correctly
   // allocated.
   if (fftPlan) fftPlan->free(fwBatch); // free the big FFTW (or t3 spread) working array
-  free(sortIndices);
   if (type == 1 || type == 2) {
-    free(phiHat1);
-    free(phiHat2);
-    free(phiHat3);
-  } else {                 // free the stuff alloc for type 3 only
+  } else {                             // free the stuff alloc for type 3 only
     delete innerT2plan;
-    innerT2plan = nullptr; // if NULL, ignore its error code
-    free(CpBatch);
+    innerT2plan = nullptr;             // if NULL, ignore its error code
     free(Sp);
     free(Tp);
     free(Up);
     free(X);
     free(Y);
     free(Z);
-    free(prephase);
-    free(deconv);
   }
 }
 template FINUFFT_PLAN_T<float>::~FINUFFT_PLAN_T();
diff --git a/src/spreadinterp.cpp b/src/spreadinterp.cpp
index a7a9db46..7c7309de 100644
--- a/src/spreadinterp.cpp
+++ b/src/spreadinterp.cpp
@@ -1401,9 +1401,10 @@ static void add_wrapped_subgrid(BIGINT offset1, BIGINT offset2, BIGINT offset3,
 }
 
 template<typename T>
-static void bin_sort_singlethread(
-    BIGINT *ret, UBIGINT M, const T *kx, const T *ky, const T *kz, UBIGINT N1, UBIGINT N2,
-    UBIGINT N3, double bin_size_x, double bin_size_y, double bin_size_z, int debug)
+static void bin_sort_singlethread(std::vector<BIGINT> &ret, UBIGINT M, const T *kx,
+                                  const T *ky, const T *kz, UBIGINT N1, UBIGINT N2,
+                                  UBIGINT N3, double bin_size_x, double bin_size_y,
+                                  double bin_size_z, int debug)
 /* Returns permutation of all nonuniform points with good RAM access,
  * ie less cache misses for spreading, in 1D, 2D, or 3D. Single-threaded version
  *
@@ -1475,9 +1476,10 @@ static void bin_sort_singlethread(
 }
 
 template<typename T>
-static void bin_sort_multithread(
-    BIGINT *ret, UBIGINT M, T *kx, T *ky, T *kz, UBIGINT N1, UBIGINT N2, UBIGINT N3,
-    double bin_size_x, double bin_size_y, double bin_size_z, int debug, int nthr)
+static void bin_sort_multithread(std::vector<BIGINT> &ret, UBIGINT M, T *kx, T *ky, T *kz,
+                                 UBIGINT N1, UBIGINT N2, UBIGINT N3, double bin_size_x,
+                                 double bin_size_y, double bin_size_z, int debug,
+                                 int nthr)
 /* Mostly-OpenMP'ed version of bin_sort.
    For documentation see: bin_sort_singlethread.
    Caution: when M (# NU pts) << N (# U pts), is SLOWER than single-thread.
@@ -1690,15 +1692,10 @@ FINUFFT_EXPORT int FINUFFT_CDECL spreadinterp(
 {
   int ier = spreadcheck(N1, N2, N3, M, kx, ky, kz, opts);
   if (ier) return ier;
-  BIGINT *sort_indices = (BIGINT *)malloc(sizeof(BIGINT) * M);
-  if (!sort_indices) {
-    fprintf(stderr, "%s failed to allocate sort_indices!\n", __func__);
-    return FINUFFT_ERR_SPREAD_ALLOC;
-  }
+  std::vector<BIGINT> sort_indices(M);
   int did_sort = indexSort(sort_indices, N1, N2, N3, M, kx, ky, kz, opts);
   spreadinterpSorted<T>(sort_indices, N1, N2, N3, data_uniform, M, kx, ky, kz,
                         data_nonuniform, opts, did_sort);
-  free(sort_indices);
   return 0;
 }
 
@@ -1749,8 +1746,8 @@ template int spreadcheck<double>(UBIGINT N1, UBIGINT N2, UBIGINT N3, UBIGINT M,
                                  const finufft_spread_opts &opts);
 
 template<typename T>
-int indexSort(BIGINT *sort_indices, UBIGINT N1, UBIGINT N2, UBIGINT N3, UBIGINT M, T *kx,
-              T *ky, T *kz, const finufft_spread_opts &opts)
+int indexSort(std::vector<BIGINT> &sort_indices, UBIGINT N1, UBIGINT N2, UBIGINT N3,
+              UBIGINT M, T *kx, T *ky, T *kz, const finufft_spread_opts &opts)
 /* This makes a decision whether or not to sort the NU pts (influenced by
    opts.sort), and if yes, calls either single- or multi-threaded bin sort,
    writing reordered index list to sort_indices. If decided not to sort, the
@@ -1825,17 +1822,17 @@ int indexSort(BIGINT *sort_indices, UBIGINT N1, UBIGINT N2, UBIGINT N3, UBIGINT
   }
   return did_sort;
 }
-template int indexSort<float>(BIGINT *sort_indices, UBIGINT N1, UBIGINT N2, UBIGINT N3,
-                              UBIGINT M, float *kx, float *ky, float *kz,
+template int indexSort<float>(std::vector<BIGINT> &sort_indices, UBIGINT N1, UBIGINT N2,
+                              UBIGINT N3, UBIGINT M, float *kx, float *ky, float *kz,
                               const finufft_spread_opts &opts);
-template int indexSort<double>(BIGINT *sort_indices, UBIGINT N1, UBIGINT N2, UBIGINT N3,
-                               UBIGINT M, double *kx, double *ky, double *kz,
+template int indexSort<double>(std::vector<BIGINT> &sort_indices, UBIGINT N1, UBIGINT N2,
+                               UBIGINT N3, UBIGINT M, double *kx, double *ky, double *kz,
                                const finufft_spread_opts &opts);
 
 // --------------------------------------------------------------------------
 template<typename T>
-static int spreadSorted(const BIGINT *sort_indices, UBIGINT N1, UBIGINT N2, UBIGINT N3,
-                        T *FINUFFT_RESTRICT data_uniform, UBIGINT M,
+static int spreadSorted(const std::vector<BIGINT> &sort_indices, UBIGINT N1, UBIGINT N2,
+                        UBIGINT N3, T *FINUFFT_RESTRICT data_uniform, UBIGINT M,
                         T *FINUFFT_RESTRICT kx, T *FINUFFT_RESTRICT ky,
                         T *FINUFFT_RESTRICT kz, const T *data_nonuniform,
                         const finufft_spread_opts &opts, int did_sort)
@@ -1961,8 +1958,8 @@ static int spreadSorted(const BIGINT *sort_indices, UBIGINT N1, UBIGINT N2, UBIG
 // --------------------------------------------------------------------------
 template<typename T, uint16_t ns, uint16_t kerevalmeth>
 FINUFFT_NEVER_INLINE static int interpSorted_kernel(
-    const BIGINT *sort_indices, const UBIGINT N1, const UBIGINT N2, const UBIGINT N3,
-    const T *data_uniform, const UBIGINT M, T *FINUFFT_RESTRICT kx,
+    const std::vector<BIGINT> &sort_indices, const UBIGINT N1, const UBIGINT N2,
+    const UBIGINT N3, const T *data_uniform, const UBIGINT M, T *FINUFFT_RESTRICT kx,
     T *FINUFFT_RESTRICT ky, T *FINUFFT_RESTRICT kz, T *FINUFFT_RESTRICT data_nonuniform,
     const finufft_spread_opts &opts)
 // Interpolate to NU pts in sorted order from a uniform grid.
@@ -2069,10 +2066,10 @@ FINUFFT_NEVER_INLINE static int interpSorted_kernel(
 
 template<typename T, uint16_t NS>
 static int interpSorted_dispatch(
-    const BIGINT *sort_indices, const UBIGINT N1, const UBIGINT N2, const UBIGINT N3,
-    T *FINUFFT_RESTRICT data_uniform, const UBIGINT M, T *FINUFFT_RESTRICT kx,
-    T *FINUFFT_RESTRICT ky, T *FINUFFT_RESTRICT kz, T *FINUFFT_RESTRICT data_nonuniform,
-    const finufft_spread_opts &opts) {
+    const std::vector<BIGINT> &sort_indices, const UBIGINT N1, const UBIGINT N2,
+    const UBIGINT N3, T *FINUFFT_RESTRICT data_uniform, const UBIGINT M,
+    T *FINUFFT_RESTRICT kx, T *FINUFFT_RESTRICT ky, T *FINUFFT_RESTRICT kz,
+    T *FINUFFT_RESTRICT data_nonuniform, const finufft_spread_opts &opts) {
   static_assert(MIN_NSPREAD <= NS && NS <= MAX_NSPREAD,
                 "NS must be in the range (MIN_NSPREAD, MAX_NSPREAD)");
   if constexpr (NS == MIN_NSPREAD) { // Base case
@@ -2100,19 +2097,19 @@ static int interpSorted_dispatch(
 }
 
 template<typename T>
-static int interpSorted(const BIGINT *sort_indices, const UBIGINT N1, const UBIGINT N2,
-                        const UBIGINT N3, T *FINUFFT_RESTRICT data_uniform,
-                        const UBIGINT M, T *FINUFFT_RESTRICT kx, T *FINUFFT_RESTRICT ky,
-                        T *FINUFFT_RESTRICT kz, T *FINUFFT_RESTRICT data_nonuniform,
-                        const finufft_spread_opts &opts) {
+static int interpSorted(
+    const std::vector<BIGINT> &sort_indices, const UBIGINT N1, const UBIGINT N2,
+    const UBIGINT N3, T *FINUFFT_RESTRICT data_uniform, const UBIGINT M,
+    T *FINUFFT_RESTRICT kx, T *FINUFFT_RESTRICT ky, T *FINUFFT_RESTRICT kz,
+    T *FINUFFT_RESTRICT data_nonuniform, const finufft_spread_opts &opts) {
   return interpSorted_dispatch<T, MAX_NSPREAD>(sort_indices, N1, N2, N3, data_uniform, M,
                                                kx, ky, kz, data_nonuniform, opts);
 }
 
 template<typename T>
-int spreadinterpSorted(const BIGINT *sort_indices, const UBIGINT N1, const UBIGINT N2,
-                       const UBIGINT N3, T *data_uniform, const UBIGINT M,
-                       T *FINUFFT_RESTRICT kx, T *FINUFFT_RESTRICT ky,
+int spreadinterpSorted(const std::vector<BIGINT> &sort_indices, const UBIGINT N1,
+                       const UBIGINT N2, const UBIGINT N3, T *data_uniform,
+                       const UBIGINT M, T *FINUFFT_RESTRICT kx, T *FINUFFT_RESTRICT ky,
                        T *FINUFFT_RESTRICT kz, T *FINUFFT_RESTRICT data_nonuniform,
                        const finufft_spread_opts &opts, int did_sort)
 /* Logic to select the main spreading (dir=1) vs interpolation (dir=2) routine.
@@ -2132,14 +2129,14 @@ int spreadinterpSorted(const BIGINT *sort_indices, const UBIGINT N1, const UBIGI
   return 0;
 }
 template int spreadinterpSorted<float>(
-    const BIGINT *sort_indices, const UBIGINT N1, const UBIGINT N2, const UBIGINT N3,
-    float *data_uniform, const UBIGINT M, float *FINUFFT_RESTRICT kx,
+    const std::vector<BIGINT> &sort_indices, const UBIGINT N1, const UBIGINT N2,
+    const UBIGINT N3, float *data_uniform, const UBIGINT M, float *FINUFFT_RESTRICT kx,
     float *FINUFFT_RESTRICT ky, float *FINUFFT_RESTRICT kz,
     float *FINUFFT_RESTRICT data_nonuniform, const finufft_spread_opts &opts,
     int did_sort);
 template int spreadinterpSorted<double>(
-    const BIGINT *sort_indices, const UBIGINT N1, const UBIGINT N2, const UBIGINT N3,
-    double *data_uniform, const UBIGINT M, double *FINUFFT_RESTRICT kx,
+    const std::vector<BIGINT> &sort_indices, const UBIGINT N1, const UBIGINT N2,
+    const UBIGINT N3, double *data_uniform, const UBIGINT M, double *FINUFFT_RESTRICT kx,
     double *FINUFFT_RESTRICT ky, double *FINUFFT_RESTRICT kz,
     double *FINUFFT_RESTRICT data_nonuniform, const finufft_spread_opts &opts,
     int did_sort);