cuda: convert tests to 64-bit M

flatironinstitute · Dec 27, 2023
commit b4e1c4e (1 parent: 0434f6d)

Showing 6 changed files with 34 additions and 33 deletions.
diff --git a/test/cuda/cufinufft1d_test.cu b/test/cuda/cufinufft1d_test.cu
@@ -17,7 +17,7 @@
 using cufinufft::utils::infnorm;
 
 template <typename T>
-int run_test(int method, int type, int N1, int M, T tol, T checktol, int iflag) {
+int run_test(int method, int type, int N1, int64_t M, T tol, T checktol, int iflag) {
     std::cout << std::scientific << std::setprecision(3);
     int ier;
 
@@ -191,7 +191,7 @@ int main(int argc, char *argv[]) {
     const int method = atoi(argv[1]);
     const int type = atoi(argv[2]);
     const int N1 = atof(argv[3]);
-    const int M = atof(argv[4]);
+    const int64_t M = atof(argv[4]);
     const double tol = atof(argv[5]);
     const double checktol = atof(argv[6]);
     const int iflag = 1;

diff --git a/test/cuda/cufinufft2d1nupts_test.cu b/test/cuda/cufinufft2d1nupts_test.cu
@@ -23,8 +23,8 @@ int run_test(int method) {
     int N1 = 100;
     int N2 = 100;
     int N = N1 * N2;
-    int M1 = N1 * N2;
-    int M2 = 2 * N1 * N2;
+    int64_t M1 = N1 * N2;
+    int64_t M2 = 2 * N1 * N2;
 
     T tol = 1e-5;
     int iflag = 1;
@@ -47,14 +47,14 @@ int run_test(int method) {
     auto randm11 = [&eng, &dist11]() { return dist11(eng); };
 
     // Making data
-    for (int i = 0; i < M1; i++) {
+    for (int64_t i = 0; i < M1; i++) {
         x1[i] = M_PI * randm11(); // x in [-pi,pi)
         y1[i] = M_PI * randm11();
         c1[i].real(randm11());
         c1[i].imag(randm11());
     }
 
-    for (int i = 0; i < M2; i++) {
+    for (int64_t i = 0; i < M2; i++) {
         x2[i] = M_PI * randm11(); // x in [-pi,pi)
         y2[i] = M_PI * randm11();
         c2[i].real(randm11());
@@ -184,14 +184,14 @@ int run_test(int method) {
 
     int nt1 = (int)(0.37 * N1), nt2 = (int)(0.26 * N2); // choose some mode index to check
     thrust::complex<T> Ft(0, 0), J(0, iflag);
-    for (int j = 0; j < M1; ++j)
+    for (int64_t j = 0; j < M1; ++j)
         Ft += c1[j] * exp(J * (nt1 * x1[j] + nt2 * y1[j])); // crude direct
     int it = N1 / 2 + nt1 + N1 * (N2 / 2 + nt2);            // index in complex F as 1d array
 
     printf("[gpu   ] one mode: rel err in F[%d,%d] is %.3g (set 1)\n", (int)nt1, (int)nt2,
            abs(Ft - fk1[it]) / infnorm(N, (std::complex<T> *)fk1.data()));
     Ft = thrust::complex<T>(0, 0);
-    for (int j = 0; j < M2; ++j)
+    for (int64_t j = 0; j < M2; ++j)
         Ft += c2[j] * exp(J * (nt1 * x2[j] + nt2 * y2[j])); // crude direct
     printf("[gpu   ] one mode: rel err in F[%d,%d] is %.3g (set 2)\n", (int)nt1, (int)nt2,
            abs(Ft - fk2[it]) / infnorm(N, (std::complex<T> *)fk2.data()));

diff --git a/test/cuda/cufinufft2d_test.cu b/test/cuda/cufinufft2d_test.cu
@@ -18,7 +18,7 @@
 using cufinufft::utils::infnorm;
 
 template <typename T>
-int run_test(int method, int type, int N1, int N2, int M, T tol, T checktol, int iflag) {
+int run_test(int method, int type, int N1, int N2, int64_t M, T tol, T checktol, int iflag) {
     std::cout << std::scientific << std::setprecision(3);
 
     thrust::host_vector<T> x(M), y(M);
@@ -32,12 +32,12 @@ int run_test(int method, int type, int N1, int N2, int M, T tol, T checktol, int
     auto randm11 = [&eng, &dist11]() { return dist11(eng); };
 
     // Making data
-    for (int i = 0; i < M; i++) {
+    for (int64_t i = 0; i < M; i++) {
         x[i] = M_PI * randm11(); // x in [-pi,pi)
         y[i] = M_PI * randm11();
     }
     if (type == 1) {
-        for (int i = 0; i < M; i++) {
+        for (int64_t i = 0; i < M; i++) {
             c[i].real(randm11());
             c[i].imag(randm11());
         }
@@ -148,14 +148,14 @@ int run_test(int method, int type, int N1, int N2, int M, T tol, T checktol, int
         const int nt1 = 0.37 * N1;
         const int nt2 = 0.26 * N2; // choose some mode index to check
         thrust::complex<T> Ft = thrust::complex<T>(0, 0), J = thrust::complex<T>(0.0, iflag);
-        for (int j = 0; j < M; ++j)
+        for (int64_t j = 0; j < M; ++j)
             Ft += c[j] * exp(J * (nt1 * x[j] + nt2 * y[j])); // crude direct
         const int it = N1 / 2 + nt1 + N1 * (N2 / 2 + nt2);   // index in complex F as 1d array
 
         rel_error = abs(Ft - fk[it]) / infnorm(N1, (std::complex<T> *)fk.data());
         printf("[gpu   ] one mode: rel err in F[%d,%d] is %.3g\n", nt1, nt2, rel_error);
     } else if (type == 2) {
-        int jt = M / 2; // check arbitrary choice of one targ pt
+        int64_t jt = M / 2; // check arbitrary choice of one targ pt
         thrust::complex<T> J = thrust::complex<T>(0, iflag);
         thrust::complex<T> ct = thrust::complex<T>(0, 0);
 
@@ -190,7 +190,7 @@ int main(int argc, char *argv[]) {
     const int type = atoi(argv[2]);
     const int N1 = atof(argv[3]);
     const int N2 = atof(argv[4]);
-    const int M = atof(argv[5]);
+    const int64_t M = atof(argv[5]);
     const double tol = atof(argv[6]);
     const double checktol = atof(argv[7]);
     const char prec = argv[8][0];

diff --git a/test/cuda/cufinufft2dmany_test.cu b/test/cuda/cufinufft2dmany_test.cu
@@ -18,7 +18,7 @@
 using cufinufft::utils::infnorm;
 
 template <typename T>
-int run_test(int method, int type, int N1, int N2, int ntransf, int maxbatchsize, int M, T tol, T checktol, int iflag) {
+int run_test(int method, int type, int N1, int N2, int ntransf, int maxbatchsize, int64_t M, T tol, T checktol, int iflag) {
     std::cout << std::scientific << std::setprecision(3);
 
     int ier;
@@ -36,12 +36,12 @@ int run_test(int method, int type, int N1, int N2, int ntransf, int maxbatchsize
     auto randm11 = [&eng, &dist11]() { return dist11(eng); };
 
     // Making data
-    for (int i = 0; i < M; i++) {
+    for (int64_t i = 0; i < M; i++) {
         x[i] = M_PI * randm11(); // x in [-pi,pi)
         y[i] = M_PI * randm11();
     }
     if (type == 1) {
-        for (int i = 0; i < ntransf * M; i++) {
+        for (int64_t i = 0; i < ntransf * M; i++) {
             c[i].real(randm11());
             c[i].imag(randm11());
         }
@@ -147,7 +147,7 @@ int run_test(int method, int type, int N1, int N2, int ntransf, int maxbatchsize
         int i = ntransf - 1;                                // // choose some data to check
         int nt1 = (int)(0.37 * N1), nt2 = (int)(0.26 * N2); // choose some mode index to check
         thrust::complex<T> Ft = thrust::complex<T>(0, 0), J = thrust::complex<T>(0.0, iflag);
-        for (int j = 0; j < M; ++j)
+        for (int64_t j = 0; j < M; ++j)
             Ft += c[j + i * M] * exp(J * (nt1 * x[j] + nt2 * y[j])); // crude direct
         int it = N1 / 2 + nt1 + N1 * (N2 / 2 + nt2);                 // index in complex F as 1d array
         rel_error = abs(Ft - fk[it + i * N]) / infnorm(N1, (std::complex<T> *)fk.data() + i * N);
@@ -156,7 +156,7 @@ int run_test(int method, int type, int N1, int N2, int ntransf, int maxbatchsize
         const int t = ntransf - 1;
         thrust::complex<T> *fkstart = fk.data() + t * N1 * N2;
         const thrust::complex<T> *cstart = c.data() + t * M;
-        const int jt = M / 2; // check arbitrary choice of one targ pt
+        const int64_t jt = M / 2; // check arbitrary choice of one targ pt
         const thrust::complex<T> J(0, iflag);
         thrust::complex<T> ct(0, 0);
         int m = 0;
@@ -196,7 +196,7 @@ int main(int argc, char *argv[]) {
     const int N2 = atof(argv[4]);
     const int ntransf = atof(argv[5]);
     const int maxbatchsize = atoi(argv[6]);
-    const int M = atoi(argv[7]);
+    const int64_t M = atoi(argv[7]);
     const double tol = atof(argv[8]);
     const double checktol = atof(argv[9]);
     const char prec = argv[10][0];

diff --git a/test/cuda/cufinufft3d_test.cu b/test/cuda/cufinufft3d_test.cu
@@ -18,7 +18,7 @@
 using cufinufft::utils::infnorm;
 
 template <typename T>
-int run_test(int method, int type, int N1, int N2, int N3, int M, T tol, T checktol, int iflag) {
+int run_test(int method, int type, int N1, int N2, int N3, int64_t M, T tol, T checktol, int iflag) {
     std::cout << std::scientific << std::setprecision(3);
     int ier;
 
@@ -33,13 +33,13 @@ int run_test(int method, int type, int N1, int N2, int N3, int M, T tol, T check
     auto randm11 = [&eng, &dist11]() { return dist11(eng); };
 
     // Making data
-    for (int i = 0; i < M; i++) {
+    for (int64_t i = 0; i < M; i++) {
         x[i] = M_PI * randm11(); // x in [-pi,pi)
         y[i] = M_PI * randm11();
         z[i] = M_PI * randm11();
     }
     if (type == 1) {
-        for (int i = 0; i < M; i++) {
+        for (int64_t i = 0; i < M; i++) {
             c[i].real(randm11());
             c[i].imag(randm11());
         }
@@ -154,14 +154,14 @@ int run_test(int method, int type, int N1, int N2, int N3, int M, T tol, T check
     if (type == 1) {
         int nt1 = (int)(0.37 * N1), nt2 = (int)(0.26 * N2), nt3 = (int)(0.13 * N3); // choose some mode index to check
         thrust::complex<T> Ft = thrust::complex<T>(0, 0), J = thrust::complex<T>(0.0, iflag);
-        for (int j = 0; j < M; ++j)
+        for (int64_t j = 0; j < M; ++j)
             Ft += c[j] * exp(J * (nt1 * x[j] + nt2 * y[j] + nt3 * z[j])); // crude direct
 
         int it = N1 / 2 + nt1 + N1 * (N2 / 2 + nt2) + N1 * N2 * (N3 / 2 + nt3); // index in complex F as 1d array
         rel_error = abs(Ft - fk[it]) / infnorm(N1, (std::complex<T> *)fk.data());
         printf("[gpu   ] one mode: rel err in F[%d,%d,%d] is %.3g\n", nt1, nt2, nt3, rel_error);
     } else if (type == 2) {
-        int jt = M / 2; // check arbitrary choice of one targ pt
+        int64_t jt = M / 2; // check arbitrary choice of one targ pt
         thrust::complex<T> J = thrust::complex<T>(0, iflag);
         thrust::complex<T> ct = thrust::complex<T>(0, 0);
 
@@ -199,7 +199,7 @@ int main(int argc, char *argv[]) {
     const int N1 = atof(argv[3]);
     const int N2 = atof(argv[4]);
     const int N3 = atof(argv[5]);
-    const int M = atof(argv[6]);
+    const int64_t M = atof(argv[6]);
     const double tol = atof(argv[7]);
     const double checktol = atof(argv[8]);
     const char prec = argv[9][0];

diff --git a/test/cuda/public_api_test.c b/test/cuda/public_api_test.c
@@ -6,7 +6,7 @@
 
 #include <complex.h>
 
-int test_float(int M, int N) {
+int test_float(int64_t M, int N) {
     // Size of the grid as an array.
     int64_t modes[1] = {N};
 
@@ -35,7 +35,7 @@ int test_float(int M, int N) {
     // while strengths can be any value.
     srand(0);
 
-    for(int j = 0; j < M; ++j) {
+    for(int64_t j = 0; j < M; ++j) {
         x[j] = 2 * M_PI * (((float) rand()) / RAND_MAX - 1);
         c[j] = (2 * ((float) rand()) / RAND_MAX - 1)
                + I * (2 * ((float) rand()) / RAND_MAX - 1);
@@ -79,7 +79,7 @@ int test_float(int M, int N) {
     // transform.
     f0 = 0;
 
-    for(int j = 0; j < M; ++j) {
+    for(int64_t j = 0; j < M; ++j) {
         f0 += c[j] * cexp(I * x[j] * (idx - N / 2));
     }
 
@@ -93,7 +93,7 @@ int test_float(int M, int N) {
     return 0;
 }
 
-int test_double(int M, int N) {
+int test_double(int64_t M, int N) {
     // Size of the grid as an array.
     int64_t modes[1] = {N};
 
@@ -122,7 +122,7 @@ int test_double(int M, int N) {
     // while strengths can be any value.
     srand(0);
 
-    for(int j = 0; j < M; ++j) {
+    for(int64_t j = 0; j < M; ++j) {
         x[j] = 2 * M_PI * (((double) rand()) / RAND_MAX - 1);
         c[j] = (2 * ((double) rand()) / RAND_MAX - 1)
                + I * (2 * ((double) rand()) / RAND_MAX - 1);
@@ -166,7 +166,7 @@ int test_double(int M, int N) {
     // transform.
     f0 = 0;
 
-    for(int j = 0; j < M; ++j) {
+    for(int64_t j = 0; j < M; ++j) {
         f0 += c[j] * cexp(I * x[j] * (idx - N / 2));
     }
 
@@ -182,7 +182,8 @@ int test_double(int M, int N) {
 
 int main() {
     // Problem size: number of nonuniform points (M) and grid size (N).
-    const int M = 100, N = 200;
+    const int64_t M = 100;
+    const int N = 200;
     int errf = test_float(M, N);
     int err = test_double(M, N);