Skip to content

Commit

Permalink
De-macroize FFT (#558)
Browse files Browse the repository at this point in the history
De-macroize FFT remove ifdefs from finufft.cpp
  • Loading branch information
mreineck authored Sep 24, 2024
1 parent 4a1c99e commit 7113b0e
Show file tree
Hide file tree
Showing 9 changed files with 272 additions and 210 deletions.
67 changes: 43 additions & 24 deletions include/finufft/defs.h
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@
// public header gives access to f_opts, f_spread_opts, f_plan...
// (and clobbers FINUFFT* macros; watch out!)
#include <finufft.h>
#include <memory>

// --------------- Private data types for compilation in either prec ---------
// Devnote: must match those in relevant prec of public finufft.h interface!
Expand Down Expand Up @@ -89,7 +90,7 @@ inline constexpr BIGINT MAX_NF = BIGINT(1e12);
inline constexpr BIGINT MAX_NU_PTS = BIGINT(1e14);

// -------------- Math consts (not in math.h) and useful math macros ----------
#include <math.h>
#include <cmath>

// either-precision unit imaginary number...
#define IMA (CPX(0.0, 1.0))
Expand Down Expand Up @@ -118,11 +119,11 @@ inline constexpr BIGINT MAX_NU_PTS = BIGINT(1e14);
// These macros should probably be replaced by modern C++ std lib or random123.
// (RAND_MAX is in stdlib.h)
#include <stdlib.h>
// Random-number helpers built on rand(), so they all draw from rand()'s
// single global generator state.
// NOTE(review): each helper is listed twice below, once without and once with
// [[maybe_unused]]. This looks like both sides of a rendered diff rather than
// compilable source; confirm which form the real header keeps.
// rand01: rand() divided by RAND_MAX, i.e. a value in [0,1]:
static inline FLT rand01() { return FLT(rand()) / FLT(RAND_MAX); }
static inline FLT rand01 [[maybe_unused]] () { return FLT(rand()) / FLT(RAND_MAX); }
// unif[-1,1]: rand01() mapped affinely via 2*r - 1.
static inline FLT randm11() { return 2 * rand01() - FLT(1); }
static inline FLT randm11 [[maybe_unused]] () { return 2 * rand01() - FLT(1); }
// complex unif[-1,1] for Re and Im: two separate randm11() draws, the second
// multiplied by IMA to form the imaginary part.
static inline CPX crandm11() { return randm11() + IMA * randm11(); }
static inline CPX crandm11 [[maybe_unused]] () { return randm11() + IMA * randm11(); }

// Thread-safe seed-carrying versions of above (x is ptr to seed)...
// MR: we have to leave those as macros for now, as "rand_r" is deprecated
Expand All @@ -134,11 +135,17 @@ static inline CPX crandm11() { return randm11() + IMA * randm11(); }
// complex unif[-1,1] for Re and Im:
#define crandm11r(x) (randm11r(x) + IMA * randm11r(x))
#else
// Seed-carrying counterparts of rand01/randm11/crandm11: x points to the
// caller's seed, which is handed to rand_r() on every draw.
// NOTE(review): each helper is listed twice below (a one-line form without
// [[maybe_unused]] and a multi-line form with it). This looks like both sides
// of a rendered diff; confirm which form the real header keeps.
// rand01r: rand_r(x) divided by RAND_MAX, i.e. a value in [0,1]:
static inline FLT rand01r(unsigned int *x) { return FLT(rand_r(x)) / FLT(RAND_MAX); }
static inline FLT rand01r [[maybe_unused]] (unsigned int *x) {
return FLT(rand_r(x)) / FLT(RAND_MAX);
}
// unif[-1,1]: rand01r(x) mapped affinely via 2*r - 1.
static inline FLT randm11r(unsigned int *x) { return 2 * rand01r(x) - FLT(1); }
static inline FLT randm11r [[maybe_unused]] (unsigned int *x) {
return 2 * rand01r(x) - FLT(1);
}
// complex unif[-1,1] for Re and Im: two separate randm11r(x) draws from the
// same seed, the second multiplied by IMA to form the imaginary part.
static inline CPX crandm11r(unsigned int *x) { return randm11r(x) + IMA * randm11r(x); }
static inline CPX crandm11r [[maybe_unused]] (unsigned int *x) {
return randm11r(x) + IMA * randm11r(x);
}
#endif

// ----- OpenMP macros which also work when omp not present -----
Expand All @@ -147,16 +154,24 @@ static inline CPX crandm11r(unsigned int *x) { return randm11r(x) + IMA * randm1
#ifdef _OPENMP
#include <omp.h>
// point to actual omp utils
// OpenMP build: each MY_OMP_* wrapper forwards directly to the omp_* routine
// of the same name and returns its result unchanged.
// NOTE(review): every wrapper is listed twice below (a one-line form without
// [[maybe_unused]] and a multi-line form with it). This looks like both sides
// of a rendered diff; confirm which form the real header keeps.
static inline int MY_OMP_GET_NUM_THREADS() { return omp_get_num_threads(); }
static inline int MY_OMP_GET_MAX_THREADS() { return omp_get_max_threads(); }
static inline int MY_OMP_GET_THREAD_NUM() { return omp_get_thread_num(); }
static inline void MY_OMP_SET_NUM_THREADS(int x) { omp_set_num_threads(x); }
// Forwards to omp_get_num_threads().
static inline int MY_OMP_GET_NUM_THREADS [[maybe_unused]] () {
return omp_get_num_threads();
}
// Forwards to omp_get_max_threads().
static inline int MY_OMP_GET_MAX_THREADS [[maybe_unused]] () {
return omp_get_max_threads();
}
// Forwards to omp_get_thread_num().
static inline int MY_OMP_GET_THREAD_NUM [[maybe_unused]] () {
return omp_get_thread_num();
}
// Forwards x to omp_set_num_threads().
static inline void MY_OMP_SET_NUM_THREADS [[maybe_unused]] (int x) {
omp_set_num_threads(x);
}
#else
// non-omp safe dummy versions of omp utils...
// Non-OpenMP build: the same MY_OMP_* names as the OpenMP branch, but with
// fixed answers. Both thread counts are 1, the thread number is 0, and a
// request to set the thread count is accepted and ignored.
// NOTE(review): every dummy is listed twice below, once without and once with
// [[maybe_unused]]. This looks like both sides of a rendered diff; confirm
// which form the real header keeps.
static inline int MY_OMP_GET_NUM_THREADS() { return 1; }
static inline int MY_OMP_GET_MAX_THREADS() { return 1; }
static inline int MY_OMP_GET_THREAD_NUM() { return 0; }
static inline void MY_OMP_SET_NUM_THREADS(int) {}
static inline int MY_OMP_GET_NUM_THREADS [[maybe_unused]] () { return 1; }
static inline int MY_OMP_GET_MAX_THREADS [[maybe_unused]] () { return 1; }
static inline int MY_OMP_GET_THREAD_NUM [[maybe_unused]] () { return 0; }
static inline void MY_OMP_SET_NUM_THREADS [[maybe_unused]] (int) {}
#endif

// Prec-switching name macros (respond to SINGLE), used in lib & test sources
Expand Down Expand Up @@ -212,10 +227,17 @@ template<typename T> struct type3params {
T X3, C3, D3, h3, gam3; // z
};

typedef struct FINUFFT_PLAN_S { // the main plan object, fully C++

int type; // transform type (Rokhlin naming): 1,2 or 3
int dim; // overall dimension: 1,2 or 3
struct FINUFFT_PLAN_S { // the main plan object, fully C++
// These default and delete specifications just state the obvious,
// but are here to silence compiler warnings.
FINUFFT_PLAN_S() = default;
// Copy construction and assignment are already deleted implicitly
// because of the unique_ptr member.
FINUFFT_PLAN_S(const FINUFFT_PLAN_S &) = delete;
FINUFFT_PLAN_S &operator=(const FINUFFT_PLAN_S &) = delete;

int type; // transform type (Rokhlin naming): 1,2 or 3
int dim; // overall dimension: 1,2 or 3
int ntrans; // how many transforms to do at once (vector or "many" mode)

Check warning on line 241 in include/finufft/defs.h

View workflow job for this annotation

GitHub Actions / cmake-ci (windows-2022, msvc, /arch:SSE2, Off, Debug, cl, cl, On)

'finufftf_plan_s': '4' bytes padding added after data member 'finufftf_plan_s::ntrans'

Check warning on line 241 in include/finufft/defs.h

View workflow job for this annotation

GitHub Actions / cmake-ci (windows-2022, msvc, /arch:SSE2, Off, Debug, cl, cl, On)

'finufftf_plan_s': '4' bytes padding added after data member 'finufftf_plan_s::ntrans'

Check warning on line 241 in include/finufft/defs.h

View workflow job for this annotation

GitHub Actions / cmake-ci (windows-2022, msvc, /arch:SSE2, On, Debug, cl, cl, On)

'finufftf_plan_s': '4' bytes padding added after data member 'finufftf_plan_s::ntrans'

Check warning on line 241 in include/finufft/defs.h

View workflow job for this annotation

GitHub Actions / cmake-ci (windows-2022, msvc, /arch:SSE2, On, Debug, cl, cl, On)

'finufftf_plan_s': '4' bytes padding added after data member 'finufftf_plan_s::ntrans'

Check warning on line 241 in include/finufft/defs.h

View workflow job for this annotation

GitHub Actions / cmake-ci (windows-2022, msvc, /arch:AVX2, On, Debug, cl, cl, On)

'finufftf_plan_s': '4' bytes padding added after data member 'finufftf_plan_s::ntrans'

Check warning on line 241 in include/finufft/defs.h

View workflow job for this annotation

GitHub Actions / cmake-ci (windows-2022, msvc, /arch:AVX2, On, Debug, cl, cl, On)

'finufftf_plan_s': '4' bytes padding added after data member 'finufftf_plan_s::ntrans'

Check warning on line 241 in include/finufft/defs.h

View workflow job for this annotation

GitHub Actions / cmake-ci (windows-2022, msvc, /arch:AVX2, Off, Debug, cl, cl, On)

'finufftf_plan_s': '4' bytes padding added after data member 'finufftf_plan_s::ntrans'

Check warning on line 241 in include/finufft/defs.h

View workflow job for this annotation

GitHub Actions / cmake-ci (windows-2022, msvc, /arch:AVX2, Off, Debug, cl, cl, On)

'finufftf_plan_s': '4' bytes padding added after data member 'finufftf_plan_s::ntrans'

Check warning on line 241 in include/finufft/defs.h

View workflow job for this annotation

GitHub Actions / cmake-ci (windows-2022, msvc, native, On, Debug, cl, cl, On)

'finufftf_plan_s': '4' bytes padding added after data member 'finufftf_plan_s::ntrans'

Check warning on line 241 in include/finufft/defs.h

View workflow job for this annotation

GitHub Actions / cmake-ci (windows-2022, msvc, native, On, Debug, cl, cl, On)

'finufftf_plan_s': '4' bytes padding added after data member 'finufftf_plan_s::ntrans'

Check warning on line 241 in include/finufft/defs.h

View workflow job for this annotation

GitHub Actions / cmake-ci (windows-2022, msvc, /arch:SSE2, Off, Debug, cl, cl, Off)

'finufftf_plan_s': '4' bytes padding added after data member 'finufftf_plan_s::ntrans'

Check warning on line 241 in include/finufft/defs.h

View workflow job for this annotation

GitHub Actions / cmake-ci (windows-2022, msvc, /arch:SSE2, Off, Debug, cl, cl, Off)

'finufftf_plan_s': '4' bytes padding added after data member 'finufftf_plan_s::ntrans'

Check warning on line 241 in include/finufft/defs.h

View workflow job for this annotation

GitHub Actions / cmake-ci (windows-2022, msvc, /arch:SSE2, On, Debug, cl, cl, Off)

'finufftf_plan_s': '4' bytes padding added after data member 'finufftf_plan_s::ntrans'

Check warning on line 241 in include/finufft/defs.h

View workflow job for this annotation

GitHub Actions / cmake-ci (windows-2022, msvc, /arch:SSE2, On, Debug, cl, cl, Off)

'finufftf_plan_s': '4' bytes padding added after data member 'finufftf_plan_s::ntrans'

Check warning on line 241 in include/finufft/defs.h

View workflow job for this annotation

GitHub Actions / cmake-ci (windows-2022, msvc, native, On, Debug, cl, cl, Off)

'finufftf_plan_s': '4' bytes padding added after data member 'finufftf_plan_s::ntrans'

Check warning on line 241 in include/finufft/defs.h

View workflow job for this annotation

GitHub Actions / cmake-ci (windows-2022, msvc, native, On, Debug, cl, cl, Off)

'finufftf_plan_s': '4' bytes padding added after data member 'finufftf_plan_s::ntrans'

Check warning on line 241 in include/finufft/defs.h

View workflow job for this annotation

GitHub Actions / cmake-ci (windows-2022, msvc, /arch:AVX2, On, Debug, cl, cl, Off)

'finufftf_plan_s': '4' bytes padding added after data member 'finufftf_plan_s::ntrans'

Check warning on line 241 in include/finufft/defs.h

View workflow job for this annotation

GitHub Actions / cmake-ci (windows-2022, msvc, /arch:AVX2, On, Debug, cl, cl, Off)

'finufftf_plan_s': '4' bytes padding added after data member 'finufftf_plan_s::ntrans'

Check warning on line 241 in include/finufft/defs.h

View workflow job for this annotation

GitHub Actions / cmake-ci (windows-2022, msvc, native, Off, Debug, cl, cl, On)

'finufftf_plan_s': '4' bytes padding added after data member 'finufftf_plan_s::ntrans'

Check warning on line 241 in include/finufft/defs.h

View workflow job for this annotation

GitHub Actions / cmake-ci (windows-2022, msvc, native, Off, Debug, cl, cl, On)

'finufftf_plan_s': '4' bytes padding added after data member 'finufftf_plan_s::ntrans'

Check warning on line 241 in include/finufft/defs.h

View workflow job for this annotation

GitHub Actions / cmake-ci (windows-2022, msvc, native, Off, Debug, cl, cl, Off)

'finufftf_plan_s': '4' bytes padding added after data member 'finufftf_plan_s::ntrans'

Check warning on line 241 in include/finufft/defs.h

View workflow job for this annotation

GitHub Actions / cmake-ci (windows-2022, msvc, native, Off, Debug, cl, cl, Off)

'finufftf_plan_s': '4' bytes padding added after data member 'finufftf_plan_s::ntrans'

Check warning on line 241 in include/finufft/defs.h

View workflow job for this annotation

GitHub Actions / cmake-ci (windows-2022, msvc, /arch:AVX2, Off, Debug, cl, cl, Off)

'finufftf_plan_s': '4' bytes padding added after data member 'finufftf_plan_s::ntrans'

Check warning on line 241 in include/finufft/defs.h

View workflow job for this annotation

GitHub Actions / cmake-ci (windows-2022, msvc, /arch:AVX2, Off, Debug, cl, cl, Off)

'finufftf_plan_s': '4' bytes padding added after data member 'finufftf_plan_s::ntrans'
BIGINT nj; // num of NU pts in type 1,2 (for type 3, num input x pts)
BIGINT nk; // number of NU freq pts (type 3 only)
Expand Down Expand Up @@ -258,12 +280,9 @@ typedef struct FINUFFT_PLAN_S { // the main plan object, fully C++
FINUFFT_PLAN innerT2plan; // ptr used for type 2 in step 2 of type 3

// other internal members; opts and spopts are C-compatible structs, while
// fftPlan is a C++-only owning pointer (std::unique_ptr)
#ifndef FINUFFT_USE_DUCC0
FFTW_PLAN fftwPlan;
#endif
std::unique_ptr<Finufft_FFT_plan<FLT>> fftPlan;
finufft_opts opts; // this and spopts could be made ptrs
finufft_spread_opts spopts;

} FINUFFT_PLAN_S;
};

#endif // DEFS_H
185 changes: 178 additions & 7 deletions include/finufft/fft.h
Original file line number Diff line number Diff line change
@@ -1,18 +1,189 @@
#ifndef FINUFFT_INCLUDE_FINUFFT_FFT_H
#define FINUFFT_INCLUDE_FINUFFT_FFT_H

#include <vector>

#ifdef FINUFFT_USE_DUCC0
#include "ducc0/fft/fftnd_impl.h"
// Stop-gap for builds that do not use FFTW: tests written against the FFTW
// housekeeping entry points still compile, because each name below exists as
// a function that does nothing.
[[maybe_unused]] static inline void FFTW_FORGET_WISDOM() {
}
[[maybe_unused]] static inline void FFTW_CLEANUP() {
}
[[maybe_unused]] static inline void FFTW_CLEANUP_THREADS() {
}
#include <complex>

// Placeholder FFT-plan type for the FINUFFT_USE_DUCC0 branch.
// It exposes the constructor, plan(), alloc_complex(), free() and the static
// housekeeping calls, but none of them touches an FFT library: planning and
// the housekeeping calls are no-ops, and buffers come from plain new[] and
// delete[].
template<typename T> class Finufft_FFT_plan {
  using cplx_t      = std::complex<T>;
  using lock_hook_t = void (*)(void *);

public:
  // Accepts (and ignores) the lock callback, unlock callback and callback data.
  [[maybe_unused]] Finufft_FFT_plan(lock_hook_t = nullptr, lock_hook_t = nullptr,
                                    void * = nullptr) {}

  // No-op; every argument is ignored.
  [[maybe_unused]] void plan(const std::vector<int> & /*dims*/, size_t /*batchSize*/,
                             cplx_t * /*ptr*/, int /*sign*/, int /*options*/,
                             int /*nthreads*/) {}

  // Returns a new[]-allocated array of N complex values; release it with free().
  [[maybe_unused]] static cplx_t *alloc_complex(size_t N) {
    cplx_t *const buffer = new cplx_t[N];
    return buffer;
  }

  // Releases an array obtained from alloc_complex() via delete[].
  [[maybe_unused]] static void free(cplx_t *ptr) {
    delete[] ptr;
  }

  // Housekeeping hooks; all three do nothing in this branch.
  [[maybe_unused]] static void forget_wisdom() {}
  [[maybe_unused]] static void cleanup() {}
  [[maybe_unused]] static void cleanup_threads() {}
};

#else
#include "fftw_defs.h"

// clang-format off
#include <complex>
#include <fftw3.h> // (after complex) needed so can typedef FFTW_CPX
// clang-format on
#include <mutex>

// Primary template for the FFTW branch: deliberately empty. The usable
// members live in the float and double specializations.
template<typename T> class Finufft_FFT_plan {
};

// FFTW-backed FFT plan, single-precision specialization.
//
// Owns one fftwf_plan (null until plan() is called) and brackets every FFTW
// planner call with lock()/unlock(). Locking goes through the caller-supplied
// callbacks when a lock callback was given, otherwise through a mutex shared
// by all Finufft_FFT_plan<float> objects.
//
// Declared with the `class` key so it matches the primary template.
template<> class Finufft_FFT_plan<float> {
private:
  // Fallback planner lock, shared by every instance of this specialization.
  static std::mutex &mut() {
    static std::mutex mut_;
    return mut_;
  }
  fftwf_plan plan_; // owned; nullptr while no plan has been created

  void (*fftw_lock_fun)(void *);   // Function ptr that locks the FFTW planner
  void (*fftw_unlock_fun)(void *); // Function ptr that unlocks the FFTW planner
  void *lock_data;                 // opaque argument passed to both callbacks
  // The callback-vs-mutex choice is keyed on fftw_lock_fun alone, so a caller
  // that supplies a lock callback must supply the unlock callback as well.
  void lock() { fftw_lock_fun ? fftw_lock_fun(lock_data) : mut().lock(); }
  void unlock() { fftw_lock_fun ? fftw_unlock_fun(lock_data) : mut().unlock(); }

public:
  // Stores the optional lock/unlock callbacks and their data. In OpenMP builds
  // it also calls fftwf_init_threads() once per process, under the lock.
  // NOTE(review): the `initialized` flag is only protected by whichever lock
  // this instance uses; instances with different callbacks are not mutually
  // excluded. Confirm callers always pass the same callbacks.
  [[maybe_unused]] Finufft_FFT_plan(void (*fftw_lock_fun_)(void *)   = nullptr,
                                    void (*fftw_unlock_fun_)(void *) = nullptr,
                                    void *lock_data_                 = nullptr)
      : plan_(nullptr), fftw_lock_fun(fftw_lock_fun_), fftw_unlock_fun(fftw_unlock_fun_),
        lock_data(lock_data_) {
    lock();
#ifdef _OPENMP
    static bool initialized = false;
    if (!initialized) {
      fftwf_init_threads();
      initialized = true;
    }
#endif
    unlock();
  }

  // plan_ is destroyed in the destructor, so a copied object would destroy the
  // same FFTW plan a second time. Copying is therefore disabled.
  Finufft_FFT_plan(const Finufft_FFT_plan &)            = delete;
  Finufft_FFT_plan &operator=(const Finufft_FFT_plan &) = delete;

  // Destroys the owned plan (if any) under the planner lock.
  [[maybe_unused]] ~Finufft_FFT_plan() {
    lock();
    if (plan_) fftwf_destroy_plan(plan_);
    unlock();
  }

  // Creates an in-place batched complex DFT plan over `ptr`.
  //   dims      - transform size along each dimension (rank = dims.size())
  //   batchSize - number of transforms; consecutive transforms are nf elements
  //               apart, where nf is the product of dims
  //   ptr       - buffer used as both input and output
  //   sign      - passed to FFTW as the transform sign
  //   options   - passed to FFTW as the planner flags
  //   nthreads  - thread count for the plan (used only in OpenMP builds)
  // dims.size(), batchSize and nf are narrowed to int for the FFTW call, so
  // each must fit in an int.
  void plan
      [[maybe_unused]] (const std::vector<int> &dims, size_t batchSize,
                        std::complex<float> *ptr, int sign, int options, int nthreads) {
    uint64_t nf = 1;
    for (auto i : dims) nf *= i;
    lock();
    // Re-planning: release the plan from an earlier call so it is not leaked.
    if (plan_) {
      fftwf_destroy_plan(plan_);
      plan_ = nullptr;
    }
#ifdef _OPENMP
    fftwf_plan_with_nthreads(nthreads);
#endif
    plan_ = fftwf_plan_many_dft(int(dims.size()), dims.data(), int(batchSize),
                                reinterpret_cast<fftwf_complex *>(ptr), nullptr, 1,
                                int(nf), reinterpret_cast<fftwf_complex *>(ptr), nullptr,
                                1, int(nf), sign, unsigned(options));
    unlock();
  }

  // Allocates N complex values with fftwf_alloc_complex(); release with free().
  static std::complex<float> *alloc_complex [[maybe_unused]] (size_t N) {
    return reinterpret_cast<std::complex<float> *>(fftwf_alloc_complex(N));
  }
  // Releases a buffer from alloc_complex(); a null pointer is ignored.
  static void free [[maybe_unused]] (std::complex<float> *ptr) {
    if (ptr) fftwf_free(reinterpret_cast<fftwf_complex *>(ptr));
  }
  // Runs the current plan; plan() must have been called first, since plan_ is
  // passed to FFTW without a null check.
  void execute [[maybe_unused]] () { fftwf_execute(plan_); }

  // Thin forwards to the FFTW housekeeping routines.
  static void forget_wisdom [[maybe_unused]] () { fftwf_forget_wisdom(); }
  static void cleanup [[maybe_unused]] () { fftwf_cleanup(); }
  // Forwards to fftwf_cleanup_threads() in OpenMP builds; otherwise a no-op.
  static void cleanup_threads [[maybe_unused]] () {
#ifdef _OPENMP
    fftwf_cleanup_threads();
#endif
  }
};

// FFTW-backed FFT plan, double-precision specialization.
//
// Owns one fftw_plan (null until plan() is called) and brackets every FFTW
// planner call with lock()/unlock(). Locking goes through the caller-supplied
// callbacks when a lock callback was given, otherwise through a mutex shared
// by all Finufft_FFT_plan<double> objects.
//
// Declared with the `class` key so it matches the primary template.
template<> class Finufft_FFT_plan<double> {
private:
  // Fallback planner lock, shared by every instance of this specialization.
  static std::mutex &mut() {
    static std::mutex mut_;
    return mut_;
  }
  fftw_plan plan_; // owned; nullptr while no plan has been created

  void (*fftw_lock_fun)(void *);   // Function ptr that locks the FFTW planner
  void (*fftw_unlock_fun)(void *); // Function ptr that unlocks the FFTW planner
  void *lock_data;                 // opaque argument passed to both callbacks
  // The callback-vs-mutex choice is keyed on fftw_lock_fun alone, so a caller
  // that supplies a lock callback must supply the unlock callback as well.
  void lock() { fftw_lock_fun ? fftw_lock_fun(lock_data) : mut().lock(); }
  void unlock() { fftw_lock_fun ? fftw_unlock_fun(lock_data) : mut().unlock(); }

public:
  // Stores the optional lock/unlock callbacks and their data. In OpenMP builds
  // it also calls fftw_init_threads() once per process, under the lock.
  // NOTE(review): the `initialized` flag is only protected by whichever lock
  // this instance uses; instances with different callbacks are not mutually
  // excluded. Confirm callers always pass the same callbacks.
  [[maybe_unused]] Finufft_FFT_plan(void (*fftw_lock_fun_)(void *)   = nullptr,
                                    void (*fftw_unlock_fun_)(void *) = nullptr,
                                    void *lock_data_                 = nullptr)
      : plan_(nullptr), fftw_lock_fun(fftw_lock_fun_), fftw_unlock_fun(fftw_unlock_fun_),
        lock_data(lock_data_) {
    lock();
#ifdef _OPENMP
    static bool initialized = false;
    if (!initialized) {
      fftw_init_threads();
      initialized = true;
    }
#endif
    unlock();
  }

  // plan_ is destroyed in the destructor, so a copied object would destroy the
  // same FFTW plan a second time. Copying is therefore disabled.
  Finufft_FFT_plan(const Finufft_FFT_plan &)            = delete;
  Finufft_FFT_plan &operator=(const Finufft_FFT_plan &) = delete;

  // Destroys the owned plan (if any) under the planner lock.
  [[maybe_unused]] ~Finufft_FFT_plan() {
    lock();
    if (plan_) fftw_destroy_plan(plan_);
    unlock();
  }

  // Creates an in-place batched complex DFT plan over `ptr`.
  //   dims      - transform size along each dimension (rank = dims.size())
  //   batchSize - number of transforms; consecutive transforms are nf elements
  //               apart, where nf is the product of dims
  //   ptr       - buffer used as both input and output
  //   sign      - passed to FFTW as the transform sign
  //   options   - passed to FFTW as the planner flags
  //   nthreads  - thread count for the plan (used only in OpenMP builds)
  // dims.size(), batchSize and nf are narrowed to int for the FFTW call, so
  // each must fit in an int.
  void plan
      [[maybe_unused]] (const std::vector<int> &dims, size_t batchSize,
                        std::complex<double> *ptr, int sign, int options, int nthreads) {
    uint64_t nf = 1;
    for (auto i : dims) nf *= i;
    lock();
    // Re-planning: release the plan from an earlier call so it is not leaked.
    if (plan_) {
      fftw_destroy_plan(plan_);
      plan_ = nullptr;
    }
#ifdef _OPENMP
    fftw_plan_with_nthreads(nthreads);
#endif
    plan_ = fftw_plan_many_dft(int(dims.size()), dims.data(), int(batchSize),
                               reinterpret_cast<fftw_complex *>(ptr), nullptr, 1, int(nf),
                               reinterpret_cast<fftw_complex *>(ptr), nullptr, 1, int(nf),
                               sign, unsigned(options));
    unlock();
  }

  // Allocates N complex values with fftw_alloc_complex(); release with free().
  static std::complex<double> *alloc_complex [[maybe_unused]] (size_t N) {
    return reinterpret_cast<std::complex<double> *>(fftw_alloc_complex(N));
  }
  // Releases a buffer from alloc_complex(); a null pointer is ignored, matching
  // the single-precision specialization.
  static void free [[maybe_unused]] (std::complex<double> *ptr) {
    if (ptr) fftw_free(reinterpret_cast<fftw_complex *>(ptr));
  }
  // Runs the current plan; plan() must have been called first, since plan_ is
  // passed to FFTW without a null check.
  void execute [[maybe_unused]] () { fftw_execute(plan_); }

  // Thin forwards to the FFTW housekeeping routines.
  static void forget_wisdom [[maybe_unused]] () { fftw_forget_wisdom(); }
  static void cleanup [[maybe_unused]] () { fftw_cleanup(); }
  // Forwards to fftw_cleanup_threads() in OpenMP builds; otherwise a no-op.
  static void cleanup_threads [[maybe_unused]] () {
#ifdef _OPENMP
    fftw_cleanup_threads();
#endif
  }
};

#endif

#include <finufft/defs.h>

int *gridsize_for_fft(FINUFFT_PLAN p);
static inline void finufft_fft_forget_wisdom [[maybe_unused]] () {
Finufft_FFT_plan<FLT>::forget_wisdom();
}
static inline void finufft_fft_cleanup [[maybe_unused]] () {
Finufft_FFT_plan<FLT>::cleanup();
}
static inline void finufft_fft_cleanup_threads [[maybe_unused]] () {
Finufft_FFT_plan<FLT>::cleanup_threads();
}

std::vector<int> gridsize_for_fft(FINUFFT_PLAN p);
void do_fft(FINUFFT_PLAN p);

#endif // FINUFFT_INCLUDE_FINUFFT_FFT_H
44 changes: 0 additions & 44 deletions include/finufft/fftw_defs.h

This file was deleted.

6 changes: 3 additions & 3 deletions perftest/guru_timing_test.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -145,9 +145,9 @@ int main(int argc, char *argv[])
}

// Andrea found the following are needed to get reliable independent timings:
FFTW_CLEANUP();
FFTW_CLEANUP_THREADS();
FFTW_FORGET_WISDOM();
finufft_fft_cleanup();
finufft_fft_cleanup_threads();
finufft_fft_forget_wisdom();
// std::this_thread::sleep_for(std::chrono::seconds(1));
sleep(tsleep);

Expand Down
Loading

0 comments on commit 7113b0e

Please sign in to comment.