From 026d8d39c901f7c611797e935ce4380e4a99cee1 Mon Sep 17 00:00:00 2001 From: Libin Lu Date: Tue, 2 Jul 2024 14:42:32 -0400 Subject: [PATCH 01/83] test kernel sym with aligned store --- src/spreadinterp.cpp | 128 +++++++++++++++++++++++++++++++++++++++---- 1 file changed, 117 insertions(+), 11 deletions(-) diff --git a/src/spreadinterp.cpp b/src/spreadinterp.cpp index 3212e6705..a47f0582b 100644 --- a/src/spreadinterp.cpp +++ b/src/spreadinterp.cpp @@ -24,6 +24,14 @@ namespace { // anonymous namespace for internal structs equivalent to declaring // static struct zip_low; struct zip_hi; +template +struct reverse_index; +template +struct select_index; +template +struct reverse_index_tail; +template +struct shuffle_index; // forward declaration to clean up the code and be able to use this everywhere in the file template static constexpr auto BestSIMDHelper(); template constexpr auto GetPaddedSIMDWidth(); @@ -521,15 +529,15 @@ FINUFFT_NEVER_INLINE static int interpSorted_kernel( if (!(opts.flags & TF_OMIT_SPREADING)) { switch (ndims) { case 1: - ker_eval(kernel_values.data(), opts, x1); + ker_eval(kernel_values.data(), opts, x1); interp_line(target, data_uniform, ker1, i1, N1); break; case 2: - ker_eval(kernel_values.data(), opts, x1, x2); + ker_eval(kernel_values.data(), opts, x1, x2); interp_square(target, data_uniform, ker1, ker2, i1, i2, N1, N2); break; case 3: - ker_eval(kernel_values.data(), opts, x1, x2, x3); + ker_eval(kernel_values.data(), opts, x1, x2, x3); interp_cube(target, data_uniform, ker1, ker2, ker3, i1, i2, i3, N1, N2, N3); break; @@ -760,25 +768,99 @@ Two upsampfacs implemented. Params must match ref formula. 
Barnett 4/24/18 */ const FLT z = std::fma(FLT(2.0), x, FLT(w - 1)); // scale so local grid offset z in // [-1,1] if (opts.upsampfac == 2.0) { // floating point equality is fine here - static constexpr auto alignment = simd_type::arch_type::alignment(); + using arch_t = typename simd_type::arch_type; + static constexpr auto alignment = arch_t::alignment(); static constexpr auto simd_size = simd_type::size; static constexpr auto padded_ns = (w + simd_size - 1) & ~(simd_size - 1); static constexpr auto nc = nc200(); static constexpr auto horner_coeffs = get_horner_coeffs_200(); + static constexpr auto use_ker_sym = (simd_size < w); alignas(alignment) static constexpr auto padded_coeffs = pad_2D_array_with_zeros(horner_coeffs); - const simd_type zv(z); + // use kernel symmetry trick if w > simd_size + if constexpr (use_ker_sym) { + static constexpr uint8_t tail = w % simd_size; + static constexpr uint8_t if_odd_degree = ((nc+1) % 2); + static const simd_type zerov(0.0); + const simd_type zv(z); + const simd_type z2v = zv * zv; + + // no xsimd::select neeeded if tail is zero + if constexpr (tail) { + // some xsimd constants for shuffle + //static constexpr auto reverse_batch_head = xsimd::make_batch_constant, arch_t, reverse_index>(); + //static constexpr auto reverse_batch_tail = xsimd::make_batch_constant, arch_t, reverse_index_tail>(); + static constexpr auto shuffle_batch = xsimd::make_batch_constant, arch_t, shuffle_index>(); + //static constexpr auto select_batch = xsimd::make_batch_bool_constant>(); + + // process simd vecs + simd_type k_odd, k_even, k_prev, k_sym = zerov; + for (uint8_t i = 0, offset = w - tail; i < (w+1)/2; i += simd_size, offset -= simd_size) { + k_odd = if_odd_degree ? 
simd_type::load_aligned(padded_coeffs[0].data() + i) : zerov; + k_even = simd_type::load_aligned(padded_coeffs[if_odd_degree].data() + i); + for (uint8_t j = 1+if_odd_degree; j < nc; j += 2) { + const auto cji_odd = simd_type::load_aligned(padded_coeffs[j].data() + i); + k_odd = xsimd::fma(k_odd, z2v, cji_odd); + const auto cji_even = simd_type::load_aligned(padded_coeffs[j+1].data() + i); + k_even = xsimd::fma(k_even, z2v, cji_even); + } + xsimd::fma(k_odd, zv, k_even).store_aligned(ker + i); + if (offset >= (w+1)/2) { + k_prev = k_sym; + k_sym = xsimd::fma(k_odd, -zv, k_even); + xsimd::shuffle(k_sym, k_prev, shuffle_batch).store_aligned(ker + offset); + /* + if (i==0) { + // save one xsimd::swizzle for the first iteration(k_prev is zerov) + // by assumption, ker is padded to be multiple of simd_size + // the padded part must be zero because in spread_subproblem_*d_kernel, trg has out of bound writes. + xsimd::select(select_batch, xsimd::swizzle(k_sym, reverse_batch_head), zerov).store_aligned(ker + offset); + } + else { + // xsimd::select of two xsimd::swizzle is the xsimd::shuffle for the general shuffle case + //xsimd::select(select_batch, xsimd::swizzle(k_sym, reverse_batch_head), xsimd::swizzle(k_prev, reverse_batch_tail)).store_aligned(ker + offset); + xsimd::shuffle(k_sym, k_prev, shuffle_batch).store_aligned(ker + offset); + } + */ + } + } + } + else { + // xsimd constants for reverse + static constexpr auto reverse_batch = xsimd::make_batch_constant, arch_t, reverse_index>(); + + // process simd vecs + for (uint8_t i = 0, offset = w - simd_size; i < w/2; i += simd_size, offset -= simd_size) { + auto k_odd = if_odd_degree ? 
simd_type::load_aligned(padded_coeffs[0].data() + i) : zerov; + auto k_even = simd_type::load_aligned(padded_coeffs[if_odd_degree].data() + i); + for (uint8_t j = 1+if_odd_degree; j < nc; j += 2) { + const auto cji_odd = simd_type::load_aligned(padded_coeffs[j].data() + i); + k_odd = xsimd::fma(k_odd, z2v, cji_odd); + const auto cji_even = simd_type::load_aligned(padded_coeffs[j+1].data() + i); + k_even = xsimd::fma(k_even, z2v, cji_even); + } + xsimd::fma(k_odd, zv, k_even).store_aligned(ker + i); + if(offset >= w/2) { + xsimd::swizzle(xsimd::fma(k_odd, -zv, k_even), reverse_batch).store_aligned(ker + offset); + } + } + } + } + else { + const simd_type zv(z); - for (uint8_t i = 0; i < w; i += simd_size) { - auto k = simd_type::load_aligned(padded_coeffs[0].data() + i); - for (uint8_t j = 1; j < nc; ++j) { - const auto cji = simd_type::load_aligned(padded_coeffs[j].data() + i); - k = xsimd::fma(k, zv, cji); + for (uint8_t i = 0; i < w; i += simd_size) { + auto k = simd_type::load_aligned(padded_coeffs[0].data() + i); + for (uint8_t j = 1; j < nc; ++j) { + const auto cji = simd_type::load_aligned(padded_coeffs[j].data() + i); + k = xsimd::fma(k, zv, cji); + } + k.store_aligned(ker + i); } - k.store_aligned(ker + i); } + return; } // insert the auto-generated code which expects z, w args, writes to ker... @@ -1928,6 +2010,30 @@ struct zip_hi { return (size + index) / 2; } }; +template +struct reverse_index { + static constexpr unsigned get(unsigned index, const unsigned size) { + return index < cap ? (cap - 1 - index) : index; + } +}; +template +struct select_index { + static constexpr bool get(unsigned index, const unsigned size) { + return index < cap ? 1 : 0; + } +}; +template +struct reverse_index_tail { + static constexpr unsigned get(unsigned index, const unsigned size) { + return index < cap ? index : size + cap - 1 - index; + } +}; +template +struct shuffle_index { + static constexpr unsigned get(unsigned index, const unsigned size) { + return index < cap ? 
(cap - 1 - index) : size + size + cap - 1 - index; + } +}; void print_subgrid_info(int ndims, BIGINT offset1, BIGINT offset2, BIGINT offset3, BIGINT padded_size1, BIGINT size1, BIGINT size2, BIGINT size3, From 1ae88211535b626d913f04ee697b2c7ed4937456 Mon Sep 17 00:00:00 2001 From: Libin Lu Date: Wed, 3 Jul 2024 16:30:38 -0400 Subject: [PATCH 02/83] add Horner sym eval without explicit aligned store(Martin does this in ducc) --- src/spreadinterp.cpp | 66 +++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 65 insertions(+), 1 deletion(-) diff --git a/src/spreadinterp.cpp b/src/spreadinterp.cpp index ac6bc2bc1..34b3cb7e2 100644 --- a/src/spreadinterp.cpp +++ b/src/spreadinterp.cpp @@ -75,6 +75,8 @@ template()>> // aka ns static FINUFFT_ALWAYS_INLINE void eval_kernel_vec_Horner( FLT *FINUFFT_RESTRICT ker, FLT x, const finufft_spread_opts &opts) noexcept; +static FINUFFT_ALWAYS_INLINE void eval_kernel_vec_Horner_unaligned_store( + FLT *FINUFFT_RESTRICT ker, FLT x, const finufft_spread_opts &opts) noexcept; template static void interp_line(FLT *FINUFFT_RESTRICT out, const FLT *du, const FLT *ker, BIGINT i1, BIGINT N1); @@ -762,6 +764,66 @@ void evaluate_kernel_vector(FLT *ker, FLT *args, const finufft_spread_opts &opts if (abs(args[i]) >= (FLT)opts.ES_halfwidth) ker[i] = 0.0; } +template // aka ns +void eval_kernel_vec_Horner_unaligned_store(FLT *FINUFFT_RESTRICT ker, const FLT x, + const finufft_spread_opts &opts) noexcept +/* Fill ker[] with Horner piecewise poly approx to [-w/2,w/2] ES kernel eval at +x_j = x + j, for j=0,..,w-1. Thus x in [-w/2,-w/2+1]. w is aka ns. +This is the current evaluation method, since it's faster (except i7 w=16). +Two upsampfacs implemented. Params must match ref formula. 
Barnett 4/24/18 */ + +{ + const FLT z = std::fma(FLT(2.0), x, FLT(w - 1)); // scale so local grid offset z in + // [-1,1] + if (opts.upsampfac == 2.0) { // floating point equality is fine here + static constexpr auto alignment = simd_type::arch_type::alignment(); + static constexpr auto simd_size = simd_type::size; + static constexpr auto padded_ns = (w + simd_size - 1) & ~(simd_size - 1); + static constexpr auto nc = nc200(); + static constexpr auto horner_coeffs = get_horner_coeffs_200(); + + alignas(alignment) static constexpr auto padded_coeffs = + pad_2D_array_with_zeros(horner_coeffs); + + static constexpr uint8_t nvec = (w+simd_size-1)/simd_size; + static constexpr uint8_t nvec_eval = (nvec+1)/2; + static constexpr uint8_t n_eval = simd_size*nvec_eval; + static constexpr uint8_t if_odd_degree = ((nc+1) % 2); + static const simd_type zerov(0.0); + const simd_type zv(z); + const simd_type z2v = zv * zv; + alignas(alignment) std::array sym_{}; + + // process simd vecs + for (uint8_t i = 0; i < n_eval; i += simd_size) { + auto k_odd = if_odd_degree ? simd_type::load_aligned(padded_coeffs[0].data() + i) : zerov; + auto k_even = simd_type::load_aligned(padded_coeffs[if_odd_degree].data() + i); + for (uint8_t j = 1+if_odd_degree; j < nc; j += 2) { + const auto cji_odd = simd_type::load_aligned(padded_coeffs[j].data() + i); + k_odd = xsimd::fma(k_odd, z2v, cji_odd); + const auto cji_even = simd_type::load_aligned(padded_coeffs[j+1].data() + i); + k_even = xsimd::fma(k_even, z2v, cji_even); + } + + // left + xsimd::fma(k_odd, zv, k_even).store_aligned(ker + i); + + // right + xsimd::fma(k_odd, -zv, k_even).store_aligned(sym_.data()); + // let compiler optimize the store, probably unaligned? + for (uint8_t j=0, j2=w-1-i; (j=n_eval); ++j,--j2) { + ker[j2] = sym_[j]; + } + } + return; + } + // insert the auto-generated code which expects z, w args, writes to ker... 
+ if (opts.upsampfac == 1.25) { +#include "ker_lowupsampfac_horner_allw_loop_constexpr.c" + return; + } +} + template // aka ns void eval_kernel_vec_Horner(FLT *FINUFFT_RESTRICT ker, const FLT x, const finufft_spread_opts &opts) noexcept @@ -798,8 +860,8 @@ Two upsampfacs implemented. Params must match ref formula. Barnett 4/24/18 */ // some xsimd constants for shuffle //static constexpr auto reverse_batch_head = xsimd::make_batch_constant, arch_t, reverse_index>(); //static constexpr auto reverse_batch_tail = xsimd::make_batch_constant, arch_t, reverse_index_tail>(); - static constexpr auto shuffle_batch = xsimd::make_batch_constant, arch_t, shuffle_index>(); //static constexpr auto select_batch = xsimd::make_batch_bool_constant>(); + static constexpr auto shuffle_batch = xsimd::make_batch_constant, arch_t, shuffle_index>(); // process simd vecs simd_type k_odd, k_even, k_prev, k_sym = zerov; @@ -818,6 +880,8 @@ Two upsampfacs implemented. Params must match ref formula. Barnett 4/24/18 */ k_sym = xsimd::fma(k_odd, -zv, k_even); xsimd::shuffle(k_sym, k_prev, shuffle_batch).store_aligned(ker + offset); /* + // the following is the equivalent code for the shuffle operation to avoid one swizzle in the first iteration + // seems not helping the performance if (i==0) { // save one xsimd::swizzle for the first iteration(k_prev is zerov) // by assumption, ker is padded to be multiple of simd_size From 4d25c5154aed4aabd82e2e80db664c1198cbdc9c Mon Sep 17 00:00:00 2001 From: Libin Lu Date: Tue, 9 Jul 2024 12:05:48 -0400 Subject: [PATCH 03/83] revert passing simd_type to ker_eval in interp, this is done in interp PR #471 --- src/spreadinterp.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/spreadinterp.cpp b/src/spreadinterp.cpp index 34b3cb7e2..fff411a43 100644 --- a/src/spreadinterp.cpp +++ b/src/spreadinterp.cpp @@ -531,15 +531,15 @@ FINUFFT_NEVER_INLINE static int interpSorted_kernel( if (!(opts.flags & TF_OMIT_SPREADING)) { switch (ndims) { 
case 1: - ker_eval(kernel_values.data(), opts, x1); + ker_eval(kernel_values.data(), opts, x1); interp_line(target, data_uniform, ker1, i1, N1); break; case 2: - ker_eval(kernel_values.data(), opts, x1, x2); + ker_eval(kernel_values.data(), opts, x1, x2); interp_square(target, data_uniform, ker1, ker2, i1, i2, N1, N2); break; case 3: - ker_eval(kernel_values.data(), opts, x1, x2, x3); + ker_eval(kernel_values.data(), opts, x1, x2, x3); interp_cube(target, data_uniform, ker1, ker2, ker3, i1, i2, i3, N1, N2, N3); break; From dd5dd2a40a5bf6f1a73f86442fd59419da1af4fb Mon Sep 17 00:00:00 2001 From: Libin Lu Date: Thu, 11 Jul 2024 12:44:46 -0400 Subject: [PATCH 04/83] clean up --- src/spreadinterp.cpp | 173 +++++++++++-------------------------------- 1 file changed, 42 insertions(+), 131 deletions(-) diff --git a/src/spreadinterp.cpp b/src/spreadinterp.cpp index fff411a43..9f681c9f0 100644 --- a/src/spreadinterp.cpp +++ b/src/spreadinterp.cpp @@ -24,14 +24,8 @@ namespace { // anonymous namespace for internal structs equivalent to declaring // static struct zip_low; struct zip_hi; -template -struct reverse_index; -template -struct select_index; -template -struct reverse_index_tail; -template -struct shuffle_index; +template struct reverse_index; +template struct shuffle_index; // forward declaration to clean up the code and be able to use this everywhere in the file template static constexpr auto BestSIMDHelper(); template constexpr auto GetPaddedSIMDWidth(); @@ -752,8 +746,7 @@ void evaluate_kernel_vector(FLT *ker, FLT *args, const finufft_spread_opts &opts if (opts.kerpad) { // padded part should be zero, in spread_subproblem_nd_kernels, there are // out of bound writes to trg arrays - for (int i = N; i < Npad; ++i) - ker[i] = 0.0; + for (int i = N; i < Npad; ++i) ker[i] = 0.0; } } else { for (int i = 0; i < N; i++) // dummy for timing only @@ -764,66 +757,6 @@ void evaluate_kernel_vector(FLT *ker, FLT *args, const finufft_spread_opts &opts if (abs(args[i]) >= 
(FLT)opts.ES_halfwidth) ker[i] = 0.0; } -template // aka ns -void eval_kernel_vec_Horner_unaligned_store(FLT *FINUFFT_RESTRICT ker, const FLT x, - const finufft_spread_opts &opts) noexcept -/* Fill ker[] with Horner piecewise poly approx to [-w/2,w/2] ES kernel eval at -x_j = x + j, for j=0,..,w-1. Thus x in [-w/2,-w/2+1]. w is aka ns. -This is the current evaluation method, since it's faster (except i7 w=16). -Two upsampfacs implemented. Params must match ref formula. Barnett 4/24/18 */ - -{ - const FLT z = std::fma(FLT(2.0), x, FLT(w - 1)); // scale so local grid offset z in - // [-1,1] - if (opts.upsampfac == 2.0) { // floating point equality is fine here - static constexpr auto alignment = simd_type::arch_type::alignment(); - static constexpr auto simd_size = simd_type::size; - static constexpr auto padded_ns = (w + simd_size - 1) & ~(simd_size - 1); - static constexpr auto nc = nc200(); - static constexpr auto horner_coeffs = get_horner_coeffs_200(); - - alignas(alignment) static constexpr auto padded_coeffs = - pad_2D_array_with_zeros(horner_coeffs); - - static constexpr uint8_t nvec = (w+simd_size-1)/simd_size; - static constexpr uint8_t nvec_eval = (nvec+1)/2; - static constexpr uint8_t n_eval = simd_size*nvec_eval; - static constexpr uint8_t if_odd_degree = ((nc+1) % 2); - static const simd_type zerov(0.0); - const simd_type zv(z); - const simd_type z2v = zv * zv; - alignas(alignment) std::array sym_{}; - - // process simd vecs - for (uint8_t i = 0; i < n_eval; i += simd_size) { - auto k_odd = if_odd_degree ? 
simd_type::load_aligned(padded_coeffs[0].data() + i) : zerov; - auto k_even = simd_type::load_aligned(padded_coeffs[if_odd_degree].data() + i); - for (uint8_t j = 1+if_odd_degree; j < nc; j += 2) { - const auto cji_odd = simd_type::load_aligned(padded_coeffs[j].data() + i); - k_odd = xsimd::fma(k_odd, z2v, cji_odd); - const auto cji_even = simd_type::load_aligned(padded_coeffs[j+1].data() + i); - k_even = xsimd::fma(k_even, z2v, cji_even); - } - - // left - xsimd::fma(k_odd, zv, k_even).store_aligned(ker + i); - - // right - xsimd::fma(k_odd, -zv, k_even).store_aligned(sym_.data()); - // let compiler optimize the store, probably unaligned? - for (uint8_t j=0, j2=w-1-i; (j=n_eval); ++j,--j2) { - ker[j2] = sym_[j]; - } - } - return; - } - // insert the auto-generated code which expects z, w args, writes to ker... - if (opts.upsampfac == 1.25) { -#include "ker_lowupsampfac_horner_allw_loop_constexpr.c" - return; - } -} - template // aka ns void eval_kernel_vec_Horner(FLT *FINUFFT_RESTRICT ker, const FLT x, const finufft_spread_opts &opts) noexcept @@ -849,76 +782,68 @@ Two upsampfacs implemented. Params must match ref formula. 
Barnett 4/24/18 */ // use kernel symmetry trick if w > simd_size if constexpr (use_ker_sym) { - static constexpr uint8_t tail = w % simd_size; - static constexpr uint8_t if_odd_degree = ((nc+1) % 2); + static constexpr uint8_t tail = w % simd_size; + static constexpr uint8_t if_odd_degree = ((nc + 1) % 2); static const simd_type zerov(0.0); const simd_type zv(z); const simd_type z2v = zv * zv; // no xsimd::select neeeded if tail is zero if constexpr (tail) { - // some xsimd constants for shuffle - //static constexpr auto reverse_batch_head = xsimd::make_batch_constant, arch_t, reverse_index>(); - //static constexpr auto reverse_batch_tail = xsimd::make_batch_constant, arch_t, reverse_index_tail>(); - //static constexpr auto select_batch = xsimd::make_batch_bool_constant>(); - static constexpr auto shuffle_batch = xsimd::make_batch_constant, arch_t, shuffle_index>(); + // some xsimd constant for shuffle + static constexpr auto shuffle_batch = + xsimd::make_batch_constant, arch_t, + shuffle_index>(); // process simd vecs simd_type k_odd, k_even, k_prev, k_sym = zerov; - for (uint8_t i = 0, offset = w - tail; i < (w+1)/2; i += simd_size, offset -= simd_size) { - k_odd = if_odd_degree ? simd_type::load_aligned(padded_coeffs[0].data() + i) : zerov; + for (uint8_t i = 0, offset = w - tail; i < (w + 1) / 2; + i += simd_size, offset -= simd_size) { + k_odd = if_odd_degree ? 
simd_type::load_aligned(padded_coeffs[0].data() + i) + : zerov; k_even = simd_type::load_aligned(padded_coeffs[if_odd_degree].data() + i); - for (uint8_t j = 1+if_odd_degree; j < nc; j += 2) { - const auto cji_odd = simd_type::load_aligned(padded_coeffs[j].data() + i); - k_odd = xsimd::fma(k_odd, z2v, cji_odd); - const auto cji_even = simd_type::load_aligned(padded_coeffs[j+1].data() + i); - k_even = xsimd::fma(k_even, z2v, cji_even); + for (uint8_t j = 1 + if_odd_degree; j < nc; j += 2) { + const auto cji_odd = simd_type::load_aligned(padded_coeffs[j].data() + i); + k_odd = xsimd::fma(k_odd, z2v, cji_odd); + const auto cji_even = + simd_type::load_aligned(padded_coeffs[j + 1].data() + i); + k_even = xsimd::fma(k_even, z2v, cji_even); } xsimd::fma(k_odd, zv, k_even).store_aligned(ker + i); - if (offset >= (w+1)/2) { + if (offset >= (w + 1) / 2) { k_prev = k_sym; - k_sym = xsimd::fma(k_odd, -zv, k_even); + k_sym = xsimd::fma(k_odd, -zv, k_even); xsimd::shuffle(k_sym, k_prev, shuffle_batch).store_aligned(ker + offset); - /* - // the following is the equivalent code for the shuffle operation to avoid one swizzle in the first iteration - // seems not helping the performance - if (i==0) { - // save one xsimd::swizzle for the first iteration(k_prev is zerov) - // by assumption, ker is padded to be multiple of simd_size - // the padded part must be zero because in spread_subproblem_*d_kernel, trg has out of bound writes. 
- xsimd::select(select_batch, xsimd::swizzle(k_sym, reverse_batch_head), zerov).store_aligned(ker + offset); - } - else { - // xsimd::select of two xsimd::swizzle is the xsimd::shuffle for the general shuffle case - //xsimd::select(select_batch, xsimd::swizzle(k_sym, reverse_batch_head), xsimd::swizzle(k_prev, reverse_batch_tail)).store_aligned(ker + offset); - xsimd::shuffle(k_sym, k_prev, shuffle_batch).store_aligned(ker + offset); - } - */ } } - } - else { + } else { // xsimd constants for reverse - static constexpr auto reverse_batch = xsimd::make_batch_constant, arch_t, reverse_index>(); + static constexpr auto reverse_batch = + xsimd::make_batch_constant, arch_t, + reverse_index>(); // process simd vecs - for (uint8_t i = 0, offset = w - simd_size; i < w/2; i += simd_size, offset -= simd_size) { - auto k_odd = if_odd_degree ? simd_type::load_aligned(padded_coeffs[0].data() + i) : zerov; + for (uint8_t i = 0, offset = w - simd_size; i < w / 2; + i += simd_size, offset -= simd_size) { + auto k_odd = if_odd_degree + ? 
simd_type::load_aligned(padded_coeffs[0].data() + i) + : zerov; auto k_even = simd_type::load_aligned(padded_coeffs[if_odd_degree].data() + i); - for (uint8_t j = 1+if_odd_degree; j < nc; j += 2) { - const auto cji_odd = simd_type::load_aligned(padded_coeffs[j].data() + i); - k_odd = xsimd::fma(k_odd, z2v, cji_odd); - const auto cji_even = simd_type::load_aligned(padded_coeffs[j+1].data() + i); - k_even = xsimd::fma(k_even, z2v, cji_even); + for (uint8_t j = 1 + if_odd_degree; j < nc; j += 2) { + const auto cji_odd = simd_type::load_aligned(padded_coeffs[j].data() + i); + k_odd = xsimd::fma(k_odd, z2v, cji_odd); + const auto cji_even = + simd_type::load_aligned(padded_coeffs[j + 1].data() + i); + k_even = xsimd::fma(k_even, z2v, cji_even); } xsimd::fma(k_odd, zv, k_even).store_aligned(ker + i); - if(offset >= w/2) { - xsimd::swizzle(xsimd::fma(k_odd, -zv, k_even), reverse_batch).store_aligned(ker + offset); + if (offset >= w / 2) { + xsimd::swizzle(xsimd::fma(k_odd, -zv, k_even), reverse_batch) + .store_aligned(ker + offset); } } } - } - else { + } else { const simd_type zv(z); for (uint8_t i = 0; i < w; i += simd_size) { @@ -2080,26 +2005,12 @@ struct zip_hi { return (size + index) / 2; } }; -template -struct reverse_index { +template struct reverse_index { static constexpr unsigned get(unsigned index, const unsigned size) { return index < cap ? (cap - 1 - index) : index; } }; -template -struct select_index { - static constexpr bool get(unsigned index, const unsigned size) { - return index < cap ? 1 : 0; - } -}; -template -struct reverse_index_tail { - static constexpr unsigned get(unsigned index, const unsigned size) { - return index < cap ? index : size + cap - 1 - index; - } -}; -template -struct shuffle_index { +template struct shuffle_index { static constexpr unsigned get(unsigned index, const unsigned size) { return index < cap ? 
(cap - 1 - index) : size + size + cap - 1 - index; } From a7bb9b422751b5a28284834f7b3e2bd6a90af1c5 Mon Sep 17 00:00:00 2001 From: Libin Lu Date: Thu, 11 Jul 2024 12:50:51 -0400 Subject: [PATCH 05/83] removed unused declare --- src/spreadinterp.cpp | 2 -- 1 file changed, 2 deletions(-) diff --git a/src/spreadinterp.cpp b/src/spreadinterp.cpp index 9f681c9f0..e049ab69a 100644 --- a/src/spreadinterp.cpp +++ b/src/spreadinterp.cpp @@ -69,8 +69,6 @@ template()>> // aka ns static FINUFFT_ALWAYS_INLINE void eval_kernel_vec_Horner( FLT *FINUFFT_RESTRICT ker, FLT x, const finufft_spread_opts &opts) noexcept; -static FINUFFT_ALWAYS_INLINE void eval_kernel_vec_Horner_unaligned_store( - FLT *FINUFFT_RESTRICT ker, FLT x, const finufft_spread_opts &opts) noexcept; template static void interp_line(FLT *FINUFFT_RESTRICT out, const FLT *du, const FLT *ker, BIGINT i1, BIGINT N1); From eefac071a891123dd3e762002c3c398cd66dd081 Mon Sep 17 00:00:00 2001 From: Libin Lu Date: Thu, 11 Jul 2024 13:05:32 -0400 Subject: [PATCH 06/83] add some comments --- src/spreadinterp.cpp | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/src/spreadinterp.cpp b/src/spreadinterp.cpp index d858ebd60..ea3736593 100644 --- a/src/spreadinterp.cpp +++ b/src/spreadinterp.cpp @@ -798,7 +798,7 @@ Two upsampfacs implemented. Params must match ref formula. Barnett 4/24/18 */ const simd_type zv(z); const simd_type z2v = zv * zv; - // no xsimd::select neeeded if tail is zero + // no xsimd::shuffle neeeded if tail is zero if constexpr (tail) { // some xsimd constant for shuffle static constexpr auto shuffle_batch = @@ -819,8 +819,11 @@ Two upsampfacs implemented. Params must match ref formula. 
Barnett 4/24/18 */ simd_type::load_aligned(padded_coeffs[j + 1].data() + i); k_even = xsimd::fma(k_even, z2v, cji_even); } + // left part xsimd::fma(k_odd, zv, k_even).store_aligned(ker + i); + // right part symmetric to the left part if (offset >= (w + 1) / 2) { + // to use aligned store, we need shuffle the previous k_sym and current k_sym k_prev = k_sym; k_sym = xsimd::fma(k_odd, -zv, k_even); xsimd::shuffle(k_sym, k_prev, shuffle_batch).store_aligned(ker + offset); @@ -846,8 +849,11 @@ Two upsampfacs implemented. Params must match ref formula. Barnett 4/24/18 */ simd_type::load_aligned(padded_coeffs[j + 1].data() + i); k_even = xsimd::fma(k_even, z2v, cji_even); } + // left part xsimd::fma(k_odd, zv, k_even).store_aligned(ker + i); + // right part symmetric to the left part if (offset >= w / 2) { + // reverse the order for symmetric part xsimd::swizzle(xsimd::fma(k_odd, -zv, k_even), reverse_batch) .store_aligned(ker + offset); } From c9fded59530e0cac4961f8ae54e596371176c9bd Mon Sep 17 00:00:00 2001 From: Libin Lu Date: Fri, 12 Jul 2024 12:14:02 -0400 Subject: [PATCH 07/83] change to use fnma in sym part --- src/spreadinterp.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/spreadinterp.cpp b/src/spreadinterp.cpp index ea3736593..38371d17a 100644 --- a/src/spreadinterp.cpp +++ b/src/spreadinterp.cpp @@ -825,7 +825,7 @@ Two upsampfacs implemented. Params must match ref formula. Barnett 4/24/18 */ if (offset >= (w + 1) / 2) { // to use aligned store, we need shuffle the previous k_sym and current k_sym k_prev = k_sym; - k_sym = xsimd::fma(k_odd, -zv, k_even); + k_sym = xsimd::fnma(k_odd, zv, k_even); xsimd::shuffle(k_sym, k_prev, shuffle_batch).store_aligned(ker + offset); } } @@ -854,7 +854,7 @@ Two upsampfacs implemented. Params must match ref formula. 
Barnett 4/24/18 */ // right part symmetric to the left part if (offset >= w / 2) { // reverse the order for symmetric part - xsimd::swizzle(xsimd::fma(k_odd, -zv, k_even), reverse_batch) + xsimd::swizzle(xsimd::fnma(k_odd, zv, k_even), reverse_batch) .store_aligned(ker + offset); } } From 9e6c0f2a6b32910913547d529bf72022a1eb673a Mon Sep 17 00:00:00 2001 From: DiamonDinoia Date: Tue, 23 Jan 2024 19:34:21 +0000 Subject: [PATCH 08/83] added pyptoject.toml for python build --- CMakeLists.txt | 6 ++++ CMakePresets.json | 12 ++++++++ pyproject.toml | 33 ++++++++++++++++++++ python/pyfinufft/CMakeLists.txt | 42 ++++++++++++++++++++++++++ python/pyfinufft/pyfinufft/__init__.py | 1 + python/pyfinufft/pyfinufft_ext.cpp | 14 +++++++++ 6 files changed, 108 insertions(+) create mode 100644 pyproject.toml create mode 100644 python/pyfinufft/CMakeLists.txt create mode 100644 python/pyfinufft/pyfinufft/__init__.py create mode 100644 python/pyfinufft/pyfinufft_ext.cpp diff --git a/CMakeLists.txt b/CMakeLists.txt index a3a61fba1..abf7d643b 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -51,6 +51,7 @@ option(FINUFFT_BUILD_EXAMPLES "Whether to build the FINUFFT examples" OFF) option(FINUFFT_BUILD_TESTS "Whether to build the FINUFFT tests" OFF) option(FINUFFT_BUILD_FORTRAN "Whether to build the FINUFFT Fortran examples" OFF) option(FINUFFT_BUILD_MATLAB "Whether to build the FINUFFT Matlab interface" OFF) +option(FINUFFT_BUILD_PYTHON "Whether the python wrapper should be built." OFF) option(FINUFFT_ENABLE_SANITIZERS "Whether to enable sanitizers, only effective for Debug configuration." ON) option(FINUFFT_USE_OPENMP "Whether to use OpenMP for parallelization. If disabled, the finufft library will be single threaded. This does not affect the choice of FFTW library." ON) option(FINUFFT_USE_CUDA "Whether to build CUDA accelerated FINUFFT library (libcufinufft). 
This is completely independent of the main FINUFFT library" OFF) @@ -315,6 +316,11 @@ if (FINUFFT_BUILD_DEVEL) add_subdirectory(devel) endif () +if (FINUFFT_BUILD_PYTHON) + set(INSTALL_TARGETS "") + add_subdirectory(python/pyfinufft) +endif () + include(GNUInstallDirs) install(TARGETS ${INSTALL_TARGETS} PUBLIC_HEADER) install(FILES ${PROJECT_SOURCE_DIR}/LICENSE diff --git a/CMakePresets.json b/CMakePresets.json index b04204500..32229d1c5 100644 --- a/CMakePresets.json +++ b/CMakePresets.json @@ -110,6 +110,18 @@ "FINUFFT_BUILD_MATLAB": "ON", "FINUFFT_ENABLE_SANITIZERS": "OFF" } + }, + { + "name": "python", + "binaryDir": "build/python", + "displayName": "python", + "description": "Build with the python interface", + "generator": "Ninja Multi-Config", + "cacheVariables": { + "FINUFFT_BUILD_PYTHON": "ON", + "FINUFFT_ENABLE_SANITIZERS": "OFF", + "CMAKE_INSTALL_DATAROOTDIR": "." + } } ], "buildPresets": [ diff --git a/pyproject.toml b/pyproject.toml new file mode 100644 index 000000000..ba4f8e7ec --- /dev/null +++ b/pyproject.toml @@ -0,0 +1,33 @@ +[build-system] +requires = [ + "cmake>=3.19", + "numpy>=1.21.2", + "scikit-build-core >=0.4.3", + "ninja; platform_system!='Windows'" +] + +build-backend = "scikit_build_core.build" + +[project] +name = "pyfinufft" +version = "0.0.1" +description = "Python bindings for the FINUFFT C++ library" +readme = "README.md" +requires-python = ">=3.8" + +[tool.scikit-build] +# Protect the configuration against future changes in scikit-build-core +minimum-version = "0.4" +cmake.targets = ["pyfinufft_ext"] +cmake.args = ['-DFINUFFT_BUILD_PYTHON=ON', '-DCMAKE_BUILD_TYPE=Release'] # not sure if this is necessary +wheel.packages = ["python/pyfinufft/pyfinufft"] +wheel.exclude = ["**.pyx"] +# Setuptools-style build caching in a local directory +build-dir = "build/{wheel_tag}" + +# Build stable ABI wheels for CPython 3.8+ +wheel.py-api = "cp38" + +[tool.cibuildwheel] +# Necessary to see build output from the actual compilation 
+build-verbosity = 1 diff --git a/python/pyfinufft/CMakeLists.txt b/python/pyfinufft/CMakeLists.txt new file mode 100644 index 000000000..f0adbde41 --- /dev/null +++ b/python/pyfinufft/CMakeLists.txt @@ -0,0 +1,42 @@ +if (NOT SKBUILD) + message(WARNING "\ + This CMake file is meant to be executed using 'scikit-build'. Running + it directly will almost certainly not produce the desired result. If + you are a user trying to install this package, please use the command + below, which will install all necessary build dependencies, compile + the package in an isolated environment, and then install it. + ===================================================================== + $ pip install . + ===================================================================== + If you are a software developer, and this is your own package, then + it is usually much more efficient to install the build dependencies + in your environment once and use the following command that avoids + a costly creation of a new virtual environment at every compilation: + ===================================================================== + $ pip install nanobind scikit-build-core[pyproject] + $ pip install --no-build-isolation -ve . + ===================================================================== + You may optionally add -Ceditable.rebuild=true to auto-rebuild when + the package is imported. 
Otherwise, you need to re-run the above + after editing C++ files.") +endif() + +find_package(Python 3.8 + REQUIRED COMPONENTS Interpreter Development.Module + OPTIONAL_COMPONENTS Development.SABIModule) + +set(NANOBIND_DOWNLOAD_VERSION v1.8.0) + +CPMAddPackage( + NAME nanobind + GIT_REPOSITORY https://github.com/wjakob/nanobind + GIT_TAG ${NANOBIND_DOWNLOAD_VERSION} + GIT_SHALLOW YES + GIT_PROGRESS YES + EXCLUDE_FROM_ALL YES + SYSTEM +) + +nanobind_add_module(pyfinufft_ext STABLE_ABI NB_STATIC pyfinufft_ext.cpp) +target_link_libraries(pyfinufft_ext PRIVATE finufft_static) +install(TARGETS pyfinufft_ext LIBRARY DESTINATION .) diff --git a/python/pyfinufft/pyfinufft/__init__.py b/python/pyfinufft/pyfinufft/__init__.py new file mode 100644 index 000000000..0da1bb1da --- /dev/null +++ b/python/pyfinufft/pyfinufft/__init__.py @@ -0,0 +1 @@ +from pyfinufft_ext import * \ No newline at end of file diff --git a/python/pyfinufft/pyfinufft_ext.cpp b/python/pyfinufft/pyfinufft_ext.cpp new file mode 100644 index 000000000..b47cf5331 --- /dev/null +++ b/python/pyfinufft/pyfinufft_ext.cpp @@ -0,0 +1,14 @@ +#include + +#include "finufft.h" + +namespace nb = nanobind; + + +NB_MODULE(pyfinufft_ext, m) { + nb::class_(m, "finufft_opts") + .def(nb::init<>()); + m.def("finufft_default_opts", [](nb::capsule opts) { + finufft_default_opts((finufft_opts*)opts.data()); + }); +} \ No newline at end of file From e25dce4faff4dfbd4ac78fcdc63aa21dce44ebab Mon Sep 17 00:00:00 2001 From: Marco Date: Tue, 6 Feb 2024 16:37:37 +0000 Subject: [PATCH 09/83] using cmake to build cpython --- CMakeLists.txt | 2 +- pyproject.toml | 8 ++--- python/CMakeLists.txt | 7 +++++ python/pyfinufft/CMakeLists.txt | 42 -------------------------- python/pyfinufft/pyfinufft/__init__.py | 1 - python/pyfinufft/pyfinufft_ext.cpp | 14 --------- 6 files changed, 11 insertions(+), 63 deletions(-) create mode 100644 python/CMakeLists.txt delete mode 100644 python/pyfinufft/CMakeLists.txt delete mode 100644 
python/pyfinufft/pyfinufft/__init__.py delete mode 100644 python/pyfinufft/pyfinufft_ext.cpp diff --git a/CMakeLists.txt b/CMakeLists.txt index abf7d643b..279e90de0 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -318,7 +318,7 @@ endif () if (FINUFFT_BUILD_PYTHON) set(INSTALL_TARGETS "") - add_subdirectory(python/pyfinufft) + add_subdirectory(python) endif () include(GNUInstallDirs) diff --git a/pyproject.toml b/pyproject.toml index ba4f8e7ec..b232e9589 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -9,7 +9,7 @@ requires = [ build-backend = "scikit_build_core.build" [project] -name = "pyfinufft" +name = "finufft" version = "0.0.1" description = "Python bindings for the FINUFFT C++ library" readme = "README.md" @@ -18,16 +18,14 @@ requires-python = ">=3.8" [tool.scikit-build] # Protect the configuration against future changes in scikit-build-core minimum-version = "0.4" -cmake.targets = ["pyfinufft_ext"] +cmake.targets = ["finufft"] cmake.args = ['-DFINUFFT_BUILD_PYTHON=ON', '-DCMAKE_BUILD_TYPE=Release'] # not sure if this is necessary -wheel.packages = ["python/pyfinufft/pyfinufft"] +wheel.packages = ["python/finufft/finufft"] wheel.exclude = ["**.pyx"] # Setuptools-style build caching in a local directory build-dir = "build/{wheel_tag}" - # Build stable ABI wheels for CPython 3.8+ wheel.py-api = "cp38" - [tool.cibuildwheel] # Necessary to see build output from the actual compilation build-verbosity = 1 diff --git a/python/CMakeLists.txt b/python/CMakeLists.txt new file mode 100644 index 000000000..9f69633bb --- /dev/null +++ b/python/CMakeLists.txt @@ -0,0 +1,7 @@ +if(FINUFFT_USE_CPU) + install(TARGETS finufft LIBRARY DESTINATION finufft) +endif() + +if(FINUFFT_USE_GPU) + install(TARGETS finufft_gpu LIBRARY DESTINATION cufinufft) +endif() diff --git a/python/pyfinufft/CMakeLists.txt b/python/pyfinufft/CMakeLists.txt deleted file mode 100644 index f0adbde41..000000000 --- a/python/pyfinufft/CMakeLists.txt +++ /dev/null @@ -1,42 +0,0 @@ -if (NOT SKBUILD) - 
message(WARNING "\ - This CMake file is meant to be executed using 'scikit-build'. Running - it directly will almost certainly not produce the desired result. If - you are a user trying to install this package, please use the command - below, which will install all necessary build dependencies, compile - the package in an isolated environment, and then install it. - ===================================================================== - $ pip install . - ===================================================================== - If you are a software developer, and this is your own package, then - it is usually much more efficient to install the build dependencies - in your environment once and use the following command that avoids - a costly creation of a new virtual environment at every compilation: - ===================================================================== - $ pip install nanobind scikit-build-core[pyproject] - $ pip install --no-build-isolation -ve . - ===================================================================== - You may optionally add -Ceditable.rebuild=true to auto-rebuild when - the package is imported. Otherwise, you need to re-run the above - after editing C++ files.") -endif() - -find_package(Python 3.8 - REQUIRED COMPONENTS Interpreter Development.Module - OPTIONAL_COMPONENTS Development.SABIModule) - -set(NANOBIND_DOWNLOAD_VERSION v1.8.0) - -CPMAddPackage( - NAME nanobind - GIT_REPOSITORY https://github.com/wjakob/nanobind - GIT_TAG ${NANOBIND_DOWNLOAD_VERSION} - GIT_SHALLOW YES - GIT_PROGRESS YES - EXCLUDE_FROM_ALL YES - SYSTEM -) - -nanobind_add_module(pyfinufft_ext STABLE_ABI NB_STATIC pyfinufft_ext.cpp) -target_link_libraries(pyfinufft_ext PRIVATE finufft_static) -install(TARGETS pyfinufft_ext LIBRARY DESTINATION .) 
diff --git a/python/pyfinufft/pyfinufft/__init__.py b/python/pyfinufft/pyfinufft/__init__.py deleted file mode 100644 index 0da1bb1da..000000000 --- a/python/pyfinufft/pyfinufft/__init__.py +++ /dev/null @@ -1 +0,0 @@ -from pyfinufft_ext import * \ No newline at end of file diff --git a/python/pyfinufft/pyfinufft_ext.cpp b/python/pyfinufft/pyfinufft_ext.cpp deleted file mode 100644 index b47cf5331..000000000 --- a/python/pyfinufft/pyfinufft_ext.cpp +++ /dev/null @@ -1,14 +0,0 @@ -#include - -#include "finufft.h" - -namespace nb = nanobind; - - -NB_MODULE(pyfinufft_ext, m) { - nb::class_(m, "finufft_opts") - .def(nb::init<>()); - m.def("finufft_default_opts", [](nb::capsule opts) { - finufft_default_opts((finufft_opts*)opts.data()); - }); -} \ No newline at end of file From 8163f6672de3a8a136646c65e986e106f397be10 Mon Sep 17 00:00:00 2001 From: Marco Date: Tue, 6 Feb 2024 17:30:52 +0000 Subject: [PATCH 10/83] fixed so loading --- python/finufft/finufft/_finufft.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/python/finufft/finufft/_finufft.py b/python/finufft/finufft/_finufft.py index 96308db11..5f5947683 100644 --- a/python/finufft/finufft/_finufft.py +++ b/python/finufft/finufft/_finufft.py @@ -30,7 +30,8 @@ lib = None # Try to load a local library directly. 
try: - lib = ctypes.cdll.LoadLibrary('libfinufft.so') + path = os.path.dirname(__file__) + lib = ctypes.cdll.LoadLibrary(path+'/libfinufft.so') except OSError: pass From f1d28ebee7ff8fede9603a771ab7bfbc857f1c12 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Joakim=20And=C3=A9n?= Date: Tue, 28 May 2024 11:18:17 +0200 Subject: [PATCH 11/83] py: simplify pyproject.toml --- pyproject.toml | 21 ++++++++++----------- 1 file changed, 10 insertions(+), 11 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index b232e9589..b106e30b2 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,31 +1,30 @@ [build-system] requires = [ - "cmake>=3.19", - "numpy>=1.21.2", - "scikit-build-core >=0.4.3", - "ninja; platform_system!='Windows'" + "scikit-build-core >= 0.4.3", + "ninja; platform_system != 'Windows'" ] build-backend = "scikit_build_core.build" [project] name = "finufft" -version = "0.0.1" +version = "2.2.0" description = "Python bindings for the FINUFFT C++ library" readme = "README.md" requires-python = ">=3.8" +dependencies = ["numpy >= 1.12.0"] [tool.scikit-build] # Protect the configuration against future changes in scikit-build-core minimum-version = "0.4" -cmake.targets = ["finufft"] -cmake.args = ['-DFINUFFT_BUILD_PYTHON=ON', '-DCMAKE_BUILD_TYPE=Release'] # not sure if this is necessary -wheel.packages = ["python/finufft/finufft"] -wheel.exclude = ["**.pyx"] # Setuptools-style build caching in a local directory build-dir = "build/{wheel_tag}" -# Build stable ABI wheels for CPython 3.8+ -wheel.py-api = "cp38" + +cmake.targets = ["finufft"] +cmake.define = {"FINUFFT_BUILD_PYTHON" = "ON"} + +wheel.packages = ["python/finufft/finufft"] + [tool.cibuildwheel] # Necessary to see build output from the actual compilation build-verbosity = 1 From 0624de5a14c23a0e2c0bcb0263bdd16bf981811d Mon Sep 17 00:00:00 2001 From: Marco Barbone Date: Thu, 13 Jun 2024 12:49:28 -0400 Subject: [PATCH 12/83] finished python wrapper --- CMakeLists.txt | 11 +++-- pyproject.toml | 4 +- 
python/finufft/finufft/_finufft.py | 70 ++++++++++++++---------------- 3 files changed, 41 insertions(+), 44 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 279e90de0..601c98d5c 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -59,6 +59,7 @@ option(FINUFFT_USE_CPU "Whether to build the ordinary FINUFFT library (libfinuff option(FINUFFT_STATIC_LINKING "Whether to link the static FINUFFT library (libfinufft_static)." ON) option(FINUFFT_BUILD_DEVEL "Whether to build development executables" OFF) option(FINUFFT_USE_DUCC0 "Whether to use DUCC0 (instead of FFTW) for CPU FFTs" OFF) +option(FINUFFT_ENABLE_INSTALL "Wheter to enable installation of FINUFFT library" ON) # sphinx tag (don't remove): @cmake_opts_end if (FINUFFT_USE_CPU) @@ -267,9 +268,10 @@ if (FINUFFT_USE_CPU) target_include_directories(finufft_static PUBLIC $) target_include_directories(finufft_static SYSTEM INTERFACE $) - file(GLOB FINUFFT_PUBLIC_HEADERS "${CMAKE_CURRENT_SOURCE_DIR}/include/finufft*.h") - set_target_properties(finufft PROPERTIES PUBLIC_HEADER "${FINUFFT_PUBLIC_HEADERS}") - + if (FINUFFT_ENABLE_INSTALL) + file(GLOB FINUFFT_PUBLIC_HEADERS "${CMAKE_CURRENT_SOURCE_DIR}/include/finufft*.h") + set_target_properties(finufft PROPERTIES PUBLIC_HEADER "${FINUFFT_PUBLIC_HEADERS}") + endif() list(APPEND INSTALL_TARGETS finufft finufft_static) endif () @@ -317,10 +319,10 @@ if (FINUFFT_BUILD_DEVEL) endif () if (FINUFFT_BUILD_PYTHON) - set(INSTALL_TARGETS "") add_subdirectory(python) endif () +if (FINUFFT_ENABLE_INSTALL) include(GNUInstallDirs) install(TARGETS ${INSTALL_TARGETS} PUBLIC_HEADER) install(FILES ${PROJECT_SOURCE_DIR}/LICENSE @@ -348,3 +350,4 @@ if (FINUFFT_USE_CUDA) PATTERN "CMakeLists.txt" EXCLUDE ) endif () +endif () diff --git a/pyproject.toml b/pyproject.toml index b106e30b2..511273ecc 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,7 +1,6 @@ [build-system] requires = [ "scikit-build-core >= 0.4.3", - "ninja; platform_system != 'Windows'" ] build-backend = 
"scikit_build_core.build" @@ -21,10 +20,9 @@ minimum-version = "0.4" build-dir = "build/{wheel_tag}" cmake.targets = ["finufft"] -cmake.define = {"FINUFFT_BUILD_PYTHON" = "ON"} +cmake.define = {"FINUFFT_BUILD_PYTHON" = "ON", "FINUFFT_ENABLE_INSTALL" = "OFF"} wheel.packages = ["python/finufft/finufft"] - [tool.cibuildwheel] # Necessary to see build output from the actual compilation build-verbosity = 1 diff --git a/python/finufft/finufft/_finufft.py b/python/finufft/finufft/_finufft.py index 5f5947683..fe2748d70 100644 --- a/python/finufft/finufft/_finufft.py +++ b/python/finufft/finufft/_finufft.py @@ -5,20 +5,18 @@ Seperate bindings are provided for single and double precision libraries, differentiated by 'f' suffix. """ - import ctypes -import os -import warnings -import platform -import importlib.util - -import numpy as np - +import pathlib +from ctypes.util import find_library from ctypes import c_double -from ctypes import c_int from ctypes import c_float -from ctypes import c_void_p +from ctypes import c_int from ctypes import c_longlong +from ctypes import c_void_p + +import numpy as np +import os +import platform from numpy.ctypeslib import ndpointer c_int_p = ctypes.POINTER(c_int) @@ -28,33 +26,31 @@ # TODO: See if there is a way to improve this so it is less hacky. lib = None -# Try to load a local library directly. -try: - path = os.path.dirname(__file__) - lib = ctypes.cdll.LoadLibrary(path+'/libfinufft.so') -except OSError: - pass - -# Should that not work, try to find the full path of a packaged lib. -# The packaged lib should have a py/platform decorated name, -# and be rpath'ed the true FINUFFT library through the Extension and wheel -# systems. -try: - if lib is None: - # Find the library. - lib_path = importlib.util.find_spec('finufft.finufftc').origin - # Get the full path for the ctypes loader. 
- if platform.system() == 'Windows': - os.environ["PATH"] += os.pathsep + os.path.join(os.path.dirname(os.path.dirname(os.path.realpath(lib_path))),'finufft') - full_lib_path = os.path.join(os.path.dirname(os.path.dirname(os.path.realpath(lib_path))),'finufft','libfinufft.dll') - else: - full_lib_path = os.path.realpath(lib_path) - - # Load the library, - # which rpaths the libraries we care about. - lib = ctypes.cdll.LoadLibrary(full_lib_path) -except Exception: - raise ImportError('Failed to find a suitable finufft library') +# Try to load finufft installed from the python package. +path = pathlib.Path(__file__).parent.resolve() +# Ignoring the exceptions to avoid the print +# exception, during the process of an exception another exception occurred +# unix systems have lib prefix, non unix systems do not +library_names = ['libfinufft', 'finufft'] +for lib_name in library_names: + try: + lib = np.ctypeslib.load_library(lib_name, path) + break + except OSError: + # Paranoid, in case lib is set to something and then an exception is thrown + lib = None + +if lib is None: + # If that fails, try to load the library from the system path. + libname = find_library('finufft') + if libname is not None: + lib = ctypes.cdll.LoadLibrary(libname) + # we probably should add a version check and throw a warning if the version is different + else: + # if that does not work, finufft is not installed correctly. + raise ImportError('Failed to find a suitable finufft library. 
' + 'Please check your installation, ' + 'finufft does not seem to be installed correctly.') class FinufftOpts(ctypes.Structure): From d0493dcecf5d9698117d5c125958002ffb63a56d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Joakim=20And=C3=A9n?= Date: Thu, 13 Jun 2024 21:58:15 +0200 Subject: [PATCH 13/83] cmake: spelling in description --- CMakeLists.txt | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 601c98d5c..bf0cec8b1 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -51,7 +51,7 @@ option(FINUFFT_BUILD_EXAMPLES "Whether to build the FINUFFT examples" OFF) option(FINUFFT_BUILD_TESTS "Whether to build the FINUFFT tests" OFF) option(FINUFFT_BUILD_FORTRAN "Whether to build the FINUFFT Fortran examples" OFF) option(FINUFFT_BUILD_MATLAB "Whether to build the FINUFFT Matlab interface" OFF) -option(FINUFFT_BUILD_PYTHON "Whether the python wrapper should be built." OFF) +option(FINUFFT_BUILD_PYTHON "Whether the Python wrapper should be built." OFF) option(FINUFFT_ENABLE_SANITIZERS "Whether to enable sanitizers, only effective for Debug configuration." ON) option(FINUFFT_USE_OPENMP "Whether to use OpenMP for parallelization. If disabled, the finufft library will be single threaded. This does not affect the choice of FFTW library." ON) option(FINUFFT_USE_CUDA "Whether to build CUDA accelerated FINUFFT library (libcufinufft). This is completely independent of the main FINUFFT library" OFF) @@ -59,7 +59,7 @@ option(FINUFFT_USE_CPU "Whether to build the ordinary FINUFFT library (libfinuff option(FINUFFT_STATIC_LINKING "Whether to link the static FINUFFT library (libfinufft_static)." 
ON) option(FINUFFT_BUILD_DEVEL "Whether to build development executables" OFF) option(FINUFFT_USE_DUCC0 "Whether to use DUCC0 (instead of FFTW) for CPU FFTs" OFF) -option(FINUFFT_ENABLE_INSTALL "Wheter to enable installation of FINUFFT library" ON) +option(FINUFFT_BUILD_DEVEL "Whether to build developement executables" OFF) # sphinx tag (don't remove): @cmake_opts_end if (FINUFFT_USE_CPU) From 8d6e44416393355819efe77cd9ea0cb3b6fcd6e3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Joakim=20And=C3=A9n?= Date: Thu, 13 Jun 2024 21:58:40 +0200 Subject: [PATCH 14/83] py: pull version from __init__ --- pyproject.toml | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index 511273ecc..a2bd9837c 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -7,11 +7,11 @@ build-backend = "scikit_build_core.build" [project] name = "finufft" -version = "2.2.0" description = "Python bindings for the FINUFFT C++ library" readme = "README.md" requires-python = ">=3.8" dependencies = ["numpy >= 1.12.0"] +dynamic = ["version"] [tool.scikit-build] # Protect the configuration against future changes in scikit-build-core @@ -23,6 +23,11 @@ cmake.targets = ["finufft"] cmake.define = {"FINUFFT_BUILD_PYTHON" = "ON", "FINUFFT_ENABLE_INSTALL" = "OFF"} wheel.packages = ["python/finufft/finufft"] + +[tool.scikit-build.metadata.version] +provider = "scikit_build_core.metadata.regex" +input = "python/finufft/finufft/__init__.py" + [tool.cibuildwheel] # Necessary to see build output from the actual compilation build-verbosity = 1 From d3db5d2478df4acd87af44d96978fbc0e1e5b1ff Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Joakim=20And=C3=A9n?= Date: Thu, 13 Jun 2024 22:52:31 +0200 Subject: [PATCH 15/83] ci: try adding a workflow for skbuild wheels --- .github/workflows/python_skbuild_wheels.yml | 27 +++++++++++++++++++++ 1 file changed, 27 insertions(+) create mode 100644 .github/workflows/python_skbuild_wheels.yml diff --git 
a/.github/workflows/python_skbuild_wheels.yml b/.github/workflows/python_skbuild_wheels.yml new file mode 100644 index 000000000..e5667da49 --- /dev/null +++ b/.github/workflows/python_skbuild_wheels.yml @@ -0,0 +1,27 @@ +name: Build Python wheels (skbuild) + +on: [push, pull_request] + +jobs: + build_wheels: + name: Build wheels on ${{ matrix.os }} + runs-on: ${{ matrix.os }} + strategy: + matrix: + os: [ubuntu-latest] + + steps: + - uses: actions/checkout@v4 + + + - name: Build wheels + uses: pypa/cibuildwheel@v2.19.1 + env: + CIBW_ARCHS_LINUX: "x86_64" + CIBW_SKIP: "pp* *musllinux*" + CIBW_BEFORE_ALL_LINUX: yum install -y fftw3-devel + + - uses: actions/upload-artifact@v4 + with: + name: cibw-wheels-${{ matrix.os }}-${{ strategy.job-index }} + path: ./wheelhouse/*.whl From e5f78caa00d8cc87195db6d40c74fa7755a9fc6f Mon Sep 17 00:00:00 2001 From: Marco Barbone Date: Thu, 13 Jun 2024 19:49:42 -0400 Subject: [PATCH 16/83] WIP workflow --- .github/workflows/python_cmake.yml | 61 ++++++++++++++++++ CMakeLists.txt | 99 +++++++++++++++++++----------- pyproject.toml | 2 + 3 files changed, 127 insertions(+), 35 deletions(-) create mode 100644 .github/workflows/python_cmake.yml diff --git a/.github/workflows/python_cmake.yml b/.github/workflows/python_cmake.yml new file mode 100644 index 000000000..06a07ae23 --- /dev/null +++ b/.github/workflows/python_cmake.yml @@ -0,0 +1,61 @@ +name: Pip + +on: + workflow_dispatch: + pull_request: + push: + branches: + - master + +jobs: + prepare: + runs-on: ubuntu-latest + steps: + - name: Checkout code + uses: actions/checkout@v4 + - name: Create matrix + id: create_matrix + uses: fabiocaccamo/create-matrix-action@v3 + with: + matrix: | + os {windows-latest}, compiler {msvc, llvm}, arch_flags {/arch:AVX2, /arch:AVX512}, python-version {3.8, 3.11} + os {ubuntu-latest}, compiler {gcc, llvm}, arch_flags {-march=native, -march=x86-64}, python-version {3.8, 3.11} + + outputs: + matrix: ${{ steps.create_matrix.outputs.matrix }} + build: + 
name: Build with Pip + runs-on: ${{ matrix.os }} + needs: prepare + strategy: + fail-fast: false + matrix: + include: ${{fromJson(needs.prepare.outputs.matrix)}} + steps: + - uses: actions/setup-python@v5 + with: + python-version: ${{ matrix.python-version }} + - name: Setup Cpp + uses: aminya/setup-cpp@v1 + with: + compiler: ${{ matrix.compiler }} + vcvarsall: ${{ contains(matrix.os, 'windows') }} + cmake: false + ninja: false + vcpkg: false + cppcheck: false + clangtidy: false + + - name: Install pytest + run: python -m pip install pytest + - name: Set min macOS version + if: runner.os == 'macOS' + run: | + echo "MACOS_DEPLOYMENT_TARGET=10.14" >> $GITHUB_ENV + - name: Set compiler flags + run: | + echo MAKE_ARGS="-DFINUFFT_ARCH_FLAGS=${{ matrix.arch_flags }}" >> $GITHUB_ENV + - name: Build + run: pip install . --verbose + - name: Test + run: python -m pytest python/finufft/test diff --git a/CMakeLists.txt b/CMakeLists.txt index bf0cec8b1..f3c2e130c 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -1,6 +1,8 @@ cmake_minimum_required(VERSION 3.19) -project(finufft VERSION 2.2.0 LANGUAGES C CXX) +project(FINUFFT VERSION 2.2.0 LANGUAGES C CXX) + +set(CMAKE_EXPORT_COMPILE_COMMANDS ON) set(CMAKE_EXPORT_COMPILE_COMMANDS ON) @@ -34,7 +36,7 @@ if(NOT CMAKE_BUILD_TYPE) set(CMAKE_BUILD_TYPE Release CACHE STRING "Set the default build type to Release" FORCE) endif() -if (CMAKE_CXX_COMPILER_ID IN_LIST GNU_LIKE_FRONTENDS) +if (CMAKE_CXX_COMPILER_ID IN_LIST GNU_LIKE_FRONTENDS AND NOT DEFINED FINUFFT_ARCH_FLAGS) if (CMAKE_SYSTEM_PROCESSOR MATCHES "ppc|ppc64|powerpc|powerpc64" OR (APPLE AND CMAKE_OSX_ARCHITECTURES MATCHES "ppc|ppc64")) # PowerPC arch does not have -march flag. 
set(FINUFFT_ARCH_FLAGS "-mtune=native" CACHE STRING "Compiler flags for specifying target architecture.") @@ -60,6 +62,7 @@ option(FINUFFT_STATIC_LINKING "Whether to link the static FINUFFT library (libfi option(FINUFFT_BUILD_DEVEL "Whether to build development executables" OFF) option(FINUFFT_USE_DUCC0 "Whether to use DUCC0 (instead of FFTW) for CPU FFTs" OFF) option(FINUFFT_BUILD_DEVEL "Whether to build developement executables" OFF) +option(FINUFFT_ENABLE_INSTALL "Whether to enable installation of FINUFFT library" ON) # sphinx tag (don't remove): @cmake_opts_end if (FINUFFT_USE_CPU) @@ -148,18 +151,18 @@ function(finufft_link_test target) if (FINUFFT_STATIC_LINKING) target_link_libraries(${target} PRIVATE finufft_static) - if (FINUFFT_USE_OPENMP) + if(FINUFFT_USE_OPENMP) target_link_libraries(${target} PRIVATE OpenMP::OpenMP_CXX) - if (WIN32) + if(WIN32) target_link_options(${target} PRIVATE ${OpenMP_CXX_FLAGS}) - endif () - endif () - else () + endif() + endif() + else() target_link_libraries(${target} PRIVATE finufft) - if (WIN32) + if(WIN32) target_compile_definitions(${target} PRIVATE FINUFFT_DLL) - endif () - endif () + endif() + endif() enable_asan(${target}) endfunction() @@ -322,32 +325,58 @@ if (FINUFFT_BUILD_PYTHON) add_subdirectory(python) endif () -if (FINUFFT_ENABLE_INSTALL) -include(GNUInstallDirs) -install(TARGETS ${INSTALL_TARGETS} PUBLIC_HEADER) -install(FILES ${PROJECT_SOURCE_DIR}/LICENSE - DESTINATION ${CMAKE_INSTALL_DATAROOTDIR}/licenses/finufft) +message(STATUS " CMAKE_BUILD_TYPE: ${CMAKE_BUILD_TYPE}") +message(STATUS "FINUFFT configuration summary:") +message(STATUS " FINUFFT_USE_CPU: ${FINUFFT_USE_CPU}") +message(STATUS " FINUFFT_USE_CUDA: ${FINUFFT_USE_CUDA}") +message(STATUS " FINUFFT_USE_OPENMP: ${FINUFFT_USE_OPENMP}") +message(STATUS " FINUFFT_STATIC_LINKING: ${FINUFFT_STATIC_LINKING}") +message(STATUS " FINUFFT_ENABLE_INSTALL: ${FINUFFT_ENABLE_INSTALL}") +message(STATUS " FINUFFT_BUILD_EXAMPLES: ${FINUFFT_BUILD_EXAMPLES}") 
+message(STATUS " FINUFFT_BUILD_TESTS: ${FINUFFT_BUILD_TESTS}") +message(STATUS " FINUFFT_BUILD_FORTRAN: ${FINUFFT_BUILD_FORTRAN}") +message(STATUS " FINUFFT_BUILD_MATLAB: ${FINUFFT_BUILD_MATLAB}") +message(STATUS " FINUFFT_BUILD_PYTHON: ${FINUFFT_BUILD_PYTHON}") +message(STATUS " FINUFFT_ENABLE_SANITIZERS: ${FINUFFT_ENABLE_SANITIZERS}") +message(STATUS " FINUFFT_FFTW_SUFFIX: ${FINUFFT_FFTW_SUFFIX}") +message(STATUS " FINUFFT_FFTW_LIBRARIES: ${FINUFFT_FFTW_LIBRARIES}") +message(STATUS " FINUFFT_ARCH_FLAGS: ${FINUFFT_ARCH_FLAGS}") + if (FINUFFT_USE_CPU) - install(DIRECTORY ${PROJECT_SOURCE_DIR}/examples - DESTINATION ${CMAKE_INSTALL_DATAROOTDIR}/finufft - PATTERN "CMakeLists.txt" EXCLUDE - PATTERN "README" EXCLUDE - PATTERN "examples/cuda" EXCLUDE - ) - if (FINUFFT_BUILD_FORTRAN) - install(DIRECTORY ${PROJECT_SOURCE_DIR}/fortran/examples - DESTINATION ${CMAKE_INSTALL_DATAROOTDIR}/finufft/fortran - ) - install(FILES ${PROJECT_SOURCE_DIR}/include/finufft.fh - DESTINATION ${CMAKE_INSTALL_INCLUDEDIR} - ) + if (FINUFFT_STATIC_LINKING) + get_target_property(FINUFFT_COMPILE_OPTIONS finufft_static COMPILE_OPTIONS) + else () + get_target_property(FINUFFT_COMPILE_OPTIONS finufft COMPILE_OPTIONS) endif () + message("Compile options for finufft: ${MY_COMPILE_OPTIONS}") endif () -if (FINUFFT_USE_CUDA) - install(DIRECTORY ${PROJECT_SOURCE_DIR}/examples/cuda - DESTINATION ${CMAKE_INSTALL_DATAROOTDIR}/finufft/examples - PATTERN "README" EXCLUDE - PATTERN "CMakeLists.txt" EXCLUDE - ) -endif () + +if (FINUFFT_ENABLE_INSTALL) + include(GNUInstallDirs) + install(TARGETS ${INSTALL_TARGETS} PUBLIC_HEADER) + install(FILES ${PROJECT_SOURCE_DIR}/LICENSE + DESTINATION ${CMAKE_INSTALL_DATAROOTDIR}/licenses/finufft) + if (FINUFFT_USE_CPU) + install(DIRECTORY ${PROJECT_SOURCE_DIR}/examples + DESTINATION ${CMAKE_INSTALL_DATAROOTDIR}/finufft + PATTERN "CMakeLists.txt" EXCLUDE + PATTERN "README" EXCLUDE + PATTERN "examples/cuda" EXCLUDE + ) + if (FINUFFT_BUILD_FORTRAN) + install(DIRECTORY 
${PROJECT_SOURCE_DIR}/fortran/examples + DESTINATION ${CMAKE_INSTALL_DATAROOTDIR}/finufft/fortran + ) + install(FILES ${PROJECT_SOURCE_DIR}/include/finufft.fh + DESTINATION ${CMAKE_INSTALL_INCLUDEDIR} + ) + endif() + endif () + if (FINUFFT_USE_CUDA) + install(DIRECTORY ${PROJECT_SOURCE_DIR}/examples/cuda + DESTINATION ${CMAKE_INSTALL_DATAROOTDIR}/finufft/examples + PATTERN "README" EXCLUDE + PATTERN "CMakeLists.txt" EXCLUDE + ) + endif() endif () diff --git a/pyproject.toml b/pyproject.toml index a2bd9837c..a6a54e4c7 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,8 @@ [build-system] requires = [ "scikit-build-core >= 0.4.3", + "cmake >= 3.19", + "ninja >= 1.9.0", ] build-backend = "scikit_build_core.build" From 8f69abacd7981d9e515b90cdfd823b849b635cde Mon Sep 17 00:00:00 2001 From: Marco Barbone Date: Thu, 13 Jun 2024 21:56:15 -0400 Subject: [PATCH 17/83] added workflow --- .github/workflows/python_cmake.yml | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/.github/workflows/python_cmake.yml b/.github/workflows/python_cmake.yml index 06a07ae23..3da13b5fe 100644 --- a/.github/workflows/python_cmake.yml +++ b/.github/workflows/python_cmake.yml @@ -1,11 +1,14 @@ name: Pip on: - workflow_dispatch: - pull_request: push: branches: - master + tags: + - v* + pull_request: + branches: + - master jobs: prepare: From e0181c751192200ada6290052239abb541838034 Mon Sep 17 00:00:00 2001 From: Marco Barbone Date: Thu, 13 Jun 2024 21:56:45 -0400 Subject: [PATCH 18/83] added workflow --- .github/workflows/python_cmake.yml | 10 +--------- 1 file changed, 1 insertion(+), 9 deletions(-) diff --git a/.github/workflows/python_cmake.yml b/.github/workflows/python_cmake.yml index 3da13b5fe..8fbd74485 100644 --- a/.github/workflows/python_cmake.yml +++ b/.github/workflows/python_cmake.yml @@ -1,14 +1,6 @@ name: Pip -on: - push: - branches: - - master - tags: - - v* - pull_request: - branches: - - master +on: [push, pull_request] jobs: prepare: From 
a99635bdd36f1bd4bda53ed7a5f825f8181514f1 Mon Sep 17 00:00:00 2001 From: Marco Barbone Date: Thu, 13 Jun 2024 21:57:51 -0400 Subject: [PATCH 19/83] added workflow --- .github/workflows/python_cmake.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/python_cmake.yml b/.github/workflows/python_cmake.yml index 8fbd74485..3621e6489 100644 --- a/.github/workflows/python_cmake.yml +++ b/.github/workflows/python_cmake.yml @@ -1,4 +1,4 @@ -name: Pip +name: Test python skbuild with CMake on: [push, pull_request] From 44a25b00a799210c4198aa719dc0d548faf62f13 Mon Sep 17 00:00:00 2001 From: Marco Barbone Date: Thu, 13 Jun 2024 21:59:54 -0400 Subject: [PATCH 20/83] added workflow --- .github/workflows/python_cmake.yml | 15 +++++---------- 1 file changed, 5 insertions(+), 10 deletions(-) diff --git a/.github/workflows/python_cmake.yml b/.github/workflows/python_cmake.yml index 3621e6489..b9323cdea 100644 --- a/.github/workflows/python_cmake.yml +++ b/.github/workflows/python_cmake.yml @@ -8,16 +8,11 @@ jobs: steps: - name: Checkout code uses: actions/checkout@v4 - - name: Create matrix - id: create_matrix - uses: fabiocaccamo/create-matrix-action@v3 - with: - matrix: | - os {windows-latest}, compiler {msvc, llvm}, arch_flags {/arch:AVX2, /arch:AVX512}, python-version {3.8, 3.11} - os {ubuntu-latest}, compiler {gcc, llvm}, arch_flags {-march=native, -march=x86-64}, python-version {3.8, 3.11} - - outputs: - matrix: ${{ steps.create_matrix.outputs.matrix }} + - name: Generate matrix + id: generate_matrix + run: | + MATRIX=$(python3 ../.github/workflows/generate_matrix.py) + echo "matrix=$MATRIX" >> $GITHUB_OUTPUT build: name: Build with Pip runs-on: ${{ matrix.os }} From 3a6a716159ffcd0dc16909dffed8eba15d090ffd Mon Sep 17 00:00:00 2001 From: Marco Barbone Date: Thu, 13 Jun 2024 22:01:18 -0400 Subject: [PATCH 21/83] fixed workdir --- .github/workflows/python_cmake.yml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git 
a/.github/workflows/python_cmake.yml b/.github/workflows/python_cmake.yml index b9323cdea..fd89563ef 100644 --- a/.github/workflows/python_cmake.yml +++ b/.github/workflows/python_cmake.yml @@ -10,8 +10,9 @@ jobs: uses: actions/checkout@v4 - name: Generate matrix id: generate_matrix + working-directory: ${{ github.workspace }} run: | - MATRIX=$(python3 ../.github/workflows/generate_matrix.py) + MATRIX=$(python3 .github/workflows/generate_matrix.py) echo "matrix=$MATRIX" >> $GITHUB_OUTPUT build: name: Build with Pip From 79a2ff61e5f31b72edf21cb7251a759ccf321dca Mon Sep 17 00:00:00 2001 From: Marco Barbone Date: Thu, 13 Jun 2024 22:03:17 -0400 Subject: [PATCH 22/83] fixed workdir --- .github/workflows/python_cmake.yml | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/.github/workflows/python_cmake.yml b/.github/workflows/python_cmake.yml index fd89563ef..e581cf8d2 100644 --- a/.github/workflows/python_cmake.yml +++ b/.github/workflows/python_cmake.yml @@ -10,9 +10,8 @@ jobs: uses: actions/checkout@v4 - name: Generate matrix id: generate_matrix - working-directory: ${{ github.workspace }} run: | - MATRIX=$(python3 .github/workflows/generate_matrix.py) + MATRIX=$(python3 ${{ github.workspace }}/.github/workflows/generate_matrix.py) echo "matrix=$MATRIX" >> $GITHUB_OUTPUT build: name: Build with Pip From d3dbac1e4a0fdd615e5aac375d534d599a8ffd2d Mon Sep 17 00:00:00 2001 From: Marco Barbone Date: Thu, 13 Jun 2024 22:04:50 -0400 Subject: [PATCH 23/83] added the python script --- .github/workflows/generate_matrix.py | 36 ++++++++++++++++++++++++++++ 1 file changed, 36 insertions(+) create mode 100644 .github/workflows/generate_matrix.py diff --git a/.github/workflows/generate_matrix.py b/.github/workflows/generate_matrix.py new file mode 100644 index 000000000..0c1b3e35a --- /dev/null +++ b/.github/workflows/generate_matrix.py @@ -0,0 +1,36 @@ +import json + +matrix = { + "include": [] +} + +platforms = ["windows-latest", "macos-latest", "ubuntu-latest"] 
+python_versions = ["3.8", "3.11"] + +combinations = { + "ubuntu-latest": { + "compiler": ["llvm", "gcc"], + "arch_flags": ["-march=native", "-march=x86-64", ""] + }, + "windows-latest": { + "compiler": ["msvc"], + "arch_flags": ["/arch:AVX2", "/arch:AVX512", "/arch:SSE2"] + }, + "macos-latest": { + "compiler": ["llvm", "gcc"], + "arch_flags": ["-march=native", "-march=x86-64", ""] + } +} + +for platform in combinations.keys(): + for python_version in python_versions: + for compiler in combinations[platform]["compiler"]: + for arch_flag in combinations[platform]["arch_flags"]: + matrix["include"].append({ + "os": platform, + "python-version": python_version, + "compiler": compiler, + "arch_flags": arch_flag + }) +json_str = json.dumps(matrix, ensure_ascii=False) +print(json_str) From ffb1eb7fd6372e0d9f5d594aec467d759997c842 Mon Sep 17 00:00:00 2001 From: Marco Barbone Date: Thu, 13 Jun 2024 22:06:53 -0400 Subject: [PATCH 24/83] it should not fail fast --- .github/workflows/python_cmake.yml | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/.github/workflows/python_cmake.yml b/.github/workflows/python_cmake.yml index e581cf8d2..eb86bddd2 100644 --- a/.github/workflows/python_cmake.yml +++ b/.github/workflows/python_cmake.yml @@ -19,8 +19,7 @@ jobs: needs: prepare strategy: fail-fast: false - matrix: - include: ${{fromJson(needs.prepare.outputs.matrix)}} + matrix: ${{ fromJSON(needs.prepare.outputs.matrix) }} steps: - uses: actions/setup-python@v5 with: From 9ee56cdbc2e5907238753f16215df145a62bc5a9 Mon Sep 17 00:00:00 2001 From: Marco Barbone Date: Thu, 13 Jun 2024 22:15:41 -0400 Subject: [PATCH 25/83] added missing dependencies --- .github/workflows/python_cmake.yml | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/.github/workflows/python_cmake.yml b/.github/workflows/python_cmake.yml index eb86bddd2..999687249 100644 --- a/.github/workflows/python_cmake.yml +++ b/.github/workflows/python_cmake.yml @@ -34,13 +34,21 @@ 
jobs: vcpkg: false cppcheck: false clangtidy: false - + - name: Set up Homebrew + id: set-up-homebrew + uses: Homebrew/actions/setup-homebrew@master - name: Install pytest run: python -m pip install pytest - - name: Set min macOS version + - name: Set min macOS version and install fftw and libomp if: runner.os == 'macOS' run: | echo "MACOS_DEPLOYMENT_TARGET=10.14" >> $GITHUB_ENV + brew install fftw libomp + - name: Install fftw and libomp + if: runner.os == 'linux' + run: | + sudo apt-get update + sudo apt-get install -y libfftw3-dev libomp-dev - name: Set compiler flags run: | echo MAKE_ARGS="-DFINUFFT_ARCH_FLAGS=${{ matrix.arch_flags }}" >> $GITHUB_ENV From 8d5d1037b0c9a94fa9dbe38201da4184099d164a Mon Sep 17 00:00:00 2001 From: Marco Barbone Date: Thu, 13 Jun 2024 22:17:22 -0400 Subject: [PATCH 26/83] homebrew only on mac --- .github/workflows/python_cmake.yml | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/.github/workflows/python_cmake.yml b/.github/workflows/python_cmake.yml index 999687249..ebf20a32a 100644 --- a/.github/workflows/python_cmake.yml +++ b/.github/workflows/python_cmake.yml @@ -34,13 +34,11 @@ jobs: vcpkg: false cppcheck: false clangtidy: false - - name: Set up Homebrew - id: set-up-homebrew - uses: Homebrew/actions/setup-homebrew@master - name: Install pytest run: python -m pip install pytest - name: Set min macOS version and install fftw and libomp if: runner.os == 'macOS' + uses: Homebrew/actions/setup-homebrew@master run: | echo "MACOS_DEPLOYMENT_TARGET=10.14" >> $GITHUB_ENV brew install fftw libomp From 8dcea9882e1403ee0dcc874076d28a40476ce68e Mon Sep 17 00:00:00 2001 From: Marco Barbone Date: Thu, 13 Jun 2024 22:20:33 -0400 Subject: [PATCH 27/83] homebrew only on mac --- .github/workflows/python_cmake.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/python_cmake.yml b/.github/workflows/python_cmake.yml index ebf20a32a..257ca4fb5 100644 --- a/.github/workflows/python_cmake.yml +++ 
b/.github/workflows/python_cmake.yml @@ -38,8 +38,8 @@ jobs: run: python -m pip install pytest - name: Set min macOS version and install fftw and libomp if: runner.os == 'macOS' - uses: Homebrew/actions/setup-homebrew@master run: | + /bin/bash -c "$(curl -fsSL https://raw.githubusercontent.com/Homebrew/install/HEAD/install.sh)" echo "MACOS_DEPLOYMENT_TARGET=10.14" >> $GITHUB_ENV brew install fftw libomp - name: Install fftw and libomp From 5b7d0bdca7776b228e11ad131b7dc11b5df66731 Mon Sep 17 00:00:00 2001 From: Marco Barbone Date: Thu, 13 Jun 2024 22:25:36 -0400 Subject: [PATCH 28/83] homebrew only on mac --- .github/workflows/python_cmake.yml | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/.github/workflows/python_cmake.yml b/.github/workflows/python_cmake.yml index 257ca4fb5..1704feffb 100644 --- a/.github/workflows/python_cmake.yml +++ b/.github/workflows/python_cmake.yml @@ -21,6 +21,13 @@ jobs: fail-fast: false matrix: ${{ fromJSON(needs.prepare.outputs.matrix) }} steps: + - uses: actions/checkout@v4 + - name: Set min macOS version and install fftw and libomp + if: runner.os == 'macOS' + run: | + /bin/bash -c "$(curl -fsSL https://raw.githubusercontent.com/Homebrew/install/HEAD/install.sh)" + echo "MACOS_DEPLOYMENT_TARGET=10.14" >> $GITHUB_ENV + brew install fftw libomp - uses: actions/setup-python@v5 with: python-version: ${{ matrix.python-version }} @@ -36,12 +43,7 @@ jobs: clangtidy: false - name: Install pytest run: python -m pip install pytest - - name: Set min macOS version and install fftw and libomp - if: runner.os == 'macOS' - run: | - /bin/bash -c "$(curl -fsSL https://raw.githubusercontent.com/Homebrew/install/HEAD/install.sh)" - echo "MACOS_DEPLOYMENT_TARGET=10.14" >> $GITHUB_ENV - brew install fftw libomp + - name: Install fftw and libomp if: runner.os == 'linux' run: | From 5ae133f9c72a6ea37c0387b0754bb215c2e96fd0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Joakim=20And=C3=A9n?= Date: Fri, 14 Jun 2024 12:31:08 +0200 
Subject: [PATCH 29/83] ci: try adding macos --- .github/workflows/python_skbuild_wheels.yml | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/.github/workflows/python_skbuild_wheels.yml b/.github/workflows/python_skbuild_wheels.yml index e5667da49..d7013412e 100644 --- a/.github/workflows/python_skbuild_wheels.yml +++ b/.github/workflows/python_skbuild_wheels.yml @@ -8,7 +8,7 @@ jobs: runs-on: ${{ matrix.os }} strategy: matrix: - os: [ubuntu-latest] + os: [ubuntu-latest, macos-13] steps: - uses: actions/checkout@v4 @@ -18,8 +18,9 @@ jobs: uses: pypa/cibuildwheel@v2.19.1 env: CIBW_ARCHS_LINUX: "x86_64" - CIBW_SKIP: "pp* *musllinux*" + CIBW_SKIP_LINUX: "pp* *musllinux*" CIBW_BEFORE_ALL_LINUX: yum install -y fftw3-devel + CIBW_BEFORE_ALL_MACOS: brew install gcc fftw - uses: actions/upload-artifact@v4 with: From 3a6a385c56176c4261395ab7709c4a30fd5e5b9d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Joakim=20And=C3=A9n?= Date: Fri, 14 Jun 2024 12:44:24 +0200 Subject: [PATCH 30/83] ci: force gcc for cibw --- .github/workflows/python_skbuild_wheels.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/workflows/python_skbuild_wheels.yml b/.github/workflows/python_skbuild_wheels.yml index d7013412e..2a32a5516 100644 --- a/.github/workflows/python_skbuild_wheels.yml +++ b/.github/workflows/python_skbuild_wheels.yml @@ -21,6 +21,7 @@ jobs: CIBW_SKIP_LINUX: "pp* *musllinux*" CIBW_BEFORE_ALL_LINUX: yum install -y fftw3-devel CIBW_BEFORE_ALL_MACOS: brew install gcc fftw + CIBW_ENVIRONMENT_MACOS: "FC=gfortran CC=gcc CXX=g++" - uses: actions/upload-artifact@v4 with: From 40a251bc18935332278ae60481df82dce88dd74e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Joakim=20And=C3=A9n?= Date: Fri, 14 Jun 2024 14:39:53 +0200 Subject: [PATCH 31/83] ci: install libomp --- .github/workflows/python_skbuild_wheels.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/python_skbuild_wheels.yml b/.github/workflows/python_skbuild_wheels.yml index 
2a32a5516..ef9f59b95 100644 --- a/.github/workflows/python_skbuild_wheels.yml +++ b/.github/workflows/python_skbuild_wheels.yml @@ -20,7 +20,7 @@ jobs: CIBW_ARCHS_LINUX: "x86_64" CIBW_SKIP_LINUX: "pp* *musllinux*" CIBW_BEFORE_ALL_LINUX: yum install -y fftw3-devel - CIBW_BEFORE_ALL_MACOS: brew install gcc fftw + CIBW_BEFORE_ALL_MACOS: brew install gcc libomp fftw CIBW_ENVIRONMENT_MACOS: "FC=gfortran CC=gcc CXX=g++" - uses: actions/upload-artifact@v4 From f57015cb245cbff971bb852f49b3c0e4bff1e6f0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Joakim=20And=C3=A9n?= Date: Fri, 14 Jun 2024 14:43:57 +0200 Subject: [PATCH 32/83] ci: hint at libomp root --- .github/workflows/python_skbuild_wheels.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/python_skbuild_wheels.yml b/.github/workflows/python_skbuild_wheels.yml index ef9f59b95..55b1d7801 100644 --- a/.github/workflows/python_skbuild_wheels.yml +++ b/.github/workflows/python_skbuild_wheels.yml @@ -21,7 +21,7 @@ jobs: CIBW_SKIP_LINUX: "pp* *musllinux*" CIBW_BEFORE_ALL_LINUX: yum install -y fftw3-devel CIBW_BEFORE_ALL_MACOS: brew install gcc libomp fftw - CIBW_ENVIRONMENT_MACOS: "FC=gfortran CC=gcc CXX=g++" + CIBW_ENVIRONMENT_MACOS: "FC=gfortran CC=gcc CXX=g++ OpenMP_ROOT=/usr/local/opt/libomp" - uses: actions/upload-artifact@v4 with: From bca90408509f1649aa1dc61c4dae25edd66a33b6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Joakim=20And=C3=A9n?= Date: Fri, 14 Jun 2024 14:47:29 +0200 Subject: [PATCH 33/83] ci: downgrade gcc --- .github/workflows/python_skbuild_wheels.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/python_skbuild_wheels.yml b/.github/workflows/python_skbuild_wheels.yml index 55b1d7801..a247fdf65 100644 --- a/.github/workflows/python_skbuild_wheels.yml +++ b/.github/workflows/python_skbuild_wheels.yml @@ -21,7 +21,7 @@ jobs: CIBW_SKIP_LINUX: "pp* *musllinux*" CIBW_BEFORE_ALL_LINUX: yum install -y fftw3-devel CIBW_BEFORE_ALL_MACOS: brew 
install gcc libomp fftw - CIBW_ENVIRONMENT_MACOS: "FC=gfortran CC=gcc CXX=g++ OpenMP_ROOT=/usr/local/opt/libomp" + CIBW_ENVIRONMENT_MACOS: "FC=gfortran-11 CC=gcc-11 CXX=g++-11 OpenMP_ROOT=/usr/local/opt/libomp" - uses: actions/upload-artifact@v4 with: From 7744a26101b2580d413b26fb5ea7a82fd95248ee Mon Sep 17 00:00:00 2001 From: Marco Barbone Date: Fri, 14 Jun 2024 11:05:59 -0400 Subject: [PATCH 34/83] Fixed conflicts --- .github/workflows/python_cmake.yml | 1 - 1 file changed, 1 deletion(-) diff --git a/.github/workflows/python_cmake.yml b/.github/workflows/python_cmake.yml index 1704feffb..e0930d57e 100644 --- a/.github/workflows/python_cmake.yml +++ b/.github/workflows/python_cmake.yml @@ -43,7 +43,6 @@ jobs: clangtidy: false - name: Install pytest run: python -m pip install pytest - - name: Install fftw and libomp if: runner.os == 'linux' run: | From 1e49d837748060570bd4dd99c801115f83801a1d Mon Sep 17 00:00:00 2001 From: Marco Barbone Date: Fri, 14 Jun 2024 11:54:13 -0400 Subject: [PATCH 35/83] fixed workflows --- .github/workflows/generate_matrix.py | 6 +++--- .github/workflows/python_cmake.yml | 25 ++++++++++++++++++------- 2 files changed, 21 insertions(+), 10 deletions(-) diff --git a/.github/workflows/generate_matrix.py b/.github/workflows/generate_matrix.py index 0c1b3e35a..ecfeb39e4 100644 --- a/.github/workflows/generate_matrix.py +++ b/.github/workflows/generate_matrix.py @@ -4,13 +4,12 @@ "include": [] } -platforms = ["windows-latest", "macos-latest", "ubuntu-latest"] python_versions = ["3.8", "3.11"] combinations = { "ubuntu-latest": { "compiler": ["llvm", "gcc"], - "arch_flags": ["-march=native", "-march=x86-64", ""] + "arch_flags": ["-march=native", "-march=x86-64"] }, "windows-latest": { "compiler": ["msvc"], @@ -18,7 +17,7 @@ }, "macos-latest": { "compiler": ["llvm", "gcc"], - "arch_flags": ["-march=native", "-march=x86-64", ""] + "arch_flags": ["-march=native", "-march=x86-64"] } } @@ -32,5 +31,6 @@ "compiler": compiler, "arch_flags": arch_flag }) 
+ json_str = json.dumps(matrix, ensure_ascii=False) print(json_str) diff --git a/.github/workflows/python_cmake.yml b/.github/workflows/python_cmake.yml index e0930d57e..e329d3dd1 100644 --- a/.github/workflows/python_cmake.yml +++ b/.github/workflows/python_cmake.yml @@ -5,6 +5,8 @@ on: [push, pull_request] jobs: prepare: runs-on: ubuntu-latest + outputs: + matrix: ${{ steps.generate_matrix.outputs.matrix }} steps: - name: Checkout code uses: actions/checkout@v4 @@ -21,12 +23,18 @@ jobs: fail-fast: false matrix: ${{ fromJSON(needs.prepare.outputs.matrix) }} steps: - - uses: actions/checkout@v4 - name: Set min macOS version and install fftw and libomp if: runner.os == 'macOS' run: | /bin/bash -c "$(curl -fsSL https://raw.githubusercontent.com/Homebrew/install/HEAD/install.sh)" echo "MACOS_DEPLOYMENT_TARGET=10.14" >> $GITHUB_ENV + echo "HOMEBREW_NO_AUTO_UPDATE=1" >> $GITHUB_ENV + if [ -f /usr/local/bin/brew ]; then + echo "PATH=/usr/local/bin:$PATH" >> $GITHUB_ENV + endif + if [ -f /opt/homebrew/bin/brew ]; then + echo "PATH=/opt/homebrew/bin:$PATH" >> $GITHUB_ENV + endif brew install fftw libomp - uses: actions/setup-python@v5 with: @@ -42,16 +50,19 @@ jobs: cppcheck: false clangtidy: false - name: Install pytest - run: python -m pip install pytest + run: | + python3 -m pip install --upgrade pip + python3 -m pip install pytest - name: Install fftw and libomp if: runner.os == 'linux' run: | - sudo apt-get update - sudo apt-get install -y libfftw3-dev libomp-dev + apt-get update + apt install build-essential + apt-get install -y libfftw3-dev libomp-dev - name: Set compiler flags run: | - echo MAKE_ARGS="-DFINUFFT_ARCH_FLAGS=${{ matrix.arch_flags }}" >> $GITHUB_ENV + echo CMAKE_ARGS="-DFINUFFT_ARCH_FLAGS=${{ matrix.arch_flags }}" >> $GITHUB_ENV - name: Build - run: pip install . --verbose + run: python3 -m pip install . 
--verbose - name: Test - run: python -m pytest python/finufft/test + run: python3 -m pytest python/finufft/test From 8f7d37385b632902d41db34f812c398df929f505 Mon Sep 17 00:00:00 2001 From: Marco Barbone Date: Fri, 14 Jun 2024 11:56:44 -0400 Subject: [PATCH 36/83] fixed windows path --- .github/workflows/python_cmake.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/python_cmake.yml b/.github/workflows/python_cmake.yml index e329d3dd1..425b80a4a 100644 --- a/.github/workflows/python_cmake.yml +++ b/.github/workflows/python_cmake.yml @@ -63,6 +63,6 @@ jobs: run: | echo CMAKE_ARGS="-DFINUFFT_ARCH_FLAGS=${{ matrix.arch_flags }}" >> $GITHUB_ENV - name: Build - run: python3 -m pip install . --verbose + run: python3 -m pip install ${{ github.workspace }} --verbose - name: Test - run: python3 -m pytest python/finufft/test + run: python3 -m pytest ${{ github.workspace }}/python/finufft/test From 5e7b6d3d3e6ae08ac6b6e686c8ad0db7e7da1a95 Mon Sep 17 00:00:00 2001 From: Marco Barbone Date: Fri, 14 Jun 2024 11:59:05 -0400 Subject: [PATCH 37/83] not installing homebrew --- .github/workflows/python_cmake.yml | 2 -- 1 file changed, 2 deletions(-) diff --git a/.github/workflows/python_cmake.yml b/.github/workflows/python_cmake.yml index 425b80a4a..abd5918bb 100644 --- a/.github/workflows/python_cmake.yml +++ b/.github/workflows/python_cmake.yml @@ -26,9 +26,7 @@ jobs: - name: Set min macOS version and install fftw and libomp if: runner.os == 'macOS' run: | - /bin/bash -c "$(curl -fsSL https://raw.githubusercontent.com/Homebrew/install/HEAD/install.sh)" echo "MACOS_DEPLOYMENT_TARGET=10.14" >> $GITHUB_ENV - echo "HOMEBREW_NO_AUTO_UPDATE=1" >> $GITHUB_ENV if [ -f /usr/local/bin/brew ]; then echo "PATH=/usr/local/bin:$PATH" >> $GITHUB_ENV endif From 7e076bf97e98748a859d7ecf9cb52ceda82aadba Mon Sep 17 00:00:00 2001 From: Marco Barbone Date: Fri, 14 Jun 2024 12:00:10 -0400 Subject: [PATCH 38/83] using sudo in linux --- 
.github/workflows/python_cmake.yml | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/.github/workflows/python_cmake.yml b/.github/workflows/python_cmake.yml index abd5918bb..450de4d09 100644 --- a/.github/workflows/python_cmake.yml +++ b/.github/workflows/python_cmake.yml @@ -54,9 +54,8 @@ jobs: - name: Install fftw and libomp if: runner.os == 'linux' run: | - apt-get update - apt install build-essential - apt-get install -y libfftw3-dev libomp-dev + sudo apt update + sudo apt install -y libfftw3-dev libomp-dev build-essential - name: Set compiler flags run: | echo CMAKE_ARGS="-DFINUFFT_ARCH_FLAGS=${{ matrix.arch_flags }}" >> $GITHUB_ENV From 95d439089f89d31f48cca3c002a7f6c142a96a03 Mon Sep 17 00:00:00 2001 From: Marco Barbone Date: Fri, 14 Jun 2024 12:11:36 -0400 Subject: [PATCH 39/83] using sudo in linux --- .github/workflows/python_cmake.yml | 33 +++++++++++++++--------------- 1 file changed, 17 insertions(+), 16 deletions(-) diff --git a/.github/workflows/python_cmake.yml b/.github/workflows/python_cmake.yml index 450de4d09..49160ae08 100644 --- a/.github/workflows/python_cmake.yml +++ b/.github/workflows/python_cmake.yml @@ -23,6 +23,16 @@ jobs: fail-fast: false matrix: ${{ fromJSON(needs.prepare.outputs.matrix) }} steps: + - name: Setup Cpp + uses: aminya/setup-cpp@v1 + with: + compiler: ${{ matrix.compiler }} + vcvarsall: ${{ contains(matrix.os, 'windows') }} + cmake: false + ninja: false + vcpkg: false + cppcheck: false + clangtidy: false - name: Set min macOS version and install fftw and libomp if: runner.os == 'macOS' run: | @@ -34,28 +44,19 @@ jobs: echo "PATH=/opt/homebrew/bin:$PATH" >> $GITHUB_ENV endif brew install fftw libomp - - uses: actions/setup-python@v5 + - name: Install fftw and libomp + if: runner.os == 'linux' + run: | + sudo apt update + sudo apt install -y libfftw3-dev libomp-dev libomp5 + - name: Setup Python + uses: actions/setup-python@v5 with: python-version: ${{ matrix.python-version }} - - name: Setup Cpp - uses: 
aminya/setup-cpp@v1 - with: - compiler: ${{ matrix.compiler }} - vcvarsall: ${{ contains(matrix.os, 'windows') }} - cmake: false - ninja: false - vcpkg: false - cppcheck: false - clangtidy: false - name: Install pytest run: | python3 -m pip install --upgrade pip python3 -m pip install pytest - - name: Install fftw and libomp - if: runner.os == 'linux' - run: | - sudo apt update - sudo apt install -y libfftw3-dev libomp-dev build-essential - name: Set compiler flags run: | echo CMAKE_ARGS="-DFINUFFT_ARCH_FLAGS=${{ matrix.arch_flags }}" >> $GITHUB_ENV From 691556db8c27869ede1f742e02953c0cc0f40145 Mon Sep 17 00:00:00 2001 From: Marco Barbone Date: Fri, 14 Jun 2024 12:19:12 -0400 Subject: [PATCH 40/83] pyproject.toml was missing --- .github/workflows/generate_matrix.py | 6 +++--- .github/workflows/python_cmake.yml | 7 ------- 2 files changed, 3 insertions(+), 10 deletions(-) diff --git a/.github/workflows/generate_matrix.py b/.github/workflows/generate_matrix.py index ecfeb39e4..abf696806 100644 --- a/.github/workflows/generate_matrix.py +++ b/.github/workflows/generate_matrix.py @@ -7,15 +7,15 @@ python_versions = ["3.8", "3.11"] combinations = { - "ubuntu-latest": { + "ubuntu-22.04": { "compiler": ["llvm", "gcc"], "arch_flags": ["-march=native", "-march=x86-64"] }, - "windows-latest": { + "windows-11": { "compiler": ["msvc"], "arch_flags": ["/arch:AVX2", "/arch:AVX512", "/arch:SSE2"] }, - "macos-latest": { + "macos-13": { "compiler": ["llvm", "gcc"], "arch_flags": ["-march=native", "-march=x86-64"] } diff --git a/.github/workflows/python_cmake.yml b/.github/workflows/python_cmake.yml index 49160ae08..8c321f701 100644 --- a/.github/workflows/python_cmake.yml +++ b/.github/workflows/python_cmake.yml @@ -36,13 +36,6 @@ jobs: - name: Set min macOS version and install fftw and libomp if: runner.os == 'macOS' run: | - echo "MACOS_DEPLOYMENT_TARGET=10.14" >> $GITHUB_ENV - if [ -f /usr/local/bin/brew ]; then - echo "PATH=/usr/local/bin:$PATH" >> $GITHUB_ENV - endif - if [ -f 
/opt/homebrew/bin/brew ]; then - echo "PATH=/opt/homebrew/bin:$PATH" >> $GITHUB_ENV - endif brew install fftw libomp - name: Install fftw and libomp if: runner.os == 'linux' From 310dffe7fba58cf0b834b1232c5274c4c0592ada Mon Sep 17 00:00:00 2001 From: Marco Barbone Date: Fri, 14 Jun 2024 12:21:43 -0400 Subject: [PATCH 41/83] python3- m pip does not work --- .github/workflows/python_cmake.yml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/.github/workflows/python_cmake.yml b/.github/workflows/python_cmake.yml index 8c321f701..a8c3a48da 100644 --- a/.github/workflows/python_cmake.yml +++ b/.github/workflows/python_cmake.yml @@ -48,12 +48,12 @@ jobs: python-version: ${{ matrix.python-version }} - name: Install pytest run: | - python3 -m pip install --upgrade pip - python3 -m pip install pytest + pip install --upgrade pip + pip install pytest - name: Set compiler flags run: | echo CMAKE_ARGS="-DFINUFFT_ARCH_FLAGS=${{ matrix.arch_flags }}" >> $GITHUB_ENV - name: Build - run: python3 -m pip install ${{ github.workspace }} --verbose + run: pip install ${{ github.workspace }} --verbose - name: Test run: python3 -m pytest ${{ github.workspace }}/python/finufft/test From a3c63327bad1ecb808f17ecda92bd725316163cc Mon Sep 17 00:00:00 2001 From: Marco Barbone Date: Fri, 14 Jun 2024 12:32:33 -0400 Subject: [PATCH 42/83] checkout is necessary --- .github/workflows/python_cmake.yml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/.github/workflows/python_cmake.yml b/.github/workflows/python_cmake.yml index a8c3a48da..2f05a84a1 100644 --- a/.github/workflows/python_cmake.yml +++ b/.github/workflows/python_cmake.yml @@ -23,6 +23,8 @@ jobs: fail-fast: false matrix: ${{ fromJSON(needs.prepare.outputs.matrix) }} steps: + - name: Checkout code + uses: actions/checkout@v4 - name: Setup Cpp uses: aminya/setup-cpp@v1 with: From cbd23e9bdcfc0984d6aa65a372fe53ee1463706e Mon Sep 17 00:00:00 2001 From: Marco Barbone Date: Fri, 14 Jun 2024 12:51:10 -0400 Subject: 
[PATCH 43/83] openmp is required in c++ code only --- .github/workflows/generate_matrix.py | 4 ++-- .github/workflows/python_cmake.yml | 7 ++++--- CMakeLists.txt | 2 +- 3 files changed, 7 insertions(+), 6 deletions(-) diff --git a/.github/workflows/generate_matrix.py b/.github/workflows/generate_matrix.py index abf696806..a3cabbe3a 100644 --- a/.github/workflows/generate_matrix.py +++ b/.github/workflows/generate_matrix.py @@ -11,8 +11,8 @@ "compiler": ["llvm", "gcc"], "arch_flags": ["-march=native", "-march=x86-64"] }, - "windows-11": { - "compiler": ["msvc"], + "windows-2022": { + "compiler": ["msvc", "llvm"], "arch_flags": ["/arch:AVX2", "/arch:AVX512", "/arch:SSE2"] }, "macos-13": { diff --git a/.github/workflows/python_cmake.yml b/.github/workflows/python_cmake.yml index 2f05a84a1..a1d224174 100644 --- a/.github/workflows/python_cmake.yml +++ b/.github/workflows/python_cmake.yml @@ -13,6 +13,7 @@ jobs: - name: Generate matrix id: generate_matrix run: | + echo "MACOSX_DEPLOYMENT_TARGET=11.0" >> $GITHUB_ENV MATRIX=$(python3 ${{ github.workspace }}/.github/workflows/generate_matrix.py) echo "matrix=$MATRIX" >> $GITHUB_OUTPUT build: @@ -50,12 +51,12 @@ jobs: python-version: ${{ matrix.python-version }} - name: Install pytest run: | - pip install --upgrade pip - pip install pytest + python3 -m pip install --upgrade pip + python3 -m pip install pytest - name: Set compiler flags run: | echo CMAKE_ARGS="-DFINUFFT_ARCH_FLAGS=${{ matrix.arch_flags }}" >> $GITHUB_ENV - name: Build - run: pip install ${{ github.workspace }} --verbose + run: python3 -m pip install ${{ github.workspace }} --verbose - name: Test run: python3 -m pytest ${{ github.workspace }}/python/finufft/test diff --git a/CMakeLists.txt b/CMakeLists.txt index f3c2e130c..b008510f2 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -117,7 +117,7 @@ if (FINUFFT_BUILD_MATLAB) else () # For non-matlab builds, find system OpenMP if (FINUFFT_USE_OPENMP) - find_package(OpenMP REQUIRED) + find_package(OpenMP 
COMPONENTS CXX REQUIRED) endif () endif () From e81ed614f801f3c6f9c660ae917f7f2b7d833743 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Joakim=20And=C3=A9n?= Date: Fri, 14 Jun 2024 19:13:24 +0200 Subject: [PATCH 44/83] ci: try multiline env strings --- .github/workflows/python_skbuild_wheels.yml | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/.github/workflows/python_skbuild_wheels.yml b/.github/workflows/python_skbuild_wheels.yml index a247fdf65..0bb657ceb 100644 --- a/.github/workflows/python_skbuild_wheels.yml +++ b/.github/workflows/python_skbuild_wheels.yml @@ -21,7 +21,11 @@ jobs: CIBW_SKIP_LINUX: "pp* *musllinux*" CIBW_BEFORE_ALL_LINUX: yum install -y fftw3-devel CIBW_BEFORE_ALL_MACOS: brew install gcc libomp fftw - CIBW_ENVIRONMENT_MACOS: "FC=gfortran-11 CC=gcc-11 CXX=g++-11 OpenMP_ROOT=/usr/local/opt/libomp" + CIBW_ENVIRONMENT_MACOS: > + FC=gfortran-11 + CC=gcc-11 + CXX=g++-11 + OpenMP_ROOT=/usr/local/opt/libomp - uses: actions/upload-artifact@v4 with: From 8466b80a6dda042bc7bc2a89e6f21ba7fbc5d19e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Joakim=20And=C3=A9n?= Date: Fri, 14 Jun 2024 19:15:22 +0200 Subject: [PATCH 45/83] ci: increase cibw verbosity --- .github/workflows/python_skbuild_wheels.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/workflows/python_skbuild_wheels.yml b/.github/workflows/python_skbuild_wheels.yml index 0bb657ceb..8f694bffb 100644 --- a/.github/workflows/python_skbuild_wheels.yml +++ b/.github/workflows/python_skbuild_wheels.yml @@ -17,6 +17,7 @@ jobs: - name: Build wheels uses: pypa/cibuildwheel@v2.19.1 env: + CIBW_BUILD_VERBOSITY: 1 CIBW_ARCHS_LINUX: "x86_64" CIBW_SKIP_LINUX: "pp* *musllinux*" CIBW_BEFORE_ALL_LINUX: yum install -y fftw3-devel From bf09b9680253c2c009f7ef0c9677a2a3e820aed2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Joakim=20And=C3=A9n?= Date: Fri, 14 Jun 2024 19:32:24 +0200 Subject: [PATCH 46/83] ci: fix CIBW_SKIP --- .github/workflows/python_skbuild_wheels.yml | 2 +- 1 file changed, 1 
insertion(+), 1 deletion(-) diff --git a/.github/workflows/python_skbuild_wheels.yml b/.github/workflows/python_skbuild_wheels.yml index 8f694bffb..4c64b65dc 100644 --- a/.github/workflows/python_skbuild_wheels.yml +++ b/.github/workflows/python_skbuild_wheels.yml @@ -18,8 +18,8 @@ jobs: uses: pypa/cibuildwheel@v2.19.1 env: CIBW_BUILD_VERBOSITY: 1 + CIBW_SKIP: "pp* *musllinux*" CIBW_ARCHS_LINUX: "x86_64" - CIBW_SKIP_LINUX: "pp* *musllinux*" CIBW_BEFORE_ALL_LINUX: yum install -y fftw3-devel CIBW_BEFORE_ALL_MACOS: brew install gcc libomp fftw CIBW_ENVIRONMENT_MACOS: > From 166a91599a5221488d66076f42a0186ac82e8637 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Joakim=20And=C3=A9n?= Date: Fri, 14 Jun 2024 19:33:53 +0200 Subject: [PATCH 47/83] ci: upgrade to gcc-13 --- .github/workflows/python_skbuild_wheels.yml | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/.github/workflows/python_skbuild_wheels.yml b/.github/workflows/python_skbuild_wheels.yml index 4c64b65dc..a8fba641b 100644 --- a/.github/workflows/python_skbuild_wheels.yml +++ b/.github/workflows/python_skbuild_wheels.yml @@ -21,11 +21,11 @@ jobs: CIBW_SKIP: "pp* *musllinux*" CIBW_ARCHS_LINUX: "x86_64" CIBW_BEFORE_ALL_LINUX: yum install -y fftw3-devel - CIBW_BEFORE_ALL_MACOS: brew install gcc libomp fftw + CIBW_BEFORE_ALL_MACOS: brew install gcc@13 libomp fftw CIBW_ENVIRONMENT_MACOS: > - FC=gfortran-11 - CC=gcc-11 - CXX=g++-11 + FC=gfortran-13 + CC=gcc-13 + CXX=g++-13 OpenMP_ROOT=/usr/local/opt/libomp - uses: actions/upload-artifact@v4 From 663c8312cfa6cd4d5fc689b87767a0444d05267d Mon Sep 17 00:00:00 2001 From: Libin Lu Date: Sun, 16 Jun 2024 17:21:33 -0400 Subject: [PATCH 48/83] try to make python_cmake.yml and python_skbuild_wheels.yml work --- .github/workflows/generate_matrix.py | 4 ++-- .github/workflows/python_cmake.yml | 9 +++++---- .github/workflows/python_skbuild_wheels.yml | 10 +++++----- python/CMakeLists.txt | 6 +++++- 4 files changed, 17 insertions(+), 12 deletions(-) diff --git 
a/.github/workflows/generate_matrix.py b/.github/workflows/generate_matrix.py index a3cabbe3a..29049e864 100644 --- a/.github/workflows/generate_matrix.py +++ b/.github/workflows/generate_matrix.py @@ -13,10 +13,10 @@ }, "windows-2022": { "compiler": ["msvc", "llvm"], - "arch_flags": ["/arch:AVX2", "/arch:AVX512", "/arch:SSE2"] + "arch_flags": ["/arch:AVX2", "/arch:SSE2"] }, "macos-13": { - "compiler": ["llvm", "gcc"], + "compiler": ["llvm", "gcc-14"], "arch_flags": ["-march=native", "-march=x86-64"] } } diff --git a/.github/workflows/python_cmake.yml b/.github/workflows/python_cmake.yml index a1d224174..9423f42c2 100644 --- a/.github/workflows/python_cmake.yml +++ b/.github/workflows/python_cmake.yml @@ -36,15 +36,15 @@ jobs: vcpkg: false cppcheck: false clangtidy: false - - name: Set min macOS version and install fftw and libomp + - name: Set min macOS version and install fftw if: runner.os == 'macOS' run: | - brew install fftw libomp - - name: Install fftw and libomp + brew install fftw + - name: Install fftw if: runner.os == 'linux' run: | sudo apt update - sudo apt install -y libfftw3-dev libomp-dev libomp5 + sudo apt install -y libfftw3-dev - name: Setup Python uses: actions/setup-python@v5 with: @@ -56,6 +56,7 @@ jobs: - name: Set compiler flags run: | echo CMAKE_ARGS="-DFINUFFT_ARCH_FLAGS=${{ matrix.arch_flags }}" >> $GITHUB_ENV + shell: bash - name: Build run: python3 -m pip install ${{ github.workspace }} --verbose - name: Test diff --git a/.github/workflows/python_skbuild_wheels.yml b/.github/workflows/python_skbuild_wheels.yml index a8fba641b..7ba0a9f23 100644 --- a/.github/workflows/python_skbuild_wheels.yml +++ b/.github/workflows/python_skbuild_wheels.yml @@ -21,12 +21,12 @@ jobs: CIBW_SKIP: "pp* *musllinux*" CIBW_ARCHS_LINUX: "x86_64" CIBW_BEFORE_ALL_LINUX: yum install -y fftw3-devel - CIBW_BEFORE_ALL_MACOS: brew install gcc@13 libomp fftw + CIBW_BEFORE_ALL_MACOS: brew install gcc@14 fftw CIBW_ENVIRONMENT_MACOS: > - FC=gfortran-13 - CC=gcc-13 - 
CXX=g++-13 - OpenMP_ROOT=/usr/local/opt/libomp + FC=gfortran-14 + CC=gcc-14 + CXX=g++-14 + MACOSX_DEPLOYMENT_TARGET=13 - uses: actions/upload-artifact@v4 with: diff --git a/python/CMakeLists.txt b/python/CMakeLists.txt index 9f69633bb..e1eb698fb 100644 --- a/python/CMakeLists.txt +++ b/python/CMakeLists.txt @@ -1,5 +1,9 @@ if(FINUFFT_USE_CPU) - install(TARGETS finufft LIBRARY DESTINATION finufft) + if (WIN32) + install(TARGETS finufft LIBRARY DESTINATION finufft RUNTIME DESTINATION finufft) + else () + install(TARGETS finufft LIBRARY DESTINATION finufft) + endif () endif() if(FINUFFT_USE_GPU) From 4560f12973d95839e20fd83d867cfaa704aee4db Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Joakim=20And=C3=A9n?= Date: Tue, 18 Jun 2024 18:16:45 +0200 Subject: [PATCH 49/83] ci: add windows to cibw --- .github/workflows/python_skbuild_wheels.yml | 37 ++++++++++++++++++++- 1 file changed, 36 insertions(+), 1 deletion(-) diff --git a/.github/workflows/python_skbuild_wheels.yml b/.github/workflows/python_skbuild_wheels.yml index 7ba0a9f23..943d51613 100644 --- a/.github/workflows/python_skbuild_wheels.yml +++ b/.github/workflows/python_skbuild_wheels.yml @@ -3,7 +3,7 @@ name: Build Python wheels (skbuild) on: [push, pull_request] jobs: - build_wheels: + build_wheels_unix: name: Build wheels on ${{ matrix.os }} runs-on: ${{ matrix.os }} strategy: @@ -32,3 +32,38 @@ jobs: with: name: cibw-wheels-${{ matrix.os }}-${{ strategy.job-index }} path: ./wheelhouse/*.whl + + build_wheels_win: + name: Build wheels on Windows + runs-on: windows-latest + + steps: + - uses: actions/checkout@v4 + + - name: Install FFTW + run: | + Invoke-WebRequest -Uri "https://www.fftw.org/fftw-3.3.10.tar.gz" -OutFile "${{ github.workspace }}\fftw.tar.gz" + + New-Item -Path "${{ github.workspace }}" -Name "fftw-source" -ItemType Directory + tar --strip-components=1 -C "${{ github.workspace }}\fftw-source" -zxf "${{ github.workspace }}\fftw.tar.gz" + + cmake -S "${{ github.workspace }}\fftw-source" -D 
CMAKE_INSTALL_PREFIX="${{ github.workspace }}\fftw" -D ENABLE_AVX2=ON -D BUILD_TESTS=OFF -D BUILD_SHARED_LIBS=OFF -D ENABLE_OPENMP=ON -D ENABLE_FLOAT=OFF -B "${{ github.workspace }}\fftw-build" + cmake --build "${{ github.workspace }}\fftw-build" --config Release + cmake --install "${{ github.workspace }}\fftw-build" + + cmake -S "${{ github.workspace }}\fftw-source" -D CMAKE_INSTALL_PREFIX="${{ github.workspace }}\fftw" -D ENABLE_AVX2=ON -D BUILD_TESTS=OFF -D BUILD_SHARED_LIBS=OFF -D ENABLE_OPENMP=ON -D ENABLE_FLOAT=ON -B "${{ github.workspace }}\fftw-build" + cmake --build "${{ github.workspace }}\fftw-build" --config Release + cmake --install "${{ github.workspace }}\fftw-build" + + - name: Build wheels + uses: pypa/cibuildwheel@v2.19.1 + env: + CIBW_BUILD_VERBOSITY: 1 + CIBW_ARCHS: AMD64 + CIBW_ENVIRONMENT: > + FFTWDIR='${{ github.workspace }}\fftw' + + - uses: actions/upload-artifact@v4 + with: + name: cibw-wheels-${{ matrix.os }}-${{ strategy.job-index }} + path: ./wheelhouse/*.whl From 2bdeb7178127e768673cc072076de9d1ef2f8f46 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Joakim=20And=C3=A9n?= Date: Tue, 9 Jul 2024 11:18:39 +0200 Subject: [PATCH 50/83] ci: lowercase windows --- .github/workflows/python_skbuild_wheels.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/python_skbuild_wheels.yml b/.github/workflows/python_skbuild_wheels.yml index 943d51613..45ef9c75c 100644 --- a/.github/workflows/python_skbuild_wheels.yml +++ b/.github/workflows/python_skbuild_wheels.yml @@ -34,7 +34,7 @@ jobs: path: ./wheelhouse/*.whl build_wheels_win: - name: Build wheels on Windows + name: Build wheels on windows runs-on: windows-latest steps: From 5aa595636e4fd4519173aa8f2fc67969f1038dc4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Joakim=20And=C3=A9n?= Date: Tue, 9 Jul 2024 11:29:00 +0200 Subject: [PATCH 51/83] ci: bump cibw Fixes outdated links to CentOS mirror list: https://github.com/pypa/cibuildwheel/issues/1915 --- 
.github/workflows/python_skbuild_wheels.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/python_skbuild_wheels.yml b/.github/workflows/python_skbuild_wheels.yml index 45ef9c75c..8d24b31d2 100644 --- a/.github/workflows/python_skbuild_wheels.yml +++ b/.github/workflows/python_skbuild_wheels.yml @@ -15,7 +15,7 @@ jobs: - name: Build wheels - uses: pypa/cibuildwheel@v2.19.1 + uses: pypa/cibuildwheel@v2.19.2 env: CIBW_BUILD_VERBOSITY: 1 CIBW_SKIP: "pp* *musllinux*" From b5e742c77e6c7d6d92d970a21a8f9df1202adc13 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Joakim=20And=C3=A9n?= Date: Tue, 9 Jul 2024 11:35:56 +0200 Subject: [PATCH 52/83] ci: move macos-arm64 out --- .github/workflows/python_skbuild_wheels.yml | 28 +++++++++++++++++++++ 1 file changed, 28 insertions(+) diff --git a/.github/workflows/python_skbuild_wheels.yml b/.github/workflows/python_skbuild_wheels.yml index 8d24b31d2..4df57db42 100644 --- a/.github/workflows/python_skbuild_wheels.yml +++ b/.github/workflows/python_skbuild_wheels.yml @@ -33,6 +33,34 @@ jobs: name: cibw-wheels-${{ matrix.os }}-${{ strategy.job-index }} path: ./wheelhouse/*.whl + build_wheels_macos_arm64: + name: Build wheels on macos-14 + runs-on: macos-14 + + steps: + - uses: actions/checkout@v4 + + - name: Build wheels + uses: pypa/cibuildwheel@v2.19.2 + env: + CIBW_BUILD_VERBOSITY: 1 + CIBW_SKIP: "pp*" + CIBW_ARCHS_MACOS: "arm64" + CIBW_BEFORE_ALL_MACOS: | + pkg=$(brew fetch --force --bottle-tag=arm64_ventura fftw | grep 'Downloaded to' | cut -d' ' -f3) + brew install $pkg + pkg=$(brew fetch --force --bottle-tag=arm64_ventura gcc | grep 'Downloaded to' | cut -d' ' -f3) + brew install $pkg + CIBW_ENVIRONMENT_MACOS: > + CC=gcc-14 + CXX=g++-14 + MACOSX_DEPLOYMENT_TARGET=14 + + - uses: actions/upload-artifact@v4 + with: + name: cibw-wheels-${{ matrix.os }}-${{ strategy.job-index }} + path: ./wheelhouse/*.whl + build_wheels_win: name: Build wheels on windows runs-on: windows-latest From 
d88da71726280697a0307b1317c765f2687e911a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Joakim=20And=C3=A9n?= Date: Tue, 9 Jul 2024 18:35:12 +0200 Subject: [PATCH 53/83] py: remove dependency on py version --- pyproject.toml | 3 +++ 1 file changed, 3 insertions(+) diff --git a/pyproject.toml b/pyproject.toml index a6a54e4c7..e778693f0 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -26,6 +26,9 @@ cmake.define = {"FINUFFT_BUILD_PYTHON" = "ON", "FINUFFT_ENABLE_INSTALL" = "OFF"} wheel.packages = ["python/finufft/finufft"] +# Indicate that we don't depend on the CPython API +wheel.py-api = "py3" + [tool.scikit-build.metadata.version] provider = "scikit_build_core.metadata.regex" input = "python/finufft/finufft/__init__.py" From e79641b69e334c7c27866af70dd4f4bac8cac0cc Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Joakim=20And=C3=A9n?= Date: Tue, 9 Jul 2024 18:47:27 +0200 Subject: [PATCH 54/83] ci: fix wheel artifact names --- .github/workflows/python_skbuild_wheels.yml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/.github/workflows/python_skbuild_wheels.yml b/.github/workflows/python_skbuild_wheels.yml index 4df57db42..d283ee786 100644 --- a/.github/workflows/python_skbuild_wheels.yml +++ b/.github/workflows/python_skbuild_wheels.yml @@ -30,7 +30,7 @@ jobs: - uses: actions/upload-artifact@v4 with: - name: cibw-wheels-${{ matrix.os }}-${{ strategy.job-index }} + name: cibw-wheels-${{ matrix.os }} path: ./wheelhouse/*.whl build_wheels_macos_arm64: @@ -58,7 +58,7 @@ jobs: - uses: actions/upload-artifact@v4 with: - name: cibw-wheels-${{ matrix.os }}-${{ strategy.job-index }} + name: cibw-wheels-macos-arm64 path: ./wheelhouse/*.whl build_wheels_win: @@ -93,5 +93,5 @@ jobs: - uses: actions/upload-artifact@v4 with: - name: cibw-wheels-${{ matrix.os }}-${{ strategy.job-index }} + name: cibw-wheels-windows path: ./wheelhouse/*.whl From 003d607ed1d6fd0a03917570939050210c0eabc8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Joakim=20And=C3=A9n?= Date: Tue, 9 Jul 2024 
18:46:11 +0200 Subject: [PATCH 55/83] ci: add testing to cibw --- .github/workflows/python_skbuild_wheels.yml | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/.github/workflows/python_skbuild_wheels.yml b/.github/workflows/python_skbuild_wheels.yml index d283ee786..591e7c149 100644 --- a/.github/workflows/python_skbuild_wheels.yml +++ b/.github/workflows/python_skbuild_wheels.yml @@ -27,6 +27,8 @@ jobs: CC=gcc-14 CXX=g++-14 MACOSX_DEPLOYMENT_TARGET=13 + CIBW_TEST_REQUIRES: pytest + CIBW_TEST_COMMAND: pytest {project}/python/finufft/test - uses: actions/upload-artifact@v4 with: @@ -55,6 +57,8 @@ jobs: CC=gcc-14 CXX=g++-14 MACOSX_DEPLOYMENT_TARGET=14 + CIBW_TEST_REQUIRES: pytest + CIBW_TEST_COMMAND: pytest {project}/python/finufft/test - uses: actions/upload-artifact@v4 with: @@ -90,6 +94,8 @@ jobs: CIBW_ARCHS: AMD64 CIBW_ENVIRONMENT: > FFTWDIR='${{ github.workspace }}\fftw' + CIBW_TEST_REQUIRES: pytest + CIBW_TEST_COMMAND: pytest {project}/python/finufft/test - uses: actions/upload-artifact@v4 with: From 9bded6e8d8d8fa454e85cf0babbbc2012de43313 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Joakim=20And=C3=A9n?= Date: Tue, 9 Jul 2024 21:06:47 +0200 Subject: [PATCH 56/83] ci: skip pp310 for win --- .github/workflows/python_skbuild_wheels.yml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/.github/workflows/python_skbuild_wheels.yml b/.github/workflows/python_skbuild_wheels.yml index 591e7c149..9c20cece7 100644 --- a/.github/workflows/python_skbuild_wheels.yml +++ b/.github/workflows/python_skbuild_wheels.yml @@ -96,6 +96,8 @@ jobs: FFTWDIR='${{ github.workspace }}\fftw' CIBW_TEST_REQUIRES: pytest CIBW_TEST_COMMAND: pytest {project}/python/finufft/test + # PyPy 3.10 triggers access violation under Windows, for some reason. 
+ CIBW_TEST_SKIP: "pp310-win*" - uses: actions/upload-artifact@v4 with: From e1eff5c2b2f21651188dc4800c4cd0f983f7ab25 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Joakim=20And=C3=A9n?= Date: Tue, 9 Jul 2024 22:49:12 +0200 Subject: [PATCH 57/83] ci: delvewheel --- .github/workflows/python_skbuild_wheels.yml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/.github/workflows/python_skbuild_wheels.yml b/.github/workflows/python_skbuild_wheels.yml index 9c20cece7..f67a59f59 100644 --- a/.github/workflows/python_skbuild_wheels.yml +++ b/.github/workflows/python_skbuild_wheels.yml @@ -94,6 +94,8 @@ jobs: CIBW_ARCHS: AMD64 CIBW_ENVIRONMENT: > FFTWDIR='${{ github.workspace }}\fftw' + CIBW_BEFORE_BUILD: pip install delvewheel + CIBW_REPAIR_WHEEL_COMMAND: delvewheel repair -v -w {dest_dir} {wheel} CIBW_TEST_REQUIRES: pytest CIBW_TEST_COMMAND: pytest {project}/python/finufft/test # PyPy 3.10 triggers access violation under Windows, for some reason. From 8aab607d76dfed07168a7cc007d60d9f78c58659 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Joakim=20And=C3=A9n?= Date: Fri, 12 Jul 2024 13:38:19 +0200 Subject: [PATCH 58/83] ci: update cibw for win --- .github/workflows/python_skbuild_wheels.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/python_skbuild_wheels.yml b/.github/workflows/python_skbuild_wheels.yml index f67a59f59..66e7d9394 100644 --- a/.github/workflows/python_skbuild_wheels.yml +++ b/.github/workflows/python_skbuild_wheels.yml @@ -88,7 +88,7 @@ jobs: cmake --install "${{ github.workspace }}\fftw-build" - name: Build wheels - uses: pypa/cibuildwheel@v2.19.1 + uses: pypa/cibuildwheel@v2.19.2 env: CIBW_BUILD_VERBOSITY: 1 CIBW_ARCHS: AMD64 From 81e5e3979737b29b3c77b79c0bf457385cb0152b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Joakim=20And=C3=A9n?= Date: Fri, 12 Jul 2024 13:39:04 +0200 Subject: [PATCH 59/83] ci: replace msvc with gcc (mingw64) --- .github/workflows/python_skbuild_wheels.yml | 19 ++++--------------- 1 file changed, 4 
insertions(+), 15 deletions(-) diff --git a/.github/workflows/python_skbuild_wheels.yml b/.github/workflows/python_skbuild_wheels.yml index 66e7d9394..ca830fc2a 100644 --- a/.github/workflows/python_skbuild_wheels.yml +++ b/.github/workflows/python_skbuild_wheels.yml @@ -72,28 +72,17 @@ jobs: steps: - uses: actions/checkout@v4 - - name: Install FFTW + - name: Install dependencies run: | - Invoke-WebRequest -Uri "https://www.fftw.org/fftw-3.3.10.tar.gz" -OutFile "${{ github.workspace }}\fftw.tar.gz" - - New-Item -Path "${{ github.workspace }}" -Name "fftw-source" -ItemType Directory - tar --strip-components=1 -C "${{ github.workspace }}\fftw-source" -zxf "${{ github.workspace }}\fftw.tar.gz" - - cmake -S "${{ github.workspace }}\fftw-source" -D CMAKE_INSTALL_PREFIX="${{ github.workspace }}\fftw" -D ENABLE_AVX2=ON -D BUILD_TESTS=OFF -D BUILD_SHARED_LIBS=OFF -D ENABLE_OPENMP=ON -D ENABLE_FLOAT=OFF -B "${{ github.workspace }}\fftw-build" - cmake --build "${{ github.workspace }}\fftw-build" --config Release - cmake --install "${{ github.workspace }}\fftw-build" - - cmake -S "${{ github.workspace }}\fftw-source" -D CMAKE_INSTALL_PREFIX="${{ github.workspace }}\fftw" -D ENABLE_AVX2=ON -D BUILD_TESTS=OFF -D BUILD_SHARED_LIBS=OFF -D ENABLE_OPENMP=ON -D ENABLE_FLOAT=ON -B "${{ github.workspace }}\fftw-build" - cmake --build "${{ github.workspace }}\fftw-build" --config Release - cmake --install "${{ github.workspace }}\fftw-build" + c:\msys64\usr\bin\pacman.exe -Sy --noconfirm mingw-w64-x86_64-gcc mingw-w64-x86_64-fftw mingw-w64-x86_64-pkgconf + echo "c:\msys64\mingw64\bin;" >> $env:GITHUB_PATH - name: Build wheels uses: pypa/cibuildwheel@v2.19.2 env: CIBW_BUILD_VERBOSITY: 1 CIBW_ARCHS: AMD64 - CIBW_ENVIRONMENT: > - FFTWDIR='${{ github.workspace }}\fftw' + CIBW_CONFIG_SETTINGS: "cmake.args='-G Ninja'" CIBW_BEFORE_BUILD: pip install delvewheel CIBW_REPAIR_WHEEL_COMMAND: delvewheel repair -v -w {dest_dir} {wheel} CIBW_TEST_REQUIRES: pytest From 
18716b5a52ca2a8a4363d79492bd9000e687da8e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Joakim=20And=C3=A9n?= Date: Fri, 12 Jul 2024 13:39:58 +0200 Subject: [PATCH 60/83] ci: analyze existing in delvewheel --- .github/workflows/python_skbuild_wheels.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/python_skbuild_wheels.yml b/.github/workflows/python_skbuild_wheels.yml index ca830fc2a..89eae4af1 100644 --- a/.github/workflows/python_skbuild_wheels.yml +++ b/.github/workflows/python_skbuild_wheels.yml @@ -84,7 +84,7 @@ jobs: CIBW_ARCHS: AMD64 CIBW_CONFIG_SETTINGS: "cmake.args='-G Ninja'" CIBW_BEFORE_BUILD: pip install delvewheel - CIBW_REPAIR_WHEEL_COMMAND: delvewheel repair -v -w {dest_dir} {wheel} + CIBW_REPAIR_WHEEL_COMMAND: delvewheel repair -v --analyze-existing -w {dest_dir} {wheel} CIBW_TEST_REQUIRES: pytest CIBW_TEST_COMMAND: pytest {project}/python/finufft/test # PyPy 3.10 triggers access violation under Windows, for some reason. From 72db6dbca00dacec4e983fb2c218c89d33d48b13 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Joakim=20And=C3=A9n?= Date: Fri, 12 Jul 2024 13:40:12 +0200 Subject: [PATCH 61/83] ci: skip pypy tests under win For some reason, these tests are failing because Python is unable to find the DLL. Should be fixable, but skipping tests for now. --- .github/workflows/python_skbuild_wheels.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/python_skbuild_wheels.yml b/.github/workflows/python_skbuild_wheels.yml index 89eae4af1..680518302 100644 --- a/.github/workflows/python_skbuild_wheels.yml +++ b/.github/workflows/python_skbuild_wheels.yml @@ -87,8 +87,8 @@ jobs: CIBW_REPAIR_WHEEL_COMMAND: delvewheel repair -v --analyze-existing -w {dest_dir} {wheel} CIBW_TEST_REQUIRES: pytest CIBW_TEST_COMMAND: pytest {project}/python/finufft/test - # PyPy 3.10 triggers access violation under Windows, for some reason. 
- CIBW_TEST_SKIP: "pp310-win*" + # PyPy doesn't seem to work under windows, for some reason. + CIBW_TEST_SKIP: "pp*-win*" - uses: actions/upload-artifact@v4 with: From 9befad557a608e9e848484e12613924805e0c9d6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Joakim=20And=C3=A9n?= Date: Mon, 15 Jul 2024 22:45:40 +0200 Subject: [PATCH 62/83] ci: document cibw workflow --- .github/workflows/python_skbuild_wheels.yml | 19 +++++++++++++++++-- 1 file changed, 17 insertions(+), 2 deletions(-) diff --git a/.github/workflows/python_skbuild_wheels.yml b/.github/workflows/python_skbuild_wheels.yml index 680518302..dbca14694 100644 --- a/.github/workflows/python_skbuild_wheels.yml +++ b/.github/workflows/python_skbuild_wheels.yml @@ -13,17 +13,19 @@ jobs: steps: - uses: actions/checkout@v4 - - name: Build wheels uses: pypa/cibuildwheel@v2.19.2 env: CIBW_BUILD_VERBOSITY: 1 + # Not building for PyPy and musllinux for now. CIBW_SKIP: "pp* *musllinux*" CIBW_ARCHS_LINUX: "x86_64" CIBW_BEFORE_ALL_LINUX: yum install -y fftw3-devel CIBW_BEFORE_ALL_MACOS: brew install gcc@14 fftw + # Need following versions of GCC for compatibility with fftw + # installed by homebrew. Similarly, we set the macOS version + # for compatibility with those libraries. CIBW_ENVIRONMENT_MACOS: > - FC=gfortran-14 CC=gcc-14 CXX=g++-14 MACOSX_DEPLOYMENT_TARGET=13 @@ -48,6 +50,9 @@ jobs: CIBW_BUILD_VERBOSITY: 1 CIBW_SKIP: "pp*" CIBW_ARCHS_MACOS: "arm64" + # Make sure to install the ARM64-specific versions of FFTW and GCC. + # Perhaps this is done automatically on the macos-14 image. We should + # look into this further. CIBW_BEFORE_ALL_MACOS: | pkg=$(brew fetch --force --bottle-tag=arm64_ventura fftw | grep 'Downloaded to' | cut -d' ' -f3) brew install $pkg @@ -74,7 +79,12 @@ jobs: - name: Install dependencies run: | + # Here we install the mingw64 versions of gcc and FFTW that we will + # use to compile the library. We also need pkg-config so that cmake + # can easily find FFTW when configurating the build. 
c:\msys64\usr\bin\pacman.exe -Sy --noconfirm mingw-w64-x86_64-gcc mingw-w64-x86_64-fftw mingw-w64-x86_64-pkgconf + # This particular install of mingw64 *is not* in the path by default + # (another one at c:\mingw64 is, however), so we add it to the path. echo "c:\msys64\mingw64\bin;" >> $env:GITHUB_PATH - name: Build wheels @@ -82,7 +92,12 @@ jobs: env: CIBW_BUILD_VERBOSITY: 1 CIBW_ARCHS: AMD64 + # This is required to force cmake to avoid using MSVC (the default). + # By setting the generator to Ninja, cmake will pick gcc (mingw64) + # as the compiler. CIBW_CONFIG_SETTINGS: "cmake.args='-G Ninja'" + # CIBW doesn't do vendoring of DLLs on Windows by defaulat, so we + # have to install delvewheel and run it. CIBW_BEFORE_BUILD: pip install delvewheel CIBW_REPAIR_WHEEL_COMMAND: delvewheel repair -v --analyze-existing -w {dest_dir} {wheel} CIBW_TEST_REQUIRES: pytest From 982f09fabe71d96496ef346d6d46c15afb8c800e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Joakim=20And=C3=A9n?= Date: Mon, 15 Jul 2024 22:46:42 +0200 Subject: [PATCH 63/83] ci: typo in doc --- .github/workflows/python_skbuild_wheels.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/python_skbuild_wheels.yml b/.github/workflows/python_skbuild_wheels.yml index dbca14694..f81a9869c 100644 --- a/.github/workflows/python_skbuild_wheels.yml +++ b/.github/workflows/python_skbuild_wheels.yml @@ -96,7 +96,7 @@ jobs: # By setting the generator to Ninja, cmake will pick gcc (mingw64) # as the compiler. CIBW_CONFIG_SETTINGS: "cmake.args='-G Ninja'" - # CIBW doesn't do vendoring of DLLs on Windows by defaulat, so we + # CIBW doesn't do vendoring of DLLs on Windows by default, so we # have to install delvewheel and run it. 
CIBW_BEFORE_BUILD: pip install delvewheel CIBW_REPAIR_WHEEL_COMMAND: delvewheel repair -v --analyze-existing -w {dest_dir} {wheel} From 584c544a2f9dddef73733cdcb8f04ff70949d884 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Joakim=20And=C3=A9n?= Date: Mon, 15 Jul 2024 22:56:34 +0200 Subject: [PATCH 64/83] ci: move some cibw options into pyproject.toml --- .github/workflows/python_skbuild_wheels.yml | 22 --------------------- pyproject.toml | 18 +++++++++++++++++ 2 files changed, 18 insertions(+), 22 deletions(-) diff --git a/.github/workflows/python_skbuild_wheels.yml b/.github/workflows/python_skbuild_wheels.yml index f81a9869c..c2d92b9b3 100644 --- a/.github/workflows/python_skbuild_wheels.yml +++ b/.github/workflows/python_skbuild_wheels.yml @@ -16,11 +16,6 @@ jobs: - name: Build wheels uses: pypa/cibuildwheel@v2.19.2 env: - CIBW_BUILD_VERBOSITY: 1 - # Not building for PyPy and musllinux for now. - CIBW_SKIP: "pp* *musllinux*" - CIBW_ARCHS_LINUX: "x86_64" - CIBW_BEFORE_ALL_LINUX: yum install -y fftw3-devel CIBW_BEFORE_ALL_MACOS: brew install gcc@14 fftw # Need following versions of GCC for compatibility with fftw # installed by homebrew. Similarly, we set the macOS version @@ -29,8 +24,6 @@ jobs: CC=gcc-14 CXX=g++-14 MACOSX_DEPLOYMENT_TARGET=13 - CIBW_TEST_REQUIRES: pytest - CIBW_TEST_COMMAND: pytest {project}/python/finufft/test - uses: actions/upload-artifact@v4 with: @@ -47,9 +40,6 @@ jobs: - name: Build wheels uses: pypa/cibuildwheel@v2.19.2 env: - CIBW_BUILD_VERBOSITY: 1 - CIBW_SKIP: "pp*" - CIBW_ARCHS_MACOS: "arm64" # Make sure to install the ARM64-specific versions of FFTW and GCC. # Perhaps this is done automatically on the macos-14 image. We should # look into this further. 
@@ -62,8 +52,6 @@ jobs: CC=gcc-14 CXX=g++-14 MACOSX_DEPLOYMENT_TARGET=14 - CIBW_TEST_REQUIRES: pytest - CIBW_TEST_COMMAND: pytest {project}/python/finufft/test - uses: actions/upload-artifact@v4 with: @@ -90,20 +78,10 @@ jobs: - name: Build wheels uses: pypa/cibuildwheel@v2.19.2 env: - CIBW_BUILD_VERBOSITY: 1 - CIBW_ARCHS: AMD64 # This is required to force cmake to avoid using MSVC (the default). # By setting the generator to Ninja, cmake will pick gcc (mingw64) # as the compiler. CIBW_CONFIG_SETTINGS: "cmake.args='-G Ninja'" - # CIBW doesn't do vendoring of DLLs on Windows by default, so we - # have to install delvewheel and run it. - CIBW_BEFORE_BUILD: pip install delvewheel - CIBW_REPAIR_WHEEL_COMMAND: delvewheel repair -v --analyze-existing -w {dest_dir} {wheel} - CIBW_TEST_REQUIRES: pytest - CIBW_TEST_COMMAND: pytest {project}/python/finufft/test - # PyPy doesn't seem to work under windows, for some reason. - CIBW_TEST_SKIP: "pp*-win*" - uses: actions/upload-artifact@v4 with: diff --git a/pyproject.toml b/pyproject.toml index e778693f0..f3e718f74 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -36,3 +36,21 @@ input = "python/finufft/finufft/__init__.py" [tool.cibuildwheel] # Necessary to see build output from the actual compilation build-verbosity = 1 +# Not building for PyPy and musllinux for now. +skip = "pp* *musllinux" +test-requires = "pytest" +test-command = "pytest {project}/python/finufft/test" + +[tool.cibuildwheel.linux] +archs = "x86_64" +before-all = "yum install -y fftw3-devel" + +[tool.cibuildwheel.macos] +archs = "arm64" + +[tool.cibuildwheel.windows] +archs = "amd64" +before-build = "pip install delvewheel" +# CIBW doesn't do vendoring of DLLs on Windows by default, so we have to +# install delvewheel and run it. 
+repair-wheel-command = "delvewheel repair -v --analyze-existing -w {dest_dir} {wheel} From 3f6debf10a977acf7b8706ecf5c580a9db0f8b6d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Joakim=20And=C3=A9n?= Date: Mon, 15 Jul 2024 22:58:47 +0200 Subject: [PATCH 65/83] py: fix typo in pyproject.toml --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index f3e718f74..6f3ca29eb 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -53,4 +53,4 @@ archs = "amd64" before-build = "pip install delvewheel" # CIBW doesn't do vendoring of DLLs on Windows by default, so we have to # install delvewheel and run it. -repair-wheel-command = "delvewheel repair -v --analyze-existing -w {dest_dir} {wheel} +repair-wheel-command = "delvewheel repair -v --analyze-existing -w {dest_dir} {wheel}" From 562d53c1699baeecbac1e4456205b34f46349c71 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Joakim=20And=C3=A9n?= Date: Mon, 15 Jul 2024 23:01:56 +0200 Subject: [PATCH 66/83] ci: fix capitalization for AMD64 --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index 6f3ca29eb..308edc8e3 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -49,7 +49,7 @@ before-all = "yum install -y fftw3-devel" archs = "arm64" [tool.cibuildwheel.windows] -archs = "amd64" +archs = "AMD64" before-build = "pip install delvewheel" # CIBW doesn't do vendoring of DLLs on Windows by default, so we have to # install delvewheel and run it. 
From e318f4f7e398a85e44c436b2f27950432148cd6d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Joakim=20And=C3=A9n?= Date: Mon, 15 Jul 2024 23:04:09 +0200 Subject: [PATCH 67/83] ci: exclude musllinux --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index 308edc8e3..e36da5253 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -37,7 +37,7 @@ input = "python/finufft/finufft/__init__.py" # Necessary to see build output from the actual compilation build-verbosity = 1 # Not building for PyPy and musllinux for now. -skip = "pp* *musllinux" +skip = "pp* *musllinux*" test-requires = "pytest" test-command = "pytest {project}/python/finufft/test" From 1e0da5b65fe5af15b58d05ad5739b71471a73bf6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Joakim=20And=C3=A9n?= Date: Mon, 15 Jul 2024 23:25:15 +0200 Subject: [PATCH 68/83] py: update pyproject.toml Transter the metadata we had in setup.py. --- pyproject.toml | 17 ++++++++++++++++- 1 file changed, 16 insertions(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index e36da5253..1cd480b01 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -9,10 +9,25 @@ build-backend = "scikit_build_core.build" [project] name = "finufft" -description = "Python bindings for the FINUFFT C++ library" readme = "README.md" requires-python = ">=3.8" dependencies = ["numpy >= 1.12.0"] +authors = [ + {name = "Jeremy Magland"}, + {name = "Daniel Foreman-Mackey"}, + {name = "Joakim Anden"}, + {name = "Libin Lu"}, + {name = "Alex Barnett"}] +maintainers = [{name = "Alex Barnett", email = "abarnett@flatironinstitute.org"}] +description = "Python interface to FINUFFT" +classifiers = [ + "License :: OSI Approved :: Apache Software License", + "Programming Language :: Python :: 3", + "Programming Language :: C++", + "Operating System :: POSIX :: Linux", + "Operating System :: MacOS :: MacOS X", + "Operating System :: Microsoft :: Windows" + ] dynamic = ["version"] [tool.scikit-build] From 
fff9dc9a36f0922aa23d8cf5752d6a2afec930ab Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Joakim=20And=C3=A9n?= Date: Mon, 15 Jul 2024 23:28:40 +0200 Subject: [PATCH 69/83] py: more comments for pyproject.toml --- pyproject.toml | 1 + 1 file changed, 1 insertion(+) diff --git a/pyproject.toml b/pyproject.toml index 1cd480b01..008f7041b 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -45,6 +45,7 @@ wheel.packages = ["python/finufft/finufft"] wheel.py-api = "py3" [tool.scikit-build.metadata.version] +# Instead of hardcoding the version here, extract it from the source files. provider = "scikit_build_core.metadata.regex" input = "python/finufft/finufft/__init__.py" From c3e3ea34549fca0f57f59cb7fb281948541973c0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Joakim=20And=C3=A9n?= Date: Mon, 15 Jul 2024 23:40:57 +0200 Subject: [PATCH 70/83] py: replace setup.py with pyproject.toml --- .../finufft/pyproject.toml | 6 +- python/finufft/setup.py | 99 ------------------- 2 files changed, 4 insertions(+), 101 deletions(-) rename pyproject.toml => python/finufft/pyproject.toml (92%) delete mode 100644 python/finufft/setup.py diff --git a/pyproject.toml b/python/finufft/pyproject.toml similarity index 92% rename from pyproject.toml rename to python/finufft/pyproject.toml index 008f7041b..e98041647 100644 --- a/pyproject.toml +++ b/python/finufft/pyproject.toml @@ -36,10 +36,12 @@ minimum-version = "0.4" # Setuptools-style build caching in a local directory build-dir = "build/{wheel_tag}" +# Tell skbuild to look for the CMakeLists.txt file two directories up. +cmake.source-dir = "../../" cmake.targets = ["finufft"] cmake.define = {"FINUFFT_BUILD_PYTHON" = "ON", "FINUFFT_ENABLE_INSTALL" = "OFF"} -wheel.packages = ["python/finufft/finufft"] +wheel.packages = ["finufft"] # Indicate that we don't depend on the CPython API wheel.py-api = "py3" @@ -47,7 +49,7 @@ wheel.py-api = "py3" [tool.scikit-build.metadata.version] # Instead of hardcoding the version here, extract it from the source files. 
provider = "scikit_build_core.metadata.regex" -input = "python/finufft/finufft/__init__.py" +input = "finufft/__init__.py" [tool.cibuildwheel] # Necessary to see build output from the actual compilation diff --git a/python/finufft/setup.py b/python/finufft/setup.py deleted file mode 100644 index c2bf12f8d..000000000 --- a/python/finufft/setup.py +++ /dev/null @@ -1,99 +0,0 @@ -# This defines the Python module installation. - -# Barnett 3/1/18. Updates by Yu-Hsuan Shih, June 2018. -# win32 mingw patch by Vineet Bansal, Feb 2019. -# attempt ../make.inc reading (failed) and default finufftdir. 2/25/20 -# Barnett trying to get sphinx.ext.autodoc to work w/ this, 10/5/20 - -__version__ = '2.2.0' - -from setuptools import setup, Extension -import os -import platform -from pathlib import Path - -from tempfile import mkstemp - -finufft_dir = os.environ.get('FINUFFT_DIR') - -# Note: This will not work if run through pip install since setup.py is copied -# to a different location. -if finufft_dir == None or finufft_dir == '': - finufft_dir = Path(__file__).resolve().parents[2] - -# Set include and library paths relative to FINUFFT root directory. -inc_dir = os.path.join(finufft_dir, 'include') -lib_dir = os.path.join(finufft_dir, 'lib') -lib_dir_cmake = os.path.join(finufft_dir, 'build') # lib may be only here - -# Read in long description from README.md. -with open(os.path.join(finufft_dir, 'python', 'finufft', 'README.md'), 'r') as f: - long_description = f.read() - -finufft_dlib = 'finufft' - -# Windows does not have the concept of rpath and as a result, MSVC crashes if -# supplied with one. -if platform.system() != "Windows": - runtime_library_dirs = [lib_dir, lib_dir_cmake] -else: - runtime_library_dirs = [] - -# For certain platforms (e.g. Ubuntu 20.04), we need to create a dummy source -# that calls one of the functions in the FINUFFT dynamic library. 
The reason -# is that these platforms override the default --no-as-needed flag for ld, -# which means that the linker will only link to those dynamic libraries for -# which there are unresolved symbols in the object files. Since we do not have -# a real source, the result is that no dynamic libraries are linked. To -# prevent this, we create a dummy source so that the library will link as -# expected. -fd, source_filename = mkstemp(suffix='.c', text=True) - -with open(fd, 'w') as f: - f.write( \ -""" -#include - -void PyInit_finufftc(void) { - finufft_opts opt; - - finufft_default_opts(&opt); -} -""") - - -########## SETUP ########### -setup( - name='finufft', - version=__version__, - author='Python interfaces by: Jeremy Magland, Daniel Foreman-Mackey, Joakim Anden, Libin Lu, and Alex Barnett', - author_email='abarnett@flatironinstitute.org', - url='https://github.com/flatironinstitute/finufft', - description='Python interface to FINUFFT', - long_description=long_description, - long_description_content_type='text/markdown', - license="Apache 2", - packages=['finufft'], - classifiers=[ - 'License :: OSI Approved :: Apache Software License', - 'Programming Language :: Python :: 3', - 'Programming Language :: C++', - 'Operating System :: POSIX :: Linux', - 'Operating System :: MacOS :: MacOS X', - 'Operating System :: Microsoft :: Windows', - ], - install_requires=['numpy>=1.12.0'], - python_requires='>=3.6', - zip_safe=False, - py_modules=['finufft.finufftc'], - ext_modules=[ - Extension(name='finufft.finufftc', - sources=[source_filename], - include_dirs=[inc_dir, '/usr/local/include'], - library_dirs=[lib_dir, lib_dir_cmake, '/usr/local/lib'], - libraries=[finufft_dlib], - runtime_library_dirs=runtime_library_dirs) - ] -) - -os.unlink(source_filename) From 32f4e9b77caa732315c50c2fe1f5ebb69100cf1b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Joakim=20And=C3=A9n?= Date: Mon, 15 Jul 2024 23:41:49 +0200 Subject: [PATCH 71/83] ci: update paths for workflows Point the Python 
wheel building workflows to the new package directory in `python/finufft` instead of the repository root. --- .github/workflows/python_cmake.yml | 2 +- .github/workflows/python_skbuild_wheels.yml | 6 ++++++ 2 files changed, 7 insertions(+), 1 deletion(-) diff --git a/.github/workflows/python_cmake.yml b/.github/workflows/python_cmake.yml index 9423f42c2..3079f852a 100644 --- a/.github/workflows/python_cmake.yml +++ b/.github/workflows/python_cmake.yml @@ -58,6 +58,6 @@ jobs: echo CMAKE_ARGS="-DFINUFFT_ARCH_FLAGS=${{ matrix.arch_flags }}" >> $GITHUB_ENV shell: bash - name: Build - run: python3 -m pip install ${{ github.workspace }} --verbose + run: python3 -m pip install ${{ github.workspace }}/python/finufft --verbose - name: Test run: python3 -m pytest ${{ github.workspace }}/python/finufft/test diff --git a/.github/workflows/python_skbuild_wheels.yml b/.github/workflows/python_skbuild_wheels.yml index c2d92b9b3..3d58dcc77 100644 --- a/.github/workflows/python_skbuild_wheels.yml +++ b/.github/workflows/python_skbuild_wheels.yml @@ -15,6 +15,8 @@ jobs: - name: Build wheels uses: pypa/cibuildwheel@v2.19.2 + with: + package-dir: 'python/finufft' env: CIBW_BEFORE_ALL_MACOS: brew install gcc@14 fftw # Need following versions of GCC for compatibility with fftw @@ -39,6 +41,8 @@ jobs: - name: Build wheels uses: pypa/cibuildwheel@v2.19.2 + with: + package-dir: 'python/finufft' env: # Make sure to install the ARM64-specific versions of FFTW and GCC. # Perhaps this is done automatically on the macos-14 image. We should @@ -77,6 +81,8 @@ jobs: - name: Build wheels uses: pypa/cibuildwheel@v2.19.2 + with: + package-dir: 'python/finufft' env: # This is required to force cmake to avoid using MSVC (the default). 
# By setting the generator to Ninja, cmake will pick gcc (mingw64) From 8561f521faad9ded7c7fff6eebabb3870a9309ab Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Joakim=20And=C3=A9n?= Date: Mon, 15 Jul 2024 23:52:06 +0200 Subject: [PATCH 72/83] ci: rename wheel-building workflow --- .../{python_skbuild_wheels.yml => python_build_wheels.yml} | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) rename .github/workflows/{python_skbuild_wheels.yml => python_build_wheels.yml} (98%) diff --git a/.github/workflows/python_skbuild_wheels.yml b/.github/workflows/python_build_wheels.yml similarity index 98% rename from .github/workflows/python_skbuild_wheels.yml rename to .github/workflows/python_build_wheels.yml index 3d58dcc77..87ed3ce9c 100644 --- a/.github/workflows/python_skbuild_wheels.yml +++ b/.github/workflows/python_build_wheels.yml @@ -1,4 +1,4 @@ -name: Build Python wheels (skbuild) +name: Build and test Python wheels on: [push, pull_request] From 8bf525efa7625abd1a6cf9b102fb4ddda6837b00 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Joakim=20And=C3=A9n?= Date: Mon, 15 Jul 2024 23:56:42 +0200 Subject: [PATCH 73/83] ci: remove old wheel-building workflows --- .github/workflows/cygwinccompiler.py | 408 ------------------ .github/workflows/libvcruntime140.a | Bin 56262 -> 0 bytes .github/workflows/python_build_win.ps1 | 54 --- .github/workflows/python_test_win.ps1 | 9 - .github/workflows/python_wheel.yml | 242 ----------- .../workflows/python_wheel_macos_arm64.yml | 54 --- tools/finufft/build-wheels-linux.sh | 54 --- 7 files changed, 821 deletions(-) delete mode 100755 .github/workflows/cygwinccompiler.py delete mode 100755 .github/workflows/libvcruntime140.a delete mode 100644 .github/workflows/python_build_win.ps1 delete mode 100644 .github/workflows/python_test_win.ps1 delete mode 100644 .github/workflows/python_wheel.yml delete mode 100644 .github/workflows/python_wheel_macos_arm64.yml delete mode 100755 tools/finufft/build-wheels-linux.sh diff --git 
a/.github/workflows/cygwinccompiler.py b/.github/workflows/cygwinccompiler.py deleted file mode 100755 index 72d8558ae..000000000 --- a/.github/workflows/cygwinccompiler.py +++ /dev/null @@ -1,408 +0,0 @@ -"""distutils.cygwinccompiler - -Provides the CygwinCCompiler class, a subclass of UnixCCompiler that -handles the Cygwin port of the GNU C compiler to Windows. It also contains -the Mingw32CCompiler class which handles the mingw32 port of GCC (same as -cygwin in no-cygwin mode). -""" - -# problems: -# -# * if you use a msvc compiled python version (1.5.2) -# 1. you have to insert a __GNUC__ section in its config.h -# 2. you have to generate an import library for its dll -# - create a def-file for python??.dll -# - create an import library using -# dlltool --dllname python15.dll --def python15.def \ -# --output-lib libpython15.a -# -# see also http://starship.python.net/crew/kernr/mingw32/Notes.html -# -# * We put export_symbols in a def-file, and don't use -# --export-all-symbols because it doesn't worked reliable in some -# tested configurations. And because other windows compilers also -# need their symbols specified this no serious problem. -# -# tested configurations: -# -# * cygwin gcc 2.91.57/ld 2.9.4/dllwrap 0.2.4 works -# (after patching python's config.h and for C++ some other include files) -# see also http://starship.python.net/crew/kernr/mingw32/Notes.html -# * mingw32 gcc 2.95.2/ld 2.9.4/dllwrap 0.2.4 works -# (ld doesn't support -shared, so we use dllwrap) -# * cygwin gcc 2.95.2/ld 2.10.90/dllwrap 2.10.90 works now -# - its dllwrap doesn't work, there is a bug in binutils 2.10.90 -# see also http://sources.redhat.com/ml/cygwin/2000-06/msg01274.html -# - using gcc -mdll instead dllwrap doesn't work without -static because -# it tries to link against dlls instead their import libraries. (If -# it finds the dll first.) 
-# By specifying -static we force ld to link against the import libraries, -# this is windows standard and there are normally not the necessary symbols -# in the dlls. -# *** only the version of June 2000 shows these problems -# * cygwin gcc 3.2/ld 2.13.90 works -# (ld supports -shared) -# * mingw gcc 3.2/ld 2.13 works -# (ld supports -shared) - -import os -import sys -import copy -from subprocess import Popen, PIPE, check_output -import re - -from distutils.ccompiler import gen_preprocess_options, gen_lib_options -from distutils.unixccompiler import UnixCCompiler -from distutils.file_util import write_file -from distutils.errors import (DistutilsExecError, CCompilerError, - CompileError, UnknownFileError) -from distutils import log -from distutils.version import LooseVersion -from distutils.spawn import find_executable - -def get_msvcr(): - """Include the appropriate MSVC runtime library if Python was built - with MSVC 7.0 or later. - """ - msc_pos = sys.version.find('MSC v.') - if msc_pos != -1: - msc_ver = sys.version[msc_pos+6:msc_pos+10] - if msc_ver == '1300': - # MSVC 7.0 - return ['msvcr70'] - elif msc_ver == '1310': - # MSVC 7.1 - return ['msvcr71'] - elif msc_ver == '1400': - # VS2005 / MSVC 8.0 - return ['msvcr80'] - elif msc_ver == '1500': - # VS2008 / MSVC 9.0 - return ['msvcr90'] - elif msc_ver == '1600': - # VS2010 / MSVC 10.0 - return ['msvcr100'] - elif msc_ver == '1900': - return ['vcruntime140'] - else: - return ['vcruntime140'] - #raise ValueError("Unknown MS Compiler version %s " % msc_ver) - - -class CygwinCCompiler(UnixCCompiler): - """ Handles the Cygwin port of the GNU C compiler to Windows. 
- """ - compiler_type = 'cygwin' - obj_extension = ".o" - static_lib_extension = ".a" - shared_lib_extension = ".dll" - static_lib_format = "lib%s%s" - shared_lib_format = "%s%s" - exe_extension = ".exe" - - def __init__(self, verbose=0, dry_run=0, force=0): - - UnixCCompiler.__init__(self, verbose, dry_run, force) - - status, details = check_config_h() - self.debug_print("Python's GCC status: %s (details: %s)" % - (status, details)) - if status is not CONFIG_H_OK: - self.warn( - "Python's pyconfig.h doesn't seem to support your compiler. " - "Reason: %s. " - "Compiling may fail because of undefined preprocessor macros." - % details) - - self.gcc_version, self.ld_version, self.dllwrap_version = \ - get_versions() - self.debug_print(self.compiler_type + ": gcc %s, ld %s, dllwrap %s\n" % - (self.gcc_version, - self.ld_version, - self.dllwrap_version) ) - - # ld_version >= "2.10.90" and < "2.13" should also be able to use - # gcc -mdll instead of dllwrap - # Older dllwraps had own version numbers, newer ones use the - # same as the rest of binutils ( also ld ) - # dllwrap 2.10.90 is buggy - if self.ld_version >= "2.10.90": - self.linker_dll = "gcc" - else: - self.linker_dll = "dllwrap" - - # ld_version >= "2.13" support -shared so use it instead of - # -mdll -static - if self.ld_version >= "2.13": - shared_option = "-shared" - else: - shared_option = "-mdll -static" - - # Hard-code GCC because that's what this is all about. - # XXX optimization, warnings etc. should be customizable. 
- self.set_executables(compiler='gcc -mcygwin -O -Wall', - compiler_so='gcc -mcygwin -mdll -O -Wall', - compiler_cxx='g++ -mcygwin -O -Wall', - linker_exe='gcc -mcygwin', - linker_so=('%s -mcygwin %s' % - (self.linker_dll, shared_option))) - - # cygwin and mingw32 need different sets of libraries - if self.gcc_version == "2.91.57": - # cygwin shouldn't need msvcrt, but without the dlls will crash - # (gcc version 2.91.57) -- perhaps something about initialization - self.dll_libraries=["msvcrt"] - self.warn( - "Consider upgrading to a newer version of gcc") - else: - # Include the appropriate MSVC runtime library if Python was built - # with MSVC 7.0 or later. - self.dll_libraries = get_msvcr() - - def _compile(self, obj, src, ext, cc_args, extra_postargs, pp_opts): - """Compiles the source by spawning GCC and windres if needed.""" - if ext == '.rc' or ext == '.res': - # gcc needs '.res' and '.rc' compiled to object files !!! - try: - self.spawn(["windres", "-i", src, "-o", obj]) - except DistutilsExecError as msg: - raise CompileError(msg) - else: # for other files use the C-compiler - try: - self.spawn(self.compiler_so + cc_args + [src, '-o', obj] + - extra_postargs) - except DistutilsExecError as msg: - raise CompileError(msg) - - def link(self, target_desc, objects, output_filename, output_dir=None, - libraries=None, library_dirs=None, runtime_library_dirs=None, - export_symbols=None, debug=0, extra_preargs=None, - extra_postargs=None, build_temp=None, target_lang=None): - """Link the objects.""" - # use separate copies, so we can modify the lists - extra_preargs = copy.copy(extra_preargs or []) - libraries = copy.copy(libraries or []) - objects = copy.copy(objects or []) - - # Additional libraries - libraries.extend(self.dll_libraries) - - # handle export symbols by creating a def-file - # with executables this only works with gcc/ld as linker - if ((export_symbols is not None) and - (target_desc != self.EXECUTABLE or self.linker_dll == "gcc")): - # (The linker 
doesn't do anything if output is up-to-date. - # So it would probably better to check if we really need this, - # but for this we had to insert some unchanged parts of - # UnixCCompiler, and this is not what we want.) - - # we want to put some files in the same directory as the - # object files are, build_temp doesn't help much - # where are the object files - temp_dir = os.path.dirname(objects[0]) - # name of dll to give the helper files the same base name - (dll_name, dll_extension) = os.path.splitext( - os.path.basename(output_filename)) - - # generate the filenames for these files - def_file = os.path.join(temp_dir, dll_name + ".def") - lib_file = os.path.join(temp_dir, 'lib' + dll_name + ".a") - - # Generate .def file - contents = [ - "LIBRARY %s" % os.path.basename(output_filename), - "EXPORTS"] - for sym in export_symbols: - contents.append(sym) - self.execute(write_file, (def_file, contents), - "writing %s" % def_file) - - # next add options for def-file and to creating import libraries - - # dllwrap uses different options than gcc/ld - if self.linker_dll == "dllwrap": - extra_preargs.extend(["--output-lib", lib_file]) - # for dllwrap we have to use a special option - extra_preargs.extend(["--def", def_file]) - # we use gcc/ld here and can be sure ld is >= 2.9.10 - else: - # doesn't work: bfd_close build\...\libfoo.a: Invalid operation - #extra_preargs.extend(["-Wl,--out-implib,%s" % lib_file]) - # for gcc/ld the def-file is specified as any object files - objects.append(def_file) - - #end: if ((export_symbols is not None) and - # (target_desc != self.EXECUTABLE or self.linker_dll == "gcc")): - - # who wants symbols and a many times larger output file - # should explicitly switch the debug mode on - # otherwise we let dllwrap/ld strip the output file - # (On my machine: 10KB < stripped_file < ??100KB - # unstripped_file = stripped_file + XXX KB - # ( XXX=254 for a typical python extension)) - if not debug: - extra_preargs.append("-s") - - 
UnixCCompiler.link(self, target_desc, objects, output_filename, - output_dir, libraries, library_dirs, - runtime_library_dirs, - None, # export_symbols, we do this in our def-file - debug, extra_preargs, extra_postargs, build_temp, - target_lang) - - # -- Miscellaneous methods ----------------------------------------- - - def object_filenames(self, source_filenames, strip_dir=0, output_dir=''): - """Adds supports for rc and res files.""" - if output_dir is None: - output_dir = '' - obj_names = [] - for src_name in source_filenames: - # use normcase to make sure '.rc' is really '.rc' and not '.RC' - base, ext = os.path.splitext(os.path.normcase(src_name)) - if ext not in (self.src_extensions + ['.rc','.res']): - raise UnknownFileError("unknown file type '%s' (from '%s')" % \ - (ext, src_name)) - if strip_dir: - base = os.path.basename (base) - if ext in ('.res', '.rc'): - # these need to be compiled to object files - obj_names.append (os.path.join(output_dir, - base + ext + self.obj_extension)) - else: - obj_names.append (os.path.join(output_dir, - base + self.obj_extension)) - return obj_names - -# the same as cygwin plus some additional parameters -class Mingw32CCompiler(CygwinCCompiler): - """ Handles the Mingw32 port of the GNU C compiler to Windows. - """ - compiler_type = 'mingw32' - - def __init__(self, verbose=0, dry_run=0, force=0): - - CygwinCCompiler.__init__ (self, verbose, dry_run, force) - - # ld_version >= "2.13" support -shared so use it instead of - # -mdll -static - if self.ld_version >= "2.13": - shared_option = "-shared" - else: - shared_option = "-mdll -static" - - # A real mingw32 doesn't need to specify a different entry point, - # but cygwin 2.91.57 in no-cygwin-mode needs it. 
- if self.gcc_version <= "2.91.57": - entry_point = '--entry _DllMain@12' - else: - entry_point = '' - - if is_cygwingcc(): - raise CCompilerError( - 'Cygwin gcc cannot be used with --compiler=mingw32') - - self.set_executables(compiler='gcc -O -Wall', - compiler_so='gcc -mdll -O -Wall', - compiler_cxx='g++ -O -Wall', - linker_exe='gcc', - linker_so='%s %s %s' - % (self.linker_dll, shared_option, - entry_point)) - # Maybe we should also append -mthreads, but then the finished - # dlls need another dll (mingwm10.dll see Mingw32 docs) - # (-mthreads: Support thread-safe exception handling on `Mingw32') - - # no additional libraries needed - self.dll_libraries=[] - - # Include the appropriate MSVC runtime library if Python was built - # with MSVC 7.0 or later. - self.dll_libraries = get_msvcr() - -# Because these compilers aren't configured in Python's pyconfig.h file by -# default, we should at least warn the user if he is using an unmodified -# version. - -CONFIG_H_OK = "ok" -CONFIG_H_NOTOK = "not ok" -CONFIG_H_UNCERTAIN = "uncertain" - -def check_config_h(): - """Check if the current Python installation appears amenable to building - extensions with GCC. - - Returns a tuple (status, details), where 'status' is one of the following - constants: - - - CONFIG_H_OK: all is well, go ahead and compile - - CONFIG_H_NOTOK: doesn't look good - - CONFIG_H_UNCERTAIN: not sure -- unable to read pyconfig.h - - 'details' is a human-readable string explaining the situation. - - Note there are two ways to conclude "OK": either 'sys.version' contains - the string "GCC" (implying that this Python was built with GCC), or the - installed "pyconfig.h" contains the string "__GNUC__". - """ - - # XXX since this function also checks sys.version, it's not strictly a - # "pyconfig.h" check -- should probably be renamed... 
- - from distutils import sysconfig - - # if sys.version contains GCC then python was compiled with GCC, and the - # pyconfig.h file should be OK - if "GCC" in sys.version: - return CONFIG_H_OK, "sys.version mentions 'GCC'" - - # let's see if __GNUC__ is mentioned in python.h - fn = sysconfig.get_config_h_filename() - try: - config_h = open(fn) - try: - if "__GNUC__" in config_h.read(): - return CONFIG_H_OK, "'%s' mentions '__GNUC__'" % fn - else: - return CONFIG_H_NOTOK, "'%s' does not mention '__GNUC__'" % fn - finally: - config_h.close() - except OSError as exc: - return (CONFIG_H_UNCERTAIN, - "couldn't read '%s': %s" % (fn, exc.strerror)) - -RE_VERSION = re.compile(br'(\d+\.\d+(\.\d+)*)') - -def _find_exe_version(cmd): - """Find the version of an executable by running `cmd` in the shell. - - If the command is not found, or the output does not match - `RE_VERSION`, returns None. - """ - executable = cmd.split()[0] - if find_executable(executable) is None: - return None - out = Popen(cmd, shell=True, stdout=PIPE).stdout - try: - out_string = out.read() - finally: - out.close() - result = RE_VERSION.search(out_string) - if result is None: - return None - # LooseVersion works with strings - # so we need to decode our bytes - return LooseVersion(result.group(1).decode()) - -def get_versions(): - """ Try to find out the versions of gcc, ld and dllwrap. - - If not possible it returns None for it. 
- """ - commands = ['gcc -dumpversion', 'ld -v', 'dllwrap --version'] - return tuple([_find_exe_version(cmd) for cmd in commands]) - -def is_cygwingcc(): - '''Try to determine if the gcc that would be used is from cygwin.''' - out_string = check_output(['gcc', '-dumpmachine']) - return out_string.strip().endswith(b'cygwin') diff --git a/.github/workflows/libvcruntime140.a b/.github/workflows/libvcruntime140.a deleted file mode 100755 index 3075d72b0cc2cfca98fef161006cc20f2c051cef..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 56262 zcmeHQ|BoC;m9LDG0KO2CVDbe583#j5Fq>KL?(8~*V0+i!a&f#_j~xSdY0u8|?zTPC zJ@oY0yMep2I27eh5tozDU35ALgakr5386a)34{bXAx=U9A&^e!g!qY{_zU;}!M#`2 z)m7b9-BZ;p8(QhszV7W;HQilRpZfHxdR5g2pX!Fat*7@t(M)f%3ybZg)_l9Y&}_D7 zki2g;=N1>3qIq>cA>=he?*0uS_r6NVfg4O7c$g5Bhkn51;cJATJaUN1!5p#0N*Cjb0H zLQuX4*F^bOxGu`S!SkT}`%jpB>0byz`3l5A`D%yB*Ov%E`HvYU-+;10`R4DK{1^Nl z}$DF1yslOOyaAt?XzT_!*J0wE|rzRcvOFA{?Cb2vx&-^ZE! z0`B*V-#N~*DxN>{~2kFZE436T;@yXbY9O+{0 z`7+9(I|NO9=lBE;!{7#F08J1-gJW;BBb0c8y5UID!3pY;gvTd?z~8)J1~s2WW|x7QsEoUN|kAGoRLY8cnVkC*(KID-JL0$Fp@^LeIcbl%u; zw|nj=@&ez9c1CWn!7rl?+SFyWlhxumf2wR6a>4+ak@IZO?LX_Shuv`J)bXc$mx>(n5BigVdzR*U=Kh+{^si2P1NS+o zPkFzS!zX^PJK5ZdMCOl^$JK|a+JYZI(8OQChmo>D*2Moym!PrXJaPx_(2c?!XV~?8 zCv>Ap2*?P%%}s&Bsj)Iw@%$UXb;N(+Rg_rcs82BoJhc%xjH_r>bsTL_TDjF7ZzT!T z2BnpK7w9C|0h%0hp2*%O6D9roUN9VWL(V4IpNdfWD>obh`9_)8k``l$ni=0Xl^k7Y z`d7Um;W73&;wN!X=#B!Ycbnslt~UVENp7GC7==O49gjVK(}}ji;ARq67vMu#`(v+% z3q}L~=>>k|Zi_-IilKxhx6*}lfDzAkqn-egtXbF~wo;9t*vlVYV!Us*q zk+ncztMKyK2*TkhPWk7cVkH+Xh;TXtF1bYCBS2%3#00vK6|6~W^B z;mh0Gm;BJ(gevHUc|V^L34+(AUm-sV{Qmigi*KO#y?7DcF3Gd!g@T@D1$%ddbMg$TR^NRo-Pm4>6NR&J>b12$U;s1myzYO4Vmgz_K$2)6Ls}3LTpyjvu%*gTxaR z#!;_O@IweXIq?SlWkfUs&sldcl3jE zljy?6d3P8{$XEs&ZN1yO&TnEC_uM!sCziHOaMGo9BG^fcV`o~d(_qt`@cWnif#+YR z2;)zo3xP82U?C@W=8-i4%b^QzCVEMkAF}Fx_tWdqOpr8mlNRoLx`jL2YPH+#X0tsz z7dMHo?(e^RC&BITZG^mo{&xqNp&U+6a(7L#dUdAXjk>upSio_z`u&;p@pwu!atq^m zc)>&SJnmcJpWETNxEJO^dW-Xu=F=Sz 
zR1k;e5h7vVZTF@x5})BcaKkta@&xQH+O_);p<6ZxT>_ZWO(3)lxp@yh0=fN-Mkw^F zuniG~2z@XvX)G-rLgVPEgA^}8d@4eGg7I{E)ZGQLkM6PV?xXqhcKV+QUbmEwmLV4d zcS_uXchGIO1ji=aiTOAWH+xaKFA?!lT)YHvw)F85q+>dcL!3|_ug#we^Wkh9hwC84 z-~Yo0@a(nMgiic-hWC$w3dbXur|`=sY31QW{;%buAKtcgq`b8~|{3HnqQpfR91O54XwQF^RKDdaW- zVq$^(Q7Ao)rB1t0D}>Vcd5ckcALJC@8sww!Z-P?RWo+8bP3;C`)Ao`nPHQ&BoZn5H zoW@~j;$zxSIMG2R;F9Q>zv*<^(& za0S1%4Us!r^D7nu1Mu$H0m}sXXyFg9TMd2{&s&aPT!aDizJh+GVgw@ud2oRPE^At zda4dH1>HBu^Ki_hSUWb%7{r2Wj~b%5wp?jhFfi|q<@#|Pc?ZRc7p{ioDxS9-*T%m$ zhH1jJv!*CjxwiHO97?#hj5pwEL@6CzS|OCi&s&VrJj<1**BGXPO>3`}jwKbF;7{ST z`WPL@4C4f=#HKhTJIOKDd&R8-VGHD0u7FcIzT33BinHm0vDd0@Ut!_w<~#o>ic%O@ z4_js4`8Z>MZkQ8gP>K@?tWjF~tdcyJ+F^Ubr5~jzt&-0QxsW|?@jJhWOK}d2HA+o= zR`R|Qknpma_rXH&Htm0dJ1qy9iZ`D;+u(W*l zvbXA*bYF=jmGM-OKAys+f2)s|w4m+>4BPfr9V43Qad4e#y#KM*a+?%0hrRzFKs9W_ zN^LO(-8aZ5;HXJ8>$TDam=do*w5d0%t&SF`5iR-z9)yyzMf4IYTV-Zsl@a~!7{3D1 zNu`I725~r|l~hZhR>SOZZR-7+Q<=K%>k_!evE8<~UL?tE-%7JHjsU_TM}NVWz! z1OJPZ#`xvfH8+9<7aRM+)7Jv9=Uh`S7%qB3H=I|)@LSswf^*B*5(~u9EznSD{(UG$t-x{8 z(14Hz`D?_nF4Zv=YlG}jZR#~!P%Vjign~eInGp)}B)gUfYdQV-&!s`=s_nf4sD@1- z`f;Y9`v$oP$83sYFmI-ZYLw`^F!id{Ej?Nq%ve&#D3oDmnEsFD$8AGCX};3zY}C@z zrdmR@k|XxGo;UTw)wteuT;oy|bk(EGN=;R^o{ey=O?8ZGeVDb~F>mO(TWqO5k=G4c zgYwA3y=)EUjAT^$n$dQ`EyKf!3CQQ}enB5E>2|`lk8NvkmgBl;nJ~C+m2v(UW-e^P z7FlHqx^EC1QW4kgqFhLUzA&PF4KVczE~@RxR}E@FUqa4a)&O6+Z;`iUTOUuU0iV;y zOKJcznr#he@v?_ipwg@0RU7rN04Lo9>Yt1?cxaFw>|@j`DNjLN4YkL&sW)*^W%wH3 zqGgF?YK!kDlu{P!F;&&7TAedc(&{>)<4RjCX4bhb)g0T}Fne5^`WNfIZXNy;+oTMxKcdBTwe1q4zHUuz zhOEHVw{;c0ga>lo)B`!ES|qgxQdlHq){ZzrQO#Frt~Q#{1B;umLLOxbx^IxbgyWPo zsqGor)caVE_@Jebu}Bg|E;Hie!=f}RZCx0R_~2`)OVk^$mTr$(Q{Un-m2bZY1vqWQ zhrAZfUiNV0tgWN;@!>lIw+t2)>-Zn_@e<4;yV$k>ABXbGtV^F+-M-o!e2C_(%)!sZ z8ay-zE`~cT2e@%joKGcuHm1JF8uzr!GWFw+!u^NTQqO#8;Z2D9Ob+DWJ{!xL@8^tt zm1>XnD*%FNe1BEq$-U(Jy`Dc%rqy^DaG(A{2U~c!%DsJNtwv3%Ily&6_NX@WRML-~ z)#LuM)=t9aWl;T?f@=Dmn@U^ZWl%l!>pu3VHuXL(skZrR23)=M261)HssV~^jzDQM5>z+g8n(9i2NhJuUp!)8W2!W|4`QamCi+HJn1b#bWC+LXn<|xhJ$I+0 
z*rVOp7pYn%wJ%ci{FU*we?Td3{&lcwTjFNE_OuDYQ?3Vj5^9fcQ$J)qiktFKhV>IR zFEfh!5XX1&J-ce-`asOrg26STxLI2xNpT9#LTis>Q@>+9CaMs}WqA7iF^<`X&hQJV z@kUUD@QElp* ztk(@GMD_g;YTK4P2&h&*Ut4j;Hd;4CO*JQLB|GdrICG|+%No(Uj_4*JTKUNGn*z~F zsyU+70DF!$^-0#2MsSmQPt_W4Lx4?QC_I_Ab z+JJ#7vp3pixCx9OWeU1)kWax;gK^L?`c|-AVvfO4Q%_?($AFefo+qZv9E0~jNp^Nd z?)h%i+k&fC8OvsK3`A-)3raV0Wz(0eZbYUcAcKHTkxxlv)nm zB>5#Q2~)pe?f))BZW;ggLln6yJ22kV>n(T3RkxJQ{NHOGImH6PDXl#NoB9Xq?*;BU zjvu5rMgVt$%R%*7*yMYG$ih;J1&n2-J&H~Jf%9s+?iLt?)8+wOVMZezySbP10IIOk z?*oA1=bk;|thv!7^p95Bj=;AOZlXP>S?M>GS`J`22TE8Irry8WnxN&A*M=)&P23;j zcf}izuo;If_$u4NX4VAaRis!(v81)q%Bq&7rLcv1adCQ%_*Mejd_Jb|zPw zDq&Y<{k(SqrdQl?6i#|kLfseEuerUb(#&nPeqLV6Il6Ph?J;iZ0X(L9yXzG~1aChI zIJfn7-v=09-rm0Chwi2~2F}g`9j;krE9Lz$0f^S)*;s>z2KhAXr{n+!d|@_~uvScc zg!SB8T4H(a%O-@{qW)f1=4*MFuQK8f(%h2ma^2ip0CQgICA{ZGvFCVGuVOu?s}SvF z=5*b|(Ej|y4R;cHzv#W>!kwy&bh9~KfOJjDC6Mbv?eT5ud#vXQ6ym!~+w&a^-?X`P zw(Iu?Za9ZGsWP_B#-IYW)2WtVoe8i#y92b|!i=Y%T7$tB9^Nn(;k_n`l?gFa_N=$O~|!?Mj)3^T1pN%ckDR zdL|bwk-XMpnVDR7v64+pY2MjgWqZSH4mpr;Hq{b8D?#=QZR)G6ZH_`zm$9P%f}wiF zji7%U299in!A-nLl~HYMMW<6OVLB6Fjp&xCPx6>LH?j7<3!9?}NVK&%_9LR5@yP9Y z8(xopkks)5m-^u=jqFFUYQiS$kJC&+_YH!>3yM>pfV?1w>}~9pslT$eOK3^$rd{$j zZHZT#!42&a1;daPal26yWY6NJ9?C`4Ls_pJDOkLW7yhkKlqVnr#Gg^Bb z8+sNOXVqTo*MX*{&7&DHD%*{C47; z8zRRSTvl=w1j`6Jv zvBz}Qdzp)AwO9NV1DodR8)Vb$^*6%GV7ftYz;_4TI|-vRma8tNjar*(s$)oN0_+i; z^<3s5dO@9gR*x+aJ%nY(mfQx24&83#QhH`{E30gcyo> make.inc - echo "CC=gcc-13" >> make.inc - echo "CXX=g++-13" >> make.inc - echo "FFLAGS += -march=x86-64" >> make.inc - echo "CFLAGS += -march=x86-64" >> make.inc - echo "CXXFLAGS += -march=x86-64" >> make.inc - # link statically to libgcc, libgfortran and libquadmath - # otherwise binaries are incompatible with older systems - echo "LIBS += -static-libgfortran -static-libgcc -static-libstdc++" >> make.inc - # hack to make libquadmath link statically - sudo rm /usr/local/opt/gcc@13/lib/gcc/13/libquadmath.*dylib - - # Download and install Python instead of using the setup_python - # as the 
python interpreters in the Github machines - # were compiled in 10.14, the wheels built with them - # are incompatible with older MacOS versions - - if: steps.cache-python.outputs.cache-hit != 'true' - name: Download and install Python - run: | - curl \ - https://www.python.org/ftp/python/3.6.8/python-3.6.8-macosx10.9.pkg \ - --output python_installer.pkg - sudo installer -pkg python_installer.pkg -target / - - curl \ - https://www.python.org/ftp/python/3.7.9/python-3.7.9-macosx10.9.pkg \ - --output python_installer.pkg - sudo installer -pkg python_installer.pkg -target / - - curl \ - https://www.python.org/ftp/python/3.8.10/python-3.8.10-macosx10.9.pkg \ - --output python_installer.pkg - sudo installer -pkg python_installer.pkg -target / - - curl \ - https://www.python.org/ftp/python/3.9.13/python-3.9.13-macos11.pkg \ - --output python_installer.pkg - sudo installer -pkg python_installer.pkg -target / - - curl \ - https://www.python.org/ftp/python/3.10.11/python-3.10.11-macos11.pkg \ - --output python_installer.pkg - sudo installer -pkg python_installer.pkg -target / - - curl \ - https://www.python.org/ftp/python/3.11.7/python-3.11.7-macos11.pkg \ - --output python_installer.pkg - sudo installer -pkg python_installer.pkg -target / - - curl \ - https://www.python.org/ftp/python/3.12.1/python-3.12.1-macos11.pkg \ - --output python_installer.pkg - sudo installer -pkg python_installer.pkg -target / - - - name: Compile python bindings - run: | - make lib - export FINUFFT_DIR=`pwd` - export CC=gcc-13 - export CXX=g++-13 - /Library/Frameworks/Python.framework/Versions/3.6/bin/python3 -m pip install --upgrade setuptools wheel numpy pip - /Library/Frameworks/Python.framework/Versions/3.6/bin/python3 -m pip install -U wheel --user - /Library/Frameworks/Python.framework/Versions/3.6/bin/python3 -m pip wheel python/finufft -w wheelhouse - /Library/Frameworks/Python.framework/Versions/3.7/bin/python3 -m pip install --upgrade setuptools wheel numpy pip - 
/Library/Frameworks/Python.framework/Versions/3.7/bin/python3 -m pip install -U wheel --user - /Library/Frameworks/Python.framework/Versions/3.7/bin/python3 -m pip wheel python/finufft -w wheelhouse - /Library/Frameworks/Python.framework/Versions/3.8/bin/python3 -m pip install --upgrade setuptools wheel numpy pip - /Library/Frameworks/Python.framework/Versions/3.8/bin/python3 -m pip install -U wheel --user - /Library/Frameworks/Python.framework/Versions/3.8/bin/python3 -m pip wheel python/finufft -w wheelhouse - /Library/Frameworks/Python.framework/Versions/3.9/bin/python3 -m pip install --upgrade setuptools wheel numpy pip - /Library/Frameworks/Python.framework/Versions/3.9/bin/python3 -m pip install -U wheel --user - /Library/Frameworks/Python.framework/Versions/3.9/bin/python3 -m pip wheel python/finufft -w wheelhouse - /Library/Frameworks/Python.framework/Versions/3.10/bin/python3 -m pip install --upgrade setuptools wheel numpy pip - /Library/Frameworks/Python.framework/Versions/3.10/bin/python3 -m pip install -U wheel --user - /Library/Frameworks/Python.framework/Versions/3.10/bin/python3 -m pip wheel python/finufft -w wheelhouse - /Library/Frameworks/Python.framework/Versions/3.11/bin/python3 -m pip install --upgrade setuptools wheel numpy pip - /Library/Frameworks/Python.framework/Versions/3.11/bin/python3 -m pip install -U wheel --user - /Library/Frameworks/Python.framework/Versions/3.11/bin/python3 -m pip wheel python/finufft -w wheelhouse - /Library/Frameworks/Python.framework/Versions/3.12/bin/python3 -m pip install --upgrade setuptools wheel numpy pip - /Library/Frameworks/Python.framework/Versions/3.12/bin/python3 -m pip install -U wheel --user - /Library/Frameworks/Python.framework/Versions/3.12/bin/python3 -m pip wheel python/finufft -w wheelhouse - - PYTHON_BIN=/Library/Frameworks/Python.framework/Versions/3.12/bin/ - $PYTHON_BIN/python3 -m pip install delocate==0.10.7 - ls wheelhouse/finufft*.whl | xargs -n1 $PYTHON_BIN/delocate-wheel -w 
fixed_wheel/ - /Library/Frameworks/Python.framework/Versions/3.6/bin/python3 -m pip install --pre finufft -f fixed_wheel/ - /Library/Frameworks/Python.framework/Versions/3.6/bin/python3 python/finufft/test/run_accuracy_tests.py - /Library/Frameworks/Python.framework/Versions/3.6/bin/python3 python/finufft/examples/simple1d1.py - /Library/Frameworks/Python.framework/Versions/3.6/bin/python3 -m pip install pytest - /Library/Frameworks/Python.framework/Versions/3.6/bin/python3 -m pytest python/finufft/test - /Library/Frameworks/Python.framework/Versions/3.7/bin/python3 -m pip install --pre finufft -f fixed_wheel/ - /Library/Frameworks/Python.framework/Versions/3.7/bin/python3 python/finufft/test/run_accuracy_tests.py - /Library/Frameworks/Python.framework/Versions/3.7/bin/python3 python/finufft/examples/simple1d1.py - /Library/Frameworks/Python.framework/Versions/3.7/bin/python3 -m pip install pytest - /Library/Frameworks/Python.framework/Versions/3.7/bin/python3 -m pytest python/finufft/test - /Library/Frameworks/Python.framework/Versions/3.8/bin/python3 -m pip install --pre finufft -f fixed_wheel/ - /Library/Frameworks/Python.framework/Versions/3.8/bin/python3 python/finufft/test/run_accuracy_tests.py - /Library/Frameworks/Python.framework/Versions/3.8/bin/python3 python/finufft/examples/simple1d1.py - /Library/Frameworks/Python.framework/Versions/3.8/bin/python3 -m pip install pytest - /Library/Frameworks/Python.framework/Versions/3.8/bin/python3 -m pytest python/finufft/test - /Library/Frameworks/Python.framework/Versions/3.9/bin/python3 -m pip install --pre finufft -f fixed_wheel/ - /Library/Frameworks/Python.framework/Versions/3.9/bin/python3 python/finufft/test/run_accuracy_tests.py - /Library/Frameworks/Python.framework/Versions/3.9/bin/python3 python/finufft/examples/simple1d1.py - /Library/Frameworks/Python.framework/Versions/3.9/bin/python3 -m pip install pytest - /Library/Frameworks/Python.framework/Versions/3.9/bin/python3 -m pytest python/finufft/test - 
/Library/Frameworks/Python.framework/Versions/3.10/bin/python3 -m pip install --pre finufft -f fixed_wheel/ - /Library/Frameworks/Python.framework/Versions/3.10/bin/python3 python/finufft/test/run_accuracy_tests.py - /Library/Frameworks/Python.framework/Versions/3.10/bin/python3 python/finufft/examples/simple1d1.py - /Library/Frameworks/Python.framework/Versions/3.10/bin/python3 -m pip install pytest - /Library/Frameworks/Python.framework/Versions/3.10/bin/python3 -m pytest python/finufft/test - /Library/Frameworks/Python.framework/Versions/3.11/bin/python3 -m pip install --pre finufft -f fixed_wheel/ - /Library/Frameworks/Python.framework/Versions/3.11/bin/python3 python/finufft/test/run_accuracy_tests.py - /Library/Frameworks/Python.framework/Versions/3.11/bin/python3 python/finufft/examples/simple1d1.py - /Library/Frameworks/Python.framework/Versions/3.11/bin/python3 -m pip install pytest - /Library/Frameworks/Python.framework/Versions/3.11/bin/python3 -m pytest python/finufft/test - /Library/Frameworks/Python.framework/Versions/3.12/bin/python3 -m pip install --pre finufft -f fixed_wheel/ - /Library/Frameworks/Python.framework/Versions/3.12/bin/python3 python/finufft/test/run_accuracy_tests.py - /Library/Frameworks/Python.framework/Versions/3.12/bin/python3 python/finufft/examples/simple1d1.py - /Library/Frameworks/Python.framework/Versions/3.12/bin/python3 -m pip install pytest - /Library/Frameworks/Python.framework/Versions/3.12/bin/python3 -m pytest python/finufft/test - - - - name: Upload wheels - uses: actions/upload-artifact@v4 - with: - name: macos-wheels - path: fixed_wheel/*.whl - - Windows: - runs-on: windows-latest - - steps: - - uses: actions/checkout@v4 - - - name: Install GCC and make - run: C:\msys64\usr\bin\bash.exe -lc "pacman -Sy --noconfirm make mingw-w64-x86_64-toolchain mingw-w64-x86_64-fftw git" - - - name: Build and Test Python 3.8 - uses: actions/setup-python@v5 - with: - python-version: '3.8' - architecture: 'x64' - - run: | - 
.\.github\workflows\python_build_win.ps1 - .\.github\workflows\python_test_win.ps1 - - - name: Build and Test Python 3.9 - uses: actions/setup-python@v5 - with: - python-version: '3.9' - architecture: 'x64' - - run: | - .\.github\workflows\python_build_win.ps1 - .\.github\workflows\python_test_win.ps1 - - - name: Build and Test Python 3.10 - uses: actions/setup-python@v5 - with: - python-version: '3.10' - architecture: 'x64' - - run: | - .\.github\workflows\python_build_win.ps1 - .\.github\workflows\python_test_win.ps1 - - - name: Build and Test Python 3.11 - uses: actions/setup-python@v5 - with: - python-version: '3.11' - architecture: 'x64' - - run: | - .\.github\workflows\python_build_win.ps1 - .\.github\workflows\python_test_win.ps1 - - - name: Build and Test Python 3.12 - uses: actions/setup-python@v5 - with: - python-version: '3.12' - architecture: 'x64' - - run: | - .\.github\workflows\python_build_win.ps1 - .\.github\workflows\python_test_win.ps1 - - - name: Upload wheels - uses: actions/upload-artifact@v4 - with: - name: windows-wheels - path: wheelhouse\*.whl diff --git a/.github/workflows/python_wheel_macos_arm64.yml b/.github/workflows/python_wheel_macos_arm64.yml deleted file mode 100644 index 4bde22f3e..000000000 --- a/.github/workflows/python_wheel_macos_arm64.yml +++ /dev/null @@ -1,54 +0,0 @@ -name: Python Wheel Build MacOS Arm64 - -on: - push: - branches: - - master - tags: - - v* - pull_request: - branches: - - master - -jobs: - MacOS: - runs-on: macos-13 - env: - MACOSX_DEPLOYMENT_TARGET: 11.0 - - steps: - - uses: actions/checkout@v4 - - - name: Install libomp and fftw - run: | - pkg=$(brew fetch --force --bottle-tag=arm64_ventura fftw | grep 'Downloaded to' | cut -d' ' -f3) - brew install $pkg - pkg=$(brew fetch --force --bottle-tag=arm64_ventura libomp | grep 'Downloaded to' | cut -d' ' -f3) - brew install $pkg - - - name: Compile libfinufft - run: | - cp make.inc.macosx_arm64 make.inc - make lib - - - name: Build wheels - uses: 
pypa/cibuildwheel@v2.17.0 - env: - FINUFFT_DIR: ${{ github.workspace }} - CC: Clang - CXX: Clang++ - CIBW_ARCHS_MACOS: arm64 - CIBW_BUILD: cp38-* cp39-* cp310-* cp311-* cp312-* - with: - package-dir: ./python/finufft - output-dir: wheelhouse - - - name: Upload wheels - uses: actions/upload-artifact@v4 - with: - name: macos-arm64-wheels - path: wheelhouse/*.whl - - - name: Setup tmate session - if: ${{ failure() }} - uses: mxschmitt/action-tmate@v3 diff --git a/tools/finufft/build-wheels-linux.sh b/tools/finufft/build-wheels-linux.sh deleted file mode 100755 index 5ea8d9303..000000000 --- a/tools/finufft/build-wheels-linux.sh +++ /dev/null @@ -1,54 +0,0 @@ -#!/bin/bash - -set -e -x - -# Replace native compilation flags with more generic ones. -cp make.inc.manylinux make.inc - -# Clean up the build and make the library. -make clean -make lib - -# Test to make sure everything is ok. -make test - -# Needed for pip install to work -export FINUFFT_DIR=$(pwd) -# Needed for auditwheel to find the dynamic libraries -export LD_LIBRARY_PATH=${FINUFFT_DIR}/lib:${LD_LIBRARY_PATH} - -# Explicitly list Python versions to build -versions=("cp36-cp36m" - "cp37-cp37m" - "cp38-cp38" - "cp39-cp39" - "cp310-cp310" - "cp311-cp311" - "cp312-cp312" - "pp39-pypy39_pp73") - -pys=() -for version in "${versions[@]}"; do - pys+=("/opt/python/${version}/bin") -done - -# build wheel -for pybin in "${pys[@]}"; do - "${pybin}/pip" install --upgrade pip - "${pybin}/pip" install auditwheel wheel numpy - "${pybin}/pip" wheel ./python/finufft -w python/finufft/wheelhouse -done - -# fix wheel -for whl in python/finufft/wheelhouse/finufft-*.whl; do - auditwheel repair "$whl" -w python/finufft/wheelhouse/ -done - -# test wheel -for pybin in "${pys[@]}"; do - "${pybin}/pip" install --pre finufft -f ./python/finufft/wheelhouse/ - "${pybin}/python" ./python/finufft/test/run_accuracy_tests.py - "${pybin}/python" ./python/finufft/examples/simple1d1.py - "${pybin}/pip" install pytest - "${pybin}/pytest" 
python/finufft/test -done From 53fc7ba036ad05bd63f14cb5b488c4abe6642a1f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Joakim=20And=C3=A9n?= Date: Mon, 15 Jul 2024 23:57:51 +0200 Subject: [PATCH 74/83] ci: fix archs for macOS --- python/finufft/pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python/finufft/pyproject.toml b/python/finufft/pyproject.toml index e98041647..64691ace7 100644 --- a/python/finufft/pyproject.toml +++ b/python/finufft/pyproject.toml @@ -64,7 +64,7 @@ archs = "x86_64" before-all = "yum install -y fftw3-devel" [tool.cibuildwheel.macos] -archs = "arm64" +archs = ["x86_64", "arm64"] [tool.cibuildwheel.windows] archs = "AMD64" From d4631d779a06149851040fc33c641661b7fb7fad Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Joakim=20And=C3=A9n?= Date: Tue, 16 Jul 2024 00:04:09 +0200 Subject: [PATCH 75/83] ci: specify archs explicitly for macOS --- .github/workflows/python_build_wheels.yml | 2 ++ python/finufft/pyproject.toml | 3 --- 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/.github/workflows/python_build_wheels.yml b/.github/workflows/python_build_wheels.yml index 87ed3ce9c..4d8a2f549 100644 --- a/.github/workflows/python_build_wheels.yml +++ b/.github/workflows/python_build_wheels.yml @@ -19,6 +19,7 @@ jobs: package-dir: 'python/finufft' env: CIBW_BEFORE_ALL_MACOS: brew install gcc@14 fftw + CIBW_ARCHS_MACOS: "x86_64" # Need following versions of GCC for compatibility with fftw # installed by homebrew. Similarly, we set the macOS version # for compatibility with those libraries. @@ -44,6 +45,7 @@ jobs: with: package-dir: 'python/finufft' env: + CIBW_ARCHS_MACOS: "arm64" # Make sure to install the ARM64-specific versions of FFTW and GCC. # Perhaps this is done automatically on the macos-14 image. We should # look into this further. 
diff --git a/python/finufft/pyproject.toml b/python/finufft/pyproject.toml index 64691ace7..f9c74fc75 100644 --- a/python/finufft/pyproject.toml +++ b/python/finufft/pyproject.toml @@ -63,9 +63,6 @@ test-command = "pytest {project}/python/finufft/test" archs = "x86_64" before-all = "yum install -y fftw3-devel" -[tool.cibuildwheel.macos] -archs = ["x86_64", "arm64"] - [tool.cibuildwheel.windows] archs = "AMD64" before-build = "pip install delvewheel" From d04ffcfde66de28f8cd3c75066b1b42ae6358d2a Mon Sep 17 00:00:00 2001 From: Libin Lu Date: Tue, 16 Jul 2024 12:55:35 -0400 Subject: [PATCH 76/83] cleanup a bit, a bit slower --- src/spreadinterp.cpp | 90 ++++++++++++++++++-------------------------- 1 file changed, 37 insertions(+), 53 deletions(-) diff --git a/src/spreadinterp.cpp b/src/spreadinterp.cpp index 38371d17a..b820af054 100644 --- a/src/spreadinterp.cpp +++ b/src/spreadinterp.cpp @@ -794,67 +794,51 @@ Two upsampfacs implemented. Params must match ref formula. Barnett 4/24/18 */ if constexpr (use_ker_sym) { static constexpr uint8_t tail = w % simd_size; static constexpr uint8_t if_odd_degree = ((nc + 1) % 2); - static const simd_type zerov(0.0); + static constexpr uint8_t offset_start = tail ? w - tail : w - simd_size; + static constexpr uint8_t end_idx = (w + (tail > 0)) / 2; const simd_type zv(z); const simd_type z2v = zv * zv; - // no xsimd::shuffle neeeded if tail is zero - if constexpr (tail) { - // some xsimd constant for shuffle - static constexpr auto shuffle_batch = - xsimd::make_batch_constant, arch_t, - shuffle_index>(); - - // process simd vecs - simd_type k_odd, k_even, k_prev, k_sym = zerov; - for (uint8_t i = 0, offset = w - tail; i < (w + 1) / 2; - i += simd_size, offset -= simd_size) { - k_odd = if_odd_degree ? 
simd_type::load_aligned(padded_coeffs[0].data() + i) - : zerov; - k_even = simd_type::load_aligned(padded_coeffs[if_odd_degree].data() + i); - for (uint8_t j = 1 + if_odd_degree; j < nc; j += 2) { - const auto cji_odd = simd_type::load_aligned(padded_coeffs[j].data() + i); - k_odd = xsimd::fma(k_odd, z2v, cji_odd); - const auto cji_even = - simd_type::load_aligned(padded_coeffs[j + 1].data() + i); - k_even = xsimd::fma(k_even, z2v, cji_even); + // some xsimd constant for shuffle or inverse + static constexpr auto shuffle_batch = []() constexpr noexcept { + if constexpr (tail) { + return xsimd::make_batch_constant, arch_t, + shuffle_index>(); + } else { + return xsimd::make_batch_constant, arch_t, + reverse_index>(); + } + }(); + + // process simd vecs + simd_type k_odd, k_even, k_prev, k_sym{0}; + for (uint8_t i = 0, offset = offset_start; i < end_idx; + i += simd_size, offset -= simd_size) { + k_odd = [i]() constexpr noexcept { + if constexpr (if_odd_degree) { + return simd_type::load_aligned(padded_coeffs[0].data() + i); + } else { + return simd_type{0}; } - // left part - xsimd::fma(k_odd, zv, k_even).store_aligned(ker + i); - // right part symmetric to the left part - if (offset >= (w + 1) / 2) { + }(); + k_even = simd_type::load_aligned(padded_coeffs[if_odd_degree].data() + i); + for (uint8_t j = 1 + if_odd_degree; j < nc; j += 2) { + const auto cji_odd = simd_type::load_aligned(padded_coeffs[j].data() + i); + k_odd = xsimd::fma(k_odd, z2v, cji_odd); + const auto cji_even = simd_type::load_aligned(padded_coeffs[j + 1].data() + i); + k_even = xsimd::fma(k_even, z2v, cji_even); + } + // left part + xsimd::fma(k_odd, zv, k_even).store_aligned(ker + i); + // right part symmetric to the left part + if (offset >= end_idx) { + if constexpr (tail) { // to use aligned store, we need shuffle the previous k_sym and current k_sym k_prev = k_sym; k_sym = xsimd::fnma(k_odd, zv, k_even); xsimd::shuffle(k_sym, k_prev, shuffle_batch).store_aligned(ker + offset); - } - } - } 
else { - // xsimd constants for reverse - static constexpr auto reverse_batch = - xsimd::make_batch_constant, arch_t, - reverse_index>(); - - // process simd vecs - for (uint8_t i = 0, offset = w - simd_size; i < w / 2; - i += simd_size, offset -= simd_size) { - auto k_odd = if_odd_degree - ? simd_type::load_aligned(padded_coeffs[0].data() + i) - : zerov; - auto k_even = simd_type::load_aligned(padded_coeffs[if_odd_degree].data() + i); - for (uint8_t j = 1 + if_odd_degree; j < nc; j += 2) { - const auto cji_odd = simd_type::load_aligned(padded_coeffs[j].data() + i); - k_odd = xsimd::fma(k_odd, z2v, cji_odd); - const auto cji_even = - simd_type::load_aligned(padded_coeffs[j + 1].data() + i); - k_even = xsimd::fma(k_even, z2v, cji_even); - } - // left part - xsimd::fma(k_odd, zv, k_even).store_aligned(ker + i); - // right part symmetric to the left part - if (offset >= w / 2) { - // reverse the order for symmetric part - xsimd::swizzle(xsimd::fnma(k_odd, zv, k_even), reverse_batch) + } else { + xsimd::swizzle(xsimd::fnma(k_odd, zv, k_even), shuffle_batch) .store_aligned(ker + offset); } } From 6dcf55d544f0004bfbc7a5f0facb590b3184f0bf Mon Sep 17 00:00:00 2001 From: Marco Barbone Date: Tue, 16 Jul 2024 14:57:05 -0400 Subject: [PATCH 77/83] small fixes --- src/spreadinterp.cpp | 21 +++++++++++---------- 1 file changed, 11 insertions(+), 10 deletions(-) diff --git a/src/spreadinterp.cpp b/src/spreadinterp.cpp index b820af054..14a057062 100644 --- a/src/spreadinterp.cpp +++ b/src/spreadinterp.cpp @@ -796,8 +796,8 @@ Two upsampfacs implemented. Params must match ref formula. Barnett 4/24/18 */ static constexpr uint8_t if_odd_degree = ((nc + 1) % 2); static constexpr uint8_t offset_start = tail ? 
w - tail : w - simd_size; static constexpr uint8_t end_idx = (w + (tail > 0)) / 2; - const simd_type zv(z); - const simd_type z2v = zv * zv; + const simd_type zv{z}; + const simd_type z2v{zv * zv}; // some xsimd constant for shuffle or inverse static constexpr auto shuffle_batch = []() constexpr noexcept { @@ -811,21 +811,24 @@ Two upsampfacs implemented. Params must match ref formula. Barnett 4/24/18 */ }(); // process simd vecs - simd_type k_odd, k_even, k_prev, k_sym{0}; - for (uint8_t i = 0, offset = offset_start; i < end_idx; + struct EmptySimd {}; + // these exist only if tail > 0 + typename std::conditional<(tail > 0), simd_type, EmptySimd>::type k_prev, k_sym; + if constexpr (tail) k_sym = {0}; + for (uint8_t i{0}, offset = offset_start; i < end_idx; i += simd_size, offset -= simd_size) { - k_odd = [i]() constexpr noexcept { + auto k_odd = [i]() constexpr noexcept { if constexpr (if_odd_degree) { return simd_type::load_aligned(padded_coeffs[0].data() + i); } else { return simd_type{0}; } }(); - k_even = simd_type::load_aligned(padded_coeffs[if_odd_degree].data() + i); - for (uint8_t j = 1 + if_odd_degree; j < nc; j += 2) { + auto k_even = simd_type::load_aligned(padded_coeffs[if_odd_degree].data() + i); + for (uint8_t j{1 + if_odd_degree}; j < nc; j += 2) { const auto cji_odd = simd_type::load_aligned(padded_coeffs[j].data() + i); - k_odd = xsimd::fma(k_odd, z2v, cji_odd); const auto cji_even = simd_type::load_aligned(padded_coeffs[j + 1].data() + i); + k_odd = xsimd::fma(k_odd, z2v, cji_odd); k_even = xsimd::fma(k_even, z2v, cji_even); } // left part @@ -845,7 +848,6 @@ Two upsampfacs implemented. Params must match ref formula. Barnett 4/24/18 */ } } else { const simd_type zv(z); - for (uint8_t i = 0; i < w; i += simd_size) { auto k = simd_type::load_aligned(padded_coeffs[0].data() + i); for (uint8_t j = 1; j < nc; ++j) { @@ -855,7 +857,6 @@ Two upsampfacs implemented. Params must match ref formula. 
Barnett 4/24/18 */ k.store_aligned(ker + i); } } - return; } // insert the auto-generated code which expects z, w args, writes to ker... From 2f1f13fecc74bf6e0962b2d9121b96628eaec093 Mon Sep 17 00:00:00 2001 From: Marco Barbone Date: Tue, 16 Jul 2024 17:02:23 -0400 Subject: [PATCH 78/83] fixed compile flags that was breaking clang code --- CMakeLists.txt | 1 - 1 file changed, 1 deletion(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index a3a61fba1..0bf33a035 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -17,7 +17,6 @@ if (CMAKE_CXX_COMPILER_ID IN_LIST GNU_LIKE_FRONTENDS) -fno-trapping-math -fassociative-math -freciprocal-math - -fmerge-all-constants -ftree-vectorize ) # if -fimplicit-constexpr is supported, add it to the list of flags From 9d9b64c1bcc51bd1703975938606e7e06360b2cb Mon Sep 17 00:00:00 2001 From: Libin Lu Date: Wed, 17 Jul 2024 12:16:17 -0400 Subject: [PATCH 79/83] remove conditional declaration --- CMakeLists.txt | 1 + src/spreadinterp.cpp | 4 +--- 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 0bf33a035..a3a61fba1 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -17,6 +17,7 @@ if (CMAKE_CXX_COMPILER_ID IN_LIST GNU_LIKE_FRONTENDS) -fno-trapping-math -fassociative-math -freciprocal-math + -fmerge-all-constants -ftree-vectorize ) # if -fimplicit-constexpr is supported, add it to the list of flags diff --git a/src/spreadinterp.cpp b/src/spreadinterp.cpp index 14a057062..169e09dbf 100644 --- a/src/spreadinterp.cpp +++ b/src/spreadinterp.cpp @@ -811,9 +811,7 @@ Two upsampfacs implemented. Params must match ref formula. 
Barnett 4/24/18 */ }(); // process simd vecs - struct EmptySimd {}; - // these exist only if tail > 0 - typename std::conditional<(tail > 0), simd_type, EmptySimd>::type k_prev, k_sym; + simd_type k_prev, k_sym; if constexpr (tail) k_sym = {0}; for (uint8_t i{0}, offset = offset_start; i < end_idx; i += simd_size, offset -= simd_size) { From 5a6827273fbd6a262b1a4147c704432653f6064b Mon Sep 17 00:00:00 2001 From: Libin Lu Date: Wed, 17 Jul 2024 12:36:56 -0400 Subject: [PATCH 80/83] try to make -fmerge-all-constants work --- src/spreadinterp.cpp | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/src/spreadinterp.cpp b/src/spreadinterp.cpp index 169e09dbf..aff8112d4 100644 --- a/src/spreadinterp.cpp +++ b/src/spreadinterp.cpp @@ -796,8 +796,8 @@ Two upsampfacs implemented. Params must match ref formula. Barnett 4/24/18 */ static constexpr uint8_t if_odd_degree = ((nc + 1) % 2); static constexpr uint8_t offset_start = tail ? w - tail : w - simd_size; static constexpr uint8_t end_idx = (w + (tail > 0)) / 2; - const simd_type zv{z}; - const simd_type z2v{zv * zv}; + const simd_type zv(z); + const simd_type z2v = zv * zv; // some xsimd constant for shuffle or inverse static constexpr auto shuffle_batch = []() constexpr noexcept { @@ -811,8 +811,7 @@ Two upsampfacs implemented. Params must match ref formula. 
Barnett 4/24/18 */ }(); // process simd vecs - simd_type k_prev, k_sym; - if constexpr (tail) k_sym = {0}; + simd_type k_prev, k_sym{0}; for (uint8_t i{0}, offset = offset_start; i < end_idx; i += simd_size, offset -= simd_size) { auto k_odd = [i]() constexpr noexcept { From 8747df58a3f90494d39abec3392b6f071cc0a9e7 Mon Sep 17 00:00:00 2001 From: Libin Lu Date: Wed, 17 Jul 2024 14:16:15 -0400 Subject: [PATCH 81/83] use auto for z2v --- src/spreadinterp.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/spreadinterp.cpp b/src/spreadinterp.cpp index aff8112d4..3f03c79bb 100644 --- a/src/spreadinterp.cpp +++ b/src/spreadinterp.cpp @@ -796,8 +796,8 @@ Two upsampfacs implemented. Params must match ref formula. Barnett 4/24/18 */ static constexpr uint8_t if_odd_degree = ((nc + 1) % 2); static constexpr uint8_t offset_start = tail ? w - tail : w - simd_size; static constexpr uint8_t end_idx = (w + (tail > 0)) / 2; - const simd_type zv(z); - const simd_type z2v = zv * zv; + const simd_type zv{z}; + const auto z2v = zv * zv; // some xsimd constant for shuffle or inverse static constexpr auto shuffle_batch = []() constexpr noexcept { From 4ea00960faa40e8dfdb22d691a7c449b7f3d7a8b Mon Sep 17 00:00:00 2001 From: Libin Lu Date: Wed, 17 Jul 2024 14:28:05 -0400 Subject: [PATCH 82/83] Horner's rule for polynomial evaluation with symmetry idea diccussed in discussions #461 (#477) * test kernel sym with aligned store * add Horner sym eval without explicit aligned store(Martin does this in ducc) * revert passing simd_type to ker_eval in interp, this is done in interp PR #471 * clean up * removed unused declare * add some comments * change to use fnma in sym part * cleanup a bit, a bit slower * small fixes * fixed compile flags that was breaking clang code * remove conditional declaration * try to make -fmerge-all-constants work * use auto for z2v --------- Co-authored-by: Marco Barbone --- src/spreadinterp.cpp | 85 +++++++++++++++++++++++++++++++++++++++----- 
1 file changed, 77 insertions(+), 8 deletions(-) diff --git a/src/spreadinterp.cpp b/src/spreadinterp.cpp index 4374c6e9d..3f03c79bb 100644 --- a/src/spreadinterp.cpp +++ b/src/spreadinterp.cpp @@ -24,6 +24,8 @@ namespace { // anonymous namespace for internal structs equivalent to declaring // static struct zip_low; struct zip_hi; +template struct reverse_index; +template struct shuffle_index; struct select_even; struct select_odd; // forward declaration to clean up the code and be able to use this everywhere in the file @@ -777,23 +779,80 @@ Two upsampfacs implemented. Params must match ref formula. Barnett 4/24/18 */ const FLT z = std::fma(FLT(2.0), x, FLT(w - 1)); // scale so local grid offset z in // [-1,1] if (opts.upsampfac == 2.0) { // floating point equality is fine here - static constexpr auto alignment = simd_type::arch_type::alignment(); + using arch_t = typename simd_type::arch_type; + static constexpr auto alignment = arch_t::alignment(); static constexpr auto simd_size = simd_type::size; static constexpr auto padded_ns = (w + simd_size - 1) & ~(simd_size - 1); static constexpr auto nc = nc200(); static constexpr auto horner_coeffs = get_horner_coeffs_200(); + static constexpr auto use_ker_sym = (simd_size < w); alignas(alignment) static constexpr auto padded_coeffs = pad_2D_array_with_zeros(horner_coeffs); - const simd_type zv(z); - for (uint8_t i = 0; i < w; i += simd_size) { - auto k = simd_type::load_aligned(padded_coeffs[0].data() + i); - for (uint8_t j = 1; j < nc; ++j) { - const auto cji = simd_type::load_aligned(padded_coeffs[j].data() + i); - k = xsimd::fma(k, zv, cji); + // use kernel symmetry trick if w > simd_size + if constexpr (use_ker_sym) { + static constexpr uint8_t tail = w % simd_size; + static constexpr uint8_t if_odd_degree = ((nc + 1) % 2); + static constexpr uint8_t offset_start = tail ? 
w - tail : w - simd_size; + static constexpr uint8_t end_idx = (w + (tail > 0)) / 2; + const simd_type zv{z}; + const auto z2v = zv * zv; + + // some xsimd constant for shuffle or inverse + static constexpr auto shuffle_batch = []() constexpr noexcept { + if constexpr (tail) { + return xsimd::make_batch_constant, arch_t, + shuffle_index>(); + } else { + return xsimd::make_batch_constant, arch_t, + reverse_index>(); + } + }(); + + // process simd vecs + simd_type k_prev, k_sym{0}; + for (uint8_t i{0}, offset = offset_start; i < end_idx; + i += simd_size, offset -= simd_size) { + auto k_odd = [i]() constexpr noexcept { + if constexpr (if_odd_degree) { + return simd_type::load_aligned(padded_coeffs[0].data() + i); + } else { + return simd_type{0}; + } + }(); + auto k_even = simd_type::load_aligned(padded_coeffs[if_odd_degree].data() + i); + for (uint8_t j{1 + if_odd_degree}; j < nc; j += 2) { + const auto cji_odd = simd_type::load_aligned(padded_coeffs[j].data() + i); + const auto cji_even = simd_type::load_aligned(padded_coeffs[j + 1].data() + i); + k_odd = xsimd::fma(k_odd, z2v, cji_odd); + k_even = xsimd::fma(k_even, z2v, cji_even); + } + // left part + xsimd::fma(k_odd, zv, k_even).store_aligned(ker + i); + // right part symmetric to the left part + if (offset >= end_idx) { + if constexpr (tail) { + // to use aligned store, we need shuffle the previous k_sym and current k_sym + k_prev = k_sym; + k_sym = xsimd::fnma(k_odd, zv, k_even); + xsimd::shuffle(k_sym, k_prev, shuffle_batch).store_aligned(ker + offset); + } else { + xsimd::swizzle(xsimd::fnma(k_odd, zv, k_even), shuffle_batch) + .store_aligned(ker + offset); + } + } + } + } else { + const simd_type zv(z); + for (uint8_t i = 0; i < w; i += simd_size) { + auto k = simd_type::load_aligned(padded_coeffs[0].data() + i); + for (uint8_t j = 1; j < nc; ++j) { + const auto cji = simd_type::load_aligned(padded_coeffs[j].data() + i); + k = xsimd::fma(k, zv, cji); + } + k.store_aligned(ker + i); } - k.store_aligned(ker 
+ i); } return; } @@ -2168,6 +2227,16 @@ struct zip_hi { return (size + index) / 2; } }; +template struct reverse_index { + static constexpr unsigned get(unsigned index, const unsigned size) { + return index < cap ? (cap - 1 - index) : index; + } +}; +template struct shuffle_index { + static constexpr unsigned get(unsigned index, const unsigned size) { + return index < cap ? (cap - 1 - index) : size + size + cap - 1 - index; + } +}; struct select_even { static constexpr unsigned get(unsigned index, unsigned /*size*/) { return index * 2; } From e30a3fa4dbca29ad09d7aedd2b441cfe0d5c1c79 Mon Sep 17 00:00:00 2001 From: Libin Lu Date: Thu, 18 Jul 2024 08:19:09 -0400 Subject: [PATCH 83/83] =?UTF-8?q?Revert=20"Horner's=20rule=20for=20polynom?= =?UTF-8?q?ial=20evaluation=20with=20symmetry=20idea=20diccussed=20?= =?UTF-8?q?=E2=80=A6"?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This reverts commit 4ea00960faa40e8dfdb22d691a7c449b7f3d7a8b. --- src/spreadinterp.cpp | 85 +++++--------------------------------------- 1 file changed, 8 insertions(+), 77 deletions(-) diff --git a/src/spreadinterp.cpp b/src/spreadinterp.cpp index 3f03c79bb..4374c6e9d 100644 --- a/src/spreadinterp.cpp +++ b/src/spreadinterp.cpp @@ -24,8 +24,6 @@ namespace { // anonymous namespace for internal structs equivalent to declaring // static struct zip_low; struct zip_hi; -template struct reverse_index; -template struct shuffle_index; struct select_even; struct select_odd; // forward declaration to clean up the code and be able to use this everywhere in the file @@ -779,80 +777,23 @@ Two upsampfacs implemented. Params must match ref formula. 
Barnett 4/24/18 */ const FLT z = std::fma(FLT(2.0), x, FLT(w - 1)); // scale so local grid offset z in // [-1,1] if (opts.upsampfac == 2.0) { // floating point equality is fine here - using arch_t = typename simd_type::arch_type; - static constexpr auto alignment = arch_t::alignment(); + static constexpr auto alignment = simd_type::arch_type::alignment(); static constexpr auto simd_size = simd_type::size; static constexpr auto padded_ns = (w + simd_size - 1) & ~(simd_size - 1); static constexpr auto nc = nc200(); static constexpr auto horner_coeffs = get_horner_coeffs_200(); - static constexpr auto use_ker_sym = (simd_size < w); alignas(alignment) static constexpr auto padded_coeffs = pad_2D_array_with_zeros(horner_coeffs); - // use kernel symmetry trick if w > simd_size - if constexpr (use_ker_sym) { - static constexpr uint8_t tail = w % simd_size; - static constexpr uint8_t if_odd_degree = ((nc + 1) % 2); - static constexpr uint8_t offset_start = tail ? w - tail : w - simd_size; - static constexpr uint8_t end_idx = (w + (tail > 0)) / 2; - const simd_type zv{z}; - const auto z2v = zv * zv; - - // some xsimd constant for shuffle or inverse - static constexpr auto shuffle_batch = []() constexpr noexcept { - if constexpr (tail) { - return xsimd::make_batch_constant, arch_t, - shuffle_index>(); - } else { - return xsimd::make_batch_constant, arch_t, - reverse_index>(); - } - }(); - - // process simd vecs - simd_type k_prev, k_sym{0}; - for (uint8_t i{0}, offset = offset_start; i < end_idx; - i += simd_size, offset -= simd_size) { - auto k_odd = [i]() constexpr noexcept { - if constexpr (if_odd_degree) { - return simd_type::load_aligned(padded_coeffs[0].data() + i); - } else { - return simd_type{0}; - } - }(); - auto k_even = simd_type::load_aligned(padded_coeffs[if_odd_degree].data() + i); - for (uint8_t j{1 + if_odd_degree}; j < nc; j += 2) { - const auto cji_odd = simd_type::load_aligned(padded_coeffs[j].data() + i); - const auto cji_even = 
simd_type::load_aligned(padded_coeffs[j + 1].data() + i); - k_odd = xsimd::fma(k_odd, z2v, cji_odd); - k_even = xsimd::fma(k_even, z2v, cji_even); - } - // left part - xsimd::fma(k_odd, zv, k_even).store_aligned(ker + i); - // right part symmetric to the left part - if (offset >= end_idx) { - if constexpr (tail) { - // to use aligned store, we need shuffle the previous k_sym and current k_sym - k_prev = k_sym; - k_sym = xsimd::fnma(k_odd, zv, k_even); - xsimd::shuffle(k_sym, k_prev, shuffle_batch).store_aligned(ker + offset); - } else { - xsimd::swizzle(xsimd::fnma(k_odd, zv, k_even), shuffle_batch) - .store_aligned(ker + offset); - } - } - } - } else { - const simd_type zv(z); - for (uint8_t i = 0; i < w; i += simd_size) { - auto k = simd_type::load_aligned(padded_coeffs[0].data() + i); - for (uint8_t j = 1; j < nc; ++j) { - const auto cji = simd_type::load_aligned(padded_coeffs[j].data() + i); - k = xsimd::fma(k, zv, cji); - } - k.store_aligned(ker + i); + const simd_type zv(z); + for (uint8_t i = 0; i < w; i += simd_size) { + auto k = simd_type::load_aligned(padded_coeffs[0].data() + i); + for (uint8_t j = 1; j < nc; ++j) { + const auto cji = simd_type::load_aligned(padded_coeffs[j].data() + i); + k = xsimd::fma(k, zv, cji); } + k.store_aligned(ker + i); } return; } @@ -2227,16 +2168,6 @@ struct zip_hi { return (size + index) / 2; } }; -template struct reverse_index { - static constexpr unsigned get(unsigned index, const unsigned size) { - return index < cap ? (cap - 1 - index) : index; - } -}; -template struct shuffle_index { - static constexpr unsigned get(unsigned index, const unsigned size) { - return index < cap ? (cap - 1 - index) : size + size + cap - 1 - index; - } -}; struct select_even { static constexpr unsigned get(unsigned index, unsigned /*size*/) { return index * 2; }