Skip to content

Commit 456ca04

Browse files
committed
Introduce some function barriers in fft! for CompositeFFT
This avoids repeated dynamic dispatch in the loops.
1 parent d577a6b commit 456ca04

File tree

1 file changed

+35
-4
lines changed

1 file changed

+35
-4
lines changed

Diff for: src/algos.jl

+35-4
Original file line numberDiff line numberDiff line change
@@ -33,25 +33,56 @@ function fft!(out::AbstractVector{T}, in::AbstractVector{U}, start_out::Int, sta
3333
N2 = right.sz
3434
s_in = root.s_in
3535
s_out = root.s_out
36+
tmp = g.workspace[idx]
37+
38+
_CompositeFFT_right_loop(tmp, in, N1, N2, start_in, s_in, d, g, right.type, right_idx)
39+
_CompositeFFT_left_loop(out, tmp, N2, start_out, s_out, d, g, left.type, left_idx)
40+
end
3641

42+
function _CompositeFFT_right_loop(
43+
tmp::AbstractVector{T},
44+
in::AbstractVector{T},
45+
N1::Int,
46+
N2::Int,
47+
start_in::Int,
48+
s_in::Int,
49+
d::Direction,
50+
g::CallGraph,
51+
type::AbstractFFTType,
52+
idx::Int
53+
) where T
54+
55+
N = N1 * N2
3756
w1 = convert(T, cispi(direction_sign(d)*2/N))
3857
wj1 = one(T)
39-
tmp = g.workspace[idx]
4058
@inbounds for j1 in 0:N1-1
4159
wk2 = wj1
42-
g(tmp, in, N2*j1+1, start_in + j1*s_in, d, right.type, right_idx)
60+
g(tmp, in, N2*j1+1, start_in + j1*s_in, d, type, idx)
4361
j1 > 0 && @inbounds for k2 in 1:N2-1
4462
tmp[N2*j1 + k2 + 1] *= wk2
4563
wk2 *= wj1
4664
end
4765
wj1 *= w1
4866
end
67+
end
4968

50-
@inbounds for k2 in 0:N2-1
51-
g(out, tmp, start_out + k2*s_out, k2+1, d, left.type, left_idx)
69+
function _CompositeFFT_left_loop(
70+
out::AbstractVector{T},
71+
tmp::AbstractVector{T},
72+
N::Int,
73+
start_out::Int,
74+
s_out::Int,
75+
d::Direction,
76+
g::CallGraph,
77+
type::AbstractFFTType,
78+
idx::Int
79+
) where T
80+
@inbounds for k in 0:N-1
81+
g(out, tmp, start_out + k*s_out, k+1, d, type, idx)
5282
end
5383
end
5484

85+
5586
"""
5687
$(TYPEDSIGNATURES)
5788
Discrete Fourier Transform, O(N^2) algorithm, in place.

0 commit comments

Comments
 (0)