Skip to content

Commit

Permalink
Pecuzal has samplesize keyword (#104)
Browse files Browse the repository at this point in the history
* fixed tests w.r.t. deprecations in Peaks

* samplesize keyword in uzal_cost_pecuzal(); dragged NN-computation out of loop

* samplesize keyword in uzal_cost_pecuzal(); dragged NN-computation out of loop

* adjusted tests w.r.t. recent changes in pecora()

* right version increment

* shortened keyword-call
  • Loading branch information
hkraemer committed Feb 25, 2022
1 parent 2d43d48 commit a9f782e
Show file tree
Hide file tree
Showing 3 changed files with 57 additions and 18 deletions.
2 changes: 1 addition & 1 deletion Project.toml
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
name = "DelayEmbeddings"
uuid = "5732040d-69e3-5649-938a-b6b4f237613f"
repo = "https://github.com/JuliaDynamics/DelayEmbeddings.jl.git"
version = "2.0.4"
version = "2.1.0"

[deps]
Distances = "b4f34e82-e78d-54a5-968a-f98e89d6e8f7"
Expand Down
43 changes: 26 additions & 17 deletions src/unified_de/pecuzal.jl
Original file line number Diff line number Diff line change
Expand Up @@ -361,7 +361,7 @@ function local_L_statistics(ε★::Vector{T}, Y_act::Dataset{D, T}, s::Vector{T}
Y_trial = DelayEmbeddings.hcat_lagged_values(Y_act, s, τs[τ_idx-1])
# compute L-statistic for Y_act and Y_trial and get the maximum decrease
L_decrease[i] = uzal_cost_pecuzal(Y_act, Y_trial, τs[end]; K = KNN,
w = w, metric = metric, econ = econ)
w , metric, econ, samplesize)
end
return L_decrease, max_idx
end
Expand Down Expand Up @@ -451,13 +451,16 @@ minimum. Return the according minimum `L_decrease`-value.
* `econ::Bool = false`: Economy-mode for L-statistic computation. Instead of
computing L-statistics for time horizons `2:Tw`, here we only compute them for
`2:2:Tw`.
* `samplesize::Real = 1`: determines the fraction of all phase space points
  to be considered (fiducial points v) for computing the L-statistic.
"""
function uzal_cost_pecuzal(Y::Dataset{D, ET}, Y_trial::Dataset{DT, ET}, Tw::Int;
K::Int = 3, w::Int = 1, econ::Bool = false, metric = Euclidean()
) where {D, DT, ET}
K::Int = 3, w::Int = 1, econ::Bool = false, metric = Euclidean(),
samplesize::Real = 1) where {D, DT, ET}

@assert DT == D+1
@assert Tw ≥ 0
@assert 0 < samplesize ≤ 1.0 "`samplesize` must be ∈ (0,1]"

if econ
tws = 2:2:Tw # start at 2 will eliminate bad results for noise
Expand All @@ -479,31 +482,37 @@ function uzal_cost_pecuzal(Y::Dataset{D, ET}, Y_trial::Dataset{DT, ET}, Tw::Int;

dist_former = 9999999 # initial L-decrease

# loop over each time horizon
cnt = 1
for T in tws
NN = length(Y_trial)-T
if NN < 1
error("Time series too short for given possible delays and Theiler window to find enough nearest neighbours")
end
NN = length(Y_trial)-tws[end]
if NN < 1
error("Time series too short for given possible delays and Theiler window to find enough nearest neighbours")
end
if samplesize==1
ns = 1:NN
Nfp = length(ns)
else
Nfp = Int(floor(samplesize*NN)) # number of considered fiducial points
ns = sample(vec(1:NN), Nfp, replace = false) # indices of fiducial points
end

vs = Y[ns] # the fiducial points in the data set
vs_trial = Y_trial[ns] # the fiducial points in the data set
vs = Y[ns] # the fiducial points in the data set
vs_trial = Y_trial[ns] # the fiducial points in the data set

vtree = KDTree(Y[1:NN], metric)
allNNidxs, allNNdist = DelayEmbeddings.all_neighbors(vtree, vs, ns, K, w)
vtree_trial = KDTree(Y_trial[1:NN], metric)
allNNidxs_trial, allNNdist_trial = DelayEmbeddings.all_neighbors(vtree_trial, vs_trial, ns, K, w)
vtree = KDTree(Y[1:NN], metric)
allNNidxs, allNNdist = DelayEmbeddings.all_neighbors(vtree, vs, ns, K, w)
vtree_trial = KDTree(Y_trial[1:NN], metric)
allNNidxs_trial, allNNdist_trial = DelayEmbeddings.all_neighbors(vtree_trial, vs_trial, ns, K, w)

# loop over each time horizon
cnt = 1
for T in tws
# compute conditional variances and neighborhood-sizes
compute_conditional_variances!(ns, vs, vs_trial, allNNidxs,
allNNidxs_trial, Y, Y_trial, ϵ_ball, ϵ_ball_trial, u_k, u_k_trial,
T, K, metric, ϵ², ϵ²_trial, E², E²_trial, cnt)

# compute distance of L-values and check whether that distance can be
# increased
dist = compute_L_decrease(E², E²_trial, ϵ², ϵ²_trial, cnt, NN)
dist = compute_L_decrease(E², E²_trial, ϵ², ϵ²_trial, cnt, Nfp)
if isnan(dist)
error("Computed 0-distances, due to duplicate datapoints in your data. Try to add minimal additive noise to the signal you wish to embed and try again.")
end
Expand Down
30 changes: 30 additions & 0 deletions test/unified/test_pecuzal_embedding.jl
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,19 @@ tr = Dataset(tr)
@test τ_vals[4] == 80
@test length(ts_vals) == 4

# lower samplesize
Random.seed!(123)
@time Y2, τ_vals2, ts_vals2, Ls2 , εs2 = pecuzal_embedding(s[1:5000];
τs = 0:Tmax , w = w, L_threshold = 0.05, econ=true, samplesize = 0.5)
@test Ls[1] - .1 < Ls2[1] < Ls[1] + .1
@test Ls[2] - .1 < Ls2[2] < Ls[2] + .1
@test Ls[3] - .1 < Ls2[3] < Ls[3] + .1

@test τ_vals2[2] == 19
@test τ_vals2[3] == 97
@test τ_vals2[4] == 62
@test length(ts_vals2) == 4

@time Y, τ_vals, ts_vals, Ls , εs = pecuzal_embedding(s;
τs = 0:Tmax , w = w, L_threshold = 0.05)
@test -0.95 < Ls[1] < -0.91
Expand Down Expand Up @@ -74,6 +87,23 @@ end
@test -0.76 < Ls[2] < -0.72
@test -0.1 < Ls[3] < -0.06

# less fiducial points for computation
Random.seed!(123)
@time Y2, τ_vals2, ts_vals2, Ls2 , ε★2 = pecuzal_embedding(tr[1:5000,:];
τs = 0:Tmax , w = w, econ = true, samplesize = 0.5)

@test length(ts_vals2) == 4
@test ts_vals2[2] == ts_vals2[3] == ts_vals2[4] == 1
@test ts_vals2[1] == 3
@test τ_vals2[1] == 0
@test τ_vals2[2] == 7
@test τ_vals2[3] == 61
@test τ_vals2[4] == 51
@test Ls[1] - .1 < Ls2[1] < Ls[1] + .1
@test Ls[2] - .1 < Ls2[2] < Ls[2] + .1
@test Ls[3] - .1 < Ls2[3] < Ls[3] + .1

# L-threshold
@time Y, τ_vals, ts_vals, Ls , ε★ = pecuzal_embedding(tr[1:5000,:];
τs = 0:Tmax , w = w, econ = true, L_threshold = 0.2)
@test -1.40 < Ls[1] < -1.36
Expand Down

0 comments on commit a9f782e

Please sign in to comment.