Skip to content

Commit

Permalink
Bug fix pecora samplesize (#103)
Browse files Browse the repository at this point in the history
* fixed tests w.r.t. deprecations in Peaks

* fixed samplesize-bug; new default for samplesize; adjusted tests

* rm Peaks.jl-dep; revised tests for mdop and garcia
  • Loading branch information
hkraemer committed Feb 24, 2022
1 parent ac3d903 commit 2d43d48
Show file tree
Hide file tree
Showing 6 changed files with 109 additions and 128 deletions.
5 changes: 2 additions & 3 deletions Project.toml
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
name = "DelayEmbeddings"
uuid = "5732040d-69e3-5649-938a-b6b4f237613f"
repo = "https://github.com/JuliaDynamics/DelayEmbeddings.jl.git"
version = "2.0.3"
version = "2.0.4"

[deps]
Distances = "b4f34e82-e78d-54a5-968a-f98e89d6e8f7"
Expand All @@ -26,9 +26,8 @@ julia = "1.5"
DelimitedFiles = "8bb1440f-4735-579b-a4ab-409b98df4dab"
Downloads = "f43a241f-c20a-4ad4-852c-f6b1247861c6"
DynamicalSystemsBase = "6e36e845-645a-534a-86f2-f5d4aa5a06b4"
Peaks = "18e31ff7-3703-566c-8e60-38913d67486b"
Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c"
Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40"

[targets]
test = ["DynamicalSystemsBase", "Test", "Random", "DelimitedFiles", "Peaks", "Downloads"]
test = ["DynamicalSystemsBase", "Test", "Random", "DelimitedFiles", "Downloads"]
6 changes: 3 additions & 3 deletions src/unified_de/pecora.jl
Original file line number Diff line number Diff line change
Expand Up @@ -201,15 +201,14 @@ written in detail (several pages!) in the source code of `pecora`.
"""
function pecora(
s, τs::NTuple{D, Int}, js::NTuple{D, Int} = Tuple(ones(Int, D));
delays = 0:50 , J=maxdimspan(s), samplesize::Real = 0.1, K::Int = 13, w::Int = 1,
delays = 0:50 , J=maxdimspan(s), samplesize::Real = 0.5, K::Int = 13, w::Int = 1,
db = 250, undersampling = false, metric = Chebyshev(), α::T = 0.05,
p::T = 0.5) where {D, T<:Real}

@assert K 8 "You must provide a δ-neighborhood size consisting of at least 8 neighbors."
@assert all(x -> x 0, js) "j's for generalized embedding must be positive integers"
@assert 0 < samplesize 1 "`samplesize` must be ∈ (0,1]"

N = floor(Int,samplesize*length(s)) #number of fiducial points
if undersampling
error("Undersampling statistic is not yet accurate for production use.")
end
Expand All @@ -225,9 +224,10 @@ function pecora(
ns = vec(1:length(vec(max(1, (-minimum(delays) + 1)):min(L, L - maximum(delays)))))
else
ns = sample(vec(max(1, (-minimum(delays) + 1)):min(L, L - maximum(delays))),
length(vec(max(1, (-minimum(delays) + 1)):min(L, L - maximum(delays)))),
floor(Int,samplesize*length(vec(max(1, (-minimum(delays) + 1)):min(L, L - maximum(delays))))),
replace = false)
end

vs = vspace[ns]
allNNidxs, allNNdist = all_neighbors(vtree, vs, ns, K, w)
# prepare things for undersampling statistic
Expand Down
9 changes: 2 additions & 7 deletions test/runtests.jl
Original file line number Diff line number Diff line change
Expand Up @@ -26,13 +26,8 @@ diffeq = (atol = 1e-9, rtol = 1e-9, maxiters = typemax(Int))
include("unified/test_pecora.jl")
include("unified/uzal_cost_test.jl")
include("unified/test_pecuzal_embedding.jl")

# The following methods have been tested throughly and also published in research.
# We know that they work, but unfortunately the tests we have written
# about them are not good.
# See https://github.com/JuliaDynamics/DelayEmbeddings.jl/issues/95
# include("unified/mdop_tests.jl")
# include("unified/test_garcia.jl")
include("unified/test_garcia.jl")
include("unified/mdop_tests.jl")

end

Expand Down
32 changes: 13 additions & 19 deletions test/unified/mdop_tests.jl
Original file line number Diff line number Diff line change
Expand Up @@ -38,13 +38,20 @@ s = readdlm(joinpath(tsfolder, "1.csv"))
s = vec(s)
Y = Dataset(s)

# For comparison reasons using Travis CI we carry out the integration on a UNIX
# OS and save the resulting time series
# roe = Systems.roessler([1.0, 0, 0]; a=0.2, b=0.2, c=5.7)
# sroe = trajectory(roe, 500; dt = 0.05, Ttr = 100.0)
# writedlm("2.csv", sroe)
sroe = readdlm(joinpath(tsfolder, "2.csv"))

theiler = 57

@testset "beta statistic" begin
## Test beta_statistic (core algorithm of mdop_embedding)

taus = 0:100
β = @inferred DelayEmbeddings.beta_statistic(Y, s, taus, theiler)
β = DelayEmbeddings.beta_statistic(Y, s, taus, theiler)
maxi, max_idx = findmax(β)

@test maxi>4.1
Expand All @@ -71,7 +78,7 @@ theiler = 57

# test different tau range
taus2 = 1:4:100
β2 = @inferred beta_statistic(Y, s, taus2, theiler)
β2 = beta_statistic(Y, s, taus2, theiler)
maxi2, max_idx2 = findmax(β2)

@test maxi2>4.1
Expand Down Expand Up @@ -133,18 +140,12 @@ end
end

@testset "estimate τ max (Roessler)" begin
# For comparison reasons using Travis CI we carry out the integration on a UNIX
# OS and save the resulting time series
# roe = Systems.roessler([1.0, 0, 0]; a=0.2, b=0.2, c=5.7)
# sroe = trajectory(roe, 500; dt = 0.05, Ttr = 100.0)
# writedlm("2.csv", sroe)

sroe = readdlm(joinpath(tsfolder, "2.csv"))
tws = 25:32

τ_m, L = @inferred mdop_maximum_delay(sroe[:, 2], tws)
τ_m, L = mdop_maximum_delay(sroe[:, 2], tws)
@test τ_m == 26
τ_m, Ls = @inferred mdop_maximum_delay(Dataset(sroe[:, 1:2]), tws)
τ_m, Ls = mdop_maximum_delay(Dataset(sroe[:, 1:2]), tws)
@test τ_m == 26

# # reproduce Fig.2 of the paper
Expand All @@ -171,13 +172,6 @@ end
end

@testset "mdop_embedding multivariate" begin
# For comparison reasons using Travis CI we carry out the integration on a UNIX
# OS and save the resulting time series
# roe = Systems.roessler([1.0, 0, 0]; a=0.2, b=0.2, c=5.7)
# sroe = trajectory(roe, 500; dt = 0.05, Ttr = 100.0)
# writedlm("2.csv", sroe)

sroe = readdlm(joinpath(tsfolder, "2.csv"))
tra = Dataset(sroe)
w1 = estimate_delay(sroe[:,1], "mi_min")
w2 = estimate_delay(sroe[:,2], "mi_min")
Expand All @@ -188,9 +182,9 @@ end

Y, τ_vals, ts_vals, FNNs, betas = mdop_embedding(sroe[:,1]; τs = taus, w = theiler, max_num_of_cycles = mc)

max_idx, ts_number = @inferred DelayEmbeddings.choose_optimal_tau2(betas)
max_idx, ts_number = DelayEmbeddings.choose_optimal_tau2(betas)
@test ts_number == 1
@test taus[max_idx] == τ_vals[2]
@test taus[max_idx] == τ_vals[2] == 26

Y2, τ_vals2, ts_vals2, FNNs2, betas2 = mdop_embedding(tra; τs = taus, w = theiler, max_num_of_cycles = mc)
ttra = standardize(tra)
Expand Down
58 changes: 37 additions & 21 deletions test/unified/test_garcia.jl
Original file line number Diff line number Diff line change
@@ -1,14 +1,21 @@
using DynamicalSystemsBase
using DelayEmbeddings
using Test
import Peaks
using Test, DelimitedFiles

println("\nTesting garcia_almeida.jl...")
@testset "Garcia & Almeida method" begin

lo = Systems.lorenz()

tr = trajectory(lo, 80; Δt = 0.01, Ttr = 10)
# Test Lorenz example
# For comparison reasons using Travis CI we carry out the integration on a UNIX
# OS and save the resulting time series
# See https://github.com/JuliaDynamics/JuliaDynamics for the storage of
# the time series used for testing
#
# u0 = [0, 10.0, 0.0]
# lo = Systems.lorenz(u0; σ=10, ρ=28, β=8/3)
# tr = trajectory(lo, 100; Δt = 0.01, Ttr = 100)
tr = readdlm(joinpath(tsfolder, "test_time_series_lorenz_standard_N_10000_multivariate.csv"))
tr = Dataset(tr)

x = tr[:, 1]
Y = Dataset(x)
Expand All @@ -25,14 +32,16 @@ Y = standardize(Y)
@test NN_distances[1][1:50] == NN_distances2[1][1:50]
@test NN_distances[5][1:100] == NN_distances2[5][1:100]

min_dist = 12
(max_1_idx,_) = Peaks.findmaxima(N,min_dist)
(max_2_idx,_) = Peaks.findmaxima(N2,min_dist)
(max_1_idx,_) = DelayEmbeddings.findlocalextrema(N)
(max_2_idx,min_2_idx) = DelayEmbeddings.findlocalextrema(N2)

@test 22 max_1_idx[5] 24
@test 35 max_1_idx[6] 37
@test 79 max_1_idx[12] 81

@test 30 max_1_idx[1] 35
@test 30 max_2_idx[1] 35
@test 69 max_1_idx[2] 75
@test 69 max_2_idx[2] 75
@test 11 min_2_idx[1] 13
@test 36 max_2_idx[3] 38
@test 79 max_2_idx[7] 81

# # plot N-Statistic for the Lorenz system as in Fig. 2(a) in [^Garcia2005a]
# using Plots
Expand All @@ -48,13 +57,16 @@ end
@testset "garcia_embed univariate" begin
Y_act, τ_vals, ts_vals, FNNs, NS = garcia_almeida_embedding(x; τs=0:100, w = 17, T = 17)
Y_act2, τ_vals2, ts_vals2, FNNs2, NS2 = garcia_almeida_embedding(x; τs=0:100, w = 1, T = 1)
Y_act3, τ_vals3, ts_vals3, FNNs3, NS3 = garcia_almeida_embedding(x; τs=0:100, w = 17, T = 17, fnn_thres=0.1)

@test size(Y_act,2) == 2
@test size(Y_act,2) == 3
@test size(Y_act2,2) == 3
@test 10 τ_vals[2] 18
@test 15 τ_vals[2] 17
@test 1 τ_vals2[2] 3
@test FNNs[end]<=0.05
@test FNNs[1] FNNs[2] FNNs[3]
@test FNNs2[end]>FNNs2[end-1]
@test size(Y_act3,2) == 2
@test τ_vals3 == τ_vals[1:2]

# using Plots
# plot3d(Y_act[:,1],Y_act[:,2],Y_act[:,3], marker=2, camera = (6, 4))
Expand All @@ -66,15 +78,19 @@ end


@testset "garcia_embed multivariate" begin

Y_act, τ_vals, ts_vals, FNNs, NS = garcia_almeida_embedding(tr; τs=0:100, w = 17, T = 17)
Y_act2, τ_vals2, ts_vals2, FNNs2, NS2 = garcia_almeida_embedding(tr; τs=0:100, w = 1, T = 1)

@test size(Y_act,2) == size(Y_act2,2) == 3
@test ts_vals[1] == ts_vals2[1] == ts_vals2[2] == ts_vals2[3] ==1
@test ts_vals[2] == 2
@test ts_vals[3] == 3
@test 10 τ_vals[3] 18
@test 1 τ_vals2[3] 3
@test size(Y_act,2) == 5
@test size(Y_act2,2) == 3
@test ts_vals[1] == ts_vals[4] == 1
@test ts_vals[2] == ts_vals[5] == 3
@test ts_vals[3] == 2
@test τ_vals == [0, 5, 5, 38, 55]

@test ts_vals2 == [1, 1, 1]
@test τ_vals2 == [0, 1, 2]

# try to reproduce Fig.2a in [^Garcia2005b]
tra = Dataset(hcat(tr[:,1], tr[:,3]))
Expand Down
127 changes: 52 additions & 75 deletions test/unified/test_pecora.jl
Original file line number Diff line number Diff line change
@@ -1,103 +1,80 @@
using DelayEmbeddings, DynamicalSystemsBase
using Test
using Random
import Peaks

@testset "Pecora" begin
# %% Generate data
lor = Systems.lorenz([0.0;1.0;0.0];ρ=60)
data = trajectory(lor, 200; Δt=0.02, Ttr = 10)
metric = Chebyshev()

UNDERSAMPLING = false

@testset "Pecora univariate" begin
# %% Timeseries case

s = data[:, 2] # input timeseries = first entry of lorenz
optimal_τ = estimate_delay(s, "mi_min")
Tmax = 100
K = 14
samplesize = 1
s = data[:, 2] # input timeseries = first entry of lorenz
optimal_τ = estimate_delay(s, "mi_min")
Tmax = 100
K = 14
samplesize = 1

# using PyPlot
# pygui(true)
# figure()
# ax1 = subplot(211)
# ylabel("⟨ε★⟩")
# ax2 = subplot(212)
# xlabel("τ (index units)")
# ylabel("⟨Γ⟩")
τs = (0,)
Random.seed!(123)
es_ref, Γs = pecora(s, τs; delays = 0:Tmax, w = optimal_τ, samplesize = samplesize, K = K, metric = metric)
(max1,_) = DelayEmbeddings.findlocalextrema(vec(es_ref))
@test optimal_τ - 2 max1[1]-1 optimal_τ + 2
maxi = maximum(es_ref)
@test maxi 1.3

τs = (0,)
Random.seed!(123)
es_ref, Γs = pecora(s, τs; delays = 0:Tmax, w = optimal_τ, samplesize = samplesize, K = K, metric = metric, undersampling = UNDERSAMPLING)
(max1,_) = Peaks.findmaxima(vec(es_ref))
@test optimal_τ - 2 max1[1]-1 optimal_τ + 2
maxi = maximum(es_ref)
@test maxi 1.3
# ax1.plot(es_ref, label = "τs = $(τs)")
# ax2.plot(Γs)
Random.seed!(123)
es_ref2, Γs = pecora(s, τs; delays = 0:Tmax, w = optimal_τ, K = K, metric = metric)
(max2,_) = DelayEmbeddings.findlocalextrema(vec(es_ref2))
@test optimal_τ - 2 max2[1]-1 optimal_τ + 2
maxi2 = maximum(es_ref2)
@test maxi - .1 maxi2 maxi + .1

τs = (0, max1[1]-1,)
Random.seed!(123)
es, Γs = pecora(s, τs; delays = 0:Tmax, w = optimal_τ, samplesize = samplesize, K = K, metric = metric, undersampling = UNDERSAMPLING)
(max2,_) = Peaks.findmaxima(vec(es))
(min1,_) = Peaks.findminima(vec(es))
@test 1 max2[1]-1 4
@test optimal_τ - 2 min1[1]-1 optimal_τ + 2
# ax1.plot(es, label = "τs = $(τs)")
# ax2.plot(Γs)
τs = (0, max1[1]-1,)
Random.seed!(123)
es, Γs = pecora(s, τs; delays = 0:Tmax, w = optimal_τ, samplesize = samplesize, K = K, metric = metric)
(max2,min1) = DelayEmbeddings.findlocalextrema(vec(es))
@test 1 max2[1]-1 4
@test optimal_τ - 2 min1[2]-1 optimal_τ + 2

τs = (0, max1[1]-1, max2[1]-1)
Random.seed!(123)
es, Γs = pecora(s, τs; delays = 0:Tmax, w = optimal_τ, samplesize = samplesize, K = K, metric = metric, undersampling = UNDERSAMPLING)
(min2,_) = Peaks.findminima(vec(es))
@test max2[1]-2 min2[1]-1 max2[1]
# ax1.plot(es, label = "τs = $(τs)")
# ax2.plot(Γs)
# ax1.legend()
# ax1.set_title("lorenz data")
τs = (0, max1[1]-1, max2[1]-1)
Random.seed!(123)
es, Γs = pecora(s, τs; delays = 0:Tmax, w = optimal_τ, samplesize = samplesize, K = K, metric = metric)
(_,min2) = DelayEmbeddings.findlocalextrema(vec(es))
@test max2[1]-2 min2[1]-1 max2[1]

end

@testset "Pecora multivariate" begin
## %% Trajectory case
s = Dataset(data)
optimal_τ = estimate_delay(s[:,2], "mi_min")
Tmax = 100
K = 14
samplesize = 1
s = Dataset(data)
optimal_τ = estimate_delay(s[:,2], "mi_min")
Tmax = 100
K = 14
samplesize = 1

js = (2,)
τs = (0,)
Random.seed!(123)
es_ref, _ = pecora(s[:,2], τs; delays = 0:Tmax, w = optimal_τ, samplesize = samplesize, K = K, metric = metric)
Random.seed!(123)
es_ref2, _ = pecora(s, τs, js; delays = 0:Tmax, w = optimal_τ, samplesize = .2, K = K, metric = metric)
Random.seed!(123)
es, _= pecora(s, τs, js; delays = 0:Tmax, samplesize = samplesize, w = optimal_τ, K = K, metric = metric)

js = (2,)
τs = (0,)
Random.seed!(123)
es_ref, Γs = pecora(s[:,2], τs; delays = 0:Tmax, w = optimal_τ, samplesize = samplesize, K = K, metric = metric, undersampling = UNDERSAMPLING)
Random.seed!(123)
es, Γs = pecora(s, τs, js; delays = 0:Tmax, samplesize = samplesize, w = optimal_τ, K = K, metric = metric, undersampling = UNDERSAMPLING)

@test round.(es[:,2], digits = 4) == round.(vec(es_ref),digits = 4)
(x_maxi,_) = Peaks.findmaxima(es[:,1])
@test 8 x_maxi[1]-1 10
(z_maxi,_) = Peaks.findmaxima(es[:,3])
@test 13 z_maxi[1]-1 15
@test round.(es[:,2], digits = 4) == round.(vec(es_ref),digits = 4)
(x_maxi,_) = DelayEmbeddings.findlocalextrema(es[:,1])
@test 8 x_maxi[2]-1 10
(z_maxi,_) = DelayEmbeddings.findlocalextrema(es[:,3])
@test 13 z_maxi[2]-1 15
(x_maxi_ref,_) = DelayEmbeddings.findlocalextrema(es[:,2])
(x_maxi_ref2,_) = DelayEmbeddings.findlocalextrema(es_ref2[:,2])

# using PyPlot
# pygui(true)
# figure()
# subplot(211)
# for i in 1:3
# plot(es[:, i], label = "τs = $(τs), js = $(js), J = $(i)")
# end
# ylabel("⟨ε★⟩")
# title("lorenz system, multiple timeseries")
# legend()
# grid()
# subplot(212)
# plot(es_ref, label = "τs = $(τs), js = $(js), J = 2")
# ylabel("⟨ε★⟩-ref")
# xlabel("τ (index units)")
# grid()
@test x_maxi_ref[1:4] == x_maxi_ref2[1:4]
@test es[x_maxi_ref[1:4],2] .- .1 es_ref2[x_maxi_ref2[1:4],2] es[x_maxi_ref[1:4],2] .+ .1

end
end

0 comments on commit 2d43d48

Please sign in to comment.