Bug fix pecora samplesize (#103)

* fixed tests w.r.t. deprecations in Peaks * fixed samplesize-bug; new default for samplesize; adjusted tests * rm Peaks.jl-dep; revised tests for mdop and garcia
JuliaDynamics · Feb 24, 2022 · 2d43d48 · 2d43d48
1 parent ac3d903
commit 2d43d48
Show file tree

Hide file tree

Showing 6 changed files with 109 additions and 128 deletions.
diff --git a/Project.toml b/Project.toml
@@ -1,7 +1,7 @@
 name = "DelayEmbeddings"
 uuid = "5732040d-69e3-5649-938a-b6b4f237613f"
 repo = "https://github.com/JuliaDynamics/DelayEmbeddings.jl.git"
-version = "2.0.3"
+version = "2.0.4"
 
 [deps]
 Distances = "b4f34e82-e78d-54a5-968a-f98e89d6e8f7"
@@ -26,9 +26,8 @@ julia = "1.5"
 DelimitedFiles = "8bb1440f-4735-579b-a4ab-409b98df4dab"
 Downloads = "f43a241f-c20a-4ad4-852c-f6b1247861c6"
 DynamicalSystemsBase = "6e36e845-645a-534a-86f2-f5d4aa5a06b4"
-Peaks = "18e31ff7-3703-566c-8e60-38913d67486b"
 Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c"
 Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40"
 
 [targets]
-test = ["DynamicalSystemsBase", "Test", "Random", "DelimitedFiles", "Peaks", "Downloads"]
+test = ["DynamicalSystemsBase", "Test", "Random", "DelimitedFiles", "Downloads"]
diff --git a/src/unified_de/pecora.jl b/src/unified_de/pecora.jl
@@ -201,15 +201,14 @@ written in detail (several pages!) in the source code of `pecora`.
 """
 function pecora(
         s, τs::NTuple{D, Int}, js::NTuple{D, Int} = Tuple(ones(Int, D));
-        delays = 0:50 , J=maxdimspan(s), samplesize::Real = 0.1, K::Int = 13, w::Int = 1,
+        delays = 0:50 , J=maxdimspan(s), samplesize::Real = 0.5, K::Int = 13, w::Int = 1,
         db = 250, undersampling = false, metric = Chebyshev(), α::T = 0.05,
         p::T = 0.5) where {D, T<:Real}
 
     @assert K ≥ 8 "You must provide a δ-neighborhood size consisting of at least 8 neighbors."
     @assert all(x -> x ≥ 0, js) "j's for generalized embedding must be positive integers"
     @assert 0 < samplesize ≤ 1 "`samplesize` must be ∈ (0,1]"
 
-    N = floor(Int,samplesize*length(s)) #number of fiducial points
     if undersampling
         error("Undersampling statistic is not yet accurate for production use.")
     end
@@ -225,9 +224,10 @@ function pecora(
         ns = vec(1:length(vec(max(1, (-minimum(delays) + 1)):min(L, L - maximum(delays)))))
     else
         ns = sample(vec(max(1, (-minimum(delays) + 1)):min(L, L - maximum(delays))),
-        length(vec(max(1, (-minimum(delays) + 1)):min(L, L - maximum(delays)))),
+        floor(Int,samplesize*length(vec(max(1, (-minimum(delays) + 1)):min(L, L - maximum(delays))))),
         replace = false)
     end
+
     vs = vspace[ns]
     allNNidxs, allNNdist = all_neighbors(vtree, vs, ns, K, w)
     # prepare things for undersampling statistic

diff --git a/test/runtests.jl b/test/runtests.jl
@@ -26,13 +26,8 @@ diffeq = (atol = 1e-9, rtol = 1e-9, maxiters = typemax(Int))
     include("unified/test_pecora.jl")
     include("unified/uzal_cost_test.jl")
     include("unified/test_pecuzal_embedding.jl")
-
-    # The following methods have been tested throughly and also published in research.
-    # We know that they work, but unfortunately the tests we have written
-    # about them are not good.
-    # See https://github.com/JuliaDynamics/DelayEmbeddings.jl/issues/95
-    # include("unified/mdop_tests.jl")
-    # include("unified/test_garcia.jl")
+    include("unified/test_garcia.jl")
+    include("unified/mdop_tests.jl")
 
 end
 

diff --git a/test/unified/mdop_tests.jl b/test/unified/mdop_tests.jl
@@ -38,13 +38,20 @@ s = readdlm(joinpath(tsfolder, "1.csv"))
 s = vec(s)
 Y = Dataset(s)
 
+# For comparison reasons using Travis CI we carry out the integration on a UNIX
+# OS and save the resulting time series
+# roe = Systems.roessler([1.0, 0, 0]; a=0.2, b=0.2, c=5.7)
+# sroe = trajectory(roe, 500; dt = 0.05, Ttr = 100.0)
+# writedlm("2.csv", sroe)
+sroe = readdlm(joinpath(tsfolder, "2.csv"))
+
 theiler = 57
 
 @testset "beta statistic" begin
     ## Test beta_statistic (core algorithm of mdop_embedding)
 
     taus = 0:100
-    β = @inferred DelayEmbeddings.beta_statistic(Y, s, taus, theiler)
+    β = DelayEmbeddings.beta_statistic(Y, s, taus, theiler)
     maxi, max_idx = findmax(β)
 
     @test maxi>4.1
@@ -71,7 +78,7 @@ theiler = 57
 
     # test different tau range
     taus2 = 1:4:100
-    β2 = @inferred beta_statistic(Y, s, taus2, theiler)
+    β2 = beta_statistic(Y, s, taus2, theiler)
     maxi2, max_idx2 = findmax(β2)
 
     @test maxi2>4.1
@@ -133,18 +140,12 @@ end
 end
 
 @testset "estimate τ max (Roessler)" begin
-    # For comparison reasons using Travis CI we carry out the integration on a UNIX
-    # OS and save the resulting time series
-    # roe = Systems.roessler([1.0, 0, 0]; a=0.2, b=0.2, c=5.7)
-    # sroe = trajectory(roe, 500; dt = 0.05, Ttr = 100.0)
-    # writedlm("2.csv", sroe)
 
-    sroe = readdlm(joinpath(tsfolder, "2.csv"))
     tws = 25:32
 
-    τ_m, L = @inferred mdop_maximum_delay(sroe[:, 2], tws)
+    τ_m, L = mdop_maximum_delay(sroe[:, 2], tws)
     @test τ_m == 26
-    τ_m, Ls = @inferred mdop_maximum_delay(Dataset(sroe[:, 1:2]), tws)
+    τ_m, Ls = mdop_maximum_delay(Dataset(sroe[:, 1:2]), tws)
     @test τ_m == 26
 
     # # reproduce Fig.2 of the paper
@@ -171,13 +172,6 @@ end
 end
 
 @testset "mdop_embedding multivariate" begin
-    # For comparison reasons using Travis CI we carry out the integration on a UNIX
-    # OS and save the resulting time series
-    # roe = Systems.roessler([1.0, 0, 0]; a=0.2, b=0.2, c=5.7)
-    # sroe = trajectory(roe, 500; dt = 0.05, Ttr = 100.0)
-    # writedlm("2.csv", sroe)
-
-    sroe = readdlm(joinpath(tsfolder, "2.csv"))
     tra = Dataset(sroe)
     w1 = estimate_delay(sroe[:,1], "mi_min")
     w2 = estimate_delay(sroe[:,2], "mi_min")
@@ -188,9 +182,9 @@ end
 
     Y, τ_vals, ts_vals, FNNs, betas =  mdop_embedding(sroe[:,1]; τs = taus, w = theiler, max_num_of_cycles = mc)
 
-    max_idx, ts_number = @inferred DelayEmbeddings.choose_optimal_tau2(betas)
+    max_idx, ts_number = DelayEmbeddings.choose_optimal_tau2(betas)
     @test ts_number == 1
-    @test taus[max_idx] == τ_vals[2]
+    @test taus[max_idx] == τ_vals[2] == 26
 
     Y2, τ_vals2, ts_vals2, FNNs2, betas2 = mdop_embedding(tra; τs = taus, w = theiler, max_num_of_cycles = mc)
     ttra = standardize(tra)

diff --git a/test/unified/test_garcia.jl b/test/unified/test_garcia.jl
@@ -1,14 +1,21 @@
 using DynamicalSystemsBase
 using DelayEmbeddings
-using Test
-import Peaks
+using Test, DelimitedFiles
 
 println("\nTesting garcia_almeida.jl...")
 @testset "Garcia & Almeida method" begin
 
-lo = Systems.lorenz()
-
-tr = trajectory(lo, 80; Δt = 0.01, Ttr = 10)
+# Test Lorenz example
+# For comparison reasons using Travis CI we carry out the integration on a UNIX
+# OS and save the resulting time series
+# See https://github.com/JuliaDynamics/JuliaDynamics for the storage of
+# the time series used for testing
+#
+# u0 = [0, 10.0, 0.0]
+# lo = Systems.lorenz(u0; σ=10, ρ=28, β=8/3)
+# tr = trajectory(lo, 100; Δt = 0.01, Ttr = 100)
+tr = readdlm(joinpath(tsfolder, "test_time_series_lorenz_standard_N_10000_multivariate.csv"))
+tr = Dataset(tr)
 
 x = tr[:, 1]
 Y = Dataset(x)
@@ -25,14 +32,16 @@ Y = standardize(Y)
     @test NN_distances[1][1:50] == NN_distances2[1][1:50]
     @test NN_distances[5][1:100] == NN_distances2[5][1:100]
 
-    min_dist = 12
-    (max_1_idx,_) = Peaks.findmaxima(N,min_dist)
-    (max_2_idx,_) = Peaks.findmaxima(N2,min_dist)
+    (max_1_idx,_) = DelayEmbeddings.findlocalextrema(N)
+    (max_2_idx,min_2_idx) = DelayEmbeddings.findlocalextrema(N2)
+
+    @test 22 ≤ max_1_idx[5] ≤ 24
+    @test 35 ≤ max_1_idx[6] ≤ 37
+    @test 79 ≤ max_1_idx[12] ≤ 81
 
-    @test 30 ≤ max_1_idx[1] ≤ 35
-    @test 30 ≤ max_2_idx[1] ≤ 35
-    @test 69 ≤ max_1_idx[2] ≤ 75
-    @test 69 ≤ max_2_idx[2] ≤ 75
+    @test 11 ≤ min_2_idx[1] ≤ 13
+    @test 36 ≤ max_2_idx[3] ≤ 38
+    @test 79 ≤ max_2_idx[7] ≤ 81
 
     # # plot N-Statistic for the Lorenz system as in Fig. 2(a) in [^Garcia2005a]
     # using Plots
@@ -48,13 +57,16 @@ end
 @testset "garcia_embed univariate" begin
     Y_act, τ_vals, ts_vals, FNNs, NS = garcia_almeida_embedding(x; τs=0:100,  w = 17, T = 17)
     Y_act2, τ_vals2, ts_vals2, FNNs2, NS2 = garcia_almeida_embedding(x; τs=0:100,  w = 1, T = 1)
+    Y_act3, τ_vals3, ts_vals3, FNNs3, NS3 = garcia_almeida_embedding(x; τs=0:100,  w = 17, T = 17, fnn_thres=0.1)
 
-    @test size(Y_act,2) == 2
+    @test size(Y_act,2) == 3
     @test size(Y_act2,2) == 3
-    @test 10 ≤ τ_vals[2] ≤ 18
+    @test 15 ≤ τ_vals[2] ≤ 17
     @test 1 ≤ τ_vals2[2] ≤ 3
-    @test FNNs[end]<=0.05
+    @test FNNs[1] ≥ FNNs[2] ≤ FNNs[3]
     @test FNNs2[end]>FNNs2[end-1]
+    @test size(Y_act3,2) == 2
+    @test τ_vals3 == τ_vals[1:2]
 
     # using Plots
     # plot3d(Y_act[:,1],Y_act[:,2],Y_act[:,3], marker=2, camera = (6, 4))
@@ -66,15 +78,19 @@ end
 
 
 @testset "garcia_embed multivariate" begin
+
     Y_act, τ_vals, ts_vals, FNNs, NS = garcia_almeida_embedding(tr; τs=0:100,  w = 17, T = 17)
     Y_act2, τ_vals2, ts_vals2, FNNs2, NS2 = garcia_almeida_embedding(tr; τs=0:100,  w = 1, T = 1)
 
-    @test size(Y_act,2) == size(Y_act2,2) == 3
-    @test ts_vals[1] == ts_vals2[1] == ts_vals2[2] == ts_vals2[3] ==1
-    @test ts_vals[2] == 2
-    @test ts_vals[3] == 3
-    @test 10 ≤ τ_vals[3] ≤ 18
-    @test 1 ≤ τ_vals2[3] ≤ 3
+    @test size(Y_act,2) == 5
+    @test size(Y_act2,2) == 3
+    @test ts_vals[1] == ts_vals[4] == 1
+    @test ts_vals[2] == ts_vals[5] == 3
+    @test ts_vals[3] == 2
+    @test τ_vals == [0, 5, 5, 38, 55]
+
+    @test ts_vals2 == [1, 1, 1]
+    @test τ_vals2 == [0, 1, 2]
 
     # try to reproduce Fig.2a in [^Garcia2005b]
     tra = Dataset(hcat(tr[:,1], tr[:,3]))

diff --git a/test/unified/test_pecora.jl b/test/unified/test_pecora.jl
@@ -1,103 +1,80 @@
 using DelayEmbeddings, DynamicalSystemsBase
 using Test
 using Random
-import Peaks
 
 @testset "Pecora" begin
 # %% Generate data
 lor = Systems.lorenz([0.0;1.0;0.0];ρ=60)
 data = trajectory(lor, 200; Δt=0.02, Ttr = 10)
 metric = Chebyshev()
 
-UNDERSAMPLING = false
-
 @testset "Pecora univariate" begin
 # %% Timeseries case
 
-s = data[:, 2] # input timeseries = first entry of lorenz
-optimal_τ = estimate_delay(s, "mi_min")
-Tmax = 100
-K = 14
-samplesize = 1
+    s = data[:, 2] # input timeseries = first entry of lorenz
+    optimal_τ = estimate_delay(s, "mi_min")
+    Tmax = 100
+    K = 14
+    samplesize = 1
 
-# using PyPlot
-# pygui(true)
-# figure()
-# ax1 = subplot(211)
-# ylabel("⟨ε★⟩")
-# ax2 = subplot(212)
-# xlabel("τ (index units)")
-# ylabel("⟨Γ⟩")
+    τs = (0,)
+    Random.seed!(123)
+    es_ref, Γs = pecora(s, τs; delays = 0:Tmax, w = optimal_τ, samplesize = samplesize, K = K, metric = metric)
+    (max1,_) = DelayEmbeddings.findlocalextrema(vec(es_ref))
+    @test optimal_τ - 2 ≤ max1[1]-1 ≤ optimal_τ + 2
+    maxi = maximum(es_ref)
+    @test maxi ≤ 1.3
 
-τs = (0,)
-Random.seed!(123)
-es_ref, Γs = pecora(s, τs; delays = 0:Tmax, w = optimal_τ, samplesize = samplesize, K = K, metric = metric, undersampling = UNDERSAMPLING)
-(max1,_) = Peaks.findmaxima(vec(es_ref))
-@test optimal_τ - 2 ≤ max1[1]-1 ≤ optimal_τ + 2
-maxi = maximum(es_ref)
-@test maxi ≤ 1.3
-# ax1.plot(es_ref, label = "τs = $(τs)")
-# ax2.plot(Γs)
+    Random.seed!(123)
+    es_ref2, Γs = pecora(s, τs; delays = 0:Tmax, w = optimal_τ, K = K, metric = metric)
+    (max2,_) = DelayEmbeddings.findlocalextrema(vec(es_ref2))
+    @test optimal_τ - 2 ≤ max2[1]-1 ≤ optimal_τ + 2
+    maxi2 = maximum(es_ref2)
+    @test maxi - .1 ≤ maxi2 ≤ maxi + .1
 
-τs = (0, max1[1]-1,)
-Random.seed!(123)
-es, Γs = pecora(s, τs; delays = 0:Tmax, w = optimal_τ, samplesize = samplesize, K = K, metric = metric, undersampling = UNDERSAMPLING)
-(max2,_) = Peaks.findmaxima(vec(es))
-(min1,_) = Peaks.findminima(vec(es))
-@test 1 ≤ max2[1]-1 ≤ 4
-@test optimal_τ - 2 ≤ min1[1]-1 ≤ optimal_τ + 2
-# ax1.plot(es, label = "τs = $(τs)")
-# ax2.plot(Γs)
+    τs = (0, max1[1]-1,)
+    Random.seed!(123)
+    es, Γs = pecora(s, τs; delays = 0:Tmax, w = optimal_τ, samplesize = samplesize, K = K, metric = metric)
+    (max2,min1) = DelayEmbeddings.findlocalextrema(vec(es))
+    @test 1 ≤ max2[1]-1 ≤ 4
+    @test optimal_τ - 2 ≤ min1[2]-1 ≤ optimal_τ + 2
 
-τs = (0, max1[1]-1, max2[1]-1)
-Random.seed!(123)
-es, Γs = pecora(s, τs; delays = 0:Tmax, w = optimal_τ, samplesize = samplesize, K = K, metric = metric, undersampling = UNDERSAMPLING)
-(min2,_) = Peaks.findminima(vec(es))
-@test max2[1]-2 ≤ min2[1]-1 ≤ max2[1]
-# ax1.plot(es, label = "τs = $(τs)")
-# ax2.plot(Γs)
-# ax1.legend()
-# ax1.set_title("lorenz data")
+    τs = (0, max1[1]-1, max2[1]-1)
+    Random.seed!(123)
+    es, Γs = pecora(s, τs; delays = 0:Tmax, w = optimal_τ, samplesize = samplesize, K = K, metric = metric)
+    (_,min2) = DelayEmbeddings.findlocalextrema(vec(es))
+    @test max2[1]-2 ≤ min2[1]-1 ≤ max2[1]
 
 end
 
 @testset "Pecora multivariate" begin
 ## %% Trajectory case
-s = Dataset(data)
-optimal_τ = estimate_delay(s[:,2], "mi_min")
-Tmax = 100
-K = 14
-samplesize = 1
+    s = Dataset(data)
+    optimal_τ = estimate_delay(s[:,2], "mi_min")
+    Tmax = 100
+    K = 14
+    samplesize = 1
+
+    js = (2,)
+    τs = (0,)
+    Random.seed!(123)
+    es_ref, _ = pecora(s[:,2], τs; delays = 0:Tmax, w = optimal_τ, samplesize = samplesize, K = K, metric = metric)
+    Random.seed!(123)
+    es_ref2, _ = pecora(s, τs, js; delays = 0:Tmax, w = optimal_τ, samplesize = .2, K = K, metric = metric)
+    Random.seed!(123)
+    es, _= pecora(s, τs, js; delays = 0:Tmax, samplesize = samplesize, w = optimal_τ, K = K, metric = metric)
 
-js = (2,)
-τs = (0,)
-Random.seed!(123)
-es_ref, Γs = pecora(s[:,2], τs; delays = 0:Tmax, w = optimal_τ, samplesize = samplesize, K = K, metric = metric, undersampling = UNDERSAMPLING)
-Random.seed!(123)
-es, Γs = pecora(s, τs, js; delays = 0:Tmax, samplesize = samplesize, w = optimal_τ, K = K, metric = metric, undersampling = UNDERSAMPLING)
 
-@test round.(es[:,2], digits = 4) == round.(vec(es_ref),digits = 4)
-(x_maxi,_) = Peaks.findmaxima(es[:,1])
-@test 8 ≤ x_maxi[1]-1 ≤ 10
-(z_maxi,_) = Peaks.findmaxima(es[:,3])
-@test 13 ≤ z_maxi[1]-1 ≤ 15
+    @test round.(es[:,2], digits = 4) == round.(vec(es_ref),digits = 4)
+    (x_maxi,_) = DelayEmbeddings.findlocalextrema(es[:,1])
+    @test 8 ≤ x_maxi[2]-1 ≤ 10
+    (z_maxi,_) = DelayEmbeddings.findlocalextrema(es[:,3])
+    @test 13 ≤ z_maxi[2]-1 ≤ 15
+    (x_maxi_ref,_) = DelayEmbeddings.findlocalextrema(es[:,2])
+    (x_maxi_ref2,_) = DelayEmbeddings.findlocalextrema(es_ref2[:,2])
 
-# using PyPlot
-# pygui(true)
-# figure()
-# subplot(211)
-# for i in 1:3
-#     plot(es[:, i], label = "τs = $(τs), js = $(js), J = $(i)")
-# end
-# ylabel("⟨ε★⟩")
-# title("lorenz system, multiple timeseries")
-# legend()
-# grid()
-# subplot(212)
-# plot(es_ref, label = "τs = $(τs), js = $(js), J = 2")
-# ylabel("⟨ε★⟩-ref")
-# xlabel("τ (index units)")
-# grid()
+    @test x_maxi_ref[1:4] == x_maxi_ref2[1:4]
+    @test es[x_maxi_ref[1:4],2] .- .1 ≤ es_ref2[x_maxi_ref2[1:4],2] ≤ es[x_maxi_ref[1:4],2] .+ .1
 
 end
 end