Skip to content

Commit

Permalink
Pecuzal has samplesize keyword (#104)
Browse files Browse the repository at this point in the history
* fixed tests w.r.t. deprecations in Peaks

* samplesize keyword in uzal_cost_pecuzal(); dragged NN-computation out of loop

* samplesize keyword in uzal_cost_pecuzal(); dragged NN-computation out of loop

* adjusted tests w.r.t. recent changes in pecora()

* right version increment

* shortened keyword-call
  • Loading branch information
hkraemer committed Feb 25, 2022
1 parent 2d43d48 commit a9f782e
Show file tree
Hide file tree
Showing 3 changed files with 57 additions and 18 deletions.
2 changes: 1 addition & 1 deletion Project.toml
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
name = "DelayEmbeddings"
uuid = "5732040d-69e3-5649-938a-b6b4f237613f"
repo = "https://github.com/JuliaDynamics/DelayEmbeddings.jl.git"
version = "2.0.4"
version = "2.1.0"

[deps]
Distances = "b4f34e82-e78d-54a5-968a-f98e89d6e8f7"
Expand Down
43 changes: 26 additions & 17 deletions src/unified_de/pecuzal.jl
Original file line number Diff line number Diff line change
Expand Up @@ -361,7 +361,7 @@ function local_L_statistics(ε★::Vector{T}, Y_act::Dataset{D, T}, s::Vector{T}
Y_trial = DelayEmbeddings.hcat_lagged_values(Y_act, s, τs[τ_idx-1])
# compute L-statistic for Y_act and Y_trial and get the maximum decrease
L_decrease[i] = uzal_cost_pecuzal(Y_act, Y_trial, τs[end]; K = KNN,
w = w, metric = metric, econ = econ)
w , metric, econ, samplesize)
end
return L_decrease, max_idx
end
Expand Down Expand Up @@ -451,13 +451,16 @@ minimum. Return the according minimum `L_decrease`-value.
* `econ::Bool = false`: Economy-mode for L-statistic computation. Instead of
computing L-statistics for time horizons `2:Tw`, here we only compute them for
`2:2:Tw`.
* `samplesize::Real = 1`: determines the fraction of all phase space points
  to be considered (fiducial points v) for computing the L-statistic.
"""
function uzal_cost_pecuzal(Y::Dataset{D, ET}, Y_trial::Dataset{DT, ET}, Tw::Int;
K::Int = 3, w::Int = 1, econ::Bool = false, metric = Euclidean()
) where {D, DT, ET}
K::Int = 3, w::Int = 1, econ::Bool = false, metric = Euclidean(),
samplesize::Real = 1) where {D, DT, ET}

@assert DT == D+1
@assert Tw ≥ 0
@assert 0 < samplesize ≤ 1.0 "`samplesize` must be ∈ (0,1]"

if econ
tws = 2:2:Tw # start at 2 will eliminate bad results for noise
Expand All @@ -479,31 +482,37 @@ function uzal_cost_pecuzal(Y::Dataset{D, ET}, Y_trial::Dataset{DT, ET}, Tw::Int;

dist_former = 9999999 # initial L-decrease

# loop over each time horizon
cnt = 1
for T in tws
NN = length(Y_trial)-T
if NN < 1
error("Time series too short for given possible delays and Theiler window to find enough nearest neighbours")
end
NN = length(Y_trial)-tws[end]
if NN < 1
error("Time series too short for given possible delays and Theiler window to find enough nearest neighbours")
end
if samplesize==1
ns = 1:NN
Nfp = length(ns)
else
Nfp = Int(floor(samplesize*NN)) # number of considered fiducial points
ns = sample(vec(1:NN), Nfp, replace = false) # indices of fiducial points
end

vs = Y[ns] # the fiducial points in the data set
vs_trial = Y_trial[ns] # the fiducial points in the data set
vs = Y[ns] # the fiducial points in the data set
vs_trial = Y_trial[ns] # the fiducial points in the data set

vtree = KDTree(Y[1:NN], metric)
allNNidxs, allNNdist = DelayEmbeddings.all_neighbors(vtree, vs, ns, K, w)
vtree_trial = KDTree(Y_trial[1:NN], metric)
allNNidxs_trial, allNNdist_trial = DelayEmbeddings.all_neighbors(vtree_trial, vs_trial, ns, K, w)
vtree = KDTree(Y[1:NN], metric)
allNNidxs, allNNdist = DelayEmbeddings.all_neighbors(vtree, vs, ns, K, w)
vtree_trial = KDTree(Y_trial[1:NN], metric)
allNNidxs_trial, allNNdist_trial = DelayEmbeddings.all_neighbors(vtree_trial, vs_trial, ns, K, w)

# loop over each time horizon
cnt = 1
for T in tws
# compute conditional variances and neighborhood-sizes
compute_conditional_variances!(ns, vs, vs_trial, allNNidxs,
allNNidxs_trial, Y, Y_trial, ϵ_ball, ϵ_ball_trial, u_k, u_k_trial,
T, K, metric, ϵ², ϵ²_trial, E², E²_trial, cnt)

# compute distance of L-values and check whether that distance can be
# increased
dist = compute_L_decrease(E², E²_trial, ϵ², ϵ²_trial, cnt, NN)
dist = compute_L_decrease(E², E²_trial, ϵ², ϵ²_trial, cnt, Nfp)
if isnan(dist)
error("Computed 0-distances, due to duplicate datapoints in your data. Try to add minimal additive noise to the signal you wish to embed and try again.")
end
Expand Down
30 changes: 30 additions & 0 deletions test/unified/test_pecuzal_embedding.jl
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,19 @@ tr = Dataset(tr)
@test τ_vals[4] == 80
@test length(ts_vals) == 4

# lower samplesize
Random.seed!(123)
@time Y2, τ_vals2, ts_vals2, Ls2 , εs2 = pecuzal_embedding(s[1:5000];
τs = 0:Tmax , w = w, L_threshold = 0.05, econ=true, samplesize = 0.5)
@test Ls[1] - .1 < Ls2[1] < Ls[1] + .1
@test Ls[2] - .1 < Ls2[2] < Ls[2] + .1
@test Ls[3] - .1 < Ls2[3] < Ls[3] + .1

@test τ_vals2[2] == 19
@test τ_vals2[3] == 97
@test τ_vals2[4] == 62
@test length(ts_vals2) == 4

@time Y, τ_vals, ts_vals, Ls , εs = pecuzal_embedding(s;
τs = 0:Tmax , w = w, L_threshold = 0.05)
@test -0.95 < Ls[1] < -0.91
Expand Down Expand Up @@ -74,6 +87,23 @@ end
@test -0.76 < Ls[2] < -0.72
@test -0.1 < Ls[3] < -0.06

# less fiducial points for computation
Random.seed!(123)
@time Y2, τ_vals2, ts_vals2, Ls2 , ε★2 = pecuzal_embedding(tr[1:5000,:];
τs = 0:Tmax , w = w, econ = true, samplesize = 0.5)

@test length(ts_vals2) == 4
@test ts_vals2[2] == ts_vals2[3] == ts_vals2[4] == 1
@test ts_vals2[1] == 3
@test τ_vals2[1] == 0
@test τ_vals2[2] == 7
@test τ_vals2[3] == 61
@test τ_vals2[4] == 51
@test Ls[1] - .1 < Ls2[1] < Ls[1] + .1
@test Ls[2] - .1 < Ls2[2] < Ls[2] + .1
@test Ls[3] - .1 < Ls2[3] < Ls[3] + .1

# L-threshold
@time Y, τ_vals, ts_vals, Ls , ε★ = pecuzal_embedding(tr[1:5000,:];
τs = 0:Tmax , w = w, econ = true, L_threshold = 0.2)
@test -1.40 < Ls[1] < -1.36
Expand Down

0 comments on commit a9f782e

Please sign in to comment.