Skip to content

Commit

Permalink
Merge pull request #57 from JuliaDynamics/PR-Hegger_Cao_dimension
Browse files Browse the repository at this point in the history
hegger dimension and automated Cao's method
  • Loading branch information
Datseris committed Nov 14, 2020
2 parents 82ce3bd + 8817dbb commit c20d2fa
Show file tree
Hide file tree
Showing 18 changed files with 1,044 additions and 680 deletions.
12 changes: 12 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,3 +1,15 @@
# v1.12.0
* Possible delay times in `optimal_traditional_de` are now `1:100` for increased accuracy.
* New method for univariate non-unified delay embedding by Hegger, Kantz
* It is now possible to `embed` in one dimension (which just returns the vector as a Dataset)
* New function `optimal_traditional_de` for automated delay embeddings
* `delay_afnn, delay_ifnn, delay_fnn, delay_f1nn` are part of public API now.
* The argument `γs` and the function `reconstruct` is starting to be phased out in
favor of `ds` and `embed`.

## Deprecations
* Using `estimate_dimension` is deprecated in favor of either calling `afnn, fnn, ...` directly or using the function `optimal_traditional_de`

# v1.11.0
* Dropped RecipesBase
* Now exports `dimension` (there was a problem where we have forgotten to export `dimension` from here, and `DynamicalSystemsBase` was overwritting it)
Expand Down
4 changes: 2 additions & 2 deletions Project.toml
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
name = "DelayEmbeddings"
uuid = "5732040d-69e3-5649-938a-b6b4f237613f"
repo = "https://github.com/JuliaDynamics/DelayEmbeddings.jl.git"
version = "1.11.0"
version = "1.12.0"

[deps]
Distances = "b4f34e82-e78d-54a5-968a-f98e89d6e8f7"
Expand All @@ -21,7 +21,7 @@ NearestNeighbors = "0.4"
Neighborhood = "0.1, 0.2"
StaticArrays = "0.8, 0.11, 0.12"
StatsBase = "0.24, 0.32, 0.33"
julia = "1"
julia = "1.5"

[extras]
DelimitedFiles = "8bb1440f-4735-579b-a4ab-409b98df4dab"
Expand Down
16 changes: 9 additions & 7 deletions src/DelayEmbeddings.jl
Original file line number Diff line number Diff line change
Expand Up @@ -6,14 +6,16 @@ module DelayEmbeddings

include("dataset.jl")
include("embeddings.jl")
include("various.jl")
include("utils.jl")
include("neighborhoods.jl")

include("estimate_delay.jl")
include("estimate_dimension.jl")
include("pecora.jl")
include("uzal_cost.jl")
include("MDOP.jl")
include("garcia_almeida.jl")
include("traditional_de/estimate_delay.jl")
include("traditional_de/estimate_dimension.jl")
include("traditional_de/automated.jl")

include("unified_de/pecora.jl")
include("unified_de/uzal_cost.jl")
include("unified_de/MDOP.jl")
include("unified_de/garcia_almeida.jl")

end
6 changes: 2 additions & 4 deletions src/dataset.jl
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
using StaticArrays, LinearAlgebra
using Base.Iterators: flatten

export Dataset, AbstractDataset, minima, maxima
export Dataset, AbstractDataset, SVector, minima, maxima
export minmaxima, columns, regularize, dimension

abstract type AbstractDataset{D, T} end
Expand Down Expand Up @@ -101,7 +101,6 @@ end
###########################################################################
# Concrete implementation
###########################################################################

"""
Dataset{D, T} <: AbstractDataset{D,T}
A dedicated interface for datasets.
Expand Down Expand Up @@ -146,8 +145,7 @@ Dataset(s::Dataset) = s
###########################################################################
# Dataset(Vectors of stuff)
###########################################################################
Dataset(s::AbstractVector{T}) where {T<:Number} =
Dataset{1, T}(reinterpret(SVector{1, T}, s))
Dataset(s::AbstractVector{T}) where {T} = Dataset(SVector.(s))

function Dataset(v::Vector{<:AbstractArray{T}}) where {T<:Number}
D = length(v[1])
Expand Down
4 changes: 2 additions & 2 deletions src/embeddings.jl
Original file line number Diff line number Diff line change
Expand Up @@ -134,14 +134,14 @@ Systems and Turbulence*, Lecture Notes in Mathematics **366**, Springer (1981)
"""
function reconstruct(s::AbstractVector{T}, γ, τ) where {T}
if γ == 0
return Dataset{1, T}(s)
return Dataset(s)
end
de::DelayEmbedding{γ} = DelayEmbedding(Val{γ}(), τ)
return reconstruct(s, de)
end
function reconstruct(s::AbstractVector{T}, γ, τ, w) where {T}
if γ == 0
return Dataset{1, T}(s)
return Dataset(s)
end
de = WeightedDelayEmbedding(Val{γ}(), τ, w)
return reconstruct(s, de)
Expand Down
187 changes: 187 additions & 0 deletions src/traditional_de/automated.jl
Original file line number Diff line number Diff line change
@@ -0,0 +1,187 @@
export optimal_traditional_de

"""
optimal_traditional_de(s, method = "afnn", dmethod = "mi_min"; kwargs...) → 𝒟, τ, E
Produce an optimal delay embedding `𝒟` of the given timeseries `s` by
using the traditional approach of first finding an optimal (and constant) delay
time using [`estimate_delay`](@ref) with the given `dmethod`, and then an optimal
embedding dimension, by calculating an appropriate statistic for each dimension `d ∈ 1:dmax`.
Return the embedding `𝒟`, the optimal delay time `τ`
(the optimal embedding dimension `d` is just `size(𝒟, 2)`) and the actual
statistic `E` used to estimate optimal `d`.
Notice that `E` is a function of the embedding dimension, which ranges from 1 to `dmax`.
For calculating `E` to estimate the dimension we use the given `method` which can be:
* `"afnn"` (default) is Cao's "Averaged False Nearest Neighbors" method[^Cao1997],
which gives a ratio of distances between nearest neighbors.
* `"ifnn"` is the "Improved False Nearest Neighbors" from Hegger & Kantz[^Hegger1999],
which gives the fraction of false nearest neighbors.
* `"fnn"` is Kennel's "False Nearest Neighbors" method[^Kennel1992], which gives
the number of points that cease to be "nearest neighbors" when the dimension
increases.
* `"f1nn"` is Krakovská's "False First Nearest Neighbors" method[^Krakovská2015],
which gives the ratio of pairs of points that cease to be "nearest neighbors"
when the dimension increases.
For more details, see individual methods: [`delay_afnn`](@ref), [`delay_ifnn`](@ref),
[`delay_fnn`](@ref), [`delay_f1nn`](@ref).
The special keywords `` denote for which possible embedding
dimensions should the statistics be computed for.
!!! warn "Careful in automated methods"
While this method is automated if you want to be **really sure** of the results,
you should directly calculate the statistic and plot its values versus the
dimensions.
## Keywords
The keywords
```
τs = 1:100, dmax = 10
```
denote which delay times and embedding dimensions `ds ∈ 1:dmax` to consider when calculating
optimal embedding. All remaining keywords are propagated to the low level functions:
```
fnn_thres::Real = 0.05, slope_thres::Real= 0.2, w::Int=1,
rtol=10.0, atol=2.0, τs = 1:100, metric = Euclidean(), r::Real=2.0
```
## Description
We estimate the optimal embedding dimension based on the given delay time gained
from `dmethod` as follows: For Cao's method the optimal dimension is reached,
when the slope of the `E₁`-statistic (output from `"afnn"`) falls below the
threshold `slope_thres` (Default is .05) and the according stochastic test turns
out to be false, i.e. when the `E₂`-statistic is not "equal" to 1 for all en-
countered dimensions. We treat `E₂`-values as equal to 1, when `1-E₂ ≤ fnn_thres`.
For all the other methods we return the optimal embedding dimension
when the corresponding FNN-statistic (output from `"ifnn"`, `"fnn"` or `"f1nn"`)
falls below the fnn-threshold `fnn_thres` (Default is .05) AND the slope of the
statistic falls below the threshold `slope_thres`. Note that with noise
contaminated time series, one might need to adjust `fnn_thres` according to the
noise level.
See also the file `test/compare_different_dimension_estimations.jl` for a comparison.
[^Cao1997]: Liangyue Cao, [Physica D, pp. 43-50 (1997)](https://www.sciencedirect.com/science/article/pii/S0167278997001188?via%3Dihub)
[^Kennel1992]: M. Kennel *et al.*, [Phys. Review A **45**(6), (1992)](https://journals.aps.org/pra/abstract/10.1103/PhysRevA.45.3403).
[^Krakovská2015]: Anna Krakovská *et al.*, [J. Complex Sys. 932750 (2015)](https://doi.org/10.1155/2015/932750)
[^Hegger1999]: Hegger & Kantz, [Improved false nearest neighbor method to detect determinism in time series data. Physical Review E 60, 4970](https://doi.org/10.1103/PhysRevE.60.4970).
"""
function optimal_traditional_de(s::AbstractVector, dimensionmethod::String = "afnn",
delaymethod::String= "mi_min";
fnn_thres::Real = 0.05, slope_thres::Real = .05, dmax::Int = 10, w::Int=1,
rtol=10.0, atol=2.0, τs = 1:100, metric = Euclidean(), r::Real=2.0,
)

# TODO: This function needs to be reworked to use dimensions 2:dmax.
# starting with dimension 1:dmax is pointless, because 1 can **never** be
# a proper embedding dimension!!!

@assert dimensionmethod ("afnn", "fnn", "ifnn", "f1nn")
τ = estimate_delay(s, delaymethod, τs)
ds = 1:dmax
γs = ds .- 1 # TODO: This must be updated to dimension in 2.0

if dimensionmethod=="afnn"
dimension_statistic = delay_afnn(s, τ, ds, metric)
Y, τ = find_cao_optimal(s, τ, dimension_statistic, slope_thres)
E2 = stochastic_indicator(s, τ, ds)
flag = is_stochastic(E2, fnn_thres)
flag && println("Stochastic signal, valid embedding NOT achieved ⨉.")
elseif dimensionmethod=="fnn"
dimension_statistic = delay_fnn(s, τ, ds; rtol, atol)
Y, τ = find_fnn_optimal(s, τ, dimension_statistic, fnn_thres, slope_thres)
elseif dimensionmethod=="ifnn"
dimension_statistic = delay_ifnn(s, τ, ds; r, w, metric)
Y, τ = find_fnn_optimal(s, τ, dimension_statistic, fnn_thres, slope_thres)
elseif dimensionmethod=="f1nn"
dimension_statistic = delay_f1nn(s, τ, ds, metric)
Y, τ = find_fnn_optimal(s, τ, dimension_statistic, fnn_thres, slope_thres)
end
return Y, τ, dimension_statistic
end


"""
Helper function for selecting the appropriate embedding dimension from the
statistic when using Kennel's, Hegger's or Krakovskas's method.
"""
function find_fnn_optimal(s::Vector{T}, τ::Int, rat::Vector, fnn_thres::Real,
slope_thres::Real) where {T}
@assert length(rat) > 1
y = abs.(diff(rat))
flag = false
m = 0
for i = 2:length(rat)
if rat[i] fnn_thres && y[i-1] slope_thres
m = i
break
elseif rat[i] > rat[i-1]
flag = true
m = i-1
break
end
end
if m == length(rat) || m == 0
println("Sufficiently small FNNs NOT reached. "*
"Valid embedding NOT achieved ⨉.")
elseif flag
println("Algorithm stopped due to increasing FNNs. "*
"Double-check the FNN-statistic.")
else
println("Algorithm stopped due to sufficiently small FNNs. "*
"Valid embedding achieved ✓.")
end
Y = embed(s, max(1, m), τ) # you can embed in 1 dimension in latest version
return Y, τ
end


"""
Helper function for selecting the appropriate embedding dimension from the
statistic when using Cao's method.
"""
function find_cao_optimal(s::Vector{T}, τ::Int, rat::Vector, thres::Real) where {T}
m = 0
y = abs.(diff(rat))
for i = 1:length(rat)-1
if y[i] thres && rat[i] > 0.5
m = i
break
end
end
if m == 0
println("NO convergence of E₁-statistic. "*
"Valid embedding NOT achieved ⨉.")
else
Y = m > 1 ? embed(s, m, τ) : s
println("Algorithm stopped due to convergence of E₁-statistic. "*
"Valid embedding achieved ✓.")
end
Y = embed(s, max(1, m), τ) # you can embed in 1 dimension in latest version
return Y, τ
end

"""
Helper function for Cao's method, whether to decide if the input signal is
stochastic or not.
"""
function is_stochastic(rat::Vector{T}, thres::Real) where {T}
cnt = 0
for i = 1:length(rat)
if abs(1-rat[i]) thres
cnt += 1
end
end
if cnt == 0
flag = true
else
flag = false
end
return flag
end
43 changes: 4 additions & 39 deletions src/estimate_delay.jl → src/traditional_de/estimate_delay.jl
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ export estimate_delay, exponential_decay_fit, autocor
# Estimate Delay Times #
#####################################################################################
"""
estimate_delay(s, method::String [, τs = 1:2:100]; kwargs...) -> τ
estimate_delay(s, method::String [, τs = 1:100]; kwargs...) -> τ
Estimate an optimal delay to be used in [`reconstruct`](@ref) or [`embed`](@ref).
The `method` can be one of the following:
Expand All @@ -28,7 +28,7 @@ The method `mi_min` is significantly more accurate than the others and also retu
good results for most timeseries. It is however the slowest method (but still quite fast!).
"""
function estimate_delay(x::AbstractVector, method::String,
τs = 1:2:min(100, length(x)); kwargs...)
τs = 1:min(100, length(x)); kwargs...)

issorted(τs) || error("`τs` must be sorted")

Expand Down Expand Up @@ -59,7 +59,7 @@ function estimate_delay(x::AbstractVector, method::String,
return round(Int,τ)
elseif method=="exp_extrema"
c = autocor(x, τs; demean=true)
max_ind, min_ind = localextrema(c)
max_ind, min_ind = findlocalextrema(c)
idxs = sort!(append!(max_ind, min_ind))
ca = abs.(c[idxs])
τa = τs[idxs]
Expand Down Expand Up @@ -115,44 +115,9 @@ function exponential_decay_fit(X, Y, weight = :equal)
end
end

"""
localextrema(y) -> max_ind, min_ind
Find the local extrema of given array `y`, by scanning point-by-point. Return the
indices of the maxima (`max_ind`) and the indices of the minima (`min_ind`).
"""
function localextrema(y)
@inbounds begin
l = length(y)
i = 1
maxargs = Int[]
minargs = Int[]
if y[1] > y[2]
push!(maxargs, 1)
elseif y[1] < y[2]
push!(minargs, 1)
end

for i in 2:l-1
left = i-1
right = i+1
if y[left] < y[i] > y[right]
push!(maxargs, i)
elseif y[left] > y[i] < y[right]
push!(minargs, i)
end
end

if y[l] > y[l-1]
push!(maxargs, l)
elseif y[l] < y[l-1]
push!(minargs, l)
end
return maxargs, minargs
end
end

#####################################################################################
# Estimate Delay Times #
# Mutual information #
#####################################################################################
export mutualinformation
"""
Expand Down
Loading

4 comments on commit c20d2fa

@Datseris
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@JuliaRegistrator register()

@Datseris
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@JuliaRegistrator register()

@Datseris
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@JuliaRegistrator register()

@Datseris
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@JuliaRegistrator register()

Please sign in to comment.