Skip to content

Commit

Permalink
implement SubDataset (#76)
Browse files Browse the repository at this point in the history
* implement SubDataset

* fix test on view(::Dataset,...)

* test equal dimensions in views

* update CHANGELOG and version

* updated definitions to abstract

Co-authored-by: Datseris <[email protected]>
  • Loading branch information
heliosdrm and Datseris committed Dec 11, 2020
1 parent 9b8bbc6 commit 4507be8
Show file tree
Hide file tree
Showing 15 changed files with 68 additions and 20 deletions.
3 changes: 2 additions & 1 deletion CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,8 +1,9 @@
# v1.18.0
* `mutualinformation` is deprecated in favor of `selfmutualinfo`.
* `view` is now applicable to `AbstractDataset`, producing objects of the new type `SubDataset`.

# v1.17.0
* All code related to neighborhoods and finding nearest neighbors has moved to Neighborhood.jl, and thus old names like `FixedMassNeighborhood` and `neighborhood` have been deprecated.
* `mutualinformation` is deprecated in favor of `selfmutualinfo`.

# v1.16.0
* Arbitrary weights can be given as options to `genembed`.
Expand Down
2 changes: 1 addition & 1 deletion Project.toml
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
name = "DelayEmbeddings"
uuid = "5732040d-69e3-5649-938a-b6b4f237613f"
repo = "https://github.com/JuliaDynamics/DelayEmbeddings.jl.git"
version = "1.17.0"
version = "1.18.0"

[deps]
Distances = "b4f34e82-e78d-54a5-968a-f98e89d6e8f7"
Expand Down
1 change: 1 addition & 0 deletions src/DelayEmbeddings.jl
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ Basic package used in the ecosystem of DynamicalSystems.jl.
module DelayEmbeddings

include("dataset.jl")
include("subdataset.jl")
include("embeddings.jl")
include("utils.jl")
include("neighborhoods.jl")
Expand Down
4 changes: 2 additions & 2 deletions src/dataset.jl
Original file line number Diff line number Diff line change
Expand Up @@ -339,8 +339,8 @@ using Statistics
Create a regularized version of the input dataset where each timeseries (column)
is transformed to have mean 0 and standard deviation 1.
"""
regularize(d::Dataset) = Dataset(regularized_timeseries(d)[1]...)
function regularized_timeseries(d::Dataset)
regularize(d::AbstractDataset) = Dataset(regularized_timeseries(d)[1]...)
function regularized_timeseries(d::AbstractDataset)
xs = columns(d)
means = mean.(xs)
stds = std.(xs)
Expand Down
4 changes: 2 additions & 2 deletions src/embeddings.jl
Original file line number Diff line number Diff line change
Expand Up @@ -288,7 +288,7 @@ end
const Data{T} = Union{Dataset{D, T}, AbstractVector{T}} where {D}

# dataset version
@generated function (g::GeneralizedEmbedding{D, W})(s::Dataset{Z, T}, i::Int) where {D,W,Z,T}
@generated function (g::GeneralizedEmbedding{D, W})(s::AbstractDataset{Z, T}, i::Int) where {D,W,Z,T}
gens = if W == Nothing
[:(s[i + g.τs[$k], g.js[$k]]) for k=1:D]
else
Expand Down Expand Up @@ -330,7 +330,7 @@ The generalized embedding works as follows:
`τs` is allowed to have *negative entries* as well.
- `js` denotes which of the timeseries contained in `s`
will be used for the entries of the delay vector. `js` can contain duplicate indices.
- `ws` are optional weights that weight each embedded entry (the i-th entry of the
- `ws` are optional weights that weight each embedded entry (the i-th entry of the
delay vector is weighted by `ws[i]`). If provided, it is recommended that `ws[1] = 1`
`τs, js, ws` are tuples (or vectors) of length `D`, which also coincides with the embedding
Expand Down
2 changes: 1 addition & 1 deletion src/neighborhoods.jl
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ KDTree(D.data, metric; kwargs...)
# Convenience extensions for ::Dataset in bulksearches
for f (:bulkisearch, :bulksearch)
for nt (:NeighborNumber, :WithinRange)
@eval Neighborhood.$(f)(ss::KDTree, D::Dataset, st::$nt, args...; kwargs...) =
@eval Neighborhood.$(f)(ss::KDTree, D::AbstractDataset, st::$nt, args...; kwargs...) =
$(f)(ss, D.data, st, args...; kwargs...)
end
end
Expand Down
41 changes: 41 additions & 0 deletions src/subdataset.jl
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
"""
SubDataset{D, T, <:AbstractDataset{D,T}, <:SubArray{<:SVector{D,T},1}} <: AbstractDataset{D,T}
A view of an `AbstractDataset`, as returned by the `view` function
or the `@view` macro. A `SubDataset` is an `AbstractDataset`
of the same type as its parent, so indexing and most functions
can be expected to work in the same way for both the parent and the view.
"""
struct SubDataset{D, T, P<:AbstractDataset{D,T}, S<:SubArray{<:SVector{D,T},1}} <: AbstractDataset{D,T}
parent::P
data::S
function SubDataset(par, data)
@assert parent(data) === par.data
P = typeof(par)
S = typeof(data)
SV = eltype(P)
T = eltype(SV)
D = length(SV)
new{D,T,P,S}(par, data)
end
end

# One-line description of a `SubDataset`: number of points in the view, followed by
# the dimension `D` and element type `T` taken from the type parameters.
Base.summary(sd::SubDataset{D, T}) where {D, T} =
    "$(length(sd))-element view of $D-dimensional Dataset{$(T)}"

# The dataset this view was created from (the stored `parent` field).
function Base.parent(sd::SubDataset)
    return sd.parent
end

# Indices into the parent that this view covers, forwarded from the wrapped view
# stored in the `data` field.
function Base.parentindices(sd::SubDataset)
    return parentindices(sd.data)
end

"""
view(d::Dataset, indices)
Return a view into the parent dataset `d`, as a [`SubDataset`](@ref)
that contains the datapoints of `d` referred to by `indices`.
"""
Base.view(d::AbstractDataset, i) = SubDataset(d, view(d.data, i))

# Views of a dataset select whole datapoints only, so any call with two or more
# index arguments is rejected with an `ArgumentError`.
Base.view(::AbstractDataset, ::Any, ::Any, ::Vararg) =
    throw(ArgumentError("Dataset views only accept indices on one dimension"))
6 changes: 3 additions & 3 deletions src/traditional_de/automated.jl
Original file line number Diff line number Diff line change
Expand Up @@ -111,7 +111,7 @@ end
Helper function for selecting the appropriate embedding dimension from the
statistic when using Kennel's, Hegger's or Krakovskas's method.
"""
function find_fnn_optimal(s::Vector{T}, τ::Int, rat::Vector, fnn_thres::Real,
function find_fnn_optimal(s::AbstractVector{T}, τ::Int, rat::Vector, fnn_thres::Real,
slope_thres::Real) where {T}
@assert length(rat) > 1
y = abs.(diff(rat))
Expand Down Expand Up @@ -146,7 +146,7 @@ end
Helper function for selecting the appropriate embedding dimension from the
statistic when using Cao's method.
"""
function find_cao_optimal(s::Vector{T}, τ::Int, rat::Vector, thres::Real) where {T}
function find_cao_optimal(s::AbstractVector{T}, τ::Int, rat::Vector, thres::Real) where {T}
m = 0
y = abs.(diff(rat))
for i = 1:length(rat)-1
Expand All @@ -171,7 +171,7 @@ end
Helper function for Cao's method, whether to decide if the input signal is
stochastic or not.
"""
function is_stochastic(rat::Vector{T}, thres::Real) where {T}
function is_stochastic(rat::AbstractVector{T}, thres::Real) where {T}
cnt = 0
for i = 1:length(rat)
if abs(1-rat[i]) thres
Expand Down
2 changes: 1 addition & 1 deletion src/traditional_de/estimate_dimension.jl
Original file line number Diff line number Diff line change
Expand Up @@ -218,7 +218,7 @@ function delay_f1nn(s::AbstractVector, τ::Int, ds = 2:6, metric = Euclidean())
return f1nn_ratio
end

function _compare_first_nn(s, γ::Int, τ::Int, Rγ::Dataset{D,T}, metric) where {D,T}
function _compare_first_nn(s, γ::Int, τ::Int, Rγ::AbstractDataset{D,T}, metric) where {D,T}
# This function compares the first nearest neighbors of `s`
# embedded with Dimensions `γ` and `γ+1` (the former given as input)
tree = KDTree(Rγ,metric)
Expand Down
6 changes: 3 additions & 3 deletions src/unified_de/MDOP.jl
Original file line number Diff line number Diff line change
Expand Up @@ -102,7 +102,7 @@ function mdop_embedding(s::Vector{T};
end


function mdop_embedding(Y::Dataset{D, T};
function mdop_embedding(Y::AbstractDataset{D, T};
τs = 0:50 , w::Int = 1, fnn_thres::Real = 0.05,
max_num_of_cycles = 50,
r::Real = 2) where {D, T<:Real}
Expand Down Expand Up @@ -377,7 +377,7 @@ trajectory `Y` can also be just a univariate time series.
[^Nichkawde2013]: Nichkawde, Chetan (2013). [Optimal state-space reconstruction using derivatives on projected manifold. Physical Review E 87, 022905](https://doi.org/10.1103/PhysRevE.87.022905).
[^Kennel1992]: Kennel, M. B., Brown, R., Abarbanel, H. D. I. (1992). [Determining embedding dimension for state-space reconstruction using a geometrical construction. Phys. Rev. A 45, 3403] (https://doi.org/10.1103/PhysRevA.45.3403).
"""
function beta_statistic(Y::Dataset{D,T}, s::Vector{T}, τs = 0:50, w::Int = 1) where {D, T<:Real}
function beta_statistic(Y::AbstractDataset{D,T}, s::Vector{T}, τs = 0:50, w::Int = 1) where {D, T<:Real}
@assert length(s) length(Y) "The length of the input time series `s` must be at least the length of the input trajectory `Y` "
@assert all(x -> x 0, τs)
τ_max = maximum(τs)
Expand Down Expand Up @@ -432,7 +432,7 @@ function mdop_maximum_delay(s::Vector{T}, tw=1:50, samplesize::Real=1) where {T<
return tw[τ_max], L
end

function mdop_maximum_delay(s::Dataset{D,T}, tw=1:50, samplesize::Real=1) where {D, T<:Real}
function mdop_maximum_delay(s::AbstractDataset{D,T}, tw=1:50, samplesize::Real=1) where {D, T<:Real}
τs_max = zeros(Int,D)
Ls = zeros(T, length(tw), D)
@inbounds for i = 1:D
Expand Down
4 changes: 2 additions & 2 deletions src/unified_de/garcia_almeida.jl
Original file line number Diff line number Diff line change
Expand Up @@ -113,7 +113,7 @@ function garcia_almeida_embedding(s::Vector{F}; τs = 0:50 , w::Int = 1,
end


function garcia_almeida_embedding(Y::Dataset{D, F}; τs = 0:50 , w::Int = 1,
function garcia_almeida_embedding(Y::AbstractDataset{D, F}; τs = 0:50 , w::Int = 1,
r1::Real = 10, r2::Real = 2, fnn_thres::Real = 0.05,
T::Int = 1, metric = Euclidean(), max_num_of_cycles = 50) where {D, F<:Real}

Expand Down Expand Up @@ -206,7 +206,7 @@ this embedding cycle as the value, where `N` has its first local minimum.
[^Garcia2005a]: Garcia, S. P., Almeida, J. S. (2005). [Nearest neighbor embedding with different time delays. Physical Review E 71, 037204](https://doi.org/10.1103/PhysRevE.71.037204).
"""
function n_statistic(Y::Dataset{D, F}, s::Vector{F}; τs = 0:50 , r::Real = 10,
function n_statistic(Y::AbstractDataset{D, F}, s::Vector{F}; τs = 0:50 , r::Real = 10,
T::Int = 1, w::Int = 1, metric = Euclidean()) where {D, F<:Real}

# assert a minimum length of the input time series
Expand Down
2 changes: 1 addition & 1 deletion src/unified_de/pecuzal.jl
Original file line number Diff line number Diff line change
Expand Up @@ -117,7 +117,7 @@ function pecuzal_embedding(s::Vector{T}; τs = 0:50 , w::Int = 1,

end

function pecuzal_embedding(𝒟::Dataset{D, T}; τs = 0:50 , w::Int = 1,
function pecuzal_embedding(𝒟::AbstractDataset{D, T}; τs = 0:50 , w::Int = 1,
samplesize::Real = 1, K::Int = 13, KNN::Int = 3, Tw::Int=4*w,
α::Real = 0.05, p::Real = 0.5, max_cycles::Int = 50) where {D, T<:Real}

Expand Down
4 changes: 2 additions & 2 deletions src/unified_de/uzal_cost.jl
Original file line number Diff line number Diff line change
Expand Up @@ -59,7 +59,7 @@ the inverse of all `ϵ²`'s for all considered reference points.
[^Uzal2011]: Uzal, L. C., Grinblat, G. L., Verdes, P. F. (2011). [Optimal reconstruction of dynamical systems: A noise amplification approach. Physical Review E 84, 016223](https://doi.org/10.1103/PhysRevE.84.016223).
"""
function uzal_cost(Y::Dataset{D, ET};
function uzal_cost(Y::AbstractDataset{D, ET};
Tw::Int = 40, K::Int = 3, w::Int = 1, samplesize::Real = 0.5,
metric = Euclidean()
) where {D, ET}
Expand Down Expand Up @@ -151,7 +151,7 @@ Keywords as in [`uzal_cost`](@ref).
[^Uzal2011]: Uzal, L. C., Grinblat, G. L., Verdes, P. F. (2011). [Optimal reconstruction of dynamical systems: A noise amplification approach. Physical Review E 84, 016223](https://doi.org/10.1103/PhysRevE.84.016223).
"""
function uzal_cost_local(Y::Dataset{D, ET};
function uzal_cost_local(Y::AbstractDataset{D, ET};
Tw::Int = 40, K::Int = 3, w::Int = 1, samplesize::Real = 0.5,
metric = Euclidean()
) where {D, ET}
Expand Down
2 changes: 1 addition & 1 deletion src/utils.jl
Original file line number Diff line number Diff line change
Expand Up @@ -172,7 +172,7 @@ Add the `τ` lagged values of the timeseries `s` as additional component to `Y`
(`Vector` or `Dataset`), in order to form a higher embedded
dataset `Z`. The dimensionality of `Z` is thus equal to that of `Y` + 1.
"""
function hcat_lagged_values(Y::Dataset{D,T}, s::Vector{T}, τ::Int) where {D, T<:Real}
function hcat_lagged_values(Y::AbstractDataset{D,T}, s::Vector{T}, τ::Int) where {D, T<:Real}
N = length(Y)
@assert N length(s)
M = N - τ
Expand Down
5 changes: 5 additions & 0 deletions test/dataset_tests.jl
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,11 @@ println("\nTesting Dataset...")
@test size(data[1:10,1:2]) == (10,2)
@test data[1:10,1:2] == Dataset(a[1:10], b[1:10])
@test data[SVector{10}(1:10), SVector(1, 2)] == data[1:10, 1:2]

sub = @view data[11:20]
@test sub isa DelayEmbeddings.SubDataset
@test sub[2] == data[12]
@test dimension(sub) == dimension(data)
end

@testset "minmax" begin
Expand Down

0 comments on commit 4507be8

Please sign in to comment.