Skip to content

Commit

Permalink
Hcat multiple datasets (#113)
Browse files Browse the repository at this point in the history
* hcat for multiple datasets

* Analytical tests

* Analytical tests for `append` too

* Rename  type variable

* Actually use new variable

* Update changelog and version
  • Loading branch information
kahaaga authored Oct 17, 2022
1 parent c44585d commit 5ef9162
Show file tree
Hide file tree
Showing 4 changed files with 56 additions and 21 deletions.
3 changes: 3 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,3 +1,6 @@
# v2.4
- It is now possible to horizontally concatenate more than two `Dataset`s using `hcat`. Providing multiple `Dataset`s of potentially different dimensions to the `Dataset` constructor will horizontally concatenate the inputs.

# v2.3
- New function `dataset_distance` that calculates distances between datasets.
- New method/metric `Hausdorff` that can be used in `dataset_distance`.
Expand Down
2 changes: 1 addition & 1 deletion Project.toml
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
name = "DelayEmbeddings"
uuid = "5732040d-69e3-5649-938a-b6b4f237613f"
repo = "https://github.com/JuliaDynamics/DelayEmbeddings.jl.git"
version = "2.3.4"
version = "2.4.0"

[deps]
Distances = "b4f34e82-e78d-54a5-968a-f98e89d6e8f7"
Expand Down
18 changes: 9 additions & 9 deletions src/datasets/dataset.jl
Original file line number Diff line number Diff line change
Expand Up @@ -106,13 +106,14 @@ function Base.hcat(x::Vector{<:Real}, d::AbstractDataset{D, T}) where {D, T}
return Dataset(data)
end

function Base.hcat(x::AbstractDataset{D1, T}, y::AbstractDataset{D2, T}) where {D1, D2, T}
length(x) == length(y) || error("Datasets must be of same length")
L = length(x)
D = D1 + D2
v = Vector{SVector{D, T}}(undef, L)
for i = 1:L
v[i] = SVector{D, T}((x[i]..., y[i]...,))
"""
    hcat(ds::AbstractDataset...)

Horizontally concatenate datasets sharing the element type `T` into a single `Dataset`.
Point `i` of the result chains the entries of point `i` of every input, in argument
order, so the resulting dimension is `sum(dimension.(ds))`.

Throws an error if the datasets do not all have the same length.
"""
function Base.hcat(ds::Vararg{AbstractDataset{D, T} where {D}, N}) where {T, N}
    lengths = map(length, ds)
    npoints = maximum(lengths)
    all(==(npoints), lengths) || error("Datasets must be of same length")
    totaldim = sum(dimension, ds)
    PointType = SVector{totaldim, T}
    points = Vector{PointType}(undef, npoints)
    for i in 1:npoints
        # Chain the i-th point of each dataset, in argument order, into one point.
        coords = Iterators.flatten(ds[k][i] for k in 1:N)
        points[i] = PointType(coords...)
    end
    return Dataset(points)
end
Expand Down Expand Up @@ -203,8 +204,7 @@ function Dataset(vecs::Vararg{<:AbstractVector{T}}) where {T}
return Dataset(_dataset(vecs...))
end

Dataset(x::AbstractDataset{D1, T}, y::AbstractDataset{D2, T}) where {D1, D2, T} =
hcat(x, y)
# Convenience constructors: building a `Dataset` from several datasets, or from a
# dataset and a real-valued vector (in either order), forwards to `hcat`.
function Dataset(xs::Vararg{AbstractDataset})
    return hcat(xs...)
end
function Dataset(x::Vector{<:Real}, y::AbstractDataset{D, T}) where {D, T}
    return hcat(x, y)
end
function Dataset(x::AbstractDataset{D, T}, y::Vector{<:Real}) where {D, T}
    return hcat(x, y)
end

Expand Down
54 changes: 43 additions & 11 deletions test/dataset_tests.jl
Original file line number Diff line number Diff line change
Expand Up @@ -7,19 +7,51 @@ println("\nTesting Dataset...")
data = Dataset(rand(1001,3))
xs = columns(data)

@testset "Concatenation/Append" begin
@testset "Basics" begin
x, y, z = Dataset(rand(10, 2)), Dataset(rand(10, 2)), rand(10)
@test Dataset(x) == x
@test Dataset(x, y) isa Dataset
@test Dataset(x) == x # identity
@test Dataset(x, y,) isa Dataset
@test Dataset(x, y, y) isa Dataset
@test size(Dataset(x, y)) == (10, 4)
@test hcat(x, y) isa Dataset
@test Dataset(x, z) isa Dataset
@test Dataset(z, x) isa Dataset

append!(x, y)
@test length(x) == 20
w = hcat(x, rand(20))
@test size(w) == (20, 3)
end

# Analytical tests for `append!` and `hcat`: inputs are built from known ranges so the
# expected result can be written out explicitly.
@testset "Concatenation/Append" begin

@testset "append" begin
D1, D2 = Dataset([1:10 2:11]), Dataset([3:12 4:13])
append!(D1, D2)
@test length(D1) == 20
# Expected columns: each column of D1 followed by the matching column of D2.
d1 = [1:10 |> collect; 3:12 |> collect]
d2 = [2:11 |> collect; 4:13 |> collect]
@test D1 == Dataset([d1 d2])
end

# The hcat testset below runs once per element type listed here.
types = [Int, Float64]
@testset "hcat with identical element type ($(T))" for T in types
x1, x2, x3 = T.([1:5 2:6]), T.([3:7 4:8]), T.(5:9)
D1, D2, D3 = Dataset(x1), Dataset(x2), Dataset(x3)
# Plain vector with the same element type as the datasets.
y = T.(1:5) |> collect
@test hcat(D1, y) == Dataset([1:5 2:6 1:5])
@test hcat(D1, D2) == Dataset([1:5 2:6 3:7 4:8])
@test hcat(D1, D2, D3) == Dataset([1:5 2:6 3:7 4:8 5:9])
# Dataset/vector concatenation is checked in both argument orders.
@test hcat(D1, y) |> size == (5, 3)
@test hcat(y, D1) |> size == (5, 3)
@test hcat(D1, y) == Dataset(([1:5 2:6 y]))
@test hcat(y, D1) == Dataset(([y 1:5 2:6]))
end

# TODO: By construction, these errors will occur, because the type constraints are
# not imposed on the vector inputs, only on the dataset input. In contrast, for
# hcat on datasets only, we force all datasets to have the same element type.
#
# Should we force the element types of the dataset and vector to be identical and
# throw an informative error message if they are not?
@testset "hcat with nonidentical element types" begin
D = Dataset([1:5 2:6]) # Dataset{2, Int}
x = rand(length(D)) # Vector{Float64}
# Both argument orders are expected to throw an InexactError.
@test_throws InexactError hcat(D, x)
@test_throws InexactError hcat(x, D)
end
end

@testset "Methods & Indexing" begin
Expand Down

0 comments on commit 5ef9162

Please sign in to comment.