From c53d576a2e1f746a53dcbad35d9ac282664b9838 Mon Sep 17 00:00:00 2001 From: Dave Kleinschmidt Date: Fri, 23 Jun 2023 10:35:54 -0400 Subject: [PATCH 01/15] notes/WIP --- src/export_edf.jl | 29 ++++++++++++++++++++++++++++- 1 file changed, 28 insertions(+), 1 deletion(-) diff --git a/src/export_edf.jl b/src/export_edf.jl index 2ebc74c..527b127 100644 --- a/src/export_edf.jl +++ b/src/export_edf.jl @@ -96,6 +96,33 @@ function onda_samples_to_edf_header(samples::AbstractVector{<:Samples}; is_contiguous, edf_record_metadata(samples)...) end +# TODO: change offset for UInt -> Int, change resolution for width. handle float-encoded values separately + +function reencode_samples(samples::Samples, sample_type::Type{T}) where {T<:Signed} + if sizeof(sample_type) > sizeof(Int16) + decoded_samples = Onda.decode(samples) + scaled_resolution = samples.info.sample_resolution_in_unit * (sizeof(sample_type) / sizeof(Int16)) + encode_info = SamplesInfoV2(Tables.rowmerge(samples.info; sample_type=Int16, sample_resolution_in_unit=scaled_resolution)) + samples = encode(Onda.Samples(decoded_samples.data, encode_info, false)) + end + return samples +end + +# unsigned, we have to adjust resolution in the same way as signed, and offset by TODO +function reencode_samples(samples::Samples, sample_type::Type{T}) where {T<:Unsigned} + # anything smaller by even 1 bit we can just convert + if sizeof(sample_type) >= sizeof(Int16) + decoded_samples = Onda.decode(samples) + scaled_resolution = samples.info.sample_resolution_in_unit * (sizeof(sample_type) / sizeof(Int16)) + offset = + + end +end + +function reencode_samples(samples::Samples, sample_type::Type{T}) where {T<:AbstractFloat} + +end + function onda_samples_to_edf_signals(onda_samples::AbstractVector{<:Samples}, seconds_per_record::Float64) edf_signals = Union{EDF.AnnotationsSignal,EDF.Signal{Int16}}[] for samples in onda_samples @@ -118,7 +145,7 @@ function onda_samples_to_edf_signals(onda_samples::AbstractVector{<:Samples}, se extrema.physical_min, extrema.physical_max, extrema.digital_min, extrema.digital_max, "", sample_count) - sample_data = vec(samples[channel_name, :].data) + sample_data = Int16.(vec(samples[channel_name, :].data)) padding = Iterators.repeated(zero(Int16), (sample_count - (length(sample_data) % sample_count)) % sample_count) edf_signal_samples = append!(sample_data, padding) push!(edf_signals, EDF.Signal(edf_signal_header, edf_signal_samples)) From 89bf9d6a483d2746bd0222f2f9aca5e98a90786a Mon Sep 17 00:00:00 2001 From: Dave Kleinschmidt Date: Fri, 23 Jun 2023 13:47:37 -0400 Subject: [PATCH 02/15] reencode_samples --- src/export_edf.jl | 71 +++++++++++++++++++++++++++-------------------- 1 file changed, 41 insertions(+), 30 deletions(-) diff --git a/src/export_edf.jl b/src/export_edf.jl index 527b127..f8ff364 100644 --- a/src/export_edf.jl +++ b/src/export_edf.jl @@ -96,45 +96,54 @@ function onda_samples_to_edf_header(samples::AbstractVector{<:Samples}; is_contiguous, edf_record_metadata(samples)...) end -# TODO: change offset for UInt -> Int, change resolution for width. handle float-encoded values separately - -function reencode_samples(samples::Samples, sample_type::Type{T}) where {T<:Signed} - if sizeof(sample_type) > sizeof(Int16) - decoded_samples = Onda.decode(samples) - scaled_resolution = samples.info.sample_resolution_in_unit * (sizeof(sample_type) / sizeof(Int16)) - encode_info = SamplesInfoV2(Tables.rowmerge(samples.info; sample_type=Int16, sample_resolution_in_unit=scaled_resolution)) - samples = encode(Onda.Samples(decoded_samples.data, encode_info, false)) - end - return samples -end +""" + reencode_samples(samples::Samples, sample_type::Type=Int16) -# unsigned, we have to adjust resolution in the same way as signed, and offset by TODO -function reencode_samples(samples::Samples, sample_type::Type{T}) where {T<:Unsigned} - # anything smaller by even 1 bit we can just convert - if sizeof(sample_type) >= sizeof(Int16) - decoded_samples = Onda.decode(samples) - scaled_resolution = samples.info.sample_resolution_in_unit * (sizeof(sample_type) / sizeof(Int16)) - offset = - - end -end +Re-compute encoding parameters for `samples` so that they can be encoded as +`sample_type`. The default `sample_type` is `Int16` which is the target for EDF +format. + +This uses the actual signal extrema, choosing a resolution/offset that maps them +to `typemin(sample_type), typemax(sample_type`. + +Returns an encoded `Samples`, possibly with updated info. If the current +encoded values can be represented with `sample_type`, nothing is changed. If +they cannot, the `sample_type`, `sample_resolution_in_unit`, and +`sample_offset_in_unit` fields are changed to reflect the new encoding. +""" +function reencode_samples(samples::Samples, sample_type::Type=Int16) + current_type = Onda.sample_type(samples.info) + typemin(current_type) > typemin(sample_type) && + typemax(current_type) < typemax(sample_type) && + return samples + + samples = decode(samples) + smin, smax = extrema(samples.data) + + emin, emax = typemin(sample_type), typemax(sample_type) + + # re-use the import encoding calculator here: + # need to convert the digital min/max to floats due to overflow + mock_header = (; digital_minimum=Float64(emin), digital_maximum=Float64(emax), + physical_minimum=smin, physical_maximum=smax, + samples_per_record=0) # not using this -function reencode_samples(samples::Samples, sample_type::Type{T}) where {T<:AbstractFloat} + (; sample_resolution_in_unit, sample_offset_in_unit) = edf_signal_encoding(mock_header, 1) + new_info = Tables.rowmerge(samples.info; + sample_resolution_in_unit, + sample_offset_in_unit, + sample_type) + + new_samples = Samples(samples.data, SamplesInfoV2(new_info), samples.encoded) + return encode(new_samples) end function onda_samples_to_edf_signals(onda_samples::AbstractVector{<:Samples}, seconds_per_record::Float64) edf_signals = Union{EDF.AnnotationsSignal,EDF.Signal{Int16}}[] for samples in onda_samples # encode samples, rescaling if necessary - if sizeof(sample_type(samples.info)) > sizeof(Int16) - decoded_samples = Onda.decode(samples) - scaled_resolution = samples.info.sample_resolution_in_unit * (sizeof(sample_type(samples.info)) / sizeof(Int16)) - encode_info = SamplesInfoV2(Tables.rowmerge(samples.info; sample_type=Int16, sample_resolution_in_unit=scaled_resolution)) - samples = encode(Onda.Samples(decoded_samples.data, encode_info, false)) - else - samples = Onda.encode(samples) - end + samples = reencode_samples(samples, Int16) signal_name = samples.info.sensor_type extrema = SignalExtrema(samples) for channel_name in samples.info.channels @@ -145,6 +154,8 @@ function onda_samples_to_edf_signals(onda_samples::AbstractVector{<:Samples}, se extrema.physical_min, extrema.physical_max, extrema.digital_min, extrema.digital_max, "", sample_count) + # manually convert here in case we have input samples whose encoded + # values are convertible losslessly to Int16: sample_data = Int16.(vec(samples[channel_name, :].data)) padding = Iterators.repeated(zero(Int16), (sample_count - (length(sample_data) % sample_count)) % sample_count) edf_signal_samples = append!(sample_data, padding) From af0d456a3ed9c85bb7b45ef546ffc506121ef7d5 Mon Sep 17 00:00:00 2001 From: Dave Kleinschmidt Date: Mon, 26 Jun 2023 16:21:35 -0400 Subject: [PATCH 03/15] WIP tests (borken) --- test/export.jl | 35 +++++++++++++++++++++++++++++++++++ 1 file changed, 35 insertions(+) diff --git a/test/export.jl b/test/export.jl index 38cf214..281039a 100644 --- a/test/export.jl +++ b/test/export.jl @@ -122,4 +122,39 @@ @test_logs (:warn, r"No annotations found in") store_edf_as_onda(exported_edf2, mktempdir(), uuid; import_annotations=true) end + @testset "re-encoding" begin + _flatten_union(T::Union) = vcat(T.a, _flatten_union(T.b)) + _flatten_union(T::Type) = T + + onda_types = _flatten_union(Onda.LPCM_SAMPLE_TYPE_UNION) + + @testset "encoding $T" for T in onda_types + info = SamplesInfoV2(; sensor_type="x", + channels=["x"], + sample_unit="microvolt", + sample_resolution_in_unit=1.234, + sample_offset_in_unit=4.567, + sample_type=T, + sample_rate=1) + + min = typemin(T) + max = typemax(T) + + if T <: AbstractFloat + min = nextfloat(min) + max = prevfloat(max) + end + + data = range(min, max; length=9) + data = T <: AbstractFloat ? data : round.(T, data) + data = reshape(data, 1, :) + + samples = Samples(data, info, true) + + signal = only(OndaEDF.onda_samples_to_edf_signals([samples], 1.0)) + + @test vec(decode(samples).data) ≈ EDF.decode(signal) + end + end + end From 9bc8f83e5e27145266fa3c4fd3e2dd7259049c44 Mon Sep 17 00:00:00 2001 From: Dave Kleinschmidt Date: Mon, 26 Jun 2023 18:26:44 -0400 Subject: [PATCH 04/15] WIP --- test/export.jl | 20 ++++++++++++++++++-- 1 file changed, 18 insertions(+), 2 deletions(-) diff --git a/test/export.jl b/test/export.jl index 281039a..f72477a 100644 --- a/test/export.jl +++ b/test/export.jl @@ -128,12 +128,15 @@ onda_types = _flatten_union(Onda.LPCM_SAMPLE_TYPE_UNION) + # test that we can encode the full range of values expressible in each + # possible Onda sample type. + # @testset "encoding $T" for T in onda_types info = SamplesInfoV2(; sensor_type="x", channels=["x"], sample_unit="microvolt", - sample_resolution_in_unit=1.234, - sample_offset_in_unit=4.567, + sample_resolution_in_unit=2, + sample_offset_in_unit=1, sample_type=T, sample_rate=1) @@ -151,6 +154,19 @@ samples = Samples(data, info, true) + # for r e a s o n s we need to be a bit careful with just how large + # the values are that we're trying to use; EDF.jl (and maybe EDF + # generally, unclear) can't handle physical min/max more than like + # 1e8 (actually for EDF.jl it's 99999995 because Float32 precision). + # so, we try to do typemax/min of the encoded type, and if that + # leads to physical min/max that are too big, we clamp and + # re-encode. + if !all(<(1e10) ∘ abs ∘ float, decode(samples).data) + min_d, max_d = -1e10, 1e10 + data_d = reshape(range(min_d, max_d; length=9), 1, :) + samples = Onda.encode(Samples(data_d, info, false)) + end + signal = only(OndaEDF.onda_samples_to_edf_signals([samples], 1.0)) @test vec(decode(samples).data) ≈ EDF.decode(signal) From 34136a177c25a740687c128684129f3dce9eea8b Mon Sep 17 00:00:00 2001 From: Dave Kleinschmidt Date: Tue, 27 Jun 2023 18:15:40 -0400 Subject: [PATCH 05/15] better tests --- test/export.jl | 23 ++++++++++++++--------- 1 file changed, 14 insertions(+), 9 deletions(-) diff --git a/test/export.jl b/test/export.jl index f72477a..9097877 100644 --- a/test/export.jl +++ b/test/export.jl @@ -128,9 +128,12 @@ onda_types = _flatten_union(Onda.LPCM_SAMPLE_TYPE_UNION) - # test that we can encode the full range of values expressible in each + onda_ints = filter(x -> x <: Integer, onda_types) + onda_floats = filter(x -> x <: AbstractFloat, onda_types) + @test issetequal(union(onda_ints, onda_floats), onda_types) + + # test that we can encode ≈ the full range of values expressible in each # possible Onda sample type. - # @testset "encoding $T" for T in onda_types info = SamplesInfoV2(; sensor_type="x", channels=["x"], @@ -140,18 +143,19 @@ sample_type=T, sample_rate=1) - min = typemin(T) - max = typemax(T) if T <: AbstractFloat - min = nextfloat(min) - max = prevfloat(max) + min = nextfloat(typemin(T)) + max = prevfloat(typemax(T)) + data = range(min, max; length=9) + else + min = typemin(T) + max = typemax(T) + step = max ÷ T(8) - min ÷ T(8) + data = range(min, max; step) end - data = range(min, max; length=9) - data = T <: AbstractFloat ? data : round.(T, data) data = reshape(data, 1, :) - samples = Samples(data, info, true) # for r e a s o n s we need to be a bit careful with just how large @@ -162,6 +166,7 @@ # leads to physical min/max that are too big, we clamp and # re-encode. if !all(<(1e10) ∘ abs ∘ float, decode(samples).data) + @info "clamped decoded $(T) samples to ±1e10" min_d, max_d = -1e10, 1e10 data_d = reshape(range(min_d, max_d; length=9), 1, :) samples = Onda.encode(Samples(data_d, info, false)) From bac746737bfa567e8948c84aefaf00a522b66df4 Mon Sep 17 00:00:00 2001 From: Dave Kleinschmidt Date: Wed, 28 Jun 2023 14:59:13 -0400 Subject: [PATCH 06/15] add testset for loop, fix no-op reencode checks, floats --- src/export_edf.jl | 10 ++++++---- test/export.jl | 3 ++- 2 files changed, 8 insertions(+), 5 deletions(-) diff --git a/src/export_edf.jl b/src/export_edf.jl index f8ff364..b0bf60e 100644 --- a/src/export_edf.jl +++ b/src/export_edf.jl @@ -113,8 +113,10 @@ they cannot, the `sample_type`, `sample_resolution_in_unit`, and """ function reencode_samples(samples::Samples, sample_type::Type=Int16) current_type = Onda.sample_type(samples.info) - typemin(current_type) > typemin(sample_type) && - typemax(current_type) < typemax(sample_type) && + # if we can fit the encoded values in `sample_type` without any changes, + # return as-is. + typemin(current_type) >= typemin(sample_type) && + typemax(current_type) <= typemax(sample_type) && return samples samples = decode(samples) @@ -123,9 +125,9 @@ function reencode_samples(samples::Samples, sample_type::Type=Int16) emin, emax = typemin(sample_type), typemax(sample_type) # re-use the import encoding calculator here: - # need to convert the digital min/max to floats due to overflow + # need to convert all the min/max to floats due to overflow mock_header = (; digital_minimum=Float64(emin), digital_maximum=Float64(emax), - physical_minimum=smin, physical_maximum=smax, + physical_minimum=Float64(smin), physical_maximum=Float64(smax), samples_per_record=0) # not using this (; sample_resolution_in_unit, sample_offset_in_unit) = edf_signal_encoding(mock_header, 1) diff --git a/test/export.jl b/test/export.jl index 9097877..b2abf98 100644 --- a/test/export.jl +++ b/test/export.jl @@ -14,7 +14,8 @@ exported_edf = onda_to_edf(samples_to_export, annotations) @test exported_edf.header.record_count == 200 offset = 0 - for signal_name in signal_names + @testset "export $signal_name" for signal_name in signal_names + global offset samples = only(filter(s -> s.info.sensor_type == signal_name, onda_samples)) channel_names = samples.info.channels edf_indices = (1:length(channel_names)) .+ offset From 4b876f1a2f91060f666a96c5265a5314bdaefde3 Mon Sep 17 00:00:00 2001 From: Dave Kleinschmidt Date: Wed, 28 Jun 2023 15:02:00 -0400 Subject: [PATCH 07/15] not global --- test/export.jl | 1 - 1 file changed, 1 deletion(-) diff --git a/test/export.jl b/test/export.jl index b2abf98..c92e761 100644 --- a/test/export.jl +++ b/test/export.jl @@ -15,7 +15,6 @@ @test exported_edf.header.record_count == 200 offset = 0 @testset "export $signal_name" for signal_name in signal_names - global offset samples = only(filter(s -> s.info.sensor_type == signal_name, onda_samples)) channel_names = samples.info.channels edf_indices = (1:length(channel_names)) .+ offset From 84bcec9b27678f784b6159cc056ea57362292e2e Mon Sep 17 00:00:00 2001 From: Dave Kleinschmidt Date: Wed, 28 Jun 2023 15:47:20 -0400 Subject: [PATCH 08/15] update roundtrip tests --- test/export.jl | 18 ++++++++---------- 1 file changed, 8 insertions(+), 10 deletions(-) diff --git a/test/export.jl b/test/export.jl index c92e761..0bb1bf6 100644 --- a/test/export.jl +++ b/test/export.jl @@ -97,19 +97,17 @@ # new UUID for each annotation created during import @test all(getproperty.(nt.annotations, :id) .!= getproperty.(ann_sorted, :id)) - for (samples_orig, signal_round_tripped) in zip(onda_samples, nt.signals) + @testset "$(samples_orig.info.sensor_type)" for (samples_orig, signal_round_tripped) in zip(onda_samples, nt.signals) info_orig = samples_orig.info info_round_tripped = SamplesInfoV2(signal_round_tripped) - for p in setdiff(propertynames(info_orig), - (:edf_channels, :sample_type, :sample_resolution_in_unit)) - @test getproperty(info_orig, p) == getproperty(info_round_tripped, p) - end - if info_orig.sample_type == "int32" - resolution_orig = info_orig.sample_resolution_in_unit * 2 - else - resolution_orig = info_orig.sample_resolution_in_unit + + if info_orig.sample_type == "int16" + @test info_orig == info_round_tripped end - @test resolution_orig ≈ info_round_tripped.sample_resolution_in_unit + + samples_rt = Onda.load(signal_round_tripped) + @test all(isapprox.(decode(samples_orig).data, decode(samples_rt).data; + atol=info_orig.sample_resolution_in_unit)) end # don't import annotations From 07c90b2f896fb4d8a8ddf88da42dd1ca1d778c72 Mon Sep 17 00:00:00 2001 From: Dave Kleinschmidt Date: Wed, 28 Jun 2023 16:58:47 -0400 Subject: [PATCH 09/15] another path for detecting when we do NOT need to re-encode --- src/export_edf.jl | 51 ++++++++++++++++++++++++++++++++++++++++++----- test/export.jl | 47 +++++++++++++++++++++++++++++++++++++++++-- 2 files changed, 91 insertions(+), 7 deletions(-) diff --git a/src/export_edf.jl b/src/export_edf.jl index b0bf60e..f00cdbd 100644 --- a/src/export_edf.jl +++ b/src/export_edf.jl @@ -97,7 +97,7 @@ function onda_samples_to_edf_header(samples::AbstractVector{<:Samples}; end """ - reencode_samples(samples::Samples, sample_type::Type=Int16) + reencode_samples(samples::Samples, sample_type::Type{<:Integer}=Int16) Re-compute encoding parameters for `samples` so that they can be encoded as `sample_type`. The default `sample_type` is `Int16` which is the target for EDF @@ -111,21 +111,50 @@ encoded values can be represented with `sample_type`, nothing is changed. If they cannot, the `sample_type`, `sample_resolution_in_unit`, and `sample_offset_in_unit` fields are changed to reflect the new encoding. """ -function reencode_samples(samples::Samples, sample_type::Type=Int16) - current_type = Onda.sample_type(samples.info) +function reencode_samples(samples::Samples, sample_type::Type{<:Integer}=Int16) # if we can fit the encoded values in `sample_type` without any changes, # return as-is. + # + # first, check at the type level since this is cheap and doesn't require + # re-encoding possibly decoded values + current_type = Onda.sample_type(samples.info) typemin(current_type) >= typemin(sample_type) && typemax(current_type) <= typemax(sample_type) && - return samples + return encode(samples) + + # next, check whether the encoded values are <: Integers that lie within the + # range representable by `sample_type` and can be converted directly. + if Onda.sample_type(samples.info) <: Integer + smin, smax = extrema(samples.data) + if !samples.encoded + smin, smax = Onda.encode_sample.(Onda.sample_type(samples.info), + samples.info.sample_resolution_in_unit, + samples.info.sample_offset_in_unit, + (smin, smax)) + end + if smin >= typemin(sample_type) && smax <= typemax(sample_type) + # XXX: we're being a bit clever here in order to not allocate a + # whole new sample array, plugging in the new sample_type, re-using + # the old encodoed samples data, and skipping validation. this is + # okay in _this specific context_ since we know we're actually + # converting everything to Int16 in the actual export. + samples = encode(samples) + new_info = SamplesInfoV2(Tables.rowmerge(samples.info; sample_type)) + return Samples(samples.data, new_info, true; validate=false) + end + end + # at this point, we know the currently _encoded_ values cannot be + # represented losslessly as Int16, so we need to re-encode. We'll pick new + # encoding parameters based on the actual signal values, in order to + # maximize the dynamic range of Int16 encoding. samples = decode(samples) smin, smax = extrema(samples.data) emin, emax = typemin(sample_type), typemax(sample_type) # re-use the import encoding calculator here: - # need to convert all the min/max to floats due to overflow + # need to convert all the min/max to floats due to possible overflow mock_header = (; digital_minimum=Float64(emin), digital_maximum=Float64(emax), physical_minimum=Float64(smin), physical_maximum=Float64(smax), samples_per_record=0) # not using this @@ -190,6 +219,18 @@ input `Onda.Samples`. The ordering of `EDF.Signal`s in the output will match the order of the input collection of `Samples` (and within each channel grouping, the order of the samples' channels). + +!!! note + + EDF signals are encoded as Int16, while Onda allows a range of different + sample types, some of which provide considerably more resolution than Int16. + During export, re-encoding may be necessary if the encoded Onda samples + cannot be represented directly as Int16 values. In this case, new encoding + (resolution and offset) will be chosen based on the minimum and maximum + values actually present in each _signal_ in the input Onda Samples. Thus, + it may not always be possible to losslessly round trip Onda-formatted + datasets to EDF and back. + """ function onda_to_edf(samples::AbstractVector{<:Samples}, annotations=[]; kwargs...) edf_header = onda_samples_to_edf_header(samples; kwargs...) diff --git a/test/export.jl b/test/export.jl index 0bb1bf6..ca177c0 100644 --- a/test/export.jl +++ b/test/export.jl @@ -101,6 +101,7 @@ info_orig = samples_orig.info info_round_tripped = SamplesInfoV2(signal_round_tripped) + # anything else, the encoding parameters may change on export if info_orig.sample_type == "int16" @test info_orig == info_round_tripped end @@ -141,7 +142,6 @@ sample_type=T, sample_rate=1) - if T <: AbstractFloat min = nextfloat(typemin(T)) max = prevfloat(typemax(T)) @@ -156,7 +156,7 @@ data = reshape(data, 1, :) samples = Samples(data, info, true) - # for r e a s o n s we need to be a bit careful with just how large + # for r e a s o n s we need to be a bit careful with just how large # the values are that we're trying to use; EDF.jl (and maybe EDF # generally, unclear) can't handle physical min/max more than like # 1e8 (actually for EDF.jl it's 99999995 because Float32 precision). @@ -174,6 +174,49 @@ @test vec(decode(samples).data) ≈ EDF.decode(signal) end + + @testset "skip reencoding" begin + info = SamplesInfoV2(; sensor_type="x", + channels=["x"], + sample_unit="microvolt", + sample_resolution_in_unit=2, + sample_offset_in_unit=1, + sample_type=Int32, + sample_rate=1) + + data = Int32[typemin(Int16) typemax(Int16)] + + samples = Samples(data, info, true) + # data is re-used if already encoded + @test OndaEDF.reencode_samples(samples, Int16).data === samples.data + signal = only(OndaEDF.onda_samples_to_edf_signals([samples], 1.0)) + @test EDF.decode(signal) == vec(decode(samples).data) + + # bump just outside the range representable as Int16 + samples.data .+= Int32[-1 1] + new_samples = OndaEDF.reencode_samples(samples, Int16) + @test new_samples != samples + @test decode(new_samples).data == decode(samples).data + + signal = only(OndaEDF.onda_samples_to_edf_signals([samples], 1.0)) + @test EDF.decode(signal) == vec(decode(samples).data) + + + uinfo = SamplesInfoV2(Tables.rowmerge(info; sample_type="uint64")) + data = UInt64[0 typemax(Int16)] + samples = Samples(data, uinfo, true) + @test OndaEDF.reencode_samples(samples, Int16).data === samples.data + signal = only(OndaEDF.onda_samples_to_edf_signals([samples], 1.0)) + @test EDF.decode(signal) == vec(decode(samples).data) + + samples.data .+= UInt64[0 1] + new_samples = OndaEDF.reencode_samples(samples, Int16) + @test new_samples != samples + @test decode(new_samples).data == decode(samples).data + + signal = only(OndaEDF.onda_samples_to_edf_signals([samples], 1.0)) + @test EDF.decode(signal) == vec(decode(samples).data) + end end end From 51e21a147b1076763ebffe92627ee53aece9906c Mon Sep 17 00:00:00 2001 From: Dave Kleinschmidt Date: Wed, 28 Jun 2023 17:06:41 -0400 Subject: [PATCH 10/15] 1.6 --- src/export_edf.jl | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/export_edf.jl b/src/export_edf.jl index f00cdbd..aba261c 100644 --- a/src/export_edf.jl +++ b/src/export_edf.jl @@ -159,7 +159,9 @@ function reencode_samples(samples::Samples, sample_type::Type{<:Integer}=Int16) physical_minimum=Float64(smin), physical_maximum=Float64(smax), samples_per_record=0) # not using this - (; sample_resolution_in_unit, sample_offset_in_unit) = edf_signal_encoding(mock_header, 1) + donor_info = edf_signal_encoding(mock_header, 1) + sample_resolution_in_unit = donor_info.sample_resolution_in_unit + sample_offset_in_unit = donor_info.sample_offset_in_unit new_info = Tables.rowmerge(samples.info; sample_resolution_in_unit, From 8fd825c49b6657c45869ad74b8e212402d62a4cf Mon Sep 17 00:00:00 2001 From: Dave Kleinschmidt Date: Wed, 28 Jun 2023 17:09:34 -0400 Subject: [PATCH 11/15] moar tests --- test/export.jl | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/test/export.jl b/test/export.jl index ca177c0..256d5f8 100644 --- a/test/export.jl +++ b/test/export.jl @@ -192,6 +192,12 @@ signal = only(OndaEDF.onda_samples_to_edf_signals([samples], 1.0)) @test EDF.decode(signal) == vec(decode(samples).data) + # make sure it works with decoded too + signal2 = only(OndaEDF.onda_samples_to_edf_signals([Onda.decode(samples)], 1.0)) + @test EDF.decode(signal2) == vec(decode(samples).data) + # to confirm quantization settings are the same + @test signal.header == signal2.header + # bump just outside the range representable as Int16 samples.data .+= Int32[-1 1] new_samples = OndaEDF.reencode_samples(samples, Int16) @@ -200,6 +206,8 @@ signal = only(OndaEDF.onda_samples_to_edf_signals([samples], 1.0)) @test EDF.decode(signal) == vec(decode(samples).data) + # to confirm quantization settings are changed + @test signal.header != signal2.header uinfo = SamplesInfoV2(Tables.rowmerge(info; sample_type="uint64")) From e2f5eee3089b0c02d1e85a71822bfafd36f0bdcc Mon Sep 17 00:00:00 2001 From: Dave Kleinschmidt Date: Fri, 30 Jun 2023 10:42:32 -0400 Subject: [PATCH 12/15] Update src/export_edf.jl Co-authored-by: Phillip Alday --- src/export_edf.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/export_edf.jl b/src/export_edf.jl index aba261c..cf53855 100644 --- a/src/export_edf.jl +++ b/src/export_edf.jl @@ -104,7 +104,7 @@ Re-compute encoding parameters for `samples` so that they can be encoded as format. This uses the actual signal extrema, choosing a resolution/offset that maps them -to `typemin(sample_type), typemax(sample_type`. +to `typemin(sample_type), typemax(sample_type)`. Returns an encoded `Samples`, possibly with updated info. If the current encoded values can be represented with `sample_type`, nothing is changed. If From b3d72bad69c9b8837c5f0b0fc7365fe59770e8b9 Mon Sep 17 00:00:00 2001 From: Dave Kleinschmidt Date: Mon, 17 Jul 2023 14:45:08 -0400 Subject: [PATCH 13/15] clarify docstring --- src/export_edf.jl | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/src/export_edf.jl b/src/export_edf.jl index cf53855..e559732 100644 --- a/src/export_edf.jl +++ b/src/export_edf.jl @@ -99,14 +99,17 @@ end """ reencode_samples(samples::Samples, sample_type::Type{<:Integer}=Int16) -Re-compute encoding parameters for `samples` so that they can be encoded as -`sample_type`. The default `sample_type` is `Int16` which is the target for EDF -format. +Encode `samples` so that they can be encoded as `sample_type`. The default +`sample_type` is `Int16` which is the target for EDF format. The returned +`Samples` will be encoded, with a `info.sample_type` that is either equal to +`sample_type` or losslessly `convert`ible. -This uses the actual signal extrema, choosing a resolution/offset that maps them -to `typemin(sample_type), typemax(sample_type)`. +If the `info.sample_type` of the input samples cannot be losslessly converted to +`sample_type`, new quantization settings are chosen based on the actual signal +extrema, choosing a resolution/offset that maps them to `typemin(sample_type), +typemax(sample_type)`. -Returns an encoded `Samples`, possibly with updated info. If the current +Returns an encoded `Samples`, possibly with updated `info`. If the current encoded values can be represented with `sample_type`, nothing is changed. If they cannot, the `sample_type`, `sample_resolution_in_unit`, and `sample_offset_in_unit` fields are changed to reflect the new encoding. From 2eea302fdf2a5a95c57a3daa25cb02378fd63f94 Mon Sep 17 00:00:00 2001 From: Dave Kleinschmidt Date: Tue, 18 Jul 2023 11:55:48 -0400 Subject: [PATCH 14/15] Apply suggestions from code review Co-authored-by: Phillip Alday --- src/export_edf.jl | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/src/export_edf.jl b/src/export_edf.jl index e559732..3926f1c 100644 --- a/src/export_edf.jl +++ b/src/export_edf.jl @@ -99,7 +99,7 @@ end """ reencode_samples(samples::Samples, sample_type::Type{<:Integer}=Int16) -Encode `samples` so that they can be encoded as `sample_type`. The default +Encode `samples` so that they can be stored as `sample_type`. The default `sample_type` is `Int16` which is the target for EDF format. The returned `Samples` will be encoded, with a `info.sample_type` that is either equal to `sample_type` or losslessly `convert`ible. @@ -110,7 +110,7 @@ extrema, choosing a resolution/offset that maps them to `typemin(sample_type), typemax(sample_type)`. Returns an encoded `Samples`, possibly with updated `info`. If the current -encoded values can be represented with `sample_type`, nothing is changed. If +encoded values can be represented with `sample_type`, the `.info` is not changed. If they cannot, the `sample_type`, `sample_resolution_in_unit`, and `sample_offset_in_unit` fields are changed to reflect the new encoding. """ @@ -138,9 +138,9 @@ function reencode_samples(samples::Samples, sample_type::Type{<:Integer}=Int16) if smin >= typemin(sample_type) && smax <= typemax(sample_type) # XXX: we're being a bit clever here in order to not allocate a # whole new sample array, plugging in the new sample_type, re-using - # the old encodoed samples data, and skipping validation. this is + # the old encoded samples data, and skipping validation. this is # okay in _this specific context_ since we know we're actually - # converting everything to Int16 in the actual export. + # converting everything to sample_type in the actual export. samples = encode(samples) new_info = SamplesInfoV2(Tables.rowmerge(samples.info; sample_type)) return Samples(samples.data, new_info, true; validate=false) @@ -148,7 +148,7 @@ function reencode_samples(samples::Samples, sample_type::Type{<:Integer}=Int16) end # at this point, we know the currently _encoded_ values cannot be - # represented losslessly as Int16, so we need to re-encode. We'll pick new + # represented losslessly as sample_type, so we need to re-encode. We'll pick new # encoding parameters based on the actual signal values, in order to # maximize the dynamic range of Int16 encoding. samples = decode(samples) From 9c93003928f4a123cf2b9b1a69a0bbf5aac07b75 Mon Sep 17 00:00:00 2001 From: Dave Kleinschmidt Date: Tue, 18 Jul 2023 12:18:02 -0400 Subject: [PATCH 15/15] breaking --- Project.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Project.toml b/Project.toml index 19f389f..fd2754b 100644 --- a/Project.toml +++ b/Project.toml @@ -1,7 +1,7 @@ name = "OndaEDF" uuid = "e3ed2cd1-99bf-415e-bb8f-38f4b42a544e" authors = ["Beacon Biosignals, Inc."] -version = "0.11.9" +version = "0.12.0" [deps] Compat = "34da2185-b29b-5c13-b0c7-acf172513d20"