upgrade OndaEDF import/export to legolas 0.5/onda 0.15 (#62)

* compat, annotation * WIP * add separate CI stage for ondaedfschemas/ondaedf integration tests * relative path * 1 -> 2 * try again * accepted field type * write * bye bye kind * not exported * kind -> sensor_type in export tests * need schemas for tests * versions * actually document this * maybe this works 🤔 * maybe? * fix docs build * test (broken) for uniqueness of sensor_label * whoops * explicitly create sensor_label in plans * you know what I meant * updated readme * update readme with more accurate example * docs fix * missing docstring/fix xref * Apply suggestions from code review Co-authored-by: Curtis Vogt <[email protected]> * use StableRNG * move below definitions * V2 * fix monorepo setup for docs * the only reasonable CI is integration CI * need to dev this here too * don't build package * compt * attach docstrings directly to `@version` * turn off push docs builds on master * remove dev docs badge * Apply suggestions from code review Co-authored-by: Curtis Vogt <[email protected]> * we add these at build time Co-authored-by: Curtis Vogt <[email protected]>
beacon-biosignals · Jan 25, 2023 · 0a6a5c8 · 0a6a5c8 · kleinschmidt · Jan 25, 2023
1 parent 1edd946
commit 0a6a5c8
Show file tree

Hide file tree

Showing 15 changed files with 167 additions and 100 deletions.
diff --git a/.github/workflows/CI.yml b/.github/workflows/CI.yml
@@ -26,7 +26,7 @@ jobs:
         arch:
           - x64
     steps:
-      - uses: actions/checkout@v2
+      - uses: actions/checkout@v3
         with:
           fetch-depth: 0
       - uses: julia-actions/setup-julia@v1
@@ -38,6 +38,11 @@ jobs:
           path: ~/.julia/artifacts
           key: ${{ runner.os }}-test-artifacts-${{ hashFiles('**/Project.toml') }}
           restore-keys: ${{ runner.os }}-test-artifacts
+      - name: "Monorepo setup"
+        shell: julia --color=yes --project {0}
+        run: |
+          using Pkg
+          Pkg.develop(PackageSpec(; path="./OndaEDFSchemas.jl"))
       - uses: julia-actions/julia-buildpkg@v1
       - uses: julia-actions/julia-runtest@v1
       - uses: julia-actions/julia-processcoverage@v1

diff --git a/.github/workflows/Documentation.yml b/.github/workflows/Documentation.yml
@@ -2,17 +2,26 @@ name: Documentation
 
 on:
   push:
-    branches:
-      - 'master'
     tags: '*'
   pull_request:
 jobs:
   Documenter:
     name: Documentation
     runs-on: ubuntu-latest
     steps:
-      - uses: actions/checkout@v2
-      - uses: julia-actions/julia-buildpkg@latest
+      - uses: actions/checkout@v3
+      - name: "Update dependencies"
+        # Only use a released version of dependencies when creating documentation for a tag
+        if: ${{ github.event_name != 'push' || !startsWith(github.ref, 'refs/tags') }}
+        shell: julia --color=yes --project=docs {0}
+        run: |
+          using Pkg
+          Pkg.develop(PackageSpec(; path="./OndaEDFSchemas.jl"))
+      - name: "Use latest OndaEDF.jl"
+        shell: julia --color=yes --project=docs {0}
+        run: |
+          using Pkg
+          Pkg.develop(PackageSpec(; path="."))
       - uses: julia-actions/julia-docdeploy@latest
         env:
           GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}

diff --git a/OndaEDFSchemas.jl/Project.toml b/OndaEDFSchemas.jl/Project.toml
@@ -1,14 +1,14 @@
 name = "OndaEDFSchemas"
 uuid = "9c87d999-769b-4741-85b2-6f554d09e731"
 authors = ["Beacon Biosignals, Inc."]
-version = "0.2.0"
+version = "0.2.1"
 
 [deps]
 Legolas = "741b9549-f6ed-4911-9fbf-4a1c0c97f0cd"
 Onda = "e853f5be-6863-11e9-128d-476edb89bfb5"
 
 [compat]
-Legolas = "0.5"
+Legolas = "0.5.5"
 Onda = "0.15"
 julia = "1.6"
 

diff --git a/OndaEDFSchemas.jl/src/OndaEDFSchemas.jl b/OndaEDFSchemas.jl/src/OndaEDFSchemas.jl
@@ -35,8 +35,6 @@ export PlanV1, PlanV2, FilePlanV1, FilePlanV2, EDFAnnotationV1
     error::Union{Nothing,String} = coalesce(error, nothing)
 end
 
-Legolas.accepted_field_type(::PlanV1SchemaVersion, ::Type{String}) = AbstractString
-
 @version PlanV2 begin
     # EDF.SignalHeader fields
     label::String
@@ -52,7 +50,8 @@ Legolas.accepted_field_type(::PlanV1SchemaVersion, ::Type{String}) = AbstractStr
     seconds_per_record::Float64
     # Onda.SamplesInfoV2 fields (channels -> channel), may be missing
     sensor_type::Union{Missing,AbstractString} = lift(_validate_signal_sensor_type, sensor_type)
-    sensor_label::Union{Missing,AbstractString} = lift(_validate_signal_sensor_label, sensor_type)
+    sensor_label::Union{Missing,AbstractString} = lift(_validate_signal_sensor_label,
+                                                       coalesce(sensor_label, sensor_type))
     channel::Union{Missing,AbstractString} = lift(_validate_signal_channel, channel)
     sample_unit::Union{Missing,AbstractString} = lift(String, sample_unit)
     sample_resolution_in_unit::Union{Missing,Float64}
@@ -63,7 +62,7 @@ Legolas.accepted_field_type(::PlanV1SchemaVersion, ::Type{String}) = AbstractStr
     error::Union{Nothing,String} = coalesce(error, nothing)
 end
 
-Legolas.accepted_field_type(::PlanV2SchemaVersion, ::Type{String}) = AbstractString
+
 
 const PLAN_DOC_TEMPLATE = """
     @version PlanV{{ VERSION }} begin
@@ -156,11 +155,13 @@ end
 @doc _file_plan_doc(1) FilePlanV1
 @doc _file_plan_doc(2) FilePlanV2
 
-@schema "edf.annotation" EDFAnnotation
+const OndaEDFSchemaVersions = Union{PlanV1SchemaVersion,PlanV2SchemaVersion,FilePlanV1SchemaVersion,FilePlanV2SchemaVersion}
+Legolas.accepted_field_type(::OndaEDFSchemaVersions, ::Type{String}) = AbstractString
+# we need this because Arrow write can introduce a Missing for the error column
+# (I think because of how missing/nothing sentinels are handled?)
+Legolas.accepted_field_type(::OndaEDFSchemaVersions, ::Type{Union{Nothing,String}}) = Union{Nothing,Missing,AbstractString}
 
-@version EDFAnnotationV1 > AnnotationV1 begin
-    value::String
-end
+@schema "edf.annotation" EDFAnnotation
 
 """
     @version EDFAnnotationV1 > AnnotationV1 begin
@@ -170,7 +171,9 @@ end
 A Legolas-generated record type that represents a single annotation imported
 from an EDF Annotation signal.  The `value` field contains the annotation value
 as a string.
-
 """
+@version EDFAnnotationV1 > AnnotationV1 begin
+    value::String
+end
 
 end # module
diff --git a/Project.toml b/Project.toml
@@ -1,7 +1,7 @@
 name = "OndaEDF"
 uuid = "e3ed2cd1-99bf-415e-bb8f-38f4b42a544e"
 authors = ["Beacon Biosignals, Inc."]
-version = "0.10.3"
+version = "0.11.0"
 
 [deps]
 Compat = "34da2185-b29b-5c13-b0c7-acf172513d20"
@@ -20,20 +20,22 @@ UUIDs = "cf7118a7-6976-5b1a-9a39-7adc72f591a4"
 Compat = "3.32"
 EDF = "0.7"
 FilePathsBase = "0.9"
-Legolas = "0.3.3, 0.4"
-Onda = "0.12, 0.13, 0.14"
-OndaEDFSchemas = "0.1"
+Legolas = "0.5"
+Onda = "0.15"
+OndaEDFSchemas = "0.2.1"
 PrettyTables = "1.3"
+StableRNGs = "1"
 StatsBase = "0.33"
 Tables = "1.4"
-TimeSpans = "0.2"
+TimeSpans = "0.3"
 julia = "1.6"
 
 [extras]
 FilePathsBase = "48062228-2e41-5def-b9a4-89aafe57970f"
 Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c"
+StableRNGs = "860ef19b-820b-49d6-a774-d7a799459cd3"
 Statistics = "10745b16-79ce-11e8-11f9-7d13ad32a3b2"
 Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40"
 
 [targets]
-test = ["FilePathsBase", "Test", "Random", "Statistics"]
+test = ["FilePathsBase", "Test", "Random", "StableRNGs", "Statistics"]
diff --git a/README.md b/README.md
@@ -3,7 +3,6 @@
 [![CI](https://github.com/beacon-biosignals/OndaEDF.jl/actions/workflows/CI.yml/badge.svg)](https://github.com/beacon-biosignals/OndaEDF.jl/actions/workflows/CI.yml)
 [![codecov](https://codecov.io/gh/beacon-biosignals/OndaEDF.jl/branch/master/graph/badge.svg?token=7oZhx7P9kq)](https://codecov.io/gh/beacon-biosignals/OndaEDF.jl)
 [![](https://img.shields.io/badge/docs-stable-blue.svg)](https://beacon-biosignals.github.io/OndaEDF.jl/stable)
-[![](https://img.shields.io/badge/docs-dev-blue.svg)](https://beacon-biosignals.github.io/OndaEDF.jl/dev)
 
 OndaEDF provides functionality to convert/import/export EDF files to/from Onda recordings; see the `edf_to_onda_samples`, `edf_to_onda_annotations`, and `onda_to_edf` docs/tests for details.
 
@@ -33,8 +32,8 @@ end
 ```
 The executed plan as returned is a [Tables.jl](https://github.com/JuliaData/Tables.jl)-compatible table, with one row per `EDF.Signal` and columns for
 - the fields of the original `EDF.SignalHeader`
-- the fields of the generated `Onda.SamplesInfo`, including
-  - `:kind`, the extracted signal kind
+- the fields of the generated `Onda.SamplesInfoV2`, including
+  - `:sensor_type`, the extracted sensor type
   - `:channel`, the extracted channel label (instead of `:channels`, since each `EDF.Signal` is exactly one channel in `Onda.Samples`)
 - `:edf_signal_index`, the 1-based numerical index of the source signal in `edf.signals`
 - `:onda_signal_index`, the ordinal index of the resulting samples (not necessarily the index into `samples`, since some groups might be skipped)
@@ -45,7 +44,7 @@ OndaEDF includes the OndaEDFSchemas sub-package, which provides [Legolas.jl Sche
 The `write_plan(io_or_path, plan_table)` function provides a wrapper around [`Legolas.write`](https://beacon-biosignals.github.io/Legolas.jl/stable/#Legolas.write) which writes a table following the `"ondaedf.file-plan@2"` schema to a generic path-like destination.
 If you are including the plan tables in a dataset, you can add a dependency on OndaEDFSchemas to make sure the relevant schemas are defined without the full OndaEDF dependency.
 
-It can also be manipulated programmatically, by manually or semi-automatically modifying the `:kind`, `:channel`, or other columns to correct for missed signals by the default labels (for which `:kind` and `:channel` will be `missing`).
+It can also be manipulated programmatically, by manually or semi-automatically modifying the `:sensor_type`, `:channel`, or other columns to correct for missed signals by the default labels (for which `:sensor_type` and `:channel` will be `missing`).
 We give two examples of how such a workflow might work here: one where the plan is modified before being executed, and another where EDF signal headers are _preprocessed_ before the plan is constructed.
 
 ### Modification of a plan
@@ -59,7 +58,7 @@ edf = EDF.File(my_edf_file_path)
 plans = plan_edf_to_onda_samples(edf; label=my_labels)
 
 function fix_millivolts(plan)
-    if plan.sample_unit == "millivolt" && plan.kind == "eeg"
+    if plan.sample_unit == "millivolt" && plan.sensor_type == "eeg"
         sample_resolution_in_unit = plan.sample_resolution_in_unit * 1000
         sample_offset_in_unit = plan.sample_offset_in_unit * 1000
         return Tables.rowmerge(plan; sample_unit="microvolt",
@@ -75,23 +74,22 @@ samples, plan_executed = edf_to_onda_samples(edf, new_plan)
 ```
 
 As another, similar example, sometimes EMG channels get recorded with different physical units.
-In such a case, OndaEDF will store them with different `kind` values (`emg_1`, `emg_2`, etc.).
-This can be corrected in a similar way, for exmaple by converting millivolts to microvolts (adjusting of course depending on the nature of your dataset) and re-grouping into Onda signals:
-
+In such a case, OndaEDF cannot merge these channels and will create multiple separate `Samples` objects which each have `sensor_type = "emg"`.
+This can be corrected in a similar way, for example by converting millivolts to microvolts (adjusting of course depending on the nature of your dataset) and re-grouping into Onda samples:
 ```julia
 edf = EDF.File(my_edf_file_path)
 plans = plan_edf_to_onda_samples(edf; label=my_labels)
 
 function fix_emg(plan)
-    if startswith(plan.kind, "emg")
+    if plan.sensor_type == "emg"
         if plan.sample_unit == "millivolt"
             sample_resolution_in_unit = plan.sample_resolution_in_unit * 1000
             sample_offset_in_unit = plan.sample_offset_in_unit * 1000
             plan = Tables.rowmerge(plan; sample_unit="microvolt",
                                    sample_resolution_in_unit,
                                    sample_offset_in_unit)
         end
-        return Tables.rowmerge(plan; kind="emg")
+        return plan
     else
         return plan
     end

diff --git a/docs/Project.toml b/docs/Project.toml
@@ -1,6 +1,5 @@
 [deps]
 Documenter = "e30172f5-a6a5-5a46-863b-614d45cd2de4"
-OndaEDF = "e3ed2cd1-99bf-415e-bb8f-38f4b42a544e"
 
 [compat]
 Documenter = "0.26"
diff --git a/docs/make.jl b/docs/make.jl
@@ -1,7 +1,8 @@
 using OndaEDF
+using OndaEDFSchemas
 using Documenter
 
-makedocs(modules=[OndaEDF],
+makedocs(modules=[OndaEDF, OndaEDFSchemas],
          sitename="OndaEDF",
          authors="Beacon Biosignals and other contributors",
          pages=["API Documentation" => "index.md"])

diff --git a/docs/src/index.md b/docs/src/index.md
@@ -9,18 +9,26 @@ CurrentModule = OndaEDF
 OndaEDF.jl prefers "self-service" import over "automagic", and provides
 functionality to extract
 [`Onda.Samples`](https://beacon-biosignals.github.io/Onda.jl/stable/#Samples-1)
-and
-[`Onda.Annotation`](https://beacon-biosignals.github.io/Onda.jl/stable/#Onda.Annotation)s
+and [`EDFAnnotationV1`](@ref)s (which extend 
+[`Onda.AnnotationV1`](https://beacon-biosignals.github.io/Onda.jl/stable/#Onda.AnnotationV1)s)
 from an `EDF.File`.  These can be written to disk (with
 [`Onda.store`](https://beacon-biosignals.github.io/Onda.jl/stable/#Onda.store) /
-[`Onda.write_annotations`](https://beacon-biosignals.github.io/Onda.jl/stable/#Onda.write_annotations))
+[`Legolas.write`](https://beacon-biosignals.github.io/Legolas.jl/stable/#Legolas.write)
 or manipulated in memory as desired.
 
+### Import signal data as `Samples`
+
 ```@docs
 edf_to_onda_samples
 plan_edf_to_onda_samples
 plan_edf_to_onda_samples_groups
+```
+
+### Import annotations
+
+```@docs
 edf_to_onda_annotations
+EDFAnnotationV1
 ```
 
 ### Import plan table schemas
@@ -34,7 +42,7 @@ write_plan
 ### Full-service import
 
 For a more "full-service" experience, OndaEDF.jl also provides functionality to
-extract `Onda.Samples` and `Onda.Annotations` and then write them to disk:
+extract `Onda.Samples` and `EDFAnnotationV1`s and then write them to disk:
 
 ```@docs
 store_edf_as_onda
@@ -46,6 +54,7 @@ store_edf_as_onda
 OndaEDF.match_edf_label
 OndaEDF.merge_samples_info
 OndaEDF.onda_samples_from_edf_signals
+OndaEDF.promote_encodings
 ```
 
 ## Export EDF from Onda

diff --git a/src/OndaEDF.jl b/src/OndaEDF.jl
@@ -12,16 +12,13 @@ using TimeSpans
 using Tables
 using UUIDs
 
-using Legolas: @row, lift
+using Legolas: lift
 using Tables: rowmerge
 
 export write_plan
 export edf_to_onda_samples, edf_to_onda_annotations, plan_edf_to_onda_samples, plan_edf_to_onda_samples_groups, store_edf_as_onda
 export onda_to_edf
 
-# can be dropped if we drop Onda<0.14
-sample_type(x) = isdefined(Onda, :sample_type) ? Onda.sample_type(x) : x.sample_type
-
 include("standards.jl")
 
 """
@@ -32,7 +29,7 @@ Write a plan table to `io_or_path` using `Legolas.write`, using the
 """
 function write_plan(io_or_path, plan_table; kwargs...)
     return Legolas.write(io_or_path, plan_table,
-                         Legolas.Schema("ondaedf.file-plan@1");
+                         Legolas.SchemaVersion("ondaedf.file-plan", 2);
                          kwargs...)
 end
 

diff --git a/src/export_edf.jl b/src/export_edf.jl
@@ -10,7 +10,7 @@ struct SignalExtrema
 end
 
 SignalExtrema(samples::Samples) = SignalExtrema(samples.info)
-function SignalExtrema(info::SamplesInfo)
+function SignalExtrema(info::SamplesInfoV2)
     digital_extrema = (typemin(sample_type(info)), typemax(sample_type(info)))
     physical_extrema = @. (info.sample_resolution_in_unit * digital_extrema) + info.sample_offset_in_unit
     return SignalExtrema(physical_extrema..., digital_extrema...)
@@ -103,12 +103,12 @@ function onda_samples_to_edf_signals(onda_samples::AbstractVector{<:Samples}, se
         if sizeof(sample_type(samples.info)) > sizeof(Int16)
             decoded_samples = Onda.decode(samples)
             scaled_resolution = samples.info.sample_resolution_in_unit * (sizeof(sample_type(samples.info)) / sizeof(Int16))
-            encode_info = SamplesInfo(Tables.rowmerge(samples.info; sample_type=Int16, sample_resolution_in_unit=scaled_resolution))
+            encode_info = SamplesInfoV2(Tables.rowmerge(samples.info; sample_type=Int16, sample_resolution_in_unit=scaled_resolution))
             samples = encode(Onda.Samples(decoded_samples.data, encode_info, false))
         else
             samples = Onda.encode(samples)
         end
-        signal_name = samples.info.kind
+        signal_name = samples.info.sensor_type
         extrema = SignalExtrema(samples)
         for channel_name in samples.info.channels
             sample_count = edf_sample_count_per_record(samples, seconds_per_record)