From bd0222a28807c80023a2db5ccd60512cd6119de8 Mon Sep 17 00:00:00 2001 From: Ben Dichter Date: Sun, 14 Jul 2024 16:57:17 -0400 Subject: [PATCH 1/6] add max_channel column for phy interface --- .../ecephys/phy/phydatainterface.py | 51 ++++++++++++++++++- 1 file changed, 49 insertions(+), 2 deletions(-) diff --git a/src/neuroconv/datainterfaces/ecephys/phy/phydatainterface.py b/src/neuroconv/datainterfaces/ecephys/phy/phydatainterface.py index cac24faa21..0f9ebca303 100644 --- a/src/neuroconv/datainterfaces/ecephys/phy/phydatainterface.py +++ b/src/neuroconv/datainterfaces/ecephys/phy/phydatainterface.py @@ -1,7 +1,11 @@ -from typing import Optional +from pathlib import Path +from typing import Optional, Literal + +import numpy as np +from pynwb.file import NWBFile from ..basesortingextractorinterface import BaseSortingExtractorInterface -from ....utils import FolderPathType +from ....utils import FolderPathType, DeepDict class PhySortingInterface(BaseSortingExtractorInterface): @@ -23,6 +27,23 @@ def get_source_schema(cls) -> dict: ] = "Path to the output Phy folder (containing the params.py)." 
return source_schema + def get_max_channel(self): + folder_path = Path(self.source_data['folder_path']) + + templates = np.load(str(folder_path / 'templates.npy')) + channel_map = np.load(str(folder_path / 'channel_map.npy')) + whitening_mat_inv = np.load(str(folder_path / "whitening_mat_inv.npy")) + templates_unwh = templates @ whitening_mat_inv + + cluster_ids = self.sorting_extractor.get_property('original_cluster_id') + templates = templates_unwh[cluster_ids] + + max_over_time = np.max(templates, axis=1) + idx_max_channel = np.argmax(max_over_time, axis=1) + max_channel = channel_map[idx_max_channel].ravel() + + return max_channel + def __init__( self, folder_path: FolderPathType, @@ -41,3 +62,29 @@ def __init__( verbose : bool, default: True """ super().__init__(folder_path=folder_path, exclude_cluster_groups=exclude_cluster_groups, verbose=verbose) + + def add_to_nwbfile( + self, + nwbfile: NWBFile, + metadata: Optional[DeepDict] = None, + stub_test: bool = False, + write_ecephys_metadata: bool = False, + write_as: Literal["units", "processing"] = "units", + units_name: str = "units", + units_description: str = "Autogenerated by neuroconv.", + ): + + super().add_to_nwbfile( + nwbfile=nwbfile, + metadata=metadata, + stub_test=stub_test, + write_ecephys_metadata=write_ecephys_metadata, + write_as=write_as, + units_name=units_name, + units_description=units_description, + ) + + max_channel = self.get_max_channel() + nwbfile.units.add_column(name='max_channel', description='Channel with maximum amplitude', data=max_channel) + + return nwbfile From ddba67e47428ff4ed1579be1ff599b3a01cb9a99 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Sun, 14 Jul 2024 21:01:00 +0000 Subject: [PATCH 2/6] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- .../datainterfaces/ecephys/phy/phydatainterface.py | 14 +++++++------- 1 file changed, 7 insertions(+), 7 
deletions(-) diff --git a/src/neuroconv/datainterfaces/ecephys/phy/phydatainterface.py b/src/neuroconv/datainterfaces/ecephys/phy/phydatainterface.py index 0f9ebca303..59fb0ab4e3 100644 --- a/src/neuroconv/datainterfaces/ecephys/phy/phydatainterface.py +++ b/src/neuroconv/datainterfaces/ecephys/phy/phydatainterface.py @@ -1,11 +1,11 @@ from pathlib import Path -from typing import Optional, Literal +from typing import Literal, Optional import numpy as np from pynwb.file import NWBFile from ..basesortingextractorinterface import BaseSortingExtractorInterface -from ....utils import FolderPathType, DeepDict +from ....utils import DeepDict, FolderPathType class PhySortingInterface(BaseSortingExtractorInterface): @@ -28,14 +28,14 @@ def get_source_schema(cls) -> dict: return source_schema def get_max_channel(self): - folder_path = Path(self.source_data['folder_path']) + folder_path = Path(self.source_data["folder_path"]) - templates = np.load(str(folder_path / 'templates.npy')) - channel_map = np.load(str(folder_path / 'channel_map.npy')) + templates = np.load(str(folder_path / "templates.npy")) + channel_map = np.load(str(folder_path / "channel_map.npy")) whitening_mat_inv = np.load(str(folder_path / "whitening_mat_inv.npy")) templates_unwh = templates @ whitening_mat_inv - cluster_ids = self.sorting_extractor.get_property('original_cluster_id') + cluster_ids = self.sorting_extractor.get_property("original_cluster_id") templates = templates_unwh[cluster_ids] max_over_time = np.max(templates, axis=1) @@ -85,6 +85,6 @@ def add_to_nwbfile( ) max_channel = self.get_max_channel() - nwbfile.units.add_column(name='max_channel', description='Channel with maximum amplitude', data=max_channel) + nwbfile.units.add_column(name="max_channel", description="Channel with maximum amplitude", data=max_channel) return nwbfile From 6a6af5a5693216b88487a51afd5037c1af3296f0 Mon Sep 17 00:00:00 2001 From: Ben Dichter Date: Mon, 12 Aug 2024 21:12:46 -0400 Subject: [PATCH 3/6] Add a docs page 
in the user guide about adding trials --- docs/user_guide/adding_trials.rst | 36 ++++++++++++++++++++++++++++++ docs/user_guide/datainterfaces.rst | 25 +++++++++++++++++++-- docs/user_guide/index.rst | 1 + docs/user_guide/nwbconverter.rst | 17 ++++++++++---- 4 files changed, 73 insertions(+), 6 deletions(-) create mode 100644 docs/user_guide/adding_trials.rst diff --git a/docs/user_guide/adding_trials.rst b/docs/user_guide/adding_trials.rst new file mode 100644 index 0000000000..7fbc63c611 --- /dev/null +++ b/docs/user_guide/adding_trials.rst @@ -0,0 +1,36 @@ +.. _adding_trials: + +Adding Trials to NWB Files +========================== + +NWB allows you to store information about time intervals in a structured way. These structures are often used to store +information about trials, epochs, or other time intervals in the data. +You can add time intervals to an NWBFile object before writing it using PyNWB. +For example, here is how you would add trials to an NWB file: + +.. code-block:: python + + # you can add custom columns to the trials table + nwbfile.add_trials_column(name="trial_type", description="the type of trial") + + nwbfile.add_trial(start_time=0.0, stop_time=1.0, trial_type="go") + nwbfile.add_trial(start_time=1.0, stop_time=2.0, trial_type="nogo") + +You can also add epochs or other types of time intervals to an NWB File. See +`PyNWB Annotating Time Intervals <https://pynwb.readthedocs.io/en/stable/tutorials/general/plot_timeintervals.html>`_ +for more information. + +Once this information is added, you can write the NWB file to disk. + +.. code-block:: python + + from neuroconv.tools.nwb_helpers import configure_and_write_nwbfile + + configure_and_write_nwbfile(nwbfile, save_path="path/to/destination.nwb", backend="hdf5") + +.. note:: + + NWB generally recommends storing the full continuous stream of data in the NWB file, and then adding trials or + epochs as time intervals. Trial-aligning the data is then done on-the-fly when reading the file.
This allows for + more flexibility in the analysis of the data. \ No newline at end of file diff --git a/docs/user_guide/datainterfaces.rst b/docs/user_guide/datainterfaces.rst index 8752bd387d..3fa1d07c3c 100644 --- a/docs/user_guide/datainterfaces.rst +++ b/docs/user_guide/datainterfaces.rst @@ -143,8 +143,8 @@ Here we can see that ``metadata["Ecephys"]["ElectrodeGroup"][0]["location"]`` is Use ``.get_metadata_schema()`` to get the schema of the metadata dictionary. This schema is a JSON-schema-like dictionary that specifies required and optional fields in the metadata dictionary. See :ref:`metadata schema ` for more information. -4. Run conversion -~~~~~~~~~~~~~~~~~ +4a. Run conversion +~~~~~~~~~~~~~~~~~~ The ``.run_conversion`` method takes the (edited) metadata dictionary and the path of an NWB file, and launches the actual data conversion into NWB. @@ -159,3 +159,24 @@ This method reads and writes large datasets piece-by-piece, so you can convert large datasets without overloading the computer's available RAM. It also uses good defaults for data chunking and lossless compression, reducing the file size of the output NWB file and optimizing the file for cloud compute. + +4b. Create an in-memory NWB file +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +If you want to create an in-memory NWB file, you can use the ``.create_nwbfile`` method. + +.. code-block:: python + + nwbfile = spikeglx_interface.create_nwbfile(metadata=metadata) + +This is useful for adding data such as trials, epochs, or other time intervals to the NWB file. See +:ref:`Adding Time Intervals to NWB Files <adding_trials>` for more information. + +This does not load large datasets into memory. Those remain in the source files and are read piece-by-piece during the +write process. Once you make all the modifications you want to the NWBFile, you can save it to disk. The following code +automatically optimizes datasets for cloud compute and writes the file to disk. + +..
code-block:: python + + from neuroconv.tools.nwb_helpers import configure_and_write_nwbfile + + configure_and_write_nwbfile(nwbfile, save_path="path/to/destination.nwb", backend="hdf5") \ No newline at end of file diff --git a/docs/user_guide/index.rst b/docs/user_guide/index.rst index e8c0827c82..4077f49bea 100644 --- a/docs/user_guide/index.rst +++ b/docs/user_guide/index.rst @@ -20,6 +20,7 @@ and synchronize data across multiple sources. datainterfaces nwbconverter + adding_trials temporal_alignment csvs expand_path diff --git a/docs/user_guide/nwbconverter.rst b/docs/user_guide/nwbconverter.rst index 2360905e69..e1db639458 100644 --- a/docs/user_guide/nwbconverter.rst +++ b/docs/user_guide/nwbconverter.rst @@ -44,21 +44,30 @@ keys of ``data_interface_classes``. This creates an :py:class:`.NWBConverter` object that can aggregate and distribute across the data interfaces. To fetch metadata across all of the interfaces and merge -them together, call:: +them together, call: + +.. code-block:: python metadata = converter.get_metadata() -The metadata can then be manually modified with any additional user-input, just like ``DataInterface`` objects:: +The metadata can then be manually modified with any additional user-input, just like ``DataInterface`` objects: + +.. code-block:: python metadata["NWBFile"]["session_description"] = "NeuroConv tutorial." metadata["NWBFile"]["experimenter"] = "My name" metadata["Subject"]["subject_id"] = "ID of experimental subject" -The final metadata dictionary should follow the form defined by -``converter.get_metadata_schema()``. Now run the entire conversion with:: +The final metadata dictionary should follow the form defined by :meth:`.NWBConverter.get_metadata_schema`. +Now run the entire conversion with: + +..
code-block:: python converter.run_conversion(metadata=metadata, nwbfile_path="my_nwbfile.nwb") +Like ``DataInterface`` objects, :py:class:`.NWBConverter` objects can output an in-memory NWBFile object by +calling :meth:`.NWBConverter.create_nwbfile`. This can be useful for debugging or for further processing. + Though this example was only for two data streams (recording and spike-sorted data), it can easily extend to any number of sources, including video of a subject, extracted position estimates, stimuli, or any other data source. From c93a7ce6a27eae214b9237d9ba763644616ade92 Mon Sep 17 00:00:00 2001 From: Ben Dichter Date: Fri, 16 Aug 2024 11:35:45 -0400 Subject: [PATCH 4/6] add max channel through sorting extractor properties. Add tests for PhyInterface --- .../ecephys/phy/phydatainterface.py | 11 ++++++----- tests/test_on_data/test_sorting_interfaces.py | 18 ++++++++++++++++++ 2 files changed, 24 insertions(+), 5 deletions(-) diff --git a/src/neuroconv/datainterfaces/ecephys/phy/phydatainterface.py b/src/neuroconv/datainterfaces/ecephys/phy/phydatainterface.py index 59fb0ab4e3..599b7f0f90 100644 --- a/src/neuroconv/datainterfaces/ecephys/phy/phydatainterface.py +++ b/src/neuroconv/datainterfaces/ecephys/phy/phydatainterface.py @@ -31,7 +31,7 @@ def get_max_channel(self): folder_path = Path(self.source_data["folder_path"]) templates = np.load(str(folder_path / "templates.npy")) - channel_map = np.load(str(folder_path / "channel_map.npy")) + channel_map = np.load(str(folder_path / "channel_map.npy")).T whitening_mat_inv = np.load(str(folder_path / "whitening_mat_inv.npy")) templates_unwh = templates @ whitening_mat_inv @@ -71,8 +71,12 @@ def add_to_nwbfile( write_ecephys_metadata: bool = False, write_as: Literal["units", "processing"] = "units", units_name: str = "units", - units_description: str = "Autogenerated by neuroconv.", + units_description: str = "Imported from Phy", + include_max_channel: bool = True, ): + if include_max_channel and "max_channel" not 
in self.sorting_extractor.get_property_keys(): + max_channels = self.get_max_channel() + self.sorting_extractor.set_property("max_channel", max_channels) super().add_to_nwbfile( nwbfile=nwbfile, @@ -84,7 +88,4 @@ def add_to_nwbfile( units_description=units_description, ) - max_channel = self.get_max_channel() - nwbfile.units.add_column(name="max_channel", description="Channel with maximum amplitude", data=max_channel) - return nwbfile diff --git a/tests/test_on_data/test_sorting_interfaces.py b/tests/test_on_data/test_sorting_interfaces.py index 8898d780be..f682a50f5d 100644 --- a/tests/test_on_data/test_sorting_interfaces.py +++ b/tests/test_on_data/test_sorting_interfaces.py @@ -2,6 +2,7 @@ from unittest import TestCase import numpy as np +from numpy.testing import assert_array_equal from pynwb import NWBHDF5IO from neuroconv.datainterfaces import ( @@ -16,6 +17,7 @@ from neuroconv.tools.testing.data_interface_mixins import ( SortingExtractorInterfaceTestMixin, ) +from spikeinterface.extractors.nwbextractors import read_nwbfile try: from .setup_paths import ECEPHY_DATA_PATH as DATA_PATH @@ -166,6 +168,22 @@ class TestPhySortingInterface(SortingExtractorInterfaceTestMixin, TestCase): interface_kwargs = dict(folder_path=str(DATA_PATH / "phy" / "phy_example_0")) save_directory = OUTPUT_PATH + def check_read_nwb(self, nwbfile_path: str): + # Test that the max channel is correctly extracted + super().check_read_nwb(nwbfile_path) + + # check that the max channel is correctly extracted + max_channel = self.interface.get_max_channel() + assert_array_equal(max_channel, [1, 2, 5, 5, 6, 21, 13, 13, 21, 21, 22, 22, 24]) + + # check that max channel was properly added to sorting extractor + assert_array_equal(self.interface.sorting_extractor.get_property("max_channel"), max_channel) + + # check that max channels were properly added to the NWB file + nwbfile = read_nwbfile(file_path=nwbfile_path, backend="hdf5") + assert_array_equal(nwbfile.units["max_channel"].data[:], 
max_channel) + + class TestPlexonSortingInterface(SortingExtractorInterfaceTestMixin, TestCase): data_interface_cls = PlexonSortingInterface From 379449f5db1c3121083b1af1b1e31e27ffc52d9b Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Fri, 16 Aug 2024 15:36:20 +0000 Subject: [PATCH 5/6] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- tests/test_on_data/test_sorting_interfaces.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/tests/test_on_data/test_sorting_interfaces.py b/tests/test_on_data/test_sorting_interfaces.py index f682a50f5d..4603e9b1b2 100644 --- a/tests/test_on_data/test_sorting_interfaces.py +++ b/tests/test_on_data/test_sorting_interfaces.py @@ -4,6 +4,7 @@ import numpy as np from numpy.testing import assert_array_equal from pynwb import NWBHDF5IO +from spikeinterface.extractors.nwbextractors import read_nwbfile from neuroconv.datainterfaces import ( BlackrockRecordingInterface, @@ -17,7 +18,6 @@ from neuroconv.tools.testing.data_interface_mixins import ( SortingExtractorInterfaceTestMixin, ) -from spikeinterface.extractors.nwbextractors import read_nwbfile try: from .setup_paths import ECEPHY_DATA_PATH as DATA_PATH @@ -174,7 +174,7 @@ def check_read_nwb(self, nwbfile_path: str): # check that the max channel is correctly extracted max_channel = self.interface.get_max_channel() - assert_array_equal(max_channel, [1, 2, 5, 5, 6, 21, 13, 13, 21, 21, 22, 22, 24]) + assert_array_equal(max_channel, [1, 2, 5, 5, 6, 21, 13, 13, 21, 21, 22, 22, 24]) # check that max channel was properly added to sorting extractor assert_array_equal(self.interface.sorting_extractor.get_property("max_channel"), max_channel) @@ -184,7 +184,6 @@ def check_read_nwb(self, nwbfile_path: str): assert_array_equal(nwbfile.units["max_channel"].data[:], max_channel) - class TestPlexonSortingInterface(SortingExtractorInterfaceTestMixin, TestCase): 
data_interface_cls = PlexonSortingInterface interface_kwargs = dict(file_path=str(DATA_PATH / "plexon" / "File_plexon_2.plx")) From a82d98bac738edde8bd2008b00d68b5589f20d65 Mon Sep 17 00:00:00 2001 From: Ben Dichter Date: Fri, 16 Aug 2024 12:20:26 -0400 Subject: [PATCH 6/6] Update CHANGELOG.md --- CHANGELOG.md | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index bb9b5f21fa..f4cd985674 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,26 +1,25 @@ -# Upcoming +# Upcoming (v0.6.0) ### Deprecations * Deprecated `WaveformExtractor` usage. [PR #821](https://github.com/catalystneuro/neuroconv/pull/821) * Deprecated use of `compression` and `compression_options` in `VideoInterface` [PR #1005](https://github.com/catalystneuro/neuroconv/pull/1005) ### Features -* Added MedPCInterface for operant behavioral output files. [PR #883](https://github.com/catalystneuro/neuroconv/pull/883) +* Added `MedPCInterface` for operant behavioral output files. [PR #883](https://github.com/catalystneuro/neuroconv/pull/883) * Support `SortingAnalyzer` in the `SpikeGLXConverterPipe`. [PR #821](https://github.com/catalystneuro/neuroconv/pull/821) * Add argument to `add_electrodes` that grants fine control of what to do with the missing values. As a side effect this drops the implicit casting to int when writing int properties to the electrodes table [PR #985](https://github.com/catalystneuro/neuroconv/pull/985) * Add Plexon2 support [PR #918](https://github.com/catalystneuro/neuroconv/pull/918) -* Converter working with multiple VideoInterface instances [PR #914](https://github.com/catalystneuro/neuroconv/pull/914) +* Converter working with multiple `VideoInterface` instances [PR #914](https://github.com/catalystneuro/neuroconv/pull/914) * Added helper function `neuroconv.tools.data_transfers.submit_aws_batch_job` for basic automated submission of AWS batch jobs.
[PR #384](https://github.com/catalystneuro/neuroconv/pull/384) * Data interfaces `run_conversion` method now performs metadata validation before running the conversion. [PR #949](https://github.com/catalystneuro/neuroconv/pull/949) * Introduced `null_values_for_properties` to `add_units_table` to give user control over null values behavior [PR #989](https://github.com/catalystneuro/neuroconv/pull/989) - +* For `PhySortingInterface`, automatically calculate `max_channel` for each unit and add to units table. [PR #961](https://github.com/catalystneuro/neuroconv/pull/961) ### Bug fixes * Fixed the default naming of multiple electrical series in the `SpikeGLXConverterPipe`. [PR #957](https://github.com/catalystneuro/neuroconv/pull/957) * Write new properties to the electrode table using the global identifiers channel_name, group [PR #984](https://github.com/catalystneuro/neuroconv/pull/984) * Removed a bug where int64 was cast lossily to float [PR #989](https://github.com/catalystneuro/neuroconv/pull/989) - ### Improvements * The `OpenEphysBinaryRecordingInterface` now uses `lxml` for extracting the session start time from the settings.xml file and does not depend on `pyopenephys` anymore. [PR #971](https://github.com/catalystneuro/neuroconv/pull/971) * Swap the majority of package setup and build steps to `pyproject.toml` instead of `setup.py`. [PR #955](https://github.com/catalystneuro/neuroconv/pull/955)