From f3c6f4b649788134b4fe687b7742a5a8e86611b3 Mon Sep 17 00:00:00 2001 From: David Nicholson Date: Sun, 10 Sep 2023 21:42:08 -0400 Subject: [PATCH] TST/CLN: Fix unit tests (#693) * Remove dataset_path option from tests/data_for_tests/configs/ConvEncoderUMAP_eval_audio_cbin_annot_notmat.toml * Fix use_dataset_from_config option to be null for ConvEncoderUMAP_eval, stops generate-test-data script from crashing * Fix 'teentytweetynet' -> 'TweetyNet' in SPECT_DIR_NPZ constant in tests/fixtures/spect.py * Fix config name declared as constant 'teenytweetynet' -> 'TeenyTweetyNet'; fix reference to Metadata in tests/test_datasets/test_window_dataset/conftest.py * Change options in ConvEncoderUMAP configs so training doesn't take forever * Set num_workers = 16 in all test data configs * Further modify config options in tests/data_for_tests/configs/ConvEncoderUMAP_train_audio_cbin_annot_notmat.toml to make training run faster * Fix how we handle labelmap_path in vaktestdata/configs.py * Add tests/test_datasets/test_frame_classification/ with test_window_dataset.py * Remove tests/test_datasets/test_window_dataset/ * Move test_datasets/test_metadata.py into test_frame_classification, fix unit tests * Delete tests/test_models/test_das.py for now * Add missing parameter name 'split' to parametrize in test_frame_classification/test_window_dataset.py * Fix 'WindowedFrameClassificationModel' -> 'FrameClassificationModel' in tests/test_models/test_decorator.py * Remove tests/test_nets/test_das for now * Remove tests/test_prep/test_frame_classification/test_helper.py -- helper module no longer exists * Remove extra tests and set single unit test to assert False for now in tests/test_prep/test_audio_dataset.py * Fix typo in docstring in src/vak/prep/frame_classification/dataset_arrays.py * Add two line breaks after imports in src/vak/prep/frame_classification/learncurve.py * WIP: fix tests/test_prep/test_frame_classification/test_dataset_arrays.py * WIP: fix tests/test_prep/test_frame_classification/test_learncurve.py * Rename 'labeled_timebins' -> 'frame_labels' in test_transforms/ * Fix key used by list_of_schematized_configs fixture -- 'configs' -> 'config_metadata' * Change 'teenytweetynet', 'tweetynet' -> 'TeenyTweetyNet', 'TweetyNet' throughout tests * Fix unit tests in tests/test_datasets/test_frame_classification/test_metadata.py * Remove DATALOADER from parametrize in a unit test in tests/test_config/test_parse.py * Fix how we load metadata in fixture in tests/fixtures/csv.py * Fix order of attributes in Metadata docstring in src/vak/datasets/frame_classification/metadata.py * Fix unit test in tests/test_datasets/test_frame_classification/test_window_dataset.py * Move test_datasets/test_seq/test_validators.py -> tests/test_prep/test_sequence_dataset.py, fix unit test * Import sequence_dataset in prep/__init__.py * Fix tests/test_eval/test_eval.py so it works * Rewrite test_eval/test_eval.py as tests/test_eval/test_frame_classification.py * Rewrite test_learncurve/test_learncurve.py as tests/test_learncurve/test_frame_classification.py * Add/fix imports in src/vak/learncurve/__init__.py * Fix module-level docstring in tests/test_eval/test_frame_classification.py * Fix unit tests in tests/test_learncurve/test_frame_classification.py * Make results_path not default to None, fix docstring in src/vak/learncurve/frame_classification.py * Remove stray backslash in docstring in src/vak/nets/tweetynet.py * Fix unit tests so they run in tests/test_models/test_base.py * Fix __init__ docstring for TweetyNet and 
TeenyTweetyNet so they define num_input_channels + num_freqbins, not input_shape * Revise docstring for register_model * Import 'model' and 'model_family' decorators in vak/models/__init__.py * Add MockModelFamily to tests/test_models/conftest.py and revise some of the docstrings there * Fix unit tests for vak.models.decorator.model in tests/test_models/test_decorator.py * Remove a .lower in a test in tests/test_models/test_windowed_frame_classification_model.py so that it doesn't fail * Reorganize / revise docstrings / add classes in tests/test_models/conftest.py * WIP: Add unit tests to tests/test_models/test_registry.py * Fix type annotation in src/vak/models/registry.py * Finish writing unit tests in tests/test_models/test_registry.py * Rename test_models/test_windowed_frame_classification_model.py -> test_frame_classification_model.py and fix tests * Refactor src/vak/models/registry.py to just use MODEL_REGISTRY dict -- previous way was unnecessarily convoluted * Have src/vak/models/get.py use registry.MODEL_REGISTRY * Fix tests in tests/test_models/test_registry.py after refactoring module * Fix unit test in tests/test_models/test_decorator.py so it removes the models it registers -- this way we don't raise errors in other unit tests because MockModel is already registered * Fix model family name / arguments in test_tweetynet.py + test_teenytweetynet.py * Add ignore for torchmetrics warning in pyproject.toml * Remove reference to pytest.Mark in tests/conftest.py that caused warning -- was unused anyway * Fix unit tests in tests/test_nets/test_tweetynet.py * Remove unused variable in tests/conftest.py * Remove unused import in tests/test_models/test_teenytweetynet.py * Fix unit tests in tests/test_nets/test_teenytweetynet.py * WIP: Fix tests in tests/test_predict/test_frame_classification.py * Remove stale comment from src/vak/eval/parametric_umap.py * Remove get_default_padding function from src/vak/models/convencoder_umap.py -- deciding to not do this * Fix output_dir option in tests/data_for_tests/configs/ConvEncoderUMAP_eval_audio_cbin_annot_notmat.toml * Remove calls to convencoder_umap.get_default_padding in src/vak/train/parametric_umap.py * Remove call to convencoder_umap.get_default_padding in src/vak/eval/parametric_umap.py * Do not add padding in src/vak/transforms/defaults/parametric_umap.py * Have ConvEncoderUMAP eval config re-use dataset from train config, so that there's no issue with input shape being different, which would lead to cryptic 'incorrect parameter size' errors when we re-load the checkpoint * Clean src/vak/cli/prep.py - Import annotations from __future__ to be able to use pipe for type annotations - Add type annotations to `purpose_from_toml` - Change `Path` -> `pathlib.Path`, to be explicit * Fix vaktestdata/configs.py so we get dataset paths from the right section * Add test_dur in tests/data_for_tests/configs/ConvEncoderUMAP_train_audio_cbin_annot_notmat.toml so that we can re-use the same dataset for the eval config * Clean src/vak/common/tensorboard.py -- add type annotations, fix formatting in docstrings * Fix 'vak.datasets.metadata.Metadata' -> 'vak.datasets.frame_classification.Metadata' in tests/test_predict/test_frame_classification.py * Clean src/vak/prep/audio_dataset.py - Fix order of parameters to `prep_audio_dataset` - Fix type annotation, remove default for parameter `data_dir` - Also fix parameter ordering in docstring - Fix validation of `data_dir` in pre-condition section of function - Use `vak.common.typing.PathLike` for type hint *
Rewrite fixtures in tests/fixtures/audio.py so we can import as constants in tests where needed, to parametrize specific unit tests * WIP: Fix tests/test_prep/test_audio_dataset.py so it actually tests correctly -- need to add more cases to parametrize * Rename vak/train/train.py -> train_.py so we can still import train from train_ in vak/train/__init__.py and write 'vak.train.train', but *also* use unittest.mock.patch on functions where they are looked up in the train_ module * Fix imports in src/vak/train/__init__.py after renaming train.py -> train_.py * Write unit test for tests/test_train/test_train.py * Rename vak/eval/eval.py -> eval_.py as for train * Rename vak/predict/predict.py -> predict_.py as for train * Rename vak/prep/prep.py -> prep_.py as for train * Fixup tests/test_train/test_train.py * Add a 'break' in tests/fixtures/config.py fixture 'specific_config', so we don't loop unnecessarily through all configs * WIP: Add unit test in tests/test_eval/test_eval.py * Fix test in tests/test_eval/test_eval.py * Fix unit test names in tests/test_eval/test_frame_classification.py * Fix docstring, remove device fixture in tests/test_eval/test_eval.py * Remove device fixture in tests/test_train/test_train.py -- not needed since we're mocking anyway * Add tests/test_predict/test_predict.py * Fix module-level docstring in tests/test_predict/test_predict.py * Fix tests in tests/test_train/test_frame_classification.py * Add input_shape attribute to ConvEncoder neural network * Add tests/test_models/test_parametric_umap_model.py * Fix docstring, remove unused variable and unused import in tests/test_models/test_frame_classification_model.py * Add tests/test_train/test_parametric_umap.py * Fix docstring in tests/test_datasets/test_frame_classification/test_window_dataset.py * Add tests/test_datasets/test_frame_classification/test_frames_dataset.py * Add tests/test_datasets/test_parametric_umap/ * Add tests/test_models/test_ed_tcn.py * Add tests/test_models/test_convencoder_umap.py * Add tests/test_nets/test_ed_tcn.py * Add tests/test_nets/test_convencoder.py * Fix a unit test in tests/test_transforms/test_frame_labels/test_functional.py * Fix a test in tests/test_transforms/test_transforms.py * Fix undeclared variable 'device' in tests/test_train/test_train.py * Fix undeclared variable 'device' in tests/test_eval/test_eval.py * Fix undeclared variable 'device' in tests/test_predict/test_predict.py * Make other minor fixes in tests/test_predict/test_predict.py * Make input size smaller to speed up test in tests/test_models/test_convencoder_umap.py * Modify ConvEncoderUMAP configs to make dataset smaller, speed up tests * Fix test in tests/test_prep/test_prep.py * Fix docstring in src/vak/prep/frame_classification/dataset_arrays.py and fix function so that it does not add 'index' or 'level_0' columns to dataframes * Fix tests in tests/test_prep/test_frame_classification/test_dataset_arrays.py * Fix src/vak/prep/frame_classification/learncurve.py so it resets index on returned dataframe * Fix how we reset index on dataframe (again) in src/vak/prep/frame_classification/dataset_arrays.py * Fix how we reset index on dataframe in src/vak/prep/frame_classification/learncurve.py * Fix tests in tests/test_prep/test_frame_classification/test_frame_classification.py * Change LABELSET_YARDEN in tests/fixtures/annot.py to match what we use in config files in test data * Add return type in annotations on from_path classmethod in src/vak/datasets/frame_classification/metadata.py * Fix typo in docstring in
src/vak/prep/split/split.py * Rewrite fixtures in tests/fixtures/spect.py to return constants we define at module level so we can import those in tests where needed to parametrize * Rewrite/fix tests for split_frame_classification_dataframe in tests/test_prep/test_split/test_split.py * Add unit tests for split.unit_dataframe to tests/test_prep/test_split/test_split.py * Rewrite one-line definition of prep_audio_dataset in src/vak/prep/audio_dataset.py for clarity * Revise docstring of prep_spectrogram_dataset and add return type to type annotations, in src/vak/prep/spectrogram_dataset/spect_helper.py * Fix how we build constants in tests/fixtures/spect.py so we don't clobber names of fixtures in other modules * Fix SPECT_DIR_NPZ and glob of SPECT_DIR_NPZ that produces SPECT_LIST_NPZ so that we are using a specific 'spectrograms_generated' directory inside a dataset dir * Remove 'spect_annot_map' arg from src/vak/prep/spectrogram_dataset/spect_helper.py, and no longer do recursive glob of spect_dir * Rewrite/fix unit tests in tests/test_prep/test_spectrogram_dataset/test_spect_helper.py * Remove unused variable, add line break in docstring in tests/test_prep/test_spectrogram_dataset/test_spect_helper.py * Fix unit test in tests/test_prep/test_frame_classification/test_learncurve.py * Revise docstring in src/vak/prep/frame_classification/learncurve.py * Add fixture 'specific_audio_list' in tests/fixtures/audio.py * Fix variable name in tests/fixtures/audio.py * Fix/rewrite unit tests in tests/test_prep/test_spectrogram_dataset/test_prep.py * Change variable names for clarity in tests/test_prep/test_spectrogram_dataset/test_spect_helper.py * Fix tests in tests/test_train/test_parametric_umap.py -- use correct models, remove inappropriate asserts * Add tests/test_eval/test_parametric_umap.py * Add tests/vak.tests.config.toml * Use vak.tests.config.toml in tests/conftest.py to set default for command-line arg 'models' * Use vak.tests.config.toml in noxfile.py, for running tests and for generating test data * Fix root_results_dir option in train_continue configs * Change default parameters for ConvEncoderUMAP + add maxpool layers to reduce checkpoint size * Update GENERATED_TEST_DATA_ALL_URL in noxfile.py * Rewrite tests/vak.tests.config.toml as tests/vak.tests.config.json * Use json to load vak tests config in tests/conftest.py * Use json to load vak tests config in noxfile.py * Comment out calling fix_prep_csv_paths to see if we actually need to run it * Fix how we build DEFAULT_MODELS constant in noxfile.py * Remove constraints on dependencies in pyproject.toml to get pip to work * Fix path in conftest.py to avoid FileNotFoundError * Fix unit test in tests/test_cli/test_learncurve.py -- we just need to test that cli calls the right function * Fix unit test in tests/test_cli/test_predict.py to not use 'model' fixture -- we just need to test that cli calls the right function * Fix unit test in tests/test_cli/test_train.py to not use 'model' fixture -- we just need to test that cli calls the right function * Fix unit test in tests/test_config/test_parse.py to not use 'model' fixture -- we're not testing something model-specific here * Fix 'accelerator' in src/vak/common/trainer.py so it is not set to None * Fix 'accelerator' in src/vak/eval/frame_classification.py so it is not set to None * Fix unit test in tests/test_eval/test_frame_classification.py to not use 'model' fixture -- we don't want to use ConvEncoderUMAP model here * Fix 'accelerator' in src/vak/eval/parametric_umap.py so it is not
set to None * Add back lower bounds for pytorch-lightning + torch and torchvision in pyproject.toml * Remove commented code in noxfile.py * Delete tests/scripts/fix_prep_csv_paths.py, no longer needed * Change unit test in tests/test_models/test_base.py to use locally parametrized model_name instead of model fixture * Fix 'accelerator' in src/vak/predict/frame_classification.py so it is not set to None * Fix 'accelerator' in src/vak/predict/parametric_umap.py so it is not set to None * Fix 'parametric UMAP' -> 'parametric umap' in tests/fixtures/csv.py * Fix fixture in tests/test_predict/test_frame_classification.py to use locally parametrized 'model_name' instead of model fixture * Fix test in tests/test_prep/test_sequence_dataset.py to use locally parametrized 'model_name' instead of model fixture * Fix test in tests/test_learncurve/test_frame_classification.py to use locally parametrized 'model_name' instead of model fixture * Delete TeenyTweetyNet configs in tests/data_for_tests/configs * Delete TeenyTweetyNet from vak/nets * Fix test in tests/test_train/test_frame_classification.py to use locally parametrized 'model_name' instead of model fixture * Delete TeenyTweetyNet from vak/models * Add [TweetyNet.network] table to all TweetyNet configs in tests/data_for_tests/configs that makes a 'tiny' TweetyNet * Remove metadata for TeenyTweetyNet configs from tests/data_for_tests/configs/configs.json after deleting those configs * Add [ConvEncoderUMAP.network] table to all ConvEncoderUMAP configs in tests/data_for_tests/configs that makes a 'tiny' ConvEncoder * Delete tests/test_models/test_teenytweetynet.py and tests/test_nets/test_teenytweetynet.py * Change 'TeenyTweetyNet' -> 'TweetyNet' in tests/fixtures/dataframe.py * Change 'TeenyTweetyNet' -> 'TweetyNet' in tests/test_cli/test_eval.py * Change 'TeenyTweetyNet' -> 'TweetyNet' many places in tests * Remove TeenyTweetyNet from modules in tests/test_models * Fix test in tests/test_models/test_base.py to use network config from .toml file so we don't get tensor size mismatch errors * Fix a unit test in tests/test_models/test_frame_classification_model.py * Mark a test xfail in tests/test_models/test_parametric_umap_model.py because fixing it will require fixing/changing how we parse config files * Fix 'accelerator' in src/vak/train/parametric_umap.py so it is not set to None * Remove models command-line argument from tests, no longer used * Add attribute 'dataset_type' to PrepConfig docstring * Use locally parametrized variable 'model_name' in tests/test_cli/test_eval.py instead of 'model' fixture that was removed * Fix unit tests in tests/test_config/ to not use 'model' fixture that was removed * Refactor noxfile.py: separate into routinely used sessions at top and less-used sessions specific to test data at bottom. 
Remove use of model argument in test and coverage sessions, since that fixture was removed * Fix lower bound on torchvision, '15.2' -> '0.15.2' * Import annotations from __future__ in src/vak/transforms/transforms.py * Import annotations from __future__ in src/vak/prep/frame_classification/frame_classification.py * Import annotations from __future__ in src/vak/prep/parametric_umap/parametric_umap.py * Import annotations from __future__ in src/vak/prep/prep_.py * Remove 'running-on-ci' arg from call to nox session 'coverage' in .github/workflows/ci-linux.yml -- arg no longer used in that session --- .github/workflows/ci-linux.yml | 2 +- noxfile.py | 177 ++++---- pyproject.toml | 10 +- src/vak/cli/prep.py | 16 +- src/vak/common/tensorboard.py | 33 +- src/vak/common/trainer.py | 3 +- src/vak/config/prep.py | 8 + .../datasets/frame_classification/metadata.py | 8 +- src/vak/eval/__init__.py | 5 +- src/vak/eval/{eval.py => eval_.py} | 0 src/vak/eval/frame_classification.py | 3 +- src/vak/eval/parametric_umap.py | 8 +- src/vak/learncurve/__init__.py | 11 +- src/vak/learncurve/frame_classification.py | 6 +- src/vak/models/__init__.py | 6 +- src/vak/models/convencoder_umap.py | 15 - src/vak/models/get.py | 2 +- src/vak/models/registry.py | 45 +- src/vak/models/teenytweetynet.py | 23 - src/vak/nets/__init__.py | 5 +- src/vak/nets/conv_encoder.py | 9 +- src/vak/nets/teenytweetynet.py | 130 ------ src/vak/nets/tweetynet.py | 14 +- src/vak/predict/__init__.py | 5 +- src/vak/predict/frame_classification.py | 4 +- src/vak/predict/parametric_umap.py | 4 +- src/vak/predict/{predict.py => predict_.py} | 0 src/vak/prep/__init__.py | 7 +- src/vak/prep/audio_dataset.py | 28 +- .../frame_classification/dataset_arrays.py | 35 +- .../frame_classification.py | 2 + .../prep/frame_classification/learncurve.py | 33 +- .../prep/parametric_umap/parametric_umap.py | 2 + src/vak/prep/{prep.py => prep_.py} | 2 + .../prep/spectrogram_dataset/spect_helper.py | 55 +-- src/vak/prep/split/split.py | 3 +- src/vak/train/__init__.py | 6 +- src/vak/train/parametric_umap.py | 17 +- src/vak/train/{train.py => train_.py} | 0 .../transforms/defaults/parametric_umap.py | 4 - src/vak/transforms/transforms.py | 2 + tests/conftest.py | 22 +- ...oderUMAP_eval_audio_cbin_annot_notmat.toml | 17 +- ...derUMAP_train_audio_cbin_annot_notmat.toml | 26 +- ...weetyNet_eval_audio_cbin_annot_notmat.toml | 31 -- ...et_learncurve_audio_cbin_annot_notmat.toml | 41 -- ...tyNet_predict_audio_cbin_annot_notmat.toml | 30 -- ...t_predict_audio_wav_annot_birdsongrec.toml | 30 -- ...eetyNet_train_audio_cbin_annot_notmat.toml | 39 -- ...Net_train_audio_wav_annot_birdsongrec.toml | 40 -- ...rain_continue_audio_cbin_annot_notmat.toml | 41 -- ..._continue_audio_wav_annot_birdsongrec.toml | 42 -- ...train_continue_spect_mat_annot_yarden.toml | 40 -- ...weetyNet_train_spect_mat_annot_yarden.toml | 39 -- ...weetyNet_eval_audio_cbin_annot_notmat.toml | 13 +- ...et_learncurve_audio_cbin_annot_notmat.toml | 13 +- ...tyNet_predict_audio_cbin_annot_notmat.toml | 13 +- ...t_predict_audio_wav_annot_birdsongrec.toml | 13 +- ...eetyNet_train_audio_cbin_annot_notmat.toml | 13 +- ...Net_train_audio_wav_annot_birdsongrec.toml | 13 +- ...rain_continue_audio_cbin_annot_notmat.toml | 15 +- ..._continue_audio_wav_annot_birdsongrec.toml | 15 +- ...train_continue_spect_mat_annot_yarden.toml | 15 +- ...weetyNet_train_spect_mat_annot_yarden.toml | 13 +- tests/data_for_tests/configs/configs.json | 100 ----- .../invalid_train_and_learncurve_config.toml | 4 +- tests/fixtures/annot.py | 2 
+- tests/fixtures/audio.py | 72 ++-- tests/fixtures/config.py | 4 +- tests/fixtures/csv.py | 6 +- tests/fixtures/dataframe.py | 2 +- tests/fixtures/model.py | 7 +- tests/fixtures/spect.py | 123 +++--- tests/scripts/fix_prep_csv_paths.py | 48 --- tests/scripts/vaktestdata/configs.py | 33 +- tests/test_cli/test_eval.py | 10 +- tests/test_cli/test_learncurve.py | 6 +- tests/test_cli/test_predict.py | 12 +- tests/test_cli/test_train.py | 14 +- tests/test_common/test_labels.py | 4 +- tests/test_config/test_parse.py | 19 +- tests/test_config/test_prep.py | 3 +- .../__init__.py | 0 .../test_frames_dataset.py | 34 ++ .../test_metadata.py | 79 ++++ .../test_window_dataset.py | 37 ++ tests/test_datasets/test_metadata.py | 63 --- .../test_parametric_umap}/__init__.py | 0 .../test_parametric_umap.py | 34 ++ .../test_datasets/test_seq/test_validators.py | 29 -- .../test_window_dataset/conftest.py | 70 --- .../test_window_dataset/test_class_.py | 73 ---- .../test_window_dataset/test_helper.py | 210 --------- tests/test_eval/test_eval.py | 184 ++------ tests/test_eval/test_frame_classification.py | 201 +++++++++ tests/test_eval/test_parametric_umap.py | 171 ++++++++ ...ncurve.py => test_frame_classification.py} | 97 ++--- tests/test_models/conftest.py | 99 +++-- tests/test_models/test_base.py | 41 +- tests/test_models/test_convencoder_umap.py | 36 ++ tests/test_models/test_das.py | 76 ---- tests/test_models/test_decorator.py | 21 +- tests/test_models/test_definition.py | 23 +- tests/test_models/test_ed_tcn.py | 28 ++ ....py => test_frame_classification_model.py} | 52 +-- .../test_models/test_parametric_umap_model.py | 130 ++++++ tests/test_models/test_registry.py | 107 ++++- tests/test_models/test_teenytweetynet.py | 38 -- tests/test_models/test_tweetynet.py | 7 +- tests/test_nets/test_convencoder.py | 53 +++ tests/test_nets/test_das/test_kapre.py | 211 --------- tests/test_nets/test_das/test_net.py | 136 ------ tests/test_nets/test_das/test_nn.py | 127 ------ tests/test_nets/test_ed_tcn.py | 84 ++++ tests/test_nets/test_teenytweetynet.py | 76 ---- tests/test_nets/test_tweetynet.py | 51 ++- .../test_predict/test_frame_classification.py | 214 ++++++++++ tests/test_predict/test_predict.py | 190 +-------- tests/test_prep/test_audio_dataset.py | 366 ++-------------- .../test_dataset_arrays.py | 114 +++-- .../test_frame_classification.py | 26 +- .../test_frame_classification/test_helper.py | 134 ------ .../test_learncurve.py | 99 ++--- tests/test_prep/test_prep.py | 15 +- tests/test_prep/test_sequence_dataset.py | 31 ++ .../test_spectrogram_dataset/test_prep.py | 399 ++++++------------ .../test_spect_helper.py | 387 +++++------------ tests/test_prep/test_split/test_split.py | 122 ++++-- tests/test_train/test_frame_classification.py | 252 +++++++++++ tests/test_train/test_parametric_umap.py | 180 ++++++++ tests/test_train/test_train.py | 233 ++-------- .../__init__.py | 0 .../test_functional.py | 36 +- .../test_transforms.py | 30 +- tests/test_transforms/test_transforms.py | 11 +- tests/vak.tests.config.json | 6 + 136 files changed, 3122 insertions(+), 4192 deletions(-) rename src/vak/eval/{eval.py => eval_.py} (100%) delete mode 100644 src/vak/models/teenytweetynet.py delete mode 100644 src/vak/nets/teenytweetynet.py rename src/vak/predict/{predict.py => predict_.py} (100%) rename src/vak/prep/{prep.py => prep_.py} (99%) rename src/vak/train/{train.py => train_.py} (100%) delete mode 100644 tests/data_for_tests/configs/TeenyTweetyNet_eval_audio_cbin_annot_notmat.toml delete mode 100644 
tests/data_for_tests/configs/TeenyTweetyNet_learncurve_audio_cbin_annot_notmat.toml delete mode 100644 tests/data_for_tests/configs/TeenyTweetyNet_predict_audio_cbin_annot_notmat.toml delete mode 100644 tests/data_for_tests/configs/TeenyTweetyNet_predict_audio_wav_annot_birdsongrec.toml delete mode 100644 tests/data_for_tests/configs/TeenyTweetyNet_train_audio_cbin_annot_notmat.toml delete mode 100644 tests/data_for_tests/configs/TeenyTweetyNet_train_audio_wav_annot_birdsongrec.toml delete mode 100644 tests/data_for_tests/configs/TeenyTweetyNet_train_continue_audio_cbin_annot_notmat.toml delete mode 100644 tests/data_for_tests/configs/TeenyTweetyNet_train_continue_audio_wav_annot_birdsongrec.toml delete mode 100644 tests/data_for_tests/configs/TeenyTweetyNet_train_continue_spect_mat_annot_yarden.toml delete mode 100644 tests/data_for_tests/configs/TeenyTweetyNet_train_spect_mat_annot_yarden.toml delete mode 100644 tests/scripts/fix_prep_csv_paths.py rename tests/test_datasets/{test_window_dataset => test_frame_classification}/__init__.py (100%) create mode 100644 tests/test_datasets/test_frame_classification/test_frames_dataset.py create mode 100644 tests/test_datasets/test_frame_classification/test_metadata.py create mode 100644 tests/test_datasets/test_frame_classification/test_window_dataset.py delete mode 100644 tests/test_datasets/test_metadata.py rename tests/{test_nets/test_das => test_datasets/test_parametric_umap}/__init__.py (100%) create mode 100644 tests/test_datasets/test_parametric_umap/test_parametric_umap.py delete mode 100644 tests/test_datasets/test_seq/test_validators.py delete mode 100644 tests/test_datasets/test_window_dataset/conftest.py delete mode 100644 tests/test_datasets/test_window_dataset/test_class_.py delete mode 100644 tests/test_datasets/test_window_dataset/test_helper.py create mode 100644 tests/test_eval/test_frame_classification.py create mode 100644 tests/test_eval/test_parametric_umap.py rename tests/test_learncurve/{test_learncurve.py => test_frame_classification.py} (62%) create mode 100644 tests/test_models/test_convencoder_umap.py delete mode 100644 tests/test_models/test_das.py create mode 100644 tests/test_models/test_ed_tcn.py rename tests/test_models/{test_windowed_frame_classification_model.py => test_frame_classification_model.py} (67%) create mode 100644 tests/test_models/test_parametric_umap_model.py delete mode 100644 tests/test_models/test_teenytweetynet.py create mode 100644 tests/test_nets/test_convencoder.py delete mode 100644 tests/test_nets/test_das/test_kapre.py delete mode 100644 tests/test_nets/test_das/test_net.py delete mode 100644 tests/test_nets/test_das/test_nn.py create mode 100644 tests/test_nets/test_ed_tcn.py delete mode 100644 tests/test_nets/test_teenytweetynet.py create mode 100644 tests/test_predict/test_frame_classification.py delete mode 100644 tests/test_prep/test_frame_classification/test_helper.py create mode 100644 tests/test_prep/test_sequence_dataset.py create mode 100644 tests/test_train/test_frame_classification.py create mode 100644 tests/test_train/test_parametric_umap.py rename tests/test_transforms/{test_labeled_timebins => test_frame_labels}/__init__.py (100%) rename tests/test_transforms/{test_labeled_timebins => test_frame_labels}/test_functional.py (93%) rename tests/test_transforms/{test_labeled_timebins => test_frame_labels}/test_transforms.py (87%) create mode 100644 tests/vak.tests.config.json diff --git a/.github/workflows/ci-linux.yml b/.github/workflows/ci-linux.yml index a6b482bf1..9f1076839 
100644 --- a/.github/workflows/ci-linux.yml +++ b/.github/workflows/ci-linux.yml @@ -24,6 +24,6 @@ jobs: run: | nox -s test-data-download-source nox -s test-data-download-generated-ci - nox -s coverage --verbose -- running-on-ci + nox -s coverage --verbose - name: upload code coverage uses: codecov/codecov-action@v3 diff --git a/noxfile.py b/noxfile.py index 7637a8314..6adb4f33d 100644 --- a/noxfile.py +++ b/noxfile.py @@ -1,3 +1,4 @@ +import json import os import pathlib import shutil @@ -10,6 +11,11 @@ DIR = pathlib.Path(__file__).parent.resolve() VENV_DIR = pathlib.Path('./.venv').resolve() + +with pathlib.Path('./tests/vak.tests.config.json').open('rb') as fp: + VAK_TESTS_CONFIG = json.load(fp) + + nox.options.sessions = ['test', 'coverage'] @@ -62,13 +68,57 @@ def lint(session): session.run("flake8", "./src", "--max-line-length", "120", "--exclude", "./src/crowsetta/_vendor") -# ---- used by sessions that "clean up" data for tests -def clean_dir(dir_path): +@nox.session +def test(session) -> None: + """ + Run the unit and regular tests. + """ + session.install(".[test]") + if session.posargs: + session.run("pytest", *session.posargs) + else: + session.run("pytest", "-x", "--slow-last") + + +@nox.session +def coverage(session) -> None: + """ + Run the unit and regular tests, and save coverage report """ - "clean" a directory by removing all files - (that are not hidden) - without removing the directory itself + session.install(".[test]") + session.run( + "pytest", "--cov=./", "--cov-report=xml", *session.posargs + ) + + +@nox.session +def doc(session: nox.Session) -> None: + """ + Build the docs. + + To run ``sphinx-autobuild``, do: + + .. code-block:: console + + nox -s doc -- autobuild + + Otherwise the docs will be built once using ``sphinx-build``. """ + session.install(".[doc]") + if session.posargs: + if "autobuild" in session.posargs: + print("Building docs at http://127.0.0.1:8000 with sphinx-autobuild -- use Ctrl-C to quit") + session.run("sphinx-autobuild", "doc", "doc/_build/html") + else: + print("Unsupported argument to docs") + else: + session.run("sphinx-build", "-nW", "--keep-going", "-b", "html", "doc/", "doc/_build/html") + + +# ---- sessions below this all have to do with data for tests ---------------------------------------------------- +def clean_dir(dir_path): + """Helper function that "cleans" a directory by removing all files + (that are not hidden) without removing the directory itself.""" dir_path = pathlib.Path(dir_path) dir_contents = dir_path.glob('*') for content in dir_contents: @@ -92,9 +142,7 @@ def clean_dir(dir_path): @nox.session(name='test-data-clean-source') def test_data_clean_source(session) -> None: - """ - Clean (remove) 'source' test data, used by TEST_DATA_GENERATE_SCRIPT. - """ + """Clean (remove) 'source' test data, used by TEST_DATA_GENERATE_SCRIPT.""" clean_dir(SOURCE_TEST_DATA_DIR) @@ -109,18 +157,14 @@ def copy_url(url: str, path: str) -> None: @nox.session(name='test-data-tar-source') def test_data_tar_source(session) -> None: - """ - Make a .tar.gz file of just the 'generated' test data used to run tests on CI. - """ + """Make a .tar.gz file of 'source' test data, used by TEST_DATA_GENERATE_SCRIPT.""" session.log(f"Making tarfile with source data: {SOURCE_TEST_DATA_TAR}") make_tarfile(SOURCE_TEST_DATA_TAR, SOURCE_TEST_DATA_DIRS) @nox.session(name='test-data-download-source') def test_data_download_source(session) -> None: - """ - Download and extract a .tar.gz file of 'source' test data, used by TEST_DATA_GENERATE_SCRIPT. 
- """ + """Download and extract a .tar.gz file of 'source' test data, used by TEST_DATA_GENERATE_SCRIPT.""" session.log(f'Downloading: {SOURCE_TEST_DATA_URL}') copy_url(url=SOURCE_TEST_DATA_URL, path=SOURCE_TEST_DATA_TAR) session.log(f'Extracting downloaded tar: {SOURCE_TEST_DATA_TAR}') @@ -133,9 +177,7 @@ def test_data_download_source(session) -> None: @nox.session(name='test-data-generate', python="3.10") def test_data_generate(session) -> None: - """ - Produced 'generated' test data, by running TEST_DATA_GENERATE_SCRIPT on 'source' test data. - """ + """Produced 'generated' test data, by running TEST_DATA_GENERATE_SCRIPT on 'source' test data.""" session.install(".[test]") session.run("python", TEST_DATA_GENERATE_SCRIPT) @@ -145,13 +187,12 @@ def test_data_generate(session) -> None: @nox.session(name='test-data-clean-generated') def test_data_clean_generated(session) -> None: - """ - Clean (remove) 'generated' test data. - """ + """Clean (remove) 'generated' test data.""" clean_dir(GENERATED_TEST_DATA_DIR) def make_tarfile(name: str, to_add: list): + """Helper function that makes a tarfile""" with tarfile.open(name, "w:gz") as tf: for add_name in to_add: tf.add(name=add_name) @@ -161,8 +202,21 @@ def make_tarfile(name: str, to_add: list): PREP_DIR = f'{GENERATED_TEST_DATA_DIR}prep/' RESULTS_DIR = f'{GENERATED_TEST_DATA_DIR}results/' -PREP_CI = sorted(pathlib.Path(PREP_DIR).glob('*/*/teenytweetynet')) -RESULTS_CI = sorted(pathlib.Path(RESULTS_DIR).glob('*/*/teenytweetynet')) +PREP_CI: list = [] +for model_name in VAK_TESTS_CONFIG['models']: + PREP_CI.extend( + sorted( + pathlib.Path(PREP_DIR).glob(f'*/*/{model_name}') + ) + ) +RESULTS_CI: list = [] +for model_name in VAK_TESTS_CONFIG['models']: + PREP_CI.extend( + sorted( + pathlib.Path(RESULTS_DIR).glob(f'*/*/{model_name}') + ) + ) + GENERATED_TEST_DATA_CI_TAR = f'{GENERATED_TEST_DATA_DIR}generated_test_data-version-1.x.ci.tar.gz' GENERATED_TEST_DATA_CI_DIRS = [CONFIGS_DIR] + PREP_CI + RESULTS_CI @@ -172,30 +226,24 @@ def make_tarfile(name: str, to_add: list): @nox.session(name='test-data-tar-generated-all') def test_data_tar_generated_all(session) -> None: - """ - Make a .tar.gz file of all 'generated' test data. - """ + """Make a .tar.gz file of all 'generated' test data.""" session.log(f"Making tarfile with all generated data: {GENERATED_TEST_DATA_ALL_TAR}") make_tarfile(GENERATED_TEST_DATA_ALL_TAR, GENERATED_TEST_DATA_ALL_DIRS) @nox.session(name='test-data-tar-generated-ci') def test_data_tar_generated_ci(session) -> None: - """ - Make a .tar.gz file of just the 'generated' test data used to run tests on CI. 
- """ + """Make a .tar.gz file of just the 'generated' test data used to run tests on CI.""" session.log(f"Making tarfile with generated data for CI: {GENERATED_TEST_DATA_CI_TAR}") make_tarfile(GENERATED_TEST_DATA_CI_TAR, GENERATED_TEST_DATA_CI_DIRS) -GENERATED_TEST_DATA_ALL_URL = 'https://osf.io/uvgjt/download' +GENERATED_TEST_DATA_ALL_URL = 'https://osf.io/xfp6n/download' @nox.session(name='test-data-download-generated-all') def test_data_download_generated_all(session) -> None: - """ - Download and extract a .tar.gz file of all 'generated' test data - """ + """Download and extract a .tar.gz file of all 'generated' test data""" session.install("pandas") session.log(f'Downloading: {GENERATED_TEST_DATA_ALL_URL}') copy_url(url=GENERATED_TEST_DATA_ALL_URL, path=GENERATED_TEST_DATA_ALL_TAR) @@ -204,9 +252,6 @@ def test_data_download_generated_all(session) -> None: tf.extractall(path='.') session.log('Fixing paths in .csv files') session.install("pandas") - session.run( - "python", "./tests/scripts/fix_prep_csv_paths.py" - ) GENERATED_TEST_DATA_CI_URL = 'https://osf.io/un2zs/download' @@ -214,70 +259,10 @@ def test_data_download_generated_all(session) -> None: @nox.session(name='test-data-download-generated-ci') def test_data_download_generated_ci(session) -> None: - """ - Download and extract a .tar.gz file of just the 'generated' test data used to run tests on CI - """ + """Download and extract a .tar.gz file of just the 'generated' test data used to run tests on CI""" session.install("pandas") session.log(f'Downloading: {GENERATED_TEST_DATA_CI_URL}') copy_url(url=GENERATED_TEST_DATA_CI_URL, path=GENERATED_TEST_DATA_CI_TAR) session.log(f'Extracting downloaded tar: {GENERATED_TEST_DATA_CI_TAR}') with tarfile.open(GENERATED_TEST_DATA_CI_TAR, "r:gz") as tf: tf.extractall(path='.') - session.log('Fixing paths in .csv files') - session.run( - "python", "./tests/scripts/fix_prep_csv_paths.py" - ) - - -@nox.session -def test(session) -> None: - """ - Run the unit and regular tests. - """ - session.install(".[test]") - session.run("pytest", *session.posargs) - - -@nox.session -def coverage(session) -> None: - """ - Run the unit and regular tests, and save coverage report - """ - session.install(".[test]") - if session.posargs: - if "running-on-ci" in session.posargs: - # on ci, just run `teenytweetynet` model - session.run( - "pytest", "--models", "teenytweetynet", "--cov=./", "--cov-report=xml" - ) - return - else: - print("Unsupported argument to coverage") - - session.run( - "pytest", "--cov=./", "--cov-report=xml", *session.posargs - ) - - -@nox.session -def doc(session: nox.Session) -> None: - """ - Build the docs. - - To run ``sphinx-autobuild``, do: - - .. 
code-block::console - - nox -s doc -- autobuild - - Otherwise the docs will be built once using - """ - session.install(".[doc]") - if session.posargs: - if "autobuild" in session.posargs: - print("Building docs at http://127.0.0.1:8000 with sphinx-autobuild -- use Ctrl-C to quit") - session.run("sphinx-autobuild", "doc", "doc/_build/html") - else: - print("Unsupported argument to docs") - else: - session.run("sphinx-build", "-nW", "--keep-going", "-b", "html", "doc/", "doc/_build/html") diff --git a/pyproject.toml b/pyproject.toml index af6a6c801..1f3413ac6 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -28,7 +28,7 @@ dependencies = [ "dask >=2.10.1", "evfuncs >=0.3.4", "joblib >=0.14.1", - "pytorch-lightning >=1.8.4.post0, <2.0", + "pytorch-lightning >=2.0.7", "matplotlib >=3.3.3", "numpy >=1.18.1", "pynndescent >=0.5.10", @@ -37,8 +37,8 @@ dependencies = [ "pandas >=1.0.1", "tensorboard >=2.8.0", "toml >=0.10.2", - "torch >=1.7.1, <2.0.0", - "torchvision >=0.5.0", + "torch >= 2.0.1", + "torchvision >=0.15.2", "tqdm >=4.42.1", "umap-learn >=0.5.3", ] @@ -85,5 +85,7 @@ markers = [ filterwarnings = [ "ignore:::torch.utils.tensorboard", 'ignore:Deprecated call to `pkg_resources.declare_namespace', - 'ignore:pkg_resources is deprecated as an API' + 'ignore:pkg_resources is deprecated as an API', + 'ignore:Implementing implicit namespace packages', + 'ignore:distutils Version classes are deprecated.', ] diff --git a/src/vak/cli/prep.py b/src/vak/cli/prep.py index 13636a4de..3ba299b61 100644 --- a/src/vak/cli/prep.py +++ b/src/vak/cli/prep.py @@ -1,8 +1,9 @@ -# note NO LOGGING -- we configure logger inside `core.prep` -# so we can save log file inside dataset directory +"""Function called by command-line interface for prep command""" +from __future__ import annotations + import shutil import warnings -from pathlib import Path +import pathlib import toml @@ -12,7 +13,7 @@ from ..config.validators import are_sections_valid -def purpose_from_toml(config_toml, toml_path=None): +def purpose_from_toml(config_toml: dict, toml_path: str | pathlib.Path | None = None) -> str: """determine "purpose" from toml config, i.e., the command that will be run after we ``prep`` the data. @@ -35,6 +36,9 @@ def purpose_from_toml(config_toml, toml_path=None): return section_name.lower() # this is the "purpose" of the file +# note NO LOGGING -- we configure logger inside `core.prep` +# so we can save log file inside dataset directory + # see https://github.com/NickleDave/vak/issues/334 SECTIONS_PREP_SHOULD_PARSE = ("PREP", "SPECT_PARAMS", "DATALOADER") @@ -45,7 +49,7 @@ def prep(toml_path): Parameters ---------- - toml_path : str, Path + toml_path : str, pathlib.Path path to a configuration file in TOML format. Used to rewrite file with options determined by this function and needed for other functions @@ -75,7 +79,7 @@ def prep(toml_path): dataset, and for all rows the 'split' columns for that dataset will be 'predict' or 'test' (respectively). 
""" - toml_path = Path(toml_path) + toml_path = pathlib.Path(toml_path) # open here because need to check for `dataset_path` in this function, see #314 & #333 config_toml = _load_toml_from_path(toml_path) diff --git a/src/vak/common/tensorboard.py b/src/vak/common/tensorboard.py index 159853c38..1bb0c879c 100644 --- a/src/vak/common/tensorboard.py +++ b/src/vak/common/tensorboard.py @@ -1,4 +1,6 @@ -"""functions dealing with ``tensorboard``""" +"""Functions dealing with ``tensorboard``""" +from __future__ import annotations + from pathlib import Path import pandas as pd @@ -7,14 +9,16 @@ ) from torch.utils.tensorboard import SummaryWriter +from ..common.typing import PathLike + -def get_summary_writer(log_dir, filename_suffix): - """get an instance of ``tensorboard.SummaryWriter``, - to use with a vak.Model during training +def get_summary_writer(log_dir: PathLike, filename_suffix: str) -> SummaryWriter: + """Get an instance of ``tensorboard.SummaryWriter``, + to use with a vak.Model during training. Parameters ---------- - log_dir : str + log_dir : str, pathlib.Path directory where event file will be written filename_suffix : str suffix added to events file name @@ -40,15 +44,17 @@ def get_summary_writer(log_dir, filename_suffix): } -def events2df(events_path, size_guidance=None, drop_wall_time=True): - """convert ``tensorboard`` "events" log file to pandas DataFrame +def events2df( + events_path: PathLike, size_guidance: dict | None = None, drop_wall_time: bool = True +) -> pd.DataFrame: + """Convert :mod:`tensorboard` events file to pandas.DataFrame - events files are created by SummaryWriter from PyTorch or Tensorflow. + Events files are created by SummaryWriter from PyTorch or Tensorflow. Parameters ---------- - events_path : str, Path - path to either a log directory or a specific events file + events_path : str, pathlib.Path + Path to either a log directory or a specific events file saved by a SummaryWriter in a log directory. By default, ``vak`` saves logs in a directory with the model name inside a ``results`` directory generated at the start of training. @@ -64,12 +70,12 @@ def events2df(events_path, size_guidance=None, drop_wall_time=True): For more information see https://github.com/tensorflow/tensorboard/blob/master/tensorboard/backend/event_processing/event_accumulator.py drop_wall_time : bool - if True, drop wall times logged in events file. Default is True. + If True, drop wall times logged in events file. Default is True. 
Returns ------- df : pandas.Dataframe -    with index 'step' and all Scalars from the events file +    With index 'step' and all Scalars from the events file Examples -------- @@ -117,4 +123,5 @@ def events2df(events_path, size_guidance=None, drop_wall_time=True): ).set_index("step") if drop_wall_time: dfs[scalar_tag].drop("wall_time", axis=1, inplace=True) -    return pd.concat([v for k, v in dfs.items()], axis=1) +    df = pd.concat([v for k, v in dfs.items()], axis=1) +    return df diff --git a/src/vak/common/trainer.py b/src/vak/common/trainer.py index 439d25034..7a6400d97 100644 --- a/src/vak/common/trainer.py +++ b/src/vak/common/trainer.py @@ -56,10 +56,11 @@ def get_default_trainer( else: callbacks = None + # TODO: use accelerator parameter, https://github.com/vocalpy/vak/issues/691 if device == "cuda": accelerator = "gpu" else: - accelerator = None + accelerator = "auto" logger = lightning.loggers.TensorBoardLogger(save_dir=log_save_dir) diff --git a/src/vak/config/prep.py b/src/vak/config/prep.py index 717b7b445..7481d8cc2 100644 --- a/src/vak/config/prep.py +++ b/src/vak/config/prep.py @@ -71,6 +71,14 @@ class PrepConfig: output_dir : str Path to location where data sets should be saved. Default is None, in which case data sets are saved in the current working directory. + dataset_type : str + String name of the type of dataset, e.g., + 'frame_classification'. Dataset types are + defined by machine learning tasks, e.g., + a 'frame_classification' dataset would be used by + a :class:`vak.models.FrameClassificationModel` model. + Valid dataset types are defined as + :const:`vak.prep.prep.DATASET_TYPES`. audio_format : str format of audio files. One of {'wav', 'cbin'}. spect_format : str diff --git a/src/vak/datasets/frame_classification/metadata.py b/src/vak/datasets/frame_classification/metadata.py index 074e5d26b..61c7cb918 100644 --- a/src/vak/datasets/frame_classification/metadata.py +++ b/src/vak/datasets/frame_classification/metadata.py @@ -54,12 +54,12 @@ class Metadata: Name of csv file representing the source files in the dataset. Csv file will be located in root of directory representing dataset, so only the filename is given. - frame_dur: float, optional - Duration of a frame, i.e., a single sample in audio - or a single timebin in a spectrogram. input_type : str The modality of the input data "frames", either audio signals or spectrograms. One of {'audio', 'spect'}. + frame_dur: float + Duration of a frame, i.e., a single sample in audio + or a single timebin in a spectrogram. """ # declare this as a constant to avoid @@ -108,7 +108,7 @@ def is_valid_frame_dur(self, attribute, value): ) @classmethod -    def from_path(cls, json_path: str | pathlib.Path): +    def from_path(cls, json_path: str | pathlib.Path) -> Metadata: """Load dataset metadata from a json file. Class method that returns an instance of diff --git a/src/vak/eval/__init__.py b/src/vak/eval/__init__.py index 67a5fe219..fba58db39 100644 --- a/src/vak/eval/__init__.py +++ b/src/vak/eval/__init__.py @@ -1,9 +1,10 @@ -from . import frame_classification, parametric_umap -from .eval import eval +from . 
import eval_, frame_classification, parametric_umap +from .eval_ import eval __all__ = [ "eval", + "eval_", "frame_classification", "parametric_umap", ] diff --git a/src/vak/eval/eval.py b/src/vak/eval/eval_.py similarity index 100% rename from src/vak/eval/eval.py rename to src/vak/eval/eval_.py diff --git a/src/vak/eval/frame_classification.py b/src/vak/eval/frame_classification.py index 52188cd7c..531c55d6c 100644 --- a/src/vak/eval/frame_classification.py +++ b/src/vak/eval/frame_classification.py @@ -191,10 +191,11 @@ def eval_frame_classification_model( model.load_state_dict_from_path(checkpoint_path) + # TODO: use accelerator parameter, https://github.com/vocalpy/vak/issues/691 if device == "cuda": accelerator = "gpu" else: - accelerator = None + accelerator = "auto" trainer_logger = lightning.loggers.TensorBoardLogger(save_dir=output_dir) trainer = lightning.Trainer(accelerator=accelerator, logger=trainer_logger) diff --git a/src/vak/eval/parametric_umap.py b/src/vak/eval/parametric_umap.py index 594add11a..6c4b21b48 100644 --- a/src/vak/eval/parametric_umap.py +++ b/src/vak/eval/parametric_umap.py @@ -100,10 +100,6 @@ def eval_parametric_umap_model( # ---------------- load data for evaluation ------------------------------------------------------------------------ if transform_params is None: transform_params = {} - if "padding" not in transform_params and model_name == "ConvEncoderUMAP": - padding = models.convencoder_umap.get_default_padding(metadata.shape) - transform_params["padding"] = padding - item_transform = transforms.defaults.get_default_transform( model_name, "eval", transform_params ) @@ -133,10 +129,11 @@ def eval_parametric_umap_model( model.load_state_dict_from_path(checkpoint_path) + # TODO: use accelerator parameter, https://github.com/vocalpy/vak/issues/691 if device == "cuda": accelerator = "gpu" else: - accelerator = None + accelerator = "auto" trainer_logger = lightning.loggers.TensorBoardLogger(save_dir=output_dir) trainer = lightning.Trainer(accelerator=accelerator, logger=trainer_logger) @@ -155,7 +152,6 @@ def eval_parametric_umap_model( ("dataset_path", dataset_path), ] ) - # TODO: is this still necessary after switching to Lightning? Stop saying "average"? # order metrics by name to be extra sure they will be consistent across runs row.update(sorted([(k, v) for k, v in metric_vals.items()])) diff --git a/src/vak/learncurve/__init__.py b/src/vak/learncurve/__init__.py index 29ca75d15..51e9a0e9d 100644 --- a/src/vak/learncurve/__init__.py +++ b/src/vak/learncurve/__init__.py @@ -1,7 +1,16 @@ -from . import learncurve +from . 
import ( + curvefit, + dirname, + frame_classification, + learncurve, +) from .learncurve import learning_curve + __all__ = [ + "curvefit", + "dirname", + "frame_classification", "learncurve", "learning_curve", ] diff --git a/src/vak/learncurve/frame_classification.py b/src/vak/learncurve/frame_classification.py index 82da2ce36..265a6dc12 100644 --- a/src/vak/learncurve/frame_classification.py +++ b/src/vak/learncurve/frame_classification.py @@ -22,11 +22,11 @@ def learning_curve_for_frame_classification_model( batch_size: int, num_epochs: int, num_workers: int, + results_path: str | pathlib.Path, train_transform_params: dict | None = None, train_dataset_params: dict | None = None, val_transform_params: dict | None = None, val_dataset_params: dict | None = None, - results_path: str | pathlib.Path = None, post_tfm_kwargs: dict | None = None, normalize_spectrograms: bool = True, shuffle: bool = True, @@ -66,6 +66,8 @@ def learning_curve_for_frame_classification_model( num_workers : int Number of processes to use for parallel loading of data. Argument to torch.DataLoader. + results_path : str, pathlib.Path + Directory where results will be saved. train_transform_params: dict, optional Parameters for training data transform. Passed as keyword arguments. @@ -84,8 +86,6 @@ def learning_curve_for_frame_classification_model( Passed as keyword arguments to :class:`vak.datasets.frame_classification.FramesDataset`. Optional, default is None. - results_path : str, pathlib.Path - Directory where results will be saved. previous_run_path : str, Path Path to directory containing dataset .csv files that represent subsets of training set, created by diff --git a/src/vak/models/__init__.py b/src/vak/models/__init__.py index 5a8e9d031..604fa408e 100644 --- a/src/vak/models/__init__.py +++ b/src/vak/models/__init__.py @@ -1,11 +1,12 @@ from . import base, decorator, definition, registry from .base import Model from .convencoder_umap import ConvEncoderUMAP +from .decorator import model from .ed_tcn import ED_TCN from .frame_classification_model import FrameClassificationModel from .get import get from .parametric_umap_model import ParametricUMAPModel -from .teenytweetynet import TeenyTweetyNet +from .registry import model_family from .tweetynet import TweetyNet __all__ = [ @@ -17,8 +18,9 @@ "FrameClassificationModel", "get", "Model", + "model", + "model_family", "ParametricUMAPModel", "registry", - "TeenyTweetyNet", "TweetyNet", ] diff --git a/src/vak/models/convencoder_umap.py b/src/vak/models/convencoder_umap.py index edffdf11f..cbed718d1 100644 --- a/src/vak/models/convencoder_umap.py +++ b/src/vak/models/convencoder_umap.py @@ -7,8 +7,6 @@ """ from __future__ import annotations -import math - import torch from .. import metrics, nets, nn @@ -65,16 +63,3 @@ class ConvEncoderUMAP: default_config = { "optimizer": {"lr": 1e-3}, } - - -def get_default_padding(shape): - """Get default padding for input to ConvEncoderUMAP model. 
- - Rounds up to nearest tens place - """ - rounded_up = tuple(10 * math.ceil(x / 10) for x in shape) - padding = tuple( - rounded_up_x - shape_x - for (rounded_up_x, shape_x) in zip(rounded_up, shape) - ) - return padding diff --git a/src/vak/models/get.py b/src/vak/models/get.py index 052a0e39b..e4cc3ab06 100644 --- a/src/vak/models/get.py +++ b/src/vak/models/get.py @@ -49,7 +49,7 @@ def get( """ # we do this dynamically so we always get all registered models try: - model_class = registry.MODEL_CLASS_BY_NAME[name] + model_class = registry.MODEL_REGISTRY[name] except KeyError as e: raise ValueError( f"Invalid model name: '{name}'.\n" diff --git a/src/vak/models/registry.py b/src/vak/models/registry.py index a3d4cce88..639052895 100644 --- a/src/vak/models/registry.py +++ b/src/vak/models/registry.py @@ -36,15 +36,16 @@ def model_family(family_class: Type) -> None: return family_class -MODELS_BY_FAMILY_REGISTRY = {} +MODEL_REGISTRY = {} -def register_model(model_class: Type) -> None: +def register_model(model_class: Type) -> Type: """Decorator that registers a model in the model registry. - We call this decorator ``register_model`` to not conflict - with the existing :func:`vak.decorator.model`, + This function is called by :func:`vak.models.decorator.model`, that creates a model class from a model definition. + So you will not usually need to use this decorator directly, + and should prefer to use :func:`vak.models.decorator.model` instead. """ model_family_classes = list(MODEL_FAMILY_REGISTRY.values()) model_parent_class = inspect.getmro(model_class)[1] @@ -58,21 +59,14 @@ def register_model(model_class: Type) -> None: f"Valid model family classes are: {model_family_classes}" ) - model_parent_class_name = model_parent_class.__name__ - if model_parent_class_name not in MODELS_BY_FAMILY_REGISTRY: - MODELS_BY_FAMILY_REGISTRY[model_parent_class_name] = {} - model_name = model_class.__name__ - if model_name in MODELS_BY_FAMILY_REGISTRY[model_parent_class_name]: + if model_name in MODEL_REGISTRY: raise ValueError( f"Attempted to register a model family with the name '{model_name}', " - f"but this name is already in the registry under this model's family name:\n{model_parent_class_name}.\n" - f"Classes in the model family registry:\n{MODELS_BY_FAMILY_REGISTRY}" + f"but this name is already in the registry.\n" ) - MODELS_BY_FAMILY_REGISTRY[model_parent_class_name][ - model_name - ] = model_class + MODEL_REGISTRY[model_name] = model_class # need to return class after we register it or we replace it with None # when this function is used as a decorator return model_class @@ -81,24 +75,15 @@ def register_model(model_class: Type) -> None: def __getattr__(name: str) -> Any: """Module-level __getattr__ function that we use to dynamically determine models.""" if name == "MODEL_FAMILY_FROM_NAME": - return { - model_name: family_name - for family_name, family_dict in MODELS_BY_FAMILY_REGISTRY.items() - for model_name, model_class in family_dict.items() - } - elif name == "MODEL_CLASS_BY_NAME": - return { - model_name: model_class - for family_name, family_dict in MODELS_BY_FAMILY_REGISTRY.items() - for model_name, model_class in family_dict.items() - } + model_name_family_name_map = {} + for model_name, model_class in MODEL_REGISTRY.items(): + model_parent_class = inspect.getmro(model_class)[1] + family_name = model_parent_class.__name__ + model_name_family_name_map[model_name] = family_name + return model_name_family_name_map elif name == "MODEL_NAMES": return list( - { - model_name: model_class - for 
family_name, family_dict in MODELS_BY_FAMILY_REGISTRY.items() - for model_name, model_class in family_dict.items() - }.keys() + MODEL_REGISTRY.keys() ) else: raise AttributeError( diff --git a/src/vak/models/teenytweetynet.py b/src/vak/models/teenytweetynet.py deleted file mode 100644 index bdf839c57..000000000 --- a/src/vak/models/teenytweetynet.py +++ /dev/null @@ -1,23 +0,0 @@ -"""lightweight version of ``vak.models.TweetyNet`` used by ``vak`` unit tests -""" -import torch - -from .. import metrics, nets -from .decorator import model -from .frame_classification_model import FrameClassificationModel - - -@model(family=FrameClassificationModel) -class TeenyTweetyNet: - """lightweight version of ``vak.models.TweetyNet`` used by ``vak`` unit tests""" - - network = nets.TeenyTweetyNet - loss = torch.nn.CrossEntropyLoss - optimizer = torch.optim.Adam - metrics = { - "acc": metrics.Accuracy, - "levenshtein": metrics.Levenshtein, - "segment_error_rate": metrics.SegmentErrorRate, - "loss": torch.nn.CrossEntropyLoss, - } - default_config = {"optimizer": {"lr": 0.003}} diff --git a/src/vak/nets/__init__.py b/src/vak/nets/__init__.py index 8602ec7dc..e31b90bff 100644 --- a/src/vak/nets/__init__.py +++ b/src/vak/nets/__init__.py @@ -1,7 +1,6 @@ -from . import conv_encoder, ed_tcn, teenytweetynet, tweetynet +from . import conv_encoder, ed_tcn, tweetynet from .conv_encoder import ConvEncoder from .ed_tcn import ED_TCN -from .teenytweetynet import TeenyTweetyNet from .tweetynet import TweetyNet __all__ = [ @@ -9,8 +8,6 @@ "ConvEncoder", "ed_tcn", "ED_TCN", - "teenytweetynet", - "TeenyTweetyNet", "tweetynet", "TweetyNet", ] diff --git a/src/vak/nets/conv_encoder.py b/src/vak/nets/conv_encoder.py index 6fd828f2f..426554baf 100644 --- a/src/vak/nets/conv_encoder.py +++ b/src/vak/nets/conv_encoder.py @@ -12,12 +12,12 @@ class ConvEncoder(nn.Module): def __init__( self, input_shape: tuple[int], - conv1_filters: int = 64, - conv2_filters: int = 128, + conv1_filters: int = 32, + conv2_filters: int = 64, conv_kernel_size: int = 3, conv_stride: int = 2, conv_padding: int = 1, - n_features_linear: int = 512, + n_features_linear: int = 256, n_components: int = 2, ): """Initialize a ConvEncoder instance. @@ -50,6 +50,7 @@ def __init__( f"Input shape was: {input_shape}" ) + self.input_shape = input_shape self.num_input_channels = input_shape[0] self.conv = nn.Sequential( @@ -60,6 +61,7 @@ def __init__( stride=conv_stride, padding=conv_padding, ), + nn.MaxPool2d(2, 2), nn.Conv2d( in_channels=conv1_filters, out_channels=conv2_filters, @@ -67,6 +69,7 @@ def __init__( stride=conv_stride, padding=conv_padding, ), + nn.MaxPool2d(2, 2), nn.Flatten(), ) mock_input = torch.rand((1, *input_shape)) diff --git a/src/vak/nets/teenytweetynet.py b/src/vak/nets/teenytweetynet.py deleted file mode 100644 index dafd694b9..000000000 --- a/src/vak/nets/teenytweetynet.py +++ /dev/null @@ -1,130 +0,0 @@ -"""lightweight version of ``vak.nets.TweetyNet`` used by ``vak`` unit tests""" -import torch -from torch import nn - - -class TeenyTweetyNet(nn.Module): - """Lightweight version of ``vak.nets.TweetyNet`` used by ``vak`` unit tests. - - Notes - ----- - This is the network used by ``vak.models.TeenyTweetyNetModel``. 
- """ - - def __init__( - self, - num_classes, - num_input_channels=1, - num_freqbins=256, - conv1_filters=8, - conv1_kernel_size=(5, 5), - conv1_padding=(0, 2), - conv2_filters=16, - conv2_kernel_size=(5, 5), - conv2_padding=(0, 2), - pool1_size=(4, 1), - pool1_stride=(4, 1), - pool2_size=(4, 1), - pool2_stride=(4, 1), - hidden_size=32, - ): - """TeenyTweetyNet model - - Parameters - ---------- - num_classes : int - number of classes to predict, e.g., number of syllable classes in an individual bird's song - input_shape : tuple - with 3 elements corresponding to dimensions of spectrogram windows: (channels, frequency bins, time bins). - i.e. we assume input is a spectrogram and treat it like an image, typically with one channel, - the rows are frequency bins, and the columns are time bins. Default is (1, 513, 88). - conv1_filters : int - Number of filters in first convolutional layer. Default is 32. - conv1_kernel_size : tuple - Size of kernels, i.e. filters, in first convolutional layer. Default is (5, 5). - conv2_filters : int - Number of filters in second convolutional layer. Default is 64. - conv2_kernel_size : tuple - Size of kernels, i.e. filters, in second convolutional layer. Default is (5, 5). - pool1_size : two element tuple of ints - Size of sliding window for first max pooling layer. Default is (8, 1) - pool1_stride : two element tuple of ints - Step size for sliding window of first max pooling layer. Default is (8, 1) - pool2_size : two element tuple of ints - Size of sliding window for second max pooling layer. Default is (4, 1), - pool2_stride : two element tuple of ints - Step size for sliding window of second max pooling layer. Default is (4, 1) - hidden_size : int - Size of hidden state in recurrent neural network; dimensionality of vector. - Default is 32. - """ - super().__init__() - self.num_classes = num_classes - self.num_input_channels = num_input_channels - self.num_freqbins = num_freqbins - self.hidden_size = hidden_size - - self.cnn = nn.Sequential( - nn.Conv2d( - in_channels=self.num_input_channels, - out_channels=conv1_filters, - kernel_size=conv1_kernel_size, - padding=conv1_padding, - ), - nn.ReLU(inplace=True), - nn.MaxPool2d(kernel_size=pool1_size, stride=pool1_stride), - nn.Conv2d( - in_channels=conv1_filters, - out_channels=conv2_filters, - kernel_size=conv2_kernel_size, - padding=conv2_padding, - ), - nn.ReLU(inplace=True), - nn.MaxPool2d(kernel_size=pool2_size, stride=pool2_stride), - ) - - # determine number of features in output after stacking channels - # we use the same number of features for hidden states - # note self.num_hidden is also used to reshape output of cnn in self.forward method - N_DUMMY_TIMEBINS = ( - 256 # some not-small number. 
This dimension doesn't matter here - ) - batch_shape = ( - 1, - self.num_input_channels, - self.num_freqbins, - N_DUMMY_TIMEBINS, - ) - tmp_tensor = torch.rand(batch_shape) - tmp_out = self.cnn(tmp_tensor) - channels_out, freqbins_out = tmp_out.shape[1], tmp_out.shape[2] - self.rnn_input_size = channels_out * freqbins_out - - self.rnn = nn.LSTM( - input_size=self.rnn_input_size, - hidden_size=self.hidden_size, - num_layers=1, - dropout=0, - bidirectional=True, - ) - - # for self.fc, in_features = hidden_size * 2, because LSTM is bidirectional - # so we get hidden forward + hidden backward as output - self.fc = nn.Linear(self.hidden_size * 2, num_classes) - - def forward(self, x): - features = self.cnn(x) - # stack channels so that dimension order is (batch, rnn_input_size, num time bins) - features = features.view(features.shape[0], self.rnn_input_size, -1) - # now switch dimensions for feeding to rnn, - # so dimension order is (num time bins, batch size, rnn_input_size) - features = features.permute(2, 0, 1) - rnn_output, (hidden, cell_state) = self.rnn(features) - # permute back to (batch, time bins, features) - # so we can project features down onto number of classes - rnn_output = rnn_output.permute(1, 0, 2) - logits = self.fc(rnn_output) - # permute yet again - # so that dimension order is (batch, classes, time steps) - # because this is order that loss function expects - return logits.permute(0, 2, 1) diff --git a/src/vak/nets/tweetynet.py b/src/vak/nets/tweetynet.py index 9a5136a02..ed2ec5e7b 100644 --- a/src/vak/nets/tweetynet.py +++ b/src/vak/nets/tweetynet.py @@ -43,7 +43,7 @@ class TweetyNet(nn.Module): Finally fully-connected layer that maps the output of ``TweetyNet.rnn`` to a matrix of size (num. time bins in window, num. classes). -\ + Notes ----- This is the network used by ``vak.models.TweetyNetModel``. @@ -73,11 +73,13 @@ def __init__( Parameters ---------- num_classes : int - number of classes to predict, e.g., number of syllable classes in an individual bird's song - input_shape : tuple - with 3 elements corresponding to dimensions of spectrogram windows: (channels, frequency bins, time bins). - i.e. we assume input is a spectrogram and treat it like an image, typically with one channel, - the rows are frequency bins, and the columns are time bins. Default is (1, 513, 88). + Number of classes to predict, e.g., number of syllable classes in an individual bird's song + num_input_channels: int + Number of channels in input. Typically one, for a spectrogram. + Default is 1. + num_freqbins: int + Number of frequency bins in spectrograms that will be input to model. + Default is 256. padding : str type of padding to use, one of {"VALID", "SAME"}. Default is "SAME". conv1_filters : int diff --git a/src/vak/predict/__init__.py b/src/vak/predict/__init__.py index f9bbd3aec..e05422f49 100644 --- a/src/vak/predict/__init__.py +++ b/src/vak/predict/__init__.py @@ -1,9 +1,10 @@ -from .predict import predict -from . import frame_classification, parametric_umap +from . 
import frame_classification, parametric_umap, predict_ +from .predict_ import predict __all__ = [ "frame_classification", "parametric_umap", "predict", + "predict_", ] diff --git a/src/vak/predict/frame_classification.py b/src/vak/predict/frame_classification.py index 99b5bf99d..c5c7e52da 100644 --- a/src/vak/predict/frame_classification.py +++ b/src/vak/predict/frame_classification.py @@ -226,10 +226,12 @@ def predict_with_frame_classification_model( ) model.load_state_dict_from_path(checkpoint_path) + # TODO: use accelerator parameter, https://github.com/vocalpy/vak/issues/691 if device == "cuda": accelerator = "gpu" else: - accelerator = None + accelerator = "auto" + trainer_logger = lightning.loggers.TensorBoardLogger(save_dir=output_dir) trainer = lightning.Trainer(accelerator=accelerator, logger=trainer_logger) diff --git a/src/vak/predict/parametric_umap.py b/src/vak/predict/parametric_umap.py index df7eba8a0..66955f2a9 100644 --- a/src/vak/predict/parametric_umap.py +++ b/src/vak/predict/parametric_umap.py @@ -158,10 +158,12 @@ def predict_with_parametric_umap_model( ) model.load_state_dict_from_path(checkpoint_path) + # TODO: use accelerator parameter, https://github.com/vocalpy/vak/issues/691 if device == "cuda": accelerator = "gpu" else: - accelerator = None + accelerator = "auto" + trainer_logger = lightning.loggers.TensorBoardLogger(save_dir=output_dir) trainer = lightning.Trainer(accelerator=accelerator, logger=trainer_logger) diff --git a/src/vak/predict/predict.py b/src/vak/predict/predict_.py similarity index 100% rename from src/vak/predict/predict.py rename to src/vak/predict/predict_.py diff --git a/src/vak/prep/__init__.py b/src/vak/prep/__init__.py index 832630976..b047955b1 100644 --- a/src/vak/prep/__init__.py +++ b/src/vak/prep/__init__.py @@ -4,10 +4,13 @@ dataset_df_helper, frame_classification, parametric_umap, + prep_, + sequence_dataset, spectrogram_dataset, unit_dataset, ) -from .prep import prep +from .prep_ import prep + __all__ = [ "audio_dataset", @@ -16,6 +19,8 @@ "frame_classification", "parametric_umap", "prep", + "prep_", + "sequence_dataset", "spectrogram_dataset", "unit_dataset", ] diff --git a/src/vak/prep/audio_dataset.py b/src/vak/prep/audio_dataset.py index 04422c4b6..3f6ff192e 100644 --- a/src/vak/prep/audio_dataset.py +++ b/src/vak/prep/audio_dataset.py @@ -11,8 +11,10 @@ from ..common import annotation, constants from ..common.converters import expanded_user_path, labelset_to_set +from ..common.typing import PathLike from .spectrogram_dataset.audio_helper import files_from_dir + logger = logging.getLogger(__name__) @@ -28,22 +30,32 @@ def prep_audio_dataset( + data_dir: PathLike, audio_format: str, - data_dir: list | None = None, annot_format: str | None = None, annot_file: str | pathlib.Path | None = None, labelset: set | None = None, ) -> pd.DataFrame: - """Convert audio files into a dataset - represented as a Pandas DataFrame. + """Creates a dataset of audio files from a directory, + optionally paired with an annotation file or files, + and returns a Pandas DataFrame that represents the dataset. + + Finds all files with ``audio_format`` in ``data_dir``, + then finds any annotations with ``annot_format`` if specified, + and additionally filters the audio and annotation files + by ``labelset`` if specified. + Then creates the dataframe with columns specified by + ``vak.prep.audio_dataset.DF_COLUMNS``: + ``"audio_path"``, ``"annot_path"``, ``"annot_format"``, ``"samplerate"``, + ``"sample_dur"``, and ``"duration"``.
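+
+    For example, a minimal call might look like the sketch below
+    (the data directory and label set here are hypothetical,
+    shown only to illustrate the parameters)::
+
+        df = prep_audio_dataset(
+            data_dir="./data/bird0/032312",
+            audio_format="cbin",
+            annot_format="notmat",
+            labelset=set("iabcdefghjk"),
+        )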
Parameters ---------- + data_dir : str, pathlib.Path + Path to directory containing audio files that should be used in dataset. audio_format : str A :class:`string` representing the format of audio files. One of :constant:`vak.common.constants.VALID_AUDIO_FORMATS`. - data_dir : str - Path to directory containing audio files that should be used in dataset. annot_format : str Name of annotation format. Added as a column to the DataFrame if specified. Used by other functions that open annotation files via their paths from the DataFrame. @@ -77,8 +89,10 @@ def prep_audio_dataset( labelset = labelset_to_set(labelset) data_dir = expanded_user_path(data_dir) - if not data_dir.is_dir(): - raise NotADirectoryError(f"data_dir not found: {data_dir}") + if not data_dir.exists() or not data_dir.is_dir(): + raise NotADirectoryError( + f"`data_dir` not found, or not recognized as a directory:\n{data_dir}" + ) audio_files = files_from_dir(data_dir, audio_format) diff --git a/src/vak/prep/frame_classification/dataset_arrays.py b/src/vak/prep/frame_classification/dataset_arrays.py index 4e6744288..dbeb46978 100644 --- a/src/vak/prep/frame_classification/dataset_arrays.py +++ b/src/vak/prep/frame_classification/dataset_arrays.py @@ -103,24 +103,34 @@ def make_npy_files_for_each_split( audio_format: str, spect_key: str = "s", timebins_key: str = "t", -): +) -> pd.DataFrame: r"""Make npy files containing arrays for each split of a frame classification dataset. - For each row in ``dataset_df``, this function creates one npy file - with the extension '.frames.npy`, containing the input - to the frame classification model, and '.frame_labels.npy', - a vector where each element is the target label + All the npy files for each split are saved + in a new directory inside ``dataset_path`` + that has the same name as the split. + E.g., the ``train`` directory inside ``dataset_path`` + would have all the files for every row in ``dataset_df`` + for which ``dataset_df['split'] == 'train'``. + + The function creates two npy files for each row in ``dataset_df``. + One has the extension '.frames.npy' and contains the input + to the frame classification model. The other has the extension + '.frame_labels.npy', and contains a vector + where each element is the target label that the network should predict for the corresponding frame. - These files are the data for each sample :math:`(x, y)` in the dataset, + Taken together, these two files are the data + for each sample :math:`(x, y)` in the dataset, where :math:`x_t` is the frames and :math:`y_t` is the frame labels. - This function also creates two "indexing" vectors that + This function also creates two additional npy files for each split. + These npy files are "indexing" vectors that are used by :class:`vak.datasets.frame_classification.WindowDataset` and :class:`vak.datasets.frame_classification.FramesDataset`. These vectors make it possible to work with files, - to avoid loading the entire dataset into memory - or working with memory-mapped arrays. + to avoid loading the entire dataset into memory, + and to avoid working with memory-mapped arrays. The first is the ``sample_ids`` vector, that represents the "ID" of any sample :math:`(x, y)` in the dataset. We use these IDs to load the array files corresponding to the samples. @@ -171,7 +181,7 @@ def make_npy_files_for_each_split( ------- dataset_df_out : pandas.DataFrame The ``dataset_df`` with splits sorted by increasing frequency - of labels (see :func:`~vak.prep.frame_classification.dataset_arrays.
+ of labels (see :func:`~vak.prep.frame_classification.dataset_arrays`), and with columns added containing the npy files for each row. """ if input_type not in prep_constants.INPUT_TYPES: @@ -203,7 +213,6 @@ def make_npy_files_for_each_split( split_df = ( split_df.sort_values(by="sort_inds") .drop(columns="sort_inds") - .reset_index() ) if input_type == "audio": @@ -342,5 +351,7 @@ def _save_dataset_arrays_and_return_index_arrays( ] = frame_labels_npy_paths dataset_df_out.append(split_df) - dataset_df_out = pd.concat(dataset_df_out) + # we reset the entire index across all splits, instead of repeating indices, + # and we set drop=True because we don't want to add a new column 'index' or 'level_0' + dataset_df_out = pd.concat(dataset_df_out).reset_index(drop=True) return dataset_df_out diff --git a/src/vak/prep/frame_classification/frame_classification.py b/src/vak/prep/frame_classification/frame_classification.py index 620000297..4d291c8bc 100644 --- a/src/vak/prep/frame_classification/frame_classification.py +++ b/src/vak/prep/frame_classification/frame_classification.py @@ -1,3 +1,5 @@ +from __future__ import annotations + import json import logging import pathlib diff --git a/src/vak/prep/frame_classification/learncurve.py b/src/vak/prep/frame_classification/learncurve.py index 96392967f..363675d1d 100644 --- a/src/vak/prep/frame_classification/learncurve.py +++ b/src/vak/prep/frame_classification/learncurve.py @@ -12,6 +12,7 @@ from .. import split from .dataset_arrays import make_npy_files_for_each_split + logger = logging.getLogger(__name__) @@ -27,7 +28,11 @@ def make_learncurve_splits_from_dataset_df( timebins_key: str = "t", ) -> pd.DataFrame: """Make splits for a learning curve - from a dataframe representing the entire dataset. + from a dataframe representing the entire dataset, + one split for each combination of (training set duration, + replicate number). + Each split is a randomly drawn subset of data + from the total training split. Uses :func:`vak.prep.split.frame_classification_dataframe` to make splits/subsets of the training data :func:`vak.prep.frame_classification.dataset_arrays.make_npy_files_for_each_split` to make the array files for each split. + A new directory will be made for each combination of + (training set duration, replicate number) as shown below, + for ``train_durs=[4.0, 6.0], num_replicates=2``. + + .. code-block:: console + 032312-vak-frame-classification-dataset-generated-230820_144833 + ├── 032312_prep_230820_144833.csv + ├── labelmap.json + ├── metadata.json + ├── prep_230820_144833.log + ├── spectrograms_generated_230820_144833 + ├── test + ├── train + ├── train-dur-4.0-replicate-1 + ├── train-dur-4.0-replicate-2 + ├── train-dur-6.0-replicate-1 + ├── train-dur-6.0-replicate-2 + ├── TweetyNet_learncurve_audio_cbin_annot_notmat.toml + └── val + + Parameters ---------- dataset_df : pandas.DataFrame @@ -138,4 +164,9 @@ def make_learncurve_splits_from_dataset_df( dataset_df, ) ) + # We reset the entire index across all splits, instead of repeating indices, + # and we set drop=True because we don't want to add a new column 'index' or 'level_0'.
+ # Need to do this again after calling `make_npy_files_for_each_split` since we just + # did `pd.concat` with the original dataframe. + dataset_df = dataset_df.reset_index(drop=True) return dataset_df diff --git a/src/vak/prep/parametric_umap/parametric_umap.py b/src/vak/prep/parametric_umap/parametric_umap.py index 7b5be8fa8..560b5a699 100644 --- a/src/vak/prep/parametric_umap/parametric_umap.py +++ b/src/vak/prep/parametric_umap/parametric_umap.py @@ -1,3 +1,5 @@ +from __future__ import annotations + import json import logging import pathlib diff --git a/src/vak/prep/prep.py b/src/vak/prep/prep_.py similarity index 99% rename from src/vak/prep/prep.py rename to src/vak/prep/prep_.py index b9aebbdbc..a99e287d4 100644 --- a/src/vak/prep/prep.py +++ b/src/vak/prep/prep_.py @@ -1,3 +1,5 @@ +from __future__ import annotations + import logging import pathlib diff --git a/src/vak/prep/spectrogram_dataset/spect_helper.py b/src/vak/prep/spectrogram_dataset/spect_helper.py index 125b5a8d8..a29ebe485 100644 --- a/src/vak/prep/spectrogram_dataset/spect_helper.py +++ b/src/vak/prep/spectrogram_dataset/spect_helper.py @@ -39,21 +39,26 @@ def make_dataframe_of_spect_files( spect_ext: str | None = None, annot_list: list | None = None, annot_format: str | None = None, - spect_annot_map: dict | None = None, labelset: set | None = None, n_decimals_trunc: int = 5, freqbins_key: str = "f", timebins_key: str = "t", spect_key: str = "s", audio_path_key: str = "audio_path", -): - """Get a dataset of spectrograms and optional annotations as a Pandas DataFrame. +) -> pd.DataFrame: + """Creates a dataset of spectrogram files from a directory, + optionally paired with an annotation file or files, + and returns a Pandas DataFrame that represents the dataset. Spectrogram files are array in npz files created by numpy or in mat files created by Matlab. - If files are in npz format, they will be convert to npz + If files are in mat format, they will be converted to npz with the default keys for arrays, and saved in - ``spect_output_dir``. + ``spect_output_dir``. This step is required so that all datasets + prepared by :mod:`vak` are in a "normalized" or + "canonicalized" format. If no ``spect_output_dir`` is provided + when the ``spect_format`` is ``'mat'``, then this function + will raise an error. Parameters ---------- @@ -71,10 +76,6 @@ def make_dataframe_of_spect_files( Used by other functions that open annotation files via their paths from the DataFrame. Should be a format that the crowsetta library recognizes. Default is None. - spect_annot_map : dict - Where keys are paths to files and value corresponding to each key is - the annotation for that file. - Default is None. labelset : str, list, set of str or int, set of unique labels for vocalizations. Default is None. If not None, then files will be skipped where the associated annotation @@ -107,7 +108,7 @@ at the bin centers. (As far as vak is concerned, "vector" and "matrix" are synonymous with "array".) - Since both .mat files and .npz files load into a dictionary-like structure, + Since both mat files and npz files load into a dictionary-like structure, the arrays will be accessed with keys. By convention, these keys are 's', 'f', and 't'. If you use different keys you can let this function know by changing the appropriate arguments: spect_key, freqbins_key, timebins_key @@ -127,9 +128,9 @@ "canonical format that other functions in the library expect."
) - if all([arg is None for arg in (spect_dir, spect_files, spect_annot_map)]): + if all([arg is None for arg in (spect_dir, spect_files)]): raise ValueError( - "must specify one of: spect_dir, spect_files, spect_annot_map" + "must specify one of: spect_dir, spect_files" ) if spect_dir and spect_files: @@ -137,34 +138,12 @@ def make_dataframe_of_spect_files( "received values for spect_dir and spect_files, unclear which to use" ) - if spect_dir and spect_annot_map: - raise ValueError( - "received values for spect_dir and spect_annot_map, unclear which to use" - ) - - if spect_files and spect_annot_map: + if annot_list and annot_format is None: raise ValueError( - "received values for spect_files and spect_annot_map, unclear which to use" + "an annot_list was provided, but no annot_format was specified" ) - if annot_list and spect_annot_map: - raise ValueError( - "received values for annot_list and spect_annot_map, unclear which annotations to use" - ) - - if (annot_list or spect_annot_map) and (annot_format is None): - if annot_list: - raise ValueError( - "an annot_list was provided, but no annot_format was specified" - ) - elif spect_annot_map: - raise ValueError( - "a spect_annot_map was provided, but no annot_format was specified" - ) - - if annot_format is not None and ( - annot_list is None and spect_annot_map is None - ): + if annot_format is not None and annot_list is None: raise ValueError( "an annot_format was specified but no annot_list or spect_annot_map was provided" ) @@ -183,7 +162,7 @@ def make_dataframe_of_spect_files( if spect_dir: # then get spect_files from that dir # note we already validated format above spect_files = sorted( - pathlib.Path(spect_dir).glob(f"**/*{spect_format}") + pathlib.Path(spect_dir).glob(f"*{spect_format}") ) if spect_files: # (or if we just got them from spect_dir) diff --git a/src/vak/prep/split/split.py b/src/vak/prep/split/split.py index a504406b1..dc6278b9b 100644 --- a/src/vak/prep/split/split.py +++ b/src/vak/prep/split/split.py @@ -12,6 +12,7 @@ from .algorithms import brute_force from .algorithms.validate import validate_split_durations + logger = logging.getLogger(__name__) @@ -107,7 +108,7 @@ def frame_classification_dataframe( Splits dataset into training, test, and (optionally) validation subsets, specified by their duration. - Additionally adds a 'split' column to the dataframe, + Additionally, adds a 'split' column to the dataframe, that assigns each row to 'train', 'val', 'test', or 'None'. Parameters diff --git a/src/vak/train/__init__.py b/src/vak/train/__init__.py index 3e99d4438..713ce4b80 100644 --- a/src/vak/train/__init__.py +++ b/src/vak/train/__init__.py @@ -1,8 +1,10 @@ -from . import frame_classification, parametric_umap -from .train import train +from . import frame_classification, parametric_umap, train_ +from .train_ import train + __all__ = [ "frame_classification", "parametric_umap", + "train_", "train", ] diff --git a/src/vak/train/parametric_umap.py b/src/vak/train/parametric_umap.py index 7ca112411..a254397ed 100644 --- a/src/vak/train/parametric_umap.py +++ b/src/vak/train/parametric_umap.py @@ -33,10 +33,11 @@ def get_trainer( """Returns an instance of ``lightning.Trainer`` with a default set of callbacks. 
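+
+    The ``device`` argument is mapped to a Lightning ``accelerator``
+    roughly as in this one-line sketch of the logic below
+    (a sketch, not a public API; see the TODO about an accelerator parameter)::
+
+        accelerator = "gpu" if device == "cuda" else "auto"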
Used by ``vak.core`` functions.""" + # TODO: use accelerator parameter, https://github.com/vocalpy/vak/issues/691 if device == "cuda": accelerator = "gpu" else: - accelerator = None + accelerator = "auto" ckpt_callback = lightning.callbacks.ModelCheckpoint( dirpath=ckpt_root, @@ -219,12 +220,6 @@ def train_parametric_umap_model( if train_transform_params is None: train_transform_params = {} - if ( - "padding" not in train_transform_params - and model_name == "ConvEncoderUMAP" - ): - padding = models.convencoder_umap.get_default_padding(metadata.shape) - train_transform_params["padding"] = padding transform = transforms.defaults.get_default_transform( model_name, "train", train_transform_params ) @@ -251,14 +246,6 @@ def train_parametric_umap_model( if val_step: if val_transform_params is None: val_transform_params = {} - if ( - "padding" not in val_transform_params - and model_name == "ConvEncoderUMAP" - ): - padding = models.convencoder_umap.get_default_padding( - metadata.shape - ) - val_transform_params["padding"] = padding transform = transforms.defaults.get_default_transform( model_name, "eval", val_transform_params ) diff --git a/src/vak/train/train.py b/src/vak/train/train_.py similarity index 100% rename from src/vak/train/train.py rename to src/vak/train/train_.py diff --git a/src/vak/transforms/defaults/parametric_umap.py b/src/vak/transforms/defaults/parametric_umap.py index 07ededc64..83c568b06 100644 --- a/src/vak/transforms/defaults/parametric_umap.py +++ b/src/vak/transforms/defaults/parametric_umap.py @@ -23,8 +23,4 @@ def get_default_parametric_umap_transform( vak_transforms.ToFloatTensor(), vak_transforms.AddChannel(), ] - if "padding" in transform_kwargs: - transforms.append( - torchvision.transforms.Pad(transform_kwargs["padding"]) - ) return torchvision.transforms.Compose(transforms) diff --git a/src/vak/transforms/transforms.py b/src/vak/transforms/transforms.py index 08920fa89..9e2c66935 100644 --- a/src/vak/transforms/transforms.py +++ b/src/vak/transforms/transforms.py @@ -1,3 +1,5 @@ +from __future__ import annotations + import pathlib import numpy as np diff --git a/tests/conftest.py b/tests/conftest.py index 4a8065221..b011bb4f6 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -1,25 +1,14 @@ -from _pytest.mark import Mark - from . 
import fixtures # keep this import here, we need it for fixtures from .fixtures import * -empty_mark = Mark('', [], {}) - - def by_slow_marker(item): return 1 if item.get_closest_marker('slow') is None else 0 + def pytest_addoption(parser): - parser.addoption( - "--models", - action="store", - default="teenytweetynet", - nargs="+", - help="vak models to test, space-separated list of names", - ) parser.addoption('--dtype', action="store", default="float32") parser.addoption('--slow-last', action='store_true', default=False) @@ -30,15 +19,6 @@ def pytest_collection_modifyitems(items, config): def pytest_generate_tests(metafunc): - models = metafunc.config.option.models - if isinstance(models, str): - # wrap a single model name in a list - models = [models] - # **note!** fixture name is singular even though cmdopt is plural - if "model" in metafunc.fixturenames and models is not None: - metafunc.parametrize("model", models) - - dtype_names = None if 'dtype_name' in metafunc.fixturenames: raw_value = metafunc.config.getoption('--dtype') if raw_value == 'all': diff --git a/tests/data_for_tests/configs/ConvEncoderUMAP_eval_audio_cbin_annot_notmat.toml b/tests/data_for_tests/configs/ConvEncoderUMAP_eval_audio_cbin_annot_notmat.toml index b85935d23..a2be49143 100644 --- a/tests/data_for_tests/configs/ConvEncoderUMAP_eval_audio_cbin_annot_notmat.toml +++ b/tests/data_for_tests/configs/ConvEncoderUMAP_eval_audio_cbin_annot_notmat.toml @@ -2,10 +2,11 @@ dataset_type = "parametric umap" input_type = "spect" data_dir = "./tests/data_for_tests/source/audio_cbin_annot_notmat/gy6or6/032412" -output_dir = "./tests/data_for_tests/generated/prep/train/audio_cbin_annot_notmat/ConvEncoderUMAP" +output_dir = "./tests/data_for_tests/generated/prep/eval/audio_cbin_annot_notmat/ConvEncoderUMAP" audio_format = "cbin" annot_format = "notmat" labelset = "iabcdefghjk" +test_dur = 0.2 [SPECT_PARAMS] fft_size = 512 @@ -15,11 +16,19 @@ transform_type = "log_spect_plus_one" [EVAL] checkpoint_path = "tests/data_for_tests/generated/results/train/audio_cbin_annot_notmat/ConvEncoderUMAP/results_230727_210112/ConvEncoderUMAP/checkpoints/checkpoint.pt" model = "ConvEncoderUMAP" -batch_size = 4 -num_workers = 2 +batch_size = 64 +num_workers = 16 device = "cuda" output_dir = "./tests/data_for_tests/generated/results/eval/audio_cbin_annot_notmat/ConvEncoderUMAP" -dataset_path = "tests/data_for_tests/generated/prep/train/audio_cbin_annot_notmat/ConvEncoderUMAP/032312-vak-dimensionality-reduction-dataset-generated-230727_205727" + +[ConvEncoderUMAP.network] +conv1_filters = 8 +conv2_filters = 16 +conv_kernel_size = 3 +conv_stride = 2 +conv_padding = 1 +n_features_linear = 32 +n_components = 2 [ConvEncoderUMAP.optimizer] lr = 0.001 diff --git a/tests/data_for_tests/configs/ConvEncoderUMAP_train_audio_cbin_annot_notmat.toml b/tests/data_for_tests/configs/ConvEncoderUMAP_train_audio_cbin_annot_notmat.toml index 102dd6192..456c99468 100644 --- a/tests/data_for_tests/configs/ConvEncoderUMAP_train_audio_cbin_annot_notmat.toml +++ b/tests/data_for_tests/configs/ConvEncoderUMAP_train_audio_cbin_annot_notmat.toml @@ -6,8 +6,9 @@ output_dir = "./tests/data_for_tests/generated/prep/train/audio_cbin_annot_notma audio_format = "cbin" annot_format = "notmat" labelset = "iabcdefghjk" -train_dur = 40 -val_dur = 15 +train_dur = 0.5 +val_dur = 0.2 +test_dur = 0.25 [SPECT_PARAMS] fft_size = 512 @@ -16,15 +17,22 @@ transform_type = "log_spect_plus_one" [TRAIN] model = "ConvEncoderUMAP" -normalize_spectrograms = true -batch_size = 4 -num_epochs = 2 
-val_step = 50 -ckpt_step = 200 -patience = 3 -num_workers = 2 +batch_size = 64 +num_epochs = 1 +val_step = 1 +ckpt_step = 1000 +num_workers = 16 device = "cuda" root_results_dir = "./tests/data_for_tests/generated/results/train/audio_cbin_annot_notmat/ConvEncoderUMAP" +[ConvEncoderUMAP.network] +conv1_filters = 8 +conv2_filters = 16 +conv_kernel_size = 3 +conv_stride = 2 +conv_padding = 1 +n_features_linear = 32 +n_components = 2 + [ConvEncoderUMAP.optimizer] lr = 0.001 diff --git a/tests/data_for_tests/configs/TeenyTweetyNet_eval_audio_cbin_annot_notmat.toml b/tests/data_for_tests/configs/TeenyTweetyNet_eval_audio_cbin_annot_notmat.toml deleted file mode 100644 index c523b7068..000000000 --- a/tests/data_for_tests/configs/TeenyTweetyNet_eval_audio_cbin_annot_notmat.toml +++ /dev/null @@ -1,31 +0,0 @@ -[PREP] -dataset_type = "frame classification" -input_type = "spect" -labelset = "iabcdefghjk" -data_dir = "./tests/data_for_tests/source/audio_cbin_annot_notmat/gy6or6/032412" -output_dir = "./tests/data_for_tests/generated/prep/eval/audio_cbin_annot_notmat/TeenyTweetyNet" -audio_format = "cbin" -annot_format = "notmat" - -[SPECT_PARAMS] -fft_size = 256 -step_size = 64 -freq_cutoffs = [ 1000, 8000,] -thresh = 6.25 -transform_type = "log_spect" - -[EVAL] -checkpoint_path = "~/Documents/repos/coding/birdsong/TeenyTweetyNet/results/BFSongRepository/gy6or6/results_200620_165308/TeenyTweetyNet/checkpoints/max-val-acc-checkpoint.pt" -labelmap_path = "~/Documents/repos/coding/birdsong/TeenyTweetyNet/results/BFSongRepository/gy6or6/results_200620_165308/labelmap.json" -model = "TeenyTweetyNet" -batch_size = 4 -num_workers = 2 -device = "cuda" -spect_scaler_path = "~/Documents/repos/coding/birdsong/TeenyTweetyNet/results/BFSongRepository/gy6or6/results_200620_165308/StandardizeSpect" -output_dir = "./tests/data_for_tests/generated/results/eval/audio_cbin_annot_notmat/TeenyTweetyNet" - -[EVAL.transform_params] -window_size = 44 - -[TeenyTweetyNet.optimizer] -lr = 0.001 diff --git a/tests/data_for_tests/configs/TeenyTweetyNet_learncurve_audio_cbin_annot_notmat.toml b/tests/data_for_tests/configs/TeenyTweetyNet_learncurve_audio_cbin_annot_notmat.toml deleted file mode 100644 index b8a83600d..000000000 --- a/tests/data_for_tests/configs/TeenyTweetyNet_learncurve_audio_cbin_annot_notmat.toml +++ /dev/null @@ -1,41 +0,0 @@ -[PREP] -dataset_type = "frame classification" -input_type = "spect" -data_dir = "./tests/data_for_tests/source/audio_cbin_annot_notmat/gy6or6/032312" -output_dir = "./tests/data_for_tests/generated/prep/learncurve/audio_cbin_annot_notmat/TeenyTweetyNet" -audio_format = "cbin" -annot_format = "notmat" -labelset = "iabcdefghjk" -train_dur = 50 -val_dur = 15 -test_dur = 30 -train_set_durs = [ 4, 6,] -num_replicates = 2 - -[SPECT_PARAMS] -fft_size = 256 -step_size = 64 -freq_cutoffs = [ 500, 10000,] -thresh = 6.25 -transform_type = "log_spect" - -[LEARNCURVE] -model = "TeenyTweetyNet" -normalize_spectrograms = true -batch_size = 4 -num_epochs = 2 -val_step = 50 -ckpt_step = 200 -patience = 3 -num_workers = 2 -device = "cuda" -root_results_dir = "./tests/data_for_tests/generated/results/learncurve/audio_cbin_annot_notmat/TeenyTweetyNet" - -[LEARNCURVE.train_dataset_params] -window_size = 44 - -[LEARNCURVE.val_transform_params] -window_size = 44 - -[TeenyTweetyNet.optimizer] -lr = 0.001 diff --git a/tests/data_for_tests/configs/TeenyTweetyNet_predict_audio_cbin_annot_notmat.toml b/tests/data_for_tests/configs/TeenyTweetyNet_predict_audio_cbin_annot_notmat.toml deleted file mode 100644 index 
4489c8960..000000000 --- a/tests/data_for_tests/configs/TeenyTweetyNet_predict_audio_cbin_annot_notmat.toml +++ /dev/null @@ -1,30 +0,0 @@ -[PREP] -dataset_type = "frame classification" -input_type = "spect" -data_dir = "./tests/data_for_tests/source/audio_cbin_annot_notmat/gy6or6/032412" -output_dir = "./tests/data_for_tests/generated/prep/predict/audio_cbin_annot_notmat/TeenyTweetyNet" -audio_format = "cbin" - -[SPECT_PARAMS] -fft_size = 256 -step_size = 64 -freq_cutoffs = [ 1000, 8000,] -thresh = 6.25 -transform_type = "log_spect" - -[PREDICT] -spect_scaler_path = "/home/user/results_181014_194418/spect_scaler" -checkpoint_path = "~/Documents/repos/coding/birdsong/TeenyTweetyNet/results/BFSongRepository/bl26lb16/results_200620_164245/TeenyTweetyNet/checkpoints/max-val-acc-checkpoint.pt" -labelmap_path = "~/Documents/repos/coding/birdsong/TeenyTweetyNet/results/BFSongRepository/bl26lb16/results_200620_164245/labelmap.json" -model = "TeenyTweetyNet" -batch_size = 4 -num_workers = 2 -device = "cuda" -output_dir = "./tests/data_for_tests/generated/results/predict/audio_cbin_annot_notmat/TeenyTweetyNet" -annot_csv_filename = "bl26lb16.041912.annot.csv" - -[PREDICT.transform_params] -window_size = 44 - -[TeenyTweetyNet.optimizer] -lr = 0.001 diff --git a/tests/data_for_tests/configs/TeenyTweetyNet_predict_audio_wav_annot_birdsongrec.toml b/tests/data_for_tests/configs/TeenyTweetyNet_predict_audio_wav_annot_birdsongrec.toml deleted file mode 100644 index 9a2110cca..000000000 --- a/tests/data_for_tests/configs/TeenyTweetyNet_predict_audio_wav_annot_birdsongrec.toml +++ /dev/null @@ -1,30 +0,0 @@ -[PREP] -dataset_type = "frame classification" -input_type = "spect" -data_dir = "./tests/data_for_tests/source/audio_wav_annot_birdsongrec/Bird0" -output_dir = "./tests/data_for_tests/generated/prep/predict/audio_wav_annot_birdsong-recognition-dataset/TeenyTweetyNet" -audio_format = "wav" - -[SPECT_PARAMS] -fft_size = 256 -step_size = 64 -freq_cutoffs = [ 1000, 8000,] -thresh = 6.25 -transform_type = "log_spect" - -[PREDICT] -spect_scaler_path = "/home/user/results_181014_194418/spect_scaler" -checkpoint_path = "~/Documents/repos/coding/birdsong/TeenyTweetyNet/results/BFSongRepository/bl26lb16/results_200620_164245/TeenyTweetyNet/checkpoints/max-val-acc-checkpoint.pt" -labelmap_path = "~/Documents/repos/coding/birdsong/TeenyTweetyNet/results/BFSongRepository/bl26lb16/results_200620_164245/labelmap.json" -model = "TeenyTweetyNet" -batch_size = 4 -num_workers = 2 -device = "cuda" -output_dir = "./tests/data_for_tests/generated/results/predict/audio_wav_annot_birdsong-recognition-dataset/TeenyTweetyNet" -annot_csv_filename = "Bird0.annot.csv" - -[PREDICT.transform_params] -window_size = 44 - -[TeenyTweetyNet.optimizer] -lr = 0.001 diff --git a/tests/data_for_tests/configs/TeenyTweetyNet_train_audio_cbin_annot_notmat.toml b/tests/data_for_tests/configs/TeenyTweetyNet_train_audio_cbin_annot_notmat.toml deleted file mode 100644 index 1d6e54c08..000000000 --- a/tests/data_for_tests/configs/TeenyTweetyNet_train_audio_cbin_annot_notmat.toml +++ /dev/null @@ -1,39 +0,0 @@ -[PREP] -dataset_type = "frame classification" -input_type = "spect" -data_dir = "./tests/data_for_tests/source/audio_cbin_annot_notmat/gy6or6/032312" -output_dir = "./tests/data_for_tests/generated/prep/train/audio_cbin_annot_notmat/TeenyTweetyNet" -audio_format = "cbin" -annot_format = "notmat" -labelset = "iabcdefghjk" -train_dur = 50 -val_dur = 15 -test_dur = 30 - -[SPECT_PARAMS] -fft_size = 256 -step_size = 64 -freq_cutoffs = [ 1000, 
8000,] -thresh = 6.25 -transform_type = "log_spect" - -[TRAIN] -model = "TeenyTweetyNet" -normalize_spectrograms = true -batch_size = 4 -num_epochs = 2 -val_step = 50 -ckpt_step = 200 -patience = 3 -num_workers = 2 -device = "cuda" -root_results_dir = "./tests/data_for_tests/generated/results/train/audio_cbin_annot_notmat/TeenyTweetyNet" - -[TRAIN.train_dataset_params] -window_size = 44 - -[TRAIN.val_transform_params] -window_size = 44 - -[TeenyTweetyNet.optimizer] -lr = 0.001 diff --git a/tests/data_for_tests/configs/TeenyTweetyNet_train_audio_wav_annot_birdsongrec.toml b/tests/data_for_tests/configs/TeenyTweetyNet_train_audio_wav_annot_birdsongrec.toml deleted file mode 100644 index c9b6cae79..000000000 --- a/tests/data_for_tests/configs/TeenyTweetyNet_train_audio_wav_annot_birdsongrec.toml +++ /dev/null @@ -1,40 +0,0 @@ -[PREP] -dataset_type = "frame classification" -input_type = "spect" -labelset = "012345678" -data_dir = "./tests/data_for_tests/source/audio_wav_annot_birdsongrec/Bird0" -output_dir = "./tests/data_for_tests/generated/prep/train/audio_wav_annot_birdsong-recognition-dataset/TeenyTweetyNet" -audio_format = "wav" -annot_format = "birdsong-recognition-dataset" -annot_file = "./tests/data_for_tests/source/audio_wav_annot_birdsongrec/Bird0/Annotation.xml" -test_dur = 50 -train_dur = 15 -val_dur = 30 - -[SPECT_PARAMS] -fft_size = 256 -step_size = 64 -freq_cutoffs = [ 1000, 8000,] -thresh = 6.25 -transform_type = "log_spect" - -[TRAIN] -model = "TeenyTweetyNet" -normalize_spectrograms = true -batch_size = 4 -num_epochs = 2 -val_step = 50 -ckpt_step = 200 -patience = 3 -num_workers = 2 -device = "cuda" -root_results_dir = "./tests/data_for_tests/generated/results/train/audio_wav_annot_birdsong-recognition-dataset/TeenyTweetyNet" - -[TRAIN.train_dataset_params] -window_size = 44 - -[TRAIN.val_transform_params] -window_size = 44 - -[TeenyTweetyNet.optimizer] -lr = 0.001 diff --git a/tests/data_for_tests/configs/TeenyTweetyNet_train_continue_audio_cbin_annot_notmat.toml b/tests/data_for_tests/configs/TeenyTweetyNet_train_continue_audio_cbin_annot_notmat.toml deleted file mode 100644 index 334b5135c..000000000 --- a/tests/data_for_tests/configs/TeenyTweetyNet_train_continue_audio_cbin_annot_notmat.toml +++ /dev/null @@ -1,41 +0,0 @@ -[PREP] -dataset_type = "frame classification" -input_type = "spect" -data_dir = "./tests/data_for_tests/source/audio_cbin_annot_notmat/gy6or6/032312" -output_dir = "./tests/data_for_tests/generated/prep/train/audio_cbin_annot_notmat/TeenyTweetyNet" -audio_format = "cbin" -annot_format = "notmat" -labelset = "iabcdefghjk" -train_dur = 50 -val_dur = 15 -test_dur = 30 - -[SPECT_PARAMS] -fft_size = 256 -step_size = 64 -freq_cutoffs = [ 1000, 8000,] -thresh = 6.25 -transform_type = "log_spect" - -[TRAIN] -model = "TeenyTweetyNet" -normalize_spectrograms = true -batch_size = 4 -num_epochs = 2 -val_step = 50 -ckpt_step = 200 -patience = 3 -num_workers = 2 -device = "cuda" -root_results_dir = "./tests/data_for_tests/generated/results/train/audio_cbin_annot_notmat/TeenyTweetyNet" -checkpoint_path = "~/Documents/repos/coding/birdsong/TweetyNet/results/BFSongRepository/gy6or6/results_200620_165308/TweetyNet/checkpoints/max-val-acc-checkpoint.pt" -spect_scaler_path = "~/Documents/repos/coding/birdsong/TweetyNet/results/BFSongRepository/gy6or6/results_200620_165308/StandardizeSpect" - -[TRAIN.train_dataset_params] -window_size = 44 - -[TRAIN.val_transform_params] -window_size = 44 - -[TeenyTweetyNet.optimizer] -lr = 0.001 diff --git 
a/tests/data_for_tests/configs/TeenyTweetyNet_train_continue_audio_wav_annot_birdsongrec.toml b/tests/data_for_tests/configs/TeenyTweetyNet_train_continue_audio_wav_annot_birdsongrec.toml deleted file mode 100644 index 2b0025c8c..000000000 --- a/tests/data_for_tests/configs/TeenyTweetyNet_train_continue_audio_wav_annot_birdsongrec.toml +++ /dev/null @@ -1,42 +0,0 @@ -[PREP] -dataset_type = "frame classification" -input_type = "spect" -labelset = "012345678" -data_dir = "./tests/data_for_tests/source/audio_wav_annot_birdsongrec/Bird0" -output_dir = "./tests/data_for_tests/generated/prep/train/audio_wav_annot_birdsong-recognition-dataset/TeenyTweetyNet" -audio_format = "wav" -annot_format = "birdsong-recognition-dataset" -annot_file = "./tests/data_for_tests/source/audio_wav_annot_birdsongrec/Bird0/Annotation.xml" -test_dur = 50 -train_dur = 15 -val_dur = 30 - -[SPECT_PARAMS] -fft_size = 256 -step_size = 64 -freq_cutoffs = [ 1000, 8000,] -thresh = 6.25 -transform_type = "log_spect" - -[TRAIN] -model = "TeenyTweetyNet" -normalize_spectrograms = true -batch_size = 4 -num_epochs = 2 -val_step = 50 -ckpt_step = 200 -patience = 3 -num_workers = 2 -device = "cuda" -root_results_dir = "./tests/data_for_tests/generated/results/train/audio_wav_annot_birdsong-recognition-dataset/TeenyTweetyNet" -checkpoint_path = "~/Documents/repos/coding/birdsong/TweetyNet/results/BFSongRepository/gy6or6/results_200620_165308/TweetyNet/checkpoints/max-val-acc-checkpoint.pt" -spect_scaler_path = "~/Documents/repos/coding/birdsong/TweetyNet/results/BFSongRepository/gy6or6/results_200620_165308/StandardizeSpect" - -[TRAIN.train_dataset_params] -window_size = 44 - -[TRAIN.val_transform_params] -window_size = 44 - -[TeenyTweetyNet.optimizer] -lr = 0.001 diff --git a/tests/data_for_tests/configs/TeenyTweetyNet_train_continue_spect_mat_annot_yarden.toml b/tests/data_for_tests/configs/TeenyTweetyNet_train_continue_spect_mat_annot_yarden.toml deleted file mode 100644 index 0881927d6..000000000 --- a/tests/data_for_tests/configs/TeenyTweetyNet_train_continue_spect_mat_annot_yarden.toml +++ /dev/null @@ -1,40 +0,0 @@ -[PREP] -dataset_type = "frame classification" -input_type = "spect" -data_dir = "./tests/data_for_tests/source/spect_mat_annot_yarden/llb3/spect" -output_dir = "./tests/data_for_tests/generated/prep/train/spect_mat_annot_yarden/TeenyTweetyNet" -spect_format = "mat" -annot_format = "yarden" -annot_file = "./tests/data_for_tests/source/spect_mat_annot_yarden/llb3/llb3_annot_subset.mat" -labelset = "range: 1-3,6-14,17-19" -train_dur = 213 -val_dur = 213 - -[SPECT_PARAMS] -fft_size = 256 -step_size = 64 -freq_cutoffs = [ 1000, 8000,] -thresh = 6.25 -transform_type = "log_spect" - -[TRAIN] -model = "TeenyTweetyNet" -normalize_spectrograms = false -batch_size = 4 -num_epochs = 2 -val_step = 50 -ckpt_step = 200 -patience = 3 -num_workers = 2 -device = "cuda" -root_results_dir = "./tests/data_for_tests/generated/results/train/spect_mat_annot_yarden/TeenyTweetyNet" -checkpoint_path = "~/Documents/repos/coding/birdsong/TweetyNet/results/BFSongRepository/gy6or6/results_200620_165308/TweetyNet/checkpoints/max-val-acc-checkpoint.pt" - -[TRAIN.train_dataset_params] -window_size = 44 - -[TRAIN.val_transform_params] -window_size = 44 - -[TeenyTweetyNet.optimizer] -lr = 0.001 diff --git a/tests/data_for_tests/configs/TeenyTweetyNet_train_spect_mat_annot_yarden.toml b/tests/data_for_tests/configs/TeenyTweetyNet_train_spect_mat_annot_yarden.toml deleted file mode 100644 index 3a3859ec5..000000000 --- 
a/tests/data_for_tests/configs/TeenyTweetyNet_train_spect_mat_annot_yarden.toml +++ /dev/null @@ -1,39 +0,0 @@ -[PREP] -dataset_type = "frame classification" -input_type = "spect" -data_dir = "./tests/data_for_tests/source/spect_mat_annot_yarden/llb3/spect" -output_dir = "./tests/data_for_tests/generated/prep/train/spect_mat_annot_yarden/TeenyTweetyNet" -spect_format = "mat" -annot_format = "yarden" -annot_file = "./tests/data_for_tests/source/spect_mat_annot_yarden/llb3/llb3_annot_subset.mat" -labelset = "range: 1-3,6-14,17-19" -train_dur = 213 -val_dur = 213 - -[SPECT_PARAMS] -fft_size = 256 -step_size = 64 -freq_cutoffs = [ 1000, 8000,] -thresh = 6.25 -transform_type = "log_spect" - -[TRAIN] -model = "TeenyTweetyNet" -normalize_spectrograms = false -batch_size = 4 -num_epochs = 2 -val_step = 50 -ckpt_step = 200 -patience = 3 -num_workers = 2 -device = "cuda" -root_results_dir = "./tests/data_for_tests/generated/results/train/spect_mat_annot_yarden/TeenyTweetyNet" - -[TRAIN.train_dataset_params] -window_size = 44 - -[TRAIN.val_transform_params] -window_size = 44 - -[TeenyTweetyNet.optimizer] -lr = 0.001 diff --git a/tests/data_for_tests/configs/TweetyNet_eval_audio_cbin_annot_notmat.toml b/tests/data_for_tests/configs/TweetyNet_eval_audio_cbin_annot_notmat.toml index 295c159f4..57461768d 100644 --- a/tests/data_for_tests/configs/TweetyNet_eval_audio_cbin_annot_notmat.toml +++ b/tests/data_for_tests/configs/TweetyNet_eval_audio_cbin_annot_notmat.toml @@ -19,7 +19,7 @@ checkpoint_path = "~/Documents/repos/coding/birdsong/TweetyNet/results/BFSongRep labelmap_path = "~/Documents/repos/coding/birdsong/TweetyNet/results/BFSongRepository/gy6or6/results_200620_165308/labelmap.json" model = "TweetyNet" batch_size = 11 -num_workers = 4 +num_workers = 16 device = "cuda" spect_scaler_path = "~/Documents/repos/coding/birdsong/TweetyNet/results/BFSongRepository/gy6or6/results_200620_165308/StandardizeSpect" output_dir = "./tests/data_for_tests/generated/results/eval/audio_cbin_annot_notmat/TweetyNet" @@ -27,5 +27,16 @@ output_dir = "./tests/data_for_tests/generated/results/eval/audio_cbin_annot_not [EVAL.transform_params] window_size = 88 +[TweetyNet.network] +conv1_filters = 8 +conv1_kernel_size = [3, 3] +conv2_filters = 16 +conv2_kernel_size = [5, 5] +pool1_size = [4, 1] +pool1_stride = [4, 1] +pool2_size = [4, 1] +pool2_stride = [4, 1] +hidden_size = 32 + [TweetyNet.optimizer] lr = 0.001 diff --git a/tests/data_for_tests/configs/TweetyNet_learncurve_audio_cbin_annot_notmat.toml b/tests/data_for_tests/configs/TweetyNet_learncurve_audio_cbin_annot_notmat.toml index 01ccff182..e4e4f248d 100644 --- a/tests/data_for_tests/configs/TweetyNet_learncurve_audio_cbin_annot_notmat.toml +++ b/tests/data_for_tests/configs/TweetyNet_learncurve_audio_cbin_annot_notmat.toml @@ -27,7 +27,7 @@ num_epochs = 2 val_step = 50 ckpt_step = 200 patience = 4 -num_workers = 4 +num_workers = 16 device = "cuda" root_results_dir = "./tests/data_for_tests/generated/results/learncurve/audio_cbin_annot_notmat/TweetyNet" @@ -37,5 +37,16 @@ window_size = 88 [LEARNCURVE.val_transform_params] window_size = 88 +[TweetyNet.network] +conv1_filters = 8 +conv1_kernel_size = [3, 3] +conv2_filters = 16 +conv2_kernel_size = [5, 5] +pool1_size = [4, 1] +pool1_stride = [4, 1] +pool2_size = [4, 1] +pool2_stride = [4, 1] +hidden_size = 32 + [TweetyNet.optimizer] lr = 0.001 diff --git a/tests/data_for_tests/configs/TweetyNet_predict_audio_cbin_annot_notmat.toml b/tests/data_for_tests/configs/TweetyNet_predict_audio_cbin_annot_notmat.toml index 
5206bc579..61af4b692 100644 --- a/tests/data_for_tests/configs/TweetyNet_predict_audio_cbin_annot_notmat.toml +++ b/tests/data_for_tests/configs/TweetyNet_predict_audio_cbin_annot_notmat.toml @@ -18,7 +18,7 @@ checkpoint_path = "~/Documents/repos/coding/birdsong/TweetyNet/results/BFSongRep labelmap_path = "~/Documents/repos/coding/birdsong/TweetyNet/results/BFSongRepository/bl26lb16/results_200620_164245/labelmap.json" model = "TweetyNet" batch_size = 11 -num_workers = 4 +num_workers = 16 device = "cuda" output_dir = "./tests/data_for_tests/generated/results/predict/audio_cbin_annot_notmat/TweetyNet" annot_csv_filename = "bl26lb16.041912.annot.csv" @@ -26,5 +26,16 @@ annot_csv_filename = "bl26lb16.041912.annot.csv" [PREDICT.transform_params] window_size = 88 +[TweetyNet.network] +conv1_filters = 8 +conv1_kernel_size = [3, 3] +conv2_filters = 16 +conv2_kernel_size = [5, 5] +pool1_size = [4, 1] +pool1_stride = [4, 1] +pool2_size = [4, 1] +pool2_stride = [4, 1] +hidden_size = 32 + [TweetyNet.optimizer] lr = 0.001 diff --git a/tests/data_for_tests/configs/TweetyNet_predict_audio_wav_annot_birdsongrec.toml b/tests/data_for_tests/configs/TweetyNet_predict_audio_wav_annot_birdsongrec.toml index f0c827ba4..769dfba72 100644 --- a/tests/data_for_tests/configs/TweetyNet_predict_audio_wav_annot_birdsongrec.toml +++ b/tests/data_for_tests/configs/TweetyNet_predict_audio_wav_annot_birdsongrec.toml @@ -18,7 +18,7 @@ checkpoint_path = "~/Documents/repos/coding/birdsong/TweetyNet/results/BFSongRep labelmap_path = "~/Documents/repos/coding/birdsong/TweetyNet/results/BFSongRepository/bl26lb16/results_200620_164245/labelmap.json" model = "TweetyNet" batch_size = 11 -num_workers = 4 +num_workers = 16 device = "cuda" output_dir = "./tests/data_for_tests/generated/results/predict/audio_wav_annot_birdsong-recognition-dataset/TweetyNet" annot_csv_filename = "Bird0.annot.csv" @@ -26,5 +26,16 @@ annot_csv_filename = "Bird0.annot.csv" [PREDICT.transform_params] window_size = 88 +[TweetyNet.network] +conv1_filters = 8 +conv1_kernel_size = [3, 3] +conv2_filters = 16 +conv2_kernel_size = [5, 5] +pool1_size = [4, 1] +pool1_stride = [4, 1] +pool2_size = [4, 1] +pool2_stride = [4, 1] +hidden_size = 32 + [TweetyNet.optimizer] lr = 0.001 diff --git a/tests/data_for_tests/configs/TweetyNet_train_audio_cbin_annot_notmat.toml b/tests/data_for_tests/configs/TweetyNet_train_audio_cbin_annot_notmat.toml index 9923e48ef..2f72adfb1 100644 --- a/tests/data_for_tests/configs/TweetyNet_train_audio_cbin_annot_notmat.toml +++ b/tests/data_for_tests/configs/TweetyNet_train_audio_cbin_annot_notmat.toml @@ -25,7 +25,7 @@ num_epochs = 2 val_step = 50 ckpt_step = 200 patience = 4 -num_workers = 4 +num_workers = 16 device = "cuda" root_results_dir = "./tests/data_for_tests/generated/results/train/audio_cbin_annot_notmat/TweetyNet" @@ -35,5 +35,16 @@ window_size = 88 [TRAIN.val_transform_params] window_size = 88 +[TweetyNet.network] +conv1_filters = 8 +conv1_kernel_size = [3, 3] +conv2_filters = 16 +conv2_kernel_size = [5, 5] +pool1_size = [4, 1] +pool1_stride = [4, 1] +pool2_size = [4, 1] +pool2_stride = [4, 1] +hidden_size = 32 + [TweetyNet.optimizer] lr = 0.001 diff --git a/tests/data_for_tests/configs/TweetyNet_train_audio_wav_annot_birdsongrec.toml b/tests/data_for_tests/configs/TweetyNet_train_audio_wav_annot_birdsongrec.toml index 6b7c69cff..e3988e6ab 100644 --- a/tests/data_for_tests/configs/TweetyNet_train_audio_wav_annot_birdsongrec.toml +++ b/tests/data_for_tests/configs/TweetyNet_train_audio_wav_annot_birdsongrec.toml @@ -26,7 +26,7 
@@ num_epochs = 2 val_step = 50 ckpt_step = 200 patience = 4 -num_workers = 4 +num_workers = 16 device = "cuda" root_results_dir = "./tests/data_for_tests/generated/results/train/audio_wav_annot_birdsong-recognition-dataset/TweetyNet" @@ -36,5 +36,16 @@ window_size = 88 [TRAIN.val_transform_params] window_size = 88 +[TweetyNet.network] +conv1_filters = 8 +conv1_kernel_size = [3, 3] +conv2_filters = 16 +conv2_kernel_size = [5, 5] +pool1_size = [4, 1] +pool1_stride = [4, 1] +pool2_size = [4, 1] +pool2_stride = [4, 1] +hidden_size = 32 + [TweetyNet.optimizer] lr = 0.001 diff --git a/tests/data_for_tests/configs/TweetyNet_train_continue_audio_cbin_annot_notmat.toml b/tests/data_for_tests/configs/TweetyNet_train_continue_audio_cbin_annot_notmat.toml index 41b78a358..c53ca4766 100644 --- a/tests/data_for_tests/configs/TweetyNet_train_continue_audio_cbin_annot_notmat.toml +++ b/tests/data_for_tests/configs/TweetyNet_train_continue_audio_cbin_annot_notmat.toml @@ -25,9 +25,9 @@ num_epochs = 2 val_step = 50 ckpt_step = 200 patience = 4 -num_workers = 4 +num_workers = 16 device = "cuda" -root_results_dir = "./tests/data_for_tests/generated/results/train/audio_cbin_annot_notmat/TweetyNet" +root_results_dir = "./tests/data_for_tests/generated/results/train_continue/audio_cbin_annot_notmat/TweetyNet" checkpoint_path = "~/Documents/repos/coding/birdsong/TweetyNet/results/BFSongRepository/gy6or6/results_200620_165308/TweetyNet/checkpoints/max-val-acc-checkpoint.pt" spect_scaler_path = "~/Documents/repos/coding/birdsong/TweetyNet/results/BFSongRepository/gy6or6/results_200620_165308/StandardizeSpect" @@ -37,5 +37,16 @@ window_size = 88 [TRAIN.val_transform_params] window_size = 88 +[TweetyNet.network] +conv1_filters = 8 +conv1_kernel_size = [3, 3] +conv2_filters = 16 +conv2_kernel_size = [5, 5] +pool1_size = [4, 1] +pool1_stride = [4, 1] +pool2_size = [4, 1] +pool2_stride = [4, 1] +hidden_size = 32 + [TweetyNet.optimizer] lr = 0.001 diff --git a/tests/data_for_tests/configs/TweetyNet_train_continue_audio_wav_annot_birdsongrec.toml b/tests/data_for_tests/configs/TweetyNet_train_continue_audio_wav_annot_birdsongrec.toml index e11ce475b..d995aa4d5 100644 --- a/tests/data_for_tests/configs/TweetyNet_train_continue_audio_wav_annot_birdsongrec.toml +++ b/tests/data_for_tests/configs/TweetyNet_train_continue_audio_wav_annot_birdsongrec.toml @@ -26,9 +26,9 @@ num_epochs = 2 val_step = 50 ckpt_step = 200 patience = 4 -num_workers = 4 +num_workers = 16 device = "cuda" -root_results_dir = "./tests/data_for_tests/generated/results/train/audio_wav_annot_birdsong-recognition-dataset/TweetyNet" +root_results_dir = "./tests/data_for_tests/generated/results/train_continue/audio_wav_annot_birdsong-recognition-dataset/TweetyNet" checkpoint_path = "~/Documents/repos/coding/birdsong/TweetyNet/results/BFSongRepository/gy6or6/results_200620_165308/TweetyNet/checkpoints/max-val-acc-checkpoint.pt" spect_scaler_path = "~/Documents/repos/coding/birdsong/TweetyNet/results/BFSongRepository/gy6or6/results_200620_165308/StandardizeSpect" @@ -38,5 +38,16 @@ window_size = 88 [TRAIN.val_transform_params] window_size = 88 +[TweetyNet.network] +conv1_filters = 8 +conv1_kernel_size = [3, 3] +conv2_filters = 16 +conv2_kernel_size = [5, 5] +pool1_size = [4, 1] +pool1_stride = [4, 1] +pool2_size = [4, 1] +pool2_stride = [4, 1] +hidden_size = 32 + [TweetyNet.optimizer] lr = 0.001 diff --git a/tests/data_for_tests/configs/TweetyNet_train_continue_spect_mat_annot_yarden.toml 
b/tests/data_for_tests/configs/TweetyNet_train_continue_spect_mat_annot_yarden.toml index 24a362cea..aa384b6ed 100644 --- a/tests/data_for_tests/configs/TweetyNet_train_continue_spect_mat_annot_yarden.toml +++ b/tests/data_for_tests/configs/TweetyNet_train_continue_spect_mat_annot_yarden.toml @@ -25,9 +25,9 @@ num_epochs = 2 val_step = 50 ckpt_step = 200 patience = 4 -num_workers = 4 +num_workers = 16 device = "cuda" -root_results_dir = "./tests/data_for_tests/generated/results/train/spect_mat_annot_yarden/TweetyNet" +root_results_dir = "./tests/data_for_tests/generated/results/train_continue/spect_mat_annot_yarden/TweetyNet" checkpoint_path = "~/Documents/repos/coding/birdsong/TweetyNet/results/BFSongRepository/gy6or6/results_200620_165308/TweetyNet/checkpoints/max-val-acc-checkpoint.pt" [TRAIN.train_dataset_params] @@ -36,5 +36,16 @@ window_size = 88 [TRAIN.val_transform_params] window_size = 88 +[TweetyNet.network] +conv1_filters = 8 +conv1_kernel_size = [3, 3] +conv2_filters = 16 +conv2_kernel_size = [5, 5] +pool1_size = [4, 1] +pool1_stride = [4, 1] +pool2_size = [4, 1] +pool2_stride = [4, 1] +hidden_size = 32 + [TweetyNet.optimizer] lr = 0.001 diff --git a/tests/data_for_tests/configs/TweetyNet_train_spect_mat_annot_yarden.toml b/tests/data_for_tests/configs/TweetyNet_train_spect_mat_annot_yarden.toml index 6ae2cc439..770012f4f 100644 --- a/tests/data_for_tests/configs/TweetyNet_train_spect_mat_annot_yarden.toml +++ b/tests/data_for_tests/configs/TweetyNet_train_spect_mat_annot_yarden.toml @@ -25,7 +25,7 @@ num_epochs = 2 val_step = 50 ckpt_step = 200 patience = 4 -num_workers = 4 +num_workers = 16 device = "cuda" root_results_dir = "./tests/data_for_tests/generated/results/train/spect_mat_annot_yarden/TweetyNet" @@ -35,5 +35,16 @@ window_size = 88 [TRAIN.val_transform_params] window_size = 88 +[TweetyNet.network] +conv1_filters = 8 +conv1_kernel_size = [3, 3] +conv2_filters = 16 +conv2_kernel_size = [5, 5] +pool1_size = [4, 1] +pool1_stride = [4, 1] +pool2_size = [4, 1] +pool2_stride = [4, 1] +hidden_size = 32 + [TweetyNet.optimizer] lr = 0.001 diff --git a/tests/data_for_tests/configs/configs.json b/tests/data_for_tests/configs/configs.json index 05b4731a4..d92c5e674 100644 --- a/tests/data_for_tests/configs/configs.json +++ b/tests/data_for_tests/configs/configs.json @@ -100,106 +100,6 @@ "use_dataset_from_config": "TweetyNet_train_spect_mat_annot_yarden.toml", "use_result_from_config": "TweetyNet_train_spect_mat_annot_yarden.toml" }, - { - "filename": "TeenyTweetyNet_train_audio_cbin_annot_notmat.toml", - "model": "TeenyTweetyNet", - "config_type": "train", - "audio_format": "cbin", - "spect_format": null, - "annot_format": "notmat", - "use_dataset_from_config": "TweetyNet_train_audio_cbin_annot_notmat.toml", - "use_result_from_config": null - }, - { - "filename": "TeenyTweetyNet_learncurve_audio_cbin_annot_notmat.toml", - "model": "TeenyTweetyNet", - "config_type": "learncurve", - "audio_format": "cbin", - "spect_format": null, - "annot_format": "notmat", - "use_dataset_from_config": "TweetyNet_learncurve_audio_cbin_annot_notmat.toml", - "use_result_from_config": null - }, - { - "filename": "TeenyTweetyNet_eval_audio_cbin_annot_notmat.toml", - "model": "TeenyTweetyNet", - "config_type": "eval", - "audio_format": "cbin", - "spect_format": null, - "annot_format": "notmat", - "use_dataset_from_config": "TweetyNet_eval_audio_cbin_annot_notmat.toml", - "use_result_from_config": "TeenyTweetyNet_train_audio_cbin_annot_notmat.toml" - }, - { - "filename": 
"TeenyTweetyNet_predict_audio_cbin_annot_notmat.toml", - "model": "TeenyTweetyNet", - "config_type": "predict", - "audio_format": "cbin", - "spect_format": null, - "annot_format": "notmat", - "use_dataset_from_config": "TweetyNet_predict_audio_cbin_annot_notmat.toml", - "use_result_from_config": "TeenyTweetyNet_train_audio_cbin_annot_notmat.toml" - }, - { - "filename": "TeenyTweetyNet_train_continue_audio_cbin_annot_notmat.toml", - "model": "TeenyTweetyNet", - "config_type": "train_continue", - "audio_format": "cbin", - "spect_format": null, - "annot_format": "notmat", - "use_dataset_from_config": "TweetyNet_train_audio_cbin_annot_notmat.toml", - "use_result_from_config": "TeenyTweetyNet_train_audio_cbin_annot_notmat.toml" - }, - { - "filename": "TeenyTweetyNet_train_audio_wav_annot_birdsongrec.toml", - "model": "TeenyTweetyNet", - "config_type": "train", - "audio_format": "wav", - "spect_format": null, - "annot_format": "birdsong-recognition-dataset", - "use_dataset_from_config": "TweetyNet_train_audio_wav_annot_birdsongrec.toml", - "use_result_from_config": null - }, - { - "filename": "TeenyTweetyNet_predict_audio_wav_annot_birdsongrec.toml", - "model": "TeenyTweetyNet", - "config_type": "predict", - "audio_format": "wav", - "spect_format": null, - "annot_format": "birdsong-recognition-dataset", - "use_dataset_from_config": "TweetyNet_predict_audio_wav_annot_birdsongrec.toml", - "use_result_from_config": "TeenyTweetyNet_train_audio_wav_annot_birdsongrec.toml" - }, - { - "filename": "TeenyTweetyNet_train_continue_audio_wav_annot_birdsongrec.toml", - "model": "TeenyTweetyNet", - "config_type": "train_continue", - "audio_format": "wav", - "spect_format": null, - "annot_format": "birdsong-recognition-dataset", - "use_dataset_from_config": "TweetyNet_train_audio_wav_annot_birdsongrec.toml", - "use_result_from_config": "TeenyTweetyNet_train_audio_wav_annot_birdsongrec.toml" - }, - { - "filename": "TeenyTweetyNet_train_spect_mat_annot_yarden.toml", - "model": "TeenyTweetyNet", - "config_type": "train", - "audio_format": null, - "spect_format": "mat", - "annot_format": "yarden", - "use_dataset_from_config": "TweetyNet_train_spect_mat_annot_yarden.toml", - "use_result_from_config": null - }, - { - "filename": "TeenyTweetyNet_train_continue_spect_mat_annot_yarden.toml", - "model": "TeenyTweetyNet", - "config_type": "train_continue", - "audio_format": null, - "spect_format": "mat", - "annot_format": "yarden", - "use_dataset_from_config": "TweetyNet_train_spect_mat_annot_yarden.toml", - "use_result_from_config": "TeenyTweetyNet_train_spect_mat_annot_yarden.toml" - }, { "filename": "ConvEncoderUMAP_train_audio_cbin_annot_notmat.toml", "model": "ConvEncoderUMAP", diff --git a/tests/data_for_tests/configs/invalid_train_and_learncurve_config.toml b/tests/data_for_tests/configs/invalid_train_and_learncurve_config.toml index e2809aca6..ce13bb316 100644 --- a/tests/data_for_tests/configs/invalid_train_and_learncurve_config.toml +++ b/tests/data_for_tests/configs/invalid_train_and_learncurve_config.toml @@ -27,7 +27,7 @@ num_epochs = 2 val_step = 50 ckpt_step = 200 patience = 4 -num_workers = 4 +num_workers = 16 device = "cuda" root_results_dir = "./tests/data_for_tests/generated/results/train/audio_cbin_annot_notmat" @@ -39,7 +39,7 @@ num_epochs = 2 val_step = 50 ckpt_step = 200 patience = 4 -num_workers = 4 +num_workers = 16 train_set_durs = [ 4, 6 ] num_replicates = 2 device = "cuda" diff --git a/tests/fixtures/annot.py b/tests/fixtures/annot.py index 083a6912f..f140abb48 100644 --- 
a/tests/fixtures/annot.py +++ b/tests/fixtures/annot.py @@ -30,7 +30,7 @@ def annot_list_yarden(): LABELSET_YARDEN = [ str(an_int) - for an_int in [1, 2, 3, 6, 7, 8, 9, 10, 11, 12, 13, 14, 16, 17, 18, 19] + for an_int in [1, 2, 3, 6, 7, 8, 9, 10, 11, 12, 13, 14, 17, 18, 19] ] diff --git a/tests/fixtures/audio.py b/tests/fixtures/audio.py index df54838c6..4753a4097 100644 --- a/tests/fixtures/audio.py +++ b/tests/fixtures/audio.py @@ -1,6 +1,7 @@ """fixtures relating to audio files""" import pytest +from .annot import LABELSET_NOTMAT, ANNOT_LIST_NOTMAT from .test_data import SOURCE_TEST_DATA_ROOT @@ -34,45 +35,33 @@ def audio_dir_cbin(): def audio_list_cbin(): return AUDIO_LIST_CBIN +LABELSET_NOTMAT_AS_SET = set(LABELSET_NOTMAT) + + +AUDIO_LIST_CBIN_ALL_LABELS_IN_LABELSET = [] +AUDIO_LIST_CBIN_LABELS_NOT_IN_LABELSET = [] +for audio_path in AUDIO_LIST_CBIN: + audio_fname = audio_path.name + annot = [ + annot for annot in ANNOT_LIST_NOTMAT if annot.notated_path.name == audio_fname + ] + assert len(annot) == 1 + annot = annot[0] + if set(annot.seq.labels).issubset(LABELSET_NOTMAT_AS_SET): + AUDIO_LIST_CBIN_ALL_LABELS_IN_LABELSET.append(audio_path) + else: + AUDIO_LIST_CBIN_LABELS_NOT_IN_LABELSET.append(audio_path) @pytest.fixture -def audio_list_cbin_all_labels_in_labelset( - audio_list_cbin, annot_list_notmat, labelset_notmat -): +def audio_list_cbin_all_labels_in_labelset(): """list of .cbin audio files where all labels in associated annotation **are** in labelset""" - labelset_notmat = set(labelset_notmat) - audio_list_labels_in_labelset = [] - for audio_path in audio_list_cbin: - audio_fname = audio_path.name - annot = [ - annot for annot in annot_list_notmat if annot.notated_path.name == audio_fname - ] - assert len(annot) == 1 - annot = annot[0] - if set(annot.seq.labels).issubset(labelset_notmat): - audio_list_labels_in_labelset.append(audio_path) - - return audio_list_labels_in_labelset + return AUDIO_LIST_CBIN_ALL_LABELS_IN_LABELSET @pytest.fixture -def audio_list_cbin_labels_not_in_labelset( - audio_list_cbin, annot_list_notmat, labelset_notmat -): +def audio_list_cbin_labels_not_in_labelset(): """list of .cbin audio files where some labels in associated annotation are **not** in labelset""" - labelset_notmat = set(labelset_notmat) - audio_list_labels_in_labelset = [] - for audio_path in audio_list_cbin: - audio_fname = audio_path.name - annot = [ - annot for annot in annot_list_notmat if annot.notated_path.name == audio_fname - ] - assert len(annot) == 1 - annot = annot[0] - if not set(annot.seq.labels).issubset(labelset_notmat): - audio_list_labels_in_labelset.append(audio_path) - - return audio_list_labels_in_labelset + return AUDIO_LIST_CBIN_LABELS_NOT_IN_LABELSET AUDIO_DIR_WAV_BIRDSONGREC = SOURCE_TEST_DATA_ROOT.joinpath("audio_wav_annot_birdsongrec", "Bird0", "Wave") @@ -129,3 +118,22 @@ def _audio_list_factory(audio_format, annot_format): return FORMAT_AUDIO_LIST_FIXTURE_MAP[key] return _audio_list_factory + + +@pytest.fixture +def specific_audio_list( + audio_list_cbin, + audio_list_cbin_all_labels_in_labelset, + audio_list_cbin_labels_not_in_labelset, +): + def _specific_audio_list(spect_format, qualifier=None): + MAP = { + "cbin": { + None: audio_list_cbin, + "all_labels_in_labelset": audio_list_cbin_all_labels_in_labelset, + "labels_not_in_labelset": audio_list_cbin_labels_not_in_labelset, + }, + } + return MAP[spect_format][qualifier] + + return _specific_audio_list diff --git a/tests/fixtures/config.py b/tests/fixtures/config.py index cd210438b..ac4c74573 100644 --- 
a/tests/fixtures/config.py +++ b/tests/fixtures/config.py @@ -52,7 +52,7 @@ def list_of_schematized_configs(test_configs_root): all of the other keys. """ with test_configs_root.joinpath("configs.json").open("r") as fp: - return json.load(fp)["configs"] + return json.load(fp)["config_metadata"] @pytest.fixture @@ -142,7 +142,6 @@ def _specific_config( with any options changed as specified """ original_config_path = None - for schematized_config in list_of_schematized_configs: if all( [ @@ -156,6 +155,7 @@ def _specific_config( original_config_path = generated_test_configs_root.joinpath( schematized_config["filename"] ) + break if original_config_path is None: raise ValueError( diff --git a/tests/fixtures/csv.py b/tests/fixtures/csv.py index 78d60f2be..e4ef71761 100644 --- a/tests/fixtures/csv.py +++ b/tests/fixtures/csv.py @@ -26,7 +26,11 @@ def _specific_csv_path( config_type, model, annot_format, audio_format, spect_format ) dataset_path = Path(config_toml[config_type.upper()]["dataset_path"]) - metadata = vak.datasets.metadata.Metadata.from_dataset_path(dataset_path) + # TODO: make this more general -- dataset registry? + if config_toml['PREP']['dataset_type'] == 'frame classification': + metadata = vak.datasets.frame_classification.Metadata.from_dataset_path(dataset_path) + elif config_toml['PREP']['dataset_type'] == 'parametric umap': + metadata = vak.datasets.parametric_umap.Metadata.from_dataset_path(dataset_path) dataset_csv_path = dataset_path / metadata.dataset_csv_filename return dataset_csv_path diff --git a/tests/fixtures/dataframe.py b/tests/fixtures/dataframe.py index 8df14b67f..e36fda21d 100644 --- a/tests/fixtures/dataframe.py +++ b/tests/fixtures/dataframe.py @@ -36,7 +36,7 @@ def train_cbin_notmat_df(specific_dataset_df): """ return specific_dataset_df( config_type="train", - model="teenytweetynet", + model="TweetyNet", audio_format="cbin", annot_format="notmat" ) diff --git a/tests/fixtures/model.py b/tests/fixtures/model.py index c528d5bb7..6899384ee 100644 --- a/tests/fixtures/model.py +++ b/tests/fixtures/model.py @@ -4,8 +4,7 @@ # instead of computing something dynamically e.g. from ``vak.models``. # Should be used throughout fixtures when we need to get things "by model" MODELS = [ - "tweetynet", - "teenytweetynet", + "TweetyNet", ] @@ -15,6 +14,6 @@ def default_model(): Should work regardless of where the test is run, i.e. both on CI platform and locally. 
- currently ``teenytweetynet`` + currently ``TweetyNet`` """ - return "teenytweetynet" + return "TweetyNet" diff --git a/tests/fixtures/spect.py b/tests/fixtures/spect.py index 1ad72fa70..53bc10f27 100644 --- a/tests/fixtures/spect.py +++ b/tests/fixtures/spect.py @@ -3,6 +3,12 @@ import vak.common.files.spect +from .annot import ( + ANNOT_LIST_NOTMAT, + ANNOT_LIST_YARDEN, + LABELSET_NOTMAT, + LABELSET_YARDEN, +) from .test_data import GENERATED_TEST_DATA_ROOT, SOURCE_TEST_DATA_ROOT @@ -18,8 +24,8 @@ def spect_dir_mat(): SPECT_DIR_NPZ = sorted( GENERATED_TEST_DATA_ROOT.joinpath( - "prep", "train", "audio_cbin_annot_notmat", "teenytweetynet" - ).glob("*vak-frame-classification-dataset-generated*") + "prep", "train", "audio_cbin_annot_notmat", "TweetyNet" + ).glob("*vak-frame-classification-dataset-generated*/spectrograms_generated_*") )[0] @@ -49,7 +55,7 @@ def spect_list_mat(): return SPECT_LIST_MAT -SPECT_LIST_NPZ = sorted(SPECT_DIR_NPZ.glob("*/*.spect.npz")) +SPECT_LIST_NPZ = sorted(SPECT_DIR_NPZ.glob("*.spect.npz")) @pytest.fixture @@ -57,89 +63,60 @@ def spect_list_npz(): return SPECT_LIST_NPZ +LABELSET_YARDEN_SET = set(LABELSET_YARDEN) +SPECT_LIST_MAT_ALL_LABELS_IN_LABELSET = [] +SPECT_LIST_MAT_LABELS_NOT_IN_LABELSET = [] +for spect_path in SPECT_LIST_MAT: + audio_fname = vak.common.files.spect.find_audio_fname(spect_path) + annot = [ + annot for annot in ANNOT_LIST_YARDEN if annot.notated_path.name == audio_fname + ] + assert len(annot) == 1 + annot = annot[0] + if set(annot.seq.labels).issubset(LABELSET_YARDEN_SET): + SPECT_LIST_MAT_ALL_LABELS_IN_LABELSET.append(spect_path) + else: + SPECT_LIST_MAT_LABELS_NOT_IN_LABELSET.append(spect_path) + + @pytest.fixture -def spect_list_mat_all_labels_in_labelset( - spect_list_mat, annot_list_yarden, labelset_yarden -): +def spect_list_mat_all_labels_in_labelset(): """list of .mat spectrogram files where all labels in associated annotation **are** in labelset""" - labelset_yarden = set(labelset_yarden) - spect_list_labels_in_labelset = [] - for spect_path in spect_list_mat: - audio_fname = vak.common.files.spect.find_audio_fname(spect_path) - annot = [ - annot for annot in annot_list_yarden if annot.notated_path.name == audio_fname - ] - assert len(annot) == 1 - annot = annot[0] - if set(annot.seq.labels).issubset(labelset_yarden): - spect_list_labels_in_labelset.append(spect_path) - - return spect_list_labels_in_labelset + return SPECT_LIST_MAT_ALL_LABELS_IN_LABELSET @pytest.fixture -def spect_list_npz_all_labels_in_labelset( - spect_list_npz, annot_list_notmat, labelset_notmat -): - """list of .npz spectrogram files where all labels in associated annotation **are** in labelset""" - labelset_notmat = set(labelset_notmat) - spect_list_labels_in_labelset = [] - for spect_path in spect_list_npz: - audio_fname = vak.common.files.spect.find_audio_fname(spect_path) - annot = [ - annot for annot in annot_list_notmat if annot.notated_path.name == audio_fname - ] - assert len(annot) == 1 - annot = annot[0] - if set(annot.seq.labels).issubset(labelset_notmat): - spect_list_labels_in_labelset.append(spect_path) +def spect_list_mat_labels_not_in_labelset(): + """list of .mat spectrogram files where some labels in associated annotation are **not** in labelset""" + return SPECT_LIST_MAT_LABELS_NOT_IN_LABELSET + - return spect_list_labels_in_labelset +LABELSET_NOTMAT_SET = set(LABELSET_NOTMAT) +SPECT_LIST_NPZ_ALL_LABELS_IN_LABELSET = [] +SPECT_LIST_NPZ_LABELS_NOT_IN_LABELSET = [] +for spect_path in SPECT_LIST_NPZ: + audio_fname = 
vak.common.files.spect.find_audio_fname(spect_path) + annot = [ + annot for annot in ANNOT_LIST_NOTMAT if annot.notated_path.name == audio_fname + ] + assert len(annot) == 1 + annot = annot[0] + if set(annot.seq.labels).issubset(LABELSET_NOTMAT_SET): + SPECT_LIST_NPZ_ALL_LABELS_IN_LABELSET.append(spect_path) + else: + SPECT_LIST_NPZ_LABELS_NOT_IN_LABELSET.append(spect_path) @pytest.fixture -def spect_list_mat_labels_not_in_labelset( - spect_list_mat, annot_list_yarden, labelset_yarden -): - """list of .mat spectrogram files where some labels in associated annotation are **not** in labelset""" - labelset_yarden = set(labelset_yarden) - spect_list_labels_not_in_labelset = [] - for spect_path in spect_list_mat: - audio_fname = vak.common.files.spect.find_audio_fname(spect_path) - annot = [ - annot for annot in annot_list_yarden if annot.notated_path.name == audio_fname - ] - assert len(annot) == 1 - annot = annot[0] - # notice if labels **not** a subset of labelset - if not set(annot.seq.labels).issubset(labelset_yarden): - spect_list_labels_not_in_labelset.append(spect_path) - - err = "not finding .mat spectrogram files where labels in associated annotations are not in dataset" - assert len(spect_list_labels_not_in_labelset) > 0, err - return spect_list_labels_not_in_labelset +def spect_list_npz_all_labels_in_labelset(): + """list of .npz spectrogram files where all labels in associated annotation **are** in labelset""" + return SPECT_LIST_NPZ_ALL_LABELS_IN_LABELSET @pytest.fixture -def spect_list_npz_labels_not_in_labelset( - spect_list_npz, annot_list_notmat, labelset_notmat -): +def spect_list_npz_labels_not_in_labelset(): """list of .npz spectrogram files where some labels in associated annotation are **not** in labelset""" - labelset_notmat = set(labelset_notmat) - spect_list_labels_not_in_labelset = [] - for spect_path in spect_list_npz: - audio_fname = vak.common.files.spect.find_audio_fname(spect_path) - annot = [ - annot for annot in annot_list_notmat if annot.notated_path.name == audio_fname - ] - assert len(annot) == 1 - annot = annot[0] - if set(annot.seq.labels).issubset(labelset_notmat): - spect_list_labels_not_in_labelset.append(spect_path) - - err = "not finding .npz spectrogram files where labels in associated annotations are not in dataset" - assert len(spect_list_labels_not_in_labelset) > 0, err - return spect_list_labels_not_in_labelset + return SPECT_LIST_NPZ_LABELS_NOT_IN_LABELSET @pytest.fixture diff --git a/tests/scripts/fix_prep_csv_paths.py b/tests/scripts/fix_prep_csv_paths.py deleted file mode 100644 index 389a9272d..000000000 --- a/tests/scripts/fix_prep_csv_paths.py +++ /dev/null @@ -1,48 +0,0 @@ -"""This script gets run by continuous integration -(in ./github/workflows/ci-{os}.yml files) -so that all the paths are correct on the virtual machines -""" -from pathlib import Path - -import pandas as pd - -HERE = Path(__file__).parent -PROJ_ROOT = HERE / ".." / ".." -PROJ_ROOT_ABS = PROJ_ROOT.resolve() # <- used to fix paths!!! -GENERATED_TEST_DATA = PROJ_ROOT / "tests" / "data_for_tests" / "generated" - - -def main(): - """loads csv files created by `prep` and changes the parent of paths so it's - the local file system, instead of what's on my laptop. 
- To get tests to run on CI without FileNotFound errors""" - prep_csvs = sorted(GENERATED_TEST_DATA.glob("**/*prep*csv")) - for prep_csv in prep_csvs: - vak_df = pd.read_csv(prep_csv) - for path_column_name in ("spect_path", "audio_path", "annot_path"): - paths = vak_df[path_column_name].values.tolist() - paths = [str(path) for path in paths] - new_column = [] - for path_str in paths: - if path_str == "nan": - new_column.append(path_str) - continue - tests_root_ind = path_str.find('tests/data_for_tests') - if (tests_root_ind == -1 - and path_column_name == 'audio_path' - and 'spect_mat_annot_yarden' in str(prep_csv)): - # prep somehow gives root to audio -- from annotation?; we don't need these to exist though - new_column.append(path_str) - continue - new_path_str = path_str[tests_root_ind:] # get rid of parent directories - new_path = PROJ_ROOT_ABS / new_path_str - if not new_path.exists(): - raise FileNotFoundError( - f"New path does not exist:\n{new_path}" - ) - new_column.append(str(new_path)) - vak_df[path_column_name] = new_column - vak_df.to_csv(prep_csv) - - -main() diff --git a/tests/scripts/vaktestdata/configs.py b/tests/scripts/vaktestdata/configs.py index f62d0a834..cde39be49 100644 --- a/tests/scripts/vaktestdata/configs.py +++ b/tests/scripts/vaktestdata/configs.py @@ -6,6 +6,7 @@ # TODO: use tomli import toml +import vak.cli.prep from . import constants @@ -70,7 +71,14 @@ def add_dataset_path_from_prepped_configs(): with config_dataset_path.open("r") as fp: dataset_config_toml = toml.load(fp) - dataset_path = dataset_config_toml[section]['dataset_path'] + purpose = vak.cli.prep.purpose_from_toml(dataset_config_toml) + # next line, we can't use `section` here because we could get a KeyError, + # e.g., when the config we are rewriting is an EVAL config, but + # the config we are getting the dataset from is a TRAIN config. + # so instead we use `purpose_from_toml` to get the `purpose` + # of the config we are getting the dataset from. 
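+            # For example: when the config being rewritten is an EVAL config but the donor config's purpose is 'train', its dataset_path lives under that config's [TRAIN] section, so we look it up there.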
+ dataset_config_section = purpose.upper() # need to be 'TRAIN', not 'train' + dataset_path = dataset_config_toml[dataset_config_section]['dataset_path'] with config_to_change_path.open("r") as fp: config_to_change_toml = toml.load(fp) config_to_change_toml[section]['dataset_path'] = dataset_path @@ -103,7 +111,7 @@ def fix_options_in_configs(config_metadata_list, command, single_train_result=Tr if single_train_result: raise ValueError( f"Did not find just a single results directory in root_results_dir from train_config:\n" - f"{config_toml}" + f"{config_to_use_result_from}\n" f"root_results_dir was: {root_results_dir}" f'Matches for "results_*" were: {results_dir}' ) @@ -114,7 +122,7 @@ def fix_options_in_configs(config_metadata_list, command, single_train_result=Tr else: raise ValueError( f"Did not find a results directory in root_results_dir from train_config:\n" - f"{config_toml}" + f"{config_to_use_result_from}\n" f"root_results_dir was:\n{root_results_dir}" f'Matches for "results_*" were:\n{results_dir}' ) @@ -122,11 +130,23 @@ def fix_options_in_configs(config_metadata_list, command, single_train_result=Tr # these are the only options whose values we need to change # and they are the same for both predict and eval checkpoint_path = sorted(results_dir.glob("**/checkpoints/checkpoint.pt"))[0] - if config_toml['TRAIN']['normalize_spectrograms']: + if 'normalize_spectrograms' in config_toml['TRAIN'] and config_toml['TRAIN']['normalize_spectrograms']: spect_scaler_path = sorted(results_dir.glob("StandardizeSpect"))[0] else: spect_scaler_path = None - labelmap_path = sorted(results_dir.glob("labelmap.json"))[0] + + labelmap_path = sorted(results_dir.glob("labelmap.json")) + if len(labelmap_path) == 1: + labelmap_path = labelmap_path[0] + elif len(labelmap_path) == 0: + labelmap_path = None + else: + raise ValueError( + "Invalid number of labelmap.json files from results_dir for train config:\n" + f"{config_to_use_result_from}.\n" + f"Results dir: {results_dir}\n" + f"labelmap_path(s) found by globbing: {labelmap_path}" + ) # now add these values to corresponding options in predict / eval config with config_to_fix.open("r") as fp: @@ -145,7 +165,8 @@ def fix_options_in_configs(config_metadata_list, command, single_train_result=Tr # remove any existing 'spect_scaler_path' option del config_toml[section]["spect_scaler_path"] if command != 'train_continue': # train always gets labelmap from dataset dir, not from a config option - config_toml[section]["labelmap_path"] = str(labelmap_path) + if labelmap_path is not None: + config_toml[section]["labelmap_path"] = str(labelmap_path) with config_to_fix.open("w") as fp: toml.dump(config_toml, fp) diff --git a/tests/test_cli/test_eval.py b/tests/test_cli/test_eval.py index 1d8d76bd0..3e0c5ef99 100644 --- a/tests/test_cli/test_eval.py +++ b/tests/test_cli/test_eval.py @@ -12,13 +12,13 @@ @pytest.mark.parametrize( - "audio_format, spect_format, annot_format", + "model_name, audio_format, spect_format, annot_format", [ - ("cbin", None, "notmat"), + ("TweetyNet", "cbin", None, "notmat"), ], ) def test_eval( - audio_format, spect_format, annot_format, specific_config, tmp_path, model, device + model_name, audio_format, spect_format, annot_format, specific_config, tmp_path, device ): output_dir = tmp_path.joinpath( f"test_eval_{audio_format}_{spect_format}_{annot_format}" ) @@ -32,7 +32,7 @@ def test_eval( toml_path = specific_config( config_type="eval", - model=model, + model=model_name, audio_format=audio_format, annot_format=annot_format,
spect_format=spect_format, @@ -60,7 +60,7 @@ def test_eval_dataset_path_none_raises( toml_path = specific_config( config_type="eval", - model="teenytweetynet", + model="TweetyNet", audio_format="cbin", annot_format="notmat", spect_format=None, diff --git a/tests/test_cli/test_learncurve.py b/tests/test_cli/test_learncurve.py index e39c8cbda..abf3adaaa 100644 --- a/tests/test_cli/test_learncurve.py +++ b/tests/test_cli/test_learncurve.py @@ -10,7 +10,7 @@ from . import cli_asserts -def test_learncurve(specific_config, tmp_path, model, device): +def test_learncurve(specific_config, tmp_path, device): root_results_dir = tmp_path.joinpath("test_learncurve_root_results_dir") root_results_dir.mkdir() @@ -25,7 +25,7 @@ def test_learncurve(specific_config, tmp_path, model, device): toml_path = specific_config( config_type="learncurve", - model=model, + model="TweetyNet", audio_format="cbin", annot_format="notmat", options_to_change=options_to_change, @@ -68,7 +68,7 @@ def test_learning_curve_dataset_path_none_raises( toml_path = specific_config( config_type="learncurve", - model="teenytweetynet", + model="TweetyNet", audio_format="cbin", annot_format="notmat", spect_format=None, diff --git a/tests/test_cli/test_predict.py b/tests/test_cli/test_predict.py index 19f206cd2..ff2764364 100644 --- a/tests/test_cli/test_predict.py +++ b/tests/test_cli/test_predict.py @@ -11,14 +11,14 @@ @pytest.mark.parametrize( - "audio_format, spect_format, annot_format", + "model_name, audio_format, spect_format, annot_format", [ - ("cbin", None, "notmat"), - ("wav", None, "birdsong-recognition-dataset"), + ("TweetyNet", "cbin", None, "notmat"), + ("TweetyNet", "wav", None, "birdsong-recognition-dataset"), ], ) def test_predict( - audio_format, spect_format, annot_format, specific_config, tmp_path, model, device + model_name, audio_format, spect_format, annot_format, specific_config, tmp_path, device ): output_dir = tmp_path.joinpath( f"test_predict_{audio_format}_{spect_format}_{annot_format}" @@ -32,7 +32,7 @@ def test_predict( toml_path = specific_config( config_type="predict", - model=model, + model=model_name, audio_format=audio_format, annot_format=annot_format, options_to_change=options_to_change, @@ -58,7 +58,7 @@ def test_predict_dataset_path_none_raises( toml_path = specific_config( config_type="predict", - model="teenytweetynet", + model="TweetyNet", audio_format="cbin", annot_format="notmat", spect_format=None, diff --git a/tests/test_cli/test_train.py b/tests/test_cli/test_train.py index 45290a23e..cb02736aa 100644 --- a/tests/test_cli/test_train.py +++ b/tests/test_cli/test_train.py @@ -12,15 +12,15 @@ @pytest.mark.parametrize( - "audio_format, spect_format, annot_format", + "model_name, audio_format, spect_format, annot_format", [ - ("cbin", None, "notmat"), - ("wav", None, "birdsong-recognition-dataset"), - (None, "mat", "yarden"), + ("TweetyNet", "cbin", None, "notmat"), + ("TweetyNet", "wav", None, "birdsong-recognition-dataset"), + ("TweetyNet", None, "mat", "yarden"), ], ) def test_train( - audio_format, spect_format, annot_format, specific_config, tmp_path, model, device + model_name, audio_format, spect_format, annot_format, specific_config, tmp_path, device ): root_results_dir = tmp_path.joinpath("test_train_root_results_dir") root_results_dir.mkdir() @@ -36,7 +36,7 @@ def test_train( toml_path = specific_config( config_type="train", - model=model, + model=model_name, audio_format=audio_format, annot_format=annot_format, spect_format=spect_format, @@ -71,7 +71,7 @@ def 
test_train_dataset_path_none_raises( toml_path = specific_config( config_type="train", - model="teenytweetynet", + model="TweetyNet", audio_format="cbin", annot_format="notmat", spect_format=None, diff --git a/tests/test_common/test_labels.py b/tests/test_common/test_labels.py index 0dfa53367..a0cdae139 100644 --- a/tests/test_common/test_labels.py +++ b/tests/test_common/test_labels.py @@ -61,8 +61,8 @@ def test_to_set(labels_list, expected_labelset): @pytest.mark.parametrize( 'config_type, model_name, audio_format, spect_format, annot_format', [ - ('train', 'teenytweetynet', 'cbin', None, 'notmat'), - ('train', 'teenytweetynet', None, 'mat', 'yarden'), + ('train', 'TweetyNet', 'cbin', None, 'notmat'), + ('train', 'TweetyNet', None, 'mat', 'yarden'), ] ) def test_from_df(config_type, model_name, audio_format, spect_format, annot_format, diff --git a/tests/test_config/test_parse.py b/tests/test_config/test_parse.py index 480c11907..70549b34f 100644 --- a/tests/test_config/test_parse.py +++ b/tests/test_config/test_parse.py @@ -22,11 +22,10 @@ def test_parse_config_section_returns_attrs_class( section_name, configs_toml_path_pairs_by_model_factory, - model, ): """test that ``vak.config.parse.parse_config_section`` returns an instance of ``vak.config.learncurve.LearncurveConfig``""" - config_toml_path_pairs = configs_toml_path_pairs_by_model_factory(model, section_name) + config_toml_path_pairs = configs_toml_path_pairs_by_model_factory("TweetyNet", section_name) for config_toml, toml_path in config_toml_path_pairs: config_section_obj = vak.config.parse.parse_config_section( config_toml=config_toml, @@ -41,7 +40,6 @@ def test_parse_config_section_returns_attrs_class( @pytest.mark.parametrize( "section_name", [ - "DATALOADER", "EVAL", "LEARNCURVE", "PREDICT", @@ -53,13 +51,12 @@ def test_parse_config_section_returns_attrs_class( def test_parse_config_section_missing_options_raises( section_name, configs_toml_path_pairs_by_model_factory, - model, ): """test that configs without the required options in a section raise KeyError""" if vak.config.parse.REQUIRED_OPTIONS[section_name] is None: pytest.skip(f"no required options to test for section: {section_name}") - configs_toml_path_pairs = configs_toml_path_pairs_by_model_factory(model, section_name) + configs_toml_path_pairs = configs_toml_path_pairs_by_model_factory("TweetyNet", section_name) for config_toml, toml_path in configs_toml_path_pairs: for option in vak.config.parse.REQUIRED_OPTIONS[section_name]: @@ -77,13 +74,12 @@ def test_parse_config_section_missing_options_raises( def test_parse_config_section_model_not_installed_raises( section_name, configs_toml_path_pairs_by_model_factory, - model, ): """test that a ValueError is raised when the ``models`` option in the section specifies names of models that are not installed""" # we only need one toml, path pair # so we just call next on the ``zipped`` iterator that our fixture gives us - configs_toml_path_pairs = configs_toml_path_pairs_by_model_factory(model) + configs_toml_path_pairs = configs_toml_path_pairs_by_model_factory("TweetyNet") for config_toml, toml_path in configs_toml_path_pairs: if section_name.lower() in toml_path.name: @@ -163,8 +159,8 @@ def test_from_toml_path_raises_when_config_doesnt_exist(config_that_doesnt_exist vak.config.parse.from_toml_path(config_that_doesnt_exist) -def test_from_toml(configs_toml_path_pairs_by_model_factory, model): - config_toml_path_pairs = configs_toml_path_pairs_by_model_factory(model) +def 
test_from_toml(configs_toml_path_pairs_by_model_factory): + config_toml_path_pairs = configs_toml_path_pairs_by_model_factory("TweetyNet") for config_toml, toml_path in config_toml_path_pairs: config_obj = vak.config.parse.from_toml(config_toml, toml_path) assert isinstance(config_obj, vak.config.parse.Config) @@ -172,13 +168,12 @@ def test_from_toml_parse_prep_with_sections_not_none( configs_toml_path_pairs_by_model_factory, - model, ): """test that we get only the sections we want when we pass in a sections list to ``from_toml``. Specifically test ``PREP`` since that's what this will be used for.""" - # only use configs from 'default_model') (teenytweetynet) + # only use configs from 'default_model' (TweetyNet) # so we are sure paths exist, to avoid NotADirectoryErrors that give spurious test failures - config_toml_path_pairs = configs_toml_path_pairs_by_model_factory(model) + config_toml_path_pairs = configs_toml_path_pairs_by_model_factory("TweetyNet") for config_toml, toml_path in config_toml_path_pairs: config_obj = vak.config.parse.from_toml( config_toml, toml_path, sections=["PREP", "SPECT_PARAMS"] diff --git a/tests/test_config/test_prep.py b/tests/test_config/test_prep.py index d4f9a2cd2..3912f11f0 100644 --- a/tests/test_config/test_prep.py +++ b/tests/test_config/test_prep.py @@ -4,9 +4,8 @@ def test_parse_prep_config_returns_PrepConfig_instance( configs_toml_path_pairs_by_model_factory, - model, ): - config_toml_path_pairs = configs_toml_path_pairs_by_model_factory(model) + config_toml_path_pairs = configs_toml_path_pairs_by_model_factory("TweetyNet") for config_toml, toml_path in config_toml_path_pairs: prep_section = config_toml["PREP"] config = vak.config.prep.PrepConfig(**prep_section) diff --git a/tests/test_datasets/test_window_dataset/__init__.py b/tests/test_datasets/test_frame_classification/__init__.py similarity index 100% rename from tests/test_datasets/test_window_dataset/__init__.py rename to tests/test_datasets/test_frame_classification/__init__.py diff --git a/tests/test_datasets/test_frame_classification/test_frames_dataset.py b/tests/test_datasets/test_frame_classification/test_frames_dataset.py new file mode 100644 index 000000000..953e9c3c1 --- /dev/null +++ b/tests/test_datasets/test_frame_classification/test_frames_dataset.py @@ -0,0 +1,34 @@ +import pytest + +import vak +import vak.datasets.frame_classification + + +class TestFramesDataset: + @pytest.mark.parametrize( + 'config_type, model_name, audio_format, spect_format, annot_format, split', + [ + ('eval', 'TweetyNet', 'cbin', None, 'notmat', 'test'), + ] + ) + def test_from_dataset_path(self, config_type, model_name, audio_format, spect_format, annot_format, + split, specific_config): + """Test we can get a FramesDataset instance from the classmethod ``from_dataset_path``""" + toml_path = specific_config(config_type, + model_name, + audio_format=audio_format, + spect_format=spect_format, + annot_format=annot_format) + cfg = vak.config.parse.from_toml_path(toml_path) + cfg_command = getattr(cfg, config_type) + + item_transform = vak.transforms.defaults.get_default_transform( + model_name, config_type, cfg.eval.transform_params + ) + + dataset = vak.datasets.frame_classification.FramesDataset.from_dataset_path( + dataset_path=cfg_command.dataset_path, + split=split, + item_transform=item_transform, + ) + assert isinstance(dataset, vak.datasets.frame_classification.FramesDataset) diff --git
a/tests/test_datasets/test_frame_classification/test_metadata.py b/tests/test_datasets/test_frame_classification/test_metadata.py new file mode 100644 index 000000000..d5d2f5145 --- /dev/null +++ b/tests/test_datasets/test_frame_classification/test_metadata.py @@ -0,0 +1,79 @@ +import json +import pathlib + +import pytest + +import vak.datasets.frame_classification + + +ARGNAMES = 'dataset_csv_filename, input_type, frame_dur' +ARGVALS = [ + (pathlib.Path('bird1_prep_230319_115852.csv'), 'spect', 0.002), + (pathlib.Path('bird1_prep_230319_115852.csv'), 'spect', 0.001), + (pathlib.Path('bird1_prep_230319_115852.csv'), 'spect', 0.0027), + (pathlib.Path('bird1_prep_230319_115852.csv'), 'audio', 3.125e-05), + (pathlib.Path('bird1_prep_230319_115852.csv'), 'audio', 2.2727272727272726e-05), + (pathlib.Path('bird1_prep_230319_115852.csv'), 'audio', 6.25e-05), +] + + +class TestMetadata: + @pytest.mark.parametrize( + ARGNAMES, + ARGVALS + ) + def test_metadata_init(self, dataset_csv_filename, input_type, frame_dur): + metadata = vak.datasets.frame_classification.Metadata(dataset_csv_filename, input_type, frame_dur) + assert isinstance(metadata, vak.datasets.frame_classification.Metadata) + for attr_name, attr_val in zip( + ('dataset_csv_filename', 'input_type', 'frame_dur'), + (dataset_csv_filename, input_type, frame_dur), + ): + assert hasattr(metadata, attr_name) + if isinstance(attr_val, pathlib.Path): + assert getattr(metadata, attr_name) == str(attr_val) + else: + assert getattr(metadata, attr_name) == attr_val + + @pytest.mark.parametrize( + ARGNAMES, + ARGVALS + ) + def test_metadata_from_path(self, dataset_csv_filename, input_type, frame_dur, tmp_path): + # we make metadata "by hand" + metadata_dict = { + 'dataset_csv_filename': str(dataset_csv_filename), + 'input_type': input_type, + 'frame_dur': frame_dur, + } + metadata_json_path = tmp_path / vak.datasets.frame_classification.Metadata.METADATA_JSON_FILENAME + with metadata_json_path.open('w') as fp: + json.dump(metadata_dict, fp, indent=4) + + metadata = vak.datasets.frame_classification.Metadata.from_path(metadata_json_path) + assert isinstance(metadata, vak.datasets.frame_classification.Metadata) + for attr_name, attr_val in zip( + ('dataset_csv_filename', 'input_type', 'frame_dur'), + (dataset_csv_filename, input_type, frame_dur), + ): + assert hasattr(metadata, attr_name) + if isinstance(attr_val, pathlib.Path): + assert getattr(metadata, attr_name) == str(attr_val) + else: + assert getattr(metadata, attr_name) == attr_val + + @pytest.mark.parametrize( + ARGNAMES, + ARGVALS + ) + def test_metadata_to_json(self, dataset_csv_filename, input_type, frame_dur, tmp_path): + metadata_to_json = vak.datasets.frame_classification.Metadata(dataset_csv_filename, input_type, frame_dur) + mock_dataset_path = tmp_path / 'mock_dataset' + mock_dataset_path.mkdir() + + metadata_to_json.to_json(dataset_path=mock_dataset_path) + expected_json_path = mock_dataset_path / vak.datasets.frame_classification.Metadata.METADATA_JSON_FILENAME + assert expected_json_path.exists() + + metadata_from_json = vak.datasets.frame_classification.Metadata.from_path(expected_json_path) + assert metadata_from_json == metadata_to_json diff --git a/tests/test_datasets/test_frame_classification/test_window_dataset.py b/tests/test_datasets/test_frame_classification/test_window_dataset.py new file mode 100644 index 000000000..5bc5f6586 --- /dev/null +++ b/tests/test_datasets/test_frame_classification/test_window_dataset.py @@ -0,0 +1,37 @@ +import pytest + +import vak +import 
vak.datasets.frame_classification + + +class TestWindowDataset: + @pytest.mark.parametrize( + 'config_type, model_name, audio_format, spect_format, annot_format, split, transform_kwargs', + [ + ('train', 'TweetyNet', 'cbin', None, 'notmat', 'train', {}), + ('train', 'TweetyNet', None, 'mat', 'yarden', 'train', {}), + ] + ) + def test_from_dataset_path(self, config_type, model_name, audio_format, spect_format, annot_format, + split, transform_kwargs, specific_config): + """Test we can get a WindowDataset instance from the classmethod ``from_dataset_path``""" + toml_path = specific_config(config_type, + model_name, + audio_format=audio_format, + spect_format=spect_format, + annot_format=annot_format) + cfg = vak.config.parse.from_toml_path(toml_path) + cfg_command = getattr(cfg, config_type) + + transform, target_transform = vak.transforms.defaults.get_default_transform( + model_name, config_type, transform_kwargs + ) + + dataset = vak.datasets.frame_classification.WindowDataset.from_dataset_path( + dataset_path=cfg_command.dataset_path, + split=split, + window_size=cfg_command.train_dataset_params['window_size'], + transform=transform, + target_transform=target_transform, + ) + assert isinstance(dataset, vak.datasets.frame_classification.WindowDataset) diff --git a/tests/test_datasets/test_metadata.py b/tests/test_datasets/test_metadata.py deleted file mode 100644 index 349eab2dd..000000000 --- a/tests/test_datasets/test_metadata.py +++ /dev/null @@ -1,63 +0,0 @@ -import json -import pathlib - -import pytest - -import vak.datasets.metadata - - -ARGNAMES = 'dataset_csv_filename, timebin_dur' -ARGVALS = [ - (pathlib.Path('bird1_prep_230319_115852.csv'), 0.002), - (pathlib.Path('bird1_prep_230319_115852.csv'), 0.001), - (pathlib.Path('bird1_prep_230319_115852.csv'), 0.0027), -] - - -class TestMetadata: - @pytest.mark.parametrize( - ARGNAMES, - ARGVALS - ) - def test_metadata_init(self, dataset_csv_filename, timebin_dur): - metadata = vak.datasets.metadata.Metadata(dataset_csv_filename, timebin_dur) - assert isinstance(metadata, vak.datasets.metadata.Metadata) - for attr_name, attr_val in zip( - ('dataset_csv_filename', 'timebin_dur'), - (dataset_csv_filename, timebin_dur), - ): - assert hasattr(metadata, attr_name) - if isinstance(attr_val, pathlib.Path): - assert getattr(metadata, attr_name) == str(attr_val) - else: - assert getattr(metadata, attr_name) == attr_val - - @pytest.mark.parametrize( - ARGNAMES, - ARGVALS - ) - def test_metadata_from_path(self, dataset_csv_filename, timebin_dur, tmp_path): - # we make metadata "by hand" - metadata_dict = { - 'dataset_csv_filename': str(dataset_csv_filename), - 'timebin_dur': timebin_dur, - } - metadata_json_path = tmp_path / vak.datasets.metadata.Metadata.METADATA_JSON_FILENAME - with metadata_json_path.open('w') as fp: - json.dump(metadata_dict, fp, indent=4) - - @pytest.mark.parametrize( - ARGNAMES, - ARGVALS - ) - def test_metadata_to_json(self, dataset_csv_filename, timebin_dur, tmp_path): - metadata_to_json = vak.datasets.metadata.Metadata(dataset_csv_filename, timebin_dur) - mock_dataset_path = tmp_path / 'mock_dataset' - mock_dataset_path.mkdir() - - metadata_to_json.to_json(dataset_path=mock_dataset_path) - expected_json_path = mock_dataset_path / vak.datasets.metadata.Metadata.METADATA_JSON_FILENAME - assert expected_json_path.exists() - - metadata_from_json = vak.datasets.metadata.Metadata.from_path(expected_json_path) - assert metadata_from_json == metadata_to_json diff --git a/tests/test_nets/test_das/__init__.py 
b/tests/test_datasets/test_parametric_umap/__init__.py similarity index 100% rename from tests/test_nets/test_das/__init__.py rename to tests/test_datasets/test_parametric_umap/__init__.py diff --git a/tests/test_datasets/test_parametric_umap/test_parametric_umap.py b/tests/test_datasets/test_parametric_umap/test_parametric_umap.py new file mode 100644 index 000000000..38a2782da --- /dev/null +++ b/tests/test_datasets/test_parametric_umap/test_parametric_umap.py @@ -0,0 +1,34 @@ +import pytest + +import vak +import vak.datasets.parametric_umap + + +class TestParametricUMAPDataset: + @pytest.mark.parametrize( + 'config_type, model_name, audio_format, spect_format, annot_format, split, transform_kwargs', + [ + ('train', 'ConvEncoderUMAP', 'cbin', None, 'notmat', 'train', {}), + ] + ) + def test_from_dataset_path(self, config_type, model_name, audio_format, spect_format, annot_format, + split, transform_kwargs, specific_config): + """Test we can get a ParametricUMAPDataset instance from the classmethod ``from_dataset_path``""" + toml_path = specific_config(config_type, + model_name, + audio_format=audio_format, + spect_format=spect_format, + annot_format=annot_format) + cfg = vak.config.parse.from_toml_path(toml_path) + cfg_command = getattr(cfg, config_type) + + transform = vak.transforms.defaults.get_default_transform( + model_name, config_type, transform_kwargs + ) + + dataset = vak.datasets.parametric_umap.ParametricUMAPDataset.from_dataset_path( + dataset_path=cfg_command.dataset_path, + split=split, + transform=transform, + ) + assert isinstance(dataset, vak.datasets.parametric_umap.ParametricUMAPDataset) diff --git a/tests/test_datasets/test_seq/test_validators.py b/tests/test_datasets/test_seq/test_validators.py deleted file mode 100644 index a5f94d82b..000000000 --- a/tests/test_datasets/test_seq/test_validators.py +++ /dev/null @@ -1,29 +0,0 @@ -import pytest - -import vak - - -@pytest.mark.parametrize( - 'config_type, audio_format, spect_format, annot_format, expected_result', - [ - ("train", "cbin", None, "notmat", True), - ("train", "wav", None, "birdsong-recognition-dataset", True), - ("train", None, "mat", "yarden", True), - ] -) -def test_has_unlabeled(config_type, - audio_format, - spect_format, - annot_format, - expected_result, - model, - specific_config_toml, - specific_dataset_csv_path): - csv_path = specific_dataset_csv_path(config_type, - model, - annot_format, - audio_format, - spect_format) - - has_unlabeled = vak.datasets.seq.validators.has_unlabeled(csv_path) - assert has_unlabeled == expected_result diff --git a/tests/test_datasets/test_window_dataset/conftest.py b/tests/test_datasets/test_window_dataset/conftest.py deleted file mode 100644 index 7d6a98bd2..000000000 --- a/tests/test_datasets/test_window_dataset/conftest.py +++ /dev/null @@ -1,70 +0,0 @@ -import json - -import numpy as np -import pandas as pd -import pytest - -import vak -import vak.datasets - -from ...fixtures.test_data import GENERATED_TEST_DATA_ROOT -from ...fixtures.config import GENERATED_TEST_CONFIGS_ROOT - - -# get the corresponding .toml config file that generated the dataset -A_LEARNCURVE_TOML_PATH = GENERATED_TEST_CONFIGS_ROOT / 'teenytweetynet_learncurve_audio_cbin_annot_notmat.toml' - - -def window_dataset_from_csv_kwargs_list(): - """Returns a list of dicts, each dict kwargs for ``WindowDataset.from_csv``, - that we use to parametrize a fixture below, ``window_dataset_from_csv_kwargs``.
- - We do it this way to have one test case for each unique set of the vectors - that represent windows in the dataset. - There will be a unique set for each training replicate in a learncurve run. - """ - window_dataset_from_csv_kwargs_list = [] - - cfg = vak.config.parse.from_toml_path(A_LEARNCURVE_TOML_PATH) - dataset_path = cfg.learncurve.dataset_path - metadata = vak.datasets.metadata.Metadata.from_dataset_path(dataset_path) - dataset_csv_path = dataset_path / metadata.dataset_csv_filename - dataset_df = pd.read_csv(dataset_csv_path) - - dataset_learncurve_dir = dataset_path / 'learncurve' - splits_path = dataset_learncurve_dir / 'learncurve-splits-metadata.csv' - splits_df = pd.read_csv(splits_path) - - # stuff we need just to be able to instantiate window dataset - with (dataset_path / 'labelmap.json').open('r') as fp: - labelmap = json.load(fp) - - for splits_df_row in splits_df.itertuples(): - metadata = vak.datasets.metadata.Metadata.from_dataset_path(cfg.learncurve.dataset_path) - dataset_csv_path = cfg.learncurve.dataset_path / metadata.dataset_csv_filename - - window_dataset_kwargs = dict( - dataset_csv_path=dataset_csv_path, - labelmap=labelmap, - window_size=cfg.dataloader.window_size, - ) - for window_dataset_kwarg in [ - "source_ids", - "source_inds", - "window_inds", - ]: - vec_filename = getattr(splits_df_row, f'{window_dataset_kwarg}_npy_filename') - window_dataset_kwargs[window_dataset_kwarg] = np.load( - dataset_learncurve_dir / vec_filename - ) - - window_dataset_from_csv_kwargs_list.append( - window_dataset_kwargs - ) - - return window_dataset_from_csv_kwargs_list - - -@pytest.fixture(params=window_dataset_from_csv_kwargs_list()) -def window_dataset_from_csv_kwargs(request): - return request.param diff --git a/tests/test_datasets/test_window_dataset/test_class_.py b/tests/test_datasets/test_window_dataset/test_class_.py deleted file mode 100644 index 8bc484d52..000000000 --- a/tests/test_datasets/test_window_dataset/test_class_.py +++ /dev/null @@ -1,73 +0,0 @@ -import pytest - -import vak -import vak.datasets - - -class TestWindowDataset: - @pytest.mark.parametrize( - 'config_type, model_name, audio_format, spect_format, annot_format, x_source', - [ - ('train', 'teenytweetynet', 'cbin', None, 'notmat', 'spect'), - ('train', 'teenytweetynet', None, 'mat', 'yarden', 'spect'), - ('learncurve', 'teenytweetynet', 'cbin', None, 'notmat', 'spect'), - ] - ) - def test_from_csv(self, config_type, model_name, audio_format, spect_format, annot_format, x_source, - specific_config): - """Test we can get a WindowDataset instance from the classmethod ``from_csv`` - - This is the way we make ``WindowDataset`` instances - inside ``vak.train``, - as opposed to when we *also* pass in vectors representing the windows, - which we do in ``vak.learncurve.learning_curve``, - see next test method. 
- """ - toml_path = specific_config(config_type, - model_name, - audio_format=audio_format, - spect_format=spect_format, - annot_format=annot_format) - cfg = vak.config.parse.from_toml_path(toml_path) - cfg_command = getattr(cfg, config_type) - - # stuff we need just to be able to instantiate window dataset - labelmap = vak.common.labels.to_map(cfg.prep.labelset, map_unlabeled=True) - - transform, target_transform = vak.transforms.get_defaults('train') - - metadata = vak.datasets.metadata.Metadata.from_dataset_path(cfg_command.dataset_path) - dataset_csv_path = cfg_command.dataset_path / metadata.dataset_csv_filename - - dataset = vak.datasets.WindowDataset.from_csv( - dataset_csv_path=dataset_csv_path, - split='train', - labelmap=labelmap, - window_size=cfg.dataloader.window_size, - transform=transform, - target_transform=target_transform, - source_ids=None, - source_inds=None, - window_inds=None, - ) - assert isinstance(dataset, vak.datasets.WindowDataset) - - def test_from_csv_with_vectors(self, window_dataset_from_csv_kwargs): - """Test that classmethod ``WindowDataset.from_csv`` works - when we pass in vectors representing windows. - - This is the way we make ``WindowDataset`` instances - inside ``vak.learncurve.learning_curve``. - - We get the vectors "by hand" inside the ``learning_curve`` - function, and then feed them in to the ``from_csv`` - classmethod when we instantiate. - """ - transform, target_transform = vak.transforms.get_defaults('train') - dataset = vak.datasets.WindowDataset.from_csv( - split='train', - transform=transform, - target_transform=target_transform, - **window_dataset_from_csv_kwargs - ) - assert isinstance(dataset, vak.datasets.WindowDataset) diff --git a/tests/test_datasets/test_window_dataset/test_helper.py b/tests/test_datasets/test_window_dataset/test_helper.py deleted file mode 100644 index 5a28a9c65..000000000 --- a/tests/test_datasets/test_window_dataset/test_helper.py +++ /dev/null @@ -1,210 +0,0 @@ -import numpy as np -import pandas as pd -import pytest - -import vak.datasets - - -@pytest.mark.parametrize( - 'config_type, model_name, audio_format, spect_format, annot_format, window_size, crop_dur', - [ - ('learncurve', 'teenytweetynet', 'cbin', None, 'notmat', 22, 4.0), - ('learncurve', 'teenytweetynet', 'cbin', None, 'notmat', 22, 6.0), - ('learncurve', 'teenytweetynet', 'cbin', None, 'notmat', 44, 4.0), - ('learncurve', 'teenytweetynet', 'cbin', None, 'notmat', 44, 6.0), - ] -) -def test_crop_vectors_keep_classes(config_type, model_name, audio_format, spect_format, annot_format, - window_size, crop_dur, specific_config): - toml_path = specific_config(config_type, - model_name, - audio_format=audio_format, - spect_format=spect_format, - annot_format=annot_format) - # ---- set-up (there's a lot so I'm marking it) ---- - cfg = vak.config.parse.from_toml_path(toml_path) - cmd_cfg = getattr(cfg, config_type) # "command config", i.e., cli command, [TRAIN] or [LEARNCURVE] section - dataset_path = cmd_cfg.dataset_path - metadata = vak.datasets.metadata.Metadata.from_dataset_path(dataset_path) - dataset_csv_path = dataset_path / metadata.dataset_csv_filename - - df = pd.read_csv(dataset_csv_path) - df_split = df[df.split == 'train'] - - # stuff we need just to be able to instantiate window dataset - labelmap = vak.common.labels.to_map(cfg.prep.labelset, map_unlabeled=True) - - timebin_dur = vak.prep.prep_helper.validate_and_get_timebin_dur(df) - - (source_ids_before, - source_inds_before, - window_inds_before, - lbl_tb) = 
vak.datasets.window_dataset.helper._vectors_from_df( - df_split, - dataset_path, - window_size=window_size, - crop_to_dur=True if crop_dur else False, - labelmap=labelmap, - ) - - # ---- actually get result we want to test - ( - source_ids, - source_inds, - window_inds, - ) = vak.datasets.window_dataset.helper.crop_vectors_keep_classes( - lbl_tb, - source_ids_before, - source_inds_before, - window_inds_before, - crop_dur, - timebin_dur, - labelmap, - window_size, - ) - - for vector_name, vector in zip( - ('source_ids', 'source_inds', 'window_inds'), - (source_ids, source_inds, window_inds) - ): - assert isinstance(vector, np.ndarray) - assert source_ids.shape[-1] == source_inds.shape[-1] - assert np.isclose(source_ids.shape[-1] * timebin_dur, crop_dur) - - # test that valid window indices is strictly less than or equal to source_ids - window_inds_no_invalid = window_inds[window_inds != vak.datasets.WindowDataset.INVALID_WINDOW_VAL] - assert window_inds_no_invalid.shape[-1] <= source_ids.shape[-1] - - # test we preserved unique classes - assert np.array_equal( - np.unique(lbl_tb[window_inds]), - np.unique(lbl_tb) - ) - - -@pytest.mark.parametrize( - 'config_type, model_name, audio_format, spect_format, annot_format, window_size, crop_dur', - [ - ('learncurve', 'teenytweetynet', 'cbin', None, 'notmat', 22, None), - ('learncurve', 'teenytweetynet', 'cbin', None, 'notmat', 22, 4.0), - ('learncurve', 'teenytweetynet', 'cbin', None, 'notmat', 44, None), - ('learncurve', 'teenytweetynet', 'cbin', None, 'notmat', 44, 4.0), - ] -) -def test__vectors_from_df(config_type, model_name, audio_format, spect_format, annot_format, - window_size, crop_dur, specific_config): - """Test the helper function ``_vectors_from_df`` that - generates the vectors representing windows, - *without* removing the elements markes as invalid start indices - from ``window_inds``. - """ - toml_path = specific_config(config_type, - model_name, - audio_format=audio_format, - spect_format=spect_format, - annot_format=annot_format) - cfg = vak.config.parse.from_toml_path(toml_path) - - # stuff we need just to be able to instantiate window dataset - labelmap = vak.common.labels.to_map(cfg.prep.labelset, map_unlabeled=True) - - cmd_cfg = getattr(cfg, config_type) # "command config", i.e., cli command, [TRAIN] or [LEARNCURVE] section - dataset_path = cmd_cfg.dataset_path - metadata = vak.datasets.metadata.Metadata.from_dataset_path(dataset_path) - dataset_csv_path = dataset_path / metadata.dataset_csv_filename - df = pd.read_csv(dataset_csv_path) - df = df[df.split == 'train'] - - (source_ids, - source_inds, - window_inds, - lbl_tb) = vak.datasets.window_dataset.helper._vectors_from_df( - df, - dataset_path, - window_size=window_size, - crop_to_dur=True if crop_dur else False, - labelmap=labelmap, - ) - - for vector_name, vector in zip( - ('source_ids', 'source_inds', 'window_inds', 'lbl_tb'), - (source_ids, source_inds, window_inds) - ): - assert isinstance(vector, np.ndarray) - - assert source_ids.shape == source_inds.shape == window_inds.shape - - n_source_files_in_split = len(df) - window_inds_no_invalid = window_inds[window_inds != vak.datasets.WindowDataset.INVALID_WINDOW_VAL] - # For every source file there will be (window_size - 1) invalid indices for a window to start at. 
- # Think of the last valid window: all bins in that window except the first are invalid - assert window_inds_no_invalid.shape[-1] == window_inds.shape[-1] - (n_source_files_in_split * (window_size - 1)) - - assert np.array_equal( - np.unique(source_ids), - np.arange(n_source_files_in_split) - ) - - if crop_dur: - assert lbl_tb.shape == source_ids.shape == source_inds.shape == window_inds.shape - else: - assert lbl_tb is None - - -@pytest.mark.parametrize( - 'config_type, model_name, audio_format, spect_format, annot_format, window_size, crop_dur', - [ - ('learncurve', 'teenytweetynet', 'cbin', None, 'notmat', 22, None), - ('learncurve', 'teenytweetynet', 'cbin', None, 'notmat', 22, 4.0), - ('learncurve', 'teenytweetynet', 'cbin', None, 'notmat', 44, None), - ('learncurve', 'teenytweetynet', 'cbin', None, 'notmat', 44, 4.0), - ] -) -def test_vectors_from_df(config_type, model_name, audio_format, spect_format, annot_format, - window_size, crop_dur, specific_config): - toml_path = specific_config(config_type, - model_name, - audio_format=audio_format, - spect_format=spect_format, - annot_format=annot_format) - cfg = vak.config.parse.from_toml_path(toml_path) - - # stuff we need just to be able to instantiate window dataset - labelmap = vak.common.labels.to_map(cfg.prep.labelset, map_unlabeled=True) - - cmd_cfg = getattr(cfg, config_type) # "command config", i.e., cli command, [TRAIN] or [LEARNCURVE] section - dataset_path = cmd_cfg.dataset_path - metadata = vak.datasets.metadata.Metadata.from_dataset_path(dataset_path) - dataset_csv_path = dataset_path / metadata.dataset_csv_filename - df = pd.read_csv(dataset_csv_path) - - if crop_dur: - timebin_dur = vak.prep.prep_helper.validate_and_get_timebin_dur(df) - else: - timebin_dur = None - - source_ids, source_inds, window_inds = vak.datasets.window_dataset.helper.vectors_from_df( - df, - dataset_path, - 'train', - window_size, - crop_dur=crop_dur, - labelmap=labelmap, - timebin_dur=timebin_dur, - ) - - for vector_name, vector in zip( - ('source_ids', 'source_inds', 'window_inds'), - (source_ids, source_inds, window_inds) - ): - assert isinstance(vector, np.ndarray) - - assert source_ids.shape == source_inds.shape - n_source_files_in_split = len(df[df.split == 'train']) - # For every source file there will be (window_size - 1) invalid indices for a window to start at. 
- # Think of the last valid window: all bins in that window except the first are invalid - n_total_invalid_start_inds = n_source_files_in_split * (window_size - 1) - if crop_dur: - assert window_inds.shape[-1] <= source_ids.shape[-1] - else: - assert window_inds.shape[-1] == source_inds.shape[-1] - n_total_invalid_start_inds diff --git a/tests/test_eval/test_eval.py b/tests/test_eval/test_eval.py index ecab39a91..94822887f 100644 --- a/tests/test_eval/test_eval.py +++ b/tests/test_eval/test_eval.py @@ -1,54 +1,29 @@ -"""tests for vak.eval.eval module""" +"""Tests for vak.eval.eval function.""" +from unittest import mock + import pytest import vak.config import vak.common.constants import vak.common.paths -import vak.eval.eval - - -# written as separate function so we can re-use in tests/unit/test_cli/test_eval.py -def assert_eval_output_matches_expected(model_name, output_dir): - eval_csv = sorted(output_dir.glob(f"eval_{model_name}*csv")) - assert len(eval_csv) == 1 - - -# -- we do eval with all possible configurations of post_tfm_kwargs -POST_TFM_KWARGS = [ - # default, will use ToLabels - None, - # no cleanup but uses ToLabelsWithPostprocessing - {'majority_vote': False, 'min_segment_dur': None}, - # use ToLabelsWithPostprocessing with *just* majority_vote - {'majority_vote': True, 'min_segment_dur': None}, - # use ToLabelsWithPostprocessing with *just* min_segment_dur - {'majority_vote': False, 'min_segment_dur': 0.002}, - # use ToLabelsWithPostprocessing with majority_vote *and* min_segment_dur - {'majority_vote': True, 'min_segment_dur': 0.002}, -] - - -@pytest.fixture(params=POST_TFM_KWARGS) -def post_tfm_kwargs(request): - return request.param +import vak.eval @pytest.mark.parametrize( - "audio_format, spect_format, annot_format", + "audio_format, spect_format, annot_format, model_name, eval_function_to_mock", [ - ("cbin", None, "notmat"), + ("cbin", None, "notmat", "TweetyNet", + 'vak.eval.eval_.eval_frame_classification_model'), + ("cbin", None, "notmat", "ConvEncoderUMAP", + 'vak.eval.eval_.eval_parametric_umap_model'), ], ) def test_eval( - audio_format, - spect_format, - annot_format, - specific_config, - tmp_path, - model, - device, - post_tfm_kwargs + audio_format, spect_format, annot_format, model_name, eval_function_to_mock, + specific_config, tmp_path ): + """Test that :func:`vak.eval.eval` dispatches to the correct model-specific + evaluation functions.""" output_dir = tmp_path.joinpath( f"test_eval_{audio_format}_{spect_format}_{annot_format}" ) @@ -56,12 +31,12 @@ options_to_change = [ {"section": "EVAL", "option": "output_dir", "value": str(output_dir)}, - {"section": "EVAL", "option": "device", "value": device}, + {"section": "EVAL", "option": "device", "value": 'cpu'}, ] toml_path = specific_config( config_type="eval", - model=model, + model=model_name, audio_format=audio_format, annot_format=annot_format, spect_format=spect_format, options_to_change=options_to_change, ) @@ -70,135 +45,24 @@ cfg = vak.config.parse.from_toml_path(toml_path) model_config = vak.config.model.config_from_toml_path(toml_path, cfg.eval.model) - vak.eval.eval( - model_name=cfg.eval.model, - model_config=model_config, - dataset_path=cfg.eval.dataset_path, - checkpoint_path=cfg.eval.checkpoint_path, - labelmap_path=cfg.eval.labelmap_path, - output_dir=cfg.eval.output_dir, - window_size=cfg.dataloader.window_size, - num_workers=cfg.eval.num_workers, - spect_scaler_path=cfg.eval.spect_scaler_path, - spect_key=cfg.spect_params.spect_key, - timebins_key=cfg.spect_params.timebins_key, -
device=cfg.eval.device, - post_tfm_kwargs=post_tfm_kwargs, - ) + results_path = tmp_path / 'results_path' + results_path.mkdir() - assert_eval_output_matches_expected(cfg.eval.model, output_dir) - - -@pytest.mark.parametrize( - 'path_option_to_change', - [ - {"section": "EVAL", "option": "checkpoint_path", "value": '/obviously/doesnt/exist/ckpt.pt'}, - {"section": "EVAL", "option": "labelmap_path", "value": '/obviously/doesnt/exist/labelmap.json'}, - {"section": "EVAL", "option": "spect_scaler_path", "value": '/obviously/doesnt/exist/SpectScaler'}, - ] -) -def test_eval_raises_file_not_found( - path_option_to_change, - specific_config, - tmp_path, - device -): - """Test that core.eval raises FileNotFoundError - when one of the following does not exist: - checkpoint_path, labelmap_path, dataset_path, spect_scaler_path - """ - output_dir = tmp_path.joinpath( - f"test_eval_cbin_notmat_invalid_dataset_path" - ) - output_dir.mkdir() - - options_to_change = [ - {"section": "EVAL", "option": "output_dir", "value": str(output_dir)}, - {"section": "EVAL", "option": "device", "value": device}, - path_option_to_change, - ] - - toml_path = specific_config( - config_type="eval", - model="teenytweetynet", - audio_format="cbin", - annot_format="notmat", - spect_format=None, - options_to_change=options_to_change, - ) - cfg = vak.config.parse.from_toml_path(toml_path) - model_config = vak.config.model.config_from_toml_path(toml_path, cfg.eval.model) - with pytest.raises(FileNotFoundError): + with mock.patch(eval_function_to_mock, autospec=True) as mock_eval_function: vak.eval.eval( - model_name=cfg.eval.model, + model_name=model_name, model_config=model_config, dataset_path=cfg.eval.dataset_path, checkpoint_path=cfg.eval.checkpoint_path, labelmap_path=cfg.eval.labelmap_path, output_dir=cfg.eval.output_dir, - window_size=cfg.dataloader.window_size, num_workers=cfg.eval.num_workers, + batch_size=cfg.eval.batch_size, + transform_params=cfg.eval.transform_params, + dataset_params=cfg.eval.dataset_params, spect_scaler_path=cfg.eval.spect_scaler_path, - spect_key=cfg.spect_params.spect_key, - timebins_key=cfg.spect_params.timebins_key, device=cfg.eval.device, + post_tfm_kwargs=cfg.eval.post_tfm_kwargs, ) - -@pytest.mark.parametrize( - 'path_option_to_change', - [ - {"section": "EVAL", "option": "dataset_path", "value": '/obviously/doesnt/exist/dataset-dir'}, - {"section": "EVAL", "option": "output_dir", "value": '/obviously/does/not/exist/output'}, - ] -) -def test_eval_raises_not_a_directory( - path_option_to_change, - specific_config, - device, - tmp_path, -): - """Test that core.eval raises NotADirectory - when directories don't exist - """ - options_to_change = [ - path_option_to_change, - {"section": "EVAL", "option": "device", "value": device}, - ] - - if path_option_to_change["option"] != "output_dir": - # need to make sure output_dir *does* exist - # so we don't detect spurious NotADirectoryError and assume test passes - output_dir = tmp_path.joinpath( - f"test_eval_raises_not_a_directory" - ) - output_dir.mkdir() - options_to_change.append( - {"section": "EVAL", "option": "output_dir", "value": str(output_dir)} - ) - - toml_path = specific_config( - config_type="eval", - model="teenytweetynet", - audio_format="cbin", - annot_format="notmat", - spect_format=None, - options_to_change=options_to_change, - ) - cfg = vak.config.parse.from_toml_path(toml_path) - model_config = vak.config.model.config_from_toml_path(toml_path, cfg.eval.model) - with pytest.raises(NotADirectoryError): - vak.eval.eval( - 
model_name=cfg.eval.model, - model_config=model_config, - dataset_path=cfg.eval.dataset_path, - checkpoint_path=cfg.eval.checkpoint_path, - labelmap_path=cfg.eval.labelmap_path, - output_dir=cfg.eval.output_dir, - window_size=cfg.dataloader.window_size, - num_workers=cfg.eval.num_workers, - spect_scaler_path=cfg.eval.spect_scaler_path, - spect_key=cfg.spect_params.spect_key, - timebins_key=cfg.spect_params.timebins_key, - device=cfg.eval.device, - ) + assert mock_eval_function.called diff --git a/tests/test_eval/test_frame_classification.py b/tests/test_eval/test_frame_classification.py new file mode 100644 index 000000000..5fe4c6d8d --- /dev/null +++ b/tests/test_eval/test_frame_classification.py @@ -0,0 +1,201 @@ +"""tests for vak.eval.frame_classification module""" +import pytest + +import vak.config +import vak.common.constants +import vak.common.paths +import vak.eval.frame_classification + + +# written as separate function so we can re-use in tests/unit/test_cli/test_eval.py +def assert_eval_output_matches_expected(model_name, output_dir): + eval_csv = sorted(output_dir.glob(f"eval_{model_name}*csv")) + assert len(eval_csv) == 1 + + +# -- we do eval with all possible configurations of post_tfm_kwargs +POST_TFM_KWARGS = [ + # default, will use ToLabels + None, + # no cleanup but uses ToLabelsWithPostprocessing + {'majority_vote': False, 'min_segment_dur': None}, + # use ToLabelsWithPostprocessing with *just* majority_vote + {'majority_vote': True, 'min_segment_dur': None}, + # use ToLabelsWithPostprocessing with *just* min_segment_dur + {'majority_vote': False, 'min_segment_dur': 0.002}, + # use ToLabelsWithPostprocessing with majority_vote *and* min_segment_dur + {'majority_vote': True, 'min_segment_dur': 0.002}, +] + + +@pytest.fixture(params=POST_TFM_KWARGS) +def post_tfm_kwargs(request): + return request.param + + +@pytest.mark.parametrize( + "model_name, audio_format, spect_format, annot_format", + [ + ("TweetyNet", "cbin", None, "notmat"), + ], +) +def test_eval_frame_classification_model( + model_name, + audio_format, + spect_format, + annot_format, + specific_config, + tmp_path, + device, + post_tfm_kwargs +): + output_dir = tmp_path.joinpath( + f"test_eval_{audio_format}_{spect_format}_{annot_format}" + ) + output_dir.mkdir() + + options_to_change = [ + {"section": "EVAL", "option": "output_dir", "value": str(output_dir)}, + {"section": "EVAL", "option": "device", "value": device}, + ] + + toml_path = specific_config( + config_type="eval", + model=model_name, + audio_format=audio_format, + annot_format=annot_format, + spect_format=spect_format, + options_to_change=options_to_change, + ) + cfg = vak.config.parse.from_toml_path(toml_path) + model_config = vak.config.model.config_from_toml_path(toml_path, cfg.eval.model) + + vak.eval.frame_classification.eval_frame_classification_model( + model_name=cfg.eval.model, + model_config=model_config, + dataset_path=cfg.eval.dataset_path, + checkpoint_path=cfg.eval.checkpoint_path, + labelmap_path=cfg.eval.labelmap_path, + output_dir=cfg.eval.output_dir, + num_workers=cfg.eval.num_workers, + transform_params=cfg.eval.transform_params, + dataset_params=cfg.eval.dataset_params, + spect_scaler_path=cfg.eval.spect_scaler_path, + device=cfg.eval.device, + post_tfm_kwargs=post_tfm_kwargs, + ) + + assert_eval_output_matches_expected(cfg.eval.model, output_dir) + + +@pytest.mark.parametrize( + 'path_option_to_change', + [ + {"section": "EVAL", "option": "checkpoint_path", "value": '/obviously/doesnt/exist/ckpt.pt'}, + {"section": "EVAL", 
"option": "labelmap_path", "value": '/obviously/doesnt/exist/labelmap.json'}, + {"section": "EVAL", "option": "spect_scaler_path", "value": '/obviously/doesnt/exist/SpectScaler'}, + ] +) +def test_eval_frame_classification_model_raises_file_not_found( + path_option_to_change, + specific_config, + tmp_path, + device +): + """Test that core.eval raises FileNotFoundError + when one of the following does not exist: + checkpoint_path, labelmap_path, dataset_path, spect_scaler_path + """ + output_dir = tmp_path.joinpath( + f"test_eval_cbin_notmat_invalid_dataset_path" + ) + output_dir.mkdir() + + options_to_change = [ + {"section": "EVAL", "option": "output_dir", "value": str(output_dir)}, + {"section": "EVAL", "option": "device", "value": device}, + path_option_to_change, + ] + + toml_path = specific_config( + config_type="eval", + model="TweetyNet", + audio_format="cbin", + annot_format="notmat", + spect_format=None, + options_to_change=options_to_change, + ) + cfg = vak.config.parse.from_toml_path(toml_path) + model_config = vak.config.model.config_from_toml_path(toml_path, cfg.eval.model) + with pytest.raises(FileNotFoundError): + vak.eval.frame_classification.eval_frame_classification_model( + model_name=cfg.eval.model, + model_config=model_config, + dataset_path=cfg.eval.dataset_path, + checkpoint_path=cfg.eval.checkpoint_path, + labelmap_path=cfg.eval.labelmap_path, + output_dir=cfg.eval.output_dir, + num_workers=cfg.eval.num_workers, + transform_params=cfg.eval.transform_params, + dataset_params=cfg.eval.dataset_params, + spect_scaler_path=cfg.eval.spect_scaler_path, + device=cfg.eval.device, + ) + + +@pytest.mark.parametrize( + 'path_option_to_change', + [ + {"section": "EVAL", "option": "dataset_path", "value": '/obviously/doesnt/exist/dataset-dir'}, + {"section": "EVAL", "option": "output_dir", "value": '/obviously/does/not/exist/output'}, + ] +) +def test_eval_frame_classification_model_raises_not_a_directory( + path_option_to_change, + specific_config, + device, + tmp_path, +): + """Test that core.eval raises NotADirectory + when directories don't exist + """ + options_to_change = [ + path_option_to_change, + {"section": "EVAL", "option": "device", "value": device}, + ] + + if path_option_to_change["option"] != "output_dir": + # need to make sure output_dir *does* exist + # so we don't detect spurious NotADirectoryError and assume test passes + output_dir = tmp_path.joinpath( + f"test_eval_raises_not_a_directory" + ) + output_dir.mkdir() + options_to_change.append( + {"section": "EVAL", "option": "output_dir", "value": str(output_dir)} + ) + + toml_path = specific_config( + config_type="eval", + model="TweetyNet", + audio_format="cbin", + annot_format="notmat", + spect_format=None, + options_to_change=options_to_change, + ) + cfg = vak.config.parse.from_toml_path(toml_path) + model_config = vak.config.model.config_from_toml_path(toml_path, cfg.eval.model) + with pytest.raises(NotADirectoryError): + vak.eval.frame_classification.eval_frame_classification_model( + model_name=cfg.eval.model, + model_config=model_config, + dataset_path=cfg.eval.dataset_path, + checkpoint_path=cfg.eval.checkpoint_path, + labelmap_path=cfg.eval.labelmap_path, + output_dir=cfg.eval.output_dir, + num_workers=cfg.eval.num_workers, + transform_params=cfg.eval.transform_params, + dataset_params=cfg.eval.dataset_params, + spect_scaler_path=cfg.eval.spect_scaler_path, + device=cfg.eval.device, + ) diff --git a/tests/test_eval/test_parametric_umap.py b/tests/test_eval/test_parametric_umap.py new file mode 100644 
index 000000000..adc95a729 --- /dev/null +++ b/tests/test_eval/test_parametric_umap.py @@ -0,0 +1,171 @@ +"""tests for vak.eval.parametric_umap module""" +import pytest + +import vak.config +import vak.common.constants +import vak.common.paths +import vak.eval.parametric_umap + + +def assert_eval_output_matches_expected(model_name, output_dir): + eval_csv = sorted(output_dir.glob(f"eval_{model_name}*csv")) + assert len(eval_csv) == 1 + + +@pytest.mark.parametrize( + "model_name, audio_format, spect_format, annot_format", + [ + ("ConvEncoderUMAP", "cbin", None, "notmat"), + ], +) +def test_eval_parametric_umap_model( + model_name, + audio_format, + spect_format, + annot_format, + specific_config, + tmp_path, + device, +): + output_dir = tmp_path.joinpath( + f"test_eval_{audio_format}_{spect_format}_{annot_format}" + ) + output_dir.mkdir() + + options_to_change = [ + {"section": "EVAL", "option": "output_dir", "value": str(output_dir)}, + {"section": "EVAL", "option": "device", "value": device}, + ] + + toml_path = specific_config( + config_type="eval", + model=model_name, + audio_format=audio_format, + annot_format=annot_format, + spect_format=spect_format, + options_to_change=options_to_change, + ) + cfg = vak.config.parse.from_toml_path(toml_path) + model_config = vak.config.model.config_from_toml_path(toml_path, cfg.eval.model) + + vak.eval.parametric_umap.eval_parametric_umap_model( + model_name=cfg.eval.model, + model_config=model_config, + dataset_path=cfg.eval.dataset_path, + checkpoint_path=cfg.eval.checkpoint_path, + output_dir=cfg.eval.output_dir, + batch_size=cfg.eval.batch_size, + num_workers=cfg.eval.num_workers, + transform_params=cfg.eval.transform_params, + dataset_params=cfg.eval.dataset_params, + device=cfg.eval.device, + ) + + assert_eval_output_matches_expected(cfg.eval.model, output_dir) + + +@pytest.mark.parametrize( + 'path_option_to_change', + [ + {"section": "EVAL", "option": "checkpoint_path", "value": '/obviously/doesnt/exist/ckpt.pt'}, + ] +) +def test_eval_parametric_umap_model_raises_file_not_found( + path_option_to_change, + specific_config, + tmp_path, + device +): + """Test that :func:`vak.eval.parametric_umap.eval_parametric_umap_model` + raises FileNotFoundError when expected""" + output_dir = tmp_path.joinpath( + "test_eval_cbin_notmat_invalid_dataset_path" + ) + output_dir.mkdir() + + options_to_change = [ + {"section": "EVAL", "option": "output_dir", "value": str(output_dir)}, + {"section": "EVAL", "option": "device", "value": device}, + path_option_to_change, + ] + + toml_path = specific_config( + config_type="eval", + model="ConvEncoderUMAP", + audio_format="cbin", + annot_format="notmat", + spect_format=None, + options_to_change=options_to_change, + ) + cfg = vak.config.parse.from_toml_path(toml_path) + model_config = vak.config.model.config_from_toml_path(toml_path, cfg.eval.model) + with pytest.raises(FileNotFoundError): + vak.eval.parametric_umap.eval_parametric_umap_model( + model_name=cfg.eval.model, + model_config=model_config, + dataset_path=cfg.eval.dataset_path, + checkpoint_path=cfg.eval.checkpoint_path, + output_dir=cfg.eval.output_dir, + batch_size=cfg.eval.batch_size, + num_workers=cfg.eval.num_workers, + transform_params=cfg.eval.transform_params, + dataset_params=cfg.eval.dataset_params, + device=cfg.eval.device, + ) + + +@pytest.mark.parametrize( + 'path_option_to_change', + [ + {"section": "EVAL", "option": "dataset_path", "value": '/obviously/doesnt/exist/dataset-dir'}, + {"section": "EVAL", "option": "output_dir",
"value": '/obviously/does/not/exist/output'}, + ] +) +def test_eval_frame_classification_model_raises_not_a_directory( + path_option_to_change, + specific_config, + device, + tmp_path, +): + """Test that :func:`vak.eval.parametric_umap.eval_parametric_umap_model` + raises NotADirectoryError when expected""" + options_to_change = [ + path_option_to_change, + {"section": "EVAL", "option": "device", "value": device}, + ] + + if path_option_to_change["option"] != "output_dir": + # need to make sure output_dir *does* exist + # so we don't detect spurious NotADirectoryError and assume test passes + output_dir = tmp_path.joinpath( + f"test_eval_raises_not_a_directory" + ) + output_dir.mkdir() + options_to_change.append( + {"section": "EVAL", "option": "output_dir", "value": str(output_dir)} + ) + + toml_path = specific_config( + config_type="eval", + model="ConvEncoderUMAP", + audio_format="cbin", + annot_format="notmat", + spect_format=None, + options_to_change=options_to_change, + ) + cfg = vak.config.parse.from_toml_path(toml_path) + model_config = vak.config.model.config_from_toml_path(toml_path, cfg.eval.model) + with pytest.raises(NotADirectoryError): + vak.eval.parametric_umap.eval_parametric_umap_model( + model_name=cfg.eval.model, + model_config=model_config, + dataset_path=cfg.eval.dataset_path, + checkpoint_path=cfg.eval.checkpoint_path, + output_dir=cfg.eval.output_dir, + batch_size=cfg.eval.batch_size, + num_workers=cfg.eval.num_workers, + transform_params=cfg.eval.transform_params, + dataset_params=cfg.eval.dataset_params, + device=cfg.eval.device, + ) + diff --git a/tests/test_learncurve/test_learncurve.py b/tests/test_learncurve/test_frame_classification.py similarity index 62% rename from tests/test_learncurve/test_learncurve.py rename to tests/test_learncurve/test_frame_classification.py index d518a0b08..7ca125fd6 100644 --- a/tests/test_learncurve/test_learncurve.py +++ b/tests/test_learncurve/test_frame_classification.py @@ -1,4 +1,4 @@ -"""tests for vak.learncurve module""" +"""tests for vak.learncurve.frame_classification module""" import pytest import vak.config @@ -11,11 +11,11 @@ def assert_learncurve_output_matches_expected(cfg, model_name, results_path): assert results_path.joinpath("learning_curve.csv").exists() for train_set_dur in cfg.prep.train_set_durs: - train_set_dur_root = results_path.joinpath(f"train_dur_{train_set_dur}s") + train_set_dur_root = results_path / vak.learncurve.dirname.train_dur_dirname(train_set_dur) assert train_set_dur_root.exists() for replicate_num in range(1, cfg.prep.num_replicates + 1): - replicate_path = train_set_dur_root.joinpath(f"replicate_{replicate_num}") + replicate_path = train_set_dur_root / vak.learncurve.dirname.replicate_dirname(replicate_num) assert replicate_path.exists() assert replicate_path.joinpath("labelmap.json").exists() @@ -44,14 +44,21 @@ def assert_learncurve_output_matches_expected(cfg, model_name, results_path): @pytest.mark.slow -def test_learncurve(specific_config, tmp_path, model, device): +@pytest.mark.parametrize( + 'model_name, audio_format, annot_format', + [ + ("TweetyNet", "cbin", "notmat"), + ] +) +def test_learning_curve_for_frame_classification_model( + model_name, audio_format, annot_format, specific_config, tmp_path, device): options_to_change = {"section": "LEARNCURVE", "option": "device", "value": device} toml_path = specific_config( config_type="learncurve", - model=model, - audio_format="cbin", - annot_format="notmat", + model=model_name, + audio_format=audio_format, + 
annot_format=annot_format, options_to_change=options_to_change, ) @@ -60,66 +67,19 @@ def test_learncurve(specific_config, tmp_path, model, device): results_path = vak.common.paths.generate_results_dir_name_as_path(tmp_path) results_path.mkdir() - vak.learncurve.learning_curve( + vak.learncurve.frame_classification.learning_curve_for_frame_classification_model( model_name=cfg.learncurve.model, model_config=model_config, dataset_path=cfg.learncurve.dataset_path, - window_size=cfg.dataloader.window_size, batch_size=cfg.learncurve.batch_size, num_epochs=cfg.learncurve.num_epochs, num_workers=cfg.learncurve.num_workers, - root_results_dir=None, + train_transform_params=cfg.learncurve.train_transform_params, + train_dataset_params=cfg.learncurve.train_dataset_params, + val_transform_params=cfg.learncurve.val_transform_params, + val_dataset_params=cfg.learncurve.val_dataset_params, results_path=results_path, - spect_key=cfg.spect_params.spect_key, - timebins_key=cfg.spect_params.timebins_key, - normalize_spectrograms=cfg.learncurve.normalize_spectrograms, - shuffle=cfg.learncurve.shuffle, - val_step=cfg.learncurve.val_step, - ckpt_step=cfg.learncurve.ckpt_step, - patience=cfg.learncurve.patience, - device=cfg.learncurve.device, - ) - - assert_learncurve_output_matches_expected(cfg, cfg.learncurve.model, results_path) - - -@pytest.mark.slow -def test_learncurve_no_results_path(specific_config, tmp_path, model, device): - root_results_dir = tmp_path.joinpath("test_learncurve_no_results_path") - root_results_dir.mkdir() - - options_to_change = [ - { - "section": "LEARNCURVE", - "option": "root_results_dir", - "value": str(root_results_dir), - }, - {"section": "LEARNCURVE", "option": "device", "value": device}, - ] - - toml_path = specific_config( - config_type="learncurve", - model=model, - audio_format="cbin", - annot_format="notmat", - options_to_change=options_to_change, - ) - - cfg = vak.config.parse.from_toml_path(toml_path) - model_config = vak.config.model.config_from_toml_path(toml_path, cfg.learncurve.model) - - vak.learncurve.learning_curve( - model_name=cfg.learncurve.model, - model_config=model_config, - dataset_path=cfg.learncurve.dataset_path, - window_size=cfg.dataloader.window_size, - batch_size=cfg.learncurve.batch_size, - num_epochs=cfg.learncurve.num_epochs, - num_workers=cfg.learncurve.num_workers, - root_results_dir=cfg.learncurve.root_results_dir, - results_path=None, - spect_key=cfg.spect_params.spect_key, - timebins_key=cfg.spect_params.timebins_key, + post_tfm_kwargs=cfg.learncurve.post_tfm_kwargs, normalize_spectrograms=cfg.learncurve.normalize_spectrograms, shuffle=cfg.learncurve.shuffle, val_step=cfg.learncurve.val_step, @@ -128,10 +88,6 @@ def test_learncurve_no_results_path(specific_config, tmp_path, model, device): device=cfg.learncurve.device, ) - results_path = sorted(root_results_dir.glob(f"{vak.common.constants.RESULTS_DIR_PREFIX}*")) - assert len(results_path) == 1 - results_path = results_path[0] - assert_learncurve_output_matches_expected(cfg, cfg.learncurve.model, results_path) @@ -155,7 +111,7 @@ def test_learncurve_raises_not_a_directory(dir_option_to_change, ] toml_path = specific_config( config_type="learncurve", - model="teenytweetynet", + model="TweetyNet", audio_format="cbin", annot_format="notmat", options_to_change=options_to_change, @@ -166,18 +122,19 @@ def test_learncurve_raises_not_a_directory(dir_option_to_change, results_path = cfg.learncurve.root_results_dir / 'results-dir-timestamp' with pytest.raises(NotADirectoryError): - 
vak.learncurve.learning_curve( + vak.learncurve.frame_classification.learning_curve_for_frame_classification_model( model_name=cfg.learncurve.model, model_config=model_config, dataset_path=cfg.learncurve.dataset_path, - window_size=cfg.dataloader.window_size, batch_size=cfg.learncurve.batch_size, num_epochs=cfg.learncurve.num_epochs, num_workers=cfg.learncurve.num_workers, - root_results_dir=None, + train_transform_params=cfg.learncurve.train_transform_params, + train_dataset_params=cfg.learncurve.train_dataset_params, + val_transform_params=cfg.learncurve.val_transform_params, + val_dataset_params=cfg.learncurve.val_dataset_params, results_path=results_path, - spect_key=cfg.spect_params.spect_key, - timebins_key=cfg.spect_params.timebins_key, + post_tfm_kwargs=cfg.learncurve.post_tfm_kwargs, normalize_spectrograms=cfg.learncurve.normalize_spectrograms, shuffle=cfg.learncurve.shuffle, val_step=cfg.learncurve.val_step, diff --git a/tests/test_models/conftest.py b/tests/test_models/conftest.py index e42b8efd0..3b4c9033d 100644 --- a/tests/test_models/conftest.py +++ b/tests/test_models/conftest.py @@ -1,6 +1,9 @@ import torch +import vak.models.registry + +# ---- mock networks --------------------------------------------------------------------------------------------------- class MockNetwork(torch.nn.Module): """Network used just to test vak.models.base.Model""" def __init__(self, n_classes=10): @@ -17,32 +20,10 @@ def forward(self, x): return self.layers(x) -class MockAcc: - def __init__(self, average='macro'): - self.average = average - - def __call__(self, y: torch.Tensor, y_pred: torch.Tensor): - sample_acc = y == y_pred - if self.average == 'macro': - return sample_acc.mean() - elif self.average == 'micro': - return NotImplemented - - -class MockModel: - """Model definition used just to test vak.models.base.Model""" - network = MockNetwork - loss = torch.nn.CrossEntropyLoss - optimizer = torch.optim.SGD - metrics = {'acc': MockAcc} - default_config = { - 'optimizer': {'lr': 0.003} - } - - class MockEncoder(torch.nn.Module): - """Network used just to test vak.models.base.Model. - Unlike ``MockNetwork``, this network will be put into a + """Network used for testing. + + This network is put into a ``dict`` with ``MockDecoder`` to test that specifying ``network`` as a ``dict`` works. """ @@ -80,8 +61,74 @@ def forward(self, x): return self.layers(x) +# ---- mock metrics ---------------------------------------------------------------------------------------------------- +class MockAcc: + """Mock metric used for testing""" + def __init__(self, average='macro'): + self.average = average + + def __call__(self, y: torch.Tensor, y_pred: torch.Tensor): + sample_acc = y == y_pred + if self.average == 'macro': + return sample_acc.mean() + elif self.average == 'micro': + return NotImplemented + + +# ---- mock model families --------------------------------------------------------------------------------------------- +class UnregisteredMockModelFamily(vak.models.Model): + """A model family defined only for tests. + Used to test :func:`vak.models.registry.model_family`. 
+ """ + def __init__(self, network, optimizer, loss, metrics): + super().__init__( + network=network, loss=loss, optimizer=optimizer, metrics=metrics + ) + + def training_step(self, *args, **kwargs): + pass + + def validation_step(self, *args, **kwargs): + pass + + @classmethod + def from_config(cls, config: dict): + """Return an initialized model instance from a config ``dict``.""" + network, loss, optimizer, metrics = cls.attributes_from_config(config) + return cls( + network=network, + optimizer=optimizer, + loss=loss, + metrics=metrics, + ) + + +# Make a "copy" of UnregisteredModelFamily that we *do* register +# so we can use it to test `vak.models.decorator.model` and other functions +# that require a registered ModelFamily. +# Used when testing :func:`vak.models.decorator.model` -- we need a model in the registry to test +# and we don't want to have to deal with the idiosyncrasies of actual model families +MockModelFamily = type('MockModelFamily', + UnregisteredMockModelFamily.__bases__, + dict(UnregisteredMockModelFamily.__dict__)) +vak.models.registry.model_family(MockModelFamily) + + +# ---- mock models ----------------------------------------------------------------------------------------------------- +class MockModel: + """Model definition used for testing :func:`vak.models.decorator.model`""" + network = MockNetwork + loss = torch.nn.CrossEntropyLoss + optimizer = torch.optim.SGD + metrics = {'acc': MockAcc} + default_config = { + 'optimizer': {'lr': 0.003} + } + + class MockEncoderDecoderModel: - """Model definition used only to that network works with a ``dict``""" + """Model definition used for testing :func:`vak.models.decorator.model`. + Specifically tests that `network` works with a ``dict``""" network = {'MockEncoder': MockEncoder, 'MockDecoder': MockDecoder} loss = torch.nn.TripletMarginWithDistanceLoss optimizer = torch.optim.Adam diff --git a/tests/test_models/test_base.py b/tests/test_models/test_base.py index a8a1c8c63..4f8dc282c 100644 --- a/tests/test_models/test_base.py +++ b/tests/test_models/test_base.py @@ -21,7 +21,6 @@ from .test_definition import ( InvalidMetricsDictKeyModelDefinition, - TeenyTweetyNetDefinition, TweetyNetDefinition, ) @@ -169,12 +168,17 @@ def test_validate_init_raises(self, definition, kwargs, expected_exception, monk vak.models.base.Model.validate_init(**kwargs) MODEL_DEFINITION_MAP = { - 'tweetynet': TweetyNetDefinition, - 'teenytweetynet': TeenyTweetyNetDefinition, + 'TweetyNet': TweetyNetDefinition, } + @pytest.mark.parametrize( + 'model_name', + [ + 'TweetyNet', + ] + ) def test_load_state_dict_from_path(self, - model, + model_name, # our fixtures specific_config, # pytest fixtures @@ -185,33 +189,38 @@ def test_load_state_dict_from_path(self, We use actual model definitions here so we can test with real checkpoints. 
""" - definition = self.MODEL_DEFINITION_MAP[model] - model_name = definition.__name__.replace('Definition', '').lower() + definition = self.MODEL_DEFINITION_MAP[model_name] train_toml_path = specific_config('train', model_name, audio_format='cbin', annot_format='notmat') train_cfg = vak.config.parse.from_toml_path(train_toml_path) # stuff we need just to be able to instantiate network - metadata = vak.datasets.metadata.Metadata.from_dataset_path(train_cfg.train.dataset_path) - dataset_csv_path = train_cfg.train.dataset_path / metadata.dataset_csv_filename labelmap = vak.common.labels.to_map(train_cfg.prep.labelset, map_unlabeled=True) - transform, target_transform = vak.transforms.get_defaults("train") - train_dataset = vak.datasets.WindowDataset.from_csv( - dataset_csv_path=dataset_csv_path, + transform, target_transform = vak.transforms.defaults.get_default_transform( + model_name, + "train", + transform_kwargs={}, + ) + train_dataset = vak.datasets.frame_classification.WindowDataset.from_dataset_path( + dataset_path=train_cfg.train.dataset_path, split="train", - labelmap=labelmap, - window_size=train_cfg.dataloader.window_size, - spect_key='s', - timebins_key='t', + window_size=train_cfg.train.train_dataset_params['window_size'], transform=transform, target_transform=target_transform, ) input_shape = train_dataset.shape + num_input_channels = input_shape[-3] + num_freqbins = input_shape[-2] monkeypatch.setattr( vak.models.base.Model, 'definition', definition, raising=False ) # network is the one thing that has required args - network = definition.network(num_classes=len(labelmap), input_shape=input_shape) + # and we also need to use its config from the toml file + model_config = vak.config.model.config_from_toml_path(train_toml_path, model_name) + network = definition.network(num_classes=len(labelmap), + num_input_channels=num_input_channels, + num_freqbins=num_freqbins, + **model_config['network']) model = vak.models.base.Model(network=network) model.to(device) diff --git a/tests/test_models/test_convencoder_umap.py b/tests/test_models/test_convencoder_umap.py new file mode 100644 index 000000000..29c63643c --- /dev/null +++ b/tests/test_models/test_convencoder_umap.py @@ -0,0 +1,36 @@ +import pytest + +import vak + + +class TestConvEncoderUMAP: + @pytest.mark.parametrize( + 'input_shape', + [ + (1, 32, 32), + (1, 64, 64), + ] + ) + def test_init(self, input_shape): + network = { + 'encoder': vak.models.ConvEncoderUMAP.definition.network['encoder'](input_shape=input_shape) + } + model = vak.models.ConvEncoderUMAP(network=network) + assert isinstance(model, vak.models.ConvEncoderUMAP) + for attr in ('network', 'loss', 'optimizer'): + assert hasattr(model, attr) + attr_from_definition = getattr(vak.models.convencoder_umap.ConvEncoderUMAP.definition, attr) + if isinstance(attr_from_definition, dict): + attr_from_model = getattr(model, attr) + assert isinstance(attr_from_model, dict) + assert attr_from_model.keys() == attr_from_definition.keys() + for net_name, net_instance in attr_from_model.items(): + assert isinstance(net_instance, attr_from_definition[net_name]) + else: + assert isinstance(getattr(model, attr), + getattr(vak.models.convencoder_umap.ConvEncoderUMAP.definition, attr)) + assert hasattr(model, 'metrics') + assert isinstance(model.metrics, dict) + for metric_name, metric_callable in model.metrics.items(): + assert isinstance(metric_callable, + vak.models.convencoder_umap.ConvEncoderUMAP.definition.metrics[metric_name]) diff --git a/tests/test_models/test_das.py 
b/tests/test_models/test_das.py deleted file mode 100644 index f7fd3b1e8..000000000 --- a/tests/test_models/test_das.py +++ /dev/null @@ -1,76 +0,0 @@ -import dataclasses - -import pytest -import pytorch_lightning as lightning -import torch - -import vak.models - - -class TestDAS: - def test_model_is_decorated(self): - assert issubclass(vak.models.DAS, - vak.models.WindowedFrameClassificationModel) - assert issubclass(vak.models.DAS, - vak.models.base.Model) - assert issubclass(vak.models.DAS, - lightning.LightningModule) - - @pytest.mark.parametrize( - 'net_config_dataclass', - [ - vak.nets.das.net.DASNetBengaleseFinchConfig, - vak.nets.das.net.DASNetFlyMultichannelConfig, - vak.nets.das.net.DASNetFlySinglechannelConfig, - vak.nets.das.net.DASNetMarmosetConfig, - vak.nets.das.net.DASNetMouseConfig, - ] - ) - def test_init(self, net_config_dataclass): - net_config_dict = dataclasses.asdict(net_config_dataclass) - network = vak.nets.das.net.DASNet(**net_config_dict) - mock_labelmap = {lbl: str(lbl) for lbl in range(net_config_dict['num_classes'])} - - model = vak.models.DAS(labelmap=mock_labelmap, network=network) - - assert isinstance(model, vak.models.DAS) - for attr in ('network', 'loss', 'optimizer'): - assert hasattr(model, attr) - assert isinstance(getattr(model, attr), - getattr(vak.models.das.DAS.definition, attr)) - assert hasattr(model, 'metrics') - assert isinstance(model.metrics, dict) - for metric_name, metric_callable in model.metrics.items(): - assert isinstance(metric_callable, - vak.models.das.DAS.definition.metrics[metric_name]) - - - @pytest.mark.parametrize( - 'net_config_dataclass', - [ - vak.nets.das.net.DASNetBengaleseFinchConfig, - vak.nets.das.net.DASNetFlyMultichannelConfig, - vak.nets.das.net.DASNetFlySinglechannelConfig, - vak.nets.das.net.DASNetMarmosetConfig, - vak.nets.das.net.DASNetMouseConfig, - ] - ) - def test_forward(self, net_config_dataclass): - net_config_dict = dataclasses.asdict(net_config_dataclass) - network = vak.nets.das.net.DASNet(**net_config_dict) - mock_labelmap = {lbl: str(lbl) for lbl in range(net_config_dict['num_classes'])} - - model = vak.models.DAS(labelmap=mock_labelmap, network=network) - - FAKE_BATCH_SIZE = 8 - fake_input_shape = (FAKE_BATCH_SIZE, - net_config_dict['n_audio_channels'], - net_config_dict['num_samples']) - fake_input = torch.rand(*fake_input_shape) - - out = model(fake_input) - - out_shape = out.shape - assert out_shape[0] == FAKE_BATCH_SIZE - assert out_shape[1] == net_config_dict['num_classes'] - assert out_shape[2] == net_config_dict['num_samples'] diff --git a/tests/test_models/test_decorator.py b/tests/test_models/test_decorator.py index 4ea6dcf5a..bb06b9246 100644 --- a/tests/test_models/test_decorator.py +++ b/tests/test_models/test_decorator.py @@ -2,9 +2,8 @@ import vak.models -from .test_base import MockModel, MockEncoderDecoderModel +from .conftest import MockModel, MockModelFamily, MockEncoderDecoderModel from .test_definition import TweetyNetDefinition as TweetyNet -from .test_definition import TeenyTweetyNetDefinition as TeenyTweetyNet from .test_definition import ( MissingClassVarModelDefinition, @@ -21,30 +20,28 @@ TweetyNet.__name__ = 'TweetyNet' -TeenyTweetyNet.__name__ = 'TeenyTweetyNet' @pytest.mark.parametrize( 'definition, family, expected_name', [ (MockModel, - vak.models.Model, + MockModelFamily, 'MockModel'), (MockEncoderDecoderModel, - vak.models.Model, + MockModelFamily, 'MockEncoderDecoderModel'), - (TweetyNet, - vak.models.WindowedFrameClassificationModel, - 'TweetyNet'), - (TeenyTweetyNet, 
- vak.models.WindowedFrameClassificationModel, - 'TeenyTweetyNet'), ] ) def test_model(definition, family, expected_name): + """Test that :func:`vak.models.decorator.model` decorator + returns a subclass of the specified model family, + and has the expected name""" model_class = vak.models.decorator.model(family)(definition) assert issubclass(model_class, family) assert model_class.__name__ == expected_name + # need to delete model from registry so other tests don't fail + del vak.models.registry.MODEL_REGISTRY[model_class.__name__] @pytest.mark.parametrize( @@ -64,4 +61,4 @@ def test_model(definition, family, expected_name): ) def test_model_raises(definition): with pytest.raises(vak.models.decorator.ModelDefinitionValidationError): - model_class = vak.models.decorator.model(vak.models.base.Model)(definition) + vak.models.decorator.model(vak.models.base.Model)(definition) diff --git a/tests/test_models/test_definition.py b/tests/test_models/test_definition.py index 40ec61989..7ade01089 100644 --- a/tests/test_models/test_definition.py +++ b/tests/test_models/test_definition.py @@ -4,7 +4,7 @@ import torch from vak import metrics -from vak.nets import TweetyNet, TeenyTweetyNet +from vak.nets import TweetyNet import vak.models @@ -34,26 +34,6 @@ class TweetyNetDefinition: } -class TeenyTweetyNetDefinition: - """Redefine here to test that ``vak.models.definition.validate`` - actually works on classes we care about. - - Can't use the class itself in the ``vak.models.tweetynet`` - because that's already decorated with ``vak.models.decorator.model``. - """ - network = TeenyTweetyNet - loss = torch.nn.CrossEntropyLoss - optimizer = torch.optim.Adam - metrics = {'acc': metrics.Accuracy, - 'levenshtein': metrics.Levenshtein, - 'segment_error_rate': metrics.SegmentErrorRate, - 'loss': torch.nn.CrossEntropyLoss} - default_config = { - 'optimizer': - {'lr': 0.003} - } - - class MissingClassVarModelDefinition: network = TweetyNet loss = torch.nn.CrossEntropyLoss @@ -273,7 +253,6 @@ class TestModelDefinition: 'definition', [ TweetyNetDefinition, - TeenyTweetyNetDefinition ] ) def test_validate(self, definition): diff --git a/tests/test_models/test_ed_tcn.py b/tests/test_models/test_ed_tcn.py new file mode 100644 index 000000000..9e81c0d9a --- /dev/null +++ b/tests/test_models/test_ed_tcn.py @@ -0,0 +1,28 @@ +import pytest + +import vak + +from .test_tweetynet import TEST_INIT_ARGVALS + + +class TestED_TCN: + @pytest.mark.parametrize( + 'labelmap, input_shape', + TEST_INIT_ARGVALS + ) + def test_init(self, labelmap, input_shape): + # network has required args that need to be determined dynamically + num_input_channels = input_shape[-3] + num_freqbins = input_shape[-2] + network = vak.models.ED_TCN.definition.network(len(labelmap), num_input_channels, num_freqbins) + model = vak.models.ED_TCN(labelmap=labelmap, network=network) + assert isinstance(model, vak.models.ED_TCN) + for attr in ('network', 'loss', 'optimizer'): + assert hasattr(model, attr) + assert isinstance(getattr(model, attr), + getattr(vak.models.ed_tcn.ED_TCN.definition, attr)) + assert hasattr(model, 'metrics') + assert isinstance(model.metrics, dict) + for metric_name, metric_callable in model.metrics.items(): + assert isinstance(metric_callable, + vak.models.ed_tcn.ED_TCN.definition.metrics[metric_name]) diff --git a/tests/test_models/test_windowed_frame_classification_model.py b/tests/test_models/test_frame_classification_model.py similarity index 67% rename from tests/test_models/test_windowed_frame_classification_model.py rename to 
tests/test_models/test_frame_classification_model.py index 3eeed5978..4694fa160 100644 --- a/tests/test_models/test_windowed_frame_classification_model.py +++ b/tests/test_models/test_frame_classification_model.py @@ -7,9 +7,7 @@ import vak.models from .test_definition import ( - TeenyTweetyNetDefinition, TweetyNetDefinition, - InvalidMetricsDictKeyModelDefinition, ) from .test_tweetynet import LABELMAPS, INPUT_SHAPES @@ -17,13 +15,12 @@ # pytest.mark.parametrize vals for test_init_with_definition MODEL_DEFS = ( TweetyNetDefinition, - TeenyTweetyNetDefinition, ) TEST_INIT_ARGVALS = itertools.product(LABELMAPS, INPUT_SHAPES, MODEL_DEFS) -class TestWindowedFrameClassificationModel: +class TestFrameClassificationModel: @pytest.mark.parametrize( 'labelmap, input_shape, definition', @@ -34,21 +31,22 @@ def test_init(self, input_shape, definition, monkeypatch): - """Test Model.__init__ works as expected""" + """Test FrameClassificationModel.__init__ works as expected""" # monkeypatch a definition so we can test __init__ definition = vak.models.definition.validate(definition) monkeypatch.setattr( - vak.models.WindowedFrameClassificationModel, + vak.models.FrameClassificationModel, 'definition', definition, raising=False ) + num_input_channels, num_freqbins = input_shape[0], input_shape[1] # network has required args that need to be determined dynamically - network = definition.network(num_classes=len(labelmap), input_shape=input_shape) - model = vak.models.WindowedFrameClassificationModel(labelmap=labelmap, network=network) + network = definition.network(len(labelmap), num_input_channels, num_freqbins) + model = vak.models.FrameClassificationModel(labelmap=labelmap, network=network) # now test that attributes are what we expect - assert isinstance(model, vak.models.WindowedFrameClassificationModel) + assert isinstance(model, vak.models.FrameClassificationModel) for attr in ('network', 'loss', 'optimizer', 'metrics'): assert hasattr(model, attr) model_attr = getattr(model, attr) @@ -75,7 +73,6 @@ def test_init(self, @pytest.mark.parametrize( 'definition', [ - TeenyTweetyNetDefinition, TweetyNetDefinition, ] ) @@ -87,7 +84,7 @@ def test_from_config(self, monkeypatch, ): definition = vak.models.definition.validate(definition) - model_name = definition.__name__.replace('Definition', '').lower() + model_name = definition.__name__.replace('Definition', '') toml_path = specific_config('train', model_name, audio_format='cbin', annot_format='notmat') cfg = vak.config.parse.from_toml_path(toml_path) @@ -95,42 +92,47 @@ def test_from_config(self, labelmap = vak.common.labels.to_map(cfg.prep.labelset, map_unlabeled=True) monkeypatch.setattr( - vak.models.WindowedFrameClassificationModel, 'definition', definition, raising=False + vak.models.FrameClassificationModel, 'definition', definition, raising=False ) config = vak.config.model.config_from_toml_path(toml_path, cfg.train.model) + num_input_channels, num_freqbins = self.MOCK_INPUT_SHAPE[0], self.MOCK_INPUT_SHAPE[1] + config["network"].update( num_classes=len(labelmap), - input_shape=self.MOCK_INPUT_SHAPE, + num_input_channels=num_input_channels, + num_freqbins=num_freqbins ) - model = vak.models.WindowedFrameClassificationModel.from_config(config=config, labelmap=labelmap) - assert isinstance(model, vak.models.WindowedFrameClassificationModel) + model = vak.models.FrameClassificationModel.from_config(config=config, labelmap=labelmap) + assert isinstance(model, vak.models.FrameClassificationModel) + # below, we can only test the config kwargs that actually 
end up as attributes + # so we use `if hasattr` before checking if 'network' in config: if inspect.isclass(definition.network): for network_kwarg, network_kwargval in config['network'].items(): - assert hasattr(model.network, network_kwarg) - assert getattr(model.network, network_kwarg) == network_kwargval + if hasattr(model.network, network_kwarg): + assert getattr(model.network, network_kwarg) == network_kwargval elif isinstance(definition.network, dict): for net_name, net_kwargs in config['network'].items(): for network_kwarg, network_kwargval in net_kwargs.items(): - assert hasattr(model.network[net_name], network_kwarg) - assert getattr(model.network[net_name], network_kwarg) == network_kwargval + if hasattr(model.network[net_name], network_kwarg): + assert getattr(model.network[net_name], network_kwarg) == network_kwargval if 'loss' in config: for loss_kwarg, loss_kwargval in config['loss'].items(): - assert hasattr(model.loss, loss_kwarg) - assert getattr(model.loss, loss_kwarg) == loss_kwargval + if hasattr(model.loss, loss_kwarg): + assert getattr(model.loss, loss_kwarg) == loss_kwargval if 'optimizer' in config: for optimizer_kwarg, optimizer_kwargval in config['optimizer'].items(): - assert optimizer_kwarg in model.optimizer.param_groups[0] - assert model.optimizer.param_groups[0][optimizer_kwarg] == optimizer_kwargval + if optimizer_kwarg in model.optimizer.param_groups[0]: + assert model.optimizer.param_groups[0][optimizer_kwarg] == optimizer_kwargval if 'metrics' in config: for metric_name, metric_kwargs in config['metrics'].items(): assert metric_name in model.metrics for metric_kwarg, metric_kwargval in metric_kwargs.items(): - assert hasattr(model.metrics[metric_name], metric_kwarg) - assert getattr(model.metrics[metric_name], metric_kwarg) == metric_kwargval + if hasattr(model.metrics[metric_name], metric_kwarg): + assert getattr(model.metrics[metric_name], metric_kwarg) == metric_kwargval diff --git a/tests/test_models/test_parametric_umap_model.py b/tests/test_models/test_parametric_umap_model.py new file mode 100644 index 000000000..b3b75d3e0 --- /dev/null +++ b/tests/test_models/test_parametric_umap_model.py @@ -0,0 +1,130 @@ +import inspect + +import pytest +import torch + +import vak.models + + +class ConvEncoderUMAPDefinition: + network = {"encoder": vak.nets.ConvEncoder} + loss = vak.nn.UmapLoss + optimizer = torch.optim.AdamW + metrics = { + "acc": vak.metrics.Accuracy, + "levenshtein": vak.metrics.Levenshtein, + "segment_error_rate": vak.metrics.SegmentErrorRate, + "loss": torch.nn.CrossEntropyLoss, + } + default_config = { + "optimizer": {"lr": 1e-3}, + } + + +class TestParametricUMAPModel: + + @pytest.mark.parametrize( + 'input_shape, definition', + [ + ((1, 128, 128), ConvEncoderUMAPDefinition), + ] + ) + def test_init( + self, + input_shape, + definition, + monkeypatch, + ): + """Test ParametricUMAPModel.__init__ works as expected""" + # monkeypatch a definition so we can test __init__ + definition = vak.models.definition.validate(definition) + monkeypatch.setattr( + vak.models.ParametricUMAPModel, + 'definition', + definition, + raising=False + ) + network = {'encoder': vak.nets.ConvEncoder(input_shape)} + model = vak.models.ParametricUMAPModel(network=network) + + # now test that attributes are what we expect + assert isinstance(model, vak.models.ParametricUMAPModel) + for attr in ('network', 'loss', 'optimizer', 'metrics'): + assert hasattr(model, attr) + model_attr = getattr(model, attr) + definition_attr = getattr(definition, attr) + if 
inspect.isclass(definition_attr): + assert isinstance(model_attr, definition_attr) + elif isinstance(definition_attr, dict): + assert isinstance(model_attr, dict) + for definition_key, definition_val in definition_attr.items(): + assert definition_key in model_attr + model_val = model_attr[definition_key] + if inspect.isclass(definition_val): + assert isinstance(model_val, definition_val) + else: + assert callable(definition_val) + assert model_val is definition_val + else: + # must be a function + assert callable(model_attr) + assert model_attr is definition_attr + + @pytest.mark.xfail + @pytest.mark.parametrize( + 'input_shape, definition', + [ + ((1, 128, 128), ConvEncoderUMAPDefinition), + ] + ) + def test_from_config( + self, + input_shape, + definition, + specific_config, + monkeypatch, + ): + definition = vak.models.definition.validate(definition) + model_name = definition.__name__.replace('Definition', '') + toml_path = specific_config('train', model_name, audio_format='cbin', annot_format='notmat') + cfg = vak.config.parse.from_toml_path(toml_path) + + monkeypatch.setattr( + vak.models.ParametricUMAPModel, 'definition', definition, raising=False + ) + + config = vak.config.model.config_from_toml_path(toml_path, cfg.train.model) + config["network"].update( + encoder=dict(input_shape=input_shape) + ) + + model = vak.models.ParametricUMAPModel.from_config(config=config) + assert isinstance(model, vak.models.ParametricUMAPModel) + + if 'network' in config: + if inspect.isclass(definition.network): + for network_kwarg, network_kwargval in config['network'].items(): + assert hasattr(model.network, network_kwarg) + assert getattr(model.network, network_kwarg) == network_kwargval + elif isinstance(definition.network, dict): + for net_name, net_kwargs in config['network'].items(): + for network_kwarg, network_kwargval in net_kwargs.items(): + assert hasattr(model.network[net_name], network_kwarg) + assert getattr(model.network[net_name], network_kwarg) == network_kwargval + + if 'loss' in config: + for loss_kwarg, loss_kwargval in config['loss'].items(): + assert hasattr(model.loss, loss_kwarg) + assert getattr(model.loss, loss_kwarg) == loss_kwargval + + if 'optimizer' in config: + for optimizer_kwarg, optimizer_kwargval in config['optimizer'].items(): + assert optimizer_kwarg in model.optimizer.param_groups[0] + assert model.optimizer.param_groups[0][optimizer_kwarg] == optimizer_kwargval + + if 'metrics' in config: + for metric_name, metric_kwargs in config['metrics'].items(): + assert metric_name in model.metrics + for metric_kwarg, metric_kwargval in metric_kwargs.items(): + assert hasattr(model.metrics[metric_name], metric_kwarg) + assert getattr(model.metrics[metric_name], metric_kwarg) == metric_kwargval diff --git a/tests/test_models/test_registry.py b/tests/test_models/test_registry.py index 04fe30ef2..af8f8cfc6 100644 --- a/tests/test_models/test_registry.py +++ b/tests/test_models/test_registry.py @@ -1,10 +1,107 @@ +import inspect + +import pytest + +import vak.models.registry +from .conftest import ( + MockModel, + MockEncoderDecoderModel, + MockModelFamily, + UnregisteredMockModelFamily, +) +from .test_definition import ( + TweetyNetDefinition, +) + + def test_model_family(): - assert False + """Test that :func:`vak.models.registry.model_family` + adds a model family to the registry""" + # we make this copy so that we don't register UnregisteredMockModelFamily; + # we need that class to stay unregistered for other tests + ModelFamilyCopy = type('ModelFamilyCopy', + 
UnregisteredMockModelFamily.__bases__, + dict(UnregisteredMockModelFamily.__dict__)) + assert ModelFamilyCopy.__name__ not in vak.models.registry.MODEL_FAMILY_REGISTRY + vak.models.registry.model_family(ModelFamilyCopy) + assert ModelFamilyCopy.__name__ in vak.models.registry.MODEL_FAMILY_REGISTRY + assert vak.models.registry.MODEL_FAMILY_REGISTRY[ModelFamilyCopy.__name__] == ModelFamilyCopy + + +@pytest.mark.parametrize( + 'family, definition', + [ + (MockModelFamily, MockModel), + (MockModelFamily, MockEncoderDecoderModel), + ] +) +def test_register_model(family, definition): + """Test that :func:`vak.models.registry.register_model` + adds a model to the registry""" + # to set up, we repeat what :func:`vak.models.decorator.model` does + attributes = dict(family.__dict__) + attributes.update({"definition": definition}) + subclass_name = definition.__name__ + subclass = type(subclass_name, (family,), attributes) + subclass.__module__ = definition.__module__ + + assert subclass_name not in vak.models.registry.MODEL_REGISTRY + vak.models.registry.register_model(subclass) + assert subclass_name in vak.models.registry.MODEL_REGISTRY + assert vak.models.registry.MODEL_REGISTRY[subclass_name] == subclass + del vak.models.registry.MODEL_REGISTRY[subclass_name] # so this test doesn't fail for the second case + + +def test_register_model_raises_family(): + """Test that :func:`vak.models.registry.register_model` + raises an error if parent class is not in model_family_classes""" + # to set up, we repeat what :func:`vak.models.decorator.model` does, + # but notice that we use an unregistered model family + attributes = dict(UnregisteredMockModelFamily.__dict__) + attributes.update({"definition": MockModel}) + subclass_name = MockModel.__name__ + subclass = type(subclass_name, (UnregisteredMockModelFamily,), attributes) + subclass.__module__ = MockModel.__module__ + + with pytest.raises(TypeError): + vak.models.registry.register_model(subclass) + + +@pytest.mark.parametrize( + 'family, definition', + [ + (vak.models.FrameClassificationModel, TweetyNetDefinition), + ] +) +def test_register_model_raises_registered(family, definition): + """Test that :func:`vak.models.registry.register_model` + raises an error if a class is already registered""" + # to set up, we repeat what :func:`vak.models.decorator.model` does + attributes = dict(family.__dict__) + attributes.update({"definition": definition}) + # NOTE we replace 'Definition' with an empty string + # so that the name clashes with an existing model name + subclass_name = definition.__name__.replace('Definition', '') + subclass = type(subclass_name, (family,), attributes) + subclass.__module__ = definition.__module__ + + with pytest.raises(ValueError): + vak.models.registry.register_model(subclass) -def test_register_model(): - assert False +def test___get_attr__MODEL_FAMILY_FROM_NAME(): + assert hasattr(vak.models.registry, 'MODEL_FAMILY_FROM_NAME') + attr = getattr(vak.models.registry, 'MODEL_FAMILY_FROM_NAME') + assert isinstance(attr, dict) + for model_name, model_class in vak.models.registry.MODEL_REGISTRY.items(): + model_parent_class = inspect.getmro(model_class)[1] + family_name = model_parent_class.__name__ + assert attr[model_name] == family_name -def test___get_attr__(): - assert False +def test___get_attr__MODEL_NAMES(): + assert hasattr(vak.models.registry, 'MODEL_NAMES') + attr = getattr(vak.models.registry, 'MODEL_NAMES') + assert isinstance(attr, list) + for model_name in vak.models.registry.MODEL_REGISTRY.keys(): + assert model_name in 
attr diff --git a/tests/test_models/test_teenytweetynet.py b/tests/test_models/test_teenytweetynet.py deleted file mode 100644 index 8ebb50e34..000000000 --- a/tests/test_models/test_teenytweetynet.py +++ /dev/null @@ -1,38 +0,0 @@ -import sys - -import pytest -import pytorch_lightning as lightning - -import vak.models - -from .test_tweetynet import TEST_INIT_ARGVALS - - -class TestTeenyTweetyNet: - def test_model_is_decorated(self): - assert issubclass(vak.models.TeenyTweetyNet, - vak.models.WindowedFrameClassificationModel) - assert issubclass(vak.models.TeenyTweetyNet, - vak.models.base.Model) - assert issubclass(vak.models.TeenyTweetyNet, - lightning.LightningModule) - - @pytest.mark.parametrize( - 'labelmap, input_shape', - TEST_INIT_ARGVALS - ) - def test_init(self, labelmap, input_shape): - # network has required args that need to be determined dynamically - network = vak.models.TeenyTweetyNet.definition.network(num_classes=len(labelmap), - input_shape=input_shape) - model = vak.models.TeenyTweetyNet(labelmap=labelmap, network=network) - assert isinstance(model, vak.models.TeenyTweetyNet) - for attr in ('network', 'loss', 'optimizer'): - assert hasattr(model, attr) - assert isinstance(getattr(model, attr), - getattr(vak.models.teenytweetynet.TeenyTweetyNet.definition, attr)) - assert hasattr(model, 'metrics') - assert isinstance(model.metrics, dict) - for metric_name, metric_callable in model.metrics.items(): - assert isinstance(metric_callable, - vak.models.teenytweetynet.TeenyTweetyNet.definition.metrics[metric_name]) diff --git a/tests/test_models/test_tweetynet.py b/tests/test_models/test_tweetynet.py index 019c2b531..6c06c2029 100644 --- a/tests/test_models/test_tweetynet.py +++ b/tests/test_models/test_tweetynet.py @@ -33,7 +33,7 @@ class TestTweetyNet: def test_model_is_decorated(self): assert issubclass(vak.models.TweetyNet, - vak.models.WindowedFrameClassificationModel) + vak.models.FrameClassificationModel) assert issubclass(vak.models.TweetyNet, vak.models.base.Model) assert issubclass(vak.models.TweetyNet, @@ -45,8 +45,9 @@ def test_model_is_decorated(self): ) def test_init(self, labelmap, input_shape): # network has required args that need to be determined dynamically - network = vak.models.TweetyNet.definition.network(num_classes=len(labelmap), - input_shape=input_shape) + num_input_channels = input_shape[-3] + num_freqbins = input_shape[-2] + network = vak.models.TweetyNet.definition.network(len(labelmap), num_input_channels, num_freqbins) model = vak.models.TweetyNet(labelmap=labelmap, network=network) assert isinstance(model, vak.models.TweetyNet) for attr in ('network', 'loss', 'optimizer'): diff --git a/tests/test_nets/test_convencoder.py b/tests/test_nets/test_convencoder.py new file mode 100644 index 000000000..eaa3b6f6d --- /dev/null +++ b/tests/test_nets/test_convencoder.py @@ -0,0 +1,53 @@ +import inspect + +import torch +import pytest + +import vak.nets + + +class TestConvEncoder: + + @pytest.mark.parametrize( + 'input_shape', + [ + ( + 1, 128, 128, + ), + ( + 1, 256, 256, + ), + ] + ) + def test_init(self, input_shape): + """test we can instantiate ConvEncoder + and it has the expected attributes""" + net = vak.nets.ConvEncoder(input_shape) + assert isinstance(net, vak.nets.ConvEncoder) + for expected_attr, expected_type in ( + ('input_shape', tuple), + ('num_input_channels', int), + ('conv', torch.nn.Module), + ('encoder', torch.nn.Module), + ): + assert hasattr(net, expected_attr) + assert isinstance(getattr(net, expected_attr), expected_type) + + assert 
net.input_shape == input_shape + + @pytest.mark.parametrize( + 'input_shape, batch_size', + [ + ((1, 128, 128,), 32), + ((1, 256, 256,), 64), + ] + ) + def test_forward(self, input_shape, batch_size): + """test we can forward a tensor through a ConvEncoder instance + and get the expected output""" + + input = torch.rand(batch_size, *input_shape) # a "batch" + net = vak.nets.ConvEncoder(input_shape) + out = net(input) + assert isinstance(out, torch.Tensor) + diff --git a/tests/test_nets/test_das/test_kapre.py b/tests/test_nets/test_das/test_kapre.py deleted file mode 100644 index ed1ed90f0..000000000 --- a/tests/test_nets/test_das/test_kapre.py +++ /dev/null @@ -1,211 +0,0 @@ -import inspect - -import numpy as np -import pytest -import torch - -import vak.nets.das.kapre - - -@pytest.mark.parametrize( - 'input_length, filter_size, padding, stride, dilation, expected_output_length', - [ - # we just test we get expected answer for DAS, not that whole function is correct - (1024, 64, 'same', 16, 1, 64), - ] -) -def test_conv_output_length(input_length, filter_size, padding, stride, dilation, expected_output_length): - output_length = vak.nets.das.kapre.conv_output_length( - input_length, filter_size, padding, stride, dilation - ) - assert output_length == expected_output_length - - -@pytest.mark.parametrize( - 'n_dft', - [ - 64, - 512, - 1024, - 2048, - ] -) -def test_get_stft_kernels(n_dft): - real, img = vak.nets.das.kapre.get_stft_kernels(n_dft) - for out_tensor in (real, img): - assert isinstance(out_tensor, torch.Tensor) - assert out_tensor.dtype == torch.float32 - assert out_tensor.shape[0] == n_dft - assert out_tensor.shape[-1] == int(n_dft // 2 + 1) - - -@pytest.mark.parametrize( - 'nb_pre_dft', - [ - 64, - ] -) -def test_get_stft_kernels_allclose(nb_pre_dft, specific_stft_kernels_factory): - expected_real, expected_imag = specific_stft_kernels_factory(nb_pre_dft) - real, imag = vak.nets.das.kapre.get_stft_kernels(nb_pre_dft) - real_np, imag_np = real.cpu().numpy(), imag.cpu().numpy() - for out, expected in zip( - (real_np, imag_np), (expected_real, expected_imag) - ): - assert np.allclose( - out, expected - ) - - -def test_amplitude_to_decibel_real_data(amp_to_db_in_out_tuples, device): - # set up ---- - amp_in, expected_amp_out = amp_to_db_in_out_tuples - amp_in = torch.from_numpy(amp_in).to(device) - - # get defaults - sig = inspect.signature(vak.nets.das.kapre.amplitude_to_decibel) - params = sig.parameters - default_dynamic_range = params['dynamic_range'].default - - # actually call function - amp_out = vak.nets.das.kapre.amplitude_to_decibel(amp_in) - - amp_out = amp_out.cpu().numpy() - - # test - assert amp_out.max() == 0. - assert amp_out.min() >= -default_dynamic_range # internally we set to negative - - np.testing.assert_allclose( - amp_out, - expected_amp_out, - atol=1e-5, rtol=1e-3, - ) - - -# from https://www.tensorflow.org/api_docs/python/tf/nn#same_padding_2: -# Note that the division by 2 means that there might be cases when the padding on both sides -# (top vs bottom, right vs left) are off by one. In this case, the bottom and right sides -# always get the one additional padded pixel. -# For example, when pad_along_height is 5, we pad 2 pixels at the top and 3 pixels at the bottom. -# Note that this is different from existing libraries such as PyTorch and Caffe, -# which explicitly specify the number of padded pixels and always pad the same number of pixels on both sides. 
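# A minimal sketch of that TF-style SAME-padding rule (a hypothetical helper written for
# illustration, assuming TF's documented behavior; not part of the kapre module under test):
def tf_same_pad(pad_along: int) -> tuple:
    """Split total padding TF-style: any odd leftover pixel goes on the bottom/right side."""
    before = pad_along // 2
    after = pad_along - before
    return before, after

assert tf_same_pad(5) == (2, 3)  # pad 2 at the top, 3 at the bottom
assert tf_same_pad(6) == (3, 3)  # even totals split equally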
-@pytest.mark.parametrize( - 'pad_size, expected_padding_tuple', - [ - (5, (2, 3, 2, 3)), - (6, (3, 3, 3, 3)), - ] -) -def test_get_same_pad_tuple(pad_size, expected_padding_tuple): - padding_tuple = vak.nets.das.kapre.get_same_pad_tuple(pad_h=pad_size, pad_w=pad_size) - assert padding_tuple == expected_padding_tuple - - -# TODO: fix -@pytest.mark.xfail(msg='Numerical/implementation error') -def test_conv2d_same(inputs_conv2d_outputs, device, n_dft=64, nb_pre_conv=4): - inputs, expected_conv2d_real, expected_conv2d_imag = inputs_conv2d_outputs - inputs, expected_conv2d_real, expected_conv2d_imag = (inputs.to(device), - expected_conv2d_real.to(device), - expected_conv2d_imag.to(device)) - # permute output tensor dimensions to be in same order we use in torch - # (batch, n_dft, 1, channels/filters) -> (batch, channels/filters, n_dft, 1) - expected_conv2d_real = torch.permute(expected_conv2d_real, (0, 3, 1, 2)) - expected_conv2d_imag = torch.permute(expected_conv2d_imag, (0, 3, 1, 2)) - - inputs = torch.permute(inputs, (0, 2, 1)) # -> channels first - - dft_real_kernels, dft_imag_kernels = vak.nets.das.kapre.get_stft_kernels(n_dft) - # permute dimensions of dft kernels to be what pytorch conv2d uses - # this is exact same permutation vak.nets.das.kapre.Spectrogram does - # after calling get_stft_kernels (so we can test that function returns expected) - dft_real_kernels = torch.permute(dft_real_kernels, (3, 2, 0, 1)) - dft_imag_kernels = torch.permute(dft_imag_kernels, (3, 2, 0, 1)) - - # next line, what ``das.models`` does. No idea how one picks ``nb_pre_conv``. - n_hop = 2 ** nb_pre_conv - - inputs = torch.unsqueeze(inputs, -1) - subsample = (n_hop, 1) - output_real = vak.nets.das.kapre.conv2d_same(inputs, dft_real_kernels, stride=subsample) - output_imag = vak.nets.das.kapre.conv2d_same(inputs, dft_imag_kernels, stride=subsample) - - assert output_real.shape == torch.Size([32, 33, 64, 1]) - assert output_imag.shape == torch.Size([32, 33, 64, 1]) - - np.testing.assert_allclose( - output_real.detach().numpy(), - expected_conv2d_real.numpy(), - atol=1e-5, rtol=1e-3, - ) - - np.testing.assert_allclose( - output_imag.detach().numpy(), - expected_conv2d_imag.numpy(), - atol=1e-5, rtol=1e-3, - ) - - -# TODO: fix -@pytest.mark.xfail(msg='Numerical/implementation error') -def test_spectrogram_mono_real_data(inputs_stft_out_tuples, device, n_dft=64, nb_pre_conv=4): - inputs, stft_out_expected = inputs_stft_out_tuples - inputs, stft_out_expected = inputs.to(device), stft_out_expected.to(device) - - inputs = torch.permute(inputs, (0, 2, 1)) # -> channels first - - dft_real_kernels, dft_imag_kernels = vak.nets.das.kapre.get_stft_kernels(n_dft) - dft_real_kernels = torch.permute(dft_real_kernels, (3, 2, 0, 1)) - dft_imag_kernels = torch.permute(dft_imag_kernels, (3, 2, 0, 1)) - - n_hop = 2 ** nb_pre_conv - stft = vak.nets.das.kapre.Spectrogram( - num_samples=inputs.shape[2], - n_audio_channels=inputs.shape[1], - n_dft=pre_nb_dft, - n_hop=n_hop, - return_decibel_spectrogram=True, - power_spectrogram=1.0, - trainable_kernel=True, - ) - - - inputs = torch.unsqueeze(inputs, -1) - subsample = (n_hop, 1) - output_real = vak.nets.das.kapre.conv2d_same(inputs, dft_real_kernels, stride=subsample) - output_imag = vak.nets.das.kapre.conv2d_same(inputs, dft_imag_kernels, stride=subsample) - - assert output_real.shape == torch.Size([32, 33, 64, 1]) - assert output_imag.shape == torch.Size([32, 33, 64, 1]) - - -# TODO: fix -@pytest.mark.xfail(msg='Numerical/implementation error') -def 
test_spectrogram_real_data(inputs_stft_out_tuples, - device, - nb_pre_conv=4, - pre_nb_dft=64, - ): - inputs, stft_out_expected = inputs_stft_out_tuples - inputs, stft_out_expected = inputs.to(device), stft_out_expected.to(device) - inputs = torch.permute(inputs, (0, 2, 1)) # -> channels first - stft_out_expected = torch.permute(stft_out_expected, (0, 1, 3, 2)) - - n_hop = 2 ** nb_pre_conv - stft = vak.nets.das.kapre.Spectrogram( - num_samples=inputs.shape[2], - n_audio_channels=inputs.shape[1], - n_dft=pre_nb_dft, - n_hop=n_hop, - return_decibel_spectrogram=True, - power_spectrogram=1.0, - trainable_kernel=True, - ) - stft_out = stft(inputs) - - np.testing.assert_allclose( - stft_out.detach().numpy(), - stft_out_expected, - atol=1e-5, rtol=1e-3, - ) diff --git a/tests/test_nets/test_das/test_net.py b/tests/test_nets/test_das/test_net.py deleted file mode 100644 index 940a41ecd..000000000 --- a/tests/test_nets/test_das/test_net.py +++ /dev/null @@ -1,136 +0,0 @@ -import dataclasses - -import pytest -import torch - -import vak.nets.das - - -class TestResidualBlock: - - @pytest.mark.parametrize( - 'in_channels, out_channels, kernel_size, stride, dilation, dropout', - [ - (1024, 64, 2, 1, 1, 0.0), - (64, 64, 2, 1, 2, 0.0), - (64, 64, 2, 1, 4, 0.0), - (64, 64, 2, 1, 8, 0.0), - (64, 64, 2, 1, 16, 0.0), - ] - ) - def test_init(self, in_channels, out_channels, kernel_size, stride, dilation, dropout): - residual_block = vak.nets.das.net.ResidualBlock(in_channels, out_channels, - kernel_size, stride=stride, - dilation=dilation, dropout=dropout) - assert isinstance(residual_block, vak.nets.das.net.ResidualBlock) - - @pytest.mark.parametrize( - 'in_channels, out_channels, L, kernel_size, stride, dilation, dropout', - [ - (1, 64, 1024, 2, 1, 1, 0.0), - (64, 64, 64, 2, 1, 2, 0.0), - (64, 64, 64, 2, 1, 4, 0.0), - (64, 64, 64, 2, 1, 8, 0.0), - (64, 64, 64, 2, 1, 16, 0.0), - (1, 64, 1024, 2, 1, 1, 0.0), - ] - ) - def test_forward(self, in_channels, out_channels, L, kernel_size, stride, dilation, dropout): - residual_block = vak.nets.das.net.ResidualBlock(in_channels, out_channels, - kernel_size, stride=stride, - dilation=dilation, dropout=dropout) - - if in_channels != out_channels: - pytest.xfail( - reason=('DAS implementation of ResidualBlock does not correctly downsample input, ' - 'see comment in module. 
So xfail if in_channels != out_channels') - ) - else: - x = torch.rand(10, in_channels, L) - out = residual_block(x) - assert isinstance(x, torch.Tensor) - - -class TestTCNBLock: - @pytest.mark.parametrize( - 'num_inputs, num_blocks, use_skip_connections', - [ - (64, 2, True), - (64, 3, True), - (64, 4, True), - ] - ) - def test_init(self, num_inputs, num_blocks, use_skip_connections): - tcn = vak.nets.das.net.TCNBlock(num_inputs=num_inputs, - num_blocks=num_blocks, - use_skip_connections=use_skip_connections) - assert isinstance(tcn, vak.nets.das.net.TCNBlock) - assert len(tcn.tcn_layers) == len(tcn.dilations) * tcn.num_blocks - - -class TestDASNet: - @pytest.mark.parametrize( - 'net_config_dataclass', - [ - vak.nets.das.net.DASNetBengaleseFinchConfig, - vak.nets.das.net.DASNetFlyMultichannelConfig, - vak.nets.das.net.DASNetFlySinglechannelConfig, - vak.nets.das.net.DASNetMarmosetConfig, - vak.nets.das.net.DASNetMouseConfig, - ] - ) - def test_init(self, net_config_dataclass): - net_config_dict = dataclasses.asdict(net_config_dataclass) - net = vak.nets.das.net.DASNet(**net_config_dict) - - assert isinstance(net, vak.nets.das.net.DASNet) - - for param_name, param_val in net_config_dict.items(): - assert getattr(net, param_name) == param_val - - @pytest.mark.parametrize( - 'net_config_dataclass', - [ - vak.nets.das.net.DASNetBengaleseFinchConfig, - vak.nets.das.net.DASNetFlyMultichannelConfig, - vak.nets.das.net.DASNetFlySinglechannelConfig, - vak.nets.das.net.DASNetMarmosetConfig, - vak.nets.das.net.DASNetMouseConfig, - ] - ) - def test_forward(self, net_config_dataclass): - net_config_dict = dataclasses.asdict(net_config_dataclass) - net = vak.nets.das.net.DASNet(**net_config_dict) - - FAKE_BATCH_SIZE = 8 - fake_input_shape = (FAKE_BATCH_SIZE, - net_config_dict['n_audio_channels'], - net_config_dict['num_samples']) - fake_input = torch.rand(*fake_input_shape) - out = net(fake_input) - out_shape = out.shape - assert out_shape[0] == FAKE_BATCH_SIZE - assert out_shape[1] == net_config_dict['num_classes'] - assert out_shape[2] == net_config_dict['num_samples'] - - -@pytest.mark.parametrize( - 'net_builder_func, net_config_dataclass', - [ - (vak.nets.das.dasnet_bengalese_finch, vak.nets.das.net.DASNetBengaleseFinchConfig), - (vak.nets.das.dasnet_fly_multichannel, vak.nets.das.net.DASNetFlyMultichannelConfig), - (vak.nets.das.dasnet_fly_singlechannel, vak.nets.das.net.DASNetFlySinglechannelConfig), - (vak.nets.das.dasnet_marmoset, vak.nets.das.net.DASNetMarmosetConfig), - (vak.nets.das.dasnet_mouse, vak.nets.das.net.DASNetMouseConfig), - - ] -) -def test_net_builders(net_builder_func, net_config_dataclass): - if 'fly' in net_builder_func.__name__: - pytest.xfail( - reason='error with groups due to separableconv' - ) - net = net_builder_func() - net_config_dict = dataclasses.asdict(net_config_dataclass) - for param_name, param_val in net_config_dict.items(): - assert getattr(net, param_name) == param_val diff --git a/tests/test_nets/test_das/test_nn.py b/tests/test_nets/test_das/test_nn.py deleted file mode 100644 index 13d8f81da..000000000 --- a/tests/test_nets/test_das/test_nn.py +++ /dev/null @@ -1,127 +0,0 @@ -import numpy as np -import pytest -import torch - -import vak.nets.das - - -class TestChannelNormalization: - - @pytest.mark.parametrize( - 'dim, keepdim', - [ - (None, None), - (1, True) - ] - ) - def test_init(self, dim, keepdim): - if dim and keepdim: - channel_norm = vak.nets.das.nn.ChannelNormalization(dim=dim, keepdim=keepdim) - else: - # test default args - channel_norm = 
vak.nets.das.nn.ChannelNormalization() - - assert isinstance(channel_norm, vak.nets.das.nn.ChannelNormalization) - - if dim and keepdim: - assert channel_norm.dim == dim - assert channel_norm.keepdim == keepdim - - def test_forward(self): - # note only testing default behavior here - channel_norm = vak.nets.das.nn.ChannelNormalization() - - x = torch.rand(10, 64, 1048) - - out = channel_norm(x) - - torch.testing.assert_allclose( - out, - x / (torch.max(torch.abs(x), dim=channel_norm.dim, keepdim=channel_norm.keepdim)[0] + 1e-5), - ) - - -class TestCausalConv1d: - - @pytest.mark.parametrize( - 'in_channels, out_channels, kernel_size, stride, dilation, use_separable', - [ - (1024, 64, 2, 1, 1, False), - (64, 64, 2, 1, 2, False), - (64, 64, 2, 1, 4, False), - (64, 64, 2, 1, 8, False), - (64, 64, 2, 1, 16, False), - (1024, 64, 2, 1, 1, True), - (64, 64, 2, 1, 2, True), - (64, 64, 2, 1, 4, True), - (64, 64, 2, 1, 8, True), - (64, 64, 2, 1, 16, True), - ] - ) - def test_init(self, in_channels, out_channels, kernel_size, stride, dilation, use_separable): - conv1d = vak.nets.das.nn.CausalConv1d(in_channels, out_channels, - kernel_size, stride=stride, - dilation=dilation, - use_separable=use_separable) - assert isinstance(conv1d, vak.nets.das.nn.CausalConv1d) - - @pytest.mark.parametrize( - 'in_channels, out_channels, L, kernel_size, stride, dilation, use_separable', - [ - (1, 64, 1024, 2, 1, 1, False), - (64, 64, 64, 2, 1, 2, False), - (64, 64, 64, 2, 1, 4, False), - (64, 64, 64, 2, 1, 8, False), - (64, 64, 64, 2, 1, 16, False), - (1, 64, 1024, 2, 1, 1, True), - (64, 64, 64, 2, 1, 2, True), - (64, 64, 64, 2, 1, 4, True), - (64, 64, 64, 2, 1, 8, True), - (64, 64, 64, 2, 1, 16, True), - ] - ) - def test_forward(self, in_channels, out_channels, L, kernel_size, stride, dilation, use_separable): - conv1d = vak.nets.das.nn.CausalConv1d(in_channels, out_channels, - kernel_size, stride=stride, - dilation=dilation, - use_separable=use_separable) - - - x = torch.rand(10, in_channels, L) - out = conv1d(x) - - assert isinstance(x, torch.Tensor) - # assert out.shape == x.shape - - -INPUT_SHAPE = (2, 2, 3) -X_NP = np.arange(np.prod(INPUT_SHAPE)).reshape(INPUT_SHAPE) -REPEATS = [2, 3, 4] -DIM = 1 - -@pytest.mark.parametrize( - 'repeats', - REPEATS -) -def test_upsampling1d(repeats): - x = torch.tensor(X_NP) - out = vak.nets.das.nn.upsampling1d(x, size=repeats) # has param dim, defaults to 1 - expected_x = torch.tensor( - np.repeat(X_NP, repeats, axis=DIM) - ) - assert torch.all(torch.eq(out, expected_x)) - - -@pytest.mark.parametrize( - 'repeats', - REPEATS -) -def test_Upsampling1d(repeats): - x = torch.tensor(X_NP) - - upsample = vak.nets.das.nn.UpSampling1D(size=repeats) - out = upsample(x) # has param dim, defaults to 1 - expected_x = torch.tensor( - np.repeat(X_NP, repeats, axis=DIM) - ) - assert torch.all(torch.eq(out, expected_x)) diff --git a/tests/test_nets/test_ed_tcn.py b/tests/test_nets/test_ed_tcn.py new file mode 100644 index 000000000..d9a4d33c6 --- /dev/null +++ b/tests/test_nets/test_ed_tcn.py @@ -0,0 +1,84 @@ +import inspect + +import torch +import pytest + +import vak.nets + + +class TestED_TCN: + + @pytest.mark.parametrize( + 'num_classes, num_input_channels, num_freqbins', + [ + ( + 1, 10, None, + ), + ( + 1, 23, None, + ), + ( + 6, 1, 513 + ), + ( + 23, 1, 512 + ), + ] + ) + def test_init(self, num_classes, num_input_channels, num_freqbins): + """test we can instantiate ED_TCN + and it has the expected attributes""" + if num_input_channels is None or num_freqbins is None: + init_sig = 
inspect.signature(vak.nets.ED_TCN.__init__) + if num_input_channels is None: + num_input_channels = init_sig.parameters['num_input_channels'].default + if num_freqbins is None: + num_freqbins = init_sig.parameters['num_freqbins'].default + + net = vak.nets.ED_TCN(num_classes, num_input_channels, num_freqbins) + assert isinstance(net, vak.nets.ED_TCN) + for expected_attr, expected_type in ( + ('num_classes', int), + ('num_input_channels', int), + ('num_freqbins', int), + ('cnn', torch.nn.Module), + ('encoder', torch.nn.Module), + ('decoder', torch.nn.Module), + ('fc', torch.nn.Linear) + ): + assert hasattr(net, expected_attr) + assert isinstance(getattr(net, expected_attr), expected_type) + + assert net.num_classes == num_classes + assert net.num_input_channels == num_input_channels + assert net.num_freqbins == num_freqbins + + @pytest.mark.parametrize( + 'num_classes, num_input_channels, num_freqbins, num_timebins, batch_size', + [ + ( + 10, None, None, 100, 8 + ), + ( + 23, None, None, 100, 64 + ), + ( + 23, 1, 512, 100, 64 + ), + ] + ) + def test_forward(self, num_classes, num_input_channels, num_freqbins, num_timebins, batch_size): + """test we can forward a tensor through a ED_TCN instance + and get the expected output""" + if num_input_channels is None or num_freqbins is None: + init_sig = inspect.signature(vak.nets.ED_TCN.__init__) + if num_input_channels is None: + num_input_channels = init_sig.parameters['num_input_channels'].default + if num_freqbins is None: + num_freqbins = init_sig.parameters['num_freqbins'].default + + input = torch.rand(batch_size, num_input_channels, num_freqbins, num_timebins) # a "batch" + net = vak.nets.ED_TCN(num_classes, num_input_channels, num_freqbins) + out = net(input) + assert isinstance(out, torch.Tensor) + assert out.shape == (batch_size, num_classes, num_timebins) diff --git a/tests/test_nets/test_teenytweetynet.py b/tests/test_nets/test_teenytweetynet.py deleted file mode 100644 index ff778d0f0..000000000 --- a/tests/test_nets/test_teenytweetynet.py +++ /dev/null @@ -1,76 +0,0 @@ -import inspect - -import torch -import pytest - -import vak.nets - - -class TestTeenyTweetyNet: - - @pytest.mark.parametrize( - 'input_shape, num_classes', - [ - ( - None, 10 - ), - ( - None, 23 - ), - ( - (1, 513, 88), 6 - ), - ( - (1, 512, 1000), 23 - ), - ] - ) - def test_init(self, num_classes, input_shape): - """test we can instantiate TeenyTweetyNet - and it has the expected attributes""" - if input_shape is None: - init_sig = inspect.signature(vak.nets.TeenyTweetyNet.__init__) - input_shape = init_sig.parameters['input_shape'].default - - net = vak.nets.TeenyTweetyNet(num_classes=num_classes, input_shape=input_shape) - assert isinstance(net, vak.nets.TeenyTweetyNet) - for expected_attr, expected_type in ( - ('num_classes', int), - ('input_shape', tuple), - ('cnn', torch.nn.Module), - ('rnn_input_size', int), - ('rnn', torch.nn.LSTM), - ('fc', torch.nn.Linear) - ): - assert hasattr(net, expected_attr) - assert isinstance(getattr(net, expected_attr), expected_type) - - assert net.num_classes == num_classes - assert net.input_shape == input_shape - - @pytest.mark.parametrize( - 'input_shape, num_classes, batch_size', - [ - ( - None, 10, 8 - ), - ( - None, 23, 64 - ), - ( - (1, 512, 1000), 23, 64 - ), - ] - ) - def test_forward(self, input_shape, num_classes, batch_size): - """test we can forward a tensor through a TeenyTweetyNet instance - and get the expected output - """ - if input_shape is None: - init_sig = inspect.signature(vak.nets.TeenyTweetyNet.__init__) - 
input_shape = init_sig.parameters['input_shape'].default - input = torch.rand(batch_size, *input_shape) # a "batch" - net = vak.nets.TeenyTweetyNet(num_classes=num_classes) - out = net(input) - assert isinstance(out, torch.Tensor) - assert out.shape == (batch_size, num_classes, input_shape[2]) diff --git a/tests/test_nets/test_tweetynet.py b/tests/test_nets/test_tweetynet.py index 23a1f18a9..0cf113afd 100644 --- a/tests/test_nets/test_tweetynet.py +++ b/tests/test_nets/test_tweetynet.py @@ -9,34 +9,38 @@ class TestTweetyNet: @pytest.mark.parametrize( - 'input_shape, num_classes', + 'num_classes, num_input_channels, num_freqbins', [ ( - None, 10 + 1, 10, None, ), ( - None, 23 + 1, 23, None, ), ( - (1, 513, 88), 6 + 6, 1, 513 ), ( - (1, 512, 1000), 23 + 23, 1, 512 ), ] ) - def test_init(self, num_classes, input_shape): + def test_init(self, num_classes, num_input_channels, num_freqbins): """test we can instantiate TweetyNet and it has the expected attributes""" - if input_shape is None: + if num_input_channels is None or num_freqbins is None: init_sig = inspect.signature(vak.nets.TweetyNet.__init__) - input_shape = init_sig.parameters['input_shape'].default + if num_input_channels is None: + num_input_channels = init_sig.parameters['num_input_channels'].default + if num_freqbins is None: + num_freqbins = init_sig.parameters['num_freqbins'].default - net = vak.nets.TweetyNet(num_classes=num_classes, input_shape=input_shape) + net = vak.nets.TweetyNet(num_classes, num_input_channels, num_freqbins) assert isinstance(net, vak.nets.TweetyNet) for expected_attr, expected_type in ( ('num_classes', int), - ('input_shape', tuple), + ('num_input_channels', int), + ('num_freqbins', int), ('cnn', torch.nn.Module), ('rnn_input_size', int), ('rnn', torch.nn.LSTM), @@ -46,30 +50,35 @@ def test_init(self, num_classes, input_shape): assert isinstance(getattr(net, expected_attr), expected_type) assert net.num_classes == num_classes - assert net.input_shape == input_shape + assert net.num_input_channels == num_input_channels + assert net.num_freqbins == num_freqbins @pytest.mark.parametrize( - 'input_shape, num_classes, batch_size', + 'num_classes, num_input_channels, num_freqbins, num_timebins, batch_size', [ ( - None, 10, 8 + 10, None, None, 100, 8 ), ( - None, 23, 64 + 23, None, None, 100, 64 ), ( - (1, 512, 1000), 23, 64 + 23, 1, 512, 100, 64 ), ] ) - def test_forward(self, input_shape, num_classes, batch_size): + def test_forward(self, num_classes, num_input_channels, num_freqbins, num_timebins, batch_size): """test we can forward a tensor through a TweetyNet instance and get the expected output""" - if input_shape is None: + if num_input_channels is None or num_freqbins is None: init_sig = inspect.signature(vak.nets.TweetyNet.__init__) - input_shape = init_sig.parameters['input_shape'].default - input = torch.rand(batch_size, *input_shape) # a "batch" - net = vak.nets.TweetyNet(num_classes=num_classes) + if num_input_channels is None: + num_input_channels = init_sig.parameters['num_input_channels'].default + if num_freqbins is None: + num_freqbins = init_sig.parameters['num_freqbins'].default + + input = torch.rand(batch_size, num_input_channels, num_freqbins, num_timebins) # a "batch" + net = vak.nets.TweetyNet(num_classes, num_input_channels, num_freqbins) out = net(input) assert isinstance(out, torch.Tensor) - assert out.shape == (batch_size, num_classes, input_shape[2]) + assert out.shape == (batch_size, num_classes, num_timebins) diff --git a/tests/test_predict/test_frame_classification.py 
b/tests/test_predict/test_frame_classification.py new file mode 100644 index 000000000..65f76d133 --- /dev/null +++ b/tests/test_predict/test_frame_classification.py @@ -0,0 +1,214 @@ +"""tests for vak.predict module""" +from pathlib import Path + +import pandas as pd +import pytest + +import vak.config +import vak.common.constants +import vak.predict + + +# written as separate function so we can re-use in tests/unit/test_cli/test_predict.py +def assert_predict_output_matches_expected(output_dir, annot_csv_filename): + annot_csv = output_dir.joinpath(annot_csv_filename) + assert annot_csv.exists() + + +@pytest.mark.parametrize( + "model_name, audio_format, spect_format, annot_format, save_net_outputs", + [ + ("TweetyNet", "cbin", None, "notmat", False), + ("TweetyNet", "wav", None, "birdsong-recognition-dataset", False), + ("TweetyNet", "cbin", None, "notmat", True), + ("TweetyNet", "wav", None, "birdsong-recognition-dataset", True), + ], +) +def test_predict_with_frame_classification_model( + model_name, + audio_format, + spect_format, + annot_format, + save_net_outputs, + specific_config, + tmp_path, + device, +): + output_dir = tmp_path.joinpath( + f"test_predict_{audio_format}_{spect_format}_{annot_format}" + ) + output_dir.mkdir() + + options_to_change = [ + {"section": "PREDICT", "option": "output_dir", "value": str(output_dir)}, + {"section": "PREDICT", "option": "device", "value": device}, + {"section": "PREDICT", "option": "save_net_outputs", "value": save_net_outputs}, + ] + toml_path = specific_config( + config_type="predict", + model=model_name, + audio_format=audio_format, + annot_format=annot_format, + options_to_change=options_to_change, + ) + cfg = vak.config.parse.from_toml_path(toml_path) + + model_config = vak.config.model.config_from_toml_path(toml_path, cfg.predict.model) + + vak.predict.frame_classification.predict_with_frame_classification_model( + model_name=cfg.predict.model, + model_config=model_config, + dataset_path=cfg.predict.dataset_path, + checkpoint_path=cfg.predict.checkpoint_path, + labelmap_path=cfg.predict.labelmap_path, + num_workers=cfg.predict.num_workers, + transform_params=cfg.predict.transform_params, + dataset_params=cfg.predict.dataset_params, + timebins_key=cfg.spect_params.timebins_key, + spect_scaler_path=cfg.predict.spect_scaler_path, + device=cfg.predict.device, + annot_csv_filename=cfg.predict.annot_csv_filename, + output_dir=cfg.predict.output_dir, + min_segment_dur=cfg.predict.min_segment_dur, + majority_vote=cfg.predict.majority_vote, + save_net_outputs=cfg.predict.save_net_outputs, + ) + + assert_predict_output_matches_expected(output_dir, cfg.predict.annot_csv_filename) + if save_net_outputs: + net_outputs = sorted( + Path(output_dir).glob(f"*{vak.common.constants.NET_OUTPUT_SUFFIX}") + ) + + metadata = vak.datasets.frame_classification.Metadata.from_dataset_path(cfg.predict.dataset_path) + dataset_csv_path = cfg.predict.dataset_path / metadata.dataset_csv_filename + dataset_df = pd.read_csv(dataset_csv_path) + + for spect_path in dataset_df.spect_path.values: + net_output_spect_path = [ + net_output + for net_output in net_outputs + if net_output.name.startswith(Path(spect_path).stem) + ] + assert len(net_output_spect_path) == 1 + + +@pytest.mark.parametrize( + 'path_option_to_change', + [ + {"section": "PREDICT", "option": "checkpoint_path", "value": '/obviously/doesnt/exist/ckpt.pt'}, + {"section": "PREDICT", "option": "labelmap_path", "value": '/obviously/doesnt/exist/labelmap.json'}, + {"section": "PREDICT", "option": 
"spect_scaler_path", "value": '/obviously/doesnt/exist/SpectScaler'}, + ] +) +def test_predict_with_frame_classification_model_raises_file_not_found( + path_option_to_change, + specific_config, + tmp_path, + device +): + """Test that core.eval raises FileNotFoundError + when `dataset_path` does not exist.""" + output_dir = tmp_path.joinpath( + f"test_predict_cbin_notmat_invalid_dataset_path" + ) + output_dir.mkdir() + + options_to_change = [ + {"section": "PREDICT", "option": "output_dir", "value": str(output_dir)}, + {"section": "PREDICT", "option": "device", "value": device}, + path_option_to_change, + ] + toml_path = specific_config( + config_type="predict", + model="TweetyNet", + audio_format="cbin", + annot_format="notmat", + options_to_change=options_to_change, + ) + cfg = vak.config.parse.from_toml_path(toml_path) + + model_config = vak.config.model.config_from_toml_path(toml_path, cfg.predict.model) + + with pytest.raises(FileNotFoundError): + vak.predict.frame_classification.predict_with_frame_classification_model( + model_name=cfg.predict.model, + model_config=model_config, + dataset_path=cfg.predict.dataset_path, + checkpoint_path=cfg.predict.checkpoint_path, + labelmap_path=cfg.predict.labelmap_path, + num_workers=cfg.predict.num_workers, + transform_params=cfg.predict.transform_params, + dataset_params=cfg.predict.dataset_params, + timebins_key=cfg.spect_params.timebins_key, + spect_scaler_path=cfg.predict.spect_scaler_path, + device=cfg.predict.device, + annot_csv_filename=cfg.predict.annot_csv_filename, + output_dir=cfg.predict.output_dir, + min_segment_dur=cfg.predict.min_segment_dur, + majority_vote=cfg.predict.majority_vote, + save_net_outputs=cfg.predict.save_net_outputs, + ) + + +@pytest.mark.parametrize( + 'path_option_to_change', + [ + {"section": "PREDICT", "option": "dataset_path", "value": '/obviously/doesnt/exist/dataset-dir'}, + {"section": "PREDICT", "option": "output_dir", "value": '/obviously/does/not/exist/output'}, + ] +) +def test_predict_with_frame_classification_model_raises_not_a_directory( + path_option_to_change, + specific_config, + device, + tmp_path, +): + """Test that core.eval raises NotADirectory + when ``output_dir`` does not exist + """ + options_to_change = [ + path_option_to_change, + {"section": "PREDICT", "option": "device", "value": device}, + ] + + if path_option_to_change["option"] != "output_dir": + # need to make sure output_dir *does* exist + # so we don't detect spurious NotADirectoryError and assume test passes + output_dir = tmp_path.joinpath( + f"test_predict_raises_not_a_directory" + ) + output_dir.mkdir() + options_to_change.append( + {"section": "PREDICT", "option": "output_dir", "value": str(output_dir)} + ) + + toml_path = specific_config( + config_type="predict", + model="TweetyNet", + audio_format="cbin", + annot_format="notmat", + options_to_change=options_to_change, + ) + cfg = vak.config.parse.from_toml_path(toml_path) + model_config = vak.config.model.config_from_toml_path(toml_path, cfg.predict.model) + + with pytest.raises(NotADirectoryError): + vak.predict.frame_classification.predict_with_frame_classification_model( + model_name=cfg.predict.model, + model_config=model_config, + dataset_path=cfg.predict.dataset_path, + checkpoint_path=cfg.predict.checkpoint_path, + labelmap_path=cfg.predict.labelmap_path, + num_workers=cfg.predict.num_workers, + transform_params=cfg.predict.transform_params, + dataset_params=cfg.predict.dataset_params, + timebins_key=cfg.spect_params.timebins_key, + 
            spect_scaler_path=cfg.predict.spect_scaler_path,
+            device=cfg.predict.device,
+            annot_csv_filename=cfg.predict.annot_csv_filename,
+            output_dir=cfg.predict.output_dir,
+            min_segment_dur=cfg.predict.min_segment_dur,
+            majority_vote=cfg.predict.majority_vote,
+            save_net_outputs=cfg.predict.save_net_outputs,
+        )
diff --git a/tests/test_predict/test_predict.py b/tests/test_predict/test_predict.py
index 3812ac71f..0aa528abf 100644
--- a/tests/test_predict/test_predict.py
+++ b/tests/test_predict/test_predict.py
@@ -1,39 +1,26 @@
-"""tests for vak.predict module"""
-from pathlib import Path
-
-import pandas as pd
+"""Tests for vak.predict.predict module."""
+from unittest import mock
 
 import pytest
 
+import vak.cli.predict
 import vak.config
 import vak.common.constants
-import vak.predict
-
-
-# written as separate function so we can re-use in tests/unit/test_cli/test_predict.py
-def assert_predict_output_matches_expected(output_dir, annot_csv_filename):
-    annot_csv = output_dir.joinpath(annot_csv_filename)
-    assert annot_csv.exists()
+import vak.common.paths
 
 
 @pytest.mark.parametrize(
-    "audio_format, spect_format, annot_format, save_net_outputs",
+    "audio_format, spect_format, annot_format, model_name, predict_function_to_mock",
     [
-        ("cbin", None, "notmat", False),
-        ("wav", None, "birdsong-recognition-dataset", False),
-        ("cbin", None, "notmat", True),
-        ("wav", None, "birdsong-recognition-dataset", True),
+        ("cbin", None, "notmat", "TweetyNet",
+         'vak.predict.predict_.predict_with_frame_classification_model'),
     ],
 )
 def test_predict(
-    audio_format,
-    spect_format,
-    annot_format,
-    save_net_outputs,
-    specific_config,
-    tmp_path,
-    model,
-    device,
+    audio_format, spect_format, annot_format, model_name, predict_function_to_mock,
+    specific_config, tmp_path
 ):
+    """Test that :func:`vak.predict.predict` dispatches to the correct model-specific
+    predict function"""
     output_dir = tmp_path.joinpath(
         f"test_predict_{audio_format}_{spect_format}_{annot_format}"
     )
@@ -41,168 +28,32 @@
     options_to_change = [
         {"section": "PREDICT", "option": "output_dir", "value": str(output_dir)},
-        {"section": "PREDICT", "option": "device", "value": device},
-        {"section": "PREDICT", "option": "save_net_outputs", "value": save_net_outputs},
+        {"section": "PREDICT", "option": "device", "value": 'cpu'},
     ]
+
     toml_path = specific_config(
         config_type="predict",
-        model=model,
+        model=model_name,
         audio_format=audio_format,
         annot_format=annot_format,
         options_to_change=options_to_change,
     )
     cfg = vak.config.parse.from_toml_path(toml_path)
-
     model_config = vak.config.model.config_from_toml_path(toml_path, cfg.predict.model)
 
-    vak.predict.predict(
-        model_name=cfg.predict.model,
-        model_config=model_config,
-        dataset_path=cfg.predict.dataset_path,
-        checkpoint_path=cfg.predict.checkpoint_path,
-        labelmap_path=cfg.predict.labelmap_path,
-        window_size=cfg.dataloader.window_size,
-        num_workers=cfg.predict.num_workers,
-        spect_key=cfg.spect_params.spect_key,
-        timebins_key=cfg.spect_params.timebins_key,
-        spect_scaler_path=cfg.predict.spect_scaler_path,
-        device=cfg.predict.device,
-        annot_csv_filename=cfg.predict.annot_csv_filename,
-        output_dir=cfg.predict.output_dir,
-        min_segment_dur=cfg.predict.min_segment_dur,
-        majority_vote=cfg.predict.majority_vote,
-        save_net_outputs=cfg.predict.save_net_outputs,
-    )
-
-    assert_predict_output_matches_expected(output_dir, cfg.predict.annot_csv_filename)
-    if save_net_outputs:
-        net_outputs = sorted(
-            Path(output_dir).glob(f"*{vak.common.constants.NET_OUTPUT_SUFFIX}")
-        )
-
- metadata = vak.datasets.metadata.Metadata.from_dataset_path(cfg.predict.dataset_path) - dataset_csv_path = cfg.predict.dataset_path / metadata.dataset_csv_filename - dataset_df = pd.read_csv(dataset_csv_path) - - for spect_path in dataset_df.spect_path.values: - net_output_spect_path = [ - net_output - for net_output in net_outputs - if net_output.name.startswith(Path(spect_path).stem) - ] - assert len(net_output_spect_path) == 1 - - -@pytest.mark.parametrize( - 'path_option_to_change', - [ - {"section": "PREDICT", "option": "checkpoint_path", "value": '/obviously/doesnt/exist/ckpt.pt'}, - {"section": "PREDICT", "option": "labelmap_path", "value": '/obviously/doesnt/exist/labelmap.json'}, - {"section": "PREDICT", "option": "spect_scaler_path", "value": '/obviously/doesnt/exist/SpectScaler'}, - ] -) -def test_predict_raises_file_not_found( - path_option_to_change, - specific_config, - tmp_path, - device -): - """Test that core.eval raises FileNotFoundError - when `dataset_path` does not exist.""" - output_dir = tmp_path.joinpath( - f"test_predict_cbin_notmat_invalid_dataset_path" - ) - output_dir.mkdir() - - options_to_change = [ - {"section": "PREDICT", "option": "output_dir", "value": str(output_dir)}, - {"section": "PREDICT", "option": "device", "value": device}, - path_option_to_change, - ] - toml_path = specific_config( - config_type="predict", - model="teenytweetynet", - audio_format="cbin", - annot_format="notmat", - options_to_change=options_to_change, - ) - cfg = vak.config.parse.from_toml_path(toml_path) - - model_config = vak.config.model.config_from_toml_path(toml_path, cfg.predict.model) - - with pytest.raises(FileNotFoundError): - vak.predict.predict( - model_name=cfg.predict.model, - model_config=model_config, - dataset_path=cfg.predict.dataset_path, - checkpoint_path=cfg.predict.checkpoint_path, - labelmap_path=cfg.predict.labelmap_path, - window_size=cfg.dataloader.window_size, - num_workers=cfg.predict.num_workers, - spect_key=cfg.spect_params.spect_key, - timebins_key=cfg.spect_params.timebins_key, - spect_scaler_path=cfg.predict.spect_scaler_path, - device=cfg.predict.device, - annot_csv_filename=cfg.predict.annot_csv_filename, - output_dir=cfg.predict.output_dir, - min_segment_dur=cfg.predict.min_segment_dur, - majority_vote=cfg.predict.majority_vote, - save_net_outputs=cfg.predict.save_net_outputs, - ) - - -@pytest.mark.parametrize( - 'path_option_to_change', - [ - {"section": "PREDICT", "option": "dataset_path", "value": '/obviously/doesnt/exist/dataset-dir'}, - {"section": "PREDICT", "option": "output_dir", "value": '/obviously/does/not/exist/output'}, - ] -) -def test_predict_raises_not_a_directory( - path_option_to_change, - specific_config, - device, - tmp_path, -): - """Test that core.eval raises NotADirectory - when ``output_dir`` does not exist - """ - options_to_change = [ - path_option_to_change, - {"section": "PREDICT", "option": "device", "value": device}, - ] - - if path_option_to_change["option"] != "output_dir": - # need to make sure output_dir *does* exist - # so we don't detect spurious NotADirectoryError and assume test passes - output_dir = tmp_path.joinpath( - f"test_predict_raises_not_a_directory" - ) - output_dir.mkdir() - options_to_change.append( - {"section": "PREDICT", "option": "output_dir", "value": str(output_dir)} - ) - - toml_path = specific_config( - config_type="predict", - model="teenytweetynet", - audio_format="cbin", - annot_format="notmat", - options_to_change=options_to_change, - ) - cfg = 
vak.config.parse.from_toml_path(toml_path)
-
     model_config = vak.config.model.config_from_toml_path(toml_path, cfg.predict.model)
+    results_path = tmp_path / 'results_path'
+    results_path.mkdir()
 
-    with pytest.raises(NotADirectoryError):
+    with mock.patch(predict_function_to_mock, autospec=True) as mock_predict_function:
         vak.predict.predict(
-            model_name=cfg.predict.model,
+            model_name=model_name,
             model_config=model_config,
             dataset_path=cfg.predict.dataset_path,
             checkpoint_path=cfg.predict.checkpoint_path,
             labelmap_path=cfg.predict.labelmap_path,
-            window_size=cfg.dataloader.window_size,
             num_workers=cfg.predict.num_workers,
-            spect_key=cfg.spect_params.spect_key,
+            transform_params=cfg.predict.transform_params,
+            dataset_params=cfg.predict.dataset_params,
             timebins_key=cfg.spect_params.timebins_key,
             spect_scaler_path=cfg.predict.spect_scaler_path,
             device=cfg.predict.device,
@@ -212,3 +63,4 @@ def test_predict_raises_not_a_directory(
             majority_vote=cfg.predict.majority_vote,
             save_net_outputs=cfg.predict.save_net_outputs,
         )
+        assert mock_predict_function.called
diff --git a/tests/test_prep/test_audio_dataset.py b/tests/test_prep/test_audio_dataset.py
index cb64251ff..1c8efcff9 100644
--- a/tests/test_prep/test_audio_dataset.py
+++ b/tests/test_prep/test_audio_dataset.py
@@ -4,6 +4,11 @@
 import pandas as pd
 import pytest
 
+from ..fixtures.audio import (
+    AUDIO_DIR_CBIN, AUDIO_LIST_CBIN_ALL_LABELS_IN_LABELSET, AUDIO_LIST_CBIN_LABELS_NOT_IN_LABELSET
+)
+from ..fixtures.annot import LABELSET_NOTMAT
+
 import vak.common.annotation
 import vak.common.constants
 import vak.prep.spectrogram_dataset.prep
@@ -14,22 +19,17 @@ def returned_dataframe_matches_expected(
     df_returned,
     data_dir,
     labelset,
-    annot_format,
     audio_format,
-    spect_format,
-    spect_output_dir=None,
+    annot_format=None,
     annot_file=None,
     expected_audio_paths=None,
     not_expected_audio_paths=None,
-    spect_file_ext=".spect.npz",
-    expected_spect_paths=None,
-    not_expected_spect_paths=None,
 ):
-    """tests that dataframe returned by ``vak.prep.spectrogram_dataset.prep.prep_spectrogram_dataset``
+    """tests that dataframe returned by ``vak.prep.audio_dataset.prep_audio_dataset``
     matches expected dataframe."""
     assert isinstance(df_returned, pd.DataFrame)
 
-    assert df_returned.columns.values.tolist() == vak.prep.spectrogram_dataset.spect_helper.DF_COLUMNS
+    assert df_returned.columns.values.tolist() == vak.prep.audio_dataset.DF_COLUMNS
 
     annot_format_from_df = df_returned.annot_format.unique()
     assert len(annot_format_from_df) == 1
@@ -58,347 +58,63 @@ def returned_dataframe_matches_expected(
     # should be true that set of labels from each annotation is a subset of labelset
     assert labelset_from_labels.issubset(set(labelset))
 
-    audio_files_from_df = [Path(audio_path) for audio_path in df_returned.audio_path]
-    spect_paths_from_df = [Path(spect_path) for spect_path in df_returned.spect_path]
-
-    # --- which assertions to run depends on whether we made the dataframe
-    # from audio files or spect files ----
-    if audio_format:  # implies that --> we made the dataframe from audio files
+    audio_paths_from_df = [Path(audio_path) for audio_path in df_returned.audio_path]
 
-        # test that all audio files came from data_dir
-        for audio_file_from_df in audio_files_from_df:
-            assert Path(audio_file_from_df).parent == data_dir
+    # test that all audio files came from data_dir
+    for audio_path_from_df in audio_paths_from_df:
+        assert audio_path_from_df.parent == data_dir
 
-        # test that each audio file has a corresponding spect file in `spect_path` column
-
spect_file_names = [spect_path.name for spect_path in spect_paths_from_df] - expected_spect_files = [ - source_audio_file.name + spect_file_ext - for source_audio_file in expected_audio_paths - ] + if expected_audio_paths: assert all( [ - expected_spect_file in spect_file_names - for expected_spect_file in expected_spect_files + expected_audio_path in audio_paths_from_df + for expected_audio_path in expected_audio_paths ] ) - # if there are audio files we expect to **not** be in audio_path - # -- because the associated annotations have labels not in labelset -- - # then test those files are **not** in spect_path - if not_expected_audio_paths is not None: - not_expected_spect_files = [ - source_audio_file.name + spect_file_ext - for source_audio_file in not_expected_audio_paths - ] - assert all( - [ - not_expected_spect_file not in spect_file_names - for not_expected_spect_file in not_expected_spect_files - ] - ) - - # test that all the generated spectrogram files are in a - # newly-created directory inside spect_output_dir + # if there are audio files we expect to **not** be in audio_path + # -- because the associated annotations have labels not in labelset -- + # then test those files are **not** in spect_path + if not_expected_audio_paths: assert all( [ - spect_path.parents[1] == spect_output_dir - for spect_path in spect_paths_from_df + not_expected_audio_path not in audio_paths_from_df + for not_expected_audio_path in not_expected_audio_paths ] ) - elif spect_format: # implies that --> we made the dataframe from audio files - for expected_spect_path in list(expected_spect_paths): - assert expected_spect_path in spect_paths_from_df - spect_paths_from_df.remove(expected_spect_path) - - # test that **only** expected paths were in DataFrame - if not_expected_spect_paths is not None: - for not_expected_spect_path in not_expected_spect_paths: - assert not_expected_spect_path not in spect_paths_from_df - - # test that **only** expected paths were in DataFrame - # spect_paths_from_df should be empty after popping off all the expected paths - assert ( - len(spect_paths_from_df) == 0 - ) # yes I know this isn't "Pythonic". It's readable, go away. 
- return True # all asserts passed -def spect_files_have_correct_keys(df_returned, - spect_params): - spect_paths_from_df = [Path(spect_path) for spect_path in df_returned.spect_path] - for spect_path in spect_paths_from_df: - spect_dict = vak.common.files.spect.load(spect_path) - for key_type in ['freqbins_key', 'timebins_key', 'spect_key', 'audio_path_key']: - if key_type in spect_params: - key = spect_params[key_type] - else: - # if we didn't pass in this key type, don't check for it - # this is for `audio_path` which is not strictly required currently - continue - assert key in spect_dict - - return True - - @pytest.mark.parametrize( - 'data_dir, labelset,' + 'data_dir, audio_format, labelset, annot_format, annot_file, expected_audio_paths, not_expected_audio_paths', + [ + (AUDIO_DIR_CBIN, "cbin", LABELSET_NOTMAT, "notmat", None, + AUDIO_LIST_CBIN_ALL_LABELS_IN_LABELSET, AUDIO_LIST_CBIN_LABELS_NOT_IN_LABELSET), + ] ) def test_prep_audio_dataset( - audio_dir_cbin, - default_spect_params, - labelset_notmat, - audio_list_cbin_all_labels_in_labelset, - audio_list_cbin_labels_not_in_labelset, - spect_list_npz_all_labels_in_labelset, - spect_list_npz_labels_not_in_labelset, - tmp_path, + data_dir, audio_format, labelset, annot_format, annot_file, tmp_path, + expected_audio_paths, not_expected_audio_paths, ): """test that ``vak.prep.spectrogram_dataset.prep.prep_spectrogram_dataset`` works when we point it at directory of .cbin audio files and specify an annotation format""" dataset_df = vak.prep.audio_dataset.prep_audio_dataset( - data_dir=audio_dir_cbin, - labelset=labelset_notmat, - annot_format="notmat", - audio_format="cbin", - annot_file=None, - ) - - assert returned_dataframe_matches_expected( - vak_df, - data_dir=audio_dir_cbin, - labelset=labelset_notmat, - annot_format="notmat", - audio_format="cbin", - spect_format=None, - spect_output_dir=tmp_path, - annot_file=None, - expected_audio_paths=audio_list_cbin_all_labels_in_labelset, - not_expected_audio_paths=audio_list_cbin_labels_not_in_labelset, - expected_spect_paths=spect_list_npz_all_labels_in_labelset, - not_expected_spect_paths=spect_list_npz_labels_not_in_labelset, - ) - - assert spect_files_have_correct_keys(vak_df, default_spect_params) - - -def test_prep_spectrogram_dataset_with_audio_cbin_no_annot( - audio_dir_cbin, default_spect_params, labelset_notmat, audio_list_cbin, tmp_path -): - """test that ``vak.prep.spectrogram_dataset.prep.prep_spectrogram_dataset`` works - when we point it at directory of .cbin audio files - and **do not** specify an annotation format""" - vak_df = vak.prep.spectrogram_dataset.prep.prep_spectrogram_dataset( - data_dir=audio_dir_cbin, - annot_format=None, - labelset=None, - audio_format="cbin", - spect_output_dir=tmp_path, - spect_format=None, - annot_file=None, - spect_params=default_spect_params, - ) - - assert returned_dataframe_matches_expected( - vak_df, - data_dir=audio_dir_cbin, - annot_format=None, - labelset=None, - audio_format="cbin", - spect_format=None, - spect_output_dir=tmp_path, - expected_audio_paths=audio_list_cbin, - annot_file=None, - ) - - assert spect_files_have_correct_keys(vak_df, default_spect_params) - - -def test_prep_spectrogram_dataset_with_audio_cbin_no_labelset( - audio_dir_cbin, default_spect_params, audio_list_cbin, tmp_path -): - """test that ``vak.prep.spectrogram_dataset.prep.prep_spectrogram_dataset`` works - when we point it at directory of .cbin audio files - and specify an annotation format""" - vak_df = 
vak.prep.spectrogram_dataset.prep.prep_spectrogram_dataset( - data_dir=audio_dir_cbin, - annot_format="notmat", - labelset=None, - audio_format="cbin", - spect_format=None, - spect_output_dir=tmp_path, - annot_file=None, - spect_params=default_spect_params, + data_dir=data_dir, + audio_format=audio_format, + labelset=labelset, + annot_format=annot_format, + annot_file=annot_file, ) assert returned_dataframe_matches_expected( - vak_df, - data_dir=audio_dir_cbin, - annot_format="notmat", - labelset=None, - audio_format="cbin", - spect_format=None, - spect_output_dir=tmp_path, - expected_audio_paths=audio_list_cbin, - annot_file=None, + dataset_df, + data_dir=data_dir, + labelset=labelset, + annot_format=annot_format, + audio_format=audio_format, + annot_file=annot_file, + expected_audio_paths=expected_audio_paths, + not_expected_audio_paths=not_expected_audio_paths, ) - - assert spect_files_have_correct_keys(vak_df, default_spect_params) - - -def test_prep_spectrogram_dataset_with_audio_cbin_non_default_spect_file_keys( - audio_dir_cbin, - default_spect_params, - labelset_notmat, - audio_list_cbin_all_labels_in_labelset, - audio_list_cbin_labels_not_in_labelset, - spect_list_npz_all_labels_in_labelset, - spect_list_npz_labels_not_in_labelset, - tmp_path, -): - """test that ``vak.prep.spectrogram_dataset.prep.prep_spectrogram_dataset`` works - when we specify different keys for accessing - arrays in array files - """ - spect_params = {k:v for k, v in default_spect_params.items()} - spect_params.update( - dict( - freqbins_key="freqbins", - timebins_key="timebins", - spect_key="spect", - audio_path_key="audio_path" - ) - ) - vak_df = vak.prep.spectrogram_dataset.prep.prep_spectrogram_dataset( - data_dir=audio_dir_cbin, - labelset=labelset_notmat, - annot_format="notmat", - audio_format="cbin", - spect_output_dir=tmp_path, - spect_format=None, - annot_file=None, - spect_params=spect_params, - ) - - assert returned_dataframe_matches_expected( - vak_df, - data_dir=audio_dir_cbin, - labelset=labelset_notmat, - annot_format="notmat", - audio_format="cbin", - spect_format=None, - spect_output_dir=tmp_path, - annot_file=None, - expected_audio_paths=audio_list_cbin_all_labels_in_labelset, - not_expected_audio_paths=audio_list_cbin_labels_not_in_labelset, - expected_spect_paths=spect_list_npz_all_labels_in_labelset, - not_expected_spect_paths=spect_list_npz_labels_not_in_labelset, - ) - - assert spect_files_have_correct_keys(vak_df, spect_params) - - -def test_prep_spectrogram_dataset_with_spect_mat( - spect_dir_mat, - default_spect_params, - labelset_yarden, - annot_file_yarden, - spect_list_mat_all_labels_in_labelset, - spect_list_mat_labels_not_in_labelset, -): - """test that ``vak.prep.spectrogram_dataset.prep.prep_spectrogram_dataset`` works - when we point it at directory of .mat array files - and specify an annotation format""" - vak_df = vak.prep.spectrogram_dataset.prep.prep_spectrogram_dataset( - data_dir=spect_dir_mat, - labelset=labelset_yarden, - annot_format="yarden", - audio_format=None, - spect_format="mat", - annot_file=annot_file_yarden, - spect_params=None, - ) - - assert returned_dataframe_matches_expected( - vak_df, - data_dir=spect_dir_mat, - labelset=labelset_yarden, - annot_format="yarden", - audio_format=None, - spect_format="mat", - annot_file=annot_file_yarden, - expected_spect_paths=spect_list_mat_all_labels_in_labelset, - not_expected_spect_paths=spect_list_mat_labels_not_in_labelset, - ) - - del default_spect_params['audio_path_key'] # 'audio_path' not strictly required - 
assert spect_files_have_correct_keys(vak_df, default_spect_params) - - -def test_prep_spectrogram_dataset_with_spect_mat_no_annot(default_spect_params, - spect_dir_mat, - spect_list_mat): - """test that ``vak.prep.spectrogram_dataset.prep.prep_spectrogram_dataset`` works - when we point it at directory of .mat array files - and **do not** specify an annotation format""" - vak_df = vak.prep.spectrogram_dataset.prep.prep_spectrogram_dataset( - data_dir=spect_dir_mat, - labelset=None, - annot_format=None, - audio_format=None, - spect_format="mat", - annot_file=None, - spect_params=None, - ) - - assert returned_dataframe_matches_expected( - vak_df, - data_dir=spect_dir_mat, - labelset=None, - annot_format=None, - audio_format=None, - spect_format="mat", - annot_file=None, - expected_spect_paths=spect_list_mat, - ) - - del default_spect_params['audio_path_key'] # 'audio_path' not strictly required - assert spect_files_have_correct_keys(vak_df, default_spect_params) - - -def test_prep_spectrogram_dataset_with_spect_mat_no_labelset(spect_dir_mat, - default_spect_params, - labelset_yarden, - annot_file_yarden, - annot_list_yarden, - spect_list_mat -): - """test that ``vak.prep.spectrogram_dataset.prep.prep_spectrogram_dataset`` works - when we point it at directory of .mat array files - and specify an annotation format - but do not specify a labelset""" - vak_df = vak.prep.spectrogram_dataset.prep.prep_spectrogram_dataset( - data_dir=spect_dir_mat, - labelset=None, - annot_format="yarden", - audio_format=None, - spect_format="mat", - annot_file=annot_file_yarden, - spect_params=None, - ) - - assert returned_dataframe_matches_expected( - vak_df, - data_dir=spect_dir_mat, - labelset=None, - annot_format="yarden", - audio_format=None, - spect_format="mat", - annot_file=annot_file_yarden, - expected_spect_paths=spect_list_mat, - ) - - del default_spect_params['audio_path_key'] # 'audio_path' not strictly required - assert spect_files_have_correct_keys(vak_df, default_spect_params) diff --git a/tests/test_prep/test_frame_classification/test_dataset_arrays.py b/tests/test_prep/test_frame_classification/test_dataset_arrays.py index f2bbf52e9..8c3c82f54 100644 --- a/tests/test_prep/test_frame_classification/test_dataset_arrays.py +++ b/tests/test_prep/test_frame_classification/test_dataset_arrays.py @@ -1,5 +1,7 @@ """Unit tests for vak.prep.frame_classification.dataset_arrays""" import json +import pathlib +import shutil import crowsetta import pytest @@ -43,12 +45,18 @@ def test_argsort_by_label_freq(annots, expected_sort_inds): assert out == expected_sort_inds -def copy_dataset_df_files_to_tmp_path_data_dir(dataset_df, dataset_path, tmp_path_data_dir): +def copy_dataset_df_files_to_tmp_path_data_dir(dataset_df, dataset_path, config_type, input_type, tmp_path_data_dir): """Copy all the files in a dataset DataFrame to a `tmp_path_data_dir`, and change the paths in the Dataframe, so that we can then call `vak.prep.frame_classification.helper.move_files_into_split_subdirs`.""" - # TODO: rewrite to handle case where 'source' files of dataset are audio - for paths_col in ('spect_path', 'annot_path'): + paths_cols = [] + if input_type == 'spect': + paths_cols.append('spect_path') + elif input_type == 'audio': + paths_cols.append('audio_path') + if config_type != 'predict': + paths_cols.append('annot_path') + for paths_col in paths_cols: paths = dataset_df[paths_col].values new_paths = [] for path in paths: @@ -59,56 +67,104 @@ def copy_dataset_df_files_to_tmp_path_data_dir(dataset_df, dataset_path, tmp_pat 
@pytest.mark.parametrize( - 'config_type, model_name, audio_format, spect_format, annot_format', + 'config_type, model_name, audio_format, spect_format, annot_format, input_type', [ - ('train', 'teenytweetynet', 'cbin', None, 'notmat'), - ('train', 'teenytweetynet', None, 'mat', 'yarden'), - ('learncurve', 'teenytweetynet', 'cbin', None, 'notmat'), + ('train', 'TweetyNet', 'cbin', None, 'notmat', 'spect'), + ('predict', 'TweetyNet', 'cbin', None, 'notmat', 'spect'), + ('eval', 'TweetyNet', 'cbin', None, 'notmat', 'spect'), + ('train', 'TweetyNet', None, 'mat', 'yarden', 'spect'), + ('learncurve', 'TweetyNet', 'cbin', None, 'notmat', 'spect'), + # TODO: add audio cases ] ) def test_make_npy_files_for_each_split(config_type, model_name, audio_format, spect_format, annot_format, - tmp_path, specific_dataset_df, specific_dataset_path): + input_type, tmp_path, specific_dataset_df, specific_dataset_path): dataset_df = specific_dataset_df(config_type, model_name, annot_format, audio_format, spect_format) dataset_path = specific_dataset_path(config_type, model_name, annot_format, audio_format, spect_format) tmp_path_data_dir = tmp_path / 'data_dir' tmp_path_data_dir.mkdir() - copy_dataset_df_files_to_tmp_path_data_dir(dataset_df, dataset_path, tmp_path_data_dir) + copy_dataset_df_files_to_tmp_path_data_dir(dataset_df, dataset_path, config_type, input_type, tmp_path_data_dir) tmp_dataset_path = tmp_path / 'dataset_dir' tmp_dataset_path.mkdir() - with (dataset_path / 'labelmap.json').open('r') as fp: - labelmap = json.load(fp) + if config_type != 'predict': + with (dataset_path / 'labelmap.json').open('r') as fp: + labelmap = json.load(fp) + else: + labelmap = None purpose = config_type - vak.prep.frame_classification.helper.make_frame_classification_arrays_from_spectrogram_dataset( + vak.prep.frame_classification.dataset_arrays.make_npy_files_for_each_split( dataset_df, tmp_dataset_path, + input_type, purpose, labelmap, - annot_format + audio_format, ) - for split in dataset_df['split'].dropna().unique(): + splits = [ + split + for split in sorted(dataset_df.split.dropna().unique()) + if split != "None" + ] + + for split in splits: split_subdir = tmp_dataset_path / split if split != 'None': assert split_subdir.exists() elif split == 'None': assert not split_subdir.exists() - for array_file_that_should_exist in ( - vak.datasets.frame_classification.INPUT_ARRAY_FILENAME, - vak.datasets.frame_classification.SOURCE_IDS_ARRAY_FILENAME, - vak.datasets.frame_classification.INDS_IN_SOURCE_ARRAY_FILENAME, - ): - expected_array_path = split_subdir / array_file_that_should_exist - assert expected_array_path.exists() - - if purpose != 'predict': - for file_that_should_exist in ( - vak.datasets.frame_classification.FRAME_LABELS_ARRAY_FILENAME, - vak.datasets.frame_classification.ANNOTATION_CSV_FILENAME, - ): - expected_path = split_subdir / file_that_should_exist - assert expected_path.exists() + split_df = dataset_df[dataset_df.split == split].copy() + + if purpose != "predict": + annots = vak.common.annotation.from_df(split_df) + else: + annots = None + + if input_type == "audio": + source_paths = split_df["audio_path"].values + elif input_type == "spect": + source_paths = split_df["spect_path"].values + + source_paths = [pathlib.Path(source_path) for source_path in source_paths] + + if annots: + source_path_annot_tups = [ + (source_path, annot) + for source_path, annot in zip(source_paths, annots) + ] + else: + source_path_annot_tups = [ + (source_path, None) + for source_path in source_paths + ] + + for 
source_path_annot_tup in source_path_annot_tups:
+            source_path, annot = source_path_annot_tup
+            frames_array_file_that_should_exist = split_subdir / (
+                source_path.stem
+                + vak.datasets.frame_classification.constants.FRAMES_ARRAY_EXT
+            )
+            assert frames_array_file_that_should_exist.exists()
+            if annot:
+                frame_labels_file_that_should_exist = split_subdir / (
+                    source_path.stem
+                    + vak.datasets.frame_classification.constants.FRAME_LABELS_EXT
+                )
+                assert frame_labels_file_that_should_exist.exists()
+
+        sample_id_vec_path = (
+            split_subdir /
+            vak.datasets.frame_classification.constants.SAMPLE_IDS_ARRAY_FILENAME
+        )
+        assert sample_id_vec_path.exists()
+
+        inds_in_sample_vec_path = (
+            split_subdir /
+            vak.datasets.frame_classification.constants.INDS_IN_SAMPLE_ARRAY_FILENAME
+        )
+        assert inds_in_sample_vec_path.exists()
diff --git a/tests/test_prep/test_frame_classification/test_frame_classification.py b/tests/test_prep/test_frame_classification/test_frame_classification.py
index 020b2ea7f..b0fdf781f 100644
--- a/tests/test_prep/test_frame_classification/test_frame_classification.py
+++ b/tests/test_prep/test_frame_classification/test_frame_classification.py
@@ -10,7 +10,6 @@
 import vak
 
-# written as separate function so we can re-use in tests/unit/test_cli/test_prep.py
 def assert_prep_output_matches_expected(dataset_path, df_returned_by_prep):
     dataset_path = pathlib.Path(dataset_path)
     assert dataset_path.exists()
@@ -19,7 +18,7 @@
     log_path = sorted(dataset_path.glob('*log'))
     assert len(log_path) == 1
 
-    meta_json_path = dataset_path / vak.datasets.metadata.Metadata.METADATA_JSON_FILENAME
+    meta_json_path = dataset_path / vak.datasets.frame_classification.Metadata.METADATA_JSON_FILENAME
     assert meta_json_path.exists()
 
     with meta_json_path.open('r') as fp:
@@ -35,11 +34,11 @@
             check_exact = False
         else:
             check_exact = True
         assert_series_equal(
             df_from_dataset_path[column],
             df_returned_by_prep[column],
             check_exact=check_exact,
         )
 
     for column in ('spect_path', 'annot_path'):
         paths = df_from_dataset_path[column].values
@@ -96,6 +98,7 @@ def test_prep_frame_classification_dataset(
     purpose = config_type.lower()
     dataset_df, dataset_path = vak.prep.frame_classification.frame_classification.prep_frame_classification_dataset(
         data_dir=cfg.prep.data_dir,
+        input_type=cfg.prep.input_type,
         purpose=purpose,
         audio_format=cfg.prep.audio_format,
         spect_format=cfg.prep.spect_format,
@@ -169,6 +172,7 @@ def test_prep_frame_classification_dataset_raises_when_labelset_required_but_is_
     with pytest.raises(ValueError):
         vak.prep.frame_classification.frame_classification.prep_frame_classification_dataset(
             data_dir=cfg.prep.data_dir,
+            input_type=cfg.prep.input_type,
             purpose=purpose,
             audio_format=cfg.prep.audio_format,
             spect_format=cfg.prep.spect_format,
@@ -235,6 +239,7 @@ def test_prep_frame_classification_dataset_with_single_audio_and_annot(source_te
     purpose = 'eval'
     dataset_df, dataset_path = vak.prep.frame_classification.frame_classification.prep_frame_classification_dataset(
         data_dir=cfg.prep.data_dir,
+        input_type=cfg.prep.input_type,
         purpose=purpose,
         audio_format=cfg.prep.audio_format,
         spect_format=cfg.prep.spect_format,
@@ -292,6 +297,7 @@ def
test_prep_frame_classification_dataset_when_annot_has_single_segment(source_ purpose = 'eval' dataset_df, dataset_path = vak.prep.frame_classification.frame_classification.prep_frame_classification_dataset( data_dir=cfg.prep.data_dir, + input_type=cfg.prep.input_type, purpose=purpose, audio_format=cfg.prep.audio_format, spect_format=cfg.prep.spect_format, @@ -327,7 +333,7 @@ def test_prep_frame_classification_dataset_raises_not_a_directory( """ toml_path = specific_config( config_type="train", - model="teenytweetynet", + model="TweetyNet", audio_format="cbin", annot_format="notmat", spect_format=None, @@ -339,6 +345,7 @@ def test_prep_frame_classification_dataset_raises_not_a_directory( with pytest.raises(NotADirectoryError): vak.prep.frame_classification.frame_classification.prep_frame_classification_dataset( data_dir=cfg.prep.data_dir, + input_type=cfg.prep.input_type, purpose=purpose, audio_format=cfg.prep.audio_format, spect_format=cfg.prep.spect_format, @@ -374,7 +381,7 @@ def test_prep_frame_classification_dataset_raises_file_not_found( """ toml_path = specific_config( config_type="train", - model="teenytweetynet", + model="TweetyNet", audio_format="cbin", annot_format="notmat", spect_format=None, @@ -386,6 +393,7 @@ def test_prep_frame_classification_dataset_raises_file_not_found( with pytest.raises(FileNotFoundError): vak.prep.frame_classification.frame_classification.prep_frame_classification_dataset( data_dir=cfg.prep.data_dir, + input_type=cfg.prep.input_type, purpose=purpose, audio_format=cfg.prep.audio_format, spect_format=cfg.prep.spect_format, diff --git a/tests/test_prep/test_frame_classification/test_helper.py b/tests/test_prep/test_frame_classification/test_helper.py deleted file mode 100644 index 94159c1ca..000000000 --- a/tests/test_prep/test_frame_classification/test_helper.py +++ /dev/null @@ -1,134 +0,0 @@ -import json -import pathlib -import shutil - -import pytest - -import vak.prep.frame_classification.helper - - -def copy_dataset_df_files_to_tmp_path_data_dir(dataset_df, dataset_path, tmp_path_data_dir): - """Copy all the files in a dataset DataFrame to a `tmp_path_data_dir`, - and change the paths in the Dataframe, so that we can then call - `vak.prep.frame_classification.helper.move_files_into_split_subdirs`.""" - # TODO: rewrite to handle case where 'source' files of dataset are audio - for paths_col in ('spect_path', 'annot_path'): - paths = dataset_df[paths_col].values - new_paths = [] - for path in paths: - new_path = shutil.copy(src=dataset_path / path, dst=tmp_path_data_dir) - new_paths.append(new_path) - dataset_df[paths_col] = new_paths - return dataset_df - - -@pytest.mark.parametrize( - 'config_type, model_name, audio_format, spect_format, annot_format', - [ - ('train', 'teenytweetynet', 'cbin', None, 'notmat'), - ('train', 'teenytweetynet', None, 'mat', 'yarden'), - ('learncurve', 'teenytweetynet', 'cbin', None, 'notmat'), - ] -) -def test_make_frame_classification_arrays_from_spectrogram_dataset( - config_type, model_name, audio_format, spect_format, annot_format, - tmp_path, specific_dataset_df, specific_dataset_path): - dataset_df = specific_dataset_df(config_type, model_name, annot_format, audio_format, spect_format) - dataset_path = specific_dataset_path(config_type, model_name, annot_format, audio_format, spect_format) - tmp_path_data_dir = tmp_path / 'data_dir' - tmp_path_data_dir.mkdir() - copy_dataset_df_files_to_tmp_path_data_dir(dataset_df, dataset_path, tmp_path_data_dir) - - tmp_dataset_path = tmp_path / 'dataset_dir' - 
tmp_dataset_path.mkdir() - - with (dataset_path / 'labelmap.json').open('r') as fp: - labelmap = json.load(fp) - - purpose = config_type - - vak.prep.frame_classification.helper.make_frame_classification_arrays_from_spectrogram_dataset( - dataset_df, - tmp_dataset_path, - purpose, - labelmap, - annot_format - ) - - for split in dataset_df['split'].dropna().unique(): - split_subdir = tmp_dataset_path / split - if split != 'None': - assert split_subdir.exists() - elif split == 'None': - assert not split_subdir.exists() - - for array_file_that_should_exist in ( - vak.datasets.frame_classification.INPUT_ARRAY_FILENAME, - vak.datasets.frame_classification.SOURCE_IDS_ARRAY_FILENAME, - vak.datasets.frame_classification.INDS_IN_SOURCE_ARRAY_FILENAME, - ): - expected_array_path = split_subdir / array_file_that_should_exist - assert expected_array_path.exists() - - if purpose != 'predict': - for file_that_should_exist in ( - vak.datasets.frame_classification.FRAME_LABELS_ARRAY_FILENAME, - vak.datasets.frame_classification.ANNOTATION_CSV_FILENAME, - ): - expected_path = split_subdir / file_that_should_exist - assert expected_path.exists() - - -@pytest.mark.parametrize( - 'data_dir_name, timestamp', - [ - ('bird1', '230319_115852') - ] -) -def test_get_dataset_csv_filename(data_dir_name, timestamp): - out = vak.prep.frame_classification.helper.get_dataset_csv_filename(data_dir_name, timestamp) - assert isinstance(out, str) - assert out.startswith(data_dir_name) - assert out.endswith('.csv') - out = out.replace('.csv', '') - assert out.endswith(timestamp) # after removing extension - assert '_prep_' in out - - -@pytest.mark.parametrize( - 'data_dir_name, timestamp', - [ - ('bird1', '230319_115852') - ] -) -def test_get_dataset_csv_path(data_dir_name, timestamp, tmp_path): - out = vak.prep.frame_classification.helper.get_dataset_csv_path(tmp_path, data_dir_name, timestamp) - assert isinstance(out, pathlib.Path) - assert out.name == vak.prep.frame_classification.helper.get_dataset_csv_filename(data_dir_name, timestamp) - assert out.parent == tmp_path - - -def test_add_split_col(audio_dir_cbin, - default_spect_params, - labelset_notmat, - tmp_path): - """test that ``add_split_col`` adds a 'split' column - to a DataFrame, where all values in the Series are the - specified split (a string)""" - vak_df = vak.prep.spectrogram_dataset.prep.prep_spectrogram_dataset( - data_dir=audio_dir_cbin, - labelset=labelset_notmat, - annot_format="notmat", - audio_format="cbin", - spect_output_dir=tmp_path, - spect_format=None, - annot_file=None, - spect_params=default_spect_params, - ) - - assert "split" not in vak_df.columns - - vak_df = vak.prep.frame_classification.helper.add_split_col(vak_df, split="train") - assert "split" in vak_df.columns - - assert vak_df["split"].unique().item() == "train" diff --git a/tests/test_prep/test_frame_classification/test_learncurve.py b/tests/test_prep/test_frame_classification/test_learncurve.py index 6b013d832..1de0ad22a 100644 --- a/tests/test_prep/test_frame_classification/test_learncurve.py +++ b/tests/test_prep/test_frame_classification/test_learncurve.py @@ -1,3 +1,4 @@ +import json import shutil import numpy as np @@ -5,98 +6,100 @@ import pytest import vak.common.converters -import vak.prep.learncurve -import vak.datasets.seq import vak.common.labels import vak.common.paths +import vak.prep.frame_classification -@pytest.mark.parametrize("window_size", [44, 88, 176]) +@pytest.mark.parametrize( + 'model_name, audio_format, annot_format, input_type', + [ + ('TweetyNet', 'cbin', 
'notmat', 'spect') + ] +) def test_make_learncurve_splits_from_dataset_df( - specific_config, labelset_notmat, default_model, device, tmp_path, window_size + model_name, audio_format, annot_format, input_type, specific_config, device, tmp_path, ): root_results_dir = tmp_path.joinpath("tmp_root_results_dir") root_results_dir.mkdir() - options_to_change = [ { "section": "LEARNCURVE", "option": "root_results_dir", "value": str(root_results_dir), }, - {"section": "LEARNCURVE", "option": "device", "value": device}, - {"section": "DATALOADER", "option": "window_size", "value": window_size}, ] toml_path = specific_config( config_type="learncurve", - model=default_model, - audio_format="cbin", - annot_format="notmat", + model=model_name, + audio_format=audio_format, + annot_format=annot_format, options_to_change=options_to_change, ) cfg = vak.config.parse.from_toml_path(toml_path) dataset_path = cfg.learncurve.dataset_path - metadata = vak.datasets.metadata.Metadata.from_dataset_path(dataset_path) + metadata = vak.datasets.frame_classification.Metadata.from_dataset_path(dataset_path) dataset_csv_path = dataset_path / metadata.dataset_csv_filename dataset_df = pd.read_csv(dataset_csv_path) - labelset_notmat = vak.common.converters.labelset_to_set(labelset_notmat) - has_unlabeled = vak.datasets.seq.validators.has_unlabeled(dataset_csv_path) - if has_unlabeled: - map_unlabeled = True - else: - map_unlabeled = False - labelmap = vak.common.labels.to_map(labelset_notmat, map_unlabeled=map_unlabeled) + labelmap_path = dataset_path / "labelmap.json" + with labelmap_path.open("r") as f: + labelmap = json.load(f) - tmp_dataset_path = tmp_path / f"test_make_learncurve_splits_from_dataset_df-window-size-{window_size}" + tmp_dataset_path = tmp_path / f"test_make_learncurve_splits_from_dataset_df" shutil.copytree(dataset_path, tmp_dataset_path) - shutil.rmtree(tmp_dataset_path / 'learncurve') # since we're about to make this and test it works - tmp_dataset_csv_path = tmp_dataset_path / dataset_csv_path.name - - vak.prep.learncurve.make_learncurve_splits_from_dataset_df( + # delete all the split directories since we're about to test that we make them + for train_dur in cfg.prep.train_set_durs: + for replicate_num in range(1, cfg.prep.num_replicates + 1): + train_dur_replicate_split_name = vak.common.learncurve.get_train_dur_replicate_split_name( + train_dur, replicate_num + ) + split_dir = tmp_dataset_path / train_dur_replicate_split_name + shutil.rmtree(split_dir) + + out = vak.prep.frame_classification.learncurve.make_learncurve_splits_from_dataset_df( dataset_df, - tmp_dataset_csv_path, + "spect", cfg.prep.train_set_durs, cfg.prep.num_replicates, tmp_dataset_path, - window_size, labelmap, + audio_format=audio_format, ) + assert isinstance(out, pd.DataFrame) - learncurve_splits_root = dataset_path / 'learncurve' - assert learncurve_splits_root.exists() - - learncurve_splits_path = learncurve_splits_root / 'learncurve-splits-metadata.csv' - assert learncurve_splits_path.exists() - - splits_df = pd.read_csv(learncurve_splits_path) - + splits_df = dataset_df[ + ~dataset_df.split.isin(('train', 'val', 'test')) + ] assert sorted(splits_df['train_dur'].unique()) == cfg.prep.train_set_durs assert sorted( splits_df['replicate_num'].unique() ) == list(range(1, cfg.prep.num_replicates + 1)) - for train_dur in sorted(splits_df['train_dur'].unique()): + # assert that each expected split name is in data frame + all_split_names = [] + for train_dur in cfg.prep.train_set_durs: train_dur_df = 
splits_df[np.isclose(splits_df['train_dur'], train_dur)].copy() + # assert correct number of replicates for this train duration assert sorted( train_dur_df['replicate_num'] ) == list(range(1, cfg.prep.num_replicates + 1)) - for replicate_num in sorted(train_dur_df['replicate_num']): - train_dur_replicate_df = splits_df[ - (np.isclose(splits_df['train_dur'], train_dur)) & - (splits_df['replicate_num'] == replicate_num) - ] - assert len(train_dur_replicate_df) == 1 + for replicate_num in range(1, cfg.prep.num_replicates + 1): + train_dur_replicate_split_name = vak.common.learncurve.get_train_dur_replicate_split_name( + train_dur, replicate_num + ) + all_split_names.append(train_dur_replicate_split_name) + + # assert directory holding split files exists + split_dir = tmp_dataset_path / train_dur_replicate_split_name + assert split_dir.exists() and split_dir.is_dir() - split_csv_path = tmp_dataset_path / train_dur_replicate_df["split_csv_filename"].item() - assert split_csv_path.exists() + # assert this train_dur + replicate split exists in dataframe + assert np.isin(train_dur_replicate_split_name, splits_df['split'].values) + this_split_df = splits_df[splits_df['split'] == train_dur_replicate_split_name] - split_df = pd.read_csv(split_csv_path) - assert split_df[split_df.split == 'train'].duration.sum() >= train_dur + # assert that it has the correct duration + assert this_split_df['duration'].sum() >= train_dur - for vec_name in ("source_ids", "source_inds", "window_inds"): - vec_filename = train_dur_replicate_df[f'{vec_name}_npy_filename'].item() - vector_path = learncurve_splits_root / vec_filename - assert vector_path.exists() diff --git a/tests/test_prep/test_prep.py b/tests/test_prep/test_prep.py index 5c481ad86..4481af81c 100644 --- a/tests/test_prep/test_prep.py +++ b/tests/test_prep/test_prep.py @@ -10,13 +10,13 @@ @pytest.mark.parametrize( "config_type, audio_format, spect_format, annot_format, dataset_prep_func_to_mock", [ - ("eval", "cbin", None, "notmat", "vak.prep.frame_classification.prep"), - ("learncurve", "cbin", None, "notmat", "vak.prep.frame_classification.prep"), - ("predict", "cbin", None, "notmat", "vak.prep.frame_classification.prep"), - ("predict", "wav", None, "birdsong-recognition-dataset", "vak.prep.frame_classification.prep"), - ("train", "cbin", None, "notmat", "vak.prep.frame_classification.prep"), - ("train", "wav", None, "birdsong-recognition-dataset", "vak.prep.frame_classification.prep"), - ("train", None, "mat", "yarden", "vak.prep.frame_classification.prep"), + ("eval", "cbin", None, "notmat", "vak.prep.prep_.prep_frame_classification_dataset"), + ("learncurve", "cbin", None, "notmat", "vak.prep.prep_.prep_frame_classification_dataset"), + ("predict", "cbin", None, "notmat", "vak.prep.prep_.prep_frame_classification_dataset"), + ("predict", "wav", None, "birdsong-recognition-dataset", "vak.prep.prep_.prep_frame_classification_dataset"), + ("train", "cbin", None, "notmat", "vak.prep.prep_.prep_frame_classification_dataset"), + ("train", "wav", None, "birdsong-recognition-dataset", "vak.prep.prep_.prep_frame_classification_dataset"), + ("train", None, "mat", "yarden", "vak.prep.prep_.prep_frame_classification_dataset"), ], ) def test_prep( @@ -73,7 +73,6 @@ def test_prep( test_dur=cfg.prep.test_dur, train_set_durs=cfg.prep.train_set_durs, num_replicates=cfg.prep.num_replicates, - window_size=cfg.dataloader.window_size, ) assert mocked_dataset_prep_func.called diff --git a/tests/test_prep/test_sequence_dataset.py b/tests/test_prep/test_sequence_dataset.py 
new file mode 100644 index 000000000..80ef3739e --- /dev/null +++ b/tests/test_prep/test_sequence_dataset.py @@ -0,0 +1,31 @@ +import pandas as pd +import pytest + +import vak + + +@pytest.mark.parametrize( + 'model_name, config_type, audio_format, spect_format, annot_format, expected_result', + [ + ("TweetyNet", "train", "cbin", None, "notmat", True), + ("TweetyNet", "train", "wav", None, "birdsong-recognition-dataset", True), + ("TweetyNet", "train", None, "mat", "yarden", True), + ] +) +def test_has_unlabeled_segments(config_type, + model_name, + audio_format, + spect_format, + annot_format, + expected_result, + specific_config_toml, + specific_dataset_csv_path): + dataset_csv_path = specific_dataset_csv_path(config_type, + model_name, + annot_format, + audio_format, + spect_format) + + dataset_df = pd.read_csv(dataset_csv_path) + has_unlabeled = vak.prep.sequence_dataset.has_unlabeled_segments(dataset_df) + assert has_unlabeled == expected_result diff --git a/tests/test_prep/test_spectrogram_dataset/test_prep.py b/tests/test_prep/test_spectrogram_dataset/test_prep.py index e5aff5d9b..792ff1c85 100644 --- a/tests/test_prep/test_spectrogram_dataset/test_prep.py +++ b/tests/test_prep/test_spectrogram_dataset/test_prep.py @@ -1,15 +1,20 @@ """tests for vak.prep.spectrogram_dataset module""" -from pathlib import Path +import pathlib import pandas as pd +import pytest import vak.common.annotation import vak.common.constants import vak.prep.spectrogram_dataset.prep import vak.prep.spectrogram_dataset.spect_helper +from ...fixtures.annot import ANNOT_FILE_YARDEN +from ...fixtures.audio import AUDIO_DIR_CBIN +from ...fixtures.spect import SPECT_DIR_MAT -def returned_dataframe_matches_expected( + +def assert_returned_dataframe_matches_expected( df_returned, data_dir, labelset, @@ -45,7 +50,7 @@ def returned_dataframe_matches_expected( if annot_file: assert all( - [Path(annot_path) == annot_file for annot_path in df_returned["annot_path"]] + [pathlib.Path(annot_path) == annot_file for annot_path in df_returned["annot_path"]] ) if labelset: @@ -57,26 +62,26 @@ def returned_dataframe_matches_expected( # should be true that set of labels from each annotation is a subset of labelset assert labelset_from_labels.issubset(set(labelset)) - audio_files_from_df = [Path(audio_path) for audio_path in df_returned.audio_path] - spect_paths_from_df = [Path(spect_path) for spect_path in df_returned.spect_path] + audio_paths_from_df = [pathlib.Path(audio_path) for audio_path in df_returned.audio_path] + spect_paths_from_df = [pathlib.Path(spect_path) for spect_path in df_returned.spect_path] + spect_file_names_from_df = [spect_path.name for spect_path in spect_paths_from_df] - # --- which assertions to run depends on whether we made the dataframe - # from audio files or spect files ---- + # which assertions to run depends on whether we made the dataframe + # from audio files or spect files if audio_format: # implies that --> we made the dataframe from audio files # test that all audio files came from data_dir - for audio_file_from_df in audio_files_from_df: - assert Path(audio_file_from_df).parent == data_dir + for audio_path_from_df in audio_paths_from_df: + assert audio_path_from_df.parent == data_dir # test that each audio file has a corresponding spect file in `spect_path` column - spect_file_names = [spect_path.name for spect_path in spect_paths_from_df] expected_spect_files = [ source_audio_file.name + spect_file_ext for source_audio_file in expected_audio_paths ] assert all( [ - expected_spect_file in 
spect_file_names + expected_spect_file in spect_file_names_from_df for expected_spect_file in expected_spect_files ] ) @@ -91,7 +96,7 @@ def returned_dataframe_matches_expected( ] assert all( [ - not_expected_spect_file not in spect_file_names + not_expected_spect_file not in spect_file_names_from_df for not_expected_spect_file in not_expected_spect_files ] ) @@ -105,31 +110,45 @@ def returned_dataframe_matches_expected( ] ) - elif spect_format: # implies that --> we made the dataframe from audio files - for expected_spect_path in list(expected_spect_paths): - assert expected_spect_path in spect_paths_from_df - spect_paths_from_df.remove(expected_spect_path) + elif spect_format: # implies that --> we made the dataframe from spect files + if spect_format == 'mat': + expected_spect_file_names = [ + spect_path.name.replace('.mat', '.npz') + for spect_path in expected_spect_paths + ] + else: + expected_spect_file_names = [ + spect_path.name for spect_path in expected_spect_paths + ] - # test that **only** expected paths were in DataFrame - if not_expected_spect_paths is not None: - for not_expected_spect_path in not_expected_spect_paths: - assert not_expected_spect_path not in spect_paths_from_df + assert all( + [expected_spect_file_name in spect_file_names_from_df + for expected_spect_file_name in expected_spect_file_names] + ) # test that **only** expected paths were in DataFrame - # spect_paths_from_df should be empty after popping off all the expected paths - assert ( - len(spect_paths_from_df) == 0 - ) # yes I know this isn't "Pythonic". It's readable, go away. - - return True # all asserts passed + if not_expected_spect_paths is not None: + if spect_format == 'mat': + not_expected_spect_file_names = [ + spect_path.name.replace('.mat', '.npz') + for spect_path in not_expected_spect_paths + ] + else: + not_expected_spect_file_names = [ + spect_path.name for spect_path in not_expected_spect_paths + ] + assert all( + [not_expected_spect_file_name not in spect_file_names_from_df + for not_expected_spect_file_name in not_expected_spect_file_names] + ) -def spect_files_have_correct_keys(df_returned, +def assert_spect_files_have_correct_keys(df_returned, spect_params): - spect_paths_from_df = [Path(spect_path) for spect_path in df_returned.spect_path] + spect_paths_from_df = [pathlib.Path(spect_path) for spect_path in df_returned.spect_path] for spect_path in spect_paths_from_df: spect_dict = vak.common.files.spect.load(spect_path) - for key_type in ['freqbins_key', 'timebins_key', 'spect_key', 'audio_path_key']: + for key_type in ['freqbins_key', 'timebins_key', 'spect_key']: if key_type in spect_params: key = spect_params[key_type] else: @@ -138,266 +157,88 @@ def spect_files_have_correct_keys(df_returned, continue assert key in spect_dict - return True - -def test_prep_spectrogram_dataset_with_audio_cbin_with_labelset( - audio_dir_cbin, - default_spect_params, - labelset_notmat, - audio_list_cbin_all_labels_in_labelset, - audio_list_cbin_labels_not_in_labelset, - spect_list_npz_all_labels_in_labelset, - spect_list_npz_labels_not_in_labelset, +@pytest.mark.parametrize( + 'data_dir, audio_format, spect_format, annot_format, labelset, annot_file', + [ + (AUDIO_DIR_CBIN, "cbin", None, "notmat", True, None), + (AUDIO_DIR_CBIN, "cbin", None, "notmat", False, None), + (AUDIO_DIR_CBIN, "cbin", None, None, False, None), + (SPECT_DIR_MAT, None, "mat", "yarden", True, ANNOT_FILE_YARDEN), + (SPECT_DIR_MAT, None, "mat", "yarden", False, ANNOT_FILE_YARDEN), + (SPECT_DIR_MAT, None, "mat", None, False, 
None), + ] +) +def test_prep_spectrogram_dataset( + data_dir, + audio_format, + spect_format, + annot_format, + labelset, + annot_file, tmp_path, + default_spect_params, + specific_audio_list, + specific_spect_list, + specific_labelset, ): - """test that ``vak.prep.spectrogram_dataset.prep.prep_spectrogram_dataset`` works - when we point it at directory of .cbin audio files - and specify an annotation format""" - vak_df = vak.prep.spectrogram_dataset.prep.prep_spectrogram_dataset( - data_dir=audio_dir_cbin, - labelset=labelset_notmat, - annot_format="notmat", - audio_format="cbin", - spect_output_dir=tmp_path, - spect_format=None, - annot_file=None, - spect_params=default_spect_params, - ) - - assert returned_dataframe_matches_expected( - vak_df, - data_dir=audio_dir_cbin, - labelset=labelset_notmat, - annot_format="notmat", - audio_format="cbin", - spect_format=None, - spect_output_dir=tmp_path, - annot_file=None, - expected_audio_paths=audio_list_cbin_all_labels_in_labelset, - not_expected_audio_paths=audio_list_cbin_labels_not_in_labelset, - expected_spect_paths=spect_list_npz_all_labels_in_labelset, - not_expected_spect_paths=spect_list_npz_labels_not_in_labelset, - ) - - assert spect_files_have_correct_keys(vak_df, default_spect_params) - - -def test_prep_spectrogram_dataset_with_audio_cbin_no_annot( - audio_dir_cbin, default_spect_params, labelset_notmat, audio_list_cbin, tmp_path -): - """test that ``vak.prep.spectrogram_dataset.prep.prep_spectrogram_dataset`` works + """Test that ``vak.prep.spectrogram_dataset.prep.prep_spectrogram_dataset`` works when we point it at directory of .cbin audio files and **do not** specify an annotation format""" - vak_df = vak.prep.spectrogram_dataset.prep.prep_spectrogram_dataset( - data_dir=audio_dir_cbin, - annot_format=None, - labelset=None, - audio_format="cbin", - spect_output_dir=tmp_path, - spect_format=None, - annot_file=None, - spect_params=default_spect_params, - ) - - assert returned_dataframe_matches_expected( - vak_df, - data_dir=audio_dir_cbin, - annot_format=None, - labelset=None, - audio_format="cbin", - spect_format=None, - spect_output_dir=tmp_path, - expected_audio_paths=audio_list_cbin, - annot_file=None, - ) - - assert spect_files_have_correct_keys(vak_df, default_spect_params) - - -def test_prep_spectrogram_dataset_with_audio_cbin_no_labelset( - audio_dir_cbin, default_spect_params, audio_list_cbin, tmp_path -): - """test that ``vak.prep.spectrogram_dataset.prep.prep_spectrogram_dataset`` works - when we point it at directory of .cbin audio files - and specify an annotation format""" - vak_df = vak.prep.spectrogram_dataset.prep.prep_spectrogram_dataset( - data_dir=audio_dir_cbin, - annot_format="notmat", - labelset=None, - audio_format="cbin", - spect_format=None, + if labelset: + labelset = specific_labelset(annot_format) + else: + labelset = None + + dataset_df = vak.prep.spectrogram_dataset.prep.prep_spectrogram_dataset( + data_dir=data_dir, + annot_format=annot_format, + labelset=labelset, + annot_file=annot_file, + audio_format=audio_format, spect_output_dir=tmp_path, - annot_file=None, + spect_format=spect_format, spect_params=default_spect_params, ) - assert returned_dataframe_matches_expected( - vak_df, - data_dir=audio_dir_cbin, - annot_format="notmat", - labelset=None, - audio_format="cbin", - spect_format=None, - spect_output_dir=tmp_path, - expected_audio_paths=audio_list_cbin, - annot_file=None, - ) - - assert spect_files_have_correct_keys(vak_df, default_spect_params) - - -def 
test_prep_spectrogram_dataset_with_audio_cbin_non_default_spect_file_keys( - audio_dir_cbin, - default_spect_params, - labelset_notmat, - audio_list_cbin_all_labels_in_labelset, - audio_list_cbin_labels_not_in_labelset, - spect_list_npz_all_labels_in_labelset, - spect_list_npz_labels_not_in_labelset, - tmp_path, -): - """test that ``vak.prep.spectrogram_dataset.prep.prep_spectrogram_dataset`` works - when we specify different keys for accessing - arrays in array files - """ - spect_params = {k:v for k, v in default_spect_params.items()} - spect_params.update( - dict( - freqbins_key="freqbins", - timebins_key="timebins", - spect_key="spect", - audio_path_key="audio_path" + if labelset and audio_format: + expected_audio_paths = specific_audio_list(audio_format, "all_labels_in_labelset") + not_expected_audio_paths = specific_audio_list(audio_format, "labels_not_in_labelset") + expected_spect_paths = None + not_expected_spect_paths = None + elif labelset is None and audio_format: + expected_audio_paths = specific_audio_list(audio_format) + not_expected_audio_paths = None + expected_spect_paths = None + not_expected_spect_paths = None + elif labelset and spect_format: + expected_audio_paths = None + not_expected_audio_paths = None + expected_spect_paths = specific_spect_list( + spect_format, "all_labels_in_labelset" ) - ) - vak_df = vak.prep.spectrogram_dataset.prep.prep_spectrogram_dataset( - data_dir=audio_dir_cbin, - labelset=labelset_notmat, - annot_format="notmat", - audio_format="cbin", - spect_output_dir=tmp_path, - spect_format=None, - annot_file=None, - spect_params=spect_params, - ) - - assert returned_dataframe_matches_expected( - vak_df, - data_dir=audio_dir_cbin, - labelset=labelset_notmat, - annot_format="notmat", - audio_format="cbin", - spect_format=None, + not_expected_spect_paths = specific_spect_list( + spect_format, "labels_not_in_labelset" + ) + elif labelset is None and spect_format: + expected_audio_paths = None + not_expected_audio_paths = None + expected_spect_paths = specific_spect_list(spect_format) + not_expected_spect_paths = None + + assert_returned_dataframe_matches_expected( + dataset_df, + data_dir=data_dir, + annot_format=annot_format, + labelset=labelset, + audio_format=audio_format, + spect_format=spect_format, spect_output_dir=tmp_path, - annot_file=None, - expected_audio_paths=audio_list_cbin_all_labels_in_labelset, - not_expected_audio_paths=audio_list_cbin_labels_not_in_labelset, - expected_spect_paths=spect_list_npz_all_labels_in_labelset, - not_expected_spect_paths=spect_list_npz_labels_not_in_labelset, - ) - - assert spect_files_have_correct_keys(vak_df, spect_params) - - -def test_prep_spectrogram_dataset_with_spect_mat( - spect_dir_mat, - default_spect_params, - labelset_yarden, - annot_file_yarden, - spect_list_mat_all_labels_in_labelset, - spect_list_mat_labels_not_in_labelset, -): - """test that ``vak.prep.spectrogram_dataset.prep.prep_spectrogram_dataset`` works - when we point it at directory of .mat array files - and specify an annotation format""" - vak_df = vak.prep.spectrogram_dataset.prep.prep_spectrogram_dataset( - data_dir=spect_dir_mat, - labelset=labelset_yarden, - annot_format="yarden", - audio_format=None, - spect_format="mat", - annot_file=annot_file_yarden, - spect_params=None, - ) - - assert returned_dataframe_matches_expected( - vak_df, - data_dir=spect_dir_mat, - labelset=labelset_yarden, - annot_format="yarden", - audio_format=None, - spect_format="mat", - annot_file=annot_file_yarden, - 
expected_spect_paths=spect_list_mat_all_labels_in_labelset, - not_expected_spect_paths=spect_list_mat_labels_not_in_labelset, - ) - - del default_spect_params['audio_path_key'] # 'audio_path' not strictly required - assert spect_files_have_correct_keys(vak_df, default_spect_params) - - -def test_prep_spectrogram_dataset_with_spect_mat_no_annot(default_spect_params, - spect_dir_mat, - spect_list_mat): - """test that ``vak.prep.spectrogram_dataset.prep.prep_spectrogram_dataset`` works - when we point it at directory of .mat array files - and **do not** specify an annotation format""" - vak_df = vak.prep.spectrogram_dataset.prep.prep_spectrogram_dataset( - data_dir=spect_dir_mat, - labelset=None, - annot_format=None, - audio_format=None, - spect_format="mat", - annot_file=None, - spect_params=None, - ) - - assert returned_dataframe_matches_expected( - vak_df, - data_dir=spect_dir_mat, - labelset=None, - annot_format=None, - audio_format=None, - spect_format="mat", - annot_file=None, - expected_spect_paths=spect_list_mat, - ) - - del default_spect_params['audio_path_key'] # 'audio_path' not strictly required - assert spect_files_have_correct_keys(vak_df, default_spect_params) - - -def test_prep_spectrogram_dataset_with_spect_mat_no_labelset(spect_dir_mat, - default_spect_params, - labelset_yarden, - annot_file_yarden, - annot_list_yarden, - spect_list_mat -): - """test that ``vak.prep.spectrogram_dataset.prep.prep_spectrogram_dataset`` works - when we point it at directory of .mat array files - and specify an annotation format - but do not specify a labelset""" - vak_df = vak.prep.spectrogram_dataset.prep.prep_spectrogram_dataset( - data_dir=spect_dir_mat, - labelset=None, - annot_format="yarden", - audio_format=None, - spect_format="mat", - annot_file=annot_file_yarden, - spect_params=None, - ) - - assert returned_dataframe_matches_expected( - vak_df, - data_dir=spect_dir_mat, - labelset=None, - annot_format="yarden", - audio_format=None, - spect_format="mat", - annot_file=annot_file_yarden, - expected_spect_paths=spect_list_mat, + expected_audio_paths=expected_audio_paths, + not_expected_audio_paths=not_expected_audio_paths, + expected_spect_paths=expected_spect_paths, + not_expected_spect_paths=not_expected_spect_paths, + annot_file=annot_file, ) - del default_spect_params['audio_path_key'] # 'audio_path' not strictly required - assert spect_files_have_correct_keys(vak_df, default_spect_params) + assert_spect_files_have_correct_keys(dataset_df, default_spect_params) diff --git a/tests/test_prep/test_spectrogram_dataset/test_spect_helper.py b/tests/test_prep/test_spectrogram_dataset/test_spect_helper.py index 4c48133b9..77babd444 100644 --- a/tests/test_prep/test_spectrogram_dataset/test_spect_helper.py +++ b/tests/test_prep/test_spectrogram_dataset/test_spect_helper.py @@ -8,13 +8,16 @@ import vak.common.files.spect -def expected_spect_paths_in_dataframe( - dataset_df, expected_spect_paths, not_expected_spect_paths=None +def spect_paths_from_df_as_paths(dataset_df): + return [Path(spect_path) for spect_path in dataset_df["spect_path"]] + + +def assert_expected_spect_paths_in_dataframe( + spect_paths_from_df, spect_format, expected_spect_paths, not_expected_spect_paths=None ): - """tests that a dataframe ``dataset_df`` contains - all paths in ``expected_spect_paths``, and only those paths, - in its ``spect_path`` column. - If so, returns True. 
+ """Tests that a dataframe ``dataset_df`` contains one file + for each path in ``expected_spect_paths`` in its ``spect_path`` column, + and only those paths. Parameters ---------- @@ -25,265 +28,129 @@ def expected_spect_paths_in_dataframe( not_expected_spect_paths : list of paths to spectrogram files, that should **not** be in dataset_df.spect_path column """ - assert type(dataset_df) == pd.DataFrame + spect_file_names_from_df = [spect_path.name for spect_path in spect_paths_from_df] - spect_paths_from_df = [Path(spect_path) for spect_path in dataset_df["spect_path"]] + if spect_format == 'mat': + expected_spectfile_names = [ + spect_path.name.replace('.mat', '.npz') + for spect_path in expected_spect_paths + ] + else: + expected_spectfile_names = [ + spect_path.name for spect_path in expected_spect_paths + ] - for expected_spect_path in list(expected_spect_paths): - assert expected_spect_path in spect_paths_from_df - spect_paths_from_df.remove(expected_spect_path) + assert all( + [expected_spect_file in spect_file_names_from_df for expected_spect_file in expected_spectfile_names] + ) # test that **only** expected paths were in DataFrame if not_expected_spect_paths is not None: - for not_expected_spect_path in not_expected_spect_paths: - assert not_expected_spect_path not in spect_paths_from_df - - # test that **only** expected paths were in DataFrame - # spect_paths_from_df should be empty after popping off all the expected paths - assert ( - len(spect_paths_from_df) == 0 - ) # yes I know this isn't "Pythonic". It's readable, go away. - - return True # all asserts passed + if spect_format == 'mat': + not_expected_spectfile_names = [ + spect_path.name.replace('.mat', '.npz') + for spect_path in not_expected_spect_paths + ] + else: + not_expected_spectfile_names = [ + spect_path.name for spect_path in not_expected_spect_paths + ] + assert all( + [not_expected_spect_file not in spect_file_names_from_df + for not_expected_spect_file in not_expected_spectfile_names] + ) @pytest.mark.parametrize( - "spect_format, annot_format, spect_ext", + "spect_format, annot_format, spect_ext, labelset, arg_to_test", [ - ("mat", "yarden", None), - ("npz", "notmat", ".spect.npz"), + ("mat", "yarden", None, True, 'spect_dir'), + ("npz", "notmat", ".spect.npz", True, 'spect_dir'), + ("mat", "yarden", None, False, 'spect_dir'), + ("npz", "notmat", ".spect.npz", False, 'spect_dir'), + ("mat", None, None, False, 'spect_dir'), + ("npz", None, ".spect.npz", False, 'spect_dir'), + + ("mat", "yarden", None, True, 'spect_files'), + ("npz", "notmat", ".spect.npz", True, 'spect_files'), + ("mat", "yarden", None, False, 'spect_files'), + ("npz", "notmat", ".spect.npz", False, 'spect_files'), + ("mat", None, None, False, 'spect_files'), + ("npz", None, ".spect.npz", False, 'spect_files'), ], ) -def test_make_dataframe_of_spect_files_spect_dir( +def test_make_dataframe_of_spect_files( spect_format, annot_format, spect_ext, + labelset, + arg_to_test, specific_spect_dir, specific_spect_list, specific_annot_list, specific_labelset, + tmp_path, ): - """test that ``vak.prep.spectrogram_dataset.spect_helper.make_dataframe_of_spect_files`` works + """Test that ``vak.prep.spectrogram_dataset.spect_helper.make_dataframe_of_spect_files`` works when we point it at directory + give it list of annotations""" - spect_dir = specific_spect_dir(spect_format) - labelset = specific_labelset(annot_format) - annot_list = specific_annot_list(annot_format) + if arg_to_test == 'spect_dir': + spect_dir = specific_spect_dir(spect_format) + 
spect_files = None + elif arg_to_test == 'spect_files': + spect_dir = None + spect_files = specific_spect_list(spect_format) + + if labelset: + labelset = specific_labelset(annot_format) + else: + labelset = None + + if annot_format: + annot_list = specific_annot_list(annot_format) + else: + annot_list = None + + if spect_format == "mat": + spect_output_dir = tmp_path + else: + spect_output_dir = None dataset_df = vak.prep.spectrogram_dataset.spect_helper.make_dataframe_of_spect_files( spect_format=spect_format, spect_dir=spect_dir, + spect_files=spect_files, + spect_output_dir=spect_output_dir, labelset=labelset, annot_list=annot_list, annot_format=annot_format, spect_ext=spect_ext, ) + assert type(dataset_df) == pd.DataFrame - spect_list_all_labels_in_labelset = specific_spect_list( - spect_format, "all_labels_in_labelset" - ) - spect_list_labels_not_in_labelset = specific_spect_list( - spect_format, "labels_not_in_labelset" - ) - assert expected_spect_paths_in_dataframe( - dataset_df, spect_list_all_labels_in_labelset, spect_list_labels_not_in_labelset - ) - - -@pytest.mark.parametrize( - "spect_format, annot_format, spect_ext", - [ - ("mat", "yarden", None), - ("npz", "notmat", ".spect.npz"), - ], -) -def test_make_dataframe_of_spect_files_spect_dir_no_labelset( - spect_format, - annot_format, - spect_ext, - specific_spect_dir, - specific_spect_list, - specific_annot_list, -): - """test that ``vak.prep.spectrogram_dataset.spect_helper.make_dataframe_of_spect_files`` works when we point it at directory + give it list of annotations - but do not give it a labelset to filter out files""" - spect_dir = specific_spect_dir(spect_format) - annot_list = specific_annot_list(annot_format) - - dataset_df = vak.prep.spectrogram_dataset.spect_helper.make_dataframe_of_spect_files( - spect_format=spect_format, - spect_dir=spect_dir, - labelset=None, - annot_list=annot_list, - annot_format="yarden", - spect_ext=spect_ext, - ) - - spect_list = specific_spect_list(spect_format) - assert expected_spect_paths_in_dataframe(dataset_df, spect_list) - - -@pytest.mark.parametrize( - "spect_format, annot_format, spect_ext", - [ - ("mat", "yarden", None), - ("npz", "notmat", ".spect.npz"), - ], -) -def test_make_dataframe_of_spect_files_spect_dir_without_annot( - spect_format, annot_format, spect_ext, specific_spect_dir, specific_spect_list -): - """test ``vak.prep.spectrogram_dataset.spect_helper.make_dataframe_of_spect_files`` works with a dataset from spectrogram files without annotations, - # e.g. 
if we're going to predict the annotations using the spectrograms""" - spect_dir = specific_spect_dir(spect_format) - - dataset_df = vak.prep.spectrogram_dataset.spect_helper.make_dataframe_of_spect_files( - spect_format=spect_format, spect_dir=spect_dir, annot_list=None, spect_ext=spect_ext, - ) - - spect_list = specific_spect_list(spect_format) - assert expected_spect_paths_in_dataframe(dataset_df, spect_list) - - -@pytest.mark.parametrize( - "spect_format, annot_format, spect_ext", - [ - ("mat", "yarden", None), - ("npz", "notmat", ".spect.npz"), - ], -) -def test_make_dataframe_of_spect_files_spect_files( - spect_format, - annot_format, - spect_ext, - specific_spect_list, - specific_annot_list, - specific_labelset, -): - """test that ``vak.prep.spectrogram_dataset.spect_helper.make_dataframe_of_spect_files`` works - when we give it list of spectrogram files and a list of annotations""" - spect_list = specific_spect_list(spect_format) - labelset = specific_labelset(annot_format) - annot_list = specific_annot_list(annot_format) - - dataset_df = vak.prep.spectrogram_dataset.spect_helper.make_dataframe_of_spect_files( - spect_format=spect_format, - spect_files=spect_list, - labelset=labelset, - annot_list=annot_list, - annot_format=annot_format, - spect_ext=spect_ext, - ) - - spect_list_all_labels_in_labelset = specific_spect_list( - spect_format, "all_labels_in_labelset" - ) - spect_list_labels_not_in_labelset = specific_spect_list( - spect_format, "labels_not_in_labelset" - ) - assert expected_spect_paths_in_dataframe( - dataset_df, spect_list_all_labels_in_labelset, spect_list_labels_not_in_labelset - ) - - -@pytest.mark.parametrize( - "spect_format, annot_format, spect_ext", - [ - ("mat", "yarden", None), - ("npz", "notmat", ".spect.npz"), - ], -) -def test_make_dataframe_of_spect_files_spect_files_no_labelset( - spect_format, annot_format, spect_ext, specific_spect_list, specific_annot_list -): - """test that ``vak.prep.spectrogram_dataset.spect_helper.make_dataframe_of_spect_files`` works - when we give it list of spectrogram files and a list of annotations - but do not give it a labelset to filter out files""" - spect_list = specific_spect_list(spect_format) - annot_list = specific_annot_list(annot_format) - - dataset_df = vak.prep.spectrogram_dataset.spect_helper.make_dataframe_of_spect_files( - spect_format=spect_format, - spect_files=spect_list, - labelset=None, - annot_list=annot_list, - annot_format=annot_format, - spect_ext=spect_ext, - ) - - spect_list = specific_spect_list(spect_format) - assert expected_spect_paths_in_dataframe(dataset_df, spect_list) - - -@pytest.mark.parametrize( - "spect_format, annot_format, spect_ext", - [ - ("mat", "yarden", None), - ("npz", "notmat", ".spect.npz"), - ], -) -def test_make_dataframe_of_spect_files_spect_annot_map( - spect_format, - annot_format, - spect_ext, - specific_spect_list, - specific_annot_list, - specific_labelset, -): - """test that ``vak.prep.spectrogram_dataset.spect_helper.make_dataframe_of_spect_files`` works - when we give it a dict that maps spectrogram files to annotations - but do not give it a labelset to filter out files""" - spect_list = specific_spect_list(spect_format) - labelset = specific_labelset(annot_format) - annot_list = specific_annot_list(annot_format) + spect_paths_from_df = spect_paths_from_df_as_paths(dataset_df) + if labelset: + expected_spect_list = specific_spect_list( + spect_format, "all_labels_in_labelset" + ) + not_expected_spect_list = specific_spect_list( + spect_format, 
"labels_not_in_labelset" + ) + else: + expected_spect_list = specific_spect_list(spect_format) + not_expected_spect_list = None - spect_annot_map = dict(zip(spect_list, annot_list)) - dataset_df = vak.prep.spectrogram_dataset.spect_helper.make_dataframe_of_spect_files( - spect_format=spect_format, - labelset=labelset, - spect_annot_map=spect_annot_map, - annot_format=annot_format, - spect_ext=spect_ext, + assert_expected_spect_paths_in_dataframe( + spect_paths_from_df, spect_format, + expected_spect_list, not_expected_spect_list ) - spect_list_all_labels_in_labelset = specific_spect_list( - spect_format, "all_labels_in_labelset" + if spect_format == 'mat': + expected_parent = spect_output_dir + else: + expected_parent = specific_spect_dir(spect_format) + assert all( + [spect_path.parent == expected_parent for spect_path in spect_paths_from_df] ) - spect_list_labels_not_in_labelset = specific_spect_list( - spect_format, "labels_not_in_labelset" - ) - assert expected_spect_paths_in_dataframe( - dataset_df, spect_list_all_labels_in_labelset, spect_list_labels_not_in_labelset - ) - - -@pytest.mark.parametrize( - "spect_format, annot_format, spect_ext", - [ - ("mat", "yarden", None), - ("npz", "notmat", ".spect.npz"), - ], -) -def test_make_dataframe_of_spect_files_spect_annot_map_no_labelset( - spect_format, annot_format, spect_ext, specific_spect_list, specific_annot_list -): - """test that ``vak.prep.spectrogram_dataset.spect_helper.make_dataframe_of_spect_files`` works - when we give it a dict that maps spectrogram files to annotations - but do not give it a labelset to filter out files""" - spect_list = specific_spect_list(spect_format) - annot_list = specific_annot_list(annot_format) - - spect_annot_map = dict(zip(spect_list, annot_list)) - - dataset_df = vak.prep.spectrogram_dataset.spect_helper.make_dataframe_of_spect_files( - spect_format=spect_format, - labelset=None, - spect_annot_map=spect_annot_map, - annot_format=annot_format, - spect_ext=spect_ext, - ) - - spect_list = specific_spect_list(spect_format) - assert expected_spect_paths_in_dataframe(dataset_df, spect_list) def test_make_dataframe_of_spect_files_no_spect_dir_files_or_map_raises(annot_list_yarden): @@ -296,7 +163,6 @@ def test_make_dataframe_of_spect_files_no_spect_dir_files_or_map_raises(annot_li spect_dir=None, spect_files=None, annot_list=annot_list_yarden, - spect_annot_map=None, annot_format="yarden", ) @@ -326,65 +192,12 @@ def test_make_dataframe_of_spect_files_dir_and_list_raises( ) -def test_make_dataframe_of_spect_files_dir_and_map_raises( - spect_dir_mat, spect_list_mat, annot_list_yarden -): - """test that calling ``make_dataframe_of_spect_files`` with both dir and map raises a ValueError""" - spect_annot_map = dict(zip(spect_list_mat, annot_list_yarden)) - with pytest.raises(ValueError): - vak.prep.spectrogram_dataset.spect_helper.make_dataframe_of_spect_files( - spect_format="mat", - spect_dir=spect_dir_mat, - spect_annot_map=spect_annot_map, - annot_format="yarden", - ) - - -def test_make_dataframe_of_spect_files_list_and_map_raises( - spect_dir_mat, spect_list_mat, annot_list_yarden -): - """test that calling ``make_dataframe_of_spect_files`` with both list and map raises a ValueError""" - spect_annot_map = dict(zip(spect_list_mat, annot_list_yarden)) - with pytest.raises(ValueError): - vak.prep.spectrogram_dataset.spect_helper.make_dataframe_of_spect_files( - spect_format="mat", - spect_files=spect_list_mat, - spect_annot_map=spect_annot_map, - annot_format="yarden", - ) - - -def 
test_make_dataframe_of_spect_files_annot_list_and_map_raises( - spect_dir_mat, spect_list_mat, annot_list_yarden -): - """test that calling ``make_dataframe_of_spect_files`` with both list of annotations and map raises a ValueError""" - spect_annot_map = dict(zip(spect_list_mat, annot_list_yarden)) - with pytest.raises(ValueError): - vak.prep.spectrogram_dataset.spect_helper.make_dataframe_of_spect_files( - spect_format="mat", - spect_annot_map=spect_annot_map, - annot_list=annot_list_yarden, - annot_format="yarden", - ) - - def test_make_dataframe_of_spect_files_annot_list_without_annot_format_raises( spect_dir_mat, spect_list_mat, annot_list_yarden ): - """test that calling ``make_dataframe_of_spect_files`` with a list of annotations but no annot_format raises a ValueError""" - spect_annot_map = dict(zip(spect_list_mat, annot_list_yarden)) + """test that calling ``make_dataframe_of_spect_files`` with a list of annotations + but no annot_format raises a ValueError""" with pytest.raises(ValueError): vak.prep.spectrogram_dataset.spect_helper.make_dataframe_of_spect_files( spect_format="mat", annot_list=annot_list_yarden, annot_format=None ) - - -def test_make_dataframe_of_spect_files_spect_annot_map_without_annot_format_raises( - spect_dir_mat, spect_list_mat, annot_list_yarden -): - """test that calling ``make_dataframe_of_spect_files`` with a list of annotations but no annot_format raises a ValueError""" - spect_annot_map = dict(zip(spect_list_mat, annot_list_yarden)) - with pytest.raises(ValueError): - vak.prep.spectrogram_dataset.spect_helper.make_dataframe_of_spect_files( - spect_format="mat", spect_annot_map=spect_annot_map, annot_format=None - ) diff --git a/tests/test_prep/test_split/test_split.py b/tests/test_prep/test_split/test_split.py index 047635d60..76b3822c6 100644 --- a/tests/test_prep/test_split/test_split.py +++ b/tests/test_prep/test_split/test_split.py @@ -1,4 +1,5 @@ -from math import isclose +import json +import math import numpy as np import pandas as pd @@ -34,11 +35,11 @@ def train_test_dur_split_inds_output_matches_expected( assert dur_out >= dur_in elif dur_in == -1: if split == "train": - assert isclose( + assert math.isclose( dur_out, sum(durs) - sum([durs[ind] for ind in test_inds]) ) elif split == "test": - assert isclose( + assert math.isclose( dur_out, sum(durs) - sum([durs[ind] for ind in train_inds]) ) @@ -244,43 +245,96 @@ def test_dataframe_specd_dur_gt_raises(): ) -@pytest.mark.parametrize("train_dur, test_dur", [(200, 200), (200, None), (None, 200)]) -def test_split_dataframe_mat( - train_dur, test_dur, spect_list_mat_all_labels_in_labelset, annot_list_yarden, labelset_yarden, - specific_dataset_path +@pytest.mark.parametrize( + "config_type, model_name, spect_format, audio_format, annot_format, train_dur, val_dur, test_dur", + [ + ('train', 'TweetyNet', None, 'cbin', 'notmat', 45, None, None,), + ('train', 'TweetyNet', None, 'cbin', 'notmat', 45, None, 30,), + ('train', 'TweetyNet', None, 'cbin', 'notmat', 45, 15, 30,), + ('train', 'TweetyNet', None, 'cbin', 'notmat', None, None, 30,), + ('train', 'TweetyNet', 'mat', None, 'yarden', 200, None, None,), + ('train', 'TweetyNet', 'mat', None, 'yarden', None, None, 200,), + ('train', 'TweetyNet', 'mat', None, 'yarden', 200, None, 200,), + ('train', 'TweetyNet', 'mat', None, 'yarden', 200, 80, 120,), + ] +) +def test_split_frame_classification_dataframe( + config_type, model_name, spect_format, audio_format, annot_format, specific_dataset_path, + train_dur, val_dur, test_dur, ): - labelset_yarden = 
set(labelset_yarden) - - dataset_df = vak.prep.spectrogram_dataset.spect_helper.make_dataframe_of_spect_files( - spect_format="mat", - spect_files=spect_list_mat_all_labels_in_labelset, - annot_format="yarden", - annot_list=annot_list_yarden, - ) dataset_path = specific_dataset_path( - spect_format="mat", - annot_format="yarden", - config_type="train", - model="teenytweetynet" + spect_format=spect_format, + audio_format=audio_format, + annot_format=annot_format, + config_type=config_type, + model=model_name, + ) + metadata = vak.datasets.frame_classification.Metadata.from_dataset_path(dataset_path) + dataset_csv_path = dataset_path / metadata.dataset_csv_filename + dataset_df = pd.read_csv(dataset_csv_path) + dataset_df = dataset_df.drop(columns=('split')) + labelmap_path = dataset_path / "labelmap.json" + with labelmap_path.open("r") as f: + labelmap = json.load(f) + labelset = set(key for key in labelmap.keys() if key != 'unlabeled') + + dataset_df_split = vak.prep.split.split.frame_classification_dataframe( + dataset_df, dataset_path, labelset=labelset, train_dur=train_dur, val_dur=val_dur, test_dur=test_dur ) - train_dur = 200 - test_dur = 200 + assert isinstance(dataset_df_split, pd.DataFrame) + + for split, duration in zip( + ('train', 'val', 'test'), + (train_dur, val_dur, test_dur), + ): + if duration is not None: + duration_out = dataset_df_split[dataset_df_split["split"] == split].duration.sum() + assert duration_out >= duration + else: + assert split not in dataset_df_split["split"].unique().tolist() + - dataset_df_split = vak.prep.split.split.dataframe( - dataset_df, dataset_path, labelset=labelset_yarden, train_dur=train_dur, test_dur=test_dur +@pytest.mark.parametrize( + "config_type, model_name, spect_format, audio_format, annot_format, train_dur, val_dur, test_dur", + [ + ('train', 'ConvEncoderUMAP', None, 'cbin', 'notmat', 0.2, None, None,), + ('train', 'ConvEncoderUMAP', None, 'cbin', 'notmat', 0.2, None, 0.15,), + ('train', 'ConvEncoderUMAP', None, 'cbin', 'notmat', 0.2, 0.1, 0.15,), + ] +) +def test_split_unit_dataframe( + config_type, model_name, spect_format, audio_format, annot_format, specific_dataset_path, + train_dur, val_dur, test_dur, +): + dataset_path = specific_dataset_path( + spect_format=spect_format, + audio_format=audio_format, + annot_format=annot_format, + config_type=config_type, + model=model_name, + ) + metadata = vak.datasets.parametric_umap.Metadata.from_dataset_path(dataset_path) + dataset_csv_path = dataset_path / metadata.dataset_csv_filename + dataset_df = pd.read_csv(dataset_csv_path) + dataset_df = dataset_df.drop(columns=('split')) + labelmap_path = dataset_path / "labelmap.json" + with labelmap_path.open("r") as f: + labelmap = json.load(f) + labelset = set(key for key in labelmap.keys() if key != 'unlabeled') + + dataset_df_split = vak.prep.split.split.unit_dataframe( + dataset_df, dataset_path, labelset=labelset, train_dur=train_dur, val_dur=val_dur, test_dur=test_dur ) assert isinstance(dataset_df_split, pd.DataFrame) - if train_dur is not None: - train_dur_out = dataset_df_split[dataset_df_split["split"] == "train"].duration.sum() - assert train_dur_out >= train_dur - else: - assert "train" not in dataset_df_split["split"].unique().tolist() - - if test_dur is not None: - test_dur_out = dataset_df_split[dataset_df_split["split"] == "test"].duration.sum() - assert test_dur_out >= test_dur - else: - assert "test" not in dataset_df_split["split"].unique().tolist() + for split, duration in zip( + ('train', 'val', 'test'), + (train_dur, 
val_dur, test_dur), + ): + if duration is not None: + duration_out = dataset_df_split[dataset_df_split["split"] == split].duration.sum() + assert duration_out >= duration + else: + assert split not in dataset_df_split["split"].unique().tolist() \ No newline at end of file diff --git a/tests/test_train/test_frame_classification.py b/tests/test_train/test_frame_classification.py new file mode 100644 index 000000000..c2ef7f201 --- /dev/null +++ b/tests/test_train/test_frame_classification.py @@ -0,0 +1,252 @@ +"""Tests for vak.train.frame_classification module""" +import pathlib + +import pytest + +import vak.config +import vak.common.constants +import vak.common.paths +import vak.train + + +def assert_train_output_matches_expected(cfg: vak.config.config.Config, model_name: str, + results_path: pathlib.Path): + assert results_path.joinpath("labelmap.json").exists() + + if cfg.train.normalize_spectrograms or cfg.train.spect_scaler_path: + assert results_path.joinpath("StandardizeSpect").exists() + else: + assert not results_path.joinpath("StandardizeSpect").exists() + + model_path = results_path.joinpath(model_name) + assert model_path.exists() + + tensorboard_log = sorted( + model_path.glob(f"lightning_logs/**/*events*") + ) + assert len(tensorboard_log) == 1 + + checkpoints_path = model_path.joinpath("checkpoints") + assert checkpoints_path.exists() + assert checkpoints_path.joinpath("checkpoint.pt").exists() + if cfg.train.val_step is not None: + assert checkpoints_path.joinpath("max-val-acc-checkpoint.pt").exists() + + +@pytest.mark.slow +@pytest.mark.parametrize( + "model_name, audio_format, spect_format, annot_format", + [ + ("TweetyNet", "cbin", None, "notmat"), + ("TweetyNet", "wav", None, "birdsong-recognition-dataset"), + ("TweetyNet", None, "mat", "yarden"), + ], +) +def test_train_frame_classification_model( + model_name, audio_format, spect_format, annot_format, specific_config, tmp_path, device +): + results_path = vak.common.paths.generate_results_dir_name_as_path(tmp_path) + results_path.mkdir() + options_to_change = [ + {"section": "TRAIN", "option": "device", "value": device}, + {"section": "TRAIN", "option": "root_results_dir", "value": results_path} + ] + toml_path = specific_config( + config_type="train", + model=model_name, + audio_format=audio_format, + annot_format=annot_format, + spect_format=spect_format, + options_to_change=options_to_change, + ) + cfg = vak.config.parse.from_toml_path(toml_path) + model_config = vak.config.model.config_from_toml_path(toml_path, cfg.train.model) + + vak.train.frame_classification.train_frame_classification_model( + model_name=cfg.train.model, + model_config=model_config, + dataset_path=cfg.train.dataset_path, + batch_size=cfg.train.batch_size, + num_epochs=cfg.train.num_epochs, + num_workers=cfg.train.num_workers, + train_transform_params=cfg.train.train_transform_params, + train_dataset_params=cfg.train.train_dataset_params, + val_transform_params=cfg.train.val_transform_params, + val_dataset_params=cfg.train.val_dataset_params, + checkpoint_path=cfg.train.checkpoint_path, + spect_scaler_path=cfg.train.spect_scaler_path, + results_path=results_path, + normalize_spectrograms=cfg.train.normalize_spectrograms, + shuffle=cfg.train.shuffle, + val_step=cfg.train.val_step, + ckpt_step=cfg.train.ckpt_step, + patience=cfg.train.patience, + device=cfg.train.device, + ) + + assert_train_output_matches_expected(cfg, cfg.train.model, results_path) + + +@pytest.mark.slow +@pytest.mark.parametrize( + "model_name, audio_format, spect_format, 
annot_format", + [ + ("TweetyNet", "cbin", None, "notmat"), + ("TweetyNet", "wav", None, "birdsong-recognition-dataset"), + ("TweetyNet", None, "mat", "yarden"), + ], +) +def test_continue_training( + model_name, audio_format, spect_format, annot_format, specific_config, tmp_path, device +): + results_path = vak.common.paths.generate_results_dir_name_as_path(tmp_path) + results_path.mkdir() + options_to_change = [ + {"section": "TRAIN", "option": "device", "value": device}, + {"section": "TRAIN", "option": "root_results_dir", "value": results_path} + ] + toml_path = specific_config( + config_type="train_continue", + model=model_name, + audio_format=audio_format, + annot_format=annot_format, + spect_format=spect_format, + options_to_change=options_to_change, + ) + cfg = vak.config.parse.from_toml_path(toml_path) + model_config = vak.config.model.config_from_toml_path(toml_path, cfg.train.model) + + vak.train.frame_classification.train_frame_classification_model( + model_name=cfg.train.model, + model_config=model_config, + dataset_path=cfg.train.dataset_path, + batch_size=cfg.train.batch_size, + num_epochs=cfg.train.num_epochs, + num_workers=cfg.train.num_workers, + train_transform_params=cfg.train.train_transform_params, + train_dataset_params=cfg.train.train_dataset_params, + val_transform_params=cfg.train.val_transform_params, + val_dataset_params=cfg.train.val_dataset_params, + checkpoint_path=cfg.train.checkpoint_path, + spect_scaler_path=cfg.train.spect_scaler_path, + results_path=results_path, + normalize_spectrograms=cfg.train.normalize_spectrograms, + shuffle=cfg.train.shuffle, + val_step=cfg.train.val_step, + ckpt_step=cfg.train.ckpt_step, + patience=cfg.train.patience, + device=cfg.train.device, + ) + + assert_train_output_matches_expected(cfg, cfg.train.model, results_path) + + +@pytest.mark.parametrize( + 'path_option_to_change', + [ + {"section": "TRAIN", "option": "checkpoint_path", "value": '/obviously/doesnt/exist/ckpt.pt'}, + {"section": "TRAIN", "option": "spect_scaler_path", "value": '/obviously/doesnt/exist/SpectScaler'}, + ] +) +def test_train_raises_file_not_found( + path_option_to_change, specific_config, tmp_path, device +): + """Test that pre-conditions in `vak.train` raise FileNotFoundError + when one of the following does not exist: + checkpoint_path, dataset_path, spect_scaler_path + """ + options_to_change = [ + {"section": "TRAIN", "option": "device", "value": device}, + path_option_to_change + ] + toml_path = specific_config( + config_type="train", + model="TweetyNet", + audio_format="cbin", + annot_format="notmat", + spect_format=None, + options_to_change=options_to_change, + ) + cfg = vak.config.parse.from_toml_path(toml_path) + model_config = vak.config.model.config_from_toml_path(toml_path, cfg.train.model) + results_path = vak.common.paths.generate_results_dir_name_as_path(tmp_path) + results_path.mkdir() + + with pytest.raises(FileNotFoundError): + vak.train.frame_classification.train_frame_classification_model( + model_name=cfg.train.model, + model_config=model_config, + dataset_path=cfg.train.dataset_path, + batch_size=cfg.train.batch_size, + num_epochs=cfg.train.num_epochs, + num_workers=cfg.train.num_workers, + train_transform_params=cfg.train.train_transform_params, + train_dataset_params=cfg.train.train_dataset_params, + val_transform_params=cfg.train.val_transform_params, + val_dataset_params=cfg.train.val_dataset_params, + checkpoint_path=cfg.train.checkpoint_path, + spect_scaler_path=cfg.train.spect_scaler_path, + results_path=results_path, + 
+@pytest.mark.parametrize(
+    'path_option_to_change',
+    [
+        {"section": "TRAIN", "option": "dataset_path", "value": '/obviously/doesnt/exist/dataset-dir'},
+        {"section": "TRAIN", "option": "root_results_dir", "value": '/obviously/doesnt/exist/results/'},
+    ]
+)
+def test_train_raises_not_a_directory(
+    path_option_to_change, specific_config, device, tmp_path
+):
+    """Test that :func:`vak.train.frame_classification.train_frame_classification_model`
+    raises NotADirectoryError when a directory does not exist
+    """
+    options_to_change = [
+        path_option_to_change,
+        {"section": "TRAIN", "option": "device", "value": device},
+    ]
+
+    toml_path = specific_config(
+        config_type="train",
+        model="TweetyNet",
+        audio_format="cbin",
+        annot_format="notmat",
+        spect_format=None,
+        options_to_change=options_to_change,
+    )
+    cfg = vak.config.parse.from_toml_path(toml_path)
+    model_config = vak.config.model.config_from_toml_path(toml_path, cfg.train.model)
+
+    # mock behavior of cli.train, building `results_path` from config option `root_results_dir`
+    results_path = cfg.train.root_results_dir / 'results-dir-timestamp'
+
+    with pytest.raises(NotADirectoryError):
+        vak.train.frame_classification.train_frame_classification_model(
+            model_name=cfg.train.model,
+            model_config=model_config,
+            dataset_path=cfg.train.dataset_path,
+            batch_size=cfg.train.batch_size,
+            num_epochs=cfg.train.num_epochs,
+            num_workers=cfg.train.num_workers,
+            train_transform_params=cfg.train.train_transform_params,
+            train_dataset_params=cfg.train.train_dataset_params,
+            val_transform_params=cfg.train.val_transform_params,
+            val_dataset_params=cfg.train.val_dataset_params,
+            checkpoint_path=cfg.train.checkpoint_path,
+            spect_scaler_path=cfg.train.spect_scaler_path,
+            results_path=results_path,
+            normalize_spectrograms=cfg.train.normalize_spectrograms,
+            shuffle=cfg.train.shuffle,
+            val_step=cfg.train.val_step,
+            ckpt_step=cfg.train.ckpt_step,
+            patience=cfg.train.patience,
+            device=cfg.train.device,
+        )
diff --git a/tests/test_train/test_parametric_umap.py b/tests/test_train/test_parametric_umap.py
new file mode 100644
index 000000000..11e5f709c
--- /dev/null
+++ b/tests/test_train/test_parametric_umap.py
@@ -0,0 +1,180 @@
+"""Tests for vak.train.parametric_umap module"""
+import pathlib
+
+import pytest
+
+import vak.config
+import vak.common.constants
+import vak.common.paths
+import vak.train
+
+
+def assert_train_output_matches_expected(cfg: vak.config.config.Config, model_name: str,
+                                         results_path: pathlib.Path):
+    model_path = results_path.joinpath(model_name)
+    assert model_path.exists()
+
+    tensorboard_log = sorted(
+        model_path.glob("lightning_logs/**/*events*")
+    )
+    assert len(tensorboard_log) == 1
+
+    checkpoints_path = model_path.joinpath("checkpoints")
+    assert checkpoints_path.exists()
+    assert checkpoints_path.joinpath("checkpoint.pt").exists()
+    if cfg.train.val_step is not None:
+        assert checkpoints_path.joinpath("min-val-loss-checkpoint.pt").exists()
+
+
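+# Note this helper differs from its frame classification counterpart: there is
+# no labelmap or StandardizeSpect output to check, and the best checkpoint
+# tracks minimum validation loss rather than maximum validation accuracy.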
[ + {"section": "TRAIN", "option": "device", "value": device}, + {"section": "TRAIN", "option": "root_results_dir", "value": results_path} + ] + toml_path = specific_config( + config_type="train", + model=model_name, + audio_format=audio_format, + annot_format=annot_format, + spect_format=spect_format, + options_to_change=options_to_change, + ) + cfg = vak.config.parse.from_toml_path(toml_path) + model_config = vak.config.model.config_from_toml_path(toml_path, cfg.train.model) + + vak.train.parametric_umap.train_parametric_umap_model( + model_name=cfg.train.model, + model_config=model_config, + dataset_path=cfg.train.dataset_path, + batch_size=cfg.train.batch_size, + num_epochs=cfg.train.num_epochs, + num_workers=cfg.train.num_workers, + train_transform_params=cfg.train.train_transform_params, + train_dataset_params=cfg.train.train_dataset_params, + val_transform_params=cfg.train.val_transform_params, + val_dataset_params=cfg.train.val_dataset_params, + checkpoint_path=cfg.train.checkpoint_path, + results_path=results_path, + shuffle=cfg.train.shuffle, + val_step=cfg.train.val_step, + ckpt_step=cfg.train.ckpt_step, + device=cfg.train.device, + ) + + assert_train_output_matches_expected(cfg, cfg.train.model, results_path) + + +@pytest.mark.parametrize( + 'path_option_to_change', + [ + {"section": "TRAIN", "option": "checkpoint_path", "value": '/obviously/doesnt/exist/ckpt.pt'}, + ] +) +def test_train_parametric_umap_model_raises_file_not_found( + path_option_to_change, specific_config, tmp_path, device +): + """Test that pre-conditions in :func:`vak.train.parametric_umap.train_parametric_umap_model` + raise FileNotFoundError when one of the following does not exist: + checkpoint_path, dataset_path + """ + options_to_change = [ + {"section": "TRAIN", "option": "device", "value": device}, + path_option_to_change + ] + toml_path = specific_config( + config_type="train", + model="ConvEncoderUMAP", + audio_format="cbin", + annot_format="notmat", + spect_format=None, + options_to_change=options_to_change, + ) + cfg = vak.config.parse.from_toml_path(toml_path) + model_config = vak.config.model.config_from_toml_path(toml_path, cfg.train.model) + results_path = vak.common.paths.generate_results_dir_name_as_path(tmp_path) + results_path.mkdir() + + with pytest.raises(FileNotFoundError): + vak.train.parametric_umap.train_parametric_umap_model( + model_name=cfg.train.model, + model_config=model_config, + dataset_path=cfg.train.dataset_path, + batch_size=cfg.train.batch_size, + num_epochs=cfg.train.num_epochs, + num_workers=cfg.train.num_workers, + train_transform_params=cfg.train.train_transform_params, + train_dataset_params=cfg.train.train_dataset_params, + val_transform_params=cfg.train.val_transform_params, + val_dataset_params=cfg.train.val_dataset_params, + checkpoint_path=cfg.train.checkpoint_path, + results_path=results_path, + shuffle=cfg.train.shuffle, + val_step=cfg.train.val_step, + ckpt_step=cfg.train.ckpt_step, + device=cfg.train.device, + ) + + +@pytest.mark.parametrize( + 'path_option_to_change', + [ + {"section": "TRAIN", "option": "dataset_path", "value": '/obviously/doesnt/exist/dataset-dir'}, + {"section": "TRAIN", "option": "root_results_dir", "value": '/obviously/doesnt/exist/results/'}, + ] +) +def test_train_parametric_umap_model_raises_not_a_directory( + path_option_to_change, specific_config, device, tmp_path +): + """Test that core.train raises NotADirectory + when directory does not exist + """ + options_to_change = [ + path_option_to_change, + {"section": "TRAIN", "option": 
"device", "value": device}, + ] + + toml_path = specific_config( + config_type="train", + model="ConvEncoderUMAP", + audio_format="cbin", + annot_format="notmat", + spect_format=None, + options_to_change=options_to_change, + ) + cfg = vak.config.parse.from_toml_path(toml_path) + model_config = vak.config.model.config_from_toml_path(toml_path, cfg.train.model) + + # mock behavior of cli.train, building `results_path` from config option `root_results_dir` + results_path = cfg.train.root_results_dir / 'results-dir-timestamp' + + with pytest.raises(NotADirectoryError): + vak.train.parametric_umap.train_parametric_umap_model( + model_name=cfg.train.model, + model_config=model_config, + dataset_path=cfg.train.dataset_path, + batch_size=cfg.train.batch_size, + num_epochs=cfg.train.num_epochs, + num_workers=cfg.train.num_workers, + train_transform_params=cfg.train.train_transform_params, + train_dataset_params=cfg.train.train_dataset_params, + val_transform_params=cfg.train.val_transform_params, + val_dataset_params=cfg.train.val_dataset_params, + checkpoint_path=cfg.train.checkpoint_path, + results_path=results_path, + shuffle=cfg.train.shuffle, + val_step=cfg.train.val_step, + ckpt_step=cfg.train.ckpt_step, + device=cfg.train.device, + ) diff --git a/tests/test_train/test_train.py b/tests/test_train/test_train.py index 511e431ee..c43e6631f 100644 --- a/tests/test_train/test_train.py +++ b/tests/test_train/test_train.py @@ -1,4 +1,6 @@ -"""tests for vak.train module""" +"""Tests for vak.train.train function.""" +from unittest import mock + import pytest import vak.config @@ -7,50 +9,40 @@ import vak.train -def assert_train_output_matches_expected(cfg, model_name, results_path): - assert results_path.joinpath("labelmap.json").exists() - - if cfg.train.normalize_spectrograms or cfg.train.spect_scaler_path: - assert results_path.joinpath("StandardizeSpect").exists() - else: - assert not results_path.joinpath("StandardizeSpect").exists() - - model_path = results_path.joinpath(model_name) - assert model_path.exists() - - tensorboard_log = sorted( - model_path.glob(f"lightning_logs/**/*events*") - ) - assert len(tensorboard_log) == 1 - - checkpoints_path = model_path.joinpath("checkpoints") - assert checkpoints_path.exists() - assert checkpoints_path.joinpath("checkpoint.pt").exists() - if cfg.train.val_step is not None: - assert checkpoints_path.joinpath("max-val-acc-checkpoint.pt").exists() - - -@pytest.mark.slow @pytest.mark.parametrize( - "audio_format, spect_format, annot_format", + "audio_format, spect_format, annot_format, model_name, train_function_to_mock", [ - ("cbin", None, "notmat"), - ("wav", None, "birdsong-recognition-dataset"), - (None, "mat", "yarden"), + ("cbin", None, "notmat", "TweetyNet", + 'vak.train.train_.train_frame_classification_model'), + ("wav", None, "birdsong-recognition-dataset", "TweetyNet", + 'vak.train.train_.train_frame_classification_model'), + (None, "mat", "yarden", "TweetyNet", + 'vak.train.train_.train_frame_classification_model'), + ("cbin", None, "notmat", "ConvEncoderUMAP", + 'vak.train.train_.train_parametric_umap_model'), ], ) def test_train( - audio_format, spect_format, annot_format, specific_config, tmp_path, model, device + audio_format, spect_format, annot_format, model_name, train_function_to_mock, + specific_config, tmp_path ): - results_path = vak.common.paths.generate_results_dir_name_as_path(tmp_path) - results_path.mkdir() + """Test that :func:`vak.train.train` dispatches to the correct model-specific + training functions""" + root_results_dir = 
+    root_results_dir = tmp_path.joinpath("test_train_root_results_dir")
+    root_results_dir.mkdir()
+
     options_to_change = [
-        {"section": "TRAIN", "option": "device", "value": device},
-        {"section": "TRAIN", "option": "root_results_dir", "value": results_path}
+        {
+            "section": "TRAIN",
+            "option": "root_results_dir",
+            "value": str(root_results_dir),
+        },
+        {"section": "TRAIN", "option": "device", "value": 'cpu'},
     ]
+
     toml_path = specific_config(
         config_type="train",
-        model=model,
+        model=model_name,
         audio_format=audio_format,
         annot_format=annot_format,
         spect_format=spect_format,
@@ -59,180 +51,24 @@ def test_train(
     cfg = vak.config.parse.from_toml_path(toml_path)
     model_config = vak.config.model.config_from_toml_path(toml_path, cfg.train.model)
 
-    vak.train.train(
-        cfg.train.model,
-        model_config,
-        cfg.train.dataset_path,
-        cfg.dataloader.window_size,
-        cfg.train.batch_size,
-        cfg.train.num_epochs,
-        cfg.train.num_workers,
-        results_path=results_path,
-        spect_key=cfg.spect_params.spect_key,
-        timebins_key=cfg.spect_params.timebins_key,
-        normalize_spectrograms=cfg.train.normalize_spectrograms,
-        shuffle=cfg.train.shuffle,
-        val_step=cfg.train.val_step,
-        ckpt_step=cfg.train.ckpt_step,
-        patience=cfg.train.patience,
-        device=cfg.train.device,
-    )
-
-    assert_train_output_matches_expected(cfg, cfg.train.model, results_path)
-
-
-@pytest.mark.slow
-@pytest.mark.parametrize(
-    "audio_format, spect_format, annot_format",
-    [
-        ("cbin", None, "notmat"),
-        ("wav", None, "birdsong-recognition-dataset"),
-        (None, "mat", "yarden"),
-    ],
-)
-def test_continue_training(
-    audio_format, spect_format, annot_format, specific_config, tmp_path, model, device
-):
-    results_path = vak.common.paths.generate_results_dir_name_as_path(tmp_path)
+    results_path = tmp_path / 'results_path'
     results_path.mkdir()
-    options_to_change = [
-        {"section": "TRAIN", "option": "device", "value": device},
-        {"section": "TRAIN", "option": "root_results_dir", "value": results_path}
-    ]
-    toml_path = specific_config(
-        config_type="train_continue",
-        model=model,
-        audio_format=audio_format,
-        annot_format=annot_format,
-        spect_format=spect_format,
-        options_to_change=options_to_change,
-    )
-    cfg = vak.config.parse.from_toml_path(toml_path)
-    model_config = vak.config.model.config_from_toml_path(toml_path, cfg.train.model)
-
-    vak.train.train(
-        model_name=cfg.train.model,
-        model_config=model_config,
-        dataset_path=cfg.train.dataset_path,
-        window_size=cfg.dataloader.window_size,
-        batch_size=cfg.train.batch_size,
-        num_epochs=cfg.train.num_epochs,
-        num_workers=cfg.train.num_workers,
-        spect_scaler_path=cfg.train.spect_scaler_path,
-        results_path=results_path,
-        spect_key=cfg.spect_params.spect_key,
-        timebins_key=cfg.spect_params.timebins_key,
-        normalize_spectrograms=cfg.train.normalize_spectrograms,
-        shuffle=cfg.train.shuffle,
-        val_step=cfg.train.val_step,
-        ckpt_step=cfg.train.ckpt_step,
-        patience=cfg.train.patience,
-        device=cfg.train.device,
-    )
-
-    assert_train_output_matches_expected(cfg, cfg.train.model, results_path)
-
-
-@pytest.mark.parametrize(
-    'path_option_to_change',
-    [
-        {"section": "TRAIN", "option": "checkpoint_path", "value": '/obviously/doesnt/exist/ckpt.pt'},
-        {"section": "TRAIN", "option": "spect_scaler_path", "value": '/obviously/doesnt/exist/SpectScaler'},
-    ]
-)
-def test_train_raises_file_not_found(
-    path_option_to_change, specific_config, tmp_path, device
-):
-    """Test that pre-conditions in `vak.train` raise FileNotFoundError
-    when one of the following does not exist:
-    checkpoint_path, dataset_path, spect_scaler_path
-    """
-    options_to_change = [
-        {"section": "TRAIN", "option": "device", "value": device},
-        path_option_to_change
-    ]
-    toml_path = specific_config(
-        config_type="train",
-        model="teenytweetynet",
-        audio_format="cbin",
-        annot_format="notmat",
-        spect_format=None,
-        options_to_change=options_to_change,
-    )
-    cfg = vak.config.parse.from_toml_path(toml_path)
-    model_config = vak.config.model.config_from_toml_path(toml_path, cfg.train.model)
-    results_path = vak.common.paths.generate_results_dir_name_as_path(tmp_path)
-    results_path.mkdir()
-
-    with pytest.raises(FileNotFoundError):
-        vak.train.train(
-            model_name=cfg.train.model,
-            model_config=model_config,
-            dataset_path=cfg.train.dataset_path,
-            window_size=cfg.dataloader.window_size,
-            batch_size=cfg.train.batch_size,
-            num_epochs=cfg.train.num_epochs,
-            num_workers=cfg.train.num_workers,
-            checkpoint_path=cfg.train.checkpoint_path,
-            spect_scaler_path=cfg.train.spect_scaler_path,
-            results_path=results_path,
-            spect_key=cfg.spect_params.spect_key,
-            timebins_key=cfg.spect_params.timebins_key,
-            normalize_spectrograms=cfg.train.normalize_spectrograms,
-            shuffle=cfg.train.shuffle,
-            val_step=cfg.train.val_step,
-            ckpt_step=cfg.train.ckpt_step,
-            patience=cfg.train.patience,
-            device=cfg.train.device,
-        )
-
-
-@pytest.mark.parametrize(
-    'path_option_to_change',
-    [
-        {"section": "TRAIN", "option": "dataset_path", "value": '/obviously/doesnt/exist/dataset-dir'},
-        {"section": "TRAIN", "option": "root_results_dir", "value": '/obviously/doesnt/exist/results/'},
-    ]
-)
-def test_train_raises_not_a_directory(
-    path_option_to_change, specific_config, device, tmp_path
-):
-    """Test that core.train raises NotADirectory
-    when directory does not exist
-    """
-    options_to_change = [
-        path_option_to_change,
-        {"section": "TRAIN", "option": "device", "value": device},
-    ]
-
-    toml_path = specific_config(
-        config_type="train",
-        model="teenytweetynet",
-        audio_format="cbin",
-        annot_format="notmat",
-        spect_format=None,
-        options_to_change=options_to_change,
-    )
-    cfg = vak.config.parse.from_toml_path(toml_path)
-    model_config = vak.config.model.config_from_toml_path(toml_path, cfg.train.model)
-
-    # mock behavior of cli.train, building `results_path` from config option `root_results_dir`
-    results_path = cfg.train.root_results_dir / 'results-dir-timestamp'
-
-    with pytest.raises(NotADirectoryError):
+    with mock.patch(train_function_to_mock, autospec=True) as mock_train_function:
         vak.train.train(
-            model_name=cfg.train.model,
+            model_name=model_name,
             model_config=model_config,
             dataset_path=cfg.train.dataset_path,
-            window_size=cfg.dataloader.window_size,
             batch_size=cfg.train.batch_size,
             num_epochs=cfg.train.num_epochs,
             num_workers=cfg.train.num_workers,
+            train_transform_params=cfg.train.train_transform_params,
+            train_dataset_params=cfg.train.train_dataset_params,
+            val_transform_params=cfg.train.val_transform_params,
+            val_dataset_params=cfg.train.val_dataset_params,
             checkpoint_path=cfg.train.checkpoint_path,
             spect_scaler_path=cfg.train.spect_scaler_path,
             results_path=results_path,
-            spect_key=cfg.spect_params.spect_key,
-            timebins_key=cfg.spect_params.timebins_key,
             normalize_spectrograms=cfg.train.normalize_spectrograms,
             shuffle=cfg.train.shuffle,
             val_step=cfg.train.val_step,
@@ -240,3 +76,4 @@ def test_train_raises_not_a_directory(
             patience=cfg.train.patience,
             device=cfg.train.device,
         )
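+    # with autospec=True the mock also enforces the real function's signature,
+    # so a call with the wrong arguments would fail before this assertion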
+    assert mock_train_function.called
diff --git a/tests/test_transforms/test_labeled_timebins/__init__.py b/tests/test_transforms/test_frame_labels/__init__.py
similarity index 100%
rename from tests/test_transforms/test_labeled_timebins/__init__.py
rename to tests/test_transforms/test_frame_labels/__init__.py
diff --git a/tests/test_transforms/test_labeled_timebins/test_functional.py b/tests/test_transforms/test_frame_labels/test_functional.py
similarity index 93%
rename from tests/test_transforms/test_labeled_timebins/test_functional.py
rename to tests/test_transforms/test_frame_labels/test_functional.py
index 8c6b3ed57..861835052 100644
--- a/tests/test_transforms/test_labeled_timebins/test_functional.py
+++ b/tests/test_transforms/test_frame_labels/test_functional.py
@@ -1,7 +1,7 @@
 """tests for functional forms of transforms
 for labeled timebins.
 
-Tests are in the same order as the module ``vak.transforms.labeled_timebins.functional``.:
+Tests are in the same order as the module ``vak.transforms.frame_labels.functional``.:
 - from_segments: transform to get labeled timebins from annotations
 - to_labels: transform to get back just string labels from labeled timebins, used to evaluate a model
@@ -30,7 +30,7 @@
 import vak.common.files.spect
 import vak.common.labels
-import vak.transforms.labeled_timebins
+import vak.transforms.frame_labels
 
 from ...fixtures.annot import ANNOT_LIST_YARDEN, ANNOT_LIST_NOTMAT, LABELSET_YARDEN, LABELSET_NOTMAT
 
@@ -82,7 +82,7 @@ def test_from_segments(annot, spect_path, labelset):
             'Annotation with label not in labelset, would not include in dataset'
         )
 
-    lbl_tb = vak.transforms.labeled_timebins.from_segments(
+    lbl_tb = vak.transforms.frame_labels.from_segments(
         lbls_int,
         annot.seq.onsets_s,
         annot.seq.offsets_s,
@@ -114,7 +114,7 @@ def test_to_labels(lbl_tb, labelmap, labels_expected_int):
     labelmap_inv = {v: k for k, v in labelmap.items()}
     labels_expected = ''.join([labelmap_inv[lbl_int] for lbl_int in labels_expected_int])
 
-    labels = vak.transforms.labeled_timebins.to_labels(lbl_tb, labelmap)
+    labels = vak.transforms.frame_labels.to_labels(lbl_tb, labelmap)
 
     assert labels == labels_expected
 
@@ -172,7 +172,7 @@ def test_to_labels_real_data(
 
     timebins = vak.common.files.spect.load(spect_path)[TIMEBINS_KEY]
 
-    lbl_tb = vak.transforms.labeled_timebins.from_segments(
+    lbl_tb = vak.transforms.frame_labels.from_segments(
         lbls_int,
         annot.seq.onsets_s,
         annot.seq.offsets_s,
@@ -180,7 +180,7 @@
         unlabeled_label=labelmap["unlabeled"],
     )
 
-    labels = vak.transforms.labeled_timebins.to_labels(
+    labels = vak.transforms.frame_labels.to_labels(
         lbl_tb,
         labelmap,
     )
@@ -224,7 +224,7 @@ def test_to_segments_real_data(
 
     timebins = vak.common.files.spect.load(spect_path)[TIMEBINS_KEY]
 
-    lbl_tb = vak.transforms.labeled_timebins.from_segments(
+    lbl_tb = vak.transforms.frame_labels.from_segments(
         lbls_int,
         annot.seq.onsets_s,
         annot.seq.offsets_s,
@@ -232,7 +232,7 @@
         unlabeled_label=labelmap["unlabeled"],
     )
 
-    labels, onsets_s, offsets_s = vak.transforms.labeled_timebins.to_segments(
+    labels, onsets_s, offsets_s = vak.transforms.frame_labels.to_segments(
         lbl_tb, labelmap, timebins
     )
 
@@ -246,7 +246,7 @@
 
 @pytest.mark.parametrize(
-    "lbl_tb, seg_inds_list_expected",
+    "frame_labels, seg_inds_list_expected",
     [
         (np.asarray([0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0]), [np.array([4, 5, 6, 7])]),
         # assert works when segment is at start of lbl_tb
         (
             np.asarray([1, 1, 1, 1, 0, 0, 0, 0]),
             [np.array([0, 1, 2, 3])],
         ),
     ],
 )
-def test_to_inds(lbl_tb, seg_inds_list_expected):
+def test_to_inds(frame_labels, seg_inds_list_expected):
"""Test ``to_inds`` works as expected""" UNLABELED = 0 - seg_inds_list = vak.transforms.labeled_timebins.to_inds_list( - lbl_tb=lbl_tb, unlabeled_label=UNLABELED + seg_inds_list = vak.transforms.frame_labels.to_inds_list( + frame_labels=frame_labels, unlabeled_label=UNLABELED ) assert np.array_equal(seg_inds_list, seg_inds_list_expected) @@ -296,10 +296,10 @@ def test_to_inds(lbl_tb, seg_inds_list_expected): ) def test_remove_short_segments(lbl_tb, unlabeled, timebin_dur, min_segment_dur, lbl_tb_expected): """Test ``remove_short_segments`` works as expected""" - segment_inds_list = vak.transforms.labeled_timebins.to_inds_list( + segment_inds_list = vak.transforms.frame_labels.to_inds_list( lbl_tb, unlabeled_label=unlabeled ) - lbl_tb_tfm, segment_inds_list_out = vak.transforms.labeled_timebins.remove_short_segments( + lbl_tb_tfm, segment_inds_list_out = vak.transforms.frame_labels.remove_short_segments( lbl_tb, segment_inds_list, timebin_dur=timebin_dur, @@ -345,10 +345,10 @@ def test_remove_short_segments(lbl_tb, unlabeled, timebin_dur, min_segment_dur, ) def test_majority_vote(lbl_tb_in, unlabeled, lbl_tb_expected): """Test ``majority_vote`` works as expected""" - segment_inds_list = vak.transforms.labeled_timebins.to_inds_list( + segment_inds_list = vak.transforms.frame_labels.to_inds_list( lbl_tb_in, unlabeled_label=unlabeled ) - lbl_tb_maj_vote = vak.transforms.labeled_timebins.take_majority_vote( + lbl_tb_maj_vote = vak.transforms.frame_labels.take_majority_vote( lbl_tb_in, segment_inds_list ) assert np.array_equal(lbl_tb_maj_vote, lbl_tb_expected) @@ -455,10 +455,10 @@ def test_majority_vote(lbl_tb_in, unlabeled, lbl_tb_expected): POSTPROCESS_PARAMS_ARGVALS ) def test_postprocess(lbl_tb, timebin_dur, unlabeled_label, min_segment_dur, majority_vote, lbl_tb_expected): - """Test that ``trasnforms.labeled_timebins.postprocess`` works as expected. + """Test that ``trasnforms.frame_labels.postprocess`` works as expected. 
 
     Specifically test that we recover an expected string of labels,
     as would be used to compute edit distance."""
-    lbl_tb = vak.transforms.labeled_timebins.postprocess(
+    lbl_tb = vak.transforms.frame_labels.postprocess(
         lbl_tb,
         timebin_dur=timebin_dur,
         unlabeled_label=UNLABELED_LABEL,
diff --git a/tests/test_transforms/test_labeled_timebins/test_transforms.py b/tests/test_transforms/test_frame_labels/test_transforms.py
similarity index 87%
rename from tests/test_transforms/test_labeled_timebins/test_transforms.py
rename to tests/test_transforms/test_frame_labels/test_transforms.py
index 430d39f70..c044fd8b0 100644
--- a/tests/test_transforms/test_labeled_timebins/test_transforms.py
+++ b/tests/test_transforms/test_frame_labels/test_transforms.py
@@ -15,8 +15,8 @@
 
 class TestFromSegments:
     def test_init(self):
-        from_segments_tfm = vak.transforms.labeled_timebins.FromSegments()
-        assert isinstance(from_segments_tfm, vak.transforms.labeled_timebins.FromSegments)
+        from_segments_tfm = vak.transforms.frame_labels.FromSegments()
+        assert isinstance(from_segments_tfm, vak.transforms.frame_labels.FromSegments)
 
     @pytest.mark.parametrize(
         'annot, spect_path, labelset',
@@ -36,7 +36,7 @@ def test_call(self, annot, spect_path, labelset):
                 'Annotation with label not in labelset, would not include in dataset'
             )
 
-        from_segments_tfm = vak.transforms.labeled_timebins.FromSegments(unlabeled_label=labelmap['unlabeled'])
+        from_segments_tfm = vak.transforms.frame_labels.FromSegments(unlabeled_label=labelmap['unlabeled'])
         lbl_tb = from_segments_tfm(
             lbls_int,
             annot.seq.onsets_s,
@@ -60,10 +60,10 @@ def test_init(self, labelset):
         labelset = vak.common.converters.labelset_to_set(labelset)
         labelmap = vak.common.labels.to_map(labelset, map_unlabeled=True)
 
-        to_labels_tfm = vak.transforms.labeled_timebins.ToLabels(
+        to_labels_tfm = vak.transforms.frame_labels.ToLabels(
             labelmap=labelmap,
         )
-        assert isinstance(to_labels_tfm, vak.transforms.labeled_timebins.ToLabels)
+        assert isinstance(to_labels_tfm, vak.transforms.frame_labels.ToLabels)
 
     @pytest.mark.parametrize(
         "lbl_tb, labelmap, labels_expected_int",
@@ -83,7 +83,7 @@ def test_call(self, lbl_tb, labelmap, labels_expected_int):
         labelmap_inv = {v: k for k, v in labelmap.items()}
         labels_expected = ''.join([labelmap_inv[lbl_int] for lbl_int in labels_expected_int])
 
-        to_labels_tfm = vak.transforms.labeled_timebins.ToLabels(
+        to_labels_tfm = vak.transforms.frame_labels.ToLabels(
             labelmap=labelmap,
         )
         labels = to_labels_tfm(lbl_tb)
@@ -125,7 +125,7 @@ def test_call_real_data(
 
         timebins = vak.common.files.spect.load(spect_path)[TIMEBINS_KEY]
 
-        lbl_tb = vak.transforms.labeled_timebins.from_segments(
+        lbl_tb = vak.transforms.frame_labels.from_segments(
             lbls_int,
             annot.seq.onsets_s,
             annot.seq.offsets_s,
@@ -133,7 +133,7 @@
             unlabeled_label=labelmap["unlabeled"],
         )
 
-        to_labels_tfm = vak.transforms.labeled_timebins.ToLabels(
+        to_labels_tfm = vak.transforms.frame_labels.ToLabels(
             labelmap=labelmap,
         )
         labels = to_labels_tfm(lbl_tb)
@@ -157,10 +157,10 @@ def test_init(self, labelset):
         labelset = vak.common.converters.labelset_to_set(labelset)
         labelmap = vak.common.labels.to_map(labelset, map_unlabeled=True)
 
-        to_segments_tfm = vak.transforms.labeled_timebins.ToSegments(
+        to_segments_tfm = vak.transforms.frame_labels.ToSegments(
             labelmap=labelmap,
         )
-        assert isinstance(to_segments_tfm, vak.transforms.labeled_timebins.ToSegments)
+        assert isinstance(to_segments_tfm, vak.transforms.frame_labels.ToSegments)
 
     @pytest.mark.parametrize(
         'annot, spect_path, labelset',
@@ -190,7 +190,7 @@ def test_call_real_data(self, annot, spect_path, labelset):
 
         timebins = vak.common.files.spect.load(spect_path)[TIMEBINS_KEY]
 
-        lbl_tb = vak.transforms.labeled_timebins.from_segments(
+        lbl_tb = vak.transforms.frame_labels.from_segments(
             lbls_int,
             annot.seq.onsets_s,
             annot.seq.offsets_s,
@@ -198,7 +198,7 @@
             unlabeled_label=labelmap["unlabeled"],
         )
 
-        to_segments_tfm = vak.transforms.labeled_timebins.ToSegments(
+        to_segments_tfm = vak.transforms.frame_labels.ToSegments(
             labelmap=labelmap,
         )
 
@@ -221,12 +221,12 @@ def test_init(self, min_segment_dur, majority_vote, timebin_dur):
         # Note that we add an 'unlabeled' class
         # because post-processing transforms *require* it
        # This is default, just making it explicit
-        to_labels_tfm = vak.transforms.labeled_timebins.PostProcess(
+        to_labels_tfm = vak.transforms.frame_labels.PostProcess(
             min_segment_dur=min_segment_dur,
             majority_vote=majority_vote,
             timebin_dur=timebin_dur,
         )
-        assert isinstance(to_labels_tfm, vak.transforms.labeled_timebins.PostProcess)
+        assert isinstance(to_labels_tfm, vak.transforms.frame_labels.PostProcess)
 
     @pytest.mark.parametrize(
         'lbl_tb, timebin_dur, unlabeled_label, min_segment_dur, majority_vote, lbl_tb_expected',
@@ -235,7 +235,7 @@ def test_init(self, min_segment_dur, majority_vote, timebin_dur):
     def test_call(self, lbl_tb, timebin_dur, unlabeled_label, min_segment_dur, majority_vote, lbl_tb_expected):
         # Note that we add an 'unlabeled' class because post-processing transforms *require* it
         # This is default, just making it explicit
-        postprocess_tfm = vak.transforms.labeled_timebins.PostProcess(
+        postprocess_tfm = vak.transforms.frame_labels.PostProcess(
             min_segment_dur=min_segment_dur,
             majority_vote=majority_vote,
             timebin_dur=timebin_dur,
diff --git a/tests/test_transforms/test_transforms.py b/tests/test_transforms/test_transforms.py
index 8ce1b7aaa..a90270388 100644
--- a/tests/test_transforms/test_transforms.py
+++ b/tests/test_transforms/test_transforms.py
@@ -50,19 +50,19 @@ def test_instance(self, mean_freqs, std_freqs, non_zero_std):
             None
         ]
     )
-    def test_fit_csv_path(self, split, train_cbin_notmat_df,
+    def test_fit_dataset_path(self, split, train_cbin_notmat_df,
                           specific_dataset_path, specific_dataset_csv_path):
         # we need dataset_path since paths in df are relative to it
         dataset_path = specific_dataset_path(
             config_type="train",
-            model="teenytweetynet",
+            model="TweetyNet",
             audio_format="cbin",
             annot_format="notmat"
         )
         dataset_csv_path = specific_dataset_csv_path(
             config_type="train",
-            model="teenytweetynet",
+            model="TweetyNet",
             audio_format="cbin",
             annot_format="notmat"
         )
@@ -90,11 +90,10 @@ def test_fit_csv_path(self, split, train_cbin_notmat_df,
 
         # ---- actually do fit
         if split:
-            standardizer = vak.transforms.StandardizeSpect.fit_csv_path(dataset_csv_path,
-                                                                        split=split)
+            standardizer = vak.transforms.StandardizeSpect.fit_dataset_path(dataset_path, split=split)
         else:
             # this tests that default value for split 'train' works as expected
-            standardizer = vak.transforms.StandardizeSpect.fit_csv_path(dataset_csv_path)
+            standardizer = vak.transforms.StandardizeSpect.fit_dataset_path(dataset_path)
 
         # ---- test
         for attr_name, expected in zip(
diff --git a/tests/vak.tests.config.json b/tests/vak.tests.config.json
new file mode 100644
index 000000000..ae8dfde4e
--- /dev/null
+++ b/tests/vak.tests.config.json
@@ -0,0 +1,6 @@
+{
+    "models": [
+        "TweetyNet",
+        "ConvEncoderUMAP"
+    ]
+}
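
The new tests/vak.tests.config.json declares which models the test data covers. A minimal sketch of how a pytest fixture could consume it; the fixture name `models_from_tests_config` and its placement in tests/fixtures/ are assumptions for illustration, not part of this patch:

    import json
    import pathlib

    import pytest

    # assumes this sketch lives in tests/fixtures/, one level below tests/,
    # next to the other fixture modules this patch touches
    TESTS_CONFIG_JSON = pathlib.Path(__file__).parent.parent / "vak.tests.config.json"


    @pytest.fixture(scope="session")
    def models_from_tests_config():
        """Return names of models the test data was generated for,
        e.g. to parametrize tests instead of hard-coding model names."""
        with TESTS_CONFIG_JSON.open() as fp:
            return json.load(fp)["models"]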