Merge branch 'corpus-3-release-fixes' of https://github.com/sensein/b2aiprep into corpus-3-release-fixes
sensein · Oct 16, 2024 · 184687b · 184687b
2 parents 21247a2 + 603ced3
commit 184687b
Show file tree

Hide file tree

Showing 4 changed files with 808 additions and 21 deletions.
diff --git a/src/b2aiprep/cli.py b/src/b2aiprep/cli.py
@@ -19,6 +19,7 @@
     extract_mfcc_from_audios,
     extract_spectrogram_from_audios,
 )
+
 from senselab.audio.tasks.preprocessing.preprocessing import resample_audios
 from senselab.audio.tasks.speaker_embeddings.api import (
     extract_speaker_embeddings_from_audios,
@@ -144,29 +145,29 @@ def validate(
         bids_dir_path=Path(bids_dir_path),
         fix=fix,
     )
-
-@main.command()
-@click.argument("bids_dir_path", type=click.Path())
-@click.argument("fix", type=bool)
-def validate(
-    bids_dir_path,
-    fix,
-):
-    """Organizes the data into a BIDS-like directory structure.
-
-    redcap_csv_path: path to the redcap csv\n
-    audio_dir_path: path to directory with audio files\n
-    bids_dir_path: path to store bids-like data\n
-    tar_file_path: path to store tar file\n
-    transcription_model_size: tiny, small, medium, or large\n
-    n_cores: number of cores to run feature extraction on\n
-    with_sensitive: whether to include sensitive data
-    """
-    validate_bids_data(
-        bids_dir_path=Path(bids_dir_path),
-        fix=fix,
+
+
+@main.command()
+@click.argument("source_data_csv_path", type=click.Path(exists=True))
+@click.argument("synthetic_data_path", type=click.Path())
+@click.option("--n_synthetic_rows", default=100, type=int, help="Number of synthetic rows to generate.")
+@click.option("--synthesizer_path", type=click.Path(), help="Path to save/load the synthesizer.")
+def gensynthtabdata(source_data_csv_path, synthetic_data_path, n_synthetic_rows, synthesizer_path):
+    """Generates synthetic tabular data from a source data csv.
+
+    source_data_csv_path: path to the source data csv (must exist)\n
+    synthetic_data_path: path passed to the generator for the synthetic data\n
+    n_synthetic_rows: number of synthetic rows to generate (default 100)\n
+    synthesizer_path: optional path to save/load the synthesizer
+    """
+    generate_synthetic_tabular_data(
+        source_data_csv_path=Path(source_data_csv_path),
+        synthetic_data_path=Path(synthetic_data_path),
+        n_synthetic_rows=n_synthetic_rows,
+        synthesizer_path=Path(synthesizer_path) if synthesizer_path else None,
     )
 
+
 @main.command()
 @click.argument("filename", type=click.Path(exists=True))
 @click.option("-s", "--subject", type=str, default=None)

diff --git a/src/b2aiprep/create_audio_files.py b/src/b2aiprep/create_audio_files.py
@@ -0,0 +1,65 @@
+"""Copy a single source WAV file into a directory under a list of file names."""
+
+import os
+import shutil
+
+
+def copy_wav_file(src_file_path, dest_directory, file_names):
+    """Copy ``src_file_path`` into ``dest_directory`` once per entry of ``file_names``.
+
+    Args:
+        src_file_path: path of the file to copy.
+        dest_directory: directory receiving the copies; created together with
+            any missing parents when it does not exist yet.
+        file_names: names the copies get inside ``dest_directory``.
+    """
+    # exist_ok=True replaces the separate exists() check and its check-then-create race.
+    os.makedirs(dest_directory, exist_ok=True)
+
+    for file_name in file_names:
+        dest_file_path = os.path.join(dest_directory, file_name)
+        shutil.copy(src_file_path, dest_file_path)
+
+
+# Example usage
+src_file_path = '/path/to/your/source/file.wav'  # Update this with the path to your source file
+dest_directory = '/Users/isaacbevers/sensein/b2ai-wrapper/b2ai-data/b2ai-data-bids-like-curated/sub-0aeebf70-44a5-4537-af1b-1c24840f104d/ses-C63E6402-5ECC-45B5-8A57-6FE638A766A5/audio'
+file_names = [
+    "Animal-fluency_rec-Animal-fluency.wav",
+    "Audio-Check_rec-Audio-Check-1.wav",
+    "Audio-Check_rec-Audio-Check-2.wav",
+    "Audio-Check_rec-Audio-Check-3.wav",
+    "Audio-Check_rec-Audio-Check-4.wav",
+    "Diadochokinesis_rec-Diadochokinesis-buttercup.wav",
+    "Diadochokinesis_rec-Diadochokinesis-KA.wav",
+    "Diadochokinesis_rec-Diadochokinesis-PA.wav",
+    "Diadochokinesis_rec-Diadochokinesis-Pataka.wav",
+    "Diadochokinesis_rec-Diadochokinesis-TA.wav",
+    "Free-speech_rec-Free-speech-1.wav",
+    "Free-speech_rec-Free-speech-2.wav",
+    "Free-speech_rec-Free-speech-3.wav",
+    "Glides_rec-Glides-High-to-Low.wav",
+    "Glides_rec-Glides-Low-to-High.wav",
+    "Loudness_rec-Loudness.wav",
+    "Maximum-phonation-time_rec-Maximum-phonation-time-1.wav",
+    "Maximum-phonation-time_rec-Maximum-phonation-time-2.wav",
+    "Maximum-phonation-time_rec-Maximum-phonation-time-3.wav",
+    "Open-response-questions_rec-Open-response-questions.wav",
+    "Picture-description_rec-Picture-description.wav",
+    "Prolonged-vowel_rec-Prolonged-vowel.wav",
+    "Rainbow-Passage_rec-Rainbow-Passage.wav",
+    "Respiration-and-cough_rec-Respiration-and-cough-Breath-1.wav",
+    "Respiration-and-cough_rec-Respiration-and-cough-Breath-2.wav",
+    "Respiration-and-cough_rec-Respiration-and-cough-Cough-1.wav",
+    "Respiration-and-cough_rec-Respiration-and-cough-Cough-2.wav",
+    "Respiration-and-cough_rec-Respiration-and-cough-FiveBreaths-1.wav",
+    "Respiration-and-cough_rec-Respiration-and-cough-FiveBreaths-2.wav",
+    "Respiration-and-cough_rec-Respiration-and-cough-FiveBreaths-3.wav",
+    "Respiration-and-cough_rec-Respiration-and-cough-FiveBreaths-4.wav",
+    "Story-recall_rec-Story-recall.wav",
+]
+
+
+if __name__ == "__main__":
+    # Copy only when run as a script, so importing this module touches no files.
+    copy_wav_file(src_file_path, dest_directory, file_names)