From b56fd73bf6d09a910f0b40b58e3da6b162219347 Mon Sep 17 00:00:00 2001
From: Jing Hua
Date: Fri, 30 Sep 2022 13:04:21 +0800
Subject: [PATCH] feat: manual device selection

Co-authored-by: Ayaka
---
 yt_whisper/cli.py | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/yt_whisper/cli.py b/yt_whisper/cli.py
index e2faac1..190acb0 100644
--- a/yt_whisper/cli.py
+++ b/yt_whisper/cli.py
@@ -21,10 +21,12 @@ def main():
     parser.add_argument("--task", type=str, default="transcribe", choices=[
                         "transcribe", "translate"], help="whether to perform X->X speech recognition ('transcribe') or X->English translation ('translate')")
+    parser.add_argument("--device", choices=("cpu", "cuda"), help="device to use for PyTorch inference")
 
     args = parser.parse_args().__dict__
     model_name: str = args.pop("model")
     output_dir: str = args.pop("output_dir")
+    device: str = args.pop("device")
     os.makedirs(output_dir, exist_ok=True)
 
     if model_name.endswith(".en"):
@@ -32,7 +34,7 @@ def main():
             f"{model_name} is an English-only model, forcing English detection.")
         args["language"] = "en"
 
-    model = whisper.load_model(model_name)
+    model = whisper.load_model(model_name, device=device)
     audios = get_audio(args.pop("video"))
 
     for title, audio_path in audios.items():
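
Example usage of the new flag, as a sketch: this assumes the package's console script is installed under the name yt_whisper and that omitting --device keeps whisper.load_model's default device selection (None, i.e. auto-detect).

    # force CPU inference even when a GPU is available
    yt_whisper "<video-url>" --model small --device cpu

    # explicitly request GPU inference
    yt_whisper "<video-url>" --device cuda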