update versions, add option to use CUDA, prep new release

rowan-sl · Mar 27, 2023 · a6ba26d · a6ba26d
1 parent a51fbb4
commit a6ba26d
Show file tree

Hide file tree

Showing 5 changed files with 93 additions and 74 deletions.
diff --git a/coqui-tts/Cargo.toml b/coqui-tts/Cargo.toml
@@ -1,6 +1,6 @@
 [package]
 name = "coqui-tts"
-version = "0.1.0"
+version = "0.2.0"
 edition = "2021"
 license = "MIT OR Apache-2.0"
 authors = ["Rowan S-L <[email protected]>"]
@@ -18,5 +18,5 @@ categories = ["science", "api-bindings"]
 [dependencies]
 
 [dependencies.pyo3]
-version = "0.16.5"
+version = "0.18.2"
 features = ["auto-initialize"]
diff --git a/coqui-tts/README.md b/coqui-tts/README.md
@@ -12,4 +12,4 @@ feel free to ask in the [repo](https://github.com/rowan-sl/coqui-rs/tree/main)
 
 ## Python dependencies
 
-this depends on the `TTS` package, (currently supports 0.7.1). it is recommended to install it in a python virtual environment such as `venv`
+This depends on the `TTS` Python package (tested to work with versions up to v0.12.0). It is recommended to install it in a Python virtual environment such as `venv`.
diff --git a/coqui-tts/src/lib.rs b/coqui-tts/src/lib.rs
@@ -24,10 +24,12 @@ impl Synthesizer {
     /// this may spew out some text to stdout about initialization,
     /// this is from the python library and there is nothing that can be done about it
     ///
-    pub fn new(model: &str) -> Self {
+    pub fn new(model: &str, use_cuda: bool) -> Self {
         Python::with_gil(|py| {
             let locals: Py<PyDict> = PyDict::new(py).into();
-            locals.as_ref(py).borrow().setattr("model_name", model).unwrap();
+            let locals_ref = locals.as_ref(py).borrow();
+            locals_ref.set_item("model_name", model).unwrap();
+            locals_ref.set_item("use_cuda", use_cuda).unwrap();
             py.run(r#"
 from TTS.utils.synthesizer import Synthesizer
 from TTS.utils.manage import ModelManager
@@ -49,7 +51,7 @@ coqui_tts = Synthesizer(
     config_path,
     vocoder_checkpoint=vocoder_path,
     vocoder_config=vocoder_config_path,
-    use_cuda=False
+    use_cuda=use_cuda
 )
             "#, None, Some(locals.as_ref(py).borrow())).unwrap();
             Self { locals }

diff --git a/tts-testing/Cargo.toml b/tts-testing/Cargo.toml
@@ -6,10 +6,11 @@ edition = "2021"
 # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
 
 [dependencies]
-rodio = "0.15.0"
+rodio = "0.17.1"
+wav = "1.0.0"
 
 [dependencies.pyo3]
-version = "0.16.5"
+version = "0.18.2"
 features = ["auto-initialize"]
 
 [dependencies.coqui-tts]

diff --git a/tts-testing/src/main.rs b/tts-testing/src/main.rs
@@ -1,84 +1,100 @@
+use std::fs;
+use std::io::{self, Write};
+
+use coqui_tts::Synthesizer;
 use rodio::buffer::SamplesBuffer;
 use rodio::{OutputStream, Sink};
-use coqui_tts::Synthesizer;
 
 // use pyo3::{prelude::*, types::{PyDict, PyList}};
 
-
 fn main() {
     let (_stream, stream_handle) = OutputStream::try_default().unwrap();
     let sink = Sink::try_new(&stream_handle).unwrap();
 
     println!("Starting TTS");
-    let mut synth = Synthesizer::new("tts_models/en/ljspeech/tacotron2-DDC");
-    println!("creating audio");
-    let audio = synth.tts("The fast brown fox jumps over the lazy dog.");
-    let rate  = synth.sample_rate();
-    println!("playing audio at rate {}", rate);
-    // for i in 0..10 {
-    // Add a dummy source of the sake of the example.
-    sink.append(SamplesBuffer::new(1, rate as u32, audio.clone()));
-
-    // The sound plays in a separate thread. This call will block the current thread until the sink
-    // has finished playing all its queued sounds.
-    sink.sleep_until_end();
-    // }
-
-//     let audio = Python::aq(|py| {
-//         // py.eval("print(\"Hello, World!\")", None, None).unwrap();
-//         // let tts_manage = py.import("TTS.utils.manage").unwrap();
-//         // let manager_class = tts_manage.getattr("ModelManager").unwrap();
-//         // let manager_instance = manager_class.call0();
-
-//         let locals = PyDict::new(py);
-//         let globals = PyDict::new(py);
-//         py.run(r#"
-// from TTS.utils.synthesizer import Synthesizer
-// from TTS.utils.manage import ModelManager
-//         "#, Some(globals), Some(locals)).unwrap();
-
-//         py.run(r#"
-// # create instance of the coqui tts model manager
-// manager = ModelManager()
-// # download the model
-// (
-//     model_path,
-//     config_path,
-//     model_item,
-// ) = manager.download_model("tts_models/en/ljspeech/tacotron2-DDC")
-// # download the vocoder
-// vocoder_path, vocoder_config_path, _ = manager.download_model(
-//     model_item["default_vocoder"]
-// )
-// # create the coqui tts instance
-// coqui_tts = Synthesizer(
-//     model_path,
-//     config_path,
-//     vocoder_checkpoint=vocoder_path,
-//     vocoder_config=vocoder_config_path,
-//     use_cuda=False
-// )
-//         "#, Some(globals), Some(locals)).unwrap();
+    let mut synth = Synthesizer::new("tts_models/en/ljspeech/tacotron2-DDC", true);
+    print!("Ready\n>>");
+    io::stdout().flush().unwrap();
+    for input in io::stdin().lines() {
+        let mut input = input.unwrap();
+        if input.is_empty() {
+            continue;
+        }
+        // if !['.', '?', '!'].contains(&input.chars().last().unwrap()) {
+        // input.push('.')
+        // }
+        let audio = synth.tts(&input);
+        let rate = synth.sample_rate();
+        wav::write(
+            wav::Header::new(wav::WAV_FORMAT_IEEE_FLOAT, 1, rate as u32, 32),
+            &wav::BitDepth::ThirtyTwoFloat(audio.clone()),
+            &mut fs::OpenOptions::new()
+                .create(true)
+                .write(true)
+                .open("cursed_tts.wav")
+                .unwrap(),
+        )
+        .unwrap();
+        // break;
+        println!("playing audio at rate {}", rate);
+        sink.append(SamplesBuffer::new(1, rate as u32, audio.clone()));
+        sink.sleep_until_end();
+        print!("Ready\n>>");
+        io::stdout().flush().unwrap();
+    }
 
-//         let tts = locals.get_item("coqui_tts").unwrap();
-//         let example_text = "Hi.";
-//         let audio = tts.call_method1("tts", (example_text,)).unwrap().downcast::<PyList>().unwrap();
-//         let r_audio = audio.extract::<Vec<f32>>().unwrap();
-//         r_audio
-//     });
+    //     let audio = Python::aq(|py| {
+    //         // py.eval("print(\"Hello, World!\")", None, None).unwrap();
+    //         // let tts_manage = py.import("TTS.utils.manage").unwrap();
+    //         // let manager_class = tts_manage.getattr("ModelManager").unwrap();
+    //         // let manager_instance = manager_class.call0();
 
+    //         let locals = PyDict::new(py);
+    //         let globals = PyDict::new(py);
+    //         py.run(r#"
+    // from TTS.utils.synthesizer import Synthesizer
+    // from TTS.utils.manage import ModelManager
+    //         "#, Some(globals), Some(locals)).unwrap();
 
-//     let (_stream, stream_handle) = OutputStream::try_default().unwrap();
-//     let sink = Sink::try_new(&stream_handle).unwrap();
+    //         py.run(r#"
+    // # create instance of the coqui tts model manager
+    // manager = ModelManager()
+    // # download the model
+    // (
+    //     model_path,
+    //     config_path,
+    //     model_item,
+    // ) = manager.download_model("tts_models/en/ljspeech/tacotron2-DDC")
+    // # download the vocoder
+    // vocoder_path, vocoder_config_path, _ = manager.download_model(
+    //     model_item["default_vocoder"]
+    // )
+    // # create the coqui tts instance
+    // coqui_tts = Synthesizer(
+    //     model_path,
+    //     config_path,
+    //     vocoder_checkpoint=vocoder_path,
+    //     vocoder_config=vocoder_config_path,
+    //     use_cuda=False
+    // )
+    //         "#, Some(globals), Some(locals)).unwrap();
 
+    //         let tts = locals.get_item("coqui_tts").unwrap();
+    //         let example_text = "Hi.";
+    //         let audio = tts.call_method1("tts", (example_text,)).unwrap().downcast::<PyList>().unwrap();
+    //         let r_audio = audio.extract::<Vec<f32>>().unwrap();
+    //         r_audio
+    //     });
 
+    //     let (_stream, stream_handle) = OutputStream::try_default().unwrap();
+    //     let sink = Sink::try_new(&stream_handle).unwrap();
 
-//     for i in 0..10 {
-//         // Add a dummy source of the sake of the example.
-//         sink.append(SamplesBuffer::new(1, 22050, audio.clone()));
+    //     for i in 0..10 {
+    //         // Add a dummy source of the sake of the example.
+    //         sink.append(SamplesBuffer::new(1, 22050, audio.clone()));
 
-//         // The sound plays in a separate thread. This call will block the current thread until the sink
-//         // has finished playing all its queued sounds.
-//         sink.sleep_until_end();
-//     }
+    //         // The sound plays in a separate thread. This call will block the current thread until the sink
+    //         // has finished playing all its queued sounds.
+    //         sink.sleep_until_end();
+    //     }
 }
Original file line number	Diff line number	Diff line change
Expand Up		@@ -12,4 +12,4 @@ feel free to ask in the [repo](https://github.com/rowan-sl/coqui-rs/tree/main)

		## Python dependencies

		this depends on the `TTS` package, (currently supports 0.7.1). it is recommended to install it in a python virtual environment such as `venv`
		This depends on the `TTS` Python package (tested to work with versions up to v0.12.0). It is recommended to install it in a Python virtual environment such as `venv`.