Skip to content

Commit

Permalink
update versions, add option to use CUDA, prep new release
Browse files Browse the repository at this point in the history
  • Loading branch information
rowan-sl committed Mar 27, 2023
1 parent a51fbb4 commit a6ba26d
Show file tree
Hide file tree
Showing 5 changed files with 93 additions and 74 deletions.
4 changes: 2 additions & 2 deletions coqui-tts/Cargo.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[package]
name = "coqui-tts"
version = "0.1.0"
version = "0.2.0"
edition = "2021"
license = "MIT OR Apache-2.0"
authors = ["Rowan S-L <[email protected]>"]
Expand All @@ -18,5 +18,5 @@ categories = ["science", "api-bindings"]
[dependencies]

[dependencies.pyo3]
version = "0.16.5"
version = "0.18.2"
features = ["auto-initialize"]
2 changes: 1 addition & 1 deletion coqui-tts/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -12,4 +12,4 @@ feel free to ask in the [repo](https://github.com/rowan-sl/coqui-rs/tree/main)

## Python dependencies

this depends on the `TTS` package, (currently supports 0.7.1). it is recommended to install it in a python virtual environment such as `venv`
This depends on the `TTS` package (tested to work with versions up to v0.12.0). It is recommended to install it in a Python virtual environment such as `venv`.
8 changes: 5 additions & 3 deletions coqui-tts/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -24,10 +24,12 @@ impl Synthesizer {
/// This may print some initialization text to stdout.
/// That output comes from the Python library, and there is nothing that can be done about it.
///
pub fn new(model: &str) -> Self {
pub fn new(model: &str, use_cuda: bool) -> Self {
Python::with_gil(|py| {
let locals: Py<PyDict> = PyDict::new(py).into();
locals.as_ref(py).borrow().setattr("model_name", model).unwrap();
let locals_ref = locals.as_ref(py).borrow();
locals_ref.set_item("model_name", model).unwrap();
locals_ref.set_item("use_cuda", use_cuda).unwrap();
py.run(r#"
from TTS.utils.synthesizer import Synthesizer
from TTS.utils.manage import ModelManager
Expand All @@ -49,7 +51,7 @@ coqui_tts = Synthesizer(
config_path,
vocoder_checkpoint=vocoder_path,
vocoder_config=vocoder_config_path,
use_cuda=False
use_cuda=use_cuda
)
"#, None, Some(locals.as_ref(py).borrow())).unwrap();
Self { locals }
Expand Down
5 changes: 3 additions & 2 deletions tts-testing/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -6,10 +6,11 @@ edition = "2021"
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html

[dependencies]
rodio = "0.15.0"
rodio = "0.17.1"
wav = "1.0.0"

[dependencies.pyo3]
version = "0.16.5"
version = "0.18.2"
features = ["auto-initialize"]

[dependencies.coqui-tts]
Expand Down
148 changes: 82 additions & 66 deletions tts-testing/src/main.rs
Original file line number Diff line number Diff line change
@@ -1,84 +1,100 @@
use std::fs;
use std::io::{self, Write};

use coqui_tts::Synthesizer;
use rodio::buffer::SamplesBuffer;
use rodio::{OutputStream, Sink};
use coqui_tts::Synthesizer;

// use pyo3::{prelude::*, types::{PyDict, PyList}};


fn main() {
let (_stream, stream_handle) = OutputStream::try_default().unwrap();
let sink = Sink::try_new(&stream_handle).unwrap();

println!("Starting TTS");
let mut synth = Synthesizer::new("tts_models/en/ljspeech/tacotron2-DDC");
println!("creating audio");
let audio = synth.tts("The fast brown fox jumps over the lazy dog.");
let rate = synth.sample_rate();
println!("playing audio at rate {}", rate);
// for i in 0..10 {
// Add a dummy source for the sake of the example.
sink.append(SamplesBuffer::new(1, rate as u32, audio.clone()));

// The sound plays in a separate thread. This call will block the current thread until the sink
// has finished playing all its queued sounds.
sink.sleep_until_end();
// }

// let audio = Python::aq(|py| {
// // py.eval("print(\"Hello, World!\")", None, None).unwrap();
// // let tts_manage = py.import("TTS.utils.manage").unwrap();
// // let manager_class = tts_manage.getattr("ModelManager").unwrap();
// // let manager_instance = manager_class.call0();

// let locals = PyDict::new(py);
// let globals = PyDict::new(py);
// py.run(r#"
// from TTS.utils.synthesizer import Synthesizer
// from TTS.utils.manage import ModelManager
// "#, Some(globals), Some(locals)).unwrap();

// py.run(r#"
// # create instance of the coqui tts model manager
// manager = ModelManager()
// # download the model
// (
// model_path,
// config_path,
// model_item,
// ) = manager.download_model("tts_models/en/ljspeech/tacotron2-DDC")
// # download the vocoder
// vocoder_path, vocoder_config_path, _ = manager.download_model(
// model_item["default_vocoder"]
// )
// # create the coqui tts instance
// coqui_tts = Synthesizer(
// model_path,
// config_path,
// vocoder_checkpoint=vocoder_path,
// vocoder_config=vocoder_config_path,
// use_cuda=False
// )
// "#, Some(globals), Some(locals)).unwrap();
let mut synth = Synthesizer::new("tts_models/en/ljspeech/tacotron2-DDC", true);
print!("Ready\n>>");
io::stdout().flush().unwrap();
for input in io::stdin().lines() {
let mut input = input.unwrap();
if input.is_empty() {
continue;
}
// if !['.', '?', '!'].contains(&input.chars().last().unwrap()) {
// input.push('.')
// }
let audio = synth.tts(&input);
let rate = synth.sample_rate();
wav::write(
wav::Header::new(wav::WAV_FORMAT_IEEE_FLOAT, 1, rate as u32, 32),
&wav::BitDepth::ThirtyTwoFloat(audio.clone()),
&mut fs::OpenOptions::new()
.create(true)
.write(true)
.open("cursed_tts.wav")
.unwrap(),
)
.unwrap();
// break;
println!("playing audio at rate {}", rate);
sink.append(SamplesBuffer::new(1, rate as u32, audio.clone()));
sink.sleep_until_end();
print!("Ready\n>>");
io::stdout().flush().unwrap();
}

// let tts = locals.get_item("coqui_tts").unwrap();
// let example_text = "Hi.";
// let audio = tts.call_method1("tts", (example_text,)).unwrap().downcast::<PyList>().unwrap();
// let r_audio = audio.extract::<Vec<f32>>().unwrap();
// r_audio
// });
// let audio = Python::aq(|py| {
// // py.eval("print(\"Hello, World!\")", None, None).unwrap();
// // let tts_manage = py.import("TTS.utils.manage").unwrap();
// // let manager_class = tts_manage.getattr("ModelManager").unwrap();
// // let manager_instance = manager_class.call0();

// let locals = PyDict::new(py);
// let globals = PyDict::new(py);
// py.run(r#"
// from TTS.utils.synthesizer import Synthesizer
// from TTS.utils.manage import ModelManager
// "#, Some(globals), Some(locals)).unwrap();

// let (_stream, stream_handle) = OutputStream::try_default().unwrap();
// let sink = Sink::try_new(&stream_handle).unwrap();
// py.run(r#"
// # create instance of the coqui tts model manager
// manager = ModelManager()
// # download the model
// (
// model_path,
// config_path,
// model_item,
// ) = manager.download_model("tts_models/en/ljspeech/tacotron2-DDC")
// # download the vocoder
// vocoder_path, vocoder_config_path, _ = manager.download_model(
// model_item["default_vocoder"]
// )
// # create the coqui tts instance
// coqui_tts = Synthesizer(
// model_path,
// config_path,
// vocoder_checkpoint=vocoder_path,
// vocoder_config=vocoder_config_path,
// use_cuda=False
// )
// "#, Some(globals), Some(locals)).unwrap();

// let tts = locals.get_item("coqui_tts").unwrap();
// let example_text = "Hi.";
// let audio = tts.call_method1("tts", (example_text,)).unwrap().downcast::<PyList>().unwrap();
// let r_audio = audio.extract::<Vec<f32>>().unwrap();
// r_audio
// });

// let (_stream, stream_handle) = OutputStream::try_default().unwrap();
// let sink = Sink::try_new(&stream_handle).unwrap();

// for i in 0..10 {
// // Add a dummy source for the sake of the example.
// sink.append(SamplesBuffer::new(1, 22050, audio.clone()));
// for i in 0..10 {
// // Add a dummy source for the sake of the example.
// sink.append(SamplesBuffer::new(1, 22050, audio.clone()));

// // The sound plays in a separate thread. This call will block the current thread until the sink
// // has finished playing all its queued sounds.
// sink.sleep_until_end();
// }
// // The sound plays in a separate thread. This call will block the current thread until the sink
// // has finished playing all its queued sounds.
// sink.sleep_until_end();
// }
}

0 comments on commit a6ba26d

Please sign in to comment.