-
Notifications
You must be signed in to change notification settings - Fork 2
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
update versions, add option to use CUDA, prep new release
- Loading branch information
Showing
5 changed files
with
93 additions
and
74 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,6 +1,6 @@ | ||
[package] | ||
name = "coqui-tts" | ||
version = "0.1.0" | ||
version = "0.2.0" | ||
edition = "2021" | ||
license = "MIT OR Apache-2.0" | ||
authors = ["Rowan S-L <[email protected]>"] | ||
|
@@ -18,5 +18,5 @@ categories = ["science", "api-bindings"] | |
[dependencies] | ||
|
||
[dependencies.pyo3] | ||
version = "0.16.5" | ||
version = "0.18.2" | ||
features = ["auto-initialize"] |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,84 +1,100 @@ | ||
use std::fs; | ||
use std::io::{self, Write}; | ||
|
||
use coqui_tts::Synthesizer; | ||
use rodio::buffer::SamplesBuffer; | ||
use rodio::{OutputStream, Sink}; | ||
use coqui_tts::Synthesizer; | ||
|
||
// use pyo3::{prelude::*, types::{PyDict, PyList}}; | ||
|
||
|
||
fn main() { | ||
let (_stream, stream_handle) = OutputStream::try_default().unwrap(); | ||
let sink = Sink::try_new(&stream_handle).unwrap(); | ||
|
||
println!("Starting TTS"); | ||
let mut synth = Synthesizer::new("tts_models/en/ljspeech/tacotron2-DDC"); | ||
println!("creating audio"); | ||
let audio = synth.tts("The fast brown fox jumps over the lazy dog."); | ||
let rate = synth.sample_rate(); | ||
println!("playing audio at rate {}", rate); | ||
// for i in 0..10 { | ||
// Add a dummy source for the sake of the example. | ||
sink.append(SamplesBuffer::new(1, rate as u32, audio.clone())); | ||
|
||
// The sound plays in a separate thread. This call will block the current thread until the sink | ||
// has finished playing all its queued sounds. | ||
sink.sleep_until_end(); | ||
// } | ||
|
||
// let audio = Python::aq(|py| { | ||
// // py.eval("print(\"Hello, World!\")", None, None).unwrap(); | ||
// // let tts_manage = py.import("TTS.utils.manage").unwrap(); | ||
// // let manager_class = tts_manage.getattr("ModelManager").unwrap(); | ||
// // let manager_instance = manager_class.call0(); | ||
|
||
// let locals = PyDict::new(py); | ||
// let globals = PyDict::new(py); | ||
// py.run(r#" | ||
// from TTS.utils.synthesizer import Synthesizer | ||
// from TTS.utils.manage import ModelManager | ||
// "#, Some(globals), Some(locals)).unwrap(); | ||
|
||
// py.run(r#" | ||
// # create instance of the coqui tts model manager | ||
// manager = ModelManager() | ||
// # download the model | ||
// ( | ||
// model_path, | ||
// config_path, | ||
// model_item, | ||
// ) = manager.download_model("tts_models/en/ljspeech/tacotron2-DDC") | ||
// # download the vocoder | ||
// vocoder_path, vocoder_config_path, _ = manager.download_model( | ||
// model_item["default_vocoder"] | ||
// ) | ||
// # create the coqui tts instance | ||
// coqui_tts = Synthesizer( | ||
// model_path, | ||
// config_path, | ||
// vocoder_checkpoint=vocoder_path, | ||
// vocoder_config=vocoder_config_path, | ||
// use_cuda=False | ||
// ) | ||
// "#, Some(globals), Some(locals)).unwrap(); | ||
let mut synth = Synthesizer::new("tts_models/en/ljspeech/tacotron2-DDC", true); | ||
print!("Ready\n>>"); | ||
io::stdout().flush().unwrap(); | ||
for input in io::stdin().lines() { | ||
let mut input = input.unwrap(); | ||
if input.is_empty() { | ||
continue; | ||
} | ||
// if !['.', '?', '!'].contains(&input.chars().last().unwrap()) { | ||
// input.push('.') | ||
// } | ||
let audio = synth.tts(&input); | ||
let rate = synth.sample_rate(); | ||
wav::write( | ||
wav::Header::new(wav::WAV_FORMAT_IEEE_FLOAT, 1, rate as u32, 32), | ||
&wav::BitDepth::ThirtyTwoFloat(audio.clone()), | ||
&mut fs::OpenOptions::new() | ||
.create(true) | ||
.write(true) | ||
.open("cursed_tts.wav") | ||
.unwrap(), | ||
) | ||
.unwrap(); | ||
// break; | ||
println!("playing audio at rate {}", rate); | ||
sink.append(SamplesBuffer::new(1, rate as u32, audio.clone())); | ||
sink.sleep_until_end(); | ||
print!("Ready\n>>"); | ||
io::stdout().flush().unwrap(); | ||
} | ||
|
||
// let tts = locals.get_item("coqui_tts").unwrap(); | ||
// let example_text = "Hi."; | ||
// let audio = tts.call_method1("tts", (example_text,)).unwrap().downcast::<PyList>().unwrap(); | ||
// let r_audio = audio.extract::<Vec<f32>>().unwrap(); | ||
// r_audio | ||
// }); | ||
// let audio = Python::aq(|py| { | ||
// // py.eval("print(\"Hello, World!\")", None, None).unwrap(); | ||
// // let tts_manage = py.import("TTS.utils.manage").unwrap(); | ||
// // let manager_class = tts_manage.getattr("ModelManager").unwrap(); | ||
// // let manager_instance = manager_class.call0(); | ||
|
||
// let locals = PyDict::new(py); | ||
// let globals = PyDict::new(py); | ||
// py.run(r#" | ||
// from TTS.utils.synthesizer import Synthesizer | ||
// from TTS.utils.manage import ModelManager | ||
// "#, Some(globals), Some(locals)).unwrap(); | ||
|
||
// let (_stream, stream_handle) = OutputStream::try_default().unwrap(); | ||
// let sink = Sink::try_new(&stream_handle).unwrap(); | ||
// py.run(r#" | ||
// # create instance of the coqui tts model manager | ||
// manager = ModelManager() | ||
// # download the model | ||
// ( | ||
// model_path, | ||
// config_path, | ||
// model_item, | ||
// ) = manager.download_model("tts_models/en/ljspeech/tacotron2-DDC") | ||
// # download the vocoder | ||
// vocoder_path, vocoder_config_path, _ = manager.download_model( | ||
// model_item["default_vocoder"] | ||
// ) | ||
// # create the coqui tts instance | ||
// coqui_tts = Synthesizer( | ||
// model_path, | ||
// config_path, | ||
// vocoder_checkpoint=vocoder_path, | ||
// vocoder_config=vocoder_config_path, | ||
// use_cuda=False | ||
// ) | ||
// "#, Some(globals), Some(locals)).unwrap(); | ||
|
||
// let tts = locals.get_item("coqui_tts").unwrap(); | ||
// let example_text = "Hi."; | ||
// let audio = tts.call_method1("tts", (example_text,)).unwrap().downcast::<PyList>().unwrap(); | ||
// let r_audio = audio.extract::<Vec<f32>>().unwrap(); | ||
// r_audio | ||
// }); | ||
|
||
// let (_stream, stream_handle) = OutputStream::try_default().unwrap(); | ||
// let sink = Sink::try_new(&stream_handle).unwrap(); | ||
|
||
// for i in 0..10 { | ||
// // Add a dummy source for the sake of the example. | ||
// sink.append(SamplesBuffer::new(1, 22050, audio.clone())); | ||
// for i in 0..10 { | ||
// // Add a dummy source for the sake of the example. | ||
// sink.append(SamplesBuffer::new(1, 22050, audio.clone())); | ||
|
||
// // The sound plays in a separate thread. This call will block the current thread until the sink | ||
// // has finished playing all its queued sounds. | ||
// sink.sleep_until_end(); | ||
// } | ||
// // The sound plays in a separate thread. This call will block the current thread until the sink | ||
// // has finished playing all its queued sounds. | ||
// sink.sleep_until_end(); | ||
// } | ||
} |