Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
14 changes: 14 additions & 0 deletions .Rbuildignore
Original file line number Diff line number Diff line change
Expand Up @@ -10,3 +10,17 @@
^CLAUDE\.md$
^README\.md$
^\.github$
^outputs$
^scripts$
^man-md$
^ref_audio$
^ARCHITECTURE\.md$
^JIT_TRACE_OPTIMIZATION\.md$
^PseudoCode\.md$
^fyi\.md$
^output\.wav$
^test_output\.wav$
^validation_status\.csv$
^src/.*\.o$
^src/.*\.so$
^inst/audio$
18 changes: 10 additions & 8 deletions DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,15 +1,18 @@
Package: chatterbox
Title: Text-to-Speech Using Chatterbox TTS Engine
Version: 0.0.1
Version: 0.1.0
Authors@R:
person("Troy", "Hernandez", role = c("aut", "cre"),
email = "troy@example.com")
Author: Troy Hernandez [aut, cre]
Maintainer: Troy Hernandez <troy@example.com>
Description: An R port of the Chatterbox text-to-speech engine using torch.
Provides high-quality speech synthesis with voice cloning capabilities.
c(person("Troy", "Hernandez", role = c("aut", "cre"),
email = "troy@cornball.ai",
comment = c(ORCID = "0009-0005-4248-604X")),
person("Resemble AI", role = "cph"))
Description: An R port of the 'Chatterbox' text-to-speech engine
(see <https://github.com/resemble-ai/chatterbox>) using 'torch'.
Provides speech synthesis with voice cloning capabilities.
License: MIT + file LICENSE
Encoding: UTF-8
URL: https://github.com/cornball-ai/chatterbox
BugReports: https://github.com/cornball-ai/chatterbox/issues
Depends:
R (>= 4.0.0)
Imports:
Expand All @@ -23,4 +26,3 @@ Suggests:
av,
simplermarkdown
VignetteBuilder: simplermarkdown
RoxygenNote: 7.3.3
10 changes: 7 additions & 3 deletions NAMESPACE
Original file line number Diff line number Diff line change
@@ -1,7 +1,5 @@
# tinyrox says don't edit this manually, but it can't stop you!

useDynLib(chatterbox, .registration = TRUE)

export(chatterbox)
export(compute_mel_spectrogram)
export(compute_mel_spectrogram_ve)
Expand All @@ -11,10 +9,15 @@ export(create_s3gen_vocoder)
export(create_voice_embedding)
export(decode_tokens)
export(download_chatterbox_models)
export(download_chatterbox_turbo_models)
export(generate)
export(get_model_paths)
export(get_turbo_model_paths)
export(load_bpe_tokenizer)
export(load_campplus_weights)
export(load_chatterbox)
export(load_chatterbox_turbo)
export(load_gpt2_tokenizer)
export(load_hifigan_weights)
export(load_s3gen)
export(load_s3gen_weights)
Expand All @@ -27,10 +30,11 @@ export(resample_audio)
export(s3_tokenizer)
export(t3_inference)
export(t3_inference_cpp)
export(t3_inference_turbo)
export(text_to_tokens)
export(generate)
export(tts_chunked)
export(tts_to_file)
export(turbo_models_available)
export(write_audio)

S3method(print,chatterbox)
Expand Down
2 changes: 2 additions & 0 deletions R/audio_utils.R
Original file line number Diff line number Diff line change
Expand Up @@ -95,6 +95,8 @@ resample_audio <- function (samples, from_sr, to_sr)
#' @param n_mels Number of mel bins
#' @param fmin Minimum frequency
#' @param fmax Maximum frequency
#' @param norm Character. Normalization type. Default "slaney".
#' @param htk Logical. Use HTK formula. Default FALSE.
#' @return Mel filterbank matrix (n_mels x (n_fft/2 + 1))
create_mel_filterbank <- function (sr, n_fft, n_mels, fmin = 0, fmax = NULL,
norm = "slaney", htk = FALSE)
Expand Down
1 change: 1 addition & 0 deletions R/conformer.R
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
#' to the input.
#'
#' @param d_model Model dimension
#' @param dropout_rate Numeric. Dropout rate. Default 0.1.
#' @param max_len Maximum sequence length
#' @return nn_module
espnet_rel_positional_encoding <- torch::nn_module(
Expand Down
131 changes: 131 additions & 0 deletions R/download.R
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,24 @@ CHATTERBOX_FILES <- c(
# Approximate total model size, in MB
.model_size_mb <- 2000

# HuggingFace repository the turbo files are requested from
CHATTERBOX_TURBO_REPO <- "ResembleAI/chatterbox-turbo"

# Files requested from CHATTERBOX_TURBO_REPO. The order is kept as-is because
# the turbo download helpers iterate over this vector and build their result
# lists in the same order.
CHATTERBOX_TURBO_FILES <- c(
  # safetensors checkpoints
  "t3_turbo_v1.safetensors", "s3gen_meanflow.safetensors",
  "s3gen.safetensors", "ve.safetensors",
  "conds.pt",
  # JSON / text assets (presumably tokenizer files -- TODO confirm)
  "vocab.json", "merges.txt", "added_tokens.json",
  "special_tokens_map.json", "tokenizer_config.json"
)

# Approximate turbo model size, in MB (shown in the download consent prompt)
.turbo_model_size_mb <- 3800

#' Check if Models are Downloaded
#'
#' @return TRUE if all model files exist locally
Expand Down Expand Up @@ -130,3 +148,116 @@ get_model_paths <- function ()

paths
}

#' Check if Turbo Models are Downloaded
#'
#' Asks hfhub for every entry of CHATTERBOX_TURBO_FILES with
#' local_files_only = TRUE. The first lookup that raises an error ends the
#' check with FALSE. FALSE is also returned when the hfhub package is not
#' installed.
#'
#' @return TRUE if every lookup succeeds, FALSE otherwise
#' @export
turbo_models_available <- function() {
  has_hfhub <- requireNamespace("hfhub", quietly = TRUE)
  if (!has_hfhub) {
    return(FALSE)
  }

  # Local-only lookup of one file; errors when hfhub cannot resolve it locally.
  probe_cache <- function(file_name) {
    hfhub::hub_download(CHATTERBOX_TURBO_REPO, file_name,
                        local_files_only = TRUE)
  }

  # A single tryCatch around the whole sweep: the first error aborts the
  # remaining lookups and is converted to FALSE.
  tryCatch(
    {
      lapply(CHATTERBOX_TURBO_FILES, probe_cache)
      TRUE
    },
    error = function(e) FALSE
  )
}

#' Download Chatterbox Turbo Models from HuggingFace
#'
#' Download all Chatterbox Turbo model files from HuggingFace.
#' The turbo model uses a GPT-2 backbone and MeanFlow decoder for faster inference.
#'
#' Unless force is TRUE, nothing is downloaded when turbo_models_available()
#' already reports the files as present. Otherwise the download only proceeds
#' with consent: either options(chatterbox.consent = TRUE) is set, or the user
#' confirms an interactive prompt. In a non-interactive session without that
#' option the function stops. Each file that fails to download produces a
#' warning, and the function then stops with an error listing the files that
#' could not be fetched.
#'
#' @param force Re-download even if files exist
#' @return Named list of local file paths (invisibly), keyed by file name
#'   without its extension
#' @export
#' @examples
#' \dontrun{
#' download_chatterbox_turbo_models()
#' }
download_chatterbox_turbo_models <- function (force = FALSE)
{
  if (!requireNamespace("hfhub", quietly = TRUE)) {
    stop("hfhub package required. Install with: install.packages('hfhub')")
  }

  if (!force && turbo_models_available()) {
    message("Chatterbox Turbo models are already downloaded.")
    return(invisible(get_turbo_model_paths()))
  }

  if (isTRUE(getOption("chatterbox.consent"))) {
    # Consent already given
  } else if (interactive()) {
    ans <- utils::askYesNo(
      paste0("Download Chatterbox Turbo models (~", .turbo_model_size_mb,
             " MB) from HuggingFace?"),
      default = TRUE
    )
    # askYesNo() returns NA on cancel, so anything other than TRUE aborts.
    if (!isTRUE(ans)) {
      stop("Download cancelled.", call. = FALSE)
    }
  } else {
    stop(
      "Cannot download models in non-interactive mode without consent. ",
      "Run download_chatterbox_turbo_models() interactively first, ",
      "or set options(chatterbox.consent = TRUE) to allow downloads.",
      call. = FALSE
    )
  }

  message("Downloading Chatterbox Turbo models from HuggingFace (",
          CHATTERBOX_TURBO_REPO, ")...")

  paths <- list()
  for (f in CHATTERBOX_TURBO_FILES) {
    message(" ", f, "...")
    # Keep going after a failed file so every problem is reported at once.
    path <- tryCatch(
      hfhub::hub_download(CHATTERBOX_TURBO_REPO, f, force_download = force),
      error = function(e) {
        warning("Failed to download ", f, ": ", conditionMessage(e),
                call. = FALSE)
        NULL
      }
    )
    if (!is.null(path)) {
      paths[[tools::file_path_sans_ext(basename(f))]] <- path
    }
  }

  # Compare expected keys against what was actually stored, so the error can
  # name the missing files instead of only reporting that something failed.
  expected <- tools::file_path_sans_ext(basename(CHATTERBOX_TURBO_FILES))
  failed <- CHATTERBOX_TURBO_FILES[!(expected %in% names(paths))]
  if (length(failed) > 0) {
    stop(
      "Failed to download all turbo model files: ",
      paste(failed, collapse = ", "),
      call. = FALSE
    )
  }

  message("Turbo models downloaded successfully.")
  invisible(paths)
}

#' Get Paths to Downloaded Turbo Model Files
#'
#' Resolves every entry of CHATTERBOX_TURBO_FILES through hfhub with
#' local_files_only = TRUE. Stops if the hfhub package is not installed, or
#' if the lookup for any file raises an error.
#'
#' @return Named list of local file paths, keyed by file name without its
#'   extension
#' @export
get_turbo_model_paths <- function() {
  hfhub_missing <- !requireNamespace("hfhub", quietly = TRUE)
  if (hfhub_missing) {
    stop("hfhub package required. Install with: install.packages('hfhub')")
  }

  # Local-only lookup of a single file; any lookup error is replaced by a
  # message that points the user at the download function.
  locate_cached <- function(file_name) {
    tryCatch(
      hfhub::hub_download(CHATTERBOX_TURBO_REPO, file_name,
                          local_files_only = TRUE),
      error = function(e) {
        stop(
          "Turbo model file '", file_name, "' not found. ",
          "Run download_chatterbox_turbo_models() first.",
          call. = FALSE
        )
      }
    )
  }

  # List keys are the file names with directory and extension stripped.
  keys <- tools::file_path_sans_ext(basename(CHATTERBOX_TURBO_FILES))

  paths <- list()
  for (i in seq_along(CHATTERBOX_TURBO_FILES)) {
    paths[[keys[[i]]]] <- locate_cached(CHATTERBOX_TURBO_FILES[[i]])
  }

  paths
}
Loading