From 5413cc49e1ff75425d3c0c2db6ab6ac661c6256f Mon Sep 17 00:00:00 2001 From: Barbara Vreede Date: Thu, 22 Aug 2024 11:46:30 +0200 Subject: [PATCH] allow for source = NULL in a talkr init (#100) * allow for source = NULL in a talkr init * check that existing columns aren't overwritten when source = NULL --- R/init.R | 9 ++++++++- man/init.Rd | 4 +++- tests/testthat/test-init.R | 27 +++++++++++++++++++++++++++ 3 files changed, 38 insertions(+), 2 deletions(-) diff --git a/R/init.R b/R/init.R index 17463d9..7d317d2 100644 --- a/R/init.R +++ b/R/init.R @@ -6,7 +6,9 @@ #' Initializing a talkr dataset is the first step in the talkr workflow. #' #' @param data A dataframe object -#' @param source The column name identifying the conversation source (e.g. a filename; is used as unique conversation ID) +#' @param source The column name identifying the conversation source +#' (e.g. a filename; is used as unique conversation ID). If there are no different +#' sources in the data, set this parameter to `NULL`. #' @param begin The column name with the begin time of the utterance (in milliseconds) #' @param end The column name with the end time of the utterance (in milliseconds) #' @param participant The column name with the participant who produced the utterance @@ -43,6 +45,11 @@ init <- function(data, data$end <- as.numeric(data$end) } + # ensure a `source` column exists; if it does not exist, create one + if(!"source" %in% names(data)){ + data$source <- "talkr" + } + # generate UIDs if("uid" %in% names(data)){ warning("Column 'uid' already exists in the dataset. This column will be renamed to `original_uid`.") diff --git a/man/init.Rd b/man/init.Rd index f6e64d0..65bc034 100644 --- a/man/init.Rd +++ b/man/init.Rd @@ -17,7 +17,9 @@ init( \arguments{ \item{data}{A dataframe object} -\item{source}{The column name identifying the conversation source (e.g. a filename; is used as unique conversation ID)} +\item{source}{The column name identifying the conversation source +(e.g. 
a filename; is used as unique conversation ID). If there are no different +sources in the data, set this parameter to `NULL`.} \item{begin}{The column name with the begin time of the utterance (in milliseconds)} diff --git a/tests/testthat/test-init.R b/tests/testthat/test-init.R index aea9e07..6edbe86 100644 --- a/tests/testthat/test-init.R +++ b/tests/testthat/test-init.R @@ -113,3 +113,30 @@ test_that("Warning is generated with existing UID column", { expect_true("original_uid" %in% names(talkr_dataset)) }) + +test_that("init works with source = NULL", { + expect_no_error(talkr_dataset <- init(dummy_data, + source = NULL, + begin = "col1", + end = "col2", + participant = "x", + utterance = "y")) + expect_false("source" %in% names(dummy_data)) + expect_true("source" %in% names(talkr_dataset)) + expected_UIDs <- c("talkr-0001-1", + "talkr-0002-2", + "talkr-0003-3", + "talkr-0004-4", + "talkr-0005-5") + expect_equal(talkr_dataset$uid, expected_UIDs) +}) + +test_that("init does not overwrite existing columns when source = NULL", { + data <- data.frame(begin = 1:4, + end = 5:8, + participant = "Person1", + utterance = "HelloWorld", + source = "A.txt") + talkr_dataset <- init(data, source = NULL) + expect_equal(talkr_dataset$source, data$source) +})