Skip to content

Commit

Permalink
update main
Browse files Browse the repository at this point in the history
  • Loading branch information
mshin77 committed Jan 16, 2024
1 parent 38f2b84 commit c17b7d3
Show file tree
Hide file tree
Showing 10 changed files with 21 additions and 29 deletions.
1 change: 1 addition & 0 deletions DESCRIPTION
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@ Roxygen: list(markdown = TRUE)
RoxygenNote: 7.2.3
Imports:
dplyr,
DT,
ggplot2,
ggraph,
magrittr,
Expand Down
1 change: 1 addition & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@ import(stm)
import(tidyr)
import(tidytext)
import(widyr)
importFrom(DT,datatable)
importFrom(ggplot2,coord_flip)
importFrom(ggplot2,geom_point)
importFrom(ggplot2,ggplot)
Expand Down
28 changes: 12 additions & 16 deletions R/text_mining_functions.R
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,7 @@ preprocess_texts <-
split_tags = TRUE,
include_docvars = TRUE,
padding = FALSE,
verbose = TRUE)
verbose = FALSE)

# Convert the features of tokens to lowercase.
toks_lower <- quanteda::tokens_tolower(toks_clean,
Expand All @@ -58,13 +58,13 @@ preprocess_texts <-
quanteda::tokens_remove(quanteda::stopwords("en"),
valuetype = "glob",
window = 0,
verbose = TRUE,
verbose = FALSE,
padding = TRUE)

# Specify the minimum length in characters for tokens (at least 2).
toks_lower_no_stop_adj <- toks_lower_no_stop %>%
quanteda::tokens_select(min_nchar=2L,
verbose = TRUE)
verbose = FALSE)

return(toks_lower_no_stop_adj)
}
Expand All @@ -88,8 +88,7 @@ preprocess_texts <-
#' @examples
#' if(requireNamespace("quanteda")){
#' dfm <- SpecialEduTech %>%
#' preprocess_texts(text_field = "abstract",
#' verbose = FALSE) %>%
#' preprocess_texts(text_field = "abstract") %>%
#' quanteda::dfm()
#' dfm %>% plot_word_frequency(n = 20)
#' }
Expand Down Expand Up @@ -129,8 +128,7 @@ plot_word_frequency <-
#' @examples
#' if(requireNamespace("quanteda")){
#' dfm <- SpecialEduTech %>%
#' preprocess_texts(text_field = "abstract",
#' verbose = FALSE) %>%
#' preprocess_texts(text_field = "abstract") %>%
#' quanteda::dfm()
#' dfm %>% extract_frequent_word()
#' }
Expand Down Expand Up @@ -169,8 +167,7 @@ extract_frequent_word <-
#' @examples
#' if(requireNamespace("quanteda") && requireNamespace("tidytext")){
#' dfm <- SpecialEduTech %>%
#' preprocess_texts(text_field = "abstract",
#' verbose = FALSE) %>%
#' preprocess_texts(text_field = "abstract") %>%
#' quanteda::dfm()
#' data <- tidytext::tidy(stm_15, document_names = rownames(dfm), log = FALSE)
#' data %>% plot_topic_term(top_n = 10)
Expand Down Expand Up @@ -251,8 +248,7 @@ plot_topic_term <-
#' @examples
#' if(requireNamespace("quanteda") && requireNamespace("tidytext")){
#' dfm <- SpecialEduTech %>%
#' preprocess_texts(text_field = "abstract",
#' verbose = FALSE) %>%
#' preprocess_texts(text_field = "abstract") %>%
#' quanteda::dfm()
#' data <- tidytext::tidy(stm_15, document_names = rownames(dfm), log = FALSE)
#' data %>% examine_top_terms(top_n = 5)
Expand Down Expand Up @@ -295,8 +291,7 @@ examine_top_terms <-
#' @examples
#' if(requireNamespace("quanteda") && requireNamespace("tidytext")){
#' dfm <- SpecialEduTech %>%
#' preprocess_texts(text_field = "abstract",
#' verbose = FALSE) %>%
#' preprocess_texts(text_field = "abstract") %>%
#' quanteda::dfm()
#' data <- tidytext::tidy(stm_15, matrix = "gamma", document_names = rownames(dfm), log = FALSE)
#' data %>% topic_probability_plot(top_n = 15)
Expand Down Expand Up @@ -363,8 +358,7 @@ topic_probability_plot <-
#' @examples
#' if(requireNamespace("quanteda") && requireNamespace("tidytext")){
#' dfm <- SpecialEduTech %>%
#' preprocess_texts(text_field = "abstract",
#' verbose = FALSE) %>%
#' preprocess_texts(text_field = "abstract") %>%
#' quanteda::dfm()
#' data <- tidytext::tidy(stm_15, matrix = "gamma", document_names = rownames(dfm), log = FALSE)
#' data %>% topic_probability_table(top_n = 15)
Expand All @@ -374,6 +368,7 @@ topic_probability_plot <-
#' @import ggplot2
#' @importFrom magrittr %>%
#' @importFrom stats reorder
#' @importFrom DT datatable
#'
topic_probability_table <-
function(data, top_n, ...) {
Expand All @@ -396,7 +391,8 @@ topic_probability_table <-
levels = levelt)
topic_by_prevalence_table_output <- topic_by_prevalence_table %>%
select(topic, gamma) %>%
mutate_if(is.numeric, ~ round(., 3))
mutate_if(is.numeric, ~ round(., 3)) %>%
datatable(rownames = FALSE)

return(topic_by_prevalence_table_output)
}
2 changes: 1 addition & 1 deletion _pkgdown.yml
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
url: ~
url: https://mshin77.github.io/TextAnalysisR/
template:
bootstrap: 5

3 changes: 1 addition & 2 deletions man/examine_top_terms.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

3 changes: 1 addition & 2 deletions man/extract_frequent_word.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

3 changes: 1 addition & 2 deletions man/plot_topic_term.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

3 changes: 1 addition & 2 deletions man/plot_word_frequency.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

3 changes: 1 addition & 2 deletions man/topic_probability_plot.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

3 changes: 1 addition & 2 deletions man/topic_probability_table.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

0 comments on commit c17b7d3

Please sign in to comment.