update

mshin77 · Mar 21, 2024 · 3f283a6 · 3f283a6
1 parent a13deb6
commit 3f283a6
Show file tree

Hide file tree

Showing 14 changed files with 492 additions and 96 deletions.
diff --git a/.github/workflows/R-CMD-check.yaml b/.github/workflows/R-CMD-check.yaml
@@ -29,7 +29,7 @@ jobs:
       R_KEEP_PKG_SOURCE: yes
 
     steps:
-      - uses: actions/checkout@v3
+      - uses: actions/checkout@v4
 
       - uses: r-lib/actions/setup-pandoc@v2
 
@@ -47,3 +47,4 @@ jobs:
       - uses: r-lib/actions/check-r-package@v2
         with:
           upload-snapshots: true
+          build_args: 'c("--no-manual","--compact-vignettes=gs+qpdf")'
diff --git a/.github/workflows/pkgdown.yaml b/.github/workflows/pkgdown.yaml
@@ -22,7 +22,7 @@ jobs:
     permissions:
       contents: write
     steps:
-      - uses: actions/checkout@v3
+      - uses: actions/checkout@v4
 
       - uses: r-lib/actions/setup-pandoc@v2
 
@@ -41,7 +41,7 @@ jobs:
 
       - name: Deploy to GitHub pages 🚀
         if: github.event_name != 'pull_request'
-        uses: JamesIves/github-pages-deploy-action@v4.4.1
+        uses: JamesIves/github-pages-deploy-action@v4.5.0
         with:
           clean: false
           branch: gh-pages

diff --git a/.github/workflows/test-coverage.yaml b/.github/workflows/test-coverage.yaml
@@ -15,7 +15,7 @@ jobs:
       GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }}
 
     steps:
-      - uses: actions/checkout@v3
+      - uses: actions/checkout@v4
 
       - uses: r-lib/actions/setup-r@v2
         with:
@@ -39,12 +39,12 @@ jobs:
         if: always()
         run: |
           ## --------------------------------------------------------------------
-          find ${{ runner.temp }}/package -name 'testthat.Rout*' -exec cat '{}' \; || true
+          find '${{ runner.temp }}/package' -name 'testthat.Rout*' -exec cat '{}' \; || true
         shell: bash
 
       - name: Upload test results
         if: failure()
-        uses: actions/upload-artifact@v3
+        uses: actions/upload-artifact@v4
         with:
           name: coverage-test-failures
           path: ${{ runner.temp }}/package
diff --git a/DESCRIPTION b/DESCRIPTION
@@ -10,15 +10,14 @@ Language: en-US
 Encoding: UTF-8
 LazyData: true
 Roxygen: list(markdown = TRUE)
-RoxygenNote: 7.2.3
+RoxygenNote: 7.3.1
 Imports: 
     dplyr,
     DT,
     ggplot2,
     ggraph,
     magrittr,
     numform,
-    purrr,
     quanteda,
     quanteda.textstats,
     plotly,

diff --git a/NAMESPACE b/NAMESPACE
@@ -31,6 +31,5 @@ importFrom(rlang,.env)
 importFrom(rlang,enquos)
 importFrom(stats,reorder)
 importFrom(tibble,tibble)
-importFrom(tidyr,unnest)
 importFrom(tidytext,reorder_within)
 importFrom(tidytext,scale_x_reordered)
diff --git a/R/text_mining_functions.R b/R/text_mining_functions.R
@@ -114,24 +114,20 @@ plot_word_frequency <-
     }
 
 
-# Display text mining results from the structural topic model ----
-
-#' @title Plot topic per-term per-topic probabilities
+#' @title Examine highest per-term per-topic probabilities
 #'
-#' @name plot_topic_term
+#' @name examine_top_terms
 #'
 #' @description
-#' Plot per-term per-topic probabilities with highest word probabilities.
+#' Examine highest per-term per-topic probabilities.
 #'
 #' @param data A tidy data frame that includes per-term per-topic probabilities (beta).
 #' @param top_n A number of highest per-term per-topic probabilities in each document (number of top_n can be changed).
-#' @param ncol A number of columns in the facet plot.
-#' @param topic_names (Labeled) topic names
 #' @param ... Further arguments passed to \code{dplyr::group_by}.
 #'
 #' @export
-#' @return A ggplot object output from \code{stm::stm}, \code{tidytext::tidy}, and \code{ggplot2::ggplot}.
-#' The result is a ggplot object representing the topic-term plot.
+#' @return A tibble (data frame) object with a list of word probabilities from \code{tidytext::tidy}.
+#' The result is a data frame containing word probabilities for each topic.
 #'
 #' @examples
 #' suppressWarnings({
@@ -140,81 +136,45 @@ plot_word_frequency <-
 #'        preprocess_texts(text_field = "abstract") %>%
 #'        quanteda::dfm()
 #' data <- tidytext::tidy(stm_15, document_names = rownames(dfm), log = FALSE)
-#' data %>% plot_topic_term(top_n = 2, ncol = 3)
+#' data %>% examine_top_terms(top_n = 5) %>%
+#' dplyr::mutate_if(is.numeric, ~ round(., 3)) %>%
+#' DT::datatable(rownames = FALSE)
 #' }
 #' })
 #'
 #' @import dplyr
-#' @import ggplot2
 #' @importFrom magrittr %>%
 #' @importFrom rlang := enquos
-#' @importFrom tidytext scale_x_reordered reorder_within
 #'
-plot_topic_term <-
-    function(data, top_n, ncol = 3, topic_names = NULL, ...) {
-
-        topic_term_plot <- data %>%
-            group_by(topic, ...) %>%
-            top_n(top_n, beta) %>%
-            ungroup() %>%
-            mutate(
-                ord = factor(topic, levels = c(min(topic): max(topic))),
-                tt = as.numeric(topic),
-                topic = paste("Topic", topic),
-                term = reorder_within(term, beta, topic)) %>%
-            arrange(ord)
-        levelt = paste("Topic", topic_term_plot$ord) %>% unique()
-        topic_term_plot$topic = factor(topic_term_plot$topic,
-                                       levels = levelt)
-        if(!is.null(topic_names)){
-            topic_term_plot$topic = topic_names[topic_term_plot$tt]
-            topic_term_plot <- topic_term_plot %>%
-                mutate(topic = as.character(topic)) %>%
-                mutate(topic = ifelse(!is.na(topic), topic, paste('Topic',tt)))
-            topic_term_plot$topic =
-                factor(topic_term_plot$topic, levels = topic_term_plot$topic %>% unique())
-        }
-        topic_term_plot$tt = NULL
-        topic_term_plot <- topic_term_plot %>%
-            ggplot(aes(term, beta, fill = topic)) +
-            geom_col(show.legend = FALSE, alpha = 0.8) +
-            facet_wrap(~ topic, scales = "free", ncol = ncol) +
-            scale_x_reordered() +
-            scale_y_continuous(labels = numform::ff_num(zero = 0, digits = 3)) +
-            coord_flip() +
-            xlab("") +
-            ylab("Word probability") +
-            theme_minimal(base_size = 12) +
-            theme(
-                panel.grid.major = element_blank(),
-                panel.grid.minor = element_blank(),
-                axis.line = element_line(color = "#3B3B3B", size = 0.3),
-                axis.ticks = element_line(color = "#3B3B3B", size = 0.3),
-                strip.text.x = element_text(size = 12, color = "#3B3B3B"),
-                axis.text.x = element_text(size = 12, color = "#3B3B3B"),
-                axis.text.y = element_text(size = 12, color = "#3B3B3B"),
-                axis.title = element_text(size = 12, color = "#3B3B3B"),
-                axis.title.x = element_text(margin = margin(t = 7)),
-                axis.title.y = element_text(margin = margin(r = 7)))
-
-        return(topic_term_plot)
-    }
+examine_top_terms <-
 
+  function(data, top_n, ...) {
+    topic_term <- data %>%
+      group_by(topic, ...) %>%
+      top_n(top_n, beta) %>%
+      ungroup()
 
-#' @title Examine highest per-term per-topic probabilities
+    return(topic_term)
+  }
+
+
+# Display text mining results from the structural topic model ----
+
+#' @title Plot topic per-term per-topic probabilities
 #'
-#' @name examine_top_terms
+#' @name plot_topic_term
 #'
 #' @description
-#' Examine highest per-term per-topic probabilities.
+#' Plot per-term per-topic probabilities with highest word probabilities.
 #'
 #' @param data A tidy data frame that includes per-term per-topic probabilities (beta).
-#' @param top_n A number of highest per-term per-topic probabilities in each document (number of top_n can be changed).
+#' @param ncol A number of columns in the facet plot.
+#' @param topic_names (Labeled) topic names
 #' @param ... Further arguments passed to \code{dplyr::group_by}.
 #'
 #' @export
-#' @return A tibble (data frame) object with a list of word probabilities from \code{tidytext::tidy}.
-#' The result is a data frame containing word probabilities for each topic.
+#' @return A ggplot object output from \code{stm::stm}, \code{tidytext::tidy}, and \code{ggplot2::ggplot}.
+#' The result is a ggplot object representing the topic-term plot.
 #'
 #' @examples
 #' suppressWarnings({
@@ -223,30 +183,65 @@ plot_topic_term <-
 #'        preprocess_texts(text_field = "abstract") %>%
 #'        quanteda::dfm()
 #' data <- tidytext::tidy(stm_15, document_names = rownames(dfm), log = FALSE)
-#' data %>% examine_top_terms(top_n = 5) %>% DT::datatable(rownames = FALSE)
+#' data %>% examine_top_terms(top_n = 2) %>%
+#' plot_topic_term(ncol = 3)
 #' }
 #' })
 #'
 #' @import dplyr
+#' @import ggplot2
 #' @importFrom magrittr %>%
 #' @importFrom rlang := enquos
-#' @importFrom tidyr unnest
-#' @importFrom DT datatable
+#' @importFrom tidytext scale_x_reordered reorder_within
 #'
-examine_top_terms <-
+plot_topic_term <-
+  function(data, ncol = ncol, topic_names = NULL, ...) {
 
-  function(data, top_n, ...) {
-    top_terms <- data %>%
-      arrange(beta) %>%
-      group_by(topic, ...) %>%
-      top_n(top_n, beta) %>%
-      arrange(beta) %>%
-      select(topic, term) %>%
-      summarise(terms = list(term)) %>%
-      mutate(terms = purrr::map(terms, paste, collapse = ", ")) %>%
-      unnest(cols = c(terms))
+    topic_term <- data %>%
+      mutate(
+        ord = factor(topic, levels = c(min(topic): max(topic))),
+        tt = as.numeric(topic),
+        topic = paste("Topic", topic),
+        term = reorder_within(term, beta, topic)) %>%
+      arrange(ord)
+
+    levelt = paste("Topic", topic_term$ord) %>% unique()
+
+    topic_term$topic = factor(topic_term$topic,
+                              levels = levelt)
+    if(!is.null(topic_names)){
+      topic_term$topic = topic_names[topic_term$tt]
+      topic_term <- topic_term %>%
+        mutate(topic = as.character(topic)) %>%
+        mutate(topic = ifelse(!is.na(topic), topic, paste('Topic',tt)))
+      topic_term$topic =
+        factor(topic_term$topic, levels = topic_term$topic %>% unique())
+    }
+
+    topic_term$tt = NULL
+
+    topic_term_plot <- ggplot(topic_term, aes(term, beta, fill = topic)) +
+      geom_col(show.legend = FALSE, alpha = 0.8) +
+      facet_wrap(~ topic, scales = "free", ncol = ncol) +
+      scale_x_reordered() +
+      scale_y_continuous(labels = numform::ff_num(zero = 0, digits = 3)) +
+      coord_flip() +
+      xlab("") +
+      ylab("Word probability") +
+      theme_minimal(base_size = 12) +
+      theme(
+        panel.grid.major = element_blank(),
+        panel.grid.minor = element_blank(),
+        axis.line = element_line(color = "#3B3B3B", size = 0.3),
+        axis.ticks = element_line(color = "#3B3B3B", size = 0.3),
+        strip.text.x = element_text(size = 12, color = "#3B3B3B"),
+        axis.text.x = element_text(size = 12, color = "#3B3B3B"),
+        axis.text.y = element_text(size = 12, color = "#3B3B3B"),
+        axis.title = element_text(size = 12, color = "#3B3B3B"),
+        axis.title.x = element_text(margin = margin(t = 7)),
+        axis.title.y = element_text(margin = margin(r = 7)))
 
-    return(top_terms)
+    return(topic_term_plot)
   }
 
 

diff --git a/codecov.yml b/codecov.yml
@@ -0,0 +1,14 @@
+comment: false
+
+coverage:
+  status:
+    project:
+      default:
+        target: auto
+        threshold: 1%
+        informational: true
+    patch:
+      default:
+        target: auto
+        threshold: 1%
+        informational: true
diff --git a/inst/TextAnalysisR.app/source/SpecialEduTech.rda b/inst/TextAnalysisR.app/source/SpecialEduTech.rda
diff --git a/inst/TextAnalysisR.app/source/dictionary_list_1.rda b/inst/TextAnalysisR.app/source/dictionary_list_1.rda
diff --git a/inst/TextAnalysisR.app/source/dictionary_list_2.rda b/inst/TextAnalysisR.app/source/dictionary_list_2.rda
diff --git a/inst/TextAnalysisR.app/source/stopwords_list.rda b/inst/TextAnalysisR.app/source/stopwords_list.rda