Skip to content

Commit 871aa74

Browse files
authored
Merge pull request #105 from koheiw/fix-wordvector
Fix wordvector
2 parents bf5539b + 72ca781 commit 871aa74

File tree

6 files changed

+22
-11
lines changed

6 files changed

+22
-11
lines changed

LSX.Rproj

+1
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
Version: 1.0
2+
ProjectId: 7985635d-e1dc-4855-b781-313f42a839cc
23

34
RestoreWorkspace: Default
45
SaveWorkspace: Default

R/as.textmodel.R

+7-3
Original file line numberDiff line numberDiff line change
@@ -103,11 +103,15 @@ as.textmodel_lss.textmodel_lss <- function(x, ...) {
103103
#' @export
104104
#' @method as.textmodel_lss textmodel_wordvector
105105
as.textmodel_lss.textmodel_wordvector <- function(x, ...) {
106+
if (is.null(x$values) && is.null(x$vectors))
107+
stop("x must be a valid textmodel_wordvector object")
106108
if (!requireNamespace("wordvector"))
107109
stop("wordvector package must be installed")
108-
if (is.null(x$vectors))
109-
stop("x must be a valid textmodel_wordvector object")
110-
result <- as.textmodel_lss(t(x$vectors), ...)
110+
if (!is.null(x$values)) {
111+
result <- as.textmodel_lss(t(x$values), ...)
112+
} else {
113+
result <- as.textmodel_lss(t(x$vectors), ...) # for wordvector v0.1.0
114+
}
111115
result$frequency <- x$frequency[names(result$beta)]
112116
return(result)
113117
}

tests/data/save.R

+13
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,13 @@
1+
require(quanteda)
2+
require(wordvector)
3+
4+
toks_test <- readRDS("../data/tokens_test.RDS")  # NOTE(review): "../data/" paths presumably assume the working directory is tests/testthat - confirm
5+
feat_test <- head(char_context(toks_test, "america*", min_count = 1, p = 0.05), 100)
6+
dfmt_test <- dfm(toks_test)
7+
seed_test <- as.seedwords(data_dictionary_sentiment)
8+
9+
lss_test <- textmodel_lss(dfmt_test, seed_test, terms = feat_test, k = 300)
10+
saveRDS(lss_test, "../data/lss_test.RDS")
11+
12+
w2v_test <- word2vec(head(toks_test, 10), min_count = 1)  # fit only on the first 10 elements of toks_test
13+
saveRDS(w2v_test, "../data/word2vec_test.RDS")  # same "../data/" location that test-as.textmodel.R reads the fixture from

tests/data/word2vec_test.RDS

44.8 KB
Binary file not shown.

tests/testthat/test-as.textmodel.R

+1-1
Original file line numberDiff line numberDiff line change
@@ -101,7 +101,7 @@ test_that("as.textmodel_lss works with textmodel_wordvector", {
101101
wdv <- readRDS("../data/word2vec_test.RDS")
102102
lss <- as.textmodel_lss(wdv, seed)
103103

104-
expect_equal(lss$embedding, t(wdv$vectors))
104+
expect_equal(lss$embedding, t(wdv$values))
105105
expect_identical(lss$frequency, wdv$frequency)
106106
expect_identical(names(lss$frequency), names(wdv$frequency))
107107

tests/testthat/test-utils.R

-7
Original file line numberDiff line numberDiff line change
@@ -1,13 +1,6 @@
11
require(quanteda)
22
require(ggplot2)
33

4-
# toks_test <- readRDS("../data/tokens_test.RDS")
5-
# feat_test <- head(char_context(toks_test, "america*", min_count = 1, p = 0.05), 100)
6-
# dfmt_test <- dfm(toks_test)
7-
# seed_test <- as.seedwords(data_dictionary_sentiment)
8-
# lss_test <- textmodel_lss(dfmt_test, seed_test, terms = feat_test, k = 300)
9-
# saveRDS(lss_test, "../data/lss_test.RDS")
10-
114
lss_test <- readRDS("../data/lss_test.RDS")
125

136
test_that("diagnosys works", {

0 commit comments

Comments
 (0)