-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy path05_articles.R
67 lines (57 loc) · 1.8 KB
/
05_articles.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
library(tidyverse)
library(feather)
library(tidytext)
# Load the prepared inputs ----
# Comment-side tables: the comments themselves and the per-user group lookup
# they are joined to on `by`.
comments <- read_feather("data/comments.feather")
user_groups <- read_feather("data/user_groups.feather")
# Articles, matched to comments on `id` when the title keywords are built.
articles <- read_feather("data/articles.feather")
# Table with `topic` and `word` columns, used for the comment keyword chart.
user_word_groups <- read_feather("data/user_word_groups.feather")
# Top words in title by topic ----
# Build one row per (topic, word) occurrence from article titles.
#
# Each comment is matched to its author's row in `user_groups` via `by`, then
# linked to an article by treating the comment's `parent` as the article `id`.
# Comments whose `parent` has no match in `articles` are dropped by the inner
# join. Titles are not de-duplicated, so a title contributes once per
# matching comment row and downstream counts are weighted accordingly.
# NOTE(review): `topic` presumably comes from `user_groups` -- confirm.
title_words <- comments %>%
  inner_join(user_groups, by = "by") %>%
  # Drop the existing `id` column (and `gamma`) so `parent` can be renamed to
  # `id` and used as the join key against `articles`.
  select(-c(id, gamma)) %>%
  rename(id = parent) %>%
  inner_join(articles, by = "id") %>%
  select(topic, title) %>%
  # Split each title into one lower-cased word per row.
  unnest_tokens(word, title) %>%
  # Name the key explicitly instead of relying on an implicit natural join:
  # this avoids the "Joining, by" message and keeps the match on `word` even
  # if either table later gains another shared column name.
  anti_join(stop_words, by = "word")
# Chart the 25 most frequent title words within each topic and save it.
title_words %>%
  # One row per (topic, word) with its number of rows in `n`.
  count(topic, word) %>%
  # Keep the 25 highest counts per topic: after sorting by descending `n`,
  # slice() takes the leading rows of every group (fewer if a topic has
  # fewer than 25 distinct words).
  group_by(topic) %>%
  arrange(desc(n)) %>%
  slice(1:25) %>%
  ungroup() %>%
  # Order the bars by count independently inside each facet; the bar order
  # comes from these factor levels, not from the row order of the data.
  mutate(word = reorder_within(word, n, topic)) %>%
  ggplot(aes(word, n, fill = as.factor(topic))) +
  geom_col(show.legend = FALSE) +
  coord_flip() +
  # Strip the suffix reorder_within() appended to the axis labels.
  scale_x_reordered() +
  labs(
    title = "Top 25 Title Keywords in Each Group",
    # The bars show word counts (`n`), so label the axis as a count rather
    # than as gamma.
    x = NULL, y = "Count"
  ) +
  facet_wrap(~ topic, ncol = 4, scales = "free")
# No plot object is passed, so ggsave() writes the most recently created
# plot, i.e. the chart built above.
ggsave(
  "plot/articles_top_title_keywords_by_group.png",
  width = 12, height = 8
)
# Top words in comments by topic ----
# Chart the 25 most frequent comment words within each topic and save it.
user_word_groups %>%
  # One row per (topic, word) with its number of rows in `n`.
  count(topic, word) %>%
  # Keep the 25 highest counts per topic: after sorting by descending `n`,
  # slice() takes the leading rows of every group (fewer if a topic has
  # fewer than 25 distinct words).
  group_by(topic) %>%
  arrange(desc(n)) %>%
  slice(1:25) %>%
  ungroup() %>%
  # Order the bars by count independently inside each facet; the bar order
  # comes from these factor levels, not from the row order of the data.
  mutate(word = reorder_within(word, n, topic)) %>%
  ggplot(aes(word, n, fill = as.factor(topic))) +
  geom_col(show.legend = FALSE) +
  coord_flip() +
  # Strip the suffix reorder_within() appended to the axis labels.
  scale_x_reordered() +
  labs(
    title = "Top 25 Comment Keywords in Each Group",
    # The bars show word counts (`n`), so label the axis as a count rather
    # than as gamma.
    x = NULL, y = "Count"
  ) +
  facet_wrap(~ topic, ncol = 4, scales = "free")
# No plot object is passed, so ggsave() writes the most recently created
# plot, i.e. the chart built above.
ggsave(
  "plot/articles_top_comment_keywords_by_group.png",
  width = 12, height = 8
)