-
Notifications
You must be signed in to change notification settings - Fork 0
/
scrape.R
38 lines (29 loc) · 976 Bytes
/
scrape.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
library(tidyverse)
library(rvest)
# Landing page of the DataCamp course to scrape.
course_url <- paste0(
  "https://www.datacamp.com/courses/",
  "working-with-hugging-face"
)

# href of every <a> nested in a div with the (generated-looking) class below.
# NOTE(review): presumably these are the per-exercise links; the class name
# is opaque, so confirm against the live page.
exercises <- html_attr(
  html_nodes(
    read_html(course_url),
    xpath = '//div[@class="css-1k6or5q"]//a'
  ),
  "href"
)
#' Build the HTML fragment for one exercise page
#'
#' Reads the page, keeps the first `listview__content` div verbatim, collects
#' the `exercise--instructions__content` divs found inside the second one,
#' and joins both with fixed "Instructions" / "Answer" headings.
#'
#' @param exercise Location of the exercise page; passed straight to
#'   `read_html()`.
#' @return A length-one character vector of HTML, or `NULL` when the page
#'   contains no `listview__content` div.
scrape <- function(exercise) {
  sections <- exercise %>%
    read_html() %>%
    html_nodes(xpath = '//div[@class="listview__content"]')

  if (length(sections) == 0) {
    return(NULL)
  }

  section1 <- sections[1] %>%
    as.character()

  # The leading "." anchors the XPath at the second section. A bare "//"
  # restarts the search from the document root and ignores the node it is
  # applied to. Pages with a single section get an empty instructions part
  # instead of indexing past the end of the node set.
  section2 <- if (length(sections) >= 2) {
    sections[2] %>%
      html_nodes(
        xpath = './/div[@class="exercise--instructions__content"]'
      ) %>%
      as.character() %>%
      paste(collapse = "")
  } else {
    ""
  }

  instructions_heading <- "<strong>Instructions</strong>"
  # Emitted last with nothing after it.
  # NOTE(review): presumably the answer is filled in afterwards; confirm.
  solutions_heading <- "<strong>Answer</strong>"

  paste0(section1, instructions_heading, section2, solutions_heading)
}
# Scrape every exercise link. scrape() returns NULL for pages without
# content, and unlist() drops those entries before writing.
output <- map(exercises, scrape)

# Pass the path itself rather than file("scrape.html"): writeLines() then
# opens and closes its own connection, so no connection object is left
# behind for the garbage collector to warn about.
writeLines(unlist(output), "scrape.html")