-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathreddit-extract.py
34 lines (30 loc) · 1.1 KB
/
reddit-extract.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
import praw
import csv
import pandas as pd
# Dump every comment of each Reddit submission listed in url-list.csv into
# its own CSV file (file1.csv, file2.csv, ...), one output file per URL.

# The first argument is the PRAW site name; PRAW resolves the matching
# credentials from its praw.ini configuration.
reddit = praw.Reddit("reddit-extract", user_agent="reddit-extract")
urls = "url-list.csv"

# newline="" is the csv module's documented way to open files for csv.reader.
with open(urls, "r", newline="") as csvfile:
    csv_rows = list(csv.reader(csvfile, delimiter=","))

# csv.reader yields one list per input line, so the parsed file is a nested
# list ([[url, url, ...]]). The URL list is expected to sit on a single
# comma-separated line, so only the first row is used. An empty file yields
# no rows at all; treat that as "nothing to do" instead of raising IndexError.
first_row = csv_rows[0] if csv_rows else []

# Drop surrounding whitespace and skip blank cells (e.g. from a trailing
# comma) so they are never passed to PRAW as a URL.
datareader = [cell.strip() for cell in first_row if cell.strip()]

# `number` is the 1-based position of the URL; it is printed as a progress
# indicator and used to name the output file.
for number, row in enumerate(datareader, start=1):
    print(number)
    submission = reddit.submission(url=row)

    # limit=None asks PRAW to resolve every "load more comments" placeholder,
    # so the flattened list below contains only real comments.
    submission.comments.replace_more(limit=None)
    comments = submission.comments.list()

    df_rows = [
        [comment.author, comment.id, comment.score, comment.body]
        for comment in comments
    ]
    df = pd.DataFrame(
        df_rows,
        columns=[
            "Author",
            "Comment ID",
            "Score",
            "Body",
        ],
    )
    df.to_csv(f"file{number}.csv")