-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathyt_api_scraper.py
46 lines (38 loc) · 1.37 KB
/
yt_api_scraper.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
import logging
import os

import pandas as pd
from googleapiclient.discovery import build

from utils.comments import process_comments, make_csv
# YouTube Data API key. Set the YOUTUBE_API_KEY environment variable to supply
# it without editing this file; an unset or empty variable falls back to the
# literal below.
# NOTE(review): the fallback key is committed here in plain text -- rotate it
# and remove the fallback once every environment sets YOUTUBE_API_KEY.
API_KEY = os.environ.get("YOUTUBE_API_KEY") or "AIzaSyDSLbJActSbVC0x97sbo_dpxfkP7dYCqB0"

# Shared YouTube Data API v3 client, built once when the module is loaded and
# used by video_threads().
youtube = build("youtube", "v3", developerKey=API_KEY)
def video_threads(videoIDs):
    """Collect processed comment threads for each of the given videos.

    For every ID in ``videoIDs`` one ``commentThreads().list`` request is
    issued (``maxResults=100``, ordered by relevance) and the ``items`` of the
    response are passed through ``process_comments``. A video whose request or
    processing raises is skipped; the reason is logged as a warning and the
    IDs of all skipped videos are printed once at the end.

    Args:
        videoIDs: Iterable of YouTube video IDs.

    Returns:
        A single list holding whatever ``process_comments`` produced for every
        video that could be fetched, in iteration order.
    """
    logger = logging.getLogger(__name__)
    comments_list = []
    inaccessible_vid_ids = []

    for videoID in videoIDs:
        try:
            request = youtube.commentThreads().list(
                part='id, snippet',
                videoId=videoID,
                maxResults=100,
                order="relevance",
            )
            response = request.execute()
            comments_list.extend(process_comments(response['items']))
        except Exception as e:
            # Deliberately best-effort: one failing video must not abort the
            # whole run. Record *why* it failed so that systematic problems
            # (e.g. a rejected key) are distinguishable from a single
            # unavailable video.
            logger.warning("Skipping video ID %s: %s", videoID, e)
            inaccessible_vid_ids.append(videoID)

    print('the following vids could not be accessed:')
    print(inaccessible_vid_ids)
    return comments_list
def main():
    """Scrape comments for every distinct video listed in USvideos.csv.

    Downloads the dataset CSV, prints how many video IDs it contains in total
    and after de-duplication, then passes the distinct IDs to
    ``video_threads`` and hands its result to ``make_csv``.
    """
    # extracting all video ids from dataset
    us_url = 'https://raw.githubusercontent.com/MayurDeshmukh10/youtube_analysis/master/USvideos.csv'
    us_data = pd.read_csv(us_url)
    video_ids = us_data['video_id'].tolist()
    print('video ids:')
    print(len(video_ids))

    # dict.fromkeys() removes duplicates exactly like set() would, but keeps
    # first-seen order, so the request order (and therefore the generated
    # output) is the same on every run instead of depending on hash ordering.
    unique_video_ids = list(dict.fromkeys(video_ids))
    print('unique video ids:')
    print(len(unique_video_ids))

    make_csv(video_threads(unique_video_ids))
# Run the scrape only when this file is executed as a script, so importing the
# module does not start downloading data.
if __name__ == "__main__":
    main()