This repository has been archived by the owner on Oct 29, 2020. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathyoutube_channel.py
149 lines (114 loc) · 6.84 KB
/
youtube_channel.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
from requests import session
from youtube_util import getinitialdata, fullyexpand, getapikey, getlver
from time import sleep
# A single HTTP session shared by every request made from this module.
mysession = session()

# Download the YouTube homepage once and hand its HTML to the youtube_util
# helpers so the API key and client version are extracted automatically
# instead of being hard-coded.
_homepage_html = mysession.get("https://www.youtube.com/").text
API_KEY = getapikey(_homepage_html)
API_VERSION = getlver(getinitialdata(_homepage_html))
# The page text is not needed once both values have been extracted.
del _homepage_html

# Query-string parameters sent with each browse API request.
params = (("key", API_KEY),)

# Headers handed to fullyexpand() together with the session.
continuationheaders = {
    "x-youtube-client-name": "1",
    "x-youtube-client-version": API_VERSION,
    "Accept-Language": "en-US",
}
# Text of the "Sorry for the interruption" page served when too many requests
# come from one network; a response containing it is treated as a failed fetch.
_CAPTCHA_MARKER = (
    """</div><div id="content" class=" content-alignment" role="main"><p class='largeText'>Sorry for the interruption. We have been receiving a large volume of requests from your network.</p>\n"""
    """<p>To continue with your YouTube experience, please fill out the form below.</p>"""
)


def _post_browse(channelid, tab_params):
    """POST a browse request for one tab of a channel and return the parsed JSON.

    Retries every 30 seconds, indefinitely, until the API answers with HTTP 200.
    """
    payload = {
        "context": {
            "client": {
                "hl": "en",
                "gl": "US",
                "clientName": "WEB",
                "clientVersion": API_VERSION,
            }
        },
        "browseId": channelid,
        "params": tab_params,
    }
    while True:
        response = mysession.post(
            "https://www.youtube.com/youtubei/v1/browse", params=params, json=payload
        )
        if response.status_code == 200:
            return response.json()
        print("Non-200 API status code, waiting 30 seconds before retrying...")
        sleep(30)


def _fetch_shelf_initial_data(shelf_url):
    """GET a shelf page and return its initial data as parsed by getinitialdata().

    Retries every 30 seconds, indefinitely, until the response is HTTP 200 and
    does not contain the rate-limit interruption page.
    """
    while True:
        response = mysession.get("https://www.youtube.com/" + str(shelf_url))
        if response.status_code == 200 and _CAPTCHA_MARKER not in response.text:
            return getinitialdata(response.text)
        print("Non-200 status code, waiting 30 seconds before retrying...")
        sleep(30)


def _find_tab_indices(tabs):
    """Return (playlists_index, channels_index) within ``tabs``.

    A tab is recognised by the last path segment of its URL. Either index
    stays 0 when no matching tab is present.
    """
    playlists_index = 0
    channels_index = 0
    for index, tab in enumerate(tabs):
        if "tabRenderer" not in tab:
            continue
        tab_url = tab["tabRenderer"]["endpoint"]["commandMetadata"]["webCommandMetadata"]["url"]
        slug = tab_url.rsplit("/", 1)[-1]
        if slug == "playlists":
            playlists_index = index
        elif slug == "channels":
            channels_index = index
    return playlists_index, channels_index


def _tab_sections(browse_data, tab_index):
    """Return the section list of the tab at ``tab_index`` in a browse response."""
    tabs = browse_data["contents"]["twoColumnBrowseResultsRenderer"]["tabs"]
    return tabs[tab_index]["tabRenderer"]["content"]["sectionListRenderer"]["contents"]


def _iter_tab_items(browse_data, tab_index):
    """Yield every grid item of a tab, including items behind shelf links.

    Grids embedded directly in the tab are expanded and yielded first. Shelves
    only contribute a URL at that point; each distinct shelf URL is fetched
    afterwards and the first grid of the same tab on that page is expanded.
    """
    shelf_urls = set()
    for section in _tab_sections(browse_data, tab_index):
        entry = section["itemSectionRenderer"]["contents"][0]
        if "shelfRenderer" in entry:
            title_run = entry["shelfRenderer"]["title"]["runs"][0]
            shelf_urls.add(
                title_run["navigationEndpoint"]["commandMetadata"]["webCommandMetadata"]["url"]
            )
        elif "gridRenderer" in entry:
            yield from fullyexpand(entry["gridRenderer"], mysession, continuationheaders)["items"]

    for shelf_url in shelf_urls:
        shelf_data = _fetch_shelf_initial_data(shelf_url)
        first_section = _tab_sections(shelf_data, tab_index)[0]
        grid = first_section["itemSectionRenderer"]["contents"][0]["gridRenderer"]
        yield from fullyexpand(grid, mysession, continuationheaders)["items"]


def process_channel(channelid: str):
    """Collect the playlists and channels reachable from a channel's tabs.

    The channel's playlists tab is requested first; every playlist ID found is
    recorded, and when a playlist carries a byline its owner's browse ID is
    recorded as a channel. The channels tab is then requested and every listed
    channel ID is recorded as well.

    Args:
        channelid: Browse ID of the channel to inspect.

    Returns:
        A dict with two sets: ``"playlists"`` (playlist IDs) and
        ``"channels"`` (channel IDs).
    """
    playlists = set()
    channellist = set()

    # PLAYLISTS
    playlists_data = _post_browse(channelid, "EglwbGF5bGlzdHM%3D")
    # Both tab positions are taken from this first response and reused for
    # the later channels request and for shelf pages.
    playlists_index, channels_index = _find_tab_indices(
        playlists_data["contents"]["twoColumnBrowseResultsRenderer"]["tabs"]
    )
    for item in _iter_tab_items(playlists_data, playlists_index):
        renderer = item["gridPlaylistRenderer"]
        playlists.add(renderer["playlistId"])
        if "shortBylineText" in renderer:
            byline_run = renderer["shortBylineText"]["runs"][0]
            channellist.add(byline_run["navigationEndpoint"]["browseEndpoint"]["browseId"])

    # CHANNELS
    channels_data = _post_browse(channelid, "EghjaGFubmVscw%3D%3D")
    for item in _iter_tab_items(channels_data, channels_index):
        channellist.add(item["gridChannelRenderer"]["channelId"])

    return {"playlists": playlists, "channels": channellist}
if __name__ == "__main__":
    # Command-line entry point: each argument after the script name is passed
    # to process_channel() and the result is printed.
    from sys import argv

    # Iterate over a slice rather than aliasing and popping, so sys.argv is
    # not mutated and an empty argv does not raise IndexError.
    for channel in argv[1:]:
        print(process_channel(channel))
# SAMPLES:
# UCqj7Cz7revf5maW9g5pgNcg lots of playlists
# UCRwczJ_nk1t9IGHyHfHbXRQ Nathaniel Bandy - created playlists only, featured channels only
# UCo8bcnLyZH8tBIH9V1mLgqQ the odd 1 is out - shelf, way too many subscriptions
# UCfXIV2vThxEF8Hq2OE17AeQ no playlists or channels featured
# UCJqV2-l0jqAa7uYN8IGJW7w TONS OF SUBSCRIPTIONS, no featured channels
# UC_1nZUpPS6jFv5Pn3f85CaA TONS OF SUBSCRIPTIONS, some featured channels
# UCJOh5FKisc0hUlEeWFBlD-w no subscriptions, plenty of featured channels
# UC7fjJERoGTs_eOKk-nn7RMw fair number of featured channels