-
Notifications
You must be signed in to change notification settings - Fork 3
/
ninova_downloader.py
172 lines (120 loc) · 4.01 KB
/
ninova_downloader.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
import requests
import bs4
import getpass
import os
def login(s):
    """Prompt for credentials and submit the Ninova login form.

    The form page is fetched first so that every ``<input>`` already present
    on it is echoed back with its current value; the credentials are then
    filled in on top of that.

    Args:
        s: Session-like object providing ``get`` and ``post``; the same
            object is used for both requests.

    Returns:
        The response of the login POST.
    """
    username = input("username: ")
    # getpass keeps the password from being echoed to the terminal.
    password = getpass.getpass("pass: ")

    # Fetch the page that carries the login form.
    r = s.get('http://ninova.itu.edu.tr/kampus')
    inputs = bs4.BeautifulSoup(r.text, 'html.parser').find_all('input')

    # Start from the form's own fields. Inputs without a "name" attribute
    # cannot be submitted, so they are skipped instead of raising KeyError.
    data = {
        field['name']: field.attrs.get('value', '')
        for field in inputs
        if 'name' in field.attrs
    }
    data['__EVENTTARGET'] = ''
    data['__EVENTARGUMENT'] = ''
    # Plain strings: a trailing comma here would store a 1-tuple instead.
    data['ctl00$ContentPlaceHolder1$tbUserName'] = username
    data['ctl00$ContentPlaceHolder1$tbPassword'] = password

    # Post to the URL the GET actually ended at.
    return s.post(r.url, data=data)
def getPage(session, url):
    """Fetch *url* through *session* and return the parsed document.

    The URL of the response is printed before parsing.
    """
    response = session.get(url)
    print(response.url)
    markup = response.text
    return bs4.BeautifulSoup(markup, 'html.parser')
def getLinks(soup, filterString):
    """Return the ``a`` tags of *soup* whose text form contains *filterString*.

    Each tag is converted with ``str`` before the substring check, and the
    matching tags are returned in the order ``find_all`` yields them.
    """
    return [
        anchor
        for anchor in soup.find_all('a')
        if filterString in str(anchor)
    ]
def saveFile(r, name):
    """Write the body of response *r* to the file *name*.

    Args:
        r: Object whose ``content`` attribute holds the bytes to store.
        name: Path of the file to create or overwrite.
    """
    # The context manager closes the handle even if the write fails.
    with open(name, 'wb') as f:
        f.write(r.content)
def mkdir(classTag):
    """Create the folder for one class and return the folder's name.

    The name is the text of the ``span`` found by
    ``classTag.findPrevious('span')``. If a folder with that name already
    exists, ``' (dup)'`` is appended, repeatedly if necessary, until a free
    name is found. The folder is created relative to the current working
    directory, together with the ``dersDosyalari`` and ``sinifDosyalari``
    subfolders.

    Args:
        classTag: Tag of the class link.

    Returns:
        The name of the folder that was actually created.
    """
    name = classTag.findPrevious('span').text
    # Keep suffixing until os.mkdir succeeds; a single retry would still
    # fail once "<name> (dup)" itself exists.
    while True:
        try:
            os.mkdir(name)
        except FileExistsError:
            print('Folder already exists "'+name+'"')
            name = name+' (dup)'
        else:
            break
    # Build the subfolders by path so the working directory is never changed.
    for sub in ('dersDosyalari', 'sinifDosyalari'):
        os.mkdir(os.path.join(name, sub))
    return name
def capturePage(session, resourceTagList):
    """Download every resource in *resourceTagList* into the current directory.

    The ``src`` of the ``img`` preceding each tag decides what happens:

    * folder icon: a subfolder named after the tag text is created and
      entered, the folder page is fetched, and its links are captured
      recursively;
    * link icon: the entry is skipped;
    * anything else: the target is downloaded to a file named after the
      tag text.

    Hrefs are appended to the module-level ``url``.

    Args:
        session: Session used for all requests.
        resourceTagList: Iterable of link tags to process.
    """
    for tag in resourceTagList:
        # Look the icon up once instead of once per branch.
        icon = tag.findPrevious('img')['src']

        if icon == '/images/ds/link.png':
            # External links are left alone.
            continue

        if icon == '/images/ds/folder.png':
            root = os.getcwd()
            os.mkdir(tag.text)
            os.chdir(tag.text)
            try:
                soup = getPage(session, url + tag['href'])
                capturePage(session, getLinks(soup, 'Dosyalari?g'))
            finally:
                # Always return to the parent folder, even if the
                # subfolder capture failed part-way.
                os.chdir(root)
        else:
            r = session.get(url + tag['href'])
            # The context manager closes the handle even if the write fails.
            with open(tag.text, 'wb') as f:
                f.write(r.content)
def captureClass(session, classTag):
    """Download the lecture files and class files of one class.

    A folder for the class is created with ``mkdir``. The class's
    ``/DersDosyalari`` page is captured into the ``dersDosyalari`` subfolder
    and its ``/SinifDosyalari`` page into ``sinifDosyalari``. The working
    directory is restored before returning.

    Page addresses are built from the module-level ``url`` and the tag's
    ``href``.

    Args:
        session: Session used for all requests.
        classTag: Link tag of the class to download.
    """
    root = os.getcwd()
    # Use the arguments that were passed in, not module-level globals.
    name = mkdir(classTag)
    class_dir = os.path.join(root, name)

    sections = (
        ('/DersDosyalari', 'DersDosyalari?', 'dersDosyalari'),
        ('/SinifDosyalari', 'SinifDosyalari?', 'sinifDosyalari'),
    )
    try:
        for page, marker, folder in sections:
            pageSoup = getPage(session, url + classTag['href'] + page)
            links = getLinks(pageSoup, marker)
            # capturePage writes into the current directory.
            os.chdir(os.path.join(class_dir, folder))
            capturePage(session, links)
    finally:
        # Go back to where we started, even if a download failed.
        os.chdir(root)
# Base address that the relative links found on the pages are appended to.
url = 'http://ninova.itu.edu.tr'

# A single session is used for every request so cookies are kept between them.
s = requests.Session()

# Ask for the ITU credentials and submit the login form.
login(s)

# Fetch the campus page and pick out the class links on it.
kampusSoup = getPage(s, url + '/Kampus1')
classLinks = getLinks(kampusSoup, 'ErisimAgaci')

# Download each class that was found.
for link in classLinks:
    captureClass(s, link)