-
Notifications
You must be signed in to change notification settings - Fork 0
/
get_PV_CRE.py
40 lines (30 loc) · 1.13 KB
/
get_PV_CRE.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
# Name: get_PV_CRE
# Purpose: Download EP PV and CRE documents in the requested languages (e.g. EN and RO) using a list of dates
# How to use: python get_PV_CRE.py dates_list.txt EN RO FR
# Date format in dates_list.txt, one per line: 20131011 [= October 11, 2013]
#
# Author: Filip
#
# Created: 4.11.2014
import sys
from align import align as func
def load_dates(fname):
    """Read the file at *fname* and return its lines as a list.

    Each entry keeps its trailing newline; the caller is responsible for
    stripping it.
    """
    with open(fname) as handle:
        # Iterating a text file yields the same lines readlines() would.
        return list(handle)
def make_link_pv(string, lang):
    """Build the europarl.europa.eu getDoc URL for a PV document.

    *string* is inserted between the fixed URL prefix and suffix, and
    *lang* is appended at the very end.
    """
    prefix = "http://www.europarl.europa.eu/sides/getDoc.do?pubRef=-//EP//TEXT+PV+"
    suffix = "+SIT+DOC+XML+V0//"
    # join() keeps the original contract: every part must already be a str.
    return "".join((prefix, string, suffix, lang))
def make_link_cre(string, lang):
    """Build the europarl.europa.eu getDoc URL for a CRE document.

    *string* is inserted between the fixed URL prefix and suffix, and
    *lang* is appended at the very end.
    """
    prefix = "http://www.europarl.europa.eu/sides/getDoc.do?pubRef=-//EP//TEXT+CRE+"
    suffix = "+ITEMS+DOC+XML+V0//"
    # join() keeps the original contract: every part must already be a str.
    return "".join((prefix, string, suffix, lang))
if __name__ == '__main__':
    # Command line: <dates file> followed by the language codes to fetch.
    if len(sys.argv) < 2:
        # Fail with a readable message instead of a bare IndexError.
        sys.exit("usage: python get_PV_CRE.py dates_list.txt LANG [LANG ...]")

    ep_dates = load_dates(sys.argv[1])  # collect dates from file
    languages = sys.argv[2:]  # collect language codes

    for raw_date in ep_dates:
        # strip() instead of strip('\n'): also drops '\r' and stray spaces so
        # files with Windows line endings do not corrupt the generated URL.
        date = raw_date.strip()
        if not date:
            # Blank lines (e.g. a trailing empty line) carry no date to fetch.
            continue
        # Or replace make_link_cre with make_link_pv (the 'CRE' argument
        # presumably needs to change as well; confirm against align.scraper).
        # TODO broken
        func.scraper(languages, make_link_cre, date, 'CRE')