File tree 4 files changed +14
-6
lines changed
4 files changed +14
-6
lines changed Original file line number Diff line number Diff line change 7
7
OAI_BASEURL = 'http://services.kb.nl/mdo/oai'
8
8
9
9
10
- class oai () :
10
+ class oai :
11
11
12
12
"""
13
13
OAI interface to the National Library of the Netherlands.
Original file line number Diff line number Diff line change 1
1
import sys
2
2
import requests
3
3
import urllib
4
- import pprint
4
+ import pprint
5
5
6
6
try :
7
7
from urllib import quote # Python 2.X
@@ -118,7 +118,7 @@ def __next__(self):
118
118
return response (record_data , self .sru )
119
119
else :
120
120
raise StopIteration
121
-
121
+
122
122
# Alias for the iterator hook: simply forwards to __next__ and returns its result.
# NOTE(review): presumably kept for Python 2 style iteration (the file has other Python 2 fallbacks) - confirm.
def next (self ):
123
123
return self .__next__ ()
124
124
Original file line number Diff line number Diff line change 13
13
'recordschema' : 'ddd' ,
14
14
'setname' : 'DPO' ,
15
15
'time_period' : [1781 , 1800 ],
16
- 'resolver' : '' },
16
+ 'resolver' : 'http://resolver.kb.nl/resolve?urn= ' },
17
17
'BYVANCK' : {'collection' : 'BYVANCK' ,
18
18
'description_en' : 'Medieval Illuminated Manuscripts' ,
19
19
'description_nl' : 'Middeleeuwse Verluchte Handschriften' ,
Original file line number Diff line number Diff line change 18
18
import elementtree .ElementTree as etree
19
19
except ImportError :
20
20
raise ("Failed to import ElementTree from any known place" )
21
-
21
+ import codecs
22
22
23
23
def alto_to_text (alto_data ):
24
24
''' Grab the selected text blocks and write them to disk '''
25
- alto_data = etree .fromstring (alto_data .encode ('utf-8' ))
25
+ try :
26
+ # some files start with a BOM, which does not get stripped on Windows
27
+ if not alto_data [0 ] == '<' :
28
+ alto_data = alto_data [3 :]
29
+ alto_data = etree .fromstring (alto_data .encode ('utf-8' ))
30
+
31
+ except etree .XMLSyntaxError as e :
32
+ print ("error! %s" % alto_str [:40 ])
33
+ return None
26
34
27
35
alto_text = u""
28
36
prev_was_hyp = False
You can’t perform that action at this time.
0 commit comments