forked from mikesname/ehri-wp11-test
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathead.py
117 lines (108 loc) · 5.37 KB
/
ead.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
import re
from datetime import date
from typing import List, Optional
from xml.etree import ElementTree as ET
import langcodes
from microarchive import MicroArchive, Item
class Ead:
    """Export a MicroArchive description as an EAD XML document.

    The document is assembled with ``xml.etree.ElementTree``. The root
    ``<ead>`` element declares the ``urn:isbn:1-931666-22-9`` default
    namespace and contains an ``<eadheader>`` followed by a single
    collection-level ``<archdesc>``.
    """

    # Blank-line paragraph separator; tolerates CRLF line endings and
    # whitespace-only lines between paragraphs.
    _PARAGRAPH_BREAK = re.compile(r'\r?\n\s*\n')

    @staticmethod
    def paragraphs(text: str) -> List[str]:
        """Hack to split Markdown into multiple paragraphs.

        Args:
            text: Source text in which paragraphs are separated by blank
                lines.

        Returns:
            The paragraphs in order. Empty or whitespace-only input yields
            an empty list, so callers never emit an empty ``<p>``.
        """
        if not text:
            return []
        chunks = Ead._PARAGRAPH_BREAK.split(text.strip())
        return [chunk for chunk in chunks if chunk.strip()]

    def to_xml(self, data: "MicroArchive", url: Optional[str] = None) -> str:
        """Render ``data`` as an indented EAD XML string.

        Args:
            data: The archive description to export.
            url: Optional web address of the description; when given it is
                recorded as a "Web Source" ``<materialspec>`` link.

        Returns:
            The serialised document as ``str`` (no XML declaration).
        """
        root = ET.Element("ead", {
            'xmlns': 'urn:isbn:1-931666-22-9',
            'xmlns:xlink': 'http://www.w3.org/1999/xlink',
            'xmlns:xsi': 'http://www.w3.org/2001/XMLSchema-instance'
        })
        self._add_header(root, data)

        archdesc = ET.SubElement(root, 'archdesc', {'level': 'collection'})
        self._add_did(archdesc, data, url)
        self._add_paragraphs(archdesc, 'bioghist', data.description.biog)
        self._add_paragraphs(archdesc, 'scopecontent', data.description.scope)
        self._add_processinfo(archdesc, data.control)

        if data.items:
            dsc = ET.SubElement(archdesc, 'dsc')
            for item in data.hierarchical_items():
                self._add_component(dsc, item, 1)

        ET.indent(root, space=" ", level=0)
        return ET.tostring(root, encoding="unicode")

    @staticmethod
    def _text_element(parent: ET.Element, tag: str, text: Optional[str],
                      attrib: Optional[dict] = None) -> ET.Element:
        """Append a ``tag`` child to ``parent`` with the given text and attributes."""
        element = ET.SubElement(parent, tag, attrib or {})
        element.text = text
        return element

    def _add_header(self, root: ET.Element, data: "MicroArchive") -> None:
        """Append the ``<eadheader>`` (id, title, contact address, creation note)."""
        today = date.today()
        eadheader = ET.SubElement(root, 'eadheader', {
            'countryencoding': 'iso3166-1',
            'dateencoding': 'iso8601',
            'scriptencoding': 'iso15924',
            'repositoryencoding': 'iso15511',
            'relatedencoding': 'DC'
        })
        self._text_element(eadheader, 'eadid', data.slug())

        filedesc = ET.SubElement(eadheader, 'filedesc')
        titlestmt = ET.SubElement(filedesc, 'titlestmt')
        self._text_element(titlestmt, 'titleproper', data.identity.title)

        publicationstmt = ET.SubElement(filedesc, 'publicationstmt')
        contact_lines = data.contact.lines()
        if contact_lines:
            address = ET.SubElement(publicationstmt, 'address')
            for line in contact_lines:
                self._text_element(address, 'addressline', line.strip())

        profiledesc = ET.SubElement(eadheader, 'profiledesc')
        # The text ends with " on " so it does not run straight into the
        # inline <date> that follows it.
        creation = self._text_element(
            profiledesc, 'creation',
            "This file was exported from the EHRI MicroArchives cataloguing demo on ")
        self._text_element(creation, 'date', today.isoformat(),
                           {'normal': today.strftime('%Y%m%d')})
        langusage = ET.SubElement(profiledesc, 'langusage')
        self._text_element(langusage, 'language', "English", {'langcode': 'eng'})

    def _add_did(self, archdesc: ET.Element, data: "MicroArchive",
                 url: Optional[str]) -> None:
        """Append the collection-level ``<did>`` (id, title, link, extent, languages)."""
        did = ET.SubElement(archdesc, 'did')
        self._text_element(did, 'unitid', data.slug())
        self._text_element(did, 'unittitle', data.identity.title)

        if url:
            materialspec = ET.SubElement(did, 'materialspec', {'label': 'Web Source'})
            ET.SubElement(materialspec, 'extptr', {
                'xlink:type': 'simple',
                'xlink:href': url
            })

        if data.identity.extent:
            physdesc = ET.SubElement(did, 'physdesc', {'label': 'Extent'})
            self._text_element(physdesc, 'extent', data.identity.extent)

        if data.description.lang:
            langmaterial = ET.SubElement(did, 'langmaterial')
            for lang in data.description.lang:
                langdata = langcodes.get(lang)
                self._text_element(langmaterial, 'language', langdata.display_name(),
                                   {'langcode': langdata.to_alpha3()})

    def _add_paragraphs(self, parent: ET.Element, tag: str,
                        text: Optional[str]) -> None:
        """Append ``<tag>`` holding one ``<p>`` per paragraph of ``text``.

        Nothing is appended when ``text`` has no non-blank paragraph, which
        avoids an empty wrapper element for whitespace-only input.
        """
        chunks = self.paragraphs(text) if text else []
        if not chunks:
            return
        wrapper = ET.SubElement(parent, tag)
        for chunk in chunks:
            self._text_element(wrapper, 'p', chunk)

    def _add_processinfo(self, archdesc: ET.Element, control) -> None:
        """Append ``<processinfo>`` with the control notes and/or description date."""
        if not (control.datedesc or control.notes):
            return
        processinfo = ET.SubElement(archdesc, 'processinfo')
        if control.notes:
            self._text_element(processinfo, 'p', control.notes)
        if control.datedesc:
            described = self._text_element(processinfo, 'p', "Collection described on ")
            self._text_element(described, 'date', str(control.datedesc),
                               {'normal': control.datedesc.strftime('%Y%m%d')})

    def _add_component(self, parent: ET.Element, item: "Item", depth: int) -> None:
        """Append ``item`` and, recursively, its children as ``cNN`` components.

        ``depth`` is the 1-based nesting level and is rendered zero-padded
        into the element name (``c01``, ``c02``, ...).
        """
        # NOTE(review): EAD 2002 only defines c01-c12; confirm item
        # hierarchies cannot nest deeper than that.
        component = ET.SubElement(parent, f"c{depth:02d}", {'level': 'otherlevel'})
        did = ET.SubElement(component, 'did')
        self._text_element(did, 'unitid', item.id)
        if item.identity.title:
            self._text_element(did, 'unittitle', item.identity.title)
        if item.content.scope:
            scopecontent = ET.SubElement(component, "scopecontent")
            self._text_element(scopecontent, "p", item.content.scope)
        for sub_item in item.items:
            self._add_component(component, sub_item, depth + 1)