Skip to content

Commit dd2d31e

Browse files
committed
Updates for DSpace@MIT uploads
* Refactor parse_value_from_text method
* Refactor get_files_from_s3 method to sort bitstreams
* Update timeouts for POST requests to account for slow responses
* Add dspacemit.json config
* Remove outdated aspace_mapping.json
1 parent 4089fd5 commit dd2d31e

File tree

4 files changed

+93
-43
lines changed

4 files changed

+93
-43
lines changed

config/aspace_mapping.json

-37
This file was deleted.

config/dspacemit.json

+83
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,83 @@
1+
{
2+
"settings": {
3+
"bitstream_folders": [],
4+
"id_regex": "^.*$"
5+
},
6+
"mapping": {
7+
"item_identifier": {
8+
"csv_field_name": "item_identifier",
9+
"language": null,
10+
"delimiter": ""
11+
},
12+
"dc.publisher": {
13+
"csv_field_name": "dc.publisher",
14+
"language": "en_US",
15+
"delimiter": ""
16+
},
17+
"dc.identifier.mitlicense": {
18+
"csv_field_name": "dc.identifier.mitlicense",
19+
"language": "en_US",
20+
"delimiter": ""
21+
},
22+
"dc.eprint.version": {
23+
"csv_field_name": "dc.eprint.version",
24+
"language": "en_US",
25+
"delimiter": ""
26+
},
27+
"dc.type": {
28+
"csv_field_name": "dc.type",
29+
"language": "en_US",
30+
"delimiter": ""
31+
},
32+
"dc.source": {
33+
"csv_field_name": "dc.source",
34+
"language": "en_US",
35+
"delimiter": ""
36+
},
37+
"dc.contributor.author": {
38+
"csv_field_name": "dc.contributor.author",
39+
"language": "en_US",
40+
"delimiter": "|"
41+
},
42+
"dc.relation.isversionof": {
43+
"csv_field_name": "dc.relation.isversionof",
44+
"language": "",
45+
"delimiter": ""
46+
},
47+
"dc.title": {
48+
"csv_field_name": "dc.title",
49+
"language": "en_US",
50+
"delimiter": ""
51+
},
52+
"dc.relation.journal": {
53+
"csv_field_name": "dc.relation.journal",
54+
"language": "",
55+
"delimiter": ""
56+
},
57+
"dc.identifier.issn": {
58+
"csv_field_name": "dc.identifier.issn",
59+
"language": "",
60+
"delimiter": ""
61+
},
62+
"dc.date.issued": {
63+
"csv_field_name": "dc.date.issued",
64+
"language": "",
65+
"delimiter": ""
66+
},
67+
"dc.rights": {
68+
"csv_field_name": "dc.rights",
69+
"language": "en_US",
70+
"delimiter": ""
71+
},
72+
"dc.rights.uri": {
73+
"csv_field_name": "dc.rights.uri",
74+
"language": "",
75+
"delimiter": ""
76+
},
77+
"dc.description.sponsorship": {
78+
"csv_field_name": "dc.description.sponsorship",
79+
"language": "en_US",
80+
"delimiter": ""
81+
}
82+
}
83+
}

dsaps/dspace.py

+4 -4
Original file line number | Diff line number | Diff line change
@@ -28,7 +28,7 @@ def authenticate(self, email, password):
2828
header = self.header
2929
data = {"email": email, "password": password}
3030
session = requests.post(
31-
f"{self.url}/login", headers=header, params=data, timeout=30
31+
f"{self.url}/login", headers=header, params=data, timeout=120
3232
).cookies["JSESSIONID"]
3333
cookies = {"JSESSIONID": session}
3434
status = requests.get(
@@ -107,7 +107,7 @@ def post_bitstream(self, item_uuid, bitstream):
107107
headers=header_upload,
108108
cookies=self.cookies,
109109
data=data,
110-
timeout=30,
110+
timeout=120,
111111
)
112112
logger.info(f"Bitstream POST status: {post_response}")
113113
response = post_response.json()
@@ -128,7 +128,7 @@ def post_collection_to_community(self, comm_handle, coll_name):
128128
headers=self.header,
129129
cookies=self.cookies,
130130
json={"name": coll_name},
131-
timeout=30,
131+
timeout=120,
132132
).json()
133133
coll_uuid = coll_uuid["uuid"]
134134
logger.info(f"Collection posted: {coll_uuid}")
@@ -143,7 +143,7 @@ def post_item_to_collection(self, collection_uuid, item):
143143
headers=self.header,
144144
cookies=self.cookies,
145145
json={"metadata": attr.asdict(item)["metadata"]},
146-
timeout=30,
146+
timeout=120,
147147
)
148148
logger.info(f"Item POST status: {post_resp}")
149149
post_response = post_resp.json()

dsaps/helpers.py

+6 -2
Original file line number | Diff line number | Diff line change
@@ -66,6 +66,10 @@ def get_files_from_s3(
6666
continue
6767
item_identifier = parse_value_from_text(file_name, id_regex)
6868
files.setdefault(item_identifier, []).append(file_path)
69+
for key, value in files.items():
70+
files[key] = sorted(
71+
value, key=lambda x: x.split(parse_value_from_text(x, id_regex))[1]
72+
)
6973
return dict(sorted(files.items()))
7074

7175

@@ -74,8 +78,8 @@ def parse_value_from_text(
7478
regex: str,
7579
):
7680
pattern = re.compile(regex)
77-
if match := pattern.search(text):
78-
return match.group(1)
81+
if matches := pattern.findall(text):
82+
return matches[0]
7983

8084

8185
def create_ingest_report(items, file_name):

0 commit comments

Comments
 (0)