Skip to content

Commit 8f61a2a

Browse files
Move bitstream retrieval process to Item class
* Add separate class methods for getting bitstreams, ids, and metadata
* Add class method to consolidate steps required for creating a DSpace item
* Update test suite
1 parent cce9099 commit 8f61a2a

File tree

4 files changed

+80
-30
lines changed

4 files changed

+80
-30
lines changed

dsaps/cli.py

+2 -5
Original file line number | Diff line number | Diff line change
@@ -153,14 +153,11 @@ def additems(
153153
"Option '--metadata-csv' must be used or " "run 'reconcile' before 'additems'"
154154
)
155155

156-
bitstream_file_paths = helpers.get_bitstreams_from_csv(metadata_csv)
157156
dspace_collection = dspace.Collection(uuid=collection_uuid)
158157

159158
with open(metadata_csv, "r") as csvfile:
160159
metadata = csv.DictReader(csvfile)
161-
dspace_collection = dspace_collection.create_metadata_for_items_from_csv(
162-
metadata, mapping
163-
)
160+
dspace_collection = dspace_collection.add_items(metadata, mapping)
164161

165162
for item in dspace_collection.items:
166163
logger.info(f"Posting item: {item}")
@@ -170,7 +167,7 @@ def additems(
170167
item.uuid = item_uuid
171168
item.handle = item_handle
172169
logger.info(f"Item posted: {item_uuid}")
173-
for file_path in bitstream_file_paths.get(item.item_identifier):
170+
for file_path in item.bitstreams:
174171
file_name = file_path.split("/")[-1]
175172
bitstream = dspace.Bitstream(name=file_name, file_path=file_path)
176173
logger.info(f"Posting bitstream: {bitstream}")

dsaps/dspace.py

+36 -16
Original file line number | Diff line number | Diff line change
@@ -1,12 +1,16 @@
1-
import operator
1+
from __future__ import annotations
22

3+
import ast
34
import attr
5+
import operator
46
import requests
5-
import smart_open
67
import structlog
78

9+
import smart_open
10+
811
from attrs import field, define
912

13+
1014
logger = structlog.get_logger()
1115
op = operator.attrgetter("name")
1216

@@ -200,19 +204,39 @@ class Item(Object):
200204
source_system_identifier = field(default=None)
201205

202206
@classmethod
203-
def metadata_from_csv_row(cls, record, mapping):
207+
def create(cls, record, mapping) -> Item:
208+
return cls(
209+
metadata=cls.get_metadata(record, mapping),
210+
bitstreams=cls.get_bitstreams(record),
211+
**cls.get_ids(record, mapping),
212+
)
213+
214+
@classmethod
215+
def get_bitstreams(cls, record) -> list:
216+
if bitstreams := record.get("bitstreams"):
217+
return ast.literal_eval(bitstreams)
218+
219+
@classmethod
220+
def get_ids(cls, record, mapping) -> dict:
221+
ids = {}
222+
if item_id_mapping := mapping.get("item_identifier"):
223+
ids["item_identifier"] = record.get(item_id_mapping["csv_field_name"])
224+
if source_system_id_mapping := mapping.get("source_system_identifier"):
225+
ids["source_system_identifier"] = record.get(
226+
source_system_id_mapping["csv_field_name"]
227+
)
228+
return ids
229+
230+
@classmethod
231+
def get_metadata(cls, record, mapping) -> list:
204232
"""Create metadata for an item based on a CSV row and a JSON mapping field map."""
205233
metadata = []
206234
for field_name, field_mapping in mapping.items():
235+
if field_name in ["item_identifier", "source_system_identifier"]:
236+
continue
207237
field_value = record[field_mapping["csv_field_name"]]
208238

209239
if field_value:
210-
if field_name == "item_identifier":
211-
item_identifier = field_value
212-
continue # file_identifier is not included in DSpace metadata
213-
if field_name == "source_system_identifier":
214-
# source_system_identifier = field
215-
continue # source_system_identifier is not included in DSpace
216240
delimiter = field_mapping["delimiter"]
217241
language = field_mapping["language"]
218242
if delimiter:
@@ -234,22 +258,18 @@ def metadata_from_csv_row(cls, record, mapping):
234258
language=language,
235259
)
236260
)
237-
return cls(
238-
metadata=metadata,
239-
item_identifier=item_identifier,
240-
# source_system_identifier=source_system_identifier,
241-
)
261+
return metadata
242262

243263

244264
@define
245265
class Collection(Object):
246266
items = field(factory=list)
247267

248268
@classmethod
249-
def create_metadata_for_items_from_csv(cls, csv_reader, field_map):
269+
def add_items(cls, csv_reader, field_map) -> Collection:
250270
"""Create metadata for the collection's items based on a CSV and a JSON mapping
251271
field map."""
252-
items = [Item.metadata_from_csv_row(row, field_map) for row in csv_reader]
272+
items = [Item.create(row, field_map) for row in csv_reader]
253273
return cls(items=items)
254274

255275

tests/conftest.py

+7
Original file line number | Diff line number | Diff line change
@@ -35,6 +35,13 @@ def source_metadata_csv():
3535
yield reader
3636

3737

38+
@pytest.fixture
39+
def source_metadata_csv_with_bitstreams():
40+
with open("tests/fixtures/updated-source_metadata.csv") as file:
41+
reader = csv.DictReader(file)
42+
yield reader
43+
44+
3845
@pytest.fixture()
3946
def dspace_client():
4047
dspace_client = dspace.DSpaceClient("mock://example.com/")

tests/test_models.py

+35 -9
Original file line number | Diff line number | Diff line change
@@ -78,19 +78,45 @@ def test_build_uuid_list(dspace_client):
7878
assert "1234" in child_list
7979

8080

81-
def test_collection_create_metadata_for_items_from_csv(
82-
source_metadata_csv, source_config
83-
):
84-
collection = Collection.create_metadata_for_items_from_csv(
85-
source_metadata_csv, source_config["mapping"]
86-
)
81+
def test_collection_add_items(source_metadata_csv, source_config):
82+
collection = Collection.add_items(source_metadata_csv, source_config["mapping"])
8783
assert len(collection.items) == 5
8884

8985

90-
def test_item_metadata_from_csv_row(source_metadata_csv, source_config):
86+
def test_item_create(source_metadata_csv_with_bitstreams, source_config):
87+
record = next(source_metadata_csv_with_bitstreams)
88+
assert attr.asdict(Item.create(record, source_config["mapping"])) == {
89+
"uuid": None,
90+
"name": None,
91+
"handle": None,
92+
"link": None,
93+
"type": None,
94+
"metadata": [
95+
{"key": "dc.title", "value": "Title 1", "language": "en_US"},
96+
{"key": "dc.contributor.author", "value": "May Smith", "language": None},
97+
],
98+
"bitstreams": ["s3://mocked-bucket/one-to-one/aaaa_001_01.pdf"],
99+
"item_identifier": "001",
100+
"source_system_identifier": None,
101+
}
102+
103+
104+
def test_item_get_ids(source_metadata_csv, source_config):
105+
record = next(source_metadata_csv)
106+
assert Item.get_ids(record, source_config["mapping"]) == {"item_identifier": "001"}
107+
108+
109+
def test_item_get_bitstreams(source_metadata_csv_with_bitstreams, source_config):
110+
record = next(source_metadata_csv_with_bitstreams)
111+
assert Item.get_bitstreams(record) == [
112+
"s3://mocked-bucket/one-to-one/aaaa_001_01.pdf"
113+
]
114+
115+
116+
def test_item_get_metadata(source_metadata_csv, source_config):
91117
record = next(source_metadata_csv)
92-
item = Item.metadata_from_csv_row(record, source_config["mapping"])
93-
assert attr.asdict(item)["metadata"] == [
118+
metadata = Item.get_metadata(record, source_config["mapping"])
119+
assert [attr.asdict(m) for m in metadata] == [
94120
{"key": "dc.title", "value": "Title 1", "language": "en_US"},
95121
{"key": "dc.contributor.author", "value": "May Smith", "language": None},
96122
]

0 commit comments

Comments
 (0)