forked from mikesname/ehri-wp11-test
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathstore.py
108 lines (91 loc) · 3.59 KB
/
store.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
import io
import json
import os
import posixpath
import re
import sys
from dataclasses import dataclass, field
from typing import Tuple, List, Optional, Dict
from urllib.parse import quote_plus

import boto3
from botocore.exceptions import ClientError
# Substring that marks thumbnail objects: Store.load_files skips every key
# that contains it.
THUMB_DIR = ".thumb"
# Matches a key that ends in one of the listed image extensions, ignoring
# case; Store.load_files skips keys that do not match.
EXT_PATTERN = re.compile('.*\\.(jpe?g|tiff?|png|gif|raw)$', re.IGNORECASE)
@dataclass
class StoreSettings:
    """Bucket location and credentials consumed by ``Store``.

    ``Store`` passes ``bucket`` as the ``Bucket`` argument of its S3 calls and
    hands ``region``, ``access_key`` and ``secret_key`` to ``boto3.client`` as
    ``region_name``, ``aws_access_key_id`` and ``aws_secret_access_key``.
    """

    bucket: str
    region: str
    access_key: str
    # Kept out of the generated __repr__ so that printing or logging a
    # settings object does not expose the secret. It is still a required
    # positional field and still takes part in equality.
    secret_key: str = field(repr=False)
@dataclass
class IIIFSettings:
    """Settings for the IIIF image server that ``Store`` links to."""

    # Store.load_files appends the URL-quoted object key directly to this
    # value, with no separator added in between.
    server_url: str
class Store:
    """S3-backed storage for source images and generated website files.

    Deliberately a plain class rather than a dataclass: it declares no fields
    and has its own ``__init__``, so ``@dataclass`` would only add a
    field-less ``__eq__`` (every instance equal to every other) and make
    instances unhashable.
    """

    def __init__(self, settings: StoreSettings, iiif_settings: IIIFSettings):
        """Keep both settings objects and create the S3 client."""
        self.settings = settings
        self.iiif_settings = iiif_settings
        self.client = self.aws_client("s3")

    def aws_client(self, service: str):
        """Return a boto3 client for *service* using the stored region and keys."""
        return boto3.client(
            service,
            region_name=self.settings.region,
            aws_access_key_id=self.settings.access_key,
            aws_secret_access_key=self.settings.secret_key,
        )

    @staticmethod
    def _object_key(origin: str, filename: str) -> str:
        """Join *origin* and *filename* into a bucket key.

        One leading "/" is dropped from *origin*. The join always uses "/",
        because object keys are not filesystem paths and must not pick up the
        host OS separator.
        """
        base = origin[1:] if origin.startswith("/") else origin
        return posixpath.join(base, filename)

    def load_files(self, prefix: Optional[str] = None) -> List[Tuple[str, str, str]]:
        """List the image objects stored under *prefix*.

        Keys ending in "/", keys containing ``THUMB_DIR`` and keys that do not
        match ``EXT_PATTERN`` are skipped.

        Args:
            prefix: Key prefix to list. ``None`` or ``""`` yields ``[]``
                without contacting S3.

        Returns:
            One ``(item_id, url, thumb_url)`` tuple per image, where
            ``item_id`` is the key with *prefix* and the file extension
            removed, and both URLs are the IIIF server URL followed by the
            quoted key and an image-request suffix.
        """
        if not prefix:
            return []

        # Walk every result page: a single list_objects_v2 response is capped,
        # so reading only the first one would silently truncate long listings.
        pages = self.client.get_paginator("list_objects_v2").paginate(
            Bucket=self.settings.bucket,
            Prefix=prefix,
        )
        base_url = self.iiif_settings.server_url
        items = []
        for page in pages:
            # "Contents" is absent when nothing matches the prefix.
            for meta in page.get("Contents", []):
                key: str = meta["Key"]
                if key.endswith("/") or THUMB_DIR in key or not EXT_PATTERN.match(key):
                    continue
                item_id = posixpath.splitext(key)[0][len(prefix):]
                # NOTE(review): quote_plus encodes a space as "+"; confirm the
                # IIIF server resolves such identifiers for keys with spaces.
                image_url = base_url + quote_plus(key)
                items.append((
                    item_id,
                    image_url + "/full/max/0/default.jpg",
                    image_url + "/full/!75,100/0/default.jpg",
                ))
        return items

    def get_meta(self, origin: str, name: str = "<unnamed>") -> Optional[Dict]:
        """Fetch the micro-archive manifest from existing storage.

        Args:
            origin: Key prefix of the site; one leading "/" is ignored.
            name: Label used only in the diagnostic message.

        Returns:
            The parsed ``.meta.json`` document, or ``None`` (after a message on
            stderr) when the download raises ``ClientError``.

        Raises:
            json.JSONDecodeError: If the stored manifest is not valid JSON.
        """
        buf = io.BytesIO()
        try:
            self.client.download_fileobj(
                Bucket=self.settings.bucket,
                Key=self._object_key(origin, ".meta.json"),
                Fileobj=buf,
            )
        except ClientError:
            print(f"Unable to find existing metadata for name {name} at origin {origin}", file=sys.stderr)
            return None
        return json.loads(buf.getvalue().decode('utf-8'))

    def upload(self, name: str, origin: str, index: str, xml: str, iiif: str, meta: Dict):
        """Upload website data to storage.

        Writes ``.meta.json`` (serialised from *meta*), ``index.html``,
        ``<name>.xml`` and ``<name>.json`` under *origin*, all UTF-8 encoded.

        Args:
            name: Base name of the XML and JSON files.
            origin: Key prefix of the site; one leading "/" is ignored.
            index: Body of ``index.html``.
            xml: Body of ``<name>.xml``.
            iiif: Body of ``<name>.json``.
            meta: Manifest data; values JSON cannot encode are stringified.
        """
        bucket = self.settings.bucket

        # The manifest is written without an ACL argument, unlike the public
        # site files below.
        self.client.put_object(
            Bucket=bucket,
            Key=self._object_key(origin, ".meta.json"),
            ContentType="application/json",
            Body=json.dumps(meta, indent=2, default=str).encode('utf-8'),
        )

        files = [
            ("index.html", "text/html", index),
            (f"{name}.xml", "text/xml", xml),
            (f"{name}.json", "application/json", iiif),
        ]
        # Upload the rest with Public ACL
        for filename, content_type, data in files:
            self.client.put_object(
                ACL='public-read',
                Bucket=bucket,
                Key=self._object_key(origin, filename),
                ContentType=content_type,
                Body=data.encode('utf-8'),
            )