8
8
import click
9
9
import structlog
10
10
11
- from dsaps .models import Client , Collection
12
11
from dsaps import helpers
12
+ from dsaps .models import Client , Collection
13
13
14
14
logger = structlog .get_logger ()
15
15
16
16
17
17
def validate_path(ctx, param, value):
    """Validate the formatting of the submitted path.

    Click option callback. Returns ``value`` unchanged when it ends with a
    trailing slash; otherwise raises ``click.BadParameter`` so click reports
    a usage error instead of a traceback.

    Args:
        ctx: The click invocation context (unused).
        param: The click parameter being processed (unused).
        value: The path string supplied by the user.

    Returns:
        The validated path, guaranteed to end with ``/``.

    Raises:
        click.BadParameter: If the path is empty or lacks a trailing slash.
    """
    # Check non-emptiness first: value[-1] on "" would raise IndexError
    # instead of the intended usage error.
    if value and value[-1] == "/":
        return value
    raise click.BadParameter("Include / at the end of the path.")
23
23
24
24
25
25
@click.group(chain=True)
@click.option(
    "--url",
    envvar="DSPACE_URL",
    required=True,
)
@click.option(
    "-e",
    "--email",
    envvar="DSPACE_EMAIL",
    required=True,
    help="The email of the user for authentication.",
)
@click.option(
    "-p",
    "--password",
    envvar="DSPACE_PASSWORD",
    required=True,
    hide_input=True,
    help="The password for authentication.",
)
@click.pass_context
def main(ctx, url, email, password):
    """Configure logging and authenticate a DSpace client for subcommands.

    Runs once before any chained subcommand. Sets up structlog/stdlib
    logging to a timestamped file under ``logs/``, authenticates against
    the DSpace instance at ``url``, and stashes the client, start time,
    and log-file suffix in ``ctx.obj`` for the subcommands to use.
    """
    ctx.obj = {}
    # makedirs with exist_ok avoids the check-then-create race of the
    # previous isdir()/mkdir() pair and is a no-op when logs/ exists.
    os.makedirs("logs", exist_ok=True)
    dt = datetime.datetime.utcnow().isoformat(timespec="seconds")
    log_suffix = f"{dt}.log"
    structlog.configure(
        processors=[
            structlog.stdlib.filter_by_level,
            structlog.stdlib.add_log_level,
            structlog.stdlib.PositionalArgumentsFormatter(),
            structlog.processors.TimeStamper(fmt="iso"),
            structlog.processors.JSONRenderer(),
        ],
        context_class=dict,
        logger_factory=structlog.stdlib.LoggerFactory(),
    )
    logging.basicConfig(
        format="%(message)s",
        handlers=[logging.FileHandler(f"logs/log-{log_suffix}", "w")],
        level=logging.INFO,
    )
    logger.info("Application start")
    client = Client(url)
    client.authenticate(email, password)
    start_time = time.time()
    ctx.obj["client"] = client
    ctx.obj["start_time"] = start_time
    ctx.obj["log_suffix"] = log_suffix
58
76
59
77
60
78
@main .command ()
61
- @click .option ('-m' , '--metadata-csv' , required = True ,
62
- type = click .Path (exists = True , file_okay = True , dir_okay = False ),
63
- help = 'The path to the CSV file of metadata for the items.' )
64
- @click .option ('-f' , '--field-map' , required = True ,
65
- type = click .Path (exists = True , file_okay = True , dir_okay = False ),
66
- help = 'The path to JSON field mapping file.' )
67
- @click .option ('-d' , '--content-directory' , required = True ,
68
- type = click .Path (exists = True , dir_okay = True , file_okay = False ),
69
- help = 'The full path to the content, either a directory of files '
70
- 'or a URL for the storage location.' )
71
- @click .option ('-t' , '--file-type' ,
72
- help = 'The file type to be uploaded, if limited to one file '
73
- 'type.' , default = '*' )
74
- @click .option ('-r' , '--ingest-report' , is_flag = True ,
75
- help = 'Create ingest report for updating other systems.' )
76
- @click .option ('-c' , '--collection-handle' ,
77
- help = 'The handle of the collection to which items are being '
78
- 'added.' , default = None )
79
+ @click .option (
80
+ "-m" ,
81
+ "--metadata-csv" ,
82
+ required = True ,
83
+ type = click .Path (exists = True , file_okay = True , dir_okay = False ),
84
+ help = "The path to the CSV file of metadata for the items." ,
85
+ )
86
+ @click .option (
87
+ "-f" ,
88
+ "--field-map" ,
89
+ required = True ,
90
+ type = click .Path (exists = True , file_okay = True , dir_okay = False ),
91
+ help = "The path to JSON field mapping file." ,
92
+ )
93
+ @click .option (
94
+ "-d" ,
95
+ "--content-directory" ,
96
+ required = True ,
97
+ type = click .Path (exists = True , dir_okay = True , file_okay = False ),
98
+ help = "The full path to the content, either a directory of files "
99
+ "or a URL for the storage location." ,
100
+ )
101
+ @click .option (
102
+ "-t" ,
103
+ "--file-type" ,
104
+ help = "The file type to be uploaded, if limited to one file " "type." ,
105
+ default = "*" ,
106
+ )
107
+ @click .option (
108
+ "-r" ,
109
+ "--ingest-report" ,
110
+ is_flag = True ,
111
+ help = "Create ingest report for updating other systems." ,
112
+ )
113
+ @click .option (
114
+ "-c" ,
115
+ "--collection-handle" ,
116
+ help = "The handle of the collection to which items are being " "added." ,
117
+ default = None ,
118
+ )
79
119
@click .pass_context
80
- def additems (ctx , metadata_csv , field_map , content_directory , file_type ,
81
- ingest_report , collection_handle ):
120
+ def additems (
121
+ ctx ,
122
+ metadata_csv ,
123
+ field_map ,
124
+ content_directory ,
125
+ file_type ,
126
+ ingest_report ,
127
+ collection_handle ,
128
+ ):
82
129
"""Add items to a specified collection from a metadata CSV, a field
83
- mapping file, and a directory of files. May be run in conjunction with the
84
- newcollection CLI command."""
85
- client = ctx .obj ['client' ]
86
- start_time = ctx .obj ['start_time' ]
87
- if 'collection_uuid' not in ctx .obj and collection_handle is None :
88
- raise click .UsageError ('collection_handle option must be used or '
89
- 'additems must be run after newcollection '
90
- 'command.' )
91
- elif 'collection_uuid' in ctx .obj :
92
- collection_uuid = ctx .obj ['collection_uuid' ]
130
+ mapping file, and a directory of files. May be run in conjunction with the
131
+ newcollection CLI command."""
132
+ client = ctx .obj ["client" ]
133
+ start_time = ctx .obj ["start_time" ]
134
+ if "collection_uuid" not in ctx .obj and collection_handle is None :
135
+ raise click .UsageError (
136
+ "collection_handle option must be used or "
137
+ "additems must be run after newcollection "
138
+ "command."
139
+ )
140
+ elif "collection_uuid" in ctx .obj :
141
+ collection_uuid = ctx .obj ["collection_uuid" ]
93
142
else :
94
143
collection_uuid = client .get_uuid_from_handle (collection_handle )
95
- with open (metadata_csv , 'r' ) as csvfile , open (field_map , 'r' ) as jsonfile :
144
+ with open (metadata_csv , "r" ) as csvfile , open (field_map , "r" ) as jsonfile :
96
145
metadata = csv .DictReader (csvfile )
97
146
mapping = json .load (jsonfile )
98
147
collection = Collection .create_metadata_for_items_from_csv (metadata , mapping )
@@ -101,58 +150,78 @@ def additems(ctx, metadata_csv, field_map, content_directory, file_type,
101
150
collection .uuid = collection_uuid
102
151
items = collection .post_items (client )
103
152
if ingest_report :
104
- report_name = metadata_csv .replace (' .csv' , ' -ingest.csv' )
153
+ report_name = metadata_csv .replace (" .csv" , " -ingest.csv" )
105
154
helpers .create_ingest_report (items , report_name )
106
155
elapsed_time = datetime .timedelta (seconds = time .time () - start_time )
107
- logger .info (f' Total runtime : { elapsed_time } ' )
156
+ logger .info (f" Total runtime : { elapsed_time } " )
108
157
109
158
110
159
@main.command()
@click.option(
    "-c",
    "--community-handle",
    required=True,
    # Fixed misplaced comma: the previous implicit string concatenation
    # rendered as "...create the ,collection."
    help="The handle of the community in which to create the collection.",
)
@click.option(
    "-n",
    "--collection-name",
    required=True,
    help="The name of the collection to be created.",
)
@click.pass_context
def newcollection(ctx, community_handle, collection_name):
    """Post a new collection to a specified community. Used in conjunction
    with the additems CLI command to populate the new collection with
    items."""
    client = ctx.obj["client"]
    collection_uuid = client.post_coll_to_comm(community_handle, collection_name)
    # Stored so a chained `additems` invocation can target the new collection.
    ctx.obj["collection_uuid"] = collection_uuid
125
180
126
181
127
182
@main.command()
@click.option(
    "-m",
    "--metadata-csv",
    required=True,
    type=click.Path(exists=True, file_okay=True, dir_okay=False),
    help="The path of the CSV file of metadata.",
)
@click.option(
    "-o",
    "--output-directory",
    type=click.Path(exists=True, file_okay=False),
    default=f"{os.getcwd()}/",
    callback=validate_path,
    help="The path of the output files, include / at the end of the " "path.",
)
@click.option(
    "-d",
    "--content-directory",
    required=True,
    help="The full path to the content, either a directory of files "
    "or a URL for the storage location.",
)
@click.option(
    "-t",
    "--file-type",
    help="The file type to be uploaded, if limited to one file " "type.",
    default="*",
)
def reconcile(metadata_csv, output_directory, content_directory, file_type):
    """Run a reconciliation of the specified files and metadata to produce
    reports of files with no metadata, metadata with no files, metadata
    matched to files, and an updated version of the metadata CSV with only
    the records that have matching files."""
    # Gather the identifiers present on each side of the reconciliation.
    content_ids = helpers.create_file_list(content_directory, file_type)
    record_ids = helpers.create_metadata_id_list(metadata_csv)
    # Cross-match the two identifier lists in both directions.
    matched_records = helpers.match_metadata_to_files(content_ids, record_ids)
    matched_files = helpers.match_files_to_metadata(content_ids, record_ids)
    # Report files lacking metadata, metadata lacking files, and the matches.
    helpers.create_csv_from_list(
        set(content_ids) - set(matched_files), f"{output_directory}no_metadata"
    )
    helpers.create_csv_from_list(
        set(record_ids) - set(matched_records), f"{output_directory}no_files"
    )
    helpers.create_csv_from_list(
        matched_records, f"{output_directory}metadata_matches"
    )
    # Rewrite the metadata CSV keeping only records with matching files.
    helpers.update_metadata_csv(metadata_csv, output_directory, matched_records)
0 commit comments