1
1
import csv
2
2
import datetime
3
- import glob
4
3
import json
5
4
import logging
6
5
import os
9
8
import click
10
9
import structlog
11
10
12
- from dsaps import models , workflows
11
+ from dsaps .models import Client , Collection
12
+ from dsaps import helpers
13
13
14
14
logger = structlog .get_logger ()
15
15
16
16
17
- @click .group ()
18
- @click .option ('--url' , envvar = 'DSPACE_URL' )
19
- @click .option ('-e' , '--email' , prompt = 'Enter email' ,
17
+ @click .group (chain = True )
18
+ @click .option ('--url' , envvar = 'DSPACE_URL' , required = True , )
19
+ @click .option ('-e' , '--email' , envvar = 'TEST_EMAIL' , required = True ,
20
20
help = 'The email of the user for authentication.' )
21
- @click .option ('-p' , '--password' , prompt = 'Enter password' ,
22
- envvar = 'TEST_PASS' , hide_input = True ,
23
- help = 'The password for authentication.' )
21
+ @click .option ('-p' , '--password' , envvar = 'TEST_PASS' , required = True ,
22
+ hide_input = True , help = 'The password for authentication.' )
24
23
@click .pass_context
25
24
def main (ctx , url , email , password ):
26
25
ctx .obj = {}
@@ -42,106 +41,93 @@ def main(ctx, url, email, password):
42
41
'w' )],
43
42
level = logging .INFO )
44
43
logger .info ('Application start' )
45
- client = models . Client (url )
44
+ client = Client (url )
46
45
client .authenticate (email , password )
47
46
start_time = time .time ()
48
47
ctx .obj ['client' ] = client
49
48
ctx .obj ['start_time' ] = start_time
49
+ ctx .obj ['log_suffix' ] = log_suffix
50
50
51
51
52
52
@main.command()
@click.option('-m', '--metadata-csv', required=True,
              type=click.Path(exists=True),
              help='The full path to the CSV file of metadata for the items.')
@click.option('--field-map', required=True, type=click.Path(exists=True),
              help='Path to JSON field mapping file')
@click.option('-d', '--directory', required=True,
              help='The full path to the content, either a directory of files '
              'or a URL for the storage location.')
@click.option('-t', '--file-type',
              help='The file type to be uploaded, if limited to one file '
              'type.', default='*')
@click.option('-r', '--ingest-report', is_flag=True,
              help='Create ingest report for updating other systems.')
@click.option('-c', '--collection-handle',
              help='The handle of the collection to which items are being '
              'added.', default=None)
@click.pass_context
def additems(ctx, metadata_csv, field_map, directory, file_type, ingest_report,
             collection_handle):
    """Adds items to a specified collection from a metadata CSV, a field
    mapping file, and a directory of files. May be run in conjunction with the
    newcollection CLI commands."""
    # NOTE: click renders the docstring above as the command's --help text,
    # so implementation notes are kept in comments instead.
    #
    # ctx.obj is the dict populated by the `main` group; it carries the
    # authenticated client and the start timestamp, and may carry a
    # 'collection_uuid' left there by a preceding newcollection command.
    client = ctx.obj['client']
    start_time = ctx.obj['start_time']

    # A uuid stored in the context takes precedence over an explicit
    # --collection-handle; with neither available there is no target.
    if 'collection_uuid' in ctx.obj:
        collection_uuid = ctx.obj['collection_uuid']
    elif collection_handle is not None:
        collection_uuid = client.get_uuid_from_handle(collection_handle)
    else:
        raise click.UsageError('collection_handle option must be used or '
                               'additems must be run after newcollection '
                               'command.')

    # newline='' is what the csv module documentation requires so that
    # newlines embedded in quoted fields are passed through untouched.
    with open(metadata_csv, 'r', newline='') as csvfile, \
            open(field_map, 'r') as jsonfile:
        metadata = csv.DictReader(csvfile)
        mapping = json.load(jsonfile)
        # DictReader reads lazily, so the collection has to be built while
        # the CSV file is still open.
        collection = Collection.from_csv(metadata, mapping)

    for item in collection.items:
        item.bitstreams_from_directory(directory, file_type)
    collection.uuid = collection_uuid
    items = collection.post_items(client)

    if ingest_report:
        # Swap only the final extension. A plain str.replace('.csv', ...)
        # also rewrites '.csv' inside directory names and is a no-op for
        # names such as 'items.CSV' or 'items.txt', which would make the
        # report path identical to the input metadata file.
        report_root, _ = os.path.splitext(metadata_csv)
        report_name = f'{report_root}-ingest.csv'
        helpers.create_ingest_report(items, report_name)

    elapsed_time = datetime.timedelta(seconds=time.time() - start_time)
    logger.info(f'Total runtime : {elapsed_time}')
89
98
90
99
91
100
@main.command()
@click.option('-c', '--community-handle', required=True,
              help='The handle of the community in which to create the '
              'collection.')
@click.option('-n', '--collection-name', required=True,
              help='The name of the collection to be created.')
@click.pass_context
def newcollection(ctx, community_handle, collection_name):
    """Posts a new collection to a specified community. Used in conjunction
    with the additems CLI command to populate the new collection with
    items."""
    # ctx.obj is the dict populated by the `main` group.
    client = ctx.obj['client']
    collection_uuid = client.post_coll_to_comm(community_handle,
                                               collection_name)
    # Leave the new collection's uuid in the shared context so that an
    # additems command chained after this one can use it as its target.
    ctx.obj['collection_uuid'] = collection_uuid
105
115
106
116
107
- @main .command ()
108
- @click .option ('-m' , '--metadata_csv' , prompt = 'Enter the metadata CSV file' ,
109
- help = 'The path of the CSV file of metadata.' )
110
- def metadatajson (metadata_csv ):
111
- with open (metadata_csv ) as csvfile :
112
- reader = csv .DictReader (csvfile )
113
- metadata_group = []
114
- mapping_dict = {'fileIdentifier' : ['file_identifier' ],
115
- 'dc.contributor.author' : ['author name - direct' ],
116
- 'dc.contributor.advisor' : ['supervisor(s)' ],
117
- 'dc.date.issued' : ['pub date' ],
118
- 'dc.description.abstract' : ['Abstract' , 'en_US' ],
119
- 'dc.title' : ['Title' , 'en_US' ],
120
- 'dc.relation.ispartofseries' : ['file_identifier' ]}
121
- for row in reader :
122
- metadata_rec = []
123
- metadata_rec = models .create_metadata_rec (mapping_dict , row ,
124
- metadata_rec )
125
- metadata_rec .append ({'key' : 'dc.format.mimetype' , 'language' :
126
- 'en_US' , 'value' : 'application/pdf' })
127
- metadata_rec .append ({'key' : 'dc.language.iso' , 'language' :
128
- 'en_US' , 'value' : 'en_US' })
129
- metadata_rec .append ({'key' : 'dc.publisher' , 'language' : 'en_US' ,
130
- 'value' : 'Massachusetts Institute of '
131
- 'Technology. Laboratory for Computer'
132
- 'Science' })
133
- metadata_rec .append ({'key' : 'dc.rights' , 'language' : 'en_US' ,
134
- 'value' : 'Educational use permitted' })
135
- metadata_rec .append ({'key' : 'dc.rights.uri' , 'language' : 'en_US' ,
136
- 'value' : 'http://rightsstatements.org/vocab/'
137
- 'InC-EDU/1.0/' })
138
- metadata_rec .append ({'key' : 'dc.type' , 'language' : 'en_US' ,
139
- 'value' : 'Technical Report' })
140
- item = {'metadata' : metadata_rec }
141
- metadata_group .append (item )
142
- file_name = os .path .splitext (os .path .basename (metadata_csv ))[0 ]
143
- with open (f'{ file_name } .json' , 'w' ) as f :
144
- json .dump (metadata_group , f )
117
+ # @main.command()
118
+ # @click.option('-m', '--metadata_csv', prompt='Enter the metadata CSV file',
119
+ # help='The path of the CSV file of metadata.')
120
+ # @click.option('-o', '--output_path', prompt='Enter the output path',
121
+ # default='', help='The path of the output files, include '
122
+ # '/ at the end of the path')
123
+ # @click.option('-f', '--file_path', prompt='Enter the path',
124
+ # help='The path of the content, a URL or local drive path.'
125
+ # 'Include / at the end of a local drive path.')
126
+ # @click.option('-t', '--file_type', prompt='Enter the file type',
127
+ # help='The file type to be uploaded.')
128
+ # def reconcile(metadata_csv, file_path, file_type, output_path):
129
+ # workflows.reconcile_files_and_metadata(metadata_csv, output_path,
130
+ # file_path, file_type)
145
131
146
132
147
133
if __name__ == '__main__' :
0 commit comments