Skip to content

Commit f42b344

Browse files
committed
Tested the extractor on a single file; it is working
1 parent 38ed787 commit f42b344

File tree

2 files changed

+24
-9
lines changed

2 files changed

+24
-9
lines changed

pyclowder/connectors.py

Lines changed: 19 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -63,7 +63,7 @@ class Connector(object):
6363
"""
6464

6565
def __init__(self, extractor_name, extractor_info, check_message=None, process_message=None, ssl_verify=True,
66-
mounted_paths=None, clowder_url=None, max_retry=10, extractor_key=None, clowder_email=None):
66+
mounted_paths=None, minio_mounted_path=None, clowder_url=None, max_retry=10, extractor_key=None, clowder_email=None):
6767
self.extractor_name = extractor_name
6868
self.extractor_info = extractor_info
6969
self.check_message = check_message
@@ -73,6 +73,10 @@ def __init__(self, extractor_name, extractor_info, check_message=None, process_m
7373
self.mounted_paths = {}
7474
else:
7575
self.mounted_paths = mounted_paths
76+
if minio_mounted_path is None:
77+
self.minio_mounted_path = ''
78+
else:
79+
self.minio_mounted_path = minio_mounted_path
7680
self.clowder_url = clowder_url
7781
self.clowder_email = clowder_email
7882
self.extractor_key = extractor_key
@@ -268,8 +272,14 @@ def _build_resource(self, body, host, secret_key, clowder_version):
268272
"metadata": body['metadata']
269273
}
270274

271-
def _check_for_local_file(self, file_metadata):
275+
def _check_for_local_file(self, file_metadata, file_id=None):
272276
""" Try to get pointer to locally accessible copy of file for extractor."""
277+
# Check if file is present in a minio mount (only valid for Clowder v2)
278+
if self.minio_mounted_path and file_id:
279+
minio_file_path = self.minio_mounted_path + "/" + file_id
280+
print("Checking for minio local file: %s" % minio_file_path)
281+
if os.path.isfile(minio_file_path):
282+
return minio_file_path
273283

274284
# first check if file is accessible locally
275285
if 'filepath' in file_metadata:
@@ -278,7 +288,7 @@ def _check_for_local_file(self, file_metadata):
278288
# first simply check if file is present locally
279289
if os.path.isfile(file_path):
280290
return file_path
281-
291+
282292
# otherwise check any mounted paths...
283293
if len(self.mounted_paths) > 0:
284294
for source_path in self.mounted_paths:
@@ -427,7 +437,7 @@ def _process_message(self, body):
427437
try:
428438
if check_result != pyclowder.utils.CheckMessage.bypass:
429439
file_metadata = pyclowder.files.download_info(self, host, secret_key, resource["id"])
430-
file_path = self._check_for_local_file(file_metadata)
440+
file_path = self._check_for_local_file(file_metadata, resource["id"])
431441
if not file_path:
432442
file_path = pyclowder.files.download(self, host, secret_key, resource["id"],
433443
resource["intermediate_id"],
@@ -628,10 +638,10 @@ class RabbitMQConnector(Connector):
628638
# pylint: disable=too-many-arguments
629639
def __init__(self, extractor_name, extractor_info,
630640
rabbitmq_uri, rabbitmq_key=None, rabbitmq_queue=None,
631-
check_message=None, process_message=None, ssl_verify=True, mounted_paths=None,
641+
check_message=None, process_message=None, ssl_verify=True, mounted_paths=None, minio_mounted_path=None,
632642
heartbeat=10, clowder_url=None, max_retry=10, extractor_key=None, clowder_email=None):
633643
super(RabbitMQConnector, self).__init__(extractor_name, extractor_info, check_message, process_message,
634-
ssl_verify, mounted_paths, clowder_url, max_retry, extractor_key, clowder_email)
644+
ssl_verify, mounted_paths, minio_mounted_path, clowder_url, max_retry, extractor_key, clowder_email)
635645
self.rabbitmq_uri = rabbitmq_uri
636646
self.rabbitmq_key = rabbitmq_key
637647
if rabbitmq_queue is None:
@@ -756,7 +766,7 @@ def on_message(self, channel, method, header, body):
756766
job_id = None
757767

758768
self.worker = RabbitMQHandler(self.extractor_name, self.extractor_info, job_id, self.check_message,
759-
self.process_message, self.ssl_verify, self.mounted_paths, self.clowder_url,
769+
self.process_message, self.ssl_verify, self.mounted_paths, self.minio_mounted_path, self.clowder_url,
760770
method, header, body)
761771
self.worker.start_thread(json_body)
762772

@@ -836,10 +846,10 @@ class RabbitMQHandler(Connector):
836846
"""
837847

838848
def __init__(self, extractor_name, extractor_info, job_id, check_message=None, process_message=None, ssl_verify=True,
839-
mounted_paths=None, clowder_url=None, method=None, header=None, body=None, max_retry=10):
849+
mounted_paths=None, minio_mounted_path=None, clowder_url=None, method=None, header=None, body=None, max_retry=10):
840850

841851
super(RabbitMQHandler, self).__init__(extractor_name, extractor_info, check_message, process_message,
842-
ssl_verify, mounted_paths, clowder_url, max_retry)
852+
ssl_verify, mounted_paths, minio_mounted_path,clowder_url, max_retry)
843853
self.method = method
844854
self.header = header
845855
self.body = body

pyclowder/extractors.py

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -72,6 +72,7 @@ def __init__(self):
7272
clowder_email = os.getenv("CLOWDER_EMAIL", "")
7373
logging_config = os.getenv("LOGGING")
7474
mounted_paths = os.getenv("MOUNTED_PATHS", "{}")
75+
minio_mounted_path = os.getenv("MINIO_MOUNTED_PATH", "")
7576
input_file_path = os.getenv("INPUT_FILE_PATH")
7677
output_file_path = os.getenv("OUTPUT_FILE_PATH")
7778
connector_default = "RabbitMQ"
@@ -105,6 +106,8 @@ def __init__(self):
105106
help='rabbitMQ queue name (default=%s)' % rabbitmq_queuename)
106107
self.parser.add_argument('--mounts', '-m', dest="mounted_paths", default=mounted_paths,
107108
help="dictionary of {'remote path':'local path'} mount mappings")
109+
self.parser.add_argument('--minio-mount', dest="minio_mounted_path", default=minio_mounted_path,
110+
help="path to mount Minio storage")
108111
self.parser.add_argument('--input-file-path', '-ifp', dest="input_file_path", default=input_file_path,
109112
help="Full path to local input file to be processed (used by Big Data feature)")
110113
self.parser.add_argument('--output-file-path', '-ofp', dest="output_file_path", default=output_file_path,
@@ -175,6 +178,7 @@ def start(self):
175178
rabbitmq_key=rabbitmq_key,
176179
rabbitmq_queue=self.args.rabbitmq_queuename,
177180
mounted_paths=json.loads(self.args.mounted_paths),
181+
minio_mounted_path=self.args.minio_mounted_path,
178182
clowder_url=self.args.clowder_url,
179183
max_retry=self.args.max_retry,
180184
heartbeat=self.args.heartbeat,
@@ -193,6 +197,7 @@ def start(self):
193197
process_message=self.process_message,
194198
picklefile=self.args.hpc_picklefile,
195199
mounted_paths=json.loads(self.args.mounted_paths),
200+
minio_mounted_path=self.args.minio_mounted_path,
196201
max_retry=self.args.max_retry)
197202
threading.Thread(target=connector.listen, name="HPCConnector").start()
198203

0 commit comments

Comments
 (0)