Skip to content

Commit a28c3b0

Browse files
authored
Merge pull request #261 from qld-gov-au/qgov-backports-20250918
Backport features from QGOV fork
2 parents fb0ff1c + 7928abd commit a28c3b0

File tree

11 files changed

+331
-295
lines changed

11 files changed

+331
-295
lines changed

ckanext/xloader/helpers.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -40,7 +40,7 @@ def is_resource_supported_by_xloader(res_dict, check_access=True):
4040
try:
4141
is_supported_url_type = url_type not in toolkit.h.datastore_rw_resource_url_types()
4242
except AttributeError:
43-
is_supported_url_type = (url_type == 'upload')
43+
is_supported_url_type = (url_type in ['upload', 'None'])
4444
else:
4545
is_supported_url_type = True
4646
return (is_supported_format or is_datastore_active) and user_has_access and is_supported_url_type

ckanext/xloader/job_exceptions.py

Lines changed: 2 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -53,6 +53,7 @@ class LoaderError(JobError):
5353
'''Exception that's raised if a load fails'''
5454
pass
5555

56+
5657
class XLoaderTimeoutError(JobError):
5758
"""Custom timeout exception that can be retried"""
58-
pass
59+
pass

ckanext/xloader/jobs.py

Lines changed: 13 additions & 14 deletions
Original file line number | Diff line number | Diff line change
@@ -18,11 +18,10 @@
1818
from rq.timeouts import JobTimeoutException
1919
import sqlalchemy as sa
2020

21-
from ckan import model
2221
from ckan.plugins.toolkit import get_action, asbool, enqueue_job, ObjectNotFound, config, h
2322

2423
from . import db, loader
25-
from .job_exceptions import JobError, HTTPError, DataTooBigError, FileCouldNotBeLoadedError, XLoaderTimeoutError
24+
from .job_exceptions import JobError, HTTPError, DataTooBigError, FileCouldNotBeLoadedError, LoaderError, XLoaderTimeoutError
2625
from .utils import datastore_resource_exists, set_resource_metadata, modify_input_url
2726

2827

@@ -258,11 +257,12 @@ def xloader_data_into_datastore_(input, job_dict, logger):
258257
logger.info('File hash: %s', file_hash)
259258
resource['hash'] = file_hash
260259

261-
def direct_load():
260+
def direct_load(allow_type_guessing=False):
262261
fields = loader.load_csv(
263262
tmp_file.name,
264263
resource_id=resource['id'],
265264
mimetype=resource.get('format'),
265+
allow_type_guessing=allow_type_guessing,
266266
logger=logger)
267267
loader.calculate_record_count(
268268
resource_id=resource['id'], logger=logger)
@@ -318,24 +318,24 @@ def tabulator_load():
318318
direct_load()
319319
else:
320320
try:
321-
direct_load()
322-
except JobError as e:
321+
direct_load(allow_type_guessing=True)
322+
except (JobError, LoaderError) as e:
323323
logger.warning('Load using COPY failed: %s', e)
324324
logger.info('Trying again with tabulator')
325325
tabulator_load()
326326
except JobTimeoutException:
327-
try:
328-
tmp_file.close()
329-
except FileNotFoundError:
330-
pass
331327
logger.warning('Job timed out after %ss', RETRIED_JOB_TIMEOUT)
332328
raise JobError('Job timed out after {}s'.format(RETRIED_JOB_TIMEOUT))
333329
except FileCouldNotBeLoadedError as e:
334330
logger.warning('Loading excerpt for this format not supported.')
335331
logger.error('Loading file raised an error: %s', e)
336332
raise JobError('Loading file raised an error: {}'.format(e))
337-
338-
tmp_file.close()
333+
finally:
334+
try:
335+
tmp_file.close()
336+
os.remove(tmp_file.name)
337+
except FileNotFoundError:
338+
pass
339339

340340
logger.info('Express Load completed')
341341

@@ -439,7 +439,7 @@ def _download_resource_data(resource, data, api_key, logger):
439439
except requests.exceptions.Timeout:
440440
logger.warning('URL time out after %ss', DOWNLOAD_TIMEOUT)
441441
raise XLoaderTimeoutError('Connection timed out after {}s'.format(
442-
DOWNLOAD_TIMEOUT))
442+
DOWNLOAD_TIMEOUT))
443443
except requests.exceptions.RequestException as e:
444444
tmp_file.close()
445445
try:
@@ -525,7 +525,7 @@ def callback_xloader_hook(result_url, api_key, job_dict):
525525

526526
try:
527527
result = requests.post(
528-
modify_input_url(result_url), # modify with local config
528+
modify_input_url(result_url), # modify with local config
529529
data=json.dumps(job_dict, cls=DatetimeJsonEncoder),
530530
verify=SSL_VERIFY,
531531
headers=headers)
@@ -572,7 +572,6 @@ def _get_user_from_key(api_key_or_token):
572572
return get_user_from_token(api_key_or_token)
573573

574574

575-
576575
def get_resource_and_dataset(resource_id, api_key):
577576
"""
578577
Gets available information about the resource and its dataset from CKAN

0 commit comments

Comments
 (0)