@@ -16,13 +16,13 @@
 from rq import get_current_job
 import sqlalchemy as sa
 
-import ckan.model as model
+from ckan import model
 from ckan.plugins.toolkit import get_action, asbool, ObjectNotFound, config, check_ckan_version
-import ckan.lib.search as search
 
 from . import loader
 from . import db
 from .job_exceptions import JobError, HTTPError, DataTooBigError, FileCouldNotBeLoadedError
+from .utils import set_resource_metadata
 
 try:
     from ckan.lib.api_token import get_user_from_token
@@ -244,7 +244,8 @@ def _download_resource_data(resource, data, api_key, logger):
     '''
     # check scheme
     url = resource.get('url')
-    scheme = urlsplit(url).scheme
+    url_parts = urlsplit(url)
+    scheme = url_parts.scheme
     if scheme not in ('http', 'https', 'ftp'):
         raise JobError(
             'Only http, https, and ftp resources may be fetched.'
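
The refactor keeps the parsed `urlsplit()` result in `url_parts` so the download step further down can reuse it; only the scheme is checked here. A quick standalone illustration of that check (the example URL is made up, and a plain ValueError stands in for the extension's JobError so the snippet runs on its own):

    from urllib.parse import urlsplit

    url = 'ftp://example.org/archive/data.csv'  # illustrative URL
    url_parts = urlsplit(url)
    scheme = url_parts.scheme  # -> 'ftp'
    if scheme not in ('http', 'https', 'ftp'):
        # the real job raises JobError here
        raise ValueError('Only http, https, and ftp resources may be fetched.')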
@@ -263,7 +264,17 @@ def _download_resource_data(resource, data, api_key, logger):
             # otherwise we won't get file from private resources
             headers['Authorization'] = api_key
 
-        response = get_response(url, headers)
+            # Add a constantly changing parameter to bypass URL caching.
+            # If we're running XLoader, then either the resource has
+            # changed, or something went wrong and we want a clean start.
+            # Either way, we don't want a cached file.
+            download_url = url_parts._replace(
+                query='{}&nonce={}'.format(url_parts.query, time.time())
+            ).geturl()
+        else:
+            download_url = url
+
+        response = get_response(download_url, headers)
 
     cl = response.headers.get('content-length')
     if cl and int(cl) > MAX_CONTENT_LENGTH:
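
The new branch appends a throwaway `nonce` query parameter for uploaded resources so URL-based caches cannot hand back a stale copy. A minimal standalone sketch of that step, assuming nothing beyond the standard library (the helper name and example URL are illustrative, not part of the patch):

    import time
    from urllib.parse import urlsplit

    def cache_busted_url(url):
        # Rebuild the URL with a time-based `nonce` parameter appended to the
        # query string, mirroring the upload branch in the hunk above.
        url_parts = urlsplit(url)
        return url_parts._replace(
            query='{}&nonce={}'.format(url_parts.query, time.time())
        ).geturl()

    print(cache_busted_url('https://example.org/data.csv?dl=1'))
    # e.g. https://example.org/data.csv?dl=1&nonce=1712345678.123456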
@@ -369,9 +380,12 @@ def set_datastore_active(data, resource, logger):
 
     data['datastore_active'] = True
     logger.info('Setting resource.datastore_active = True')
+    contains_all_records = data.get(
+        'datastore_contains_all_records_of_source_file', True)
+    data['datastore_contains_all_records_of_source_file'] = contains_all_records
     logger.info(
-        'Setting resource.datastore_contains_all_records_of_source_file = {}'
-        .format(data.get('datastore_contains_all_records_of_source_file')))
+        'Setting resource.datastore_contains_all_records_of_source_file = %s',
+        contains_all_records)
     set_resource_metadata(update_dict=data)
 
 
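
The log message now passes the flag as a lazy `%s` argument instead of pre-building the string with `.format()`, so interpolation only happens if the record is actually emitted. A small sketch of the two styles, with an illustrative logger name:

    import logging

    logging.basicConfig(level=logging.INFO)
    logger = logging.getLogger('xloader.example')  # illustrative name
    contains_all_records = True

    # Eager: the message is formatted before logging sees it,
    # even when the INFO level is disabled.
    logger.info('datastore_contains_all_records_of_source_file = {}'.format(
        contains_all_records))

    # Lazy: logging does the %-interpolation only if the record is emitted.
    logger.info('datastore_contains_all_records_of_source_file = %s',
                contains_all_records)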
@@ -403,59 +417,6 @@ def callback_xloader_hook(result_url, api_key, job_dict):
     return result.status_code == requests.codes.ok
 
 
-def set_resource_metadata(update_dict):
-    '''
-    Set appropriate datastore_active flag on CKAN resource.
-
-    Called after creation or deletion of DataStore table.
-    '''
-    from ckan import model
-    # We're modifying the resource extra directly here to avoid a
-    # race condition, see issue #3245 for details and plan for a
-    # better fix
-    update_dict.update({
-        'datastore_active': update_dict.get('datastore_active', True),
-        'datastore_contains_all_records_of_source_file':
-        update_dict.get('datastore_contains_all_records_of_source_file', True)
-    })
-
-    q = model.Session.query(model.Resource). \
-        filter(model.Resource.id == update_dict['resource_id'])
-    resource = q.one()
-
-    # update extras in database for record
-    extras = resource.extras
-    extras.update(update_dict)
-    q.update({'extras': extras}, synchronize_session=False)
-
-    # TODO: Remove resource_revision_table when dropping support for 2.8
-    if hasattr(model, 'resource_revision_table'):
-        model.Session.query(model.resource_revision_table).filter(
-            model.ResourceRevision.id == update_dict['resource_id'],
-            model.ResourceRevision.current is True
-        ).update({'extras': extras}, synchronize_session=False)
-    model.Session.commit()
-
-    # get package with updated resource from solr
-    # find changed resource, patch it and reindex package
-    psi = search.PackageSearchIndex()
-    solr_query = search.PackageSearchQuery()
-    q = {
-        'q': 'id:"{0}"'.format(resource.package_id),
-        'fl': 'data_dict',
-        'wt': 'json',
-        'fq': 'site_id:"%s"' % config.get('ckan.site_id'),
-        'rows': 1
-    }
-    for record in solr_query.run(q)['results']:
-        solr_data_dict = json.loads(record['data_dict'])
-        for resource in solr_data_dict['resources']:
-            if resource['id'] == update_dict['resource_id']:
-                resource.update(update_dict)
-                psi.index_package(solr_data_dict)
-                break
-
-
 def validate_input(input):
     # Especially validate metadata which is provided by the user
     if 'metadata' not in input: