Skip to content

Commit

Permalink
Break source date parsing into standalone method
Browse files Browse the repository at this point in the history
  • Loading branch information
hancush committed Sep 22, 2021
1 parent 7c2216f commit 7f6cb48
Showing 1 changed file with 23 additions and 17 deletions.
40 changes: 23 additions & 17 deletions sfm_pc/management/commands/import_google_doc.py
Original file line number Diff line number Diff line change
Expand Up @@ -1420,27 +1420,16 @@ def create_sources(self, source_sheet):
}

for prefix in ('published', 'created', 'uploaded'):
date_val = source_data[Source.get_spreadsheet_field_name('{}_date'.format(prefix))]
parsed_date = None
date_value = source_data[Source.get_spreadsheet_field_name('{}_date'.format(prefix))]
parsed_date = self.get_source_date(date_value)

try:
# Try to parse the value as a timestamp (remove timezone
# marker for Python <3.7)
parsed_date = datetime.strptime(date_val.replace('Z', ''), '%Y-%m-%dT%H:%M:%S')

except ValueError:
# Fall back to an empty string because we want to use
# source_info to retrieve and update existing Sources, and
# date fields default to an empty string if no data is
# provided
parsed_date = self.parse_date(date_val) or ''
source_info['{}_date'.format(prefix)] = parsed_date

else:
if isinstance(parsed_date, datetime):
source_info['{}_timestamp'.format(prefix)] = parsed_date
else:
source_info['{}_date'.format(prefix)] = parsed_date

if not parsed_date and prefix == 'published':
message = 'Invalid published_date "{1}" at {2}'.format(prefix, date_val, access_point_uuid)
message = 'Invalid published_date "{1}" at {2}'.format(prefix, date_value, access_point_uuid)
self.log_error(message, sheet='sources', current_row=idx + 2)

source, created = Source.objects.get_or_create(**source_info)
Expand All @@ -1465,6 +1454,23 @@ def create_sources(self, source_sheet):

access_point.save()

def get_source_date(self, date_value):
    '''
    Convert a raw source date string into a parsed value.

    The value is first tried as a full timestamp in the form
    YYYY-MM-DDTHH:MM:SS (any "Z" characters are removed beforehand). If it
    matches, the resulting datetime is returned. Otherwise the untouched
    value is passed to self.parse_date, and that result is returned when
    truthy; when it is falsy an empty string is returned instead.
    '''
    timestamp_format = '%Y-%m-%dT%H:%M:%S'

    # Remove the timezone marker, which strptime cannot handle with this
    # format on Python <3.7
    naive_value = date_value.replace('Z', '')

    try:
        timestamp = datetime.strptime(naive_value, timestamp_format)
    except ValueError:
        timestamp = None

    if timestamp is not None:
        return timestamp

    # Not a timestamp: defer to the general date parser. An empty string is
    # the fallback because this value is used to retrieve and update
    # existing Sources, and date fields default to an empty string if no
    # data is provided
    fallback = self.parse_date(date_value)
    return fallback if fallback else ''

def get_sources(self, source_id_string):

sources = []
Expand Down

0 comments on commit 7f6cb48

Please sign in to comment.