Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Use admin IDs to resolve entities during import, add validation for conflicts in entity maps #777

Merged
merged 19 commits into from
Sep 27, 2021
Merged
Show file tree
Hide file tree
Changes from 8 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .github/workflows/main.yml
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@ on:
pull_request:
branches:
- master
- hcg/translation

jobs:
test:
Expand Down
214 changes: 121 additions & 93 deletions sfm_pc/management/commands/import_google_doc.py
Original file line number Diff line number Diff line change
Expand Up @@ -173,38 +173,13 @@ def handle(self, *args, **options):
one_index_start = int(options['start'])
zero_index_start = one_index_start - 1

# These are used for the mapping that we need to make below
entity_mapping_entities = [
('organization', 'unit:id:admin', 'unit:name'),
('person', 'person:id:admin', 'person:name'),
]
self.organization_entity_map = {}
self.person_entity_map = {}

# Create entity maps for persons and units
for entity_type, id_key, name_key in entity_mapping_entities:

sheets = all_sheets[entity_type]

for sheet in sheets.values():

entity_map = {}

for idx, row in enumerate(sheet[zero_index_start:]):
if row:
entity_uuid = row[id_key]
try:
entity_name = row[name_key]
except KeyError:
self.log_error(
'Entity with ID "{}" is missing a name'.format(
entity_uuid
),
sheet=entity_type,
current_row=one_index_start + (idx + 1)
)
if entity_uuid:
entity_map[entity_name] = entity_uuid

setattr(self, '{}_entity_map'.format(entity_type), entity_map)
name_error_format = (
'Got multiple name values for {entity_type} UUID "{uuid}". Current '
'row contains value "{name}" in column "{column}"'
)

for entity_type in options['entity_types'].split(','):

Expand All @@ -221,6 +196,26 @@ def handle(self, *args, **options):
self.current_row = one_index_start + (index + 1)
getattr(self, 'create_{}'.format(entity_type))(row)

for entity_type in ('organization', 'person'):
fgregg marked this conversation as resolved.
Show resolved Hide resolved
entity_map = getattr(self, '{}_entity_map'.format(entity_type), None)

# If an entity's name differs between records sharing the same
# UUID, log an error for each row containing the UUID.
if entity_map:
for uuid, name_values in entity_map.items():
distinct_names = set([value[0] for value in name_values])

if len(distinct_names) > 1:
for value in sorted(name_values, key=lambda row: row[1]):
name, row, sheet, column = value
msg = name_error_format.format(**{
'entity_type': entity_type,
'uuid': uuid,
'name': name,
'column': column,
})
self.log_error(msg, sheet=sheet, current_row=row)

data_src = options['folder'] if options.get('folder') else options['doc_id']
self.stdout.write(self.style.SUCCESS('Successfully imported data from {}'.format(data_src)))

Expand All @@ -230,6 +225,17 @@ def handle(self, *args, **options):
# Connect post save signals
self.connectSignals()

def update_entity_map(self, entity_type, uuid, name, column, sheet=None):
    """Record one observation of an entity's name against its UUID.

    The observation is added to the per-type map stored on the instance as
    ``<entity_type>_entity_map`` (for example ``organization_entity_map``),
    which maps each UUID to a set of
    ``(name, row, sheet, column)`` tuples.

    Args:
        entity_type: Prefix of the map attribute to update.
        uuid: Identifier the name was recorded under.
        name: Name value seen for this UUID.
        column: Stored as the last element of the recorded tuple.
        sheet: Sheet label to record; falls back to ``entity_type`` when
            omitted or otherwise falsy.

    Returns:
        None. The call is a no-op when the instance has no map for
        ``entity_type`` (attribute missing or set to ``None``).
    """
    entity_map = getattr(self, '{}_entity_map'.format(entity_type), None)

    # Guard clause: nothing to record into when no map exists for this type.
    if entity_map is None:
        return

    # The row comes from instance state rather than an argument, so the
    # caller must have set self.current_row before calling.
    observation = (name, self.current_row, sheet or entity_type, column)

    # A set per UUID collapses exact duplicate observations.
    entity_map.setdefault(uuid, set()).add(observation)

def create_locations(self):
this_dir = os.path.abspath(os.path.dirname(__file__))
location_file = os.path.join(this_dir, 'data', 'locations.geojson')
Expand Down Expand Up @@ -624,11 +630,7 @@ def create_organization(self, org_data):
}
}

try:
uuid = self.organization_entity_map[name_value]
except KeyError:
self.log_error('Could not find "{}" in list of organization names'.format(name_value))
return None
uuid = org_data['unit:id:admin']

try:
organization = Organization.objects.get(uuid=uuid)
Expand All @@ -644,6 +646,13 @@ def create_organization(self, org_data):

organization.update(org_info)

self.update_entity_map(
'organization',
uuid,
name_value,
org_positions['Name']['value']
)

org_attributes = ['Alias', 'Classification', 'OpenEnded', 'Headquarters']

for attr in org_attributes:
Expand Down Expand Up @@ -735,11 +744,7 @@ def create_organization(self, org_data):
},
}

try:
uuid = self.organization_entity_map[parent_org_name]
except KeyError:
self.log_error('Could not find "{}" in list of organization names'.format(parent_org_name))
return None
uuid = org_data['unit:related_unit_id:admin']

try:
parent_organization = Organization.objects.get(uuid=uuid)
Expand All @@ -755,6 +760,13 @@ def create_organization(self, org_data):

parent_organization.update(parent_org_info)

self.update_entity_map(
'organization',
uuid,
parent_org_name,
composition_positions['Parent']['value']
)

comp_info = {
'Composition_CompositionParent': {
'value': parent_organization,
Expand Down Expand Up @@ -841,11 +853,7 @@ def create_organization(self, org_data):
},
}

try:
uuid = self.organization_entity_map[member_org_name]
except KeyError:
self.log_error('Could not find "{}" in list of organization names'.format(member_org_name))
return None
uuid = org_data['unit:related_unit_id:admin']

try:
member_organization = Organization.objects.get(uuid=uuid)
Expand All @@ -861,6 +869,13 @@ def create_organization(self, org_data):

member_organization.update(member_org_info)

self.update_entity_map(
'organization',
uuid,
member_org_name,
membership_positions['OrganizationOrganization']['value']
)

membership_info = {
'MembershipOrganization_MembershipOrganizationMember': {
'value': organization,
Expand Down Expand Up @@ -1597,11 +1612,7 @@ def create_person(self, person_data):
}
}

try:
uuid = self.person_entity_map[name_value]
except KeyError:
self.log_error('Could not find "{}" in list of person names'.format(name_value))
return None
uuid = person_data['person:id:admin']

try:
person = Person.objects.get(uuid=uuid)
Expand All @@ -1616,6 +1627,13 @@ def create_person(self, person_data):

person.update(person_info)

self.update_entity_map(
'person',
uuid,
name_value,
person_positions['Name']['value']
)

self.make_relation('Alias',
person_positions['Alias'],
person_data,
Expand All @@ -1624,16 +1642,23 @@ def create_person(self, person_data):
# Make membership objects
try:
uuid = person_data[membership_positions['Organization']['value']]
organization_name = {
v: k for k, v in self.organization_entity_map.items()
}[uuid]
except IndexError:
self.log_error('Row seems to be empty')
return None
except KeyError:
self.log_error('Organization "{}" not in entity map'.format(uuid))

try:
organization = Organization.objects.get(uuid=uuid)

except Organization.DoesNotExist:
organization = Organization.objects.create(uuid=uuid,
published=True)

except ValidationError:
self.log_error('Invalid member unit UUID: "{}"'.format(uuid))
return None

organization_name = person_data['person:posting_unit_name']

try:
confidence = self.get_confidence(person_data[membership_positions['Organization']['confidence']])
except (KeyError, IndexError):
Expand All @@ -1659,40 +1684,35 @@ def create_person(self, person_data):
}
}

try:
organization = Organization.objects.get(uuid=uuid)
name_sources = self.sourcesList(organization, 'name')
div_sources = self.sourcesList(organization, 'division_id')

except Organization.DoesNotExist:
organization = Organization.objects.create(uuid=uuid,
published=True)
organization.update(org_info)
org_info["Organization_OrganizationName"]['sources'] += name_sources
org_info["Organization_OrganizationDivisionId"]['sources'] += div_sources

except ValidationError:
self.log_error('Invalid member unit UUID: "{}"'.format(uuid))
return None

else:
name_sources = self.sourcesList(organization, 'name')
div_sources = self.sourcesList(organization, 'division_id')
if organization.name.get_value():
name_confidence = organization.name.get_value().confidence

org_info["Organization_OrganizationName"]['sources'] += name_sources
org_info["Organization_OrganizationDivisionId"]['sources'] += div_sources
if name_confidence:
name_confidence = int(name_confidence)
org_info["Organization_OrganizationName"]['confidence'] = name_confidence

if organization.name.get_value():
name_confidence = organization.name.get_value().confidence
if organization.division_id.get_value():
div_confidence = organization.division_id.get_value().confidence

if name_confidence:
name_confidence = int(name_confidence)
org_info["Organization_OrganizationName"]['confidence'] = name_confidence
if div_confidence:
div_confidence = int(div_confidence)
org_info["Organization_OrganizationDivisionId"]['confidence'] = div_confidence

if organization.division_id.get_value():
div_confidence = organization.division_id.get_value().confidence

if div_confidence:
div_confidence = int(div_confidence)
org_info["Organization_OrganizationDivisionId"]['confidence'] = div_confidence
organization.update(org_info)

organization.update(org_info)
self.update_entity_map(
'organization',
uuid,
organization_name,
person_data['person:posting_unit_name'],
sheet='person'
)

membership_data = {
'MembershipPerson_MembershipPersonMember': {
Expand Down Expand Up @@ -2214,11 +2234,7 @@ def create_event(self, event_data):

for perp in perps:

try:
uuid = self.person_entity_map[perp]
except KeyError:
self.log_error('Could not find "{}" in list of person names'.format(perp))
continue
uuid = event_data['incident:perpetrator_person_id:admin']

try:
person = Person.objects.get(uuid=uuid)
Expand All @@ -2241,6 +2257,14 @@ def create_event(self, event_data):
person = Person.objects.create(uuid=uuid, published=True)
person.update(person_info)

self.update_entity_map(
'person',
uuid,
perp,
positions['Perpetrator']['value'],
sheet='event'
)

vp, created = ViolationPerpetrator.objects.get_or_create(value=person,
object_ref=violation)
if created:
Expand All @@ -2263,11 +2287,7 @@ def create_event(self, event_data):

for org in orgs:

try:
uuid = self.organization_entity_map[org]
except KeyError:
self.log_error('Could not find "{}" in list of organization names'.format(org))
continue
uuid = event_data['incident:perpetrator_unit_id:admin']

try:
organization = Organization.objects.get(uuid=uuid)
Expand All @@ -2292,6 +2312,14 @@ def create_event(self, event_data):
published=True)
organization.update(info)

self.update_entity_map(
'organization',
uuid,
org,
positions['PerpetratorOrganization']['value'],
sheet='event'
)

vpo_obj, created = ViolationPerpetratorOrganization.objects.get_or_create(value=organization,
object_ref=violation)

Expand Down
Loading