Skip to content

Commit

Permalink
Fix to determine if "relationships have changed" prior to reindexing …
Browse files Browse the repository at this point in the history
…the entity, rather than after.
  • Loading branch information
kburke committed Jul 3, 2024
1 parent ecbb9c8 commit d4f8223
Showing 1 changed file with 11 additions and 5 deletions.
16 changes: 11 additions & 5 deletions src/hubmap_translator.py
Original file line number Diff line number Diff line change
Expand Up @@ -313,7 +313,8 @@ def _get_existing_entity_relationships(self, entity_uuid:str, es_url:str, es_ind

if not resp_json or \
'hits' not in resp_json or \
'hits' not in resp_json['hits']:
'hits' not in resp_json['hits'] or \
len(resp_json['hits']['hits']) == 0:
# If OpenSearch does not have an existing document for this entity, drop down to reindexing.
# Any other falsy JSON response could be an unexpected result for an existing entity, but fall back to
# reindexing under those circumstances, too.
Expand Down Expand Up @@ -528,10 +529,6 @@ def translate(self, entity_id):
# get URL for the OpenSearch server
es_url = self.INDICES['indices']['entities']['elasticsearch']['url'].strip('/')

# Reindex the entity itself first before dealing with other documents for related entities.
self._call_indexer(entity=entity
, delete_existing_doc_first=True)

# Get the ancestors and descendants of this entity as they exist in Neo4j, and as they
# exist in OpenSearch.
neo4j_ancestor_ids = self.call_entity_api(entity_id=entity_id
Expand Down Expand Up @@ -572,7 +569,14 @@ def translate(self, entity_id):
relationships_changed = self._relationships_changed_since_indexed(neo4j_ancestor_ids=neo4j_ancestor_ids
, neo4j_descendant_ids=neo4j_descendant_ids
, existing_oss_doc=existing_entity_json)

# Now that it has been determined whether relationships have changed for this entity,
# reindex the entity itself first before dealing with other documents for related entities.
self._call_indexer(entity=entity
, delete_existing_doc_first=True)

if relationships_changed:
logger.info(f"Related entities for {entity_id} have changed in Neo4j. Reindexing")
# Since the entity is new or the Neo4j relationships with related entities have changed,
# reindex the entities related to the current entity.
self._reindex_related_entities(entity_id=entity_id
Expand All @@ -582,6 +586,8 @@ def translate(self, entity_id):
, neo4j_collection_ids=neo4j_collection_ids
, neo4j_upload_ids=neo4j_upload_ids)
else:
logger.info(f"Related entities for {entity_id} are unchanged in Neo4j."
f" Directly updating index docs of related entities.")
# Since the entity's relationships are identical in Neo4j and OpenSearch, just update
# documents in the entities indices with a copy of the current entity.
for es_index in [ self.INDICES['indices']['entities']['private']
Expand Down

0 comments on commit d4f8223

Please sign in to comment.