Skip to content

Commit

Permalink
Make build_graph output stable
Browse files Browse the repository at this point in the history
  • Loading branch information
cuihaoleo committed Jun 21, 2023
1 parent 7d79a1b commit d0d510a
Showing 1 changed file with 6 additions and 3 deletions.
9 changes: 6 additions & 3 deletions poligrapher/scripts/build_graph.py
Original file line number Diff line number Diff line change
Expand Up @@ -262,14 +262,16 @@ def normalize_terms():
"""Step 6: Run phrase normalization."""

for src, token_type in token_type_map.items():
normalized_terms_map[src] = terms = set()

if (phrase := _expand_phrase(src)) is None:
normalized_terms_map[src] = []
continue

# Fall back to lemmatization only if the token types from NER and the graph relation agree
flag_use_stem = phrase.root.ent_type_ == token_type

# All candidate terms
terms = set()

match token_type:
case "DATA":
terms.update(self.data_phrase_normalizer.normalize(phrase, flag_use_stem))
Expand Down Expand Up @@ -318,7 +320,8 @@ def normalize_terms():

G_final.add_nodes_from(terms, type=token_type)

logging.info("Phrase %r (%s) -> %r", phrase.text, token_type, ", ".join(sorted(terms)))
normalized_terms_map[src] = sorted(terms, reverse=True) # Stabilize set order for reproducibility
logging.info("Phrase %r (%s) -> %r", phrase.text, token_type, ", ".join(normalized_terms_map[src]))

def merge_subsum_graph():
"""Step 7: Populate SUBSUM edges in G_final from G_subsum."""
Expand Down

0 comments on commit d0d510a

Please sign in to comment.