Skip to content

Commit

Permalink
Update terms and allow deleting terms
Browse files Browse the repository at this point in the history
  • Loading branch information
rafelafrance committed Feb 28, 2024
1 parent 9668da5 commit acdba6a
Show file tree
Hide file tree
Showing 4 changed files with 20 additions and 2 deletions.
6 changes: 6 additions & 0 deletions traiter/pylib/pipes/add.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,9 +22,13 @@ def term_pipe(
name: str,
path: Path | list[Path],
default_labels: dict[str, str] | None = None,
delete_patterns: list[str] | str | None = None,
):
default_labels = default_labels if default_labels else {}
paths = path if isinstance(path, Iterable) else [path]
if isinstance(delete_patterns, str):
delete_patterns = delete_patterns.split()
delete_patterns = delete_patterns if delete_patterns else []

# Gather terms and make sure they have the needed fields
by_attr = defaultdict(list)
Expand All @@ -33,6 +37,8 @@ def term_pipe(
for path in paths:
terms = term_util.read_terms(path)
for term in terms:
if term["pattern"] in delete_patterns:
continue
label = term.get("label", default_labels.get(path.stem))
pattern = {"label": label, "pattern": term["pattern"]}
attr = term.get("attr", "lower").upper()
Expand Down
9 changes: 8 additions & 1 deletion traiter/pylib/rules/terms/name_terms.csv
Original file line number Diff line number Diff line change
Expand Up @@ -188,8 +188,10 @@ not_name,landing,lower,
not_name,lat,lower,
not_name,latitude,lower,
not_name,legal,lower,
not_name,liberty,lower,
not_name,legal,lower,
not_name,loam,lower,
not_name,life,lower,
not_name,limestone,lower,
not_name,line,lower,
not_name,list,lower,
not_name,lng,lower,
Expand Down Expand Up @@ -258,6 +260,7 @@ not_name,puente,lower,
not_name,puerto rico,lower,
not_name,quad,lower,
not_name,quadrangle,lower,
not_name,quarter,lower,
not_name,quartermaster,lower,
not_name,railroad,lower,
not_name,railway,lower,
Expand All @@ -282,6 +285,7 @@ not_name,rt,lower,
not_name,rte,lower,
not_name,s. paulo,lower,
not_name,sampled,lower,
not_name,san benito,lower,
not_name,san diego,lower,
not_name,san juan,lower,
not_name,sci.,lower,
Expand Down Expand Up @@ -338,18 +342,21 @@ not_name_prefix,project,lower,
not_name_prefix,project.,lower,
not_name_prefix,sponsored by,lower,
not_name_prefix,sponsored,lower,
not_name_prefix,san,lower,
not_name_prefix,state,lower,
not_name_prefix,states,lower,
not_name_prefix,station,lower,
not_name_prefix,the university of,lower,
not_name_prefix,the university,lower,
not_name_prefix,univ. of,lower,
not_name_prefix,university of,lower,
not_name_prefix,upper,lower,
not_name_suffix,area,lower,
not_name_suffix,barcode,lower,
not_name_suffix,biological,lower,
not_name_suffix,co,lower,
not_name_suffix,co.,lower,
not_name_suffix,coastal,lower,
not_name_suffix,collection,lower,
not_name_suffix,community,lower,
not_name_suffix,conservancy,lower,
Expand Down
2 changes: 1 addition & 1 deletion traiter/pylib/rules/terms/unit_length_terms.csv
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ imperial_length,feet,ft,30.48
imperial_length,foot,ft,30.48
imperial_length,ft,ft,30.48
imperial_length,ft.,ft,30.48
imperial_length,in .,in,2.54
imperial_length,in,in,2.54
imperial_length,in.,in,2.54
imperial_length,inch,in,2.54
imperial_length,inches,in,2.54
Expand Down
5 changes: 5 additions & 0 deletions traiter/pylib/term_util.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,11 @@ def get_labels(
return sorted(labels)


def delete_terms(terms: list, patterns: list[str] | str) -> list:
patterns = patterns if isinstance(patterns, list) else patterns.split()
terms = [t for t in terms if t["pattern"] not in patterns]


def labels_to_remove(
csv_paths: Path | Iterable[Path],
*,
Expand Down

0 comments on commit acdba6a

Please sign in to comment.