Skip to content

Commit

Permalink
parametrized
Browse files (browse the repository at this point in the history)
  • Loading branch information
SkBlaz committed Jan 30, 2024
1 parent 9f584a2 commit 8bfa3c4
Show file tree
Hide file tree
Showing 2 changed files with 10 additions and 3 deletions.
7 changes: 7 additions & 0 deletions outrank/__main__.py
Original file line number Diff line number Diff line change
Expand Up @@ -134,6 +134,13 @@ def main():
help='Name of the target attribute for ranking. Note that this can be any other feature for most implemented heuristics.',
)

parser.add_argument(
'--max_unique_hist_constraint',
type=int,
default=30_000,
help='Max number of unique values for which counts are recalled.',
)

parser.add_argument(
'--transformers',
type=str,
Expand Down
6 changes: 3 additions & 3 deletions outrank/core_ranking.py
Original file line number Diff line number Diff line change
Expand Up @@ -421,7 +421,7 @@ def compute_value_counts(input_dataframe: pd.DataFrame, args: Any):
del GLOBAL_RARE_VALUE_STORAGE[to_remove_val]


- def compute_cardinalities(input_dataframe: pd.DataFrame, pbar: Any) -> None:
+ def compute_cardinalities(input_dataframe: pd.DataFrame, pbar: Any, max_unique_hist_constraint: int) -> None:
"""Compute cardinalities of features, incrementally"""

global GLOBAL_CARDINALITY_STORAGE
Expand All @@ -434,7 +434,7 @@ def compute_cardinalities(input_dataframe: pd.DataFrame, pbar: Any) -> None:
)

if column not in GLOBAL_COUNTS_STORAGE:
- GLOBAL_COUNTS_STORAGE[column] = PrimitiveConstrainedCounter()
+ GLOBAL_COUNTS_STORAGE[column] = PrimitiveConstrainedCounter(max_unique_hist_constraint)

[GLOBAL_COUNTS_STORAGE[column].add(value) for value in input_dataframe[column].values]

Expand Down Expand Up @@ -553,7 +553,7 @@ def compute_batch_ranking(
feature_memory_consumption = compute_feature_memory_consumption(
input_dataframe, args,
)
- compute_cardinalities(input_dataframe, pbar)
+ compute_cardinalities(input_dataframe, pbar, args.max_unique_hist_constraint)

if args.task == 'identify_rare_values':
compute_value_counts(input_dataframe, args)
Expand Down

0 comments on commit 8bfa3c4

Please sign in to comment.