From 49a996de8b5c01979712121b5e6d3f2f3e740f30 Mon Sep 17 00:00:00 2001 From: Omri Mendels Date: Mon, 25 Mar 2024 11:19:25 +0200 Subject: [PATCH] Changed default aggregation_strategy to max (#1342) --- presidio-analyzer/conf/transformers.yaml | 12 ++++++------ .../nlp_engine/ner_model_configuration.py | 2 +- 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/presidio-analyzer/conf/transformers.yaml b/presidio-analyzer/conf/transformers.yaml index 37388cc60..c36218530 100644 --- a/presidio-analyzer/conf/transformers.yaml +++ b/presidio-analyzer/conf/transformers.yaml @@ -9,12 +9,12 @@ models: ner_model_configuration: labels_to_ignore: - O - aggregation_strategy: simple # "simple", "first", "average", "max" - stride: 16 # If stride >= 0, process long texts in - # overlapping windows of the model max - # length. The value is the length of the - # window overlap in transformer tokenizer - # tokens, NOT the length of the stride. + aggregation_strategy: max # "simple", "first", "average", "max" + stride: 16 # If stride >= 0, process long texts in + # overlapping windows of the model max + # length. The value is the length of the + # window overlap in transformer tokenizer + # tokens, NOT the length of the stride. alignment_mode: expand # "strict", "contract", "expand" model_to_presidio_entity_mapping: PER: PERSON diff --git a/presidio-analyzer/presidio_analyzer/nlp_engine/ner_model_configuration.py b/presidio-analyzer/presidio_analyzer/nlp_engine/ner_model_configuration.py index 8408f776a..0453dc87a 100644 --- a/presidio-analyzer/presidio_analyzer/nlp_engine/ner_model_configuration.py +++ b/presidio-analyzer/presidio_analyzer/nlp_engine/ner_model_configuration.py @@ -64,7 +64,7 @@ class NerModelConfiguration: """ # noqa E501 labels_to_ignore: Optional[Collection[str]] = None - aggregation_strategy: Optional[str] = "simple" + aggregation_strategy: Optional[str] = "max" stride: Optional[int] = 14 alignment_mode: Optional[str] = "expand" default_score: Optional[float] = 0.85