Skip to content

Commit

Permalink
Add text linearization configuration to hide key value layouts
Browse files Browse the repository at this point in the history
  • Loading branch information
Belval authored Mar 6, 2024
2 parents f52dfbe + 8283553 commit 1f2e99f
Show file tree
Hide file tree
Showing 2 changed files with 7 additions and 4 deletions.
10 changes: 6 additions & 4 deletions textractor/data/text_linearization_config.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,13 +14,15 @@ class TextLinearizationConfig:

max_number_of_consecutive_spaces: int = None #: Removes extra whitespace (None skips whitespace removal)

hide_header_layout: bool = False #: Hide headers in the linearized output
hide_header_layout: bool = False #: Hide header layouts in the linearized output

hide_footer_layout: bool = False #: Hide footers in the linearized output
hide_footer_layout: bool = False #: Hide footer layouts in the linearized output

hide_figure_layout: bool = False #: Hide figures in the linearized output
hide_figure_layout: bool = False #: Hide figure layouts in the linearized output

hide_table_layout: bool = False #: Hide tables in the linearized output
hide_table_layout: bool = False #: Hide table layouts in the linearized output

hide_key_value_layout: bool = False #: Hide key-value layouts in the linearized output

hide_page_num_layout: bool = False #: Hide page numbers in the linearized output

Expand Down
1 change: 1 addition & 0 deletions textractor/entities/layout.py
Original file line number Diff line number Diff line change
Expand Up @@ -110,6 +110,7 @@ def get_text_and_words(
or (self.layout_type == LAYOUT_FIGURE and config.hide_figure_layout)
or (self.layout_type == LAYOUT_PAGE_NUMBER and config.hide_page_num_layout)
or (self.layout_type == LAYOUT_TABLE and config.hide_table_layout)
or (self.layout_type == LAYOUT_KEY_VALUE and config.hide_key_value_layout)
):
return "", []
elif self.layout_type == LAYOUT_PAGE_NUMBER:
Expand Down

0 comments on commit 1f2e99f

Please sign in to comment.