Skip to content

Commit

Permalink
Merge pull request aws-samples#256 from grantrosse/add-get_words_above()
Browse files Browse the repository at this point in the history
add get_words_above()
  • Loading branch information
schadem authored Oct 20, 2023
2 parents a1256f3 + ffbccc1 commit b2bfd77
Showing 1 changed file with 29 additions and 0 deletions.
29 changes: 29 additions & 0 deletions tpipelinegeofinder/textractgeofinder/tgeofinder.py
Original file line number Diff line number Diff line change
Expand Up @@ -273,6 +273,35 @@ def get_words_below(self,
area_selection=area_selection,
page_number=page_number,
exclude_ids=exclude_ids)

def get_words_above(self,
                    anker: AreaSelection,
                    number_of_words_to_return: int = None,
                    text_type: str = 'word',
                    area_selection: AreaSelection = None,
                    page_number: int = 1,
                    exclude_ids: List[str] = None) -> List[TWord]:
    """Return the words located above ``anker`` and horizontally aligned with it.

    A word matches when its horizontal center ``(xmin + xmax) / 2`` lies
    strictly between the anchor's left and right edges and its ``ymax`` is
    smaller than the anchor's top edge (``anker.top_left.y``).
    NOTE(review): this assumes y grows downwards, so "smaller y" means
    "higher on the page" -- confirm against the coordinate system in use.

    Args:
        anker: Area whose ``top_left`` / ``lower_right`` points define the
            reference box.
        number_of_words_to_return: When truthy, at most this many words are
            returned, chosen as the ones closest to the anchor (largest
            ``ymin``). ``None`` or ``0`` means no limit.
        text_type: Value matched against the ``text_type`` column.
        area_selection: Forwarded unchanged to ``self.ocrdb.execute``.
        page_number: Forwarded unchanged to ``self.ocrdb.execute``.
        exclude_ids: Forwarded unchanged to ``self.ocrdb.execute``.

    Returns:
        The result of ``self.ocrdb.execute`` ordered by ascending ``ymin``
        in both the limited and the unlimited case.
    """
    xmin = anker.top_left.x
    ymin = anker.top_left.y
    xmax = anker.lower_right.x
    # The anchor's lower edge is irrelevant when looking upwards, so it is
    # intentionally not read here.

    # Query fragment handed to ocrdb.execute; presumably appended to a base
    # SELECT that already contains a WHERE clause -- verify in ocrdb.
    filters = ''' and ? < (xmin + xmax) / 2
                and ? > ( xmin + xmax ) / 2
                and ? > ymax
                and text_type = ? '''
    params = [xmin, xmax, ymin, text_type]

    limited = bool(number_of_words_to_return)
    if limited:
        # With ascending order the LIMIT would keep the words farthest from
        # the anchor. Select the nearest ones by sorting descending and
        # restore the ascending order after the query.
        query = filters + " order by ymin desc limit ? "
        params.append(number_of_words_to_return)
    else:
        query = filters + " order by ymin asc "

    words = self.ocrdb.execute(query=query,
                               textract_doc_uuid=self.textract_doc_uuid,
                               params=params,
                               area_selection=area_selection,
                               page_number=page_number,
                               exclude_ids=exclude_ids)
    if limited:
        return list(reversed(words))
    return words

def get_words_to_the_right(self,
anker: AreaSelection,
Expand Down

0 comments on commit b2bfd77

Please sign in to comment.