Skip to content

Commit

Permalink
Browse files Browse the repository at this point in the history
…hatGPT into abhahn/embedding-dependency-mi
  • Loading branch information
abhahn committed Aug 5, 2024
2 parents 7b19a71 + 9f17cfa commit b5662f0
Showing 1 changed file with 6 additions and 4 deletions.
10 changes: 6 additions & 4 deletions scripts/data_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -616,10 +616,12 @@ def extract_pdf_content(file_path, form_recognizer_client, use_layout=False):
 if use_layout:
     tables_on_page = []
     for table in form_recognizer_results.tables:
-        table_offset = table.spans[0].offset
-        table_length = table.spans[0].length
-        if page_offset <= table_offset and table_offset + table_length < page_offset + page_length:
-            tables_on_page.append(table)
+        # If the table is empty, the span is empty, so we skip it
+        if len(table.spans) > 0:
+            table_offset = table.spans[0].offset
+            table_length = table.spans[0].length
+            if page_offset <= table_offset and table_offset + table_length < page_offset + page_length:
+                tables_on_page.append(table)
 else:
     tables_on_page = []

Expand Down

0 comments on commit b5662f0

Please sign in to comment.