From 9f17cfa0991cc3ea9d1a5bde7191d944b54f6f61 Mon Sep 17 00:00:00 2001 From: vkrd <49703203+vkrd@users.noreply.github.com> Date: Mon, 5 Aug 2024 11:42:01 -0700 Subject: [PATCH] Prevent ingestion failure on empty tables (#1040) Co-authored-by: Vikram Duvvur --- scripts/data_utils.py | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/scripts/data_utils.py b/scripts/data_utils.py index dde9b6ece8..fc726a0115 100644 --- a/scripts/data_utils.py +++ b/scripts/data_utils.py @@ -616,10 +616,12 @@ def extract_pdf_content(file_path, form_recognizer_client, use_layout=False): if use_layout: tables_on_page = [] for table in form_recognizer_results.tables: - table_offset = table.spans[0].offset - table_length = table.spans[0].length - if page_offset <= table_offset and table_offset + table_length < page_offset + page_length: - tables_on_page.append(table) + # If the table is empty, the span is empty, so we skip it + if len(table.spans) > 0: + table_offset = table.spans[0].offset + table_length = table.spans[0].length + if page_offset <= table_offset and table_offset + table_length < page_offset + page_length: + tables_on_page.append(table) else: tables_on_page = []