Skip to content

Commit

Permalink
Updating the merged api response object to not contain confusing data…
Browse files Browse the repository at this point in the history
… that isn't actually merged. (#26)

### Correcting Merged Api Result 
--- 
Previously, the merged API result still contained data such as `content` and
`pages` taken from the first API response in the array being merged. This was
confusing because that data is not actually merged.

Thus, this unmerged data has been removed and the tests updated accordingly.

---------

Co-authored-by: Mark <[email protected]>
  • Loading branch information
THOR300 and Mark authored Oct 18, 2023
1 parent 9fd44ae commit 9561485
Show file tree
Hide file tree
Showing 2 changed files with 16 additions and 8 deletions.
5 changes: 3 additions & 2 deletions src/azure_pdf_parser/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -104,8 +104,9 @@ def merge_responses(batches: Sequence[PDFPagesBatchExtracted]) -> AnalyzeResult:
all_tables.extend(batch.extracted_content.tables)
all_pages.extend(batch.extracted_content.pages)

# Copy the first result to a variable and add the content for all the pages.
merged_analyse_result: AnalyzeResult = batches.pop(0).extracted_content
merged_analyse_result = AnalyzeResult()
merged_analyse_result.api_version = batches[0].extracted_content.api_version
merged_analyse_result.model_id = batches[0].extracted_content.model_id
merged_analyse_result.paragraphs = all_paragraphs
merged_analyse_result.tables = all_tables
merged_analyse_result.pages = all_pages
Expand Down
19 changes: 13 additions & 6 deletions tests/test_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -81,6 +81,13 @@ def test_merge_responses_one_page_results(
one_page_analyse_result: AnalyzeResult,
) -> None:
"""Test that the responses are merged correctly."""
# The one_page_analyse_result test data has empty values for these fields, and thus
# we need to set values for them to assert that they don't persist in the merged
# result.
one_page_analyse_result.documents = ["test_document"]
one_page_analyse_result.languages = ["test_language"]
one_page_analyse_result.styles = ["test_style"]

api_responses = [
PDFPagesBatchExtracted(
page_range=(1, 1),
Expand Down Expand Up @@ -115,9 +122,9 @@ def test_merge_responses_one_page_results(
assert isinstance(merged_api_response, AnalyzeResult)
assert merged_api_response.api_version == one_page_analyse_result.api_version
assert merged_api_response.model_id == one_page_analyse_result.model_id
assert merged_api_response.languages == one_page_analyse_result.languages
assert merged_api_response.styles == one_page_analyse_result.styles
assert merged_api_response.documents == one_page_analyse_result.documents
assert merged_api_response.languages != one_page_analyse_result.languages
assert merged_api_response.styles != one_page_analyse_result.styles
assert merged_api_response.documents != one_page_analyse_result.documents

# Check that the number of paragraphs and tables is correct
assert merged_api_response.paragraphs is not None
Expand Down Expand Up @@ -164,9 +171,9 @@ def test_merge_api_responses_sixteen_page_results(
assert isinstance(merged_api_response, AnalyzeResult)
assert merged_api_response.api_version == sixteen_page_analyse_result.api_version
assert merged_api_response.model_id == sixteen_page_analyse_result.model_id
assert merged_api_response.languages == sixteen_page_analyse_result.languages
assert merged_api_response.styles == sixteen_page_analyse_result.styles
assert merged_api_response.documents == sixteen_page_analyse_result.documents
assert merged_api_response.languages != sixteen_page_analyse_result.languages
assert merged_api_response.styles != sixteen_page_analyse_result.styles
assert merged_api_response.documents != sixteen_page_analyse_result.documents

# Check that the number of paragraphs and tables is correct
assert merged_api_response.paragraphs is not None
Expand Down

0 comments on commit 9561485

Please sign in to comment.