Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Include helper functions in sample snippets #36426

Merged
merged 10 commits into from
Jul 19, 2024
94 changes: 83 additions & 11 deletions sdk/documentintelligence/azure-ai-documentintelligence/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -213,6 +213,29 @@ from azure.core.credentials import AzureKeyCredential
from azure.ai.documentintelligence import DocumentIntelligenceClient
from azure.ai.documentintelligence.models import AnalyzeResult

def get_words(page, line):
    """Collect the words of ``page`` that belong to ``line``.

    A word is kept when ``_in_span`` reports that it matches one of
    ``line.spans``. The order of ``page.words`` is preserved.
    """
    return [candidate for candidate in page.words if _in_span(candidate, line.spans)]

def _in_span(word, spans):
catalinaperalta marked this conversation as resolved.
Show resolved Hide resolved
for span in spans:
if word.span.offset >= span.offset and (word.span.offset + word.span.length) <= (span.offset + span.length):
return True
return False

def format_bounding_region(bounding_regions):
    """Describe each bounding region as ``Page #<page_number>: <polygon>``.

    The per-region descriptions are joined with ", ". Returns "N/A" when
    ``bounding_regions`` is empty or None.
    """
    if bounding_regions:
        descriptions = [
            f"Page #{region.page_number}: {format_polygon(region.polygon)}"
            for region in bounding_regions
        ]
        return ", ".join(descriptions)
    return "N/A"

def format_polygon(polygon):
    """Render a flat sequence as bracketed pairs: "[a, b], [c, d], ...".

    Consecutive elements are paired two at a time. Returns "N/A" when
    ``polygon`` is empty or None.
    """
    if polygon:
        pair_starts = range(0, len(polygon), 2)
        return ", ".join(f"[{polygon[start]}, {polygon[start + 1]}]" for start in pair_starts)
    return "N/A"

endpoint = os.environ["DOCUMENTINTELLIGENCE_ENDPOINT"]
key = os.environ["DOCUMENTINTELLIGENCE_API_KEY"]

Expand Down Expand Up @@ -296,6 +319,29 @@ from azure.core.credentials import AzureKeyCredential
from azure.ai.documentintelligence import DocumentIntelligenceClient
from azure.ai.documentintelligence.models import DocumentAnalysisFeature, AnalyzeResult

def get_words(words, line):
    """Filter ``words`` down to those that belong to ``line``.

    A word is kept when ``_in_span`` reports that it matches one of
    ``line.spans``. The input order is preserved.
    """
    return [candidate for candidate in words if _in_span(candidate, line.spans)]

def _in_span(word, spans):
for span in spans:
if word.span.offset >= span.offset and (word.span.offset + word.span.length) <= (span.offset + span.length):
return True
return False

def format_bounding_region(bounding_regions):
    """Describe each bounding region as ``Page #<page_number>: <polygon>``.

    The per-region descriptions are joined with ", ". Returns "N/A" when
    ``bounding_regions`` is empty or None.
    """
    if bounding_regions:
        descriptions = [
            f"Page #{region.page_number}: {format_polygon(region.polygon)}"
            for region in bounding_regions
        ]
        return ", ".join(descriptions)
    return "N/A"

def format_polygon(polygon):
    """Render a flat sequence as bracketed pairs: "[a, b], [c, d], ...".

    Consecutive elements are paired two at a time. Returns "N/A" when
    ``polygon`` is empty or None.
    """
    if polygon:
        pair_starts = range(0, len(polygon), 2)
        return ", ".join(f"[{polygon[start]}, {polygon[start + 1]}]" for start in pair_starts)
    return "N/A"

endpoint = os.environ["DOCUMENTINTELLIGENCE_ENDPOINT"]
key = os.environ["DOCUMENTINTELLIGENCE_API_KEY"]

Expand Down Expand Up @@ -388,6 +434,9 @@ from azure.core.credentials import AzureKeyCredential
from azure.ai.documentintelligence import DocumentIntelligenceClient
from azure.ai.documentintelligence.models import AnalyzeResult

def format_price(price_dict):
    """Concatenate all values of ``price_dict`` into one string, in dict order."""
    return "".join(f"{part}" for part in price_dict.values())

endpoint = os.environ["DOCUMENTINTELLIGENCE_ENDPOINT"]
key = os.environ["DOCUMENTINTELLIGENCE_API_KEY"]

Expand Down Expand Up @@ -525,6 +574,33 @@ from azure.core.credentials import AzureKeyCredential
from azure.ai.documentintelligence import DocumentIntelligenceClient
from azure.ai.documentintelligence.models import AnalyzeResult

def print_table(header_names, table_data):
    """Print a two-dimensional array as a left-aligned text table.

    Each column is padded to the width of its longest cell (the header
    included) plus four spaces.

    Args:
        header_names: A list of column header names, e.g. ["Name", "Gender", "Age"].
        table_data: A two-dimensional list holding the rows, e.g.
            [["Mike", "M", 25], ["John", "M", 19], ["Lily", "F", 23]].

    Returns:
        None. The table is written to standard output, e.g.

            Name    Gender    Age
            Mike    M         25
            John    M         19
            Lily    F         23
    """
    # Cells are measured and printed through str() alike, so values that do not
    # support a format spec (e.g. None for a missing cell) are printed instead
    # of raising TypeError, and the padding always matches the measured width.
    column_widths = [
        max([len(str(header))] + [len(str(row[index])) for row in table_data])
        for index, header in enumerate(header_names)
    ]
    row_format_str = "".join(f"{{:<{width + 4}}}" for width in column_widths)

    print(row_format_str.format(*map(str, header_names)))
    for row in table_data:
        print(row_format_str.format(*map(str, row)))

endpoint = os.environ["DOCUMENTINTELLIGENCE_ENDPOINT"]
key = os.environ["DOCUMENTINTELLIGENCE_API_KEY"]
model_id = os.getenv("CUSTOM_BUILT_MODEL_ID", custom_model_id)
Expand Down Expand Up @@ -561,10 +637,7 @@ if result.documents:
if not doc.fields is None:
for field_name, field_value in doc.fields.items():
# Dynamic Table cell information store as array in document field.
if (
field_value.type == SYMBOL_OF_TABLE_TYPE
and field_value.value_array
):
if field_value.type == SYMBOL_OF_TABLE_TYPE and field_value.value_array:
col_names = []
sample_obj = field_value.value_array[0]
if KEY_OF_VALUE_OBJECT in sample_obj:
Expand All @@ -582,7 +655,7 @@ if result.documents:
row_data = list(map(extract_value_by_col_name, col_names))
table_rows.append(row_data)
print_table(col_names, table_rows)

elif (
field_value.type == SYMBOL_OF_OBJECT_TYPE
and KEY_OF_VALUE_OBJECT in field_value
Expand All @@ -592,9 +665,7 @@ if result.documents:
is_fixed_table = all(
(
rows_of_column["type"] == SYMBOL_OF_OBJECT_TYPE
and Counter(
list(rows_by_columns[0][KEY_OF_VALUE_OBJECT].keys())
)
and Counter(list(rows_by_columns[0][KEY_OF_VALUE_OBJECT].keys()))
== Counter(list(rows_of_column[KEY_OF_VALUE_OBJECT].keys()))
)
for rows_of_column in rows_by_columns
Expand All @@ -609,9 +680,7 @@ if result.documents:
rows = rows_of_column[KEY_OF_VALUE_OBJECT]
for row_key in list(rows.keys()):
if row_key in row_dict:
row_dict[row_key].append(
rows[row_key].get(KEY_OF_CELL_CONTENT)
)
row_dict[row_key].append(rows[row_key].get(KEY_OF_CELL_CONTENT))
else:
row_dict[row_key] = [
row_key,
Expand All @@ -635,6 +704,9 @@ from azure.core.credentials import AzureKeyCredential
from azure.ai.documentintelligence import DocumentIntelligenceClient
from azure.ai.documentintelligence.models import AnalyzeDocumentRequest, AnalyzeResult

def format_price(price_dict):
    """Concatenate all values of ``price_dict`` into one string, in dict order."""
    return "".join(f"{part}" for part in price_dict.values())

catalinaperalta marked this conversation as resolved.
Show resolved Hide resolved
endpoint = os.environ["DOCUMENTINTELLIGENCE_ENDPOINT"]
key = os.environ["DOCUMENTINTELLIGENCE_API_KEY"]

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -58,7 +58,6 @@ async def analyze_barcodes():
"sample_forms/add_ons/barcodes.jpg",
)
)
# [START analyze_barcodes]
catalinaperalta marked this conversation as resolved.
Show resolved Hide resolved
from azure.core.credentials import AzureKeyCredential
from azure.ai.documentintelligence.aio import DocumentIntelligenceClient
from azure.ai.documentintelligence.models import DocumentAnalysisFeature, AnalyzeResult
Expand Down Expand Up @@ -91,7 +90,6 @@ async def analyze_barcodes():
print(f" Bounding regions: {format_polygon(barcode.polygon)}")

print("----------------------------------------")
# [END analyze_barcodes]


async def main():
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -60,7 +60,6 @@ async def analyze_fonts():
"sample_forms/add_ons/fonts_and_languages.png",
)
)
# [START analyze_fonts]
from azure.core.credentials import AzureKeyCredential
from azure.ai.documentintelligence.aio import DocumentIntelligenceClient
from azure.ai.documentintelligence.models import DocumentAnalysisFeature, AnalyzeResult
Expand Down Expand Up @@ -134,7 +133,6 @@ async def analyze_fonts():
print(f" Text: '{get_styled_text(styles, result.content)}'")

print("----------------------------------------")
# [END analyze_fonts]


async def main():
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -58,7 +58,6 @@ async def analyze_formulas():
"sample_forms/add_ons/formulas.pdf",
)
)
# [START analyze_formulas]
from azure.core.credentials import AzureKeyCredential
from azure.ai.documentintelligence.aio import DocumentIntelligenceClient
from azure.ai.documentintelligence.models import DocumentAnalysisFeature, AnalyzeResult
Expand Down Expand Up @@ -100,7 +99,6 @@ async def analyze_formulas():
print(f" Bounding regions: {format_polygon(formula.polygon)}")

print("----------------------------------------")
# [END analyze_formulas]


async def main():
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -73,7 +73,6 @@ async def analyze_with_highres():
"sample_forms/add_ons/highres.png",
)
)
# [START analyze_with_highres]
from azure.core.credentials import AzureKeyCredential
from azure.ai.documentintelligence.aio import DocumentIntelligenceClient
from azure.ai.documentintelligence.models import DocumentAnalysisFeature, AnalyzeResult
Expand Down Expand Up @@ -139,7 +138,6 @@ async def analyze_with_highres():
)

print("----------------------------------------")
# [END analyze_with_highres]


async def main():
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -52,7 +52,6 @@ async def analyze_languages():
"sample_forms/add_ons/fonts_and_languages.png",
)
)
# [START analyze_languages]
from azure.core.credentials import AzureKeyCredential
from azure.ai.documentintelligence.aio import DocumentIntelligenceClient
from azure.ai.documentintelligence.models import DocumentAnalysisFeature, AnalyzeResult
Expand Down Expand Up @@ -84,7 +83,6 @@ async def analyze_languages():
)

print("----------------------------------------")
# [END analyze_languages]


async def main():
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,6 @@


async def analyze_query_fields():
# [START analyze_query_fields]
from azure.core.credentials import AzureKeyCredential
from azure.ai.documentintelligence.aio import DocumentIntelligenceClient
from azure.ai.documentintelligence.models import AnalyzeDocumentRequest, DocumentAnalysisFeature, AnalyzeResult
Expand All @@ -71,7 +70,6 @@ async def analyze_query_fields():
print(f"Address: {doc.fields['Address'].value_string}")
if doc.fields and doc.fields["InvoiceNumber"]:
print(f"Invoice number: {doc.fields['InvoiceNumber'].value_string}")
# [END analyze_query_fields]


async def main():
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -71,7 +71,6 @@ async def analyze_custom_documents(custom_model_id):
path_to_sample_documents = os.path.abspath(
os.path.join(os.path.abspath(__file__), "..", "..", "./sample_forms/forms/Form_1.jpg")
)
# [START analyze_custom_documents]
from azure.core.credentials import AzureKeyCredential
from azure.ai.documentintelligence.aio import DocumentIntelligenceClient
from azure.ai.documentintelligence.models import AnalyzeResult
Expand Down Expand Up @@ -172,7 +171,6 @@ async def analyze_custom_documents(custom_model_id):
col_names.insert(0, "")
print_table(col_names, list(row_dict.values()))
print("-----------------------------------")
# [END analyze_custom_documents]


async def main():
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,6 @@


async def analyze_documents_output_in_markdown():
# [START analyze_documents_output_in_markdown]
from azure.core.credentials import AzureKeyCredential
from azure.ai.documentintelligence.aio import DocumentIntelligenceClient
from azure.ai.documentintelligence.models import AnalyzeDocumentRequest, ContentFormat, AnalyzeResult
Expand All @@ -45,7 +44,6 @@ async def analyze_documents_output_in_markdown():

print(f"Here's the full content in format {result.content_format}:\n")
print(result.content)
# [END analyze_documents_output_in_markdown]


async def main():
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -62,7 +62,6 @@ async def analyze_general_documents():
)
)

# [START analyze_general_documents]
from azure.core.credentials import AzureKeyCredential
from azure.ai.documentintelligence.aio import DocumentIntelligenceClient
from azure.ai.documentintelligence.models import DocumentAnalysisFeature, AnalyzeResult
Expand Down Expand Up @@ -141,7 +140,6 @@ async def analyze_general_documents():
f"...content on page {region.page_number} is within bounding polygon '{format_polygon(region.polygon)}'\n"
)
print("----------------------------------------")
# [END analyze_general_documents]


async def main():
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,6 @@ async def analyze_invoice():
)
)

# [START analyze_invoices]
from azure.core.credentials import AzureKeyCredential
from azure.ai.documentintelligence.aio import DocumentIntelligenceClient
from azure.ai.documentintelligence.models import AnalyzeResult
Expand Down Expand Up @@ -217,7 +216,6 @@ async def analyze_invoice():
print(
f"Remittance Address Recipient: {remittance_address_recipient.get('content')} has confidence: {remittance_address_recipient.get('confidence')}"
)
# [END analyze_invoices]


async def main():
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -66,7 +66,6 @@ async def analyze_layout():
)
)

# [START extract_layout]
from azure.core.credentials import AzureKeyCredential
from azure.ai.documentintelligence.aio import DocumentIntelligenceClient
from azure.ai.documentintelligence.models import AnalyzeResult
Expand Down Expand Up @@ -139,7 +138,6 @@ async def analyze_layout():
)

print("----------------------------------------")
# [END extract_layout]


async def main():
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,6 @@ async def analyze_receipts():
)
)

# [START analyze_receipts]
from azure.core.credentials import AzureKeyCredential
from azure.ai.documentintelligence.aio import DocumentIntelligenceClient
from azure.ai.documentintelligence.models import AnalyzeResult
Expand Down Expand Up @@ -113,7 +112,6 @@ async def analyze_receipts():
if total:
print(f"Total: {format_price(total.get('valueCurrency'))} has confidence: {total.confidence}")
print("--------------------------------------")
# [END analyze_receipts]


async def main():
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,6 @@ def format_price(price_dict):


async def analyze_receipts_from_url():
# [START analyze_receipts_from_url]
from azure.core.credentials import AzureKeyCredential
from azure.ai.documentintelligence.aio import DocumentIntelligenceClient
from azure.ai.documentintelligence.models import AnalyzeDocumentRequest, AnalyzeResult
Expand All @@ -48,7 +47,6 @@ async def analyze_receipts_from_url():
"prebuilt-receipt", AnalyzeDocumentRequest(url_source=url)
)
receipts: AnalyzeResult = await poller.result()
# [END analyze_receipts_from_url]

if receipts.documents:
for idx, receipt in enumerate(receipts.documents):
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,6 @@ async def classify_document(classifier_id):
)
)

# [START classify_document]
from azure.core.credentials import AzureKeyCredential
from azure.ai.documentintelligence.aio import DocumentIntelligenceClient
from azure.ai.documentintelligence.models import AnalyzeResult
Expand All @@ -67,7 +66,6 @@ async def classify_document(classifier_id):
f"Found document of type '{doc.doc_type or 'N/A'}' with a confidence of {doc.confidence} contained on "
f"the following pages: {[region.page_number for region in doc.bounding_regions]}"
)
# [END classify_document]


async def main():
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,6 @@


async def sample_compose_model():
# [START composed_model]
import uuid
from azure.core.credentials import AzureKeyCredential
from azure.ai.documentintelligence.aio import DocumentIntelligenceAdministrationClient
Expand Down Expand Up @@ -128,7 +127,6 @@ async def sample_compose_model():
print("Warnings encountered while building the model:")
for warning in model.warnings:
print(f"warning code: {warning.code}, message: {warning.message}, target of the error: {warning.target}")
# [END composed_model]


async def main():
Expand Down
Loading