Skip to content

Commit

Permalink
fix: Handled the file format if it's in UPPER CASE (#1200)
Browse files Browse the repository at this point in the history
  • Loading branch information
Roopan-Microsoft committed Aug 6, 2024
1 parent 20a6843 commit 1786e21
Show file tree
Hide file tree
Showing 3 changed files with 23 additions and 1 deletion.
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,7 @@ def __init__(self, blob_client: AzureBlobStorageClient, env_helper: EnvHelper):
self.embedding_configs[ext] = processor

def embed_file(self, source_url: str, file_name: str):
file_extension = file_name.split(".")[-1]
file_extension = file_name.split(".")[-1].lower()
embedding_config = self.embedding_configs.get(file_extension)
self.__embed(
source_url=source_url,
Expand Down
6 changes: 6 additions & 0 deletions code/tests/functional/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -334,6 +334,12 @@ def setup_config_mocking(httpserver: HTTPServer):
"loading": {"strategy": "web"},
"use_advanced_image_processing": False,
},
{
"document_type": "htm",
"chunking": {"strategy": "layout", "size": 500, "overlap": 100},
"loading": {"strategy": "web"},
"use_advanced_image_processing": False,
},
{
"document_type": "docx",
"chunking": {"strategy": "layout", "size": 500, "overlap": 100},
Expand Down
16 changes: 16 additions & 0 deletions code/tests/utilities/helpers/test_push_embedder.py
Original file line number Diff line number Diff line change
Expand Up @@ -347,6 +347,22 @@ def test_embed_file_chunks_documents(document_loading_mock, document_chunking_mo
)


def test_embed_file_chunks_documents_upper_case(
    document_loading_mock, document_chunking_mock, env_helper_mock
):
    """Check that a file whose extension is upper case is still chunked.

    ``embed_file`` is called with the file name ``some-file-name.PDF``; the
    test then asserts that the mocked chunker's ``chunk`` was called exactly
    once with the value returned by the mocked loader's ``load`` and
    ``CHUNKING_SETTINGS``.
    """
    # given
    push_embedder = PushEmbedder(MagicMock(), env_helper_mock)

    # when
    push_embedder.embed_file(
        "some-url",
        "some-file-name.PDF",  # upper-case extension is the case under test
    )

    # then
    document_chunking_mock.return_value.chunk.assert_called_once_with(
        document_loading_mock.return_value.load.return_value, CHUNKING_SETTINGS
    )


def test_embed_file_generates_embeddings_for_documents(llm_helper_mock, env_helper_mock):
# given
push_embedder = PushEmbedder(MagicMock(), env_helper_mock)
Expand Down

0 comments on commit 1786e21

Please sign in to comment.