Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix: Configured main index search field names in environment variables #1198

Merged
merged 6 commits on Aug 2, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
4 changes: 2 additions & 2 deletions .env.sample
Original file line number Diff line number Diff line change
Expand Up @@ -7,8 +7,8 @@ AZURE_SEARCH_SEMANTIC_SEARCH_CONFIG=default
AZURE_SEARCH_TOP_K=5
AZURE_SEARCH_ENABLE_IN_DOMAIN=False
AZURE_SEARCH_FIELDS_ID=id
AZURE_SEARCH_CONTENT_COLUMNS=content
AZURE_SEARCH_CONTENT_VECTOR_COLUMNS=content_vector
AZURE_SEARCH_CONTENT_COLUMN=content
AZURE_SEARCH_CONTENT_VECTOR_COLUMN=content_vector
AZURE_SEARCH_DIMENSIONS=1536
AZURE_SEARCH_FIELDS_TAG=tag
AZURE_SEARCH_FIELDS_METADATA=metadata
Expand Down
18 changes: 9 additions & 9 deletions code/backend/batch/utilities/helpers/azure_search_helper.py
Original file line number Diff line number Diff line change
Expand Up @@ -90,44 +90,44 @@ def image_search_dimensions(self) -> int:
def create_index(self):
fields = [
SimpleField(
name="id",
name=self.env_helper.AZURE_SEARCH_FIELDS_ID,
type=SearchFieldDataType.String,
key=True,
filterable=True,
),
SearchableField(
name="content",
name=self.env_helper.AZURE_SEARCH_CONTENT_COLUMN,
type=SearchFieldDataType.String,
),
SearchField(
name="content_vector",
name=self.env_helper.AZURE_SEARCH_CONTENT_VECTOR_COLUMN,
type=SearchFieldDataType.Collection(SearchFieldDataType.Single),
searchable=True,
vector_search_dimensions=self.search_dimensions,
vector_search_profile_name="myHnswProfile",
),
SearchableField(
name="metadata",
name=self.env_helper.AZURE_SEARCH_FIELDS_METADATA,
type=SearchFieldDataType.String,
),
SearchableField(
name="title",
name=self.env_helper.AZURE_SEARCH_TITLE_COLUMN,
type=SearchFieldDataType.String,
facetable=True,
filterable=True,
),
SearchableField(
name="source",
name=self.env_helper.AZURE_SEARCH_SOURCE_COLUMN,
type=SearchFieldDataType.String,
filterable=True,
),
SimpleField(
name="chunk",
name=self.env_helper.AZURE_SEARCH_CHUNK_COLUMN,
type=SearchFieldDataType.Int32,
filterable=True,
),
SimpleField(
name="offset",
name=self.env_helper.AZURE_SEARCH_OFFSET_COLUMN,
type=SearchFieldDataType.Int32,
filterable=True,
),
Expand Down Expand Up @@ -155,7 +155,7 @@ def create_index(self):
name=self.env_helper.AZURE_SEARCH_SEMANTIC_SEARCH_CONFIG,
prioritized_fields=SemanticPrioritizedFields(
title_field=None,
content_fields=[SemanticField(field_name="content")],
content_fields=[SemanticField(field_name=self.env_helper.AZURE_SEARCH_CONTENT_COLUMN)],
),
)
]
Expand Down
26 changes: 13 additions & 13 deletions code/backend/batch/utilities/helpers/embedders/push_embedder.py
Original file line number Diff line number Diff line change
Expand Up @@ -115,23 +115,23 @@ def __generate_image_caption(self, source_url):
def __convert_to_search_document(self, document: SourceDocument):
embedded_content = self.llm_helper.generate_embeddings(document.content)
metadata = {
"id": document.id,
"source": document.source,
"title": document.title,
"chunk": document.chunk,
"offset": document.offset,
self.env_helper.AZURE_SEARCH_FIELDS_ID: document.id,
self.env_helper.AZURE_SEARCH_SOURCE_COLUMN: document.source,
self.env_helper.AZURE_SEARCH_TITLE_COLUMN: document.title,
self.env_helper.AZURE_SEARCH_CHUNK_COLUMN: document.chunk,
self.env_helper.AZURE_SEARCH_OFFSET_COLUMN: document.offset,
"page_number": document.page_number,
"chunk_id": document.chunk_id,
}
return {
"id": document.id,
"content": document.content,
"content_vector": embedded_content,
"metadata": json.dumps(metadata),
"title": document.title,
"source": document.source,
"chunk": document.chunk,
"offset": document.offset,
self.env_helper.AZURE_SEARCH_FIELDS_ID: document.id,
self.env_helper.AZURE_SEARCH_CONTENT_COLUMN: document.content,
self.env_helper.AZURE_SEARCH_CONTENT_VECTOR_COLUMN: embedded_content,
self.env_helper.AZURE_SEARCH_FIELDS_METADATA: json.dumps(metadata),
self.env_helper.AZURE_SEARCH_TITLE_COLUMN: document.title,
self.env_helper.AZURE_SEARCH_SOURCE_COLUMN: document.source,
self.env_helper.AZURE_SEARCH_CHUNK_COLUMN: document.chunk,
self.env_helper.AZURE_SEARCH_OFFSET_COLUMN: document.offset,
}

def __generate_document_id(self, source_url: str) -> str:
Expand Down
11 changes: 7 additions & 4 deletions code/backend/batch/utilities/helpers/env_helper.py
Original file line number Diff line number Diff line change
Expand Up @@ -53,11 +53,11 @@ def __load_config(self, **kwargs) -> None:
os.getenv("AZURE_SEARCH_ENABLE_IN_DOMAIN", "true").lower() == "true"
)
self.AZURE_SEARCH_FIELDS_ID = os.getenv("AZURE_SEARCH_FIELDS_ID", "id")
self.AZURE_SEARCH_CONTENT_COLUMNS = os.getenv(
"AZURE_SEARCH_CONTENT_COLUMNS", "content"
self.AZURE_SEARCH_CONTENT_COLUMN = os.getenv(
"AZURE_SEARCH_CONTENT_COLUMN", "content"
)
self.AZURE_SEARCH_CONTENT_VECTOR_COLUMNS = os.getenv(
"AZURE_SEARCH_CONTENT_VECTOR_COLUMNS", "content_vector"
self.AZURE_SEARCH_CONTENT_VECTOR_COLUMN = os.getenv(
"AZURE_SEARCH_CONTENT_VECTOR_COLUMN", "content_vector"
)
self.AZURE_SEARCH_DIMENSIONS = os.getenv("AZURE_SEARCH_DIMENSIONS", "1536")
self.AZURE_SEARCH_FILENAME_COLUMN = os.getenv(
Expand All @@ -69,6 +69,9 @@ def __load_config(self, **kwargs) -> None:
self.AZURE_SEARCH_FIELDS_METADATA = os.getenv(
"AZURE_SEARCH_FIELDS_METADATA", "metadata"
)
self.AZURE_SEARCH_SOURCE_COLUMN = os.getenv("AZURE_SEARCH_SOURCE_COLUMN", "source")
self.AZURE_SEARCH_CHUNK_COLUMN = os.getenv("AZURE_SEARCH_CHUNK_COLUMN", "chunk")
self.AZURE_SEARCH_OFFSET_COLUMN = os.getenv("AZURE_SEARCH_OFFSET_COLUMN", "offset")
self.AZURE_SEARCH_CONVERSATIONS_LOG_INDEX = os.getenv(
"AZURE_SEARCH_CONVERSATIONS_LOG_INDEX", "conversations"
)
Expand Down
6 changes: 3 additions & 3 deletions code/create_app.py
Original file line number Diff line number Diff line change
Expand Up @@ -127,12 +127,12 @@ def conversation_with_data(conversation: Request, env_helper: EnvHelper):
"index_name": env_helper.AZURE_SEARCH_INDEX,
"fields_mapping": {
"content_fields": (
env_helper.AZURE_SEARCH_CONTENT_COLUMNS.split("|")
if env_helper.AZURE_SEARCH_CONTENT_COLUMNS
env_helper.AZURE_SEARCH_CONTENT_COLUMN.split("|")
if env_helper.AZURE_SEARCH_CONTENT_COLUMN
else []
),
"vector_fields": [
env_helper.AZURE_SEARCH_CONTENT_VECTOR_COLUMNS
env_helper.AZURE_SEARCH_CONTENT_VECTOR_COLUMN
],
"title_field": env_helper.AZURE_SEARCH_TITLE_COLUMN or None,
"url_field": env_helper.AZURE_SEARCH_URL_COLUMN or None,
Expand Down
7 changes: 5 additions & 2 deletions code/tests/functional/app_config.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,8 +38,8 @@ class AppConfig:
"AZURE_OPENAI_TOP_P": "1.0",
"AZURE_RESOURCE_GROUP": "some-resource-group",
"AZURE_SEARCH_CONVERSATIONS_LOG_INDEX": "some-log-index",
"AZURE_SEARCH_CONTENT_COLUMNS": "content",
"AZURE_SEARCH_CONTENT_VECTOR_COLUMNS": "some-search-content-vector-columns",
"AZURE_SEARCH_CONTENT_COLUMN": "content",
"AZURE_SEARCH_CONTENT_VECTOR_COLUMN": "some-search-content-vector-columns",
"AZURE_SEARCH_DIMENSIONS": "some-search-dimensions",
"AZURE_SEARCH_ENABLE_IN_DOMAIN": "True",
"AZURE_SEARCH_FIELDS_ID": "some-search-fields-id",
Expand All @@ -53,6 +53,9 @@ class AppConfig:
"AZURE_SEARCH_SERVICE": "some-azure-search-service",
"AZURE_SEARCH_SEMANTIC_SEARCH_CONFIG": "some-search-semantic-search-config",
"AZURE_SEARCH_TITLE_COLUMN": "title",
"AZURE_SEARCH_CHUNK_COLUMN": "chunk",
"AZURE_SEARCH_SOURCE_COLUMN": "source",
"AZURE_SEARCH_OFFSET_COLUMN": "offset",
"AZURE_SEARCH_TOP_K": "5",
"AZURE_SEARCH_URL_COLUMN": "url",
"AZURE_SEARCH_USE_INTEGRATED_VECTORIZATION": "False",
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
)
from tests.functional.app_config import AppConfig


pytestmark = pytest.mark.functional

path = "/api/conversation"
Expand Down Expand Up @@ -365,7 +366,7 @@ def test_post_makes_correct_call_to_create_documents_search_index(
"name": app_config.get("AZURE_SEARCH_INDEX"),
"fields": [
{
"name": "id",
"name": app_config.get("AZURE_SEARCH_FIELDS_ID"),
"type": "Edm.String",
"key": True,
"retrievable": True,
Expand All @@ -375,7 +376,7 @@ def test_post_makes_correct_call_to_create_documents_search_index(
"facetable": False,
},
{
"name": "content",
"name": app_config.get("AZURE_SEARCH_CONTENT_COLUMN"),
"type": "Edm.String",
"key": False,
"retrievable": True,
Expand All @@ -385,14 +386,14 @@ def test_post_makes_correct_call_to_create_documents_search_index(
"facetable": False,
},
{
"name": "content_vector",
"name": app_config.get("AZURE_SEARCH_CONTENT_VECTOR_COLUMN"),
"type": "Collection(Edm.Single)",
"searchable": True,
"dimensions": 2,
"vectorSearchProfile": "myHnswProfile",
},
{
"name": "metadata",
"name": app_config.get("AZURE_SEARCH_FIELDS_METADATA"),
"type": "Edm.String",
"key": False,
"retrievable": True,
Expand All @@ -402,7 +403,7 @@ def test_post_makes_correct_call_to_create_documents_search_index(
"facetable": False,
},
{
"name": "title",
"name": app_config.get("AZURE_SEARCH_TITLE_COLUMN"),
"type": "Edm.String",
"key": False,
"retrievable": True,
Expand All @@ -412,7 +413,7 @@ def test_post_makes_correct_call_to_create_documents_search_index(
"facetable": True,
},
{
"name": "source",
"name": app_config.get("AZURE_SEARCH_SOURCE_COLUMN"),
"type": "Edm.String",
"key": False,
"retrievable": True,
Expand All @@ -422,7 +423,7 @@ def test_post_makes_correct_call_to_create_documents_search_index(
"facetable": False,
},
{
"name": "chunk",
"name": app_config.get("AZURE_SEARCH_CHUNK_COLUMN"),
"type": "Edm.Int32",
"key": False,
"retrievable": True,
Expand All @@ -432,7 +433,7 @@ def test_post_makes_correct_call_to_create_documents_search_index(
"facetable": False,
},
{
"name": "offset",
"name": app_config.get("AZURE_SEARCH_OFFSET_COLUMN"),
"type": "Edm.Int32",
"key": False,
"retrievable": True,
Expand All @@ -456,7 +457,8 @@ def test_post_makes_correct_call_to_create_documents_search_index(
"AZURE_SEARCH_SEMANTIC_SEARCH_CONFIG"
),
"prioritizedFields": {
"prioritizedContentFields": [{"fieldName": "content"}]
"prioritizedContentFields":
[{"fieldName": app_config.get("AZURE_SEARCH_CONTENT_COLUMN")}]
},
}
]
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -114,7 +114,7 @@ def test_post_makes_correct_call_to_azure_openai(
"content_fields": ["content"],
"vector_fields": [
app_config.get(
"AZURE_SEARCH_CONTENT_VECTOR_COLUMNS"
"AZURE_SEARCH_CONTENT_VECTOR_COLUMN"
)
],
"title_field": "title",
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -296,7 +296,7 @@ def test_makes_correct_call_to_create_documents_search_index(
"name": app_config.get("AZURE_SEARCH_INDEX"),
"fields": [
{
"name": "id",
"name": app_config.get("AZURE_SEARCH_FIELDS_ID"),
"type": "Edm.String",
"key": True,
"retrievable": True,
Expand All @@ -306,7 +306,7 @@ def test_makes_correct_call_to_create_documents_search_index(
"facetable": False,
},
{
"name": "content",
"name": app_config.get("AZURE_SEARCH_CONTENT_COLUMN"),
"type": "Edm.String",
"key": False,
"retrievable": True,
Expand All @@ -316,14 +316,14 @@ def test_makes_correct_call_to_create_documents_search_index(
"facetable": False,
},
{
"name": "content_vector",
"name": app_config.get("AZURE_SEARCH_CONTENT_VECTOR_COLUMN"),
"type": "Collection(Edm.Single)",
"searchable": True,
"dimensions": 2,
"vectorSearchProfile": "myHnswProfile",
},
{
"name": "metadata",
"name": app_config.get("AZURE_SEARCH_FIELDS_METADATA"),
"type": "Edm.String",
"key": False,
"retrievable": True,
Expand All @@ -333,7 +333,7 @@ def test_makes_correct_call_to_create_documents_search_index(
"facetable": False,
},
{
"name": "title",
"name": app_config.get("AZURE_SEARCH_TITLE_COLUMN"),
"type": "Edm.String",
"key": False,
"retrievable": True,
Expand All @@ -343,7 +343,7 @@ def test_makes_correct_call_to_create_documents_search_index(
"facetable": True,
},
{
"name": "source",
"name": app_config.get("AZURE_SEARCH_SOURCE_COLUMN"),
"type": "Edm.String",
"key": False,
"retrievable": True,
Expand All @@ -353,7 +353,7 @@ def test_makes_correct_call_to_create_documents_search_index(
"facetable": False,
},
{
"name": "chunk",
"name": app_config.get("AZURE_SEARCH_CHUNK_COLUMN"),
"type": "Edm.Int32",
"key": False,
"retrievable": True,
Expand All @@ -363,7 +363,7 @@ def test_makes_correct_call_to_create_documents_search_index(
"facetable": False,
},
{
"name": "offset",
"name": app_config.get("AZURE_SEARCH_OFFSET_COLUMN"),
"type": "Edm.Int32",
"key": False,
"retrievable": True,
Expand All @@ -387,7 +387,8 @@ def test_makes_correct_call_to_create_documents_search_index(
"AZURE_SEARCH_SEMANTIC_SEARCH_CONFIG"
),
"prioritizedFields": {
"prioritizedContentFields": [{"fieldName": "content"}]
"prioritizedContentFields":
[{"fieldName": app_config.get("AZURE_SEARCH_CONTENT_COLUMN")}]
},
}
]
Expand Down
12 changes: 6 additions & 6 deletions code/tests/test_app.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,8 +22,8 @@
AZURE_SEARCH_KEY = "mock-search-key"
AZURE_SEARCH_INDEX = "mock-search-index"
AZURE_SEARCH_SERVICE = "mock-search-service"
AZURE_SEARCH_CONTENT_COLUMNS = "field1|field2"
AZURE_SEARCH_CONTENT_VECTOR_COLUMNS = "vector-column"
AZURE_SEARCH_CONTENT_COLUMN = "field1|field2"
AZURE_SEARCH_CONTENT_VECTOR_COLUMN = "vector-column"
AZURE_SEARCH_TITLE_COLUMN = "title"
AZURE_SEARCH_FILENAME_COLUMN = "filename"
AZURE_SEARCH_URL_COLUMN = "url"
Expand Down Expand Up @@ -68,9 +68,9 @@ def env_helper_mock():
env_helper.AZURE_OPENAI_STOP_SEQUENCE = AZURE_OPENAI_STOP_SEQUENCE
env_helper.AZURE_SEARCH_INDEX = AZURE_SEARCH_INDEX
env_helper.AZURE_SEARCH_SERVICE = AZURE_SEARCH_SERVICE
env_helper.AZURE_SEARCH_CONTENT_COLUMNS = AZURE_SEARCH_CONTENT_COLUMNS
env_helper.AZURE_SEARCH_CONTENT_VECTOR_COLUMNS = (
AZURE_SEARCH_CONTENT_VECTOR_COLUMNS
env_helper.AZURE_SEARCH_CONTENT_COLUMN = AZURE_SEARCH_CONTENT_COLUMN
env_helper.AZURE_SEARCH_CONTENT_VECTOR_COLUMN = (
AZURE_SEARCH_CONTENT_VECTOR_COLUMN
)
env_helper.AZURE_SEARCH_TITLE_COLUMN = AZURE_SEARCH_TITLE_COLUMN
env_helper.AZURE_SEARCH_FILENAME_COLUMN = AZURE_SEARCH_FILENAME_COLUMN
Expand Down Expand Up @@ -617,7 +617,7 @@ def test_conversation_azure_byod_returns_correct_response_when_streaming_with_da
"index_name": AZURE_SEARCH_INDEX,
"fields_mapping": {
"content_fields": ["field1", "field2"],
"vector_fields": [AZURE_SEARCH_CONTENT_VECTOR_COLUMNS],
"vector_fields": [AZURE_SEARCH_CONTENT_VECTOR_COLUMN],
"title_field": AZURE_SEARCH_TITLE_COLUMN,
"url_field": AZURE_SEARCH_URL_COLUMN,
"filepath_field": AZURE_SEARCH_FILENAME_COLUMN,
Expand Down
Loading
Loading