Skip to content

Commit

Permalink
Remove use of old Law/Policy labels in search (#165)
Browse files Browse the repository at this point in the history
  • Loading branch information
Joel Wright authored Sep 18, 2023
1 parent 44c6cf3 commit 87def48
Show file tree
Hide file tree
Showing 11 changed files with 889 additions and 354 deletions.
16 changes: 0 additions & 16 deletions app/api/api_v1/routers/search.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,16 +42,6 @@
search_router = APIRouter()


def _map_new_category_to_old(supplied_category: str) -> str:
"""Temporarily translate new category strings into old values when searching"""
# TODO: remove after opensearch data & frontend upgrades
if supplied_category.lower() == "legislative":
return "Law"
if supplied_category.lower() == "executive":
return "Policy"
return supplied_category


def _search_request(db: Session, search_body: SearchRequestBody) -> SearchResponse:
if search_body.keyword_filters is not None:
search_body.keyword_filters = process_search_keyword_filters(
Expand All @@ -66,12 +56,6 @@ def _search_request(db: Session, search_body: SearchRequestBody) -> SearchRespon
req=_get_browse_args_from_search_request_body(search_body),
)
else:
if search_body.keyword_filters is not None:
if categories := search_body.keyword_filters.get(FilterField.CATEGORY):
fixed_categories = [_map_new_category_to_old(c) for c in categories]
keyword_filters = dict(search_body.keyword_filters)
keyword_filters[FilterField.CATEGORY] = fixed_categories
search_body.keyword_filters = keyword_filters
return _OPENSEARCH_CONNECTION.query_families(
search_request_body=search_body,
opensearch_internal_config=_OPENSEARCH_INDEX_CONFIG,
Expand Down
591 changes: 307 additions & 284 deletions poetry.lock

Large diffs are not rendered by default.

28 changes: 14 additions & 14 deletions tests/data/navigator_test_core.json

Large diffs are not rendered by default.

110 changes: 109 additions & 1 deletion tests/data/navigator_test_core.mapping.json
Original file line number Diff line number Diff line change
@@ -1 +1,109 @@
{"navigator_test_core":{"mappings":{"properties":{"document_category":{"type":"keyword"},"document_cdn_object":{"type":"keyword"},"document_content_type":{"type":"keyword"},"document_date":{"type":"date","format":"dd/MM/yyyy"},"document_description":{"type":"keyword","normalizer":"folding"},"document_description_embedding":{"type":"knn_vector","dimension":768,"method":{"engine":"nmslib","space_type":"innerproduct","name":"hnsw","parameters":{"ef_construction":512,"m":16}}},"document_geography":{"type":"keyword"},"document_id":{"type":"keyword","normalizer":"folding"},"document_md5_sum":{"type":"keyword"},"document_name":{"type":"keyword","normalizer":"folding"},"document_name_and_slug":{"type":"keyword","eager_global_ordinals":true,"normalizer":"folding"},"document_sectors":{"type":"keyword"},"document_slug":{"type":"keyword"},"document_source":{"type":"keyword"},"document_source_url":{"type":"keyword"},"document_type":{"type":"keyword"},"for_search_document_description":{"type":"text","analyzer":"folding"},"for_search_document_name":{"type":"text","analyzer":"folding"},"text":{"type":"text","analyzer":"folding"},"text_block_coords":{"type":"keyword"},"text_block_id":{"type":"keyword"},"text_block_page":{"type":"integer"},"text_embedding":{"type":"knn_vector","dimension":768,"method":{"engine":"nmslib","space_type":"innerproduct","name":"hnsw","parameters":{"ef_construction":512,"m":16}}},"translated":{"type":"boolean"}}}}}
{
"navigator_test_core": {
"mappings": {
"properties": {
"document_category": {
"type": "keyword"
},
"document_cdn_object": {
"type": "keyword"
},
"document_content_type": {
"type": "keyword"
},
"document_date": {
"type": "date",
"format": "dd/MM/yyyy"
},
"document_description": {
"type": "keyword",
"normalizer": "folding"
},
"document_description_embedding": {
"type": "knn_vector",
"dimension": 768,
"method": {
"engine": "nmslib",
"space_type": "innerproduct",
"name": "hnsw",
"parameters": {
"ef_construction": 512,
"m": 16
}
}
},
"document_geography": {
"type": "keyword"
},
"document_id": {
"type": "keyword",
"normalizer": "folding"
},
"document_md5_sum": {
"type": "keyword"
},
"document_name": {
"type": "keyword",
"normalizer": "folding"
},
"document_name_and_slug": {
"type": "keyword",
"eager_global_ordinals": true,
"normalizer": "folding"
},
"document_sectors": {
"type": "keyword"
},
"document_slug": {
"type": "keyword"
},
"document_source": {
"type": "keyword"
},
"document_source_url": {
"type": "keyword"
},
"document_type": {
"type": "keyword"
},
"for_search_document_description": {
"type": "text",
"analyzer": "folding"
},
"for_search_document_name": {
"type": "text",
"analyzer": "folding"
},
"text": {
"type": "text",
"analyzer": "folding"
},
"text_block_coords": {
"type": "keyword"
},
"text_block_id": {
"type": "keyword"
},
"text_block_page": {
"type": "integer"
},
"text_embedding": {
"type": "knn_vector",
"dimension": 768,
"method": {
"engine": "nmslib",
"space_type": "innerproduct",
"name": "hnsw",
"parameters": {
"ef_construction": 512,
"m": 16
}
}
},
"translated": {
"type": "boolean"
}
}
}
}
}
2 changes: 1 addition & 1 deletion tests/data/navigator_test_core.settings.json
Original file line number Diff line number Diff line change
@@ -1 +1 @@
"{\"navigator_test_core\":{\"settings\":{\"index\":{\"refresh_interval\":\"1s\",\"number_of_shards\":\"1\",\"knn.algo_param\":{\"ef_search\":\"100\"},\"knn\":\"true\",\"analysis\":{\"filter\":{\"ascii_folding_preserve_original\":{\"type\":\"asciifolding\",\"preserve_original\":\"true\"}},\"normalizer\":{\"folding\":{\"filter\":[\"lowercase\",\"asciifolding\"],\"type\":\"custom\",\"char_filter\":[]}},\"analyzer\":{\"folding\":{\"filter\":[\"lowercase\",\"ascii_folding_preserve_original\"],\"tokenizer\":\"standard\"}}},\"number_of_replicas\":\"2\"}}}}"
"{\"navigator_test_core\":{\"settings\":{\"index\":{\"refresh_interval\":\"1s\",\"number_of_shards\":\"1\",\"knn.algo_param\":{\"ef_search\":\"100\"},\"knn\":\"true\",\"analysis\":{\"filter\":{\"ascii_folding_preserve_original\":{\"type\":\"asciifolding\",\"preserve_original\":\"true\"}},\"normalizer\":{\"folding\":{\"filter\":[\"lowercase\",\"asciifolding\"],\"type\":\"custom\",\"char_filter\":[]}},\"analyzer\":{\"folding\":{\"filter\":[\"lowercase\",\"ascii_folding_preserve_original\"],\"tokenizer\":\"standard\"}}},\"number_of_replicas\":\"2\"}}}}"
40 changes: 20 additions & 20 deletions tests/data/navigator_test_htmls_non_translated.json

Large diffs are not rendered by default.

110 changes: 109 additions & 1 deletion tests/data/navigator_test_htmls_non_translated.mapping.json
Original file line number Diff line number Diff line change
@@ -1 +1,109 @@
{"navigator_test_htmls_non_translated":{"mappings":{"properties":{"document_category":{"type":"keyword"},"document_cdn_object":{"type":"keyword"},"document_content_type":{"type":"keyword"},"document_date":{"type":"date","format":"dd/MM/yyyy"},"document_description":{"type":"keyword","normalizer":"folding"},"document_description_embedding":{"type":"knn_vector","dimension":768,"method":{"engine":"nmslib","space_type":"innerproduct","name":"hnsw","parameters":{"ef_construction":512,"m":16}}},"document_geography":{"type":"keyword"},"document_id":{"type":"keyword","normalizer":"folding"},"document_md5_sum":{"type":"keyword"},"document_name":{"type":"keyword","normalizer":"folding"},"document_name_and_slug":{"type":"keyword","eager_global_ordinals":true,"normalizer":"folding"},"document_sectors":{"type":"keyword"},"document_slug":{"type":"keyword"},"document_source":{"type":"keyword"},"document_source_url":{"type":"keyword"},"document_type":{"type":"keyword"},"for_search_document_description":{"type":"text","analyzer":"folding"},"for_search_document_name":{"type":"text","analyzer":"folding"},"text":{"type":"text","analyzer":"folding"},"text_block_coords":{"type":"keyword"},"text_block_id":{"type":"keyword"},"text_block_page":{"type":"integer"},"text_embedding":{"type":"knn_vector","dimension":768,"method":{"engine":"nmslib","space_type":"innerproduct","name":"hnsw","parameters":{"ef_construction":512,"m":16}}},"translated":{"type":"boolean"}}}}}
{
"navigator_test_htmls_non_translated": {
"mappings": {
"properties": {
"document_category": {
"type": "keyword"
},
"document_cdn_object": {
"type": "keyword"
},
"document_content_type": {
"type": "keyword"
},
"document_date": {
"type": "date",
"format": "dd/MM/yyyy"
},
"document_description": {
"type": "keyword",
"normalizer": "folding"
},
"document_description_embedding": {
"type": "knn_vector",
"dimension": 768,
"method": {
"engine": "nmslib",
"space_type": "innerproduct",
"name": "hnsw",
"parameters": {
"ef_construction": 512,
"m": 16
}
}
},
"document_geography": {
"type": "keyword"
},
"document_id": {
"type": "keyword",
"normalizer": "folding"
},
"document_md5_sum": {
"type": "keyword"
},
"document_name": {
"type": "keyword",
"normalizer": "folding"
},
"document_name_and_slug": {
"type": "keyword",
"eager_global_ordinals": true,
"normalizer": "folding"
},
"document_sectors": {
"type": "keyword"
},
"document_slug": {
"type": "keyword"
},
"document_source": {
"type": "keyword"
},
"document_source_url": {
"type": "keyword"
},
"document_type": {
"type": "keyword"
},
"for_search_document_description": {
"type": "text",
"analyzer": "folding"
},
"for_search_document_name": {
"type": "text",
"analyzer": "folding"
},
"text": {
"type": "text",
"analyzer": "folding"
},
"text_block_coords": {
"type": "keyword"
},
"text_block_id": {
"type": "keyword"
},
"text_block_page": {
"type": "integer"
},
"text_embedding": {
"type": "knn_vector",
"dimension": 768,
"method": {
"engine": "nmslib",
"space_type": "innerproduct",
"name": "hnsw",
"parameters": {
"ef_construction": 512,
"m": 16
}
}
},
"translated": {
"type": "boolean"
}
}
}
}
}
110 changes: 109 additions & 1 deletion tests/data/navigator_test_htmls_translated.mapping.json
Original file line number Diff line number Diff line change
@@ -1 +1,109 @@
{"navigator_test_htmls_translated":{"mappings":{"properties":{"document_category":{"type":"keyword"},"document_cdn_object":{"type":"keyword"},"document_content_type":{"type":"keyword"},"document_date":{"type":"date","format":"dd/MM/yyyy"},"document_description":{"type":"keyword","normalizer":"folding"},"document_description_embedding":{"type":"knn_vector","dimension":768,"method":{"engine":"nmslib","space_type":"innerproduct","name":"hnsw","parameters":{"ef_construction":512,"m":16}}},"document_geography":{"type":"keyword"},"document_id":{"type":"keyword","normalizer":"folding"},"document_md5_sum":{"type":"keyword"},"document_name":{"type":"keyword","normalizer":"folding"},"document_name_and_slug":{"type":"keyword","eager_global_ordinals":true,"normalizer":"folding"},"document_sectors":{"type":"keyword"},"document_slug":{"type":"keyword"},"document_source":{"type":"keyword"},"document_source_url":{"type":"keyword"},"document_type":{"type":"keyword"},"for_search_document_description":{"type":"text","analyzer":"folding"},"for_search_document_name":{"type":"text","analyzer":"folding"},"text":{"type":"text","analyzer":"folding"},"text_block_coords":{"type":"keyword"},"text_block_id":{"type":"keyword"},"text_block_page":{"type":"integer"},"text_embedding":{"type":"knn_vector","dimension":768,"method":{"engine":"nmslib","space_type":"innerproduct","name":"hnsw","parameters":{"ef_construction":512,"m":16}}},"translated":{"type":"boolean"}}}}}
{
"navigator_test_htmls_translated": {
"mappings": {
"properties": {
"document_category": {
"type": "keyword"
},
"document_cdn_object": {
"type": "keyword"
},
"document_content_type": {
"type": "keyword"
},
"document_date": {
"type": "date",
"format": "dd/MM/yyyy"
},
"document_description": {
"type": "keyword",
"normalizer": "folding"
},
"document_description_embedding": {
"type": "knn_vector",
"dimension": 768,
"method": {
"engine": "nmslib",
"space_type": "innerproduct",
"name": "hnsw",
"parameters": {
"ef_construction": 512,
"m": 16
}
}
},
"document_geography": {
"type": "keyword"
},
"document_id": {
"type": "keyword",
"normalizer": "folding"
},
"document_md5_sum": {
"type": "keyword"
},
"document_name": {
"type": "keyword",
"normalizer": "folding"
},
"document_name_and_slug": {
"type": "keyword",
"eager_global_ordinals": true,
"normalizer": "folding"
},
"document_sectors": {
"type": "keyword"
},
"document_slug": {
"type": "keyword"
},
"document_source": {
"type": "keyword"
},
"document_source_url": {
"type": "keyword"
},
"document_type": {
"type": "keyword"
},
"for_search_document_description": {
"type": "text",
"analyzer": "folding"
},
"for_search_document_name": {
"type": "text",
"analyzer": "folding"
},
"text": {
"type": "text",
"analyzer": "folding"
},
"text_block_coords": {
"type": "keyword"
},
"text_block_id": {
"type": "keyword"
},
"text_block_page": {
"type": "integer"
},
"text_embedding": {
"type": "knn_vector",
"dimension": 768,
"method": {
"engine": "nmslib",
"space_type": "innerproduct",
"name": "hnsw",
"parameters": {
"ef_construction": 512,
"m": 16
}
}
},
"translated": {
"type": "boolean"
}
}
}
}
}
Loading

0 comments on commit 87def48

Please sign in to comment.