From b5fd74026193fde0edd713c296dc0a7bfec97a25 Mon Sep 17 00:00:00 2001
From: Alex Keeler
Date: Wed, 28 Aug 2024 13:29:15 -0400
Subject: [PATCH] Update knn query spec

Signed-off-by: Alex Keeler
---
Notes (ignored by git am):
- KnnQuery moves to _common.query_dsl.yaml as a single-entry map of
  field name -> KnnField; KnnField requires only `vector`.
- tests/default/_core/search/knn.yaml ends with a trailing newline.

 spec/namespaces/_core.yaml          |   4 +-
 spec/schemas/_common.query_dsl.yaml |   8 ++
 spec/schemas/_common.yaml           |  41 ++------
 spec/schemas/_core.msearch.yaml     |   4 +-
 tests/default/_core/search/knn.yaml | 153 ++++++++++++++++++++++++++++
 5 files changed, 174 insertions(+), 36 deletions(-)
 create mode 100644 tests/default/_core/search/knn.yaml

diff --git a/spec/namespaces/_core.yaml b/spec/namespaces/_core.yaml
index 9ba5edb16..f26fb4de6 100644
--- a/spec/namespaces/_core.yaml
+++ b/spec/namespaces/_core.yaml
@@ -2437,10 +2437,10 @@ components:
               knn:
                 description: Defines the approximate kNN search to run.
                 oneOf:
-                  - $ref: '../schemas/_common.yaml#/components/schemas/KnnQuery'
+                  - $ref: '../schemas/_common.query_dsl.yaml#/components/schemas/KnnQuery'
                   - type: array
                     items:
-                      $ref: '../schemas/_common.yaml#/components/schemas/KnnQuery'
+                      $ref: '../schemas/_common.query_dsl.yaml#/components/schemas/KnnQuery'
               rank:
                 $ref: '../schemas/_common.yaml#/components/schemas/RankContainer'
               min_score:
diff --git a/spec/schemas/_common.query_dsl.yaml b/spec/schemas/_common.query_dsl.yaml
index ba5afcc3b..6633c043c 100644
--- a/spec/schemas/_common.query_dsl.yaml
+++ b/spec/schemas/_common.query_dsl.yaml
@@ -65,6 +65,8 @@ components:
             $ref: '#/components/schemas/IntervalsQuery'
           minProperties: 1
           maxProperties: 1
+        knn:
+          $ref: '#/components/schemas/KnnQuery'
         match:
           description: |-
             Returns documents that match a provided text, number, date or boolean value.
@@ -896,6 +898,12 @@ components:
           $ref: '_common.yaml#/components/schemas/Field'
       required:
         - pattern
+    KnnQuery:
+      type: object
+      additionalProperties:
+        $ref: '_common.yaml#/components/schemas/KnnField'
+      minProperties: 1
+      maxProperties: 1
     MatchQuery:
       allOf:
         - $ref: '#/components/schemas/QueryBase'
diff --git a/spec/schemas/_common.yaml b/spec/schemas/_common.yaml
index 903c97c04..df0b585e6 100644
--- a/spec/schemas/_common.yaml
+++ b/spec/schemas/_common.yaml
@@ -591,23 +591,19 @@ components:
       type: number
     EmptyObject:
       type: object
-    KnnQuery:
+    KnnField:
       type: object
       properties:
-        field:
-          $ref: '#/components/schemas/Field'
-        query_vector:
+        vector:
           $ref: '#/components/schemas/QueryVector'
-        query_vector_builder:
-          $ref: '#/components/schemas/QueryVectorBuilder'
         k:
           description: The final number of nearest neighbors to return as top hits
           type: number
-        num_candidates:
-          description: The number of nearest neighbor candidates to consider per shard
+        min_score:
+          description: The minimum similarity score for a neighbor to be considered a hit
           type: number
-        boost:
-          description: Boost value to apply to kNN scores
+        max_distance:
+          description: The maximum physical distance in vector space for a neighbor to be considered a hit
           type: number
         filter:
           description: Filters for the kNN search query
@@ -616,34 +612,15 @@ components:
             - type: array
               items:
                 $ref: '_common.query_dsl.yaml#/components/schemas/QueryContainer'
-        similarity:
-          description: The minimum similarity for a vector to be considered a match
+        boost:
+          description: Boost value to apply to kNN scores
           type: number
       required:
-        - field
-        - k
-        - num_candidates
+        - vector
     QueryVector:
       type: array
       items:
         type: number
-    QueryVectorBuilder:
-      type: object
-      properties:
-        text_embedding:
-          $ref: '#/components/schemas/TextEmbedding'
-      minProperties: 1
-      maxProperties: 1
-    TextEmbedding:
-      type: object
-      properties:
-        model_id:
-          type: string
-        model_text:
-          type: string
-      required:
-        - model_id
-        - model_text
     SlicedScroll:
       type: object
       properties:
diff --git a/spec/schemas/_core.msearch.yaml b/spec/schemas/_core.msearch.yaml
index 23bca730b..06c256943 100644
--- a/spec/schemas/_core.msearch.yaml
+++ b/spec/schemas/_core.msearch.yaml
@@ -66,10 +66,10 @@ components:
         knn:
           description: Defines the approximate kNN search to run.
           oneOf:
-            - $ref: '_common.yaml#/components/schemas/KnnQuery'
+            - $ref: '_common.query_dsl.yaml#/components/schemas/KnnQuery'
             - type: array
               items:
-                $ref: '_common.yaml#/components/schemas/KnnQuery'
+                $ref: '_common.query_dsl.yaml#/components/schemas/KnnQuery'
         from:
           description: |-
             Starting document offset. By default, you cannot page through more than 10,000
diff --git a/tests/default/_core/search/knn.yaml b/tests/default/_core/search/knn.yaml
new file mode 100644
index 000000000..b515e822c
--- /dev/null
+++ b/tests/default/_core/search/knn.yaml
@@ -0,0 +1,153 @@
+$schema: ../../../../json_schemas/test_story.schema.yaml
+
+description: Test search endpoint with knn query.
+prologues:
+  - path: /movies
+    method: PUT
+    request:
+      payload:
+        settings:
+          index:
+            knn: true
+        mappings:
+          properties:
+            director:
+              type: text
+            title:
+              type: text
+            year:
+              type: integer
+            embedding:
+              type: knn_vector
+              dimension: 5
+              method:
+                name: hnsw
+                space_type: l2
+                engine: faiss
+  - path: /movies/_doc
+    method: POST
+    parameters:
+      refresh: true
+    request:
+      payload:
+        director: Bennett Miller
+        title: Moneyball
+        year: 2011
+        embedding: [1.4, 3.5, 2.3, 4.1, 9.2]
+    status: [201]
+epilogues:
+  - path: /movies
+    method: DELETE
+    status: [200, 404]
+chapters:
+  - synopsis: Search using the k parameter.
+    path: /{index}/_search
+    parameters:
+      index: movies
+    method: POST
+    request:
+      payload:
+        query:
+          knn:
+            embedding:
+              vector: [1.4, 3.5, 2.3, 4.1, 9.2]
+              k: 1
+    response:
+      status: 200
+      payload:
+        timed_out: false
+        hits:
+          total:
+            value: 1
+            relation: eq
+          hits:
+            - _index: movies
+              _score: 1
+              _source:
+                director: Bennett Miller
+                title: Moneyball
+                year: 2011
+                embedding: [1.4, 3.5, 2.3, 4.1, 9.2]
+
+  - synopsis: Search using the min_score parameter.
+    path: /{index}/_search
+    parameters:
+      index: movies
+    method: POST
+    request:
+      payload:
+        query:
+          knn:
+            embedding:
+              vector: [1.4, 3.5, 2.3, 4.1, 9.2]
+              min_score: 0.9
+    response:
+      status: 200
+      payload:
+        timed_out: false
+        hits:
+          total:
+            value: 1
+            relation: eq
+          hits:
+            - _index: movies
+              _score: 1
+              _source:
+                director: Bennett Miller
+                title: Moneyball
+                year: 2011
+                embedding: [1.4, 3.5, 2.3, 4.1, 9.2]
+
+  - synopsis: Search using the max_distance parameter.
+    path: /{index}/_search
+    parameters:
+      index: movies
+    method: POST
+    request:
+      payload:
+        query:
+          knn:
+            embedding:
+              vector: [1.4, 3.5, 2.3, 4.1, 9.2]
+              max_distance: 0.1
+    response:
+      status: 200
+      payload:
+        timed_out: false
+        hits:
+          total:
+            value: 1
+            relation: eq
+          hits:
+            - _index: movies
+              _score: 1
+              _source:
+                director: Bennett Miller
+                title: Moneyball
+                year: 2011
+                embedding: [1.4, 3.5, 2.3, 4.1, 9.2]
+
+  - synopsis: Search using a filter.
+    path: /{index}/_search
+    parameters:
+      index: movies
+    method: POST
+    request:
+      payload:
+        query:
+          knn:
+            embedding:
+              vector: [1.4, 3.5, 2.3, 4.1, 9.2]
+              k: 1
+              filter:
+                term:
+                  year: 2012
+    response:
+      status: 200
+      payload:
+        timed_out: false
+        hits:
+          total:
+            value: 0
+            relation: eq
+          hits: []