diff --git a/.github/vale/styles/Vocab/OpenSearch/Words/accept.txt b/.github/vale/styles/Vocab/OpenSearch/Words/accept.txt
index 091f2d2534..4362c11798 100644
--- a/.github/vale/styles/Vocab/OpenSearch/Words/accept.txt
+++ b/.github/vale/styles/Vocab/OpenSearch/Words/accept.txt
@@ -81,6 +81,7 @@ Levenshtein
 [Oo]nboarding
 pebibyte
 [Pp]erformant
+[Pp]laintext
 [Pp]luggable
 [Pp]reconfigure
 [Pp]refetch
@@ -92,6 +93,7 @@ pebibyte
 [Pp]reprocess
 [Pp]retrain
 [Pp]seudocode
+[Qq]uantiz(e|ation|ing|er)
 [Rr]ebalance
 [Rr]ebalancing
 [Rr]edownload
diff --git a/.github/workflows/delete_backport_branch.yml b/.github/workflows/delete_backport_branch.yml
deleted file mode 100644
index 387a124b8c..0000000000
--- a/.github/workflows/delete_backport_branch.yml
+++ /dev/null
@@ -1,15 +0,0 @@
-name: Delete merged branch of the backport PRs
-on:
-  pull_request:
-    types:
-      - closed
-
-jobs:
-  delete-branch:
-    runs-on: ubuntu-latest
-    if: startsWith(github.event.pull_request.head.ref,'backport/')
-    steps:
-      - name: Delete merged branch
-        uses: SvanBoxel/delete-merged-branch@main
-        env:
-          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
diff --git a/.github/workflows/delete_merged_branch.yml b/.github/workflows/delete_merged_branch.yml
new file mode 100644
index 0000000000..a97d2674e4
--- /dev/null
+++ b/.github/workflows/delete_merged_branch.yml
@@ -0,0 +1,22 @@
+name: Delete merged branches of PRs
+on:
+  pull_request:
+    types:
+      - closed
+
+jobs:
+  delete-branch:
+    runs-on: ubuntu-latest
+    if: |
+      startsWith(github.event.pull_request.head.repo.full_name, 'opensearch-project/documentation-website') &&
+      !startsWith(github.event.pull_request.head.ref, 'main') &&
+      !startsWith(github.event.pull_request.head.ref, '1.') &&
+      !startsWith(github.event.pull_request.head.ref, '2.') &&
+      !startsWith(github.event.pull_request.head.ref, 'version/')
+    steps:
+      - name: Echo remove branch
+        run: echo Removing ${{ github.event.pull_request.head.ref }}
+      - name: Delete merged branch
+        uses: SvanBoxel/delete-merged-branch@main
+        env:
+          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
diff --git a/_automating-configurations/api/create-workflow.md b/_automating-configurations/api/create-workflow.md
index e99a421fb9..5c501ce4e8 100644
--- a/_automating-configurations/api/create-workflow.md
+++ b/_automating-configurations/api/create-workflow.md
@@ -16,7 +16,7 @@ Creating a workflow adds the content of a workflow template to the flow framewor
 
 To obtain the validation template for workflow steps, call the [Get Workflow Steps API]({{site.url}}{{site.baseurl}}/automating-configurations/api/get-workflow-steps/).
 
-You can include placeholder expressions in the value of workflow step fields. For example, you can specify a credential field in a template as `openAI_key: '${{ openai_key }}'`. The expression will be substituted with the user-provided value during provisioning, using the format `${{ }}`. You can pass the actual key as a parameter using the [Provision Workflow API]({{site.url}}{{site.baseurl}}/automating-configurations/api/provision-workflow/) or using this API with the `provision` parameter set to `true`.
+You can include placeholder expressions in the value of workflow step fields. For example, you can specify a credential field in a template as {% raw %}`openAI_key: '${{ openai_key }}'`{% endraw %}. The expression will be substituted with the user-provided value during provisioning, using the format {% raw %}`${{ }}`{% endraw %}. You can pass the actual key as a parameter by using the [Provision Workflow API]({{site.url}}{{site.baseurl}}/automating-configurations/api/provision-workflow/) or by using this API with the `provision` parameter set to `true`.
 
 Once a workflow is created, provide its `workflow_id` to other APIs.
diff --git a/_automating-configurations/index.md b/_automating-configurations/index.md
index a7462ad16a..ef9cb4f850 100644
--- a/_automating-configurations/index.md
+++ b/_automating-configurations/index.md
@@ -8,7 +8,7 @@ redirect_from: /automating-configurations/
 ---
 
 # Automating configurations
-**Introduced 2.12**
+**Introduced 2.13**
 {: .label .label-purple }
 
 You can automate complex OpenSearch setup and preprocessing tasks by providing templates for common use cases. For example, automating machine learning (ML) setup tasks streamlines the use of OpenSearch ML offerings.
diff --git a/_automating-configurations/workflow-templates.md b/_automating-configurations/workflow-templates.md
new file mode 100644
index 0000000000..44700ae1ac
--- /dev/null
+++ b/_automating-configurations/workflow-templates.md
@@ -0,0 +1,147 @@
+---
+layout: default
+title: Workflow templates
+nav_order: 25
+---
+
+# Workflow templates
+
+OpenSearch provides several workflow templates for common machine learning (ML) use cases, such as semantic or conversational search.
+
+You can specify a workflow template when you call the [Create Workflow API]({{site.url}}{{site.baseurl}}/automating-configurations/api/create-workflow/). To provision the workflow, specify `provision=true` as a query parameter. For example, you can configure [neural sparse search]({{site.url}}{{site.baseurl}}/search-plugins/neural-sparse-search/) by specifying `local_neural_sparse_search_bi_encoder` as the `use_case` query parameter, as shown in the following request:
+
+```json
+POST /_plugins/_flow_framework/workflow?use_case=local_neural_sparse_search_bi_encoder
+```
+{% include copy-curl.html %}
+
+The workflow created using this template performs the following configuration steps:
+
+- Deploys the default pretrained sparse encoding model (`amazon/neural-sparse/opensearch-neural-sparse-encoding-v1`).
+- Creates an ingest pipeline that contains a `sparse_encoding` processor, which converts the text in a document field to vector embeddings using the deployed model.
+- Creates a sample index for sparse search, specifying the default pipeline as the newly created ingest pipeline.
+
+## Parameters
+
+Each workflow template has a defined schema and a set of APIs with predefined default values for each step. For more information about template parameter defaults, see [Supported workflow templates](#supported-workflow-templates).
+
+### Overriding default values
+
+To override a template's default values, provide the new values in the request body when sending a create workflow request. For example, the following request changes the Cohere model, the name of the `text_embedding` processor output field, and the index name for the `semantic_search_with_cohere_embedding` template:
+
+```json
+POST /_plugins/_flow_framework/workflow?use_case=semantic_search_with_cohere_embedding
+{
+  "create_connector.model" : "embed-multilingual-v3.0",
+  "text_embedding.field_map.output": "book_embedding",
+  "create_index.name": "sparse-book-index"
+}
+```
+{% include copy-curl.html %}
+
+## Example
+
+In this example, you'll configure the `semantic_search_with_cohere_embedding_query_enricher` workflow template. The workflow created using this template performs the following configuration steps:
+
+- Deploys an externally hosted Cohere model.
+- Creates an ingest pipeline using the model.
+- Creates a sample k-NN index and configures a search pipeline to define the default model ID for that index.
+
+### Step 1: Create and provision the workflow
+
+Send the following request to create and provision a workflow using the `semantic_search_with_cohere_embedding_query_enricher` workflow template. The only required request body field for this template is the API key for the Cohere Embed model:
+
+```json
+POST /_plugins/_flow_framework/workflow?use_case=semantic_search_with_cohere_embedding_query_enricher&provision=true
+{
+  "create_connector.credential.key" : ""
+}
+```
+{% include copy-curl.html %}
+
+OpenSearch responds with a workflow ID for the created workflow:
+
+```json
+{
+  "workflow_id" : "8xL8bowB8y25Tqfenm50"
+}
+```
+
+The workflow in the previous step creates a default k-NN index. The default index name is `my-nlp-index`:
+
+```json
+{
+  "create_index.name": "my-nlp-index"
+}
+```
+
+For all default parameter values for this workflow template, see [Cohere Embed semantic search defaults](https://github.com/opensearch-project/flow-framework/blob/2.13/src/main/resources/defaults/cohere-embedding-semantic-search-defaults.json).
+
+### Step 2: Ingest documents into the index
+
+To ingest documents into the index created in the previous step, send the following request:
+
+```json
+PUT /my-nlp-index/_doc/1
+{
+  "passage_text": "Hello world",
+  "id": "s1"
+}
+```
+{% include copy-curl.html %}
+
+### Step 3: Perform vector search
+
+To perform a vector search on your index, use a [`neural` query]({{site.url}}{{site.baseurl}}/query-dsl/specialized/neural/) clause:
+
+```json
+GET /my-nlp-index/_search
+{
+  "_source": {
+    "excludes": [
+      "passage_embedding"
+    ]
+  },
+  "query": {
+    "neural": {
+      "passage_embedding": {
+        "query_text": "Hi world",
+        "k": 100
+      }
+    }
+  }
+}
+```
+{% include copy-curl.html %}
+
+## Viewing workflow resources
+
+The workflow you created provisioned all the necessary resources for semantic search. To view the provisioned resources, call the [Get Workflow Status API]({{site.url}}{{site.baseurl}}/automating-configurations/api/get-workflow-status/) and provide the `workflow_id` for your workflow:
+
+```json
+GET /_plugins/_flow_framework/workflow/8xL8bowB8y25Tqfenm50/_status
+```
+{% include copy-curl.html %}
+
+## Supported workflow templates
+
+The following table lists the supported workflow templates. To use a workflow template, specify it in the `use_case` query parameter when creating a workflow.
+
+| Template use case | Description | Required parameters | Defaults |
+| :--- | :--- | :--- | :--- |
+| `bedrock_titan_embedding_model_deploy` | Creates and deploys an Amazon Bedrock embedding model (by default, `titan-embed-text-v1`). | `create_connector.credential.access_key`, `create_connector.credential.secret_key`, `create_connector.credential.session_token` | [Defaults](https://github.com/opensearch-project/flow-framework/blob/2.13/src/main/resources/defaults/bedrock-titan-embedding-defaults.json) |
+| `bedrock_titan_multimodal_model_deploy` | Creates and deploys an Amazon Bedrock multimodal embedding model (by default, `titan-embed-image-v1`). | `create_connector.credential.access_key`, `create_connector.credential.secret_key`, `create_connector.credential.session_token` | [Defaults](https://github.com/opensearch-project/flow-framework/blob/2.13/src/main/resources/defaults/bedrock-titan-multimodal-defaults.json) |
+| `cohere_embedding_model_deploy` | Creates and deploys a Cohere embedding model (by default, `embed-english-v3.0`). | `create_connector.credential.key` | [Defaults](https://github.com/opensearch-project/flow-framework/blob/2.13/src/main/resources/defaults/cohere-embedding-defaults.json) |
+| `cohere_chat_model_deploy` | Creates and deploys a Cohere chat model (by default, Cohere Command). | `create_connector.credential.key` | [Defaults](https://github.com/opensearch-project/flow-framework/blob/2.13/src/main/resources/defaults/cohere-chat-defaults.json) |
+| `open_ai_embedding_model_deploy` | Creates and deploys an OpenAI embedding model (by default, `text-embedding-ada-002`). | `create_connector.credential.key` | [Defaults](https://github.com/opensearch-project/flow-framework/blob/2.13/src/main/resources/defaults/openai-embedding-defaults.json) |
+| `openai_chat_model_deploy` | Creates and deploys an OpenAI chat model (by default, `gpt-3.5-turbo`). | `create_connector.credential.key` | [Defaults](https://github.com/opensearch-project/flow-framework/blob/2.13/src/main/resources/defaults/openai-chat-defaults.json) |
+| `local_neural_sparse_search_bi_encoder` | Configures [neural sparse search]({{site.url}}{{site.baseurl}}/search-plugins/neural-sparse-search/): <br> - Deploys a pretrained sparse encoding model. <br> - Creates an ingest pipeline with a sparse encoding processor. <br> - Creates a sample index to use for sparse search, specifying the newly created pipeline as the default pipeline. | None | [Defaults](https://github.com/opensearch-project/flow-framework/blob/2.13/src/main/resources/defaults/local-sparse-search-biencoder-defaults.json) |
+| `semantic_search` | Configures [semantic search]({{site.url}}{{site.baseurl}}/search-plugins/semantic-search/): <br> - Creates an ingest pipeline with a `text_embedding` processor and a k-NN index. <br> You must provide the model ID of the text embedding model to be used. | `create_ingest_pipeline.model_id` | [Defaults](https://github.com/opensearch-project/flow-framework/blob/2.13/src/main/resources/defaults/semantic-search-defaults.json) |
+| `semantic_search_with_query_enricher` | Configures [semantic search]({{site.url}}{{site.baseurl}}/search-plugins/semantic-search/) similarly to the `semantic_search` template. Adds a [`query_enricher`]({{site.url}}{{site.baseurl}}/search-plugins/search-pipelines/neural-query-enricher/) search processor that sets a default model ID for neural queries. You must provide the model ID of the text embedding model to be used. | `create_ingest_pipeline.model_id` | [Defaults](https://github.com/opensearch-project/flow-framework/blob/2.13/src/main/resources/defaults/semantic-search-query-enricher-defaults.json) |
+| `semantic_search_with_cohere_embedding` | Configures [semantic search]({{site.url}}{{site.baseurl}}/search-plugins/semantic-search/) and deploys a Cohere embedding model. You must provide the API key for the Cohere model. | `create_connector.credential.key` | [Defaults](https://github.com/opensearch-project/flow-framework/blob/2.13/src/main/resources/defaults/cohere-embedding-semantic-search-defaults.json) |
+| `semantic_search_with_cohere_embedding_query_enricher` | Configures [semantic search]({{site.url}}{{site.baseurl}}/search-plugins/semantic-search/) and deploys a Cohere embedding model. Adds a [`query_enricher`]({{site.url}}{{site.baseurl}}/search-plugins/search-pipelines/neural-query-enricher/) search processor that sets a default model ID for neural queries. You must provide the API key for the Cohere model. | `create_connector.credential.key` | [Defaults](https://github.com/opensearch-project/flow-framework/blob/2.13/src/main/resources/defaults/cohere-embedding-semantic-search-with-query-enricher-defaults.json) |
+| `multimodal_search` | Configures an ingest pipeline with a `text_image_embedding` processor and a k-NN index for [multimodal search]({{site.url}}{{site.baseurl}}/search-plugins/multimodal-search/). You must provide the model ID of the multimodal embedding model to be used. | `create_ingest_pipeline.model_id` | [Defaults](https://github.com/opensearch-project/flow-framework/blob/2.13/src/main/resources/defaults/multi-modal-search-defaults.json) |
+| `multimodal_search_with_bedrock_titan` | Deploys an Amazon Bedrock multimodal model and configures an ingest pipeline with a `text_image_embedding` processor and a k-NN index for [multimodal search]({{site.url}}{{site.baseurl}}/search-plugins/multimodal-search/). You must provide your AWS credentials. | `create_connector.credential.access_key`, `create_connector.credential.secret_key`, `create_connector.credential.session_token` | [Defaults](https://github.com/opensearch-project/flow-framework/blob/2.13/src/main/resources/defaults/multimodal-search-bedrock-titan-defaults.json) |
+| `hybrid_search` | Configures [hybrid search]({{site.url}}{{site.baseurl}}/search-plugins/hybrid-search/): <br> - Creates an ingest pipeline, a k-NN index, and a search pipeline with a `normalization_processor`. You must provide the model ID of the text embedding model to be used. | `create_ingest_pipeline.model_id` | [Defaults](https://github.com/opensearch-project/flow-framework/blob/2.13/src/main/resources/defaults/hybrid-search-defaults.json) |
+| `conversational_search_with_llm_deploy` | Deploys a large language model (LLM) (by default, Cohere Chat) and configures a search pipeline with a `retrieval_augmented_generation` processor for [conversational search]({{site.url}}{{site.baseurl}}/search-plugins/conversational-search/). | `create_connector.credential.key` | [Defaults](https://github.com/opensearch-project/flow-framework/blob/2.13/src/main/resources/defaults/conversational-search-defaults.json) |
+
diff --git a/_benchmark/user-guide/creating-osb-workloads.md b/_benchmark/user-guide/creating-custom-workloads.md
similarity index 99%
rename from _benchmark/user-guide/creating-osb-workloads.md
rename to _benchmark/user-guide/creating-custom-workloads.md
index 76c573907f..d06610467f 100644
--- a/_benchmark/user-guide/creating-osb-workloads.md
+++ b/_benchmark/user-guide/creating-custom-workloads.md
@@ -1,11 +1,11 @@
 ---
 layout: default
-title: Creating OpenSearch Benchmark workloads
+title: Creating custom workloads
 nav_order: 10
 parent: User guide
 redirect_from:
   - /benchmark/creating-custom-workloads/
-  - /benchmark/user-guide/creating-custom-workloads
+  - /benchmark/user-guide/creating-osb-workloads/
 ---
 
 # Creating custom workloads
diff --git a/_dashboards/dashboards-assistant/index.md b/_dashboards/dashboards-assistant/index.md
index 9313dd2e97..d44e6b58e8 100644
--- a/_dashboards/dashboards-assistant/index.md
+++ b/_dashboards/dashboards-assistant/index.md
@@ -6,14 +6,11 @@ has_children: false
 has_toc: false
 ---
 
-This is an experimental feature and is not recommended for use in a production environment. For updates on the feature's progress or to leave feedback, go to the [`dashboards-assistant` repository](https://github.com/opensearch-project/dashboards-assistant) on GitHub or the associated [OpenSearch forum thread](https://forum.opensearch.org/t/feedback-opensearch-assistant/16741).
-{: .warning}
-
 Note that machine learning models are probabilistic and that some may perform better than others, so the OpenSearch Assistant may occasionally produce inaccurate information. We recommend evaluating outputs for accuracy as appropriate to your use case, including reviewing the output or combining it with other verification factors.
 {: .important}
 
 # OpenSearch Assistant for OpenSearch Dashboards
-Introduced 2.12
+**Introduced 2.13**
 {: .label .label-purple }
 
 The OpenSearch Assistant toolkit helps you create AI-powered assistants for OpenSearch Dashboards without requiring you to have specialized query tools or skills.
@@ -49,9 +46,6 @@ A screenshot of the interface is shown in the following image.
 
 OpenSearch Assistant interface
 
-For more information about ways to enable experimental features, see [Experimental feature flags]({{site.url}}{{site.baseurl}}/install-and-configure/configuring-opensearch/experimental/).
-{: .note}
-
 ## Configuring OpenSearch Assistant
 
 You can use the OpenSearch Dashboards interface to configure OpenSearch Assistant. Go to the [Getting started guide](https://github.com/opensearch-project/dashboards-assistant/blob/main/GETTING_STARTED_GUIDE.md) for step-by-step instructions.
For the chatbot template, go to the [Flow Framework plugin](https://github.com/opensearch-project/flow-framework) documentation. You can modify this template to use your own model and customize the chatbot tools. diff --git a/_data-prepper/pipelines/configuration/sinks/opensearch.md b/_data-prepper/pipelines/configuration/sinks/opensearch.md index d485fbb2b9..628515a985 100644 --- a/_data-prepper/pipelines/configuration/sinks/opensearch.md +++ b/_data-prepper/pipelines/configuration/sinks/opensearch.md @@ -91,7 +91,6 @@ Option | Required | Type | Description `document_root_key` | No | String | The key in the event that will be used as the root in the document. The default is the root of the event. If the key does not exist, then the entire event is written as the document. If `document_root_key` is of a basic value type, such as a string or integer, then the document will have a structure of `{"data": }`. `serverless` | No | Boolean | Determines whether the OpenSearch backend is Amazon OpenSearch Serverless. Set this value to `true` when the destination for the `opensearch` sink is an Amazon OpenSearch Serverless collection. Default is `false`. `serverless_options` | No | Object | The network configuration options available when the backend of the `opensearch` sink is set to Amazon OpenSearch Serverless. For more information, see [Serverless options](#serverless-options). - ## aws diff --git a/_ingest-pipelines/processors/text-chunking.md b/_ingest-pipelines/processors/text-chunking.md index e9ff55b210..d11c380bde 100644 --- a/_ingest-pipelines/processors/text-chunking.md +++ b/_ingest-pipelines/processors/text-chunking.md @@ -157,119 +157,11 @@ The response confirms that, in addition to the `passage_text` field, the process } ``` -Once you have created an ingest pipeline, you need to create an index for ingestion and ingest documents into the index. To learn more, see [Step 2: Create an index for ingestion]({{site.url}}{{site.baseurl}}/search-plugins/neural-sparse-search/#step-2-create-an-index-for-ingestion) and [Step 3: Ingest documents into the index]({{site.url}}{{site.baseurl}}/search-plugins/neural-sparse-search/#step-3-ingest-documents-into-the-index) of the [neural sparse search documentation]({{site.url}}{{site.baseurl}}/search-plugins/neural-sparse-search/). - -## Chaining text chunking and embedding processors - -You can use a `text_chunking` processor as a preprocessing step for a `text_embedding` or `sparse_encoding` processor in order to obtain embeddings for each chunked passage. - -**Prerequisites** - -Follow the steps outlined in the [pretrained model documentation]({{site.url}}{{site.baseurl}}/ml-commons-plugin/pretrained-models/) to register an embedding model. - -**Step 1: Create a pipeline** - -The following example request creates an ingest pipeline that converts the text in the `passage_text` field into chunked passages, which will be stored in the `passage_chunk` field. 
The text in the `passage_chunk` field is then converted into text embeddings, and the embeddings are stored in the `passage_embedding` field: - -```json -PUT _ingest/pipeline/text-chunking-embedding-ingest-pipeline -{ - "description": "A text chunking and embedding ingest pipeline", - "processors": [ - { - "text_chunking": { - "algorithm": { - "fixed_token_length": { - "token_limit": 10, - "overlap_rate": 0.2, - "tokenizer": "standard" - } - }, - "field_map": { - "passage_text": "passage_chunk" - } - } - }, - { - "text_embedding": { - "model_id": "LMLPWY4BROvhdbtgETaI", - "field_map": { - "passage_chunk": "passage_chunk_embedding" - } - } - } - ] -} -``` -{% include copy-curl.html %} - -**Step 2 (Optional): Test the pipeline** - -It is recommended that you test your pipeline before ingesting documents. -{: .tip} - -To test the pipeline, run the following query: - -```json -POST _ingest/pipeline/text-chunking-embedding-ingest-pipeline/_simulate -{ - "docs": [ - { - "_index": "testindex", - "_id": "1", - "_source":{ - "passage_text": "This is an example document to be chunked. The document contains a single paragraph, two sentences and 24 tokens by standard tokenizer in OpenSearch." - } - } - ] -} -``` -{% include copy-curl.html %} - -#### Response - -The response confirms that, in addition to the `passage_text` and `passage_chunk` fields, the processor has generated text embeddings for each of the three passages in the `passage_chunk_embedding` field. The embedding vectors are stored in the `knn` field for each chunk: - -```json -{ - "docs": [ - { - "doc": { - "_index": "testindex", - "_id": "1", - "_source": { - "passage_chunk_embedding": [ - { - "knn": [...] - }, - { - "knn": [...] - }, - { - "knn": [...] - } - ], - "passage_text": "This is an example document to be chunked. The document contains a single paragraph, two sentences and 24 tokens by standard tokenizer in OpenSearch.", - "passage_chunk": [ - "This is an example document to be chunked. The document ", - "The document contains a single paragraph, two sentences and 24 ", - "and 24 tokens by standard tokenizer in OpenSearch." - ] - }, - "_ingest": { - "timestamp": "2024-03-20T03:04:49.144054Z" - } - } - } - ] -} -``` - -Once you have created an ingest pipeline, you need to create an index for ingestion and ingest documents into the index. To learn more, see [Step 2: Create an index for ingestion]({{site.url}}{{site.baseurl}}/search-plugins/neural-sparse-search/#step-2-create-an-index-for-ingestion) and [Step 3: Ingest documents into the index]({{site.url}}{{site.baseurl}}/search-plugins/neural-sparse-search/#step-3-ingest-documents-into-the-index) of the [neural sparse search documentation]({{site.url}}{{site.baseurl}}/search-plugins/neural-sparse-search/). +Once you have created an ingest pipeline, you need to create an index for document ingestion. To learn more, see [Text chunking]({{site.url}}{{site.baseurl}}/search-plugins/text-chunking/). ## Cascaded text chunking processors -You can chain multiple chunking processors together. For example, to split documents into paragraphs, apply the `delimiter` algorithm and specify the parameter as `\n\n`. To prevent a paragraph from exceeding the token limit, append another chunking processor that uses the `fixed_token_length` algorithm. You can configure the ingest pipeline for this example as follows: +You can chain multiple text chunking processors together. For example, to split documents into paragraphs, apply the `delimiter` algorithm and specify the parameter as `\n\n`. 
To prevent a paragraph from exceeding the token limit, append another text chunking processor that uses the `fixed_token_length` algorithm. You can configure the ingest pipeline for this example as follows: ```json PUT _ingest/pipeline/text-chunking-cascade-ingest-pipeline @@ -309,7 +201,7 @@ PUT _ingest/pipeline/text-chunking-cascade-ingest-pipeline ## Next steps +- For a complete example, see [Text chunking]({{site.url}}{{site.baseurl}}/search-plugins/text-chunking/). - To learn more about semantic search, see [Semantic search]({{site.url}}{{site.baseurl}}/search-plugins/semantic-search/). - To learn more about sparse search, see [Neural sparse search]({{site.url}}{{site.baseurl}}/search-plugins/neural-sparse-search/). - To learn more about using models in OpenSearch, see [Choosing a model]({{site.url}}{{site.baseurl}}/ml-commons-plugin/integrating-ml-models/#choosing-a-model). -- For a comprehensive example, see [Neural search tutorial]({{site.url}}{{site.baseurl}}/search-plugins/neural-search-tutorial/). diff --git a/_ml-commons-plugin/agents-tools/agents-tools-tutorial.md b/_ml-commons-plugin/agents-tools/agents-tools-tutorial.md index 109cbf8836..68d979d6d6 100644 --- a/_ml-commons-plugin/agents-tools/agents-tools-tutorial.md +++ b/_ml-commons-plugin/agents-tools/agents-tools-tutorial.md @@ -7,12 +7,9 @@ nav_order: 10 --- # Agents and tools tutorial -**Introduced 2.12** +**Introduced 2.13** {: .label .label-purple } -This is an experimental feature and is not recommended for use in a production environment. For updates on the progress of the feature or if you want to leave feedback, see the associated [GitHub issue](https://github.com/opensearch-project/ml-commons/issues/1161). -{: .warning} - The following tutorial illustrates creating a flow agent for retrieval-augmented generation (RAG). A flow agent runs its configured tools sequentially, in the order specified. In this example, you'll create an agent with two tools: 1. `VectorDBTool`: The agent will use this tool to retrieve OpenSearch documents relevant to the user question. You'll ingest supplementary information into an OpenSearch index. To facilitate vector search, you'll deploy a text embedding model that translates text into vector embeddings. OpenSearch will translate the ingested documents into embeddings and store them in the index. When you provide a user question to the agent, the agent will construct a query from the question, run vector search on the OpenSearch index, and pass the relevant retrieved documents to the `MLModelTool`. diff --git a/_ml-commons-plugin/agents-tools/index.md b/_ml-commons-plugin/agents-tools/index.md index 016a077c62..ba88edef2f 100644 --- a/_ml-commons-plugin/agents-tools/index.md +++ b/_ml-commons-plugin/agents-tools/index.md @@ -7,12 +7,9 @@ nav_order: 27 --- # Agents and tools -**Introduced 2.12** +**Introduced 2.13** {: .label .label-purple } -This is an experimental feature and is not recommended for use in a production environment. For updates on the progress of the feature or if you want to leave feedback, see the associated [GitHub issue](https://github.com/opensearch-project/ml-commons/issues/1161). -{: .warning} - You can automate machine learning (ML) tasks using agents and tools. An _agent_ orchestrates and runs ML models and tools. A _tool_ performs a set of specific tasks. Some examples of tools are the `VectorDBTool`, which supports vector search, and the `CATIndexTool`, which executes the `cat indices` operation. 
For a list of supported tools, see [Tools]({{site.url}}{{site.baseurl}}/ml-commons-plugin/agents-tools/tools/index/). ## Agents @@ -155,24 +152,6 @@ POST /_plugins/_ml/agents/_register It is important to provide thorough descriptions of the tools so that the LLM can decide in which situations to use those tools. {: .tip} -## Enabling the feature - -To enable agents and tools, configure the following setting: - -```yaml -plugins.ml_commons.agent_framework_enabled: true -``` -{% include copy.html %} - -For conversational agents, you also need to enable RAG for use in conversational search. To enable RAG, configure the following setting: - -```yaml -plugins.ml_commons.rag_pipeline_feature_enabled: true -``` -{% include copy.html %} - -For more information about ways to enable experimental features, see [Experimental feature flags]({{site.url}}{{site.baseurl}}/install-and-configure/configuring-opensearch/experimental/). - ## Next steps - For a list of supported tools, see [Tools]({{site.url}}{{site.baseurl}}/ml-commons-plugin/agents-tools/tools/index/). diff --git a/_ml-commons-plugin/agents-tools/tools/agent-tool.md b/_ml-commons-plugin/agents-tools/tools/agent-tool.md index 272456d693..272af51e4d 100644 --- a/_ml-commons-plugin/agents-tools/tools/agent-tool.md +++ b/_ml-commons-plugin/agents-tools/tools/agent-tool.md @@ -10,13 +10,10 @@ grand_parent: Agents and tools # Agent tool -**Introduced 2.12** +**Introduced 2.13** {: .label .label-purple } -This is an experimental feature and is not recommended for use in a production environment. For updates on the progress of the feature or if you want to leave feedback, see the associated [GitHub issue](https://github.com/opensearch-project/ml-commons/issues/1161). -{: .warning} - The `AgentTool` runs any agent. ## Step 1: Set up an agent for AgentTool to run diff --git a/_ml-commons-plugin/agents-tools/tools/cat-index-tool.md b/_ml-commons-plugin/agents-tools/tools/cat-index-tool.md index 77b28ed527..50ccf28b9b 100644 --- a/_ml-commons-plugin/agents-tools/tools/cat-index-tool.md +++ b/_ml-commons-plugin/agents-tools/tools/cat-index-tool.md @@ -10,13 +10,10 @@ grand_parent: Agents and tools # CAT Index tool -**Introduced 2.12** +**Introduced 2.13** {: .label .label-purple } -This is an experimental feature and is not recommended for use in a production environment. For updates on the progress of the feature or if you want to leave feedback, see the associated [GitHub issue](https://github.com/opensearch-project/ml-commons/issues/1161). -{: .warning} - The `CatIndexTool` retrieves index information for the OpenSearch cluster, similarly to the [CAT Indices API]({{site.url}}{{site.baseurl}}/api-reference/cat/cat-indices/). ## Step 1: Register a flow agent that will run the CatIndexTool diff --git a/_ml-commons-plugin/agents-tools/tools/index-mapping-tool.md b/_ml-commons-plugin/agents-tools/tools/index-mapping-tool.md index f27b0592a8..8649d2d74d 100644 --- a/_ml-commons-plugin/agents-tools/tools/index-mapping-tool.md +++ b/_ml-commons-plugin/agents-tools/tools/index-mapping-tool.md @@ -10,13 +10,10 @@ grand_parent: Agents and tools # Index Mapping tool -**Introduced 2.12** +**Introduced 2.13** {: .label .label-purple } -This is an experimental feature and is not recommended for use in a production environment. For updates on the progress of the feature or if you want to leave feedback, see the associated [GitHub issue](https://github.com/opensearch-project/ml-commons/issues/1161). 
-{: .warning}
-
 The `IndexMappingTool` retrieves mapping and setting information for indexes in your cluster.
 
 ## Step 1: Register a flow agent that will run the IndexMappingTool
diff --git a/_ml-commons-plugin/agents-tools/tools/index.md b/_ml-commons-plugin/agents-tools/tools/index.md
index fe6d574d63..8db522006e 100644
--- a/_ml-commons-plugin/agents-tools/tools/index.md
+++ b/_ml-commons-plugin/agents-tools/tools/index.md
@@ -10,7 +10,7 @@ redirect_from:
 ---
 
 # Tools
-**Introduced 2.12**
+**Introduced 2.13**
 {: .label .label-purple }
 
 A _tool_ performs a set of specific tasks. The following table lists all tools that OpenSearch supports.
diff --git a/_ml-commons-plugin/agents-tools/tools/ml-model-tool.md b/_ml-commons-plugin/agents-tools/tools/ml-model-tool.md
index c0f8aeab86..ceeda40528 100644
--- a/_ml-commons-plugin/agents-tools/tools/ml-model-tool.md
+++ b/_ml-commons-plugin/agents-tools/tools/ml-model-tool.md
@@ -10,13 +10,10 @@ grand_parent: Agents and tools
 
 # ML Model tool
-**Introduced 2.12**
+**Introduced 2.13**
 {: .label .label-purple }
 
-This is an experimental feature and is not recommended for use in a production environment. For updates on the progress of the feature or if you want to leave feedback, see the associated [GitHub issue](https://github.com/opensearch-project/ml-commons/issues/1161).
-{: .warning}
-
 The `MLModelTool` runs a machine learning (ML) model and returns inference results.
 
 ## Step 1: Create a connector for a model
diff --git a/_ml-commons-plugin/agents-tools/tools/neural-sparse-tool.md b/_ml-commons-plugin/agents-tools/tools/neural-sparse-tool.md
index bc1fd4845e..9fee4dcbd2 100644
--- a/_ml-commons-plugin/agents-tools/tools/neural-sparse-tool.md
+++ b/_ml-commons-plugin/agents-tools/tools/neural-sparse-tool.md
@@ -10,13 +10,10 @@ grand_parent: Agents and tools
 
 # Neural Sparse Search tool
-**Introduced 2.12**
+**Introduced 2.13**
 {: .label .label-purple }
 
-This is an experimental feature and is not recommended for use in a production environment. For updates on the progress of the feature or if you want to leave feedback, see the associated [GitHub issue](https://github.com/opensearch-project/ml-commons/issues/1161).
-{: .warning}
-
 The `NeuralSparseSearchTool` performs sparse vector retrieval. For more information about neural sparse search, see [Neural sparse search]({{site.url}}{{site.baseurl}}/search-plugins/neural-sparse-search/).
 
 ## Step 1: Register and deploy a sparse encoding model
diff --git a/_ml-commons-plugin/agents-tools/tools/ppl-tool.md b/_ml-commons-plugin/agents-tools/tools/ppl-tool.md
index f153ca88f3..72d8ba30b5 100644
--- a/_ml-commons-plugin/agents-tools/tools/ppl-tool.md
+++ b/_ml-commons-plugin/agents-tools/tools/ppl-tool.md
@@ -9,12 +9,9 @@ grand_parent: Agents and tools
 ---
 
 # PPL tool
-**Introduced 2.12**
+**Introduced 2.13**
 {: .label .label-purple }
 
-This is an experimental feature and is not recommended for use in a production environment. For updates on the progress of the feature or if you want to leave feedback, see the associated [GitHub issue](https://github.com/opensearch-project/ml-commons/issues/1161).
-{: .warning}
-
 The `PPLTool` translates natural language into a PPL query. The tool provides an `execute` flag to specify whether to run the query. If you set the flag to `true`, the `PPLTool` runs the query and returns the query and the results.
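
As a quick sketch of how this tool is typically wired into an agent (the model ID below is a placeholder, and the `model_type` value depends on the model you connect), a flow agent registration that uses the `PPLTool` with `execute` set to `true` might look like the following; the prerequisite and step-by-step sections that follow cover the full setup:

```json
POST /_plugins/_ml/agents/_register
{
  "name": "Test_Agent_For_PPL",
  "type": "flow",
  "description": "An agent that translates natural language into PPL and runs the resulting query",
  "tools": [
    {
      "type": "PPLTool",
      "name": "TransferQuestionToPPLAndExecuteTool",
      "parameters": {
        "model_id": "<YOUR_MODEL_ID>",
        "model_type": "CLAUDE",
        "execute": true
      }
    }
  ]
}
```
{% include copy-curl.html %}
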
## Prerequisite diff --git a/_ml-commons-plugin/agents-tools/tools/rag-tool.md b/_ml-commons-plugin/agents-tools/tools/rag-tool.md index ae3ad1281a..1f6fafe49a 100644 --- a/_ml-commons-plugin/agents-tools/tools/rag-tool.md +++ b/_ml-commons-plugin/agents-tools/tools/rag-tool.md @@ -10,13 +10,10 @@ grand_parent: Agents and tools # RAG tool -**Introduced 2.12** +**Introduced 2.13** {: .label .label-purple } -This is an experimental feature and is not recommended for use in a production environment. For updates on the progress of the feature or if you want to leave feedback, see the associated [GitHub issue](https://github.com/opensearch-project/ml-commons/issues/1161). -{: .warning} - The `RAGTool` performs retrieval-augmented generation (RAG). For more information about RAG, see [Conversational search]({{site.url}}{{site.baseurl}}/search-plugins/conversational-search/). RAG calls a large language model (LLM) and supplements its knowledge by providing relevant OpenSearch documents along with the user question. To retrieve relevant documents from an OpenSearch index, you'll need a text embedding model that facilitates vector search. diff --git a/_ml-commons-plugin/agents-tools/tools/search-alerts-tool.md b/_ml-commons-plugin/agents-tools/tools/search-alerts-tool.md index 387ef1cbab..76f9e4b4dc 100644 --- a/_ml-commons-plugin/agents-tools/tools/search-alerts-tool.md +++ b/_ml-commons-plugin/agents-tools/tools/search-alerts-tool.md @@ -10,13 +10,10 @@ grand_parent: Agents and tools # Search Alerts tool -**Introduced 2.12** +**Introduced 2.13** {: .label .label-purple } -This is an experimental feature and is not recommended for use in a production environment. For updates on the progress of the feature or if you want to leave feedback, see the associated [GitHub issue](https://github.com/opensearch-project/ml-commons/issues/1161). -{: .warning} - The `SearchAlertsTool` retrieves information about generated alerts. For more information about alerts, see [Alerting]({{site.url}}{{site.baseurl}}/observing-your-data/alerting/index/). ## Step 1: Register a flow agent that will run the SearchAlertsTool diff --git a/_ml-commons-plugin/agents-tools/tools/search-anomaly-detectors.md b/_ml-commons-plugin/agents-tools/tools/search-anomaly-detectors.md index de93a404a3..9f31dea057 100644 --- a/_ml-commons-plugin/agents-tools/tools/search-anomaly-detectors.md +++ b/_ml-commons-plugin/agents-tools/tools/search-anomaly-detectors.md @@ -10,13 +10,10 @@ grand_parent: Agents and tools # Search Anomaly Detectors tool -**Introduced 2.12** +**Introduced 2.13** {: .label .label-purple } -This is an experimental feature and is not recommended for use in a production environment. For updates on the progress of the feature or if you want to leave feedback, see the associated [GitHub issue](https://github.com/opensearch-project/ml-commons/issues/1161). -{: .warning} - The `SearchAnomalyDetectorsTool` retrieves information about anomaly detectors set up on your cluster. For more information about anomaly detectors, see [Anomaly detection]({{site.url}}{{site.baseurl}}/observing-your-data/ad/index/). 
## Step 1: Register a flow agent that will run the SearchAnomalyDetectorsTool diff --git a/_ml-commons-plugin/agents-tools/tools/search-anomaly-results.md b/_ml-commons-plugin/agents-tools/tools/search-anomaly-results.md index bce27bba55..2f2728e32d 100644 --- a/_ml-commons-plugin/agents-tools/tools/search-anomaly-results.md +++ b/_ml-commons-plugin/agents-tools/tools/search-anomaly-results.md @@ -10,13 +10,10 @@ grand_parent: Agents and tools # Search Anomaly Results tool -**Introduced 2.12** +**Introduced 2.13** {: .label .label-purple } -This is an experimental feature and is not recommended for use in a production environment. For updates on the progress of the feature or if you want to leave feedback, see the associated [GitHub issue](https://github.com/opensearch-project/ml-commons/issues/1161). -{: .warning} - The `SearchAnomalyResultsTool` retrieves information about anomaly detector results. For more information about anomaly detectors, see [Anomaly detection]({{site.url}}{{site.baseurl}}/observing-your-data/ad/index/). ## Step 1: Register a flow agent that will run the SearchAnomalyResultsTool diff --git a/_ml-commons-plugin/agents-tools/tools/search-index-tool.md b/_ml-commons-plugin/agents-tools/tools/search-index-tool.md index 86ecbfc609..b023522893 100644 --- a/_ml-commons-plugin/agents-tools/tools/search-index-tool.md +++ b/_ml-commons-plugin/agents-tools/tools/search-index-tool.md @@ -10,13 +10,10 @@ grand_parent: Agents and tools # Search Index tool -**Introduced 2.12** +**Introduced 2.13** {: .label .label-purple } -This is an experimental feature and is not recommended for use in a production environment. For updates on the progress of the feature or if you want to leave feedback, see the associated [GitHub issue](https://github.com/opensearch-project/ml-commons/issues/1161). -{: .warning} - The `SearchIndexTool` searches an index using a query written in query domain-specific language (DSL) and returns the query results. ## Step 1: Register a flow agent that will run the SearchIndexTool diff --git a/_ml-commons-plugin/agents-tools/tools/search-monitors-tool.md b/_ml-commons-plugin/agents-tools/tools/search-monitors-tool.md index 2b746d3453..77b51d4964 100644 --- a/_ml-commons-plugin/agents-tools/tools/search-monitors-tool.md +++ b/_ml-commons-plugin/agents-tools/tools/search-monitors-tool.md @@ -10,13 +10,10 @@ grand_parent: Agents and tools # Search Monitors tool -**Introduced 2.12** +**Introduced 2.13** {: .label .label-purple } -This is an experimental feature and is not recommended for use in a production environment. For updates on the progress of the feature or if you want to leave feedback, see the associated [GitHub issue](https://github.com/opensearch-project/ml-commons/issues/1161). -{: .warning} - The `SearchMonitorsTool` retrieves information about alerting monitors set up on your cluster. For more information about alerting monitors, see [Monitors]({{site.url}}{{site.baseurl}}/observing-your-data/alerting/monitors/). 
## Step 1: Register a flow agent that will run the SearchMonitorsTool diff --git a/_ml-commons-plugin/agents-tools/tools/vector-db-tool.md b/_ml-commons-plugin/agents-tools/tools/vector-db-tool.md index d8b8083df3..9093541cbb 100644 --- a/_ml-commons-plugin/agents-tools/tools/vector-db-tool.md +++ b/_ml-commons-plugin/agents-tools/tools/vector-db-tool.md @@ -10,13 +10,10 @@ grand_parent: Agents and tools # Vector DB tool -**Introduced 2.12** +**Introduced 2.13** {: .label .label-purple } -This is an experimental feature and is not recommended for use in a production environment. For updates on the progress of the feature or if you want to leave feedback, see the associated [GitHub issue](https://github.com/opensearch-project/ml-commons/issues/1161). -{: .warning} - The `VectorDBTool` performs dense vector retrieval. For more information about OpenSearch vector database capabilities, see [neural search]({{site.url}}{{site.baseurl}}/search-plugins/neural-search/). ## Step 1: Register and deploy a sparse encoding model diff --git a/_ml-commons-plugin/agents-tools/tools/visualization-tool.md b/_ml-commons-plugin/agents-tools/tools/visualization-tool.md index 1407232555..98457932c2 100644 --- a/_ml-commons-plugin/agents-tools/tools/visualization-tool.md +++ b/_ml-commons-plugin/agents-tools/tools/visualization-tool.md @@ -9,12 +9,9 @@ grand_parent: Agents and tools --- # Visualization tool -**Introduced 2.12** +**Introduced 2.13** {: .label .label-purple } -This is an experimental feature and is not recommended for use in a production environment. For updates on the progress of the feature or if you want to leave feedback, see the associated [GitHub issue](https://github.com/opensearch-project/ml-commons/issues/1161). -{: .warning} - Use the `VisualizationTool` to find visualizations relevant to a question. ## Step 1: Register a flow agent that will run the VisualizationTool diff --git a/_ml-commons-plugin/api/agent-apis/delete-agent.md b/_ml-commons-plugin/api/agent-apis/delete-agent.md index 0327c3bf04..ddde8fb19b 100644 --- a/_ml-commons-plugin/api/agent-apis/delete-agent.md +++ b/_ml-commons-plugin/api/agent-apis/delete-agent.md @@ -7,12 +7,9 @@ nav_order: 50 --- # Delete an agent -**Introduced 2.12** +**Introduced 2.13** {: .label .label-purple } -This is an experimental feature and is not recommended for use in a production environment. For updates on the progress of the feature or if you want to leave feedback, see the associated [GitHub issue](https://github.com/opensearch-project/ml-commons/issues/1161). -{: .warning} - You can use this API to delete an agent based on the `agent_id`. ## Path and HTTP methods diff --git a/_ml-commons-plugin/api/agent-apis/execute-agent.md b/_ml-commons-plugin/api/agent-apis/execute-agent.md index 8302ac265f..27d50bced0 100644 --- a/_ml-commons-plugin/api/agent-apis/execute-agent.md +++ b/_ml-commons-plugin/api/agent-apis/execute-agent.md @@ -7,12 +7,9 @@ nav_order: 20 --- # Execute an agent -**Introduced 2.12** +**Introduced 2.13** {: .label .label-purple } -This is an experimental feature and is not recommended for use in a production environment. For updates on the progress of the feature or if you want to leave feedback, see the associated [GitHub issue](https://github.com/opensearch-project/ml-commons/issues/1161). -{: .warning} - When an agent is executed, it runs the tools with which it is configured. 
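
For instance, you can run an agent by passing the parameters that its tools expect. The following is a minimal sketch, assuming a hypothetical agent ID and an agent whose tools accept a `question` parameter:

```json
POST /_plugins/_ml/agents/879v9YwBjWKCe6Kg12Tx/_execute
{
  "parameters": {
    "question": "What is the population increase of Seattle from 2021 to 2023?"
  }
}
```
{% include copy-curl.html %}
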
### Path and HTTP methods diff --git a/_ml-commons-plugin/api/agent-apis/get-agent.md b/_ml-commons-plugin/api/agent-apis/get-agent.md index be49a87502..6190406649 100644 --- a/_ml-commons-plugin/api/agent-apis/get-agent.md +++ b/_ml-commons-plugin/api/agent-apis/get-agent.md @@ -7,12 +7,9 @@ nav_order: 20 --- # Get an agent -**Introduced 2.12** +**Introduced 2.13** {: .label .label-purple } -This is an experimental feature and is not recommended for use in a production environment. For updates on the progress of the feature or if you want to leave feedback, see the associated [GitHub issue](https://github.com/opensearch-project/ml-commons/issues/1161). -{: .warning} - You can retrieve agent information using the `agent_id`. ## Path and HTTP methods diff --git a/_ml-commons-plugin/api/agent-apis/index.md b/_ml-commons-plugin/api/agent-apis/index.md index 4b6954a79f..72bf6082ce 100644 --- a/_ml-commons-plugin/api/agent-apis/index.md +++ b/_ml-commons-plugin/api/agent-apis/index.md @@ -9,12 +9,9 @@ redirect_from: /ml-commons-plugin/api/agent-apis/ --- # Agent APIs -**Introduced 2.12** +**Introduced 2.13** {: .label .label-purple } -This is an experimental feature and is not recommended for use in a production environment. For updates on the progress of the feature or if you want to leave feedback, see the associated [GitHub issue](https://github.com/opensearch-project/ml-commons/issues/1161). -{: .warning} - You can automate machine learning (ML) tasks using agents and tools. An _agent_ orchestrates and runs ML models and tools. For more information, see [Agents and tools]({{site.url}}{{site.baseurl}}/ml-commons-plugin/agents-tools/index/). ML Commons supports the following agent-level APIs: diff --git a/_ml-commons-plugin/api/agent-apis/register-agent.md b/_ml-commons-plugin/api/agent-apis/register-agent.md index 75a63d40cf..820bb923f7 100644 --- a/_ml-commons-plugin/api/agent-apis/register-agent.md +++ b/_ml-commons-plugin/api/agent-apis/register-agent.md @@ -7,12 +7,9 @@ nav_order: 10 --- # Register an agent -**Introduced 2.12** +**Introduced 2.13** {: .label .label-purple } -This is an experimental feature and is not recommended for use in a production environment. For updates on the progress of the feature or if you want to leave feedback, see the associated [GitHub issue](https://github.com/opensearch-project/ml-commons/issues/1161). -{: .warning} - Use this API to register an agent. Agents may be of the following types: diff --git a/_ml-commons-plugin/api/agent-apis/search-agent.md b/_ml-commons-plugin/api/agent-apis/search-agent.md index c5df482ac2..3d950cde8f 100644 --- a/_ml-commons-plugin/api/agent-apis/search-agent.md +++ b/_ml-commons-plugin/api/agent-apis/search-agent.md @@ -7,12 +7,9 @@ nav_order: 30 --- # Search for an agent -**Introduced 2.12** +**Introduced 2.13** {: .label .label-purple } -This is an experimental feature and is not recommended for use in a production environment. For updates on the progress of the feature or if you want to leave feedback, see the associated [GitHub issue](https://github.com/opensearch-project/ml-commons/issues/1161). -{: .warning} - Use this command to search for agents you've already created. You can provide any OpenSearch search query in the request body. 
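
For example, the following is a minimal sketch of a search request that matches agents of the `flow` type; the `type` field follows the agent registration fields, and any query DSL clause can be substituted:

```json
POST /_plugins/_ml/agents/_search
{
  "query": {
    "term": {
      "type": {
        "value": "flow"
      }
    }
  }
}
```
{% include copy-curl.html %}
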
## Path and HTTP methods
diff --git a/_ml-commons-plugin/api/model-apis/register-model.md b/_ml-commons-plugin/api/model-apis/register-model.md
index 880cbd68e5..dd157ed264 100644
--- a/_ml-commons-plugin/api/model-apis/register-model.md
+++ b/_ml-commons-plugin/api/model-apis/register-model.md
@@ -183,8 +183,9 @@ Field | Data type | Required/Optional | Description
 `description` | String | Optional| The model description. |
 `model_group_id` | String | Optional | The model group ID of the model group to register this model to.
 `is_enabled`| Boolean | Specifies whether the model is enabled. Disabling the model makes it unavailable for Predict API requests, regardless of the model's deployment status. Default is `true`.
+`guardrails`| Object | Optional | The guardrails for the model. For more information, see [Guardrails](#the-guardrails-parameter).|
 
-#### Example request: Remote model with a standalone connector
+#### Example request: Externally hosted model with a standalone connector
 
 ```json
 POST /_plugins/_ml/models/_register
@@ -198,7 +199,7 @@ POST /_plugins/_ml/models/_register
 ```
 {% include copy-curl.html %}
 
-#### Example request: Remote model with a connector specified as part of the model
+#### Example request: Externally hosted model with a connector specified as part of the model
 
 ```json
 POST /_plugins/_ml/models/_register
@@ -248,6 +249,70 @@ OpenSearch responds with the `task_id` and task `status`.
 }
 ```
 
+### The `guardrails` parameter
+
+Guardrails are safety measures for large language models (LLMs). They provide a set of rules and boundaries that control how an LLM behaves and what kind of output it generates.
+
+To register an externally hosted model with guardrails, provide the `guardrails` parameter, which supports the following fields. All fields are optional.
+
+Field | Data type | Description
+:--- | :--- | :---
+`type` | String | The guardrail type. Currently, only `local_regex` is supported.
+`input_guardrail`| Object | The guardrail for the model input. |
+`output_guardrail`| Object | The guardrail for the model output. |
+`stop_words`| Object | The list of indexes containing stopwords used to validate the model input/output. If the model prompt/response contains a stopword from any of the indexes, the predict request on this model is rejected. |
+`index_name`| Object | The name of the index storing the stopwords. |
+`source_fields`| Object | The name of the field storing the stopwords. |
+`regex`| Object | A regular expression used for input/output validation. If the model prompt/response matches the regular expression, the predict request on this model is rejected. |
+
+#### Example request: Externally hosted model with guardrails
+
+```json
+POST /_plugins/_ml/models/_register
+{
+  "name": "openAI-gpt-3.5-turbo",
+  "function_name": "remote",
+  "model_group_id": "1jriBYsBq7EKuKzZX131",
+  "description": "test model",
+  "connector_id": "a1eMb4kBJ1eYAeTMAljY",
+  "guardrails": {
+    "type": "local_regex",
+    "input_guardrail": {
+      "stop_words": [
+        {
+          "index_name": "stop_words_input",
+          "source_fields": ["title"]
+        }
+      ],
+      "regex": ["regex1", "regex2"]
+    },
+    "output_guardrail": {
+      "stop_words": [
+        {
+          "index_name": "stop_words_output",
+          "source_fields": ["title"]
+        }
+      ],
+      "regex": ["regex1", "regex2"]
+    }
+  }
+}
+```
+{% include copy-curl.html %}
+
+For a complete example, see [Guardrails]({{site.url}}{{site.baseurl}}/ml-commons-plugin/remote-models/guardrails/).
+
+ +#### Example response + +OpenSearch responds with the `task_id` and task `status`: + +```json +{ + "task_id" : "ew8I44MBhyWuIwnfvDIH", + "status" : "CREATED" +} +``` + ## Check the status of model registration To see the status of your model registration and retrieve the model ID created for the new model version, pass the `task_id` as a path parameter to the Tasks API: diff --git a/_ml-commons-plugin/api/model-apis/update-model.md b/_ml-commons-plugin/api/model-apis/update-model.md index 380f422272..877d0b5c51 100644 --- a/_ml-commons-plugin/api/model-apis/update-model.md +++ b/_ml-commons-plugin/api/model-apis/update-model.md @@ -36,6 +36,7 @@ Field | Data type | Description `rate_limiter` | Object | Limits the number of times any user can call the Predict API on the model. For more information, see [Rate limiting inference calls]({{site.url}}{{site.baseurl}}/ml-commons-plugin/integrating-ml-models/#rate-limiting-inference-calls). `rate_limiter.limit` | Integer | The maximum number of times any user can call the Predict API on the model per `unit` of time. By default, there is no limit on the number of Predict API calls. Once you set a limit, you cannot reset it to no limit. As an alternative, you can specify a high limit value and a small time unit, for example, 1 request per nanosecond. `rate_limiter.unit` | String | The unit of time for the rate limiter. Valid values are `DAYS`, `HOURS`, `MICROSECONDS`, `MILLISECONDS`, `MINUTES`, `NANOSECONDS`, and `SECONDS`. +`guardrails`| Object | The guardrails for the model. #### Example request: Disabling a model @@ -62,6 +63,35 @@ PUT /_plugins/_ml/models/T_S-cY0BKCJ3ot9qr0aP ``` {% include copy-curl.html %} +#### Example request: Updating the guardrails + +```json +PUT /_plugins/_ml/models/MzcIJX8BA7mbufL6DOwl +{ + "guardrails": { + "input_guardrail": { + "stop_words": [ + { + "index_name": "updated_stop_words_input", + "source_fields": ["updated_title"] + } + ], + "regex": ["updated_regex1", "updated_regex2"] + }, + "output_guardrail": { + "stop_words": [ + { + "index_name": "updated_stop_words_output", + "source_fields": ["updated_title"] + } + ], + "regex": ["updated_regex1", "updated_regex2"] + } + } +} +``` +{% include copy-curl.html %} + #### Example response ```json @@ -78,4 +108,5 @@ PUT /_plugins/_ml/models/T_S-cY0BKCJ3ot9qr0aP "_seq_no": 48, "_primary_term": 4 } -``` \ No newline at end of file +``` + diff --git a/_ml-commons-plugin/custom-local-models.md b/_ml-commons-plugin/custom-local-models.md index ee44a0a529..a265d8804a 100644 --- a/_ml-commons-plugin/custom-local-models.md +++ b/_ml-commons-plugin/custom-local-models.md @@ -7,7 +7,7 @@ nav_order: 120 --- # Custom local models -**Generally available 2.9** +**Introduced 2.9** {: .label .label-purple } To use a custom model locally, you can upload it to the OpenSearch cluster. diff --git a/_ml-commons-plugin/ml-dashboard.md b/_ml-commons-plugin/ml-dashboard.md index 3195aff8de..20c4e636bb 100644 --- a/_ml-commons-plugin/ml-dashboard.md +++ b/_ml-commons-plugin/ml-dashboard.md @@ -7,7 +7,7 @@ redirect_from: --- # Managing ML models in OpenSearch Dashboards -**Generally available 2.9** +**Introduced 2.9** {: .label .label-purple } Administrators of machine learning (ML) clusters can use OpenSearch Dashboards to manage and check the status of ML models running inside a cluster. This can help ML developers provision nodes to ensure their models run efficiently. 
diff --git a/_ml-commons-plugin/opensearch-assistant.md b/_ml-commons-plugin/opensearch-assistant.md index 3a8e0c8703..0a058d73a0 100644 --- a/_ml-commons-plugin/opensearch-assistant.md +++ b/_ml-commons-plugin/opensearch-assistant.md @@ -7,12 +7,9 @@ nav_order: 28 --- # OpenSearch Assistant Toolkit -**Introduced 2.12** +**Introduced 2.13** {: .label .label-purple } -This is an experimental feature and is not recommended for use in a production environment. For updates on the progress of the feature or if you want to leave feedback, see the associated [OpenSearch forum thread](https://forum.opensearch.org/t/feedback-opensearch-assistant/16741). -{: .warning} - The OpenSearch Assistant Toolkit helps you create AI-powered assistants for OpenSearch Dashboards. The toolkit includes the following elements: - [**Agents and tools**]({{site.url}}{{site.baseurl}}/ml-commons-plugin/agents-tools/index/): _Agents_ interface with a large language model (LLM) and execute high-level tasks, such as summarization or generating Piped Processing Language (PPL) queries from natural language. The agent's high-level tasks consist of low-level tasks called _tools_, which can be reused by multiple agents. @@ -36,8 +33,6 @@ To enable OpenSearch Assistant, perform the following steps: ``` {% include copy.html %} -For more information about ways to enable experimental features, see [Experimental feature flags]({{site.url}}{{site.baseurl}}/install-and-configure/configuring-opensearch/experimental/). - ## Next steps - For more information about the OpenSearch Assistant UI, see [OpenSearch Assistant for OpenSearch Dashboards]({{site.url}}{{site.baseurl}}/dashboards/dashboards-assistant/index/) \ No newline at end of file diff --git a/_ml-commons-plugin/pretrained-models.md b/_ml-commons-plugin/pretrained-models.md index c68f9c8bab..8847d36291 100644 --- a/_ml-commons-plugin/pretrained-models.md +++ b/_ml-commons-plugin/pretrained-models.md @@ -7,7 +7,7 @@ nav_order: 120 --- # OpenSearch-provided pretrained models -**Generally available 2.9** +**Introduced 2.9** {: .label .label-purple } OpenSearch provides a variety of open-source pretrained models that can assist with a range of machine learning (ML) search and analytics use cases. You can upload any supported model to the OpenSearch cluster and use it locally. diff --git a/_ml-commons-plugin/remote-models/guardrails.md b/_ml-commons-plugin/remote-models/guardrails.md new file mode 100644 index 0000000000..ca34eb335c --- /dev/null +++ b/_ml-commons-plugin/remote-models/guardrails.md @@ -0,0 +1,298 @@ +--- +layout: default +title: Guardrails +has_children: false +has_toc: false +nav_order: 70 +parent: Connecting to externally hosted models +grand_parent: Integrating ML models +--- + +# Configuring model guardrails +**Introduced 2.13** +{: .label .label-purple } + +Guardrails can guide a large language model (LLM) toward desired behavior. They act as a filter, preventing the LLM from generating output that is harmful or violates ethical principles and facilitating safer use of AI. Guardrails also cause the LLM to produce more focused and relevant output. + +To configure guardrails for your LLM, you can provide a list of words to be prohibited in the input or output of the model. Alternatively, you can provide a regular expression against which the model input or output will be matched. 
+
## Prerequisites

Before you start, make sure you have fulfilled the [prerequisites]({{site.url}}{{site.baseurl}}/ml-commons-plugin/remote-models/index/#prerequisites) for connecting to an externally hosted model.

## Step 1: Create a guardrail index

To start, create an index that will store the excluded words (_stopwords_). In the index mappings, specify a `title` field, which will contain the excluded words, and a `query` field of the [percolator]({{site.url}}{{site.baseurl}}/field-types/supported-field-types/percolator/) type. The percolator query will be used to match the LLM input or output:

```json
PUT /words0
{
  "mappings": {
    "properties": {
      "title": {
        "type": "text"
      },
      "query": {
        "type": "percolator"
      }
    }
  }
}
```
{% include copy-curl.html %}

## Step 2: Index excluded words or phrases

Next, index a query string query that will be used to match excluded words in the model input or output:

```json
PUT /words0/_doc/1?refresh
{
  "query": {
    "query_string": {
      "query": "title: blacklist"
    }
  }
}
```
{% include copy-curl.html %}

```json
PUT /words0/_doc/2?refresh
{
  "query": {
    "query_string": {
      "query": "title: \"Master slave architecture\""
    }
  }
}
```
{% include copy-curl.html %}

For more query string options, see [Query string query]({{site.url}}{{site.baseurl}}/query-dsl/full-text/query-string/).

## Step 3: Register a model group

To register a model group, send the following request:

```json
POST /_plugins/_ml/model_groups/_register
{
  "name": "bedrock",
  "description": "This is a public model group."
}
```
{% include copy-curl.html %}

The response contains the model group ID that you'll use to register a model to this model group:

```json
{
  "model_group_id": "wlcnb4kBJ1eYAeTMHlV6",
  "status": "CREATED"
}
```

To learn more about model groups, see [Model access control]({{site.url}}{{site.baseurl}}/ml-commons-plugin/model-access-control/).

## Step 4: Create a connector

Now you can create a connector for the model.
In this example, you'll create a connector to the Anthropic Claude model hosted on Amazon Bedrock: + +```json +POST /_plugins/_ml/connectors/_create +{ + "name": "BedRock test claude Connector", + "description": "The connector to BedRock service for claude model", + "version": 1, + "protocol": "aws_sigv4", + "parameters": { + "region": "us-east-1", + "service_name": "bedrock", + "anthropic_version": "bedrock-2023-05-31", + "endpoint": "bedrock.us-east-1.amazonaws.com", + "auth": "Sig_V4", + "content_type": "application/json", + "max_tokens_to_sample": 8000, + "temperature": 0.0001, + "response_filter": "$.completion" + }, + "credential": { + "access_key": "", + "secret_key": "" + }, + "actions": [ + { + "action_type": "predict", + "method": "POST", + "url": "https://bedrock-runtime.us-east-1.amazonaws.com/model/anthropic.claude-v2/invoke", + "headers": { + "content-type": "application/json", + "x-amz-content-sha256": "required" + }, + "request_body": "{\"prompt\":\"${parameters.prompt}\", \"max_tokens_to_sample\":${parameters.max_tokens_to_sample}, \"temperature\":${parameters.temperature}, \"anthropic_version\":\"${parameters.anthropic_version}\" }" + } + ] +} +``` +{% include copy-curl.html %} + +The response contains the connector ID for the newly created connector: + +```json +{ + "connector_id": "a1eMb4kBJ1eYAeTMAljY" +} +``` + +## Step 5: Register and deploy the model with guardrails + +To register an externally hosted model, provide the model group ID from step 3 and the connector ID from step 4 in the following request. To configure guardrails, include the `guardrails` object: + +```json +POST /_plugins/_ml/models/_register?deploy=true +{ + "name": "Bedrock Claude V2 model", + "function_name": "remote", + "model_group_id": "wlcnb4kBJ1eYAeTMHlV6", + "description": "test model", + "connector_id": "a1eMb4kBJ1eYAeTMAljY", + "guardrails": { + "type": "local_regex", + "input_guardrail": { + "stop_words": [ + { + "index_name": "words0", + "source_fields": [ + "title" + ] + } + ], + "regex": [ + ".*abort.*", + ".*kill.*" + ] + }, + "output_guardrail": { + "stop_words": [ + { + "index_name": "words0", + "source_fields": [ + "title" + ] + } + ], + "regex": [ + ".*abort.*", + ".*kill.*" + ] + } + } +} +``` +{% include copy-curl.html %} + +For more information, see [The `guardrails` parameter]({{site.url}}{{site.baseurl}}/ml-commons-plugin/api/model-apis/register-model/#the-guardrails-parameter). 
+
OpenSearch returns the task ID of the register operation:

```json
{
  "task_id": "cVeMb4kBJ1eYAeTMFFgj",
  "status": "CREATED"
}
```

To check the status of the operation, provide the task ID to the [Tasks API]({{site.url}}{{site.baseurl}}/ml-commons-plugin/api/tasks-apis/get-task/):

```bash
GET /_plugins/_ml/tasks/cVeMb4kBJ1eYAeTMFFgj
```
{% include copy-curl.html %}

When the operation is complete, the state changes to `COMPLETED`:

```json
{
  "model_id": "cleMb4kBJ1eYAeTMFFg4",
  "task_type": "DEPLOY_MODEL",
  "function_name": "REMOTE",
  "state": "COMPLETED",
  "worker_node": [
    "n-72khvBTBi3bnIIR8FTTw"
  ],
  "create_time": 1689793851077,
  "last_update_time": 1689793851101,
  "is_async": true
}
```

## Step 6 (Optional): Test the model

To demonstrate how guardrails are applied, first run a predict operation whose prompt does not contain any excluded words:

```json
POST /_plugins/_ml/models/cleMb4kBJ1eYAeTMFFg4/_predict
{
  "parameters": {
    "prompt": "\n\nHuman:this is a test\n\nAssistant:"
  }
}
```
{% include copy-curl.html %}

The response contains inference results:

```json
{
  "inference_results": [
    {
      "output": [
        {
          "name": "response",
          "dataAsMap": {
            "response": " Thank you for the test, I appreciate you taking the time to interact with me. I'm an AI assistant created by Anthropic to be helpful, harmless, and honest."
          }
        }
      ],
      "status_code": 200
    }
  ]
}
```

Then run a predict operation whose prompt contains excluded words:

```json
POST /_plugins/_ml/models/cleMb4kBJ1eYAeTMFFg4/_predict
{
  "parameters": {
    "prompt": "\n\nHuman:this is a test of Master slave architecture\n\nAssistant:"
  }
}
```
{% include copy-curl.html %}

The response contains an error message because guardrails were triggered:

```json
{
  "error": {
    "root_cause": [
      {
        "type": "illegal_argument_exception",
        "reason": "guardrails triggered for user input"
      }
    ],
    "type": "illegal_argument_exception",
    "reason": "guardrails triggered for user input"
  },
  "status": 400
}
```

Guardrails are also triggered when a prompt matches the supplied regular expression.

## Next steps

- For more information about configuring guardrails, see [The `guardrails` parameter]({{site.url}}{{site.baseurl}}/ml-commons-plugin/api/model-apis/register-model/#the-guardrails-parameter).
\ No newline at end of file
diff --git a/_ml-commons-plugin/remote-models/index.md b/_ml-commons-plugin/remote-models/index.md
index 657d7254be..0b92adaab6 100644
--- a/_ml-commons-plugin/remote-models/index.md
+++ b/_ml-commons-plugin/remote-models/index.md
@@ -328,3 +328,4 @@ To learn how to use the model for vector search, see [Using an ML model for neur
- For more information about connector parameters, see [Connector blueprints]({{site.url}}{{site.baseurl}}/ml-commons-plugin/remote-models/blueprints/).
- For more information about managing ML models in OpenSearch, see [Using ML models within OpenSearch]({{site.url}}{{site.baseurl}}/ml-commons-plugin/model-serving-framework/).
- For more information about interacting with ML models in OpenSearch, see [Managing ML models in OpenSearch Dashboards]({{site.url}}{{site.baseurl}}/ml-commons-plugin/ml-dashboard/)
+- For instructions on how to configure model guardrails, see [Guardrails]({{site.url}}{{site.baseurl}}/ml-commons-plugin/remote-models/guardrails/).
diff --git a/_ml-commons-plugin/using-ml-models.md b/_ml-commons-plugin/using-ml-models.md index 5c23e19ab6..db50626721 100644 --- a/_ml-commons-plugin/using-ml-models.md +++ b/_ml-commons-plugin/using-ml-models.md @@ -10,7 +10,7 @@ redirect_from: --- # Using ML models within OpenSearch -**Generally available 2.9** +**Introduced 2.9** {: .label .label-purple } To integrate machine learning (ML) models into your OpenSearch cluster, you can upload and serve them locally. Choose one of the following options: diff --git a/_observing-your-data/event-analytics.md b/_observing-your-data/event-analytics.md index dd936b7d27..b8fe72964c 100644 --- a/_observing-your-data/event-analytics.md +++ b/_observing-your-data/event-analytics.md @@ -30,9 +30,6 @@ For more information about building PPL queries, see [Piped Processing Language] ### OpenSearch Dashboards Query Assistant -This is an experimental feature and is not recommended for use in a production environment. For updates on the progress of the feature or if you want to leave feedback, see the associated [OpenSearch forum thread](https://forum.opensearch.org/t/feedback-opensearch-assistant/16741). -{: .warning} - Note that machine learning models are probabilistic and that some may perform better than others, so the OpenSearch Assistant may occasionally produce inaccurate information. We recommend evaluating outputs for accuracy as appropriate to your use case, including reviewing the output or combining it with other verification factors. {: .important} @@ -42,28 +39,23 @@ To simplify query building, the **OpenSearch Assistant** toolkit offers an assis #### Enabling Query Assistant -To enable **Query Assistant** in OpenSearch Dashboards, locate your copy of the `opensearch_dashboards.yml` file and set the following option: - -``` -observability.query_assist.enabled: true -observability.query_assist.ppl_agent_name: "PPL agent" -``` +By default, **Query Assistant** is enabled in OpenSearch Dashboards. To enable summarization of responses, locate your copy of the `opensearch_dashboards.yml` file and set the following option: -To enable summarization of responses, locate your copy of the `opensearch_dashboards.yml` file and set the following option: - -``` +```yaml observability.summarize.enabled: true observability.summarize.response_summary_agent_name: "Response summary agent" observability.summarize.error_summary_agent_name: "Error summary agent" ``` +To disable Query Assistant, add `observability.query_assist.enabled: false` to your `opensearch_dashboards.yml`. + #### Setting up Query Assistant To set up **Query Assistant**, follow the steps in the [Getting started guide](https://github.com/opensearch-project/dashboards-assistant/blob/main/GETTING_STARTED_GUIDE.md) on GitHub. This guide provides step-by-step setup instructions for **OpenSearch Assistant** and **Query Assistant**. To set up **Query Assistant** only, use the `query-assist-agent` template included in the guide. ## Saving a visualization -After Dashboards generates a visualization, save it if you want to revisit it or include it in an [operational panel]({{site.url}}{{site.baseurl}}/observing-your-data/operational-panels). To save a visualization, expand the **Save** dropdown menu in the upper-right corner, enter a name for the visualization, and then select the **Save** button. You can reopen saved visualizations on the event analytics page. 
+After Dashboards generates a visualization, save it if you want to revisit it or include it in an [operational panel]({{site.url}}{{site.baseurl}}/observing-your-data/operational-panels/). To save a visualization, expand the **Save** dropdown menu in the upper-right corner, enter a name for the visualization, and then select the **Save** button. You can reopen saved visualizations on the event analytics page. ## Creating event analytics visualizations and adding them to dashboards diff --git a/_search-plugins/caching/index.md b/_search-plugins/caching/index.md deleted file mode 100644 index 4d0173fdc7..0000000000 --- a/_search-plugins/caching/index.md +++ /dev/null @@ -1,32 +0,0 @@ ---- -layout: default -title: Caching -parent: Improving search performance -has_children: true -nav_order: 100 ---- - -# Caching - -OpenSearch relies heavily on different on-heap cache types to accelerate data retrieval, providing significant improvement in search latencies. However, cache size is limited by the amount of memory available on a node. If you are processing a larger dataset that can potentially be cached, the cache size limit causes a lot of cache evictions and misses. The increasing number of evictions impacts performance because OpenSearch needs to process the query again, causing high resource consumption. - -Prior to version 2.13, OpenSearch supported the following on-heap cache types: - -- **Request cache**: Caches the local results on each shard. This allows frequently used (and potentially resource-heavy) search requests to return results almost instantly. -- **Query cache**: The shard-level query cache caches common data from similar queries. The query cache is more granular than the request cache and can cache data that is reused in different queries. -- **Field data cache**: The field data cache contains field data and global ordinals, which are both used to support aggregations on certain field types. - -## Additional cache stores -**Introduced 2.13** -{: .label .label-purple } - -This is an experimental feature and is not recommended for use in a production environment. For updates on the progress of the feature or if you want to leave feedback, see the associated [GitHub issue](https://github.com/opensearch-project/OpenSearch/issues/10024). -{: .warning} - -In addition to existing OpenSearch custom on-heap cache stores, cache plugins provide the following cache stores: - -- **Disk cache**: This cache stores the precomputed result of a query on disk. You can use a disk cache to cache much larger datasets, provided that the disk latencies are acceptable. -- **Tiered cache**: This is a multi-level cache, in which each tier has its own characteristics and performance levels. For example, a tiered cache can contain on-heap and disk tiers. By combining different tiers, you can achieve a balance between cache performance and size. To learn more, see [Tiered cache]({{site.url}}{{site.baseurl}}/search-plugins/caching/tiered-cache/). - -In OpenSearch 2.13, the request cache is integrated with cache plugins. You can use a tiered or disk cache as a request-level cache. 
-{: .note} \ No newline at end of file diff --git a/_search-plugins/caching/tiered-cache.md b/_search-plugins/caching/tiered-cache.md deleted file mode 100644 index 3842ebe5a9..0000000000 --- a/_search-plugins/caching/tiered-cache.md +++ /dev/null @@ -1,82 +0,0 @@ ---- -layout: default -title: Tiered cache -parent: Caching -grand_parent: Improving search performance -nav_order: 10 ---- - -# Tiered cache - -This is an experimental feature and is not recommended for use in a production environment. For updates on the progress of the feature or if you want to leave feedback, see the associated [GitHub issue](https://github.com/opensearch-project/OpenSearch/issues/10024). -{: .warning} - -A tiered cache is a multi-level cache, in which each tier has its own characteristics and performance levels. By combining different tiers, you can achieve a balance between cache performance and size. - -## Types of tiered caches - -OpenSearch 2.13 provides an implementation of _tiered spillover cache_. This implementation spills the evicted items from upper to lower tiers. The upper tier is smaller in size but offers better latency, like the on-heap tier. The lower tier is larger in size but is slower in terms of latency compared to the upper tier. A disk cache is an example of a lower tier. OpenSearch 2.13 offers on-heap and disk tiers. - -## Enabling a tiered cache - -To enable a tiered cache, configure the following setting: - -```yaml -opensearch.experimental.feature.pluggable.caching.enabled: true -``` -{% include copy.html %} - -For more information about ways to enable experimental features, see [Experimental feature flags]({{site.url}}{{site.baseurl}}/install-and-configure/configuring-opensearch/experimental/). - -## Installing required plugins - -A tiered cache provides a way to plug in any disk or on-heap tier implementation. You can install the plugins you intend to use in the tiered cache. As of OpenSearch 2.13, the available cache plugin is the `cache-ehcache` plugin. This plugin provides a disk cache implementation to use within a tiered cache as a disk tier. - -A tiered cache will fail to initialize if the `cache-ehcache` plugin is not installed or disk cache properties are not set. -{: .warning} - -## Tiered cache settings - -In OpenSearch 2.13, a request cache can use a tiered cache. To begin, configure the following settings in the `opensearch.yml` file. - -### Cache store name - -Set the cache store name to `tiered_spillover` to use the OpenSearch-provided tiered spillover cache implementation: - -```yaml -indices.request.cache.store.name: tiered_spillover: true -``` -{% include copy.html %} - -### Setting on-heap and disk store tiers - -The `opensearch_onheap` setting is the built-in on-heap cache available in OpenSearch. The `ehcache_disk` setting is the disk cache implementation from [Ehcache](https://www.ehcache.org/). This requires installing the `cache-ehcache` plugin: - -```yaml -indices.request.cache.tiered_spillover.onheap.store.name: opensearch_onheap -indices.request.cache.tiered_spillover.disk.store.name: ehcache_disk -``` -{% include copy.html %} - -For more information about installing non-bundled plugins, see [Additional plugins]({{site.url}}{{site.baseurl}}/install-and-configure/plugins/#additional-plugins). - -### Configuring on-heap and disk stores - -The following table lists the cache store settings for the `opensearch_onheap` store. 
- -Setting | Default | Description -:--- | :--- | :--- -`indices.request.cache.opensearch_onheap.size` | 1% of the heap | The size of the on-heap cache. Optional. -`indices.request.cache.opensearch_onheap.expire` | `MAX_VALUE` (disabled) | Specify a time-to-live (TTL) for the cached results. Optional. - -The following table lists the disk cache store settings for the `ehcache_disk` store. - -Setting | Default | Description -:--- | :--- | :--- -`indices.request.cache.ehcache_disk.max_size_in_bytes` | `1073741824` (1 GB) | Defines the size of the disk cache. Optional. -`indices.request.cache.ehcache_disk.storage.path` | `""` | Defines the storage path for the disk cache. Required. -`indices.request.cache.ehcache_disk.expire_after_access` | `MAX_VALUE` (disabled) | Specify a time-to-live (TTL) for the cached results. Optional. -`indices.request.cache.ehcache_disk.alias` | `ehcacheDiskCache#INDICES_REQUEST_CACHE` (this is an example of request cache) | Specify an alias for the disk cache. Optional. -`indices.request.cache.ehcache_disk.segments` | `16` | Defines the number of segments the disk cache is separated into. Used for concurrency. Optional. -`indices.request.cache.ehcache_disk.concurrency` | `1` | Defines the number of distinct write queues created for the disk store, where a group of segments share a write queue. Optional. - diff --git a/_search-plugins/knn/knn-index.md b/_search-plugins/knn/knn-index.md index 1e0c2e84f5..01b82b425b 100644 --- a/_search-plugins/knn/knn-index.md +++ b/_search-plugins/knn/knn-index.md @@ -11,10 +11,65 @@ has_children: false The k-NN plugin introduces a custom data type, the `knn_vector`, that allows users to ingest their k-NN vectors into an OpenSearch index and perform different kinds of k-NN search. The `knn_vector` field is highly configurable and can serve many different k-NN workloads. For more information, see [k-NN vector]({{site.url}}{{site.baseurl}}/field-types/supported-field-types/knn-vector/). +To create a k-NN index, set the `settings.index.knn` parameter to `true`: + +```json +PUT /test-index +{ + "settings": { + "index": { + "knn": true + } + }, + "mappings": { + "properties": { + "my_vector1": { + "type": "knn_vector", + "dimension": 3, + "method": { + "name": "hnsw", + "space_type": "l2", + "engine": "lucene", + "parameters": { + "ef_construction": 128, + "m": 24 + } + } + } + } + } +} +``` +{% include copy-curl.html %} + ## Lucene byte vector Starting with k-NN plugin version 2.9, you can use `byte` vectors with the `lucene` engine in order to reduce the amount of storage space needed. For more information, see [Lucene byte vector]({{site.url}}{{site.baseurl}}/field-types/supported-field-types/knn-vector#lucene-byte-vector). +## SIMD optimization for the Faiss engine + +Starting with version 2.13, the k-NN plugin supports [Single Instruction Multiple Data (SIMD)](https://en.wikipedia.org/wiki/Single_instruction,_multiple_data) processing if the underlying hardware supports SIMD instructions (AVX2 on x64 architecture and Neon on ARM64 architecture). SIMD is supported by default on Linux machines only for the Faiss engine. SIMD architecture helps boost overall performance by improving indexing throughput and reducing search latency. + +SIMD optimization is applicable only if the vector dimension is a multiple of 8. 
+{: .note} + + +### x64 architecture + + +For the x64 architecture, two different versions of the Faiss library are built and shipped with the artifact: + +- `libopensearchknn_faiss.so`: The non-optimized Faiss library without SIMD instructions. +- `libopensearchknn_faiss_avx2.so`: The Faiss library that contains AVX2 SIMD instructions. + +If your hardware supports AVX2, the k-NN plugin loads the `libopensearchknn_faiss_avx2.so` library at runtime. + +To disable AVX2 and load the non-optimized Faiss library (`libopensearchknn_faiss.so`), specify the `knn.faiss.avx2.disabled` static setting as `true` in `opensearch.yml` (default is `false`). Note that to update a static setting, you must stop the cluster, change the setting, and restart the cluster. For more information, see [Static settings]({{site.url}}{{site.baseurl}}/install-and-configure/configuring-opensearch/index/#static-settings). + +### ARM64 architecture + +For the ARM64 architecture, only one performance-boosting Faiss library (`libopensearchknn_faiss.so`) is built and shipped. The library contains Neon SIMD instructions and cannot be disabled. + ## Method definitions A method definition refers to the underlying configuration of the approximate k-NN algorithm you want to use. Method definitions are used to either create a `knn_vector` field (when the method does not require training) or [create a model during training]({{site.url}}{{site.baseurl}}/search-plugins/knn/api#train-model) that can then be used to [create a `knn_vector` field]({{site.url}}{{site.baseurl}}/search-plugins/knn/approximate-knn/#building-a-k-nn-index-from-a-model). @@ -48,12 +103,12 @@ For nmslib, *ef_search* is set in the [index settings](#index-settings). An index created in OpenSearch version 2.11 or earlier will still use the old `ef_construction` value (`512`). {: .note} -### Supported faiss methods +### Supported Faiss methods Method name | Requires training | Supported spaces | Description :--- | :--- | :--- | :--- `hnsw` | false | l2, innerproduct | Hierarchical proximity graph approach to approximate k-NN search. -`ivf` | true | l2, innerproduct | Bucketing approach where vectors are assigned different buckets based on clustering and, during search, only a subset of the buckets is searched. +`ivf` | true | l2, innerproduct | Stands for _inverted file index_. Bucketing approach where vectors are assigned different buckets based on clustering and, during search, only a subset of the buckets is searched. For hnsw, "innerproduct" is not available when PQ is used. {: .note} @@ -107,25 +162,21 @@ An index created in OpenSearch version 2.11 or earlier will still use the old `e {: .note} ```json -{ - "type": "knn_vector", - "dimension": 100, - "method": { - "name":"hnsw", - "engine":"lucene", - "space_type": "l2", - "parameters":{ - "m":2048, - "ef_construction": 245 - } +"method": { + "name":"hnsw", + "engine":"lucene", + "space_type": "l2", + "parameters":{ + "m":2048, + "ef_construction": 245 } } ``` -### Supported faiss encoders +### Supported Faiss encoders -You can use encoders to reduce the memory footprint of a k-NN index at the expense of search accuracy. faiss has -several encoder types, but the plugin currently only supports *flat* and *pq* encoding. +You can use encoders to reduce the memory footprint of a k-NN index at the expense of search accuracy. The k-NN plugin currently supports the +`flat`, `pq`, and `sq` encoders in the Faiss library. 
The following example method definition specifies the `hnsw` method and a `pq` encoder: @@ -151,11 +202,27 @@ The `hnsw` method supports the `pq` encoder for OpenSearch versions 2.10 and lat Encoder name | Requires training | Description :--- | :--- | :--- -`flat` | false | Encode vectors as floating point arrays. This encoding does not reduce memory footprint. +`flat` (Default) | false | Encode vectors as floating-point arrays. This encoding does not reduce memory footprint. `pq` | true | An abbreviation for _product quantization_, it is a lossy compression technique that uses clustering to encode a vector into a fixed size of bytes, with the goal of minimizing the drop in k-NN search accuracy. At a high level, vectors are broken up into `m` subvectors, and then each subvector is represented by a `code_size` code obtained from a code book produced during training. For more information about product quantization, see [this blog post](https://medium.com/dotstar/understanding-faiss-part-2-79d90b1e5388). +`sq` | false | An abbreviation for _scalar quantization_. Starting with k-NN plugin version 2.13, you can use the `sq` encoder to quantize 32-bit floating-point vectors into 16-bit floats. In version 2.13, the built-in `sq` encoder is the SQFP16 Faiss encoder. The encoder reduces memory footprint with a minimal loss of precision and improves performance by using SIMD optimization (using AVX2 on x86 architecture or Neon on ARM64 architecture). For more information, see [Faiss scalar quantization]({{site.url}}{{site.baseurl}}/search-plugins/knn/knn-vector-quantization#faiss-scalar-quantization). -#### Examples +#### PQ parameters + +Parameter name | Required | Default | Updatable | Description +:--- | :--- | :--- | :--- | :--- +`m` | false | 1 | false | Determines the number of subvectors into which to break the vector. Subvectors are encoded independently of each other. This vector dimension must be divisible by `m`. Maximum value is 1,024. +`code_size` | false | 8 | false | Determines the number of bits into which to encode a subvector. Maximum value is 8. For IVF, this value must be less than or equal to 8. For HNSW, this value can only be 8. + +#### SQ parameters + +Parameter name | Required | Default | Updatable | Description +:--- | :--- | :-- | :--- | :--- +`type` | false | `fp16` | false | The type of scalar quantization to be used to encode 32-bit float vectors into the corresponding type. As of OpenSearch 2.13, only the `fp16` encoder type is supported. For the `fp16` encoder, vector values must be in the [-65504.0, 65504.0] range. +`clip` | false | `false` | false | If `true`, then any vector values outside of the supported range for the specified vector type are rounded so that they are in the range. If `false`, then the request is rejected if any vector values are outside of the supported range. Setting `clip` to `true` may decrease recall. + +For more information and examples, see [Using Faiss scalar quantization]({{site.url}}{{site.baseurl}}/search-plugins/knn/knn-vector-quantization/#using-faiss-scalar-quantization). 
+#### Examples The following example uses the `ivf` method without specifying an encoder (by default, OpenSearch uses the `flat` encoder): @@ -204,12 +271,46 @@ The following example uses the `hnsw` method without specifying an encoder (by d } ``` -#### PQ parameters +The following example uses the `hnsw` method with an `sq` encoder of type `fp16` with `clip` enabled: -Paramater Name | Required | Default | Updatable | Description -:--- | :--- | :--- | :--- | :--- -`m` | false | 1 | false | Determines the number of subvectors into which to break the vector. Subvectors are encoded independently of each other. This dimension of the vector must be divisible by `m`. Maximum value is 1,024. -`code_size` | false | 8 | false | Determines the number of bits into which to encode a subvector. Maximum value is 8. For IVF, this value must be less than or equal to 8. For HNSW, this value can only be 8. +```json +"method": { + "name":"hnsw", + "engine":"faiss", + "space_type": "l2", + "parameters":{ + "encoder": { + "name": "sq", + "parameters": { + "type": "fp16", + "clip": true + } + }, + "ef_construction": 256, + "m": 8 + } +} +``` + +The following example uses the `ivf` method with an `sq` encoder of type `fp16`: + +```json +"method": { + "name":"ivf", + "engine":"faiss", + "space_type": "l2", + "parameters":{ + "encoder": { + "name": "sq", + "parameters": { + "type": "fp16", + "clip": false + } + }, + "nprobes": 2 + } +} +``` ### Choosing the right method @@ -221,6 +322,8 @@ If you want to use less memory and index faster than HNSW, while maintaining sim If memory is a concern, consider adding a PQ encoder to your HNSW or IVF index. Because PQ is a lossy encoding, query quality will drop. +You can reduce the memory footprint by a factor of 2, with a minimal loss in search quality, by using the [`fp_16` encoder]({{site.url}}{{site.baseurl}}/search-plugins/knn/knn-vector-quantization/#faiss-scalar-quantization). If your vector dimensions are within the [-128, 127] byte range, we recommend using the [byte quantizer]({{site.url}}{{site.baseurl}}/field-types/supported-field-types/knn-vector/#lucene-byte-vector) in order to reduce the memory footprint by a factor of 4. To learn more about vector quantization options, see [k-NN vector quantization]({{site.url}}{{site.baseurl}}/search-plugins/knn/knn-vector-quantization/). + ### Memory estimation In a typical OpenSearch cluster, a certain portion of RAM is set aside for the JVM heap. The k-NN plugin allocates @@ -230,6 +333,9 @@ the `circuit_breaker_limit` cluster setting. By default, the limit is set at 50% Having a replica doubles the total number of vectors. {: .note } +For information about using memory estimation with vector quantization, see the [vector quantization documentation]({{site.url}}{{site.baseurl}}/search-plugins/knn/knn-vector-quantization/#memory-estimation). +{: .note } + #### HNSW memory estimation The memory required for HNSW is estimated to be `1.1 * (4 * dimension + 8 * M)` bytes/vector. 
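
As a worked example of this estimate, assume that you have 1 million vectors with a dimension of 256 and an `M` of 16 (the same assumptions used in the quantization estimates later in this document). The memory requirement can be estimated as follows:

```bash
1.1 * (4 * 256 + 8 * 16) * 1,000,000 ~= 1.18 GB
```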
diff --git a/_search-plugins/knn/knn-vector-quantization.md b/_search-plugins/knn/knn-vector-quantization.md
new file mode 100644
index 0000000000..3373f104c2
--- /dev/null
+++ b/_search-plugins/knn/knn-vector-quantization.md
@@ -0,0 +1,159 @@
+---
+layout: default
+title: k-NN vector quantization
+nav_order: 27
+parent: k-NN search
+grand_parent: Search methods
+has_children: false
+has_math: true
+---
+
+# k-NN vector quantization
+
+By default, the k-NN plugin supports the indexing and querying of vectors of type `float`, where each dimension of the vector occupies 4 bytes of memory. For use cases that require ingestion on a large scale, keeping `float` vectors can be expensive because OpenSearch needs to construct, load, save, and search graphs (for the native `nmslib` and `faiss` engines). To reduce the memory footprint, you can use vector quantization.
+
+## Lucene byte vector
+
+Starting with k-NN plugin version 2.9, you can use `byte` vectors with the `lucene` engine in order to reduce the amount of required memory. This requires quantizing the vectors outside of OpenSearch before ingesting them into an OpenSearch index. For more information, see [Lucene byte vector]({{site.url}}{{site.baseurl}}/field-types/supported-field-types/knn-vector#lucene-byte-vector).
+
+## Faiss scalar quantization
+
+Starting with version 2.13, the k-NN plugin supports performing scalar quantization for the Faiss engine within OpenSearch. Within the Faiss engine, a scalar quantizer (SQfp16) performs the conversion between 32-bit and 16-bit vectors. At ingestion time, when you upload 32-bit floating-point vectors to OpenSearch, SQfp16 quantizes them into 16-bit floating-point vectors and stores the quantized vectors in a k-NN index. At search time, SQfp16 decodes the vector values back into 32-bit floating-point values for distance computation. The SQfp16 quantization can decrease the memory footprint by a factor of 2. Additionally, it leads to a minimal loss in recall when differences between vector values are large compared to the error introduced by eliminating their two least significant bits. When used with [SIMD optimization]({{site.url}}{{site.baseurl}}/search-plugins/knn/knn-index#simd-optimization-for-the-faiss-engine), SQfp16 quantization can also significantly reduce search latencies and improve indexing throughput.
+
+SIMD optimization is not supported on Windows. Using Faiss scalar quantization on Windows can lead to a significant drop in performance, including decreased indexing throughput and increased search latencies.
+{: .warning}
+
+### Using Faiss scalar quantization
+
+To use Faiss scalar quantization, set the k-NN vector field's `method.parameters.encoder.name` to `sq` when creating a k-NN index:
+
+```json
+PUT /test-index
+{
+  "settings": {
+    "index": {
+      "knn": true,
+      "knn.algo_param.ef_search": 100
+    }
+  },
+  "mappings": {
+    "properties": {
+      "my_vector1": {
+        "type": "knn_vector",
+        "dimension": 3,
+        "method": {
+          "name": "hnsw",
+          "engine": "faiss",
+          "space_type": "l2",
+          "parameters": {
+            "encoder": {
+              "name": "sq"
+            },
+            "ef_construction": 256,
+            "m": 8
+          }
+        }
+      }
+    }
+  }
+}
+```
+{% include copy-curl.html %}
+
+Optionally, you can specify the parameters in `method.parameters.encoder`. For more information about `encoder` object parameters, see [SQ parameters]({{site.url}}{{site.baseurl}}/search-plugins/knn/knn-index/#sq-parameters).
+
+The `fp16` encoder converts 32-bit vectors into their 16-bit counterparts.
For this encoder type, the vector values must be in the [-65504.0, 65504.0] range. To define how to handle out-of-range values, the preceding request specifies the `clip` parameter. By default, this parameter is `false`, and any vectors containing out-of-range values are rejected. When `clip` is set to `true` (as in the preceding request), out-of-range vector values are rounded up or down so that they are in the supported range. For example, if the original 32-bit vector is `[65510.82, -65504.1]`, the vector will be indexed as the 16-bit vector `[65504.0, -65504.0]`.
+
+We recommend setting `clip` to `true` only if very few elements lie outside of the supported range. Rounding the values may cause a drop in recall.
+{: .note}
+
+The following example method definition specifies the Faiss SQfp16 encoder, which rejects any indexing request that contains out-of-range vector values (because the `clip` parameter is `false` by default):
+
+```json
+PUT /test-index
+{
+  "settings": {
+    "index": {
+      "knn": true,
+      "knn.algo_param.ef_search": 100
+    }
+  },
+  "mappings": {
+    "properties": {
+      "my_vector1": {
+        "type": "knn_vector",
+        "dimension": 3,
+        "method": {
+          "name": "hnsw",
+          "engine": "faiss",
+          "space_type": "l2",
+          "parameters": {
+            "encoder": {
+              "name": "sq",
+              "parameters": {
+                "type": "fp16"
+              }
+            },
+            "ef_construction": 256,
+            "m": 8
+          }
+        }
+      }
+    }
+  }
+}
+```
+{% include copy-curl.html %}
+
+During ingestion, make sure each dimension of the vector is in the supported range ([-65504.0, 65504.0]):
+
+```json
+PUT test-index/_doc/1
+{
+  "my_vector1": [-65504.0, 65503.845, 55.82]
+}
+```
+{% include copy-curl.html %}
+
+During querying, there is no range limitation for the query vector:
+
+```json
+GET test-index/_search
+{
+  "size": 2,
+  "query": {
+    "knn": {
+      "my_vector1": {
+        "vector": [265436.876, -120906.256, 99.84],
+        "k": 2
+      }
+    }
+  }
+}
+```
+{% include copy-curl.html %}
+
+## Memory estimation
+
+In the best-case scenario, 16-bit vectors produced by the Faiss SQfp16 quantizer require 50% of the memory that 32-bit vectors require.
+
+### HNSW memory estimation
+
+The memory required for HNSW is estimated to be `1.1 * (2 * dimension + 8 * M)` bytes/vector.
+
+As an example, assume that you have 1 million vectors with a dimension of 256 and an `M` of 16. The memory requirement can be estimated as follows:
+
+```bash
+1.1 * (2 * 256 + 8 * 16) * 1,000,000 ~= 0.656 GB
+```
+
+### IVF memory estimation
+
+The memory required for IVF is estimated to be `1.1 * (((2 * dimension) * num_vectors) + (4 * nlist * dimension))` bytes.
+
+As an example, assume that you have 1 million vectors with a dimension of 256 and an `nlist` of 128. The memory requirement can be estimated as follows:
+
+```bash
+1.1 * (((2 * 256) * 1,000,000) + (4 * 128 * 256)) ~= 0.525 GB
+```
+
diff --git a/_search-plugins/knn/settings.md b/_search-plugins/knn/settings.md
index 1f43654fbe..f4ef057cfb 100644
--- a/_search-plugins/knn/settings.md
+++ b/_search-plugins/knn/settings.md
@@ -25,3 +25,4 @@ Setting | Default | Description
`knn.model.index.number_of_shards`| 1 | The number of shards to use for the model system index, the OpenSearch index that stores the models used for Approximate Nearest Neighbor (ANN) search.
`knn.model.index.number_of_replicas`| 1 | The number of replica shards to use for the model system index. Generally, in a multi-node cluster, this should be at least 1 to increase stability.
`knn.advanced.filtered_exact_search_threshold`| null | The threshold value for the filtered IDs that is used to switch to exact search during filtered ANN search. If the number of filtered IDs in a segment is less than this setting's value, exact search will be performed on the filtered IDs.
+`knn.faiss.avx2.disabled` | `false` | A static setting that specifies whether to disable the SIMD-based `libopensearchknn_faiss_avx2.so` library and load the non-optimized `libopensearchknn_faiss.so` library for the Faiss engine on machines with x64 architecture. For more information, see [SIMD optimization for the Faiss engine]({{site.url}}{{site.baseurl}}/search-plugins/knn/knn-index/#simd-optimization-for-the-faiss-engine).
diff --git a/_search-plugins/neural-sparse-search.md b/_search-plugins/neural-sparse-search.md
index 88d30e4391..58918565c4 100644
--- a/_search-plugins/neural-sparse-search.md
+++ b/_search-plugins/neural-sparse-search.md
@@ -55,7 +55,8 @@ PUT /_ingest/pipeline/nlp-ingest-pipeline-sparse
```
{% include copy-curl.html %}

-To split long text into passages, use the `text_chunking` ingest processor before the `sparse_encoding` processor. For more information, see [Chaining text chunking and embedding processors]({{site.url}}{{site.baseurl}}/ingest-pipelines/processors/text-chunking/#chaining-text-chunking-and-embedding-processors).
+To split long text into passages, use the `text_chunking` ingest processor before the `sparse_encoding` processor. For more information, see [Text chunking]({{site.url}}{{site.baseurl}}/search-plugins/text-chunking/).
+

## Step 2: Create an index for ingestion

@@ -364,4 +365,8 @@ The response contains both documents:
  ]
 }
}
-```
\ No newline at end of file
+```
+
+## Next steps
+
+- To learn more about splitting long text into passages for neural search, see [Text chunking]({{site.url}}{{site.baseurl}}/search-plugins/text-chunking/).
\ No newline at end of file
diff --git a/_search-plugins/semantic-search.md b/_search-plugins/semantic-search.md
index 32bd18cd6c..7c3fbb738f 100644
--- a/_search-plugins/semantic-search.md
+++ b/_search-plugins/semantic-search.md
@@ -48,7 +48,7 @@ PUT /_ingest/pipeline/nlp-ingest-pipeline
```
{% include copy-curl.html %}

-To split long text into passages, use the `text_chunking` ingest processor before the `text_embedding` processor. For more information, see [Chaining text chunking and embedding processors]({{site.url}}{{site.baseurl}}/ingest-pipelines/processors/text-chunking/#chaining-text-chunking-and-embedding-processors).
+To split long text into passages, use the `text_chunking` ingest processor before the `text_embedding` processor. For more information, see [Text chunking]({{site.url}}{{site.baseurl}}/search-plugins/text-chunking/).

## Step 2: Create an index for ingestion
diff --git a/_search-plugins/text-chunking.md b/_search-plugins/text-chunking.md
new file mode 100644
index 0000000000..b66cfeda61
--- /dev/null
+++ b/_search-plugins/text-chunking.md
@@ -0,0 +1,116 @@
+---
+layout: default
+title: Text chunking
+nav_order: 65
+---
+
+# Text chunking
+**Introduced 2.13**
+{: .label .label-purple }
+
+To split long text into passages, you can use a `text_chunking` processor as a preprocessing step for a `text_embedding` or `sparse_encoding` processor in order to obtain embeddings for each chunked passage. For more information about the processor parameters, see [Text chunking processor]({{site.url}}{{site.baseurl}}/ingest-pipelines/processors/text-chunking/).
Before you start, follow the steps outlined in the [pretrained model documentation]({{site.url}}{{site.baseurl}}/ml-commons-plugin/pretrained-models/) to register an embedding model. The following example preprocesses text by splitting it into passages and then produces embeddings using the `text_embedding` processor.
+
+## Step 1: Create a pipeline
+
+The following example request creates an ingest pipeline that converts the text in the `passage_text` field into chunked passages, which will be stored in the `passage_chunk` field. The text in the `passage_chunk` field is then converted into text embeddings, and the embeddings are stored in the `passage_chunk_embedding` field:
+
+```json
+PUT _ingest/pipeline/text-chunking-embedding-ingest-pipeline
+{
+  "description": "A text chunking and embedding ingest pipeline",
+  "processors": [
+    {
+      "text_chunking": {
+        "algorithm": {
+          "fixed_token_length": {
+            "token_limit": 10,
+            "overlap_rate": 0.2,
+            "tokenizer": "standard"
+          }
+        },
+        "field_map": {
+          "passage_text": "passage_chunk"
+        }
+      }
+    },
+    {
+      "text_embedding": {
+        "model_id": "LMLPWY4BROvhdbtgETaI",
+        "field_map": {
+          "passage_chunk": "passage_chunk_embedding"
+        }
+      }
+    }
+  ]
+}
+```
+{% include copy-curl.html %}
+
+## Step 2: Create an index for ingestion
+
+In order to use the ingest pipeline, you need to create a k-NN index. The `passage_chunk_embedding` field must be of the `nested` type. The `dimension` of the nested `knn` field must be set to the number of dimensions of your model:
+
+```json
+PUT testindex
+{
+  "settings": {
+    "index": {
+      "knn": true
+    }
+  },
+  "mappings": {
+    "properties": {
+      "text": {
+        "type": "text"
+      },
+      "passage_chunk_embedding": {
+        "type": "nested",
+        "properties": {
+          "knn": {
+            "type": "knn_vector",
+            "dimension": 768
+          }
+        }
+      }
+    }
+  }
+}
+```
+{% include copy-curl.html %}
+
+## Step 3: Ingest documents into the index
+
+To ingest a document into the index created in the previous step, send the following request:
+
+```json
+POST testindex/_doc?pipeline=text-chunking-embedding-ingest-pipeline
+{
+  "passage_text": "This is an example document to be chunked. The document contains a single paragraph, two sentences and 24 tokens by standard tokenizer in OpenSearch."
+}
+```
+{% include copy-curl.html %}
+
+## Step 4: Search the index using neural search
+
+You can use a `nested` query to perform vector search on your index. We recommend setting `score_mode` to `max` so that the document score is set to the highest score across all passage embeddings:
+
+```json
+GET testindex/_search
+{
+  "query": {
+    "nested": {
+      "score_mode": "max",
+      "path": "passage_chunk_embedding",
+      "query": {
+        "neural": {
+          "passage_chunk_embedding.knn": {
+            "query_text": "document",
+            "model_id": "-tHZeI4BdQKclr136Wl7"
+          }
+        }
+      }
+    }
+  }
+}
+```
+{% include copy-curl.html %}
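
To also identify which passage produced a document's score, you can optionally request `inner_hits` on the `nested` query. The following is a minimal sketch that assumes the index and model ID from the preceding steps; note that each inner hit returns the source of the matching nested object, which includes its stored embedding, so responses can be large:

```json
GET testindex/_search
{
  "query": {
    "nested": {
      "score_mode": "max",
      "path": "passage_chunk_embedding",
      "inner_hits": {},
      "query": {
        "neural": {
          "passage_chunk_embedding.knn": {
            "query_text": "document",
            "model_id": "-tHZeI4BdQKclr136Wl7"
          }
        }
      }
    }
  }
}
```
{% include copy-curl.html %}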