diff --git a/CHANGELOG.md b/CHANGELOG.md index aa2086646fbac..9b5f2dc5a16f2 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -170,7 +170,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), - Add telemetry tracer/metric enable flag and integ test. ([#10395](https://github.com/opensearch-project/OpenSearch/pull/10395)) - Performance improvement for Datetime field caching ([#4558](https://github.com/opensearch-project/OpenSearch/issues/4558)) - Add instrumentation for indexing in transport bulk action and transport shard bulk action. ([#10273](https://github.com/opensearch-project/OpenSearch/pull/10273)) -- Disallow removing some metadata fields by remove ingest processor ([#10895](https://github.com/opensearch-project/OpenSearch/pull/10895)) +- Disallow removing some metadata fields by remove ingest processor ([#10895](https://github.com/opensearch-project/OpenSearch/pull/10895), [#11607](https://github.com/opensearch-project/OpenSearch/pull/11607)) - Performance improvement for MultiTerm Queries on Keyword fields ([#7057](https://github.com/opensearch-project/OpenSearch/issues/7057)) - Refactor common parts from the Rounding class into a separate 'round' package ([#11023](https://github.com/opensearch-project/OpenSearch/issues/11023)) - Performance improvement for date histogram aggregations without sub-aggregations ([#11083](https://github.com/opensearch-project/OpenSearch/pull/11083)) diff --git a/modules/ingest-common/src/main/java/org/opensearch/ingest/common/RemoveProcessor.java b/modules/ingest-common/src/main/java/org/opensearch/ingest/common/RemoveProcessor.java index bb3d4bca47859..a48cfd87b78c3 100644 --- a/modules/ingest-common/src/main/java/org/opensearch/ingest/common/RemoveProcessor.java +++ b/modules/ingest-common/src/main/java/org/opensearch/ingest/common/RemoveProcessor.java @@ -88,16 +88,18 @@ public IngestDocument execute(IngestDocument document) { throw new IllegalArgumentException("cannot remove metadata field [" + path + "]"); } // removing _id is disallowed when there's an external version specified in the request - String versionType = document.getFieldValue(IngestDocument.Metadata.VERSION_TYPE.getFieldName(), String.class); if (path.equals(IngestDocument.Metadata.ID.getFieldName()) - && !Objects.equals(versionType, VersionType.toString(VersionType.INTERNAL))) { - Long version = document.getFieldValue(IngestDocument.Metadata.VERSION.getFieldName(), Long.class); - throw new IllegalArgumentException( - "cannot remove metadata field [_id] when specifying external version for the document, version: " - + version - + ", version_type: " - + versionType - ); + && document.hasField(IngestDocument.Metadata.VERSION_TYPE.getFieldName())) { + String versionType = document.getFieldValue(IngestDocument.Metadata.VERSION_TYPE.getFieldName(), String.class); + if (!Objects.equals(versionType, VersionType.toString(VersionType.INTERNAL))) { + Long version = document.getFieldValue(IngestDocument.Metadata.VERSION.getFieldName(), Long.class, true); + throw new IllegalArgumentException( + "cannot remove metadata field [_id] when specifying external version for the document, version: " + + version + + ", version_type: " + + versionType + ); + } } document.removeField(path); }); diff --git a/modules/ingest-common/src/test/java/org/opensearch/ingest/common/RemoveProcessorTests.java b/modules/ingest-common/src/test/java/org/opensearch/ingest/common/RemoveProcessorTests.java index 1a5630a4730f2..c138ad606d2e5 100644 --- a/modules/ingest-common/src/test/java/org/opensearch/ingest/common/RemoveProcessorTests.java +++ b/modules/ingest-common/src/test/java/org/opensearch/ingest/common/RemoveProcessorTests.java @@ -32,6 +32,7 @@ package org.opensearch.ingest.common; +import org.opensearch.common.lucene.uid.Versions; import org.opensearch.index.VersionType; import org.opensearch.ingest.IngestDocument; import org.opensearch.ingest.Processor; @@ -181,4 +182,100 @@ public void testRemoveMetadataField() throws Exception { } } } + + public void testRemoveDocumentId() throws Exception { + Map config = new HashMap<>(); + config.put("field", IngestDocument.Metadata.ID.getFieldName()); + String processorTag = randomAlphaOfLength(10); + + // test remove _id when _version_type is external + IngestDocument ingestDocumentWithExternalVersionType = new IngestDocument( + RandomDocumentPicks.randomString(random()), + RandomDocumentPicks.randomString(random()), + RandomDocumentPicks.randomString(random()), + 1L, + VersionType.EXTERNAL, + RandomDocumentPicks.randomSource(random()) + ); + + Processor processorForExternalVersionType = new RemoveProcessor.Factory(TestTemplateService.instance()).create( + null, + processorTag, + null, + config + ); + assertThrows( + "cannot remove metadata field [_id] when specifying external version for the document, version: " + + 1 + + ", version_type: " + + VersionType.EXTERNAL, + IllegalArgumentException.class, + () -> processorForExternalVersionType.execute(ingestDocumentWithExternalVersionType) + ); + + // test remove _id when _version_type is external_gte + config.put("field", IngestDocument.Metadata.ID.getFieldName()); + IngestDocument ingestDocumentWithExternalGTEVersionType = new IngestDocument( + RandomDocumentPicks.randomString(random()), + RandomDocumentPicks.randomString(random()), + RandomDocumentPicks.randomString(random()), + 1L, + VersionType.EXTERNAL_GTE, + RandomDocumentPicks.randomSource(random()) + ); + + Processor processorForExternalGTEVersionType = new RemoveProcessor.Factory(TestTemplateService.instance()).create( + null, + processorTag, + null, + config + ); + assertThrows( + "cannot remove metadata field [_id] when specifying external version for the document, version: " + + 1 + + ", version_type: " + + VersionType.EXTERNAL_GTE, + IllegalArgumentException.class, + () -> processorForExternalGTEVersionType.execute(ingestDocumentWithExternalGTEVersionType) + ); + + // test remove _id when _version_type is internal + config.put("field", IngestDocument.Metadata.ID.getFieldName()); + IngestDocument ingestDocumentWithInternalVersionType = new IngestDocument( + RandomDocumentPicks.randomString(random()), + RandomDocumentPicks.randomString(random()), + RandomDocumentPicks.randomString(random()), + Versions.MATCH_ANY, + VersionType.INTERNAL, + RandomDocumentPicks.randomSource(random()) + ); + + Processor processorForInternalVersionType = new RemoveProcessor.Factory(TestTemplateService.instance()).create( + null, + processorTag, + null, + config + ); + processorForInternalVersionType.execute(ingestDocumentWithInternalVersionType); + assertThat(ingestDocumentWithInternalVersionType.hasField(IngestDocument.Metadata.ID.getFieldName()), equalTo(false)); + + // test remove _id when _version_type is null + config.put("field", IngestDocument.Metadata.ID.getFieldName()); + IngestDocument ingestDocumentWithNoVersionType = new IngestDocument( + RandomDocumentPicks.randomString(random()), + RandomDocumentPicks.randomString(random()), + RandomDocumentPicks.randomString(random()), + null, + null, + RandomDocumentPicks.randomSource(random()) + ); + Processor processorForNullVersionType = new RemoveProcessor.Factory(TestTemplateService.instance()).create( + null, + processorTag, + null, + config + ); + processorForNullVersionType.execute(ingestDocumentWithNoVersionType); + assertThat(ingestDocumentWithNoVersionType.hasField(IngestDocument.Metadata.ID.getFieldName()), equalTo(false)); + } } diff --git a/modules/ingest-common/src/yamlRestTest/resources/rest-api-spec/test/ingest/290_remove_processor.yml b/modules/ingest-common/src/yamlRestTest/resources/rest-api-spec/test/ingest/290_remove_processor.yml index 4811769d04f0e..6668b468f8edc 100644 --- a/modules/ingest-common/src/yamlRestTest/resources/rest-api-spec/test/ingest/290_remove_processor.yml +++ b/modules/ingest-common/src/yamlRestTest/resources/rest-api-spec/test/ingest/290_remove_processor.yml @@ -5,6 +5,69 @@ teardown: id: "my_pipeline" ignore: 404 +--- +"Test simulate API works well with remove processor": + - do: + ingest.put_pipeline: + id: "my_pipeline" + body: > + { + "description": "_description", + "processors": [ + { + "remove" : { + "field" : "{{foo}}" + } + } + ] + } + - match: { acknowledged: true } + + # test simulating existing pipeline works well + - do: + ingest.simulate: + id: "my_pipeline" + body: > + { + "docs": [ + { + "_source": { + "foo": "bar", + "bar": "zoo" + } + } + ] + } + - length: { docs: 1 } + - match: { docs.0.doc._source: { "foo": "bar" } } + + # test simulating inflight pipeline works well + - do: + ingest.simulate: + body: > + { + "pipeline": { + "description": "_description", + "processors": [ + { + "remove" : { + "field" : "{{foo}}" + } + } + ] + }, + "docs": [ + { + "_source": { + "foo": "bar", + "bar": "zoo" + } + } + ] + } + - length: { docs: 1 } + - match: { docs.0.doc._source: { "foo": "bar" } } + --- "Test remove processor with non-existing field and without ignore_missing": - do: @@ -227,3 +290,32 @@ teardown: version: 1 version_type: "external" body: { message: "foo bar baz" } + + # test simulating pipeline with removing _id + - do: + ingest.simulate: + body: > + { + "pipeline": { + "description": "_description", + "processors": [ + { + "remove" : { + "field" : "_id" + } + } + ] + }, + "docs": [ + { + "_version_type": "external_gte", + "_version": 1, + "_source": { + "foo": "bar", + "bar": "zoo" + } + } + ] + } + - match: { docs.0.error.type: "illegal_argument_exception" } + - match: { docs.0.error.reason: "cannot remove metadata field [_id] when specifying external version for the document, version: 1, version_type: external_gte" }