diff --git a/.buildkite/pipeline.trigger.integration.tests.sh b/.buildkite/pipeline.trigger.integration.tests.sh index 9eddf754f..94c325efe 100755 --- a/.buildkite/pipeline.trigger.integration.tests.sh +++ b/.buildkite/pipeline.trigger.integration.tests.sh @@ -44,14 +44,9 @@ CHECK_PACKAGES_TESTS=( test-check-packages-benchmarks test-check-packages-with-logstash ) -for independent_agent in false true ; do for test in "${CHECK_PACKAGES_TESTS[@]}"; do - label_suffix="" - if [[ "$independent_agent" == "false" ]]; then - label_suffix=" (stack agent)" - fi test_name=${test#"test-check-packages-"} - echo " - label: \":go: Integration test: ${test_name}${label_suffix}\"" + echo " - label: \":go: Integration test: ${test_name}\"" echo " command: ./.buildkite/scripts/integration_tests.sh -t ${test}" echo " agents:" echo " provider: \"gcp\"" @@ -64,15 +59,10 @@ for test in "${CHECK_PACKAGES_TESTS[@]}"; do if [[ $test =~ with-kind$ ]]; then echo " - build/kubectl-dump.txt" fi - if [[ "${independent_agent}" == "false" ]]; then - echo " env:" - echo " ELASTIC_PACKAGE_TEST_ENABLE_INDEPENDENT_AGENT: ${independent_agent}" - fi -done done pushd test/packages/false_positives > /dev/null -for package in $(find . -maxdepth 1 -mindepth 1 -type d) ; do +while IFS= read -r -d '' package ; do package_name=$(basename "${package}") echo " - label: \":go: Integration test (false positive): ${package_name}\"" echo " key: \"integration-false_positives-${package_name}\"" @@ -86,48 +76,56 @@ for package in $(find . -maxdepth 1 -mindepth 1 -type d) ; do echo " - build/test-results/*.xml" echo " - build/test-results/*.xml.expected-errors.txt" # these files are uploaded in case it is needed to review the xUnit files in case of CI reports success the step echo " - build/test-coverage/coverage-*.xml" # these files should not be used to compute the final coverage of elastic-package -done +done < <(find . 
-maxdepth 1 -mindepth 1 -type d -print0) popd > /dev/null pushd test/packages/parallel > /dev/null -for independent_agent in false true; do -for package in $(find . -maxdepth 1 -mindepth 1 -type d) ; do - label_suffix="" - if [[ "$independent_agent" == "false" ]]; then - label_suffix=" (stack agent)" - fi +while IFS= read -r -d '' package ; do package_name=$(basename "${package}") - if [[ "$independent_agent" == "false" && "$package_name" == "oracle" ]]; then - echoerr "Package \"${package_name}\" skipped: not supported with Elastic Agent running in the stack (missing required software)." - continue - fi - - if [[ "$independent_agent" == "false" && "$package_name" == "auditd_manager" ]]; then - echoerr "Package \"${package_name}\" skipped: not supported with Elastic Agent running in the stack (missing capabilities)." - continue - fi + echo " - label: \":go: Integration test: ${package_name}\"" + echo " key: \"integration-parallel-${package_name}-agent\"" + echo " command: ./.buildkite/scripts/integration_tests.sh -t test-check-packages-parallel -p ${package_name}" + echo " env:" + echo " UPLOAD_SAFE_LOGS: 1" + echo " agents:" + echo " provider: \"gcp\"" + echo " image: \"${UBUNTU_X86_64_AGENT_IMAGE}\"" + echo " artifact_paths:" + echo " - build/test-results/*.xml" + echo " - build/test-coverage/coverage-*.xml" # these files should not be used to compute the final coverage of elastic-package +done < <(find . -maxdepth 1 -mindepth 1 -type d -print0) - if [[ "$independent_agent" == "false" && "$package_name" == "custom_entrypoint" ]]; then - echoerr "Package \"${package_name}\" skipped: not supported with Elastic Agent running in the stack (missing required files deployed in provisioning)." 
- continue - fi +# Run system tests with the Elastic Agent from the Elastic stack just for one package +package_name="apache" +echo " - label: \":go: Integration test: ${package_name} (stack agent)\"" +echo " key: \"integration-parallel-${package_name}-stack-agent\"" +echo " command: ./.buildkite/scripts/integration_tests.sh -t test-check-packages-parallel -p ${package_name}" +echo " env:" +echo " UPLOAD_SAFE_LOGS: 1" +echo " ELASTIC_PACKAGE_TEST_ENABLE_INDEPENDENT_AGENT: false" +echo " agents:" +echo " provider: \"gcp\"" +echo " image: \"${UBUNTU_X86_64_AGENT_IMAGE}\"" +echo " artifact_paths:" +echo " - build/test-results/*.xml" +echo " - build/test-coverage/coverage-*.xml" # these files should not be used to compute the final coverage of elastic-package - echo " - label: \":go: Integration test: ${package_name}${label_suffix}\"" - echo " key: \"integration-parallel-${package_name}-agent-${independent_agent}\"" +# Add steps to test validation method mappings +while IFS= read -r -d '' package ; do + package_name=$(basename "${package}") + echo " - label: \":go: Integration test: ${package_name} (just validate mappings)\"" + echo " key: \"integration-parallel-${package_name}-agent-validate-mappings\"" echo " command: ./.buildkite/scripts/integration_tests.sh -t test-check-packages-parallel -p ${package_name}" echo " env:" echo " UPLOAD_SAFE_LOGS: 1" - if [[ "${independent_agent}" == "false" ]]; then - echo " ELASTIC_PACKAGE_TEST_ENABLE_INDEPENDENT_AGENT: ${independent_agent}" - fi + echo " ELASTIC_PACKAGE_FIELD_VALIDATION_TEST_METHOD: mappings" echo " agents:" echo " provider: \"gcp\"" echo " image: \"${UBUNTU_X86_64_AGENT_IMAGE}\"" echo " artifact_paths:" echo " - build/test-results/*.xml" echo " - build/test-coverage/coverage-*.xml" # these files should not be used to compute the final coverage of elastic-package -done -done +done < <(find . 
-maxdepth 1 -mindepth 1 -type d -print0) popd > /dev/null # TODO: Missing docker & docker-compose in MACOS ARM agent image, skip installation of packages in the meantime. @@ -166,19 +164,11 @@ echo " image: \"${UBUNTU_X86_64_AGENT_IMAGE}\"" echo " artifact_paths:" echo " - build/elastic-stack-dump/install-zip-shellinit/logs/*.log" -for independent_agent in false true; do - label_suffix="" - if [[ "$independent_agent" == "false" ]]; then - label_suffix=" (stack agent)" - fi - echo " - label: \":go: Integration test: system-flags${label_suffix}\"" - echo " command: ./.buildkite/scripts/integration_tests.sh -t test-system-test-flags" - echo " agents:" - echo " provider: \"gcp\"" - echo " image: \"${UBUNTU_X86_64_AGENT_IMAGE}\"" - echo " env:" - echo " ELASTIC_PACKAGE_TEST_ENABLE_INDEPENDENT_AGENT: ${independent_agent}" -done +echo " - label: \":go: Integration test: system-flags\"" +echo " command: ./.buildkite/scripts/integration_tests.sh -t test-system-test-flags" +echo " agents:" +echo " provider: \"gcp\"" +echo " image: \"${UBUNTU_X86_64_AGENT_IMAGE}\"" echo " - label: \":go: Integration test: profiles-command\"" echo " command: ./.buildkite/scripts/integration_tests.sh -t test-profiles-command" diff --git a/.buildkite/scripts/integration_tests.sh b/.buildkite/scripts/integration_tests.sh index 465a9d8d2..2959a057f 100755 --- a/.buildkite/scripts/integration_tests.sh +++ b/.buildkite/scripts/integration_tests.sh @@ -122,6 +122,10 @@ if [[ "${TARGET}" == "${PARALLEL_TARGET}" ]] || [[ "${TARGET}" == "${FALSE_POSIT package_folder="${package_folder}-stack_agent" fi + if [[ "${ELASTIC_PACKAGE_FIELD_VALIDATION_TEST_METHOD:-""}" != "" ]]; then + package_folder="${package_folder}-${ELASTIC_PACKAGE_FIELD_VALIDATION_TEST_METHOD}" + fi + if [[ "${retry_count}" -ne 0 ]]; then package_folder="${package_folder}_retry_${retry_count}" fi diff --git a/README.md b/README.md index 1b5fceb7b..1f1db5c3e 100644 --- a/README.md +++ b/README.md @@ -695,9 +695,16 @@ There are available 
some environment variables that could be used to change some - `ELASTIC_PACKAGE_DISABLE_ELASTIC_AGENT_WOLFI`: If set to `true`, the Elastic Agent image used for running agents will be using the Ubuntu docker images (e.g. `docker.elastic.co/elastic-agent/elastic-agent-complete`). If set to `false`, the Elastic Agent image used for the running agents will be based on the wolfi images (e.g. `docker.elastic.co/elastic-agent/elastic-agent-wolfi`). Default: `false`. - - `ELASTIC_PACKAGE_TEST_DUMP_SCENARIO_DOCS. If the variable is set, elastic-package will dump to a file the documents generated + - `ELASTIC_PACKAGE_TEST_DUMP_SCENARIO_DOCS`. If the variable is set, elastic-package will dump to a file the documents generated by system tests before they are verified. This is useful to know exactly what fields are being verified when investigating issues on this step. Documents are dumped to a file in the system temporary directory. It is disabled by default. + - `ELASTIC_PACKAGE_TEST_ENABLE_INDEPENDENT_AGENT`. If the variable is set to false, all system tests defined in the package will use + the Elastic Agent started along with the stack. If set to true, a new Elastic Agent will be started and enrolled for each test defined in the + package (and unenrolled at the end of each test). Default: `true`. + - `ELASTIC_PACKAGE_FIELD_VALIDATION_TEST_METHOD`. This variable can take one of these values: `all`, `mappings` or `fields`. If this + variable is set to `fields`, then validation of fields will be based on the documents ingested into Elasticsearch. If this is set to + `mappings`, then validation of fields will be based on the mappings generated when the documents are ingested into Elasticsearch. If + set to `all`, then validation will be based on both methods mentioned previously. Default option: `fields`. - To configure the Elastic stack to be used by `elastic-package`: - `ELASTIC_PACKAGE_ELASTICSEARCH_HOST`: Host of the elasticsearch (e.g. 
https://127.0.0.1:9200) diff --git a/internal/elasticsearch/client.go b/internal/elasticsearch/client.go index 20dfff16b..8a95f8cb7 100644 --- a/internal/elasticsearch/client.go +++ b/internal/elasticsearch/client.go @@ -279,3 +279,85 @@ func (client *Client) redHealthCause(ctx context.Context) (string, error) { } return strings.Join(causes, ", "), nil } + +func (c *Client) SimulateIndexTemplate(ctx context.Context, indexTemplateName string) (json.RawMessage, json.RawMessage, error) { + resp, err := c.Indices.SimulateTemplate( + c.Indices.SimulateTemplate.WithContext(ctx), + c.Indices.SimulateTemplate.WithName(indexTemplateName), + ) + if err != nil { + return nil, nil, fmt.Errorf("failed to get field mapping for data stream %q: %w", indexTemplateName, err) + } + defer resp.Body.Close() + if resp.IsError() { + return nil, nil, fmt.Errorf("error getting mapping: %s", resp) + } + body, err := io.ReadAll(resp.Body) + if err != nil { + return nil, nil, fmt.Errorf("error reading mapping body: %w", err) + } + + type mappingsIndexTemplate struct { + DynamicTemplates json.RawMessage `json:"dynamic_templates"` + Properties json.RawMessage `json:"properties"` + } + + type indexTemplateSimulated struct { + // Settings json.RawMessage `json:"settings"` + Mappings mappingsIndexTemplate `json:"mappings"` + } + + type previewTemplate struct { + Template indexTemplateSimulated `json:"template"` + } + + var preview previewTemplate + + if err := json.Unmarshal(body, &preview); err != nil { + return nil, nil, fmt.Errorf("error unmarshaling mappings: %w", err) + } + + return preview.Template.Mappings.DynamicTemplates, preview.Template.Mappings.Properties, nil +} + +func (c *Client) DataStreamMappings(ctx context.Context, dataStreamName string) (json.RawMessage, json.RawMessage, error) { + mappingResp, err := c.Indices.GetMapping( + c.Indices.GetMapping.WithContext(ctx), + c.Indices.GetMapping.WithIndex(dataStreamName), + ) + if err != nil { + return nil, nil, fmt.Errorf("failed to get 
field mapping for data stream %q: %w", dataStreamName, err) + } + defer mappingResp.Body.Close() + if mappingResp.IsError() { + return nil, nil, fmt.Errorf("error getting mapping: %s", mappingResp) + } + body, err := io.ReadAll(mappingResp.Body) + if err != nil { + return nil, nil, fmt.Errorf("error reading mapping body: %w", err) + } + + type mappings struct { + DynamicTemplates json.RawMessage `json:"dynamic_templates"` + Properties json.RawMessage `json:"properties"` + } + + mappingsRaw := map[string]struct { + Mappings mappings `json:"mappings"` + }{} + + if err := json.Unmarshal(body, &mappingsRaw); err != nil { + return nil, nil, fmt.Errorf("error unmarshaling mappings: %w", err) + } + + if len(mappingsRaw) != 1 { + return nil, nil, fmt.Errorf("exactly 1 mapping was expected, got %d", len(mappingsRaw)) + } + + var mappingsDefinition mappings + for _, v := range mappingsRaw { + mappingsDefinition = v.Mappings + } + + return mappingsDefinition.DynamicTemplates, mappingsDefinition.Properties, nil +} diff --git a/internal/fields/mappings.go b/internal/fields/mappings.go new file mode 100644 index 000000000..32ecf1102 --- /dev/null +++ b/internal/fields/mappings.go @@ -0,0 +1,706 @@ +// Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one +// or more contributor license agreements. Licensed under the Elastic License; +// you may not use this file except in compliance with the Elastic License. + +package fields + +import ( + "context" + "encoding/json" + "errors" + "fmt" + "path/filepath" + "slices" + "strings" + + "github.com/Masterminds/semver/v3" + "github.com/google/go-cmp/cmp" + + "github.com/elastic/elastic-package/internal/elasticsearch" + "github.com/elastic/elastic-package/internal/logger" + "github.com/elastic/elastic-package/internal/multierror" +) + +// MappingValidator is responsible for mappings validation. +type MappingValidator struct { + // Schema contains definition records. 
+	Schema []FieldDefinition
+
+	// SpecVersion contains the version of the spec used by the package.
+	specVersion semver.Version
+
+	disabledDependencyManagement bool
+
+	enabledImportAllECSSchema bool
+
+	disabledNormalization bool
+
+	injectFieldsOptions InjectFieldsOptions
+
+	esClient *elasticsearch.Client
+
+	indexTemplateName string
+
+	dataStreamName string
+}
+
+// MappingValidatorOption represents an optional flag that can be passed to CreateValidatorForMappings.
+type MappingValidatorOption func(*MappingValidator) error
+
+// WithMappingValidatorSpecVersion enables validation dependent on the spec version used by the package.
+func WithMappingValidatorSpecVersion(version string) MappingValidatorOption {
+	return func(v *MappingValidator) error {
+		sv, err := semver.NewVersion(version)
+		if err != nil {
+			return fmt.Errorf("invalid version %q: %v", version, err)
+		}
+		v.specVersion = *sv
+		return nil
+	}
+}
+
+// WithMappingValidatorDisabledDependencyManagement configures the validator to ignore external fields and won't follow dependencies.
+func WithMappingValidatorDisabledDependencyManagement() MappingValidatorOption {
+	return func(v *MappingValidator) error {
+		v.disabledDependencyManagement = true
+		return nil
+	}
+}
+
+// WithMappingValidatorEnabledImportAllECSSchema configures the validator to check or not the fields with the complete ECS schema.
+func WithMappingValidatorEnabledImportAllECSSChema(importSchema bool) MappingValidatorOption {
+	return func(v *MappingValidator) error {
+		v.enabledImportAllECSSchema = importSchema
+		return nil
+	}
+}
+
+// WithMappingValidatorDisableNormalization configures the validator to disable normalization.
+func WithMappingValidatorDisableNormalization(disabledNormalization bool) MappingValidatorOption {
+	return func(v *MappingValidator) error {
+		v.disabledNormalization = disabledNormalization
+		return nil
+	}
+}
+
+// WithMappingValidatorInjectFieldsOptions configures fields injection.
+func WithMappingValidatorInjectFieldsOptions(options InjectFieldsOptions) MappingValidatorOption {
+	return func(v *MappingValidator) error {
+		v.injectFieldsOptions = options
+		return nil
+	}
+}
+
+// WithMappingValidatorElasticsearchClient configures the Elasticsearch client.
+func WithMappingValidatorElasticsearchClient(esClient *elasticsearch.Client) MappingValidatorOption {
+	return func(v *MappingValidator) error {
+		v.esClient = esClient
+		return nil
+	}
+}
+
+// WithMappingValidatorIndexTemplate configures the Index Template to query to Elasticsearch.
+func WithMappingValidatorIndexTemplate(indexTemplate string) MappingValidatorOption {
+	return func(v *MappingValidator) error {
+		v.indexTemplateName = indexTemplate
+		return nil
+	}
+}
+
+// WithMappingValidatorDataStream configures the Data Stream to query in Elasticsearch.
+func WithMappingValidatorDataStream(dataStream string) MappingValidatorOption {
+	return func(v *MappingValidator) error {
+		v.dataStreamName = dataStream
+		return nil
+	}
+}
+
+// WithMappingValidatorFallbackSchema configures a fallback schema to be used for validation instead of loading field definitions.
+func WithMappingValidatorFallbackSchema(schema []FieldDefinition) MappingValidatorOption {
+	return func(v *MappingValidator) error {
+		v.Schema = schema
+		return nil
+	}
+}
+
+// CreateValidatorForMappings function creates a validator for the mappings.
+func CreateValidatorForMappings(fieldsParentDir string, esClient *elasticsearch.Client, opts ...MappingValidatorOption) (v *MappingValidator, err error) {
+	p := packageRoot{}
+	opts = append(opts, WithMappingValidatorElasticsearchClient(esClient))
+	return createValidatorForMappingsAndPackageRoot(fieldsParentDir, p, opts...)
+} + +func createValidatorForMappingsAndPackageRoot(fieldsParentDir string, finder packageRootFinder, opts ...MappingValidatorOption) (v *MappingValidator, err error) { + v = new(MappingValidator) + for _, opt := range opts { + if err := opt(v); err != nil { + return nil, err + } + } + + if len(v.Schema) > 0 { + return v, nil + } + + fieldsDir := filepath.Join(fieldsParentDir, "fields") + + var fdm *DependencyManager + if !v.disabledDependencyManagement { + packageRoot, found, err := finder.FindPackageRoot() + if err != nil { + return nil, fmt.Errorf("can't find package root: %w", err) + } + if !found { + return nil, errors.New("package root not found and dependency management is enabled") + } + fdm, v.Schema, err = initDependencyManagement(packageRoot, v.specVersion, v.enabledImportAllECSSchema) + if err != nil { + return nil, fmt.Errorf("failed to initialize dependency management: %w", err) + } + } + fields, err := loadFieldsFromDir(fieldsDir, fdm, v.injectFieldsOptions) + if err != nil { + return nil, fmt.Errorf("can't load fields from directory (path: %s): %w", fieldsDir, err) + } + + v.Schema = append(fields, v.Schema...) 
+ return v, nil +} + +func (v *MappingValidator) ValidateIndexMappings(ctx context.Context) multierror.Error { + var errs multierror.Error + logger.Debugf("Get Mappings from data stream (%s)", v.dataStreamName) + actualDynamicTemplates, actualMappings, err := v.esClient.DataStreamMappings(ctx, v.dataStreamName) + if err != nil { + errs = append(errs, fmt.Errorf("failed to load mappings from ES (data stream %s): %w", v.dataStreamName, err)) + return errs + } + + logger.Debugf("Simulate Index Template (%s)", v.indexTemplateName) + previewDynamicTemplates, previewMappings, err := v.esClient.SimulateIndexTemplate(ctx, v.indexTemplateName) + if err != nil { + errs = append(errs, fmt.Errorf("failed to load mappings from index template preview (%s): %w", v.indexTemplateName, err)) + return errs + } + + // Code from comment posted in https://github.com/google/go-cmp/issues/224 + transformJSON := cmp.FilterValues(func(x, y []byte) bool { + return json.Valid(x) && json.Valid(y) + }, cmp.Transformer("ParseJSON", func(in []byte) string { + var tmp interface{} + if err := json.Unmarshal(in, &tmp); err != nil { + panic(err) // should never occur given previous filter to ensure valid JSON + } + out, err := json.MarshalIndent(tmp, "", " ") + if err != nil { + panic(err) + } + return string(out) + })) + + // Compare dynamic templates, this should always be the same in preview and after ingesting documents + if diff := cmp.Diff(previewDynamicTemplates, actualDynamicTemplates, transformJSON); diff != "" { + errs = append(errs, fmt.Errorf("dynamic templates are different (data stream %s):\n%s", v.dataStreamName, diff)) + } + + // Compare actual mappings: + // - If they are the same exact mapping definitions as in preview, everything should be good + // - If the same mapping exists in both, but they have different "type", there is some issue + // - If there is a new mapping, + // - It could come from a ECS definition, compare that mapping with the ECS field definitions + // - Does this 
come from some dynamic template? ECS components template or dynamic templates defined in the package? This mapping is valid + // - conditions found in current dynamic templates: match, path_match, path_unmatch, match_mapping_type, unmatch_mapping_type + // - if it does not match, there should be some issue and it should be reported + // - If the mapping is a constant_keyword type (e.g. data_stream.dataset), how to check the value? + // - if the constant_keyword is defined in the preview, it should be the same + if diff := cmp.Diff(actualMappings, previewMappings, transformJSON); diff == "" { + logger.Debug("No changes found in mappings") + return errs.Unique() + } + + var rawPreview map[string]any + err = json.Unmarshal(previewMappings, &rawPreview) + if err != nil { + errs = append(errs, fmt.Errorf("failed to unmarshal preview mappings (index template %s): %w", v.indexTemplateName, err)) + return errs.Unique() + } + var rawActual map[string]any + err = json.Unmarshal(actualMappings, &rawActual) + if err != nil { + errs = append(errs, fmt.Errorf("failed to unmarshal actual mappings (data stream %s): %w", v.dataStreamName, err)) + return errs.Unique() + } + + mappingErrs := v.compareMappings("", rawPreview, rawActual) + errs = append(errs, mappingErrs...) 
+ + if len(errs) > 0 { + return errs.Unique() + } + + return nil +} + +func currentMappingPath(path, key string) string { + if path == "" { + return key + } + return fmt.Sprintf("%s.%s", path, key) +} + +func mappingParameter(field string, definition map[string]any) string { + fieldValue, ok := definition[field] + if !ok { + return "" + } + value, ok := fieldValue.(string) + if !ok { + return "" + } + return value +} + +func isLocalFieldTypeArray(field string, schema []FieldDefinition) bool { + definition := findElementDefinitionForRoot("", field, schema) + if definition == nil { + return false + } + if definition.External != "" { + return false + } + return definition.Type == "array" +} + +func isEmptyObject(definition map[string]any) bool { + // Example: + // "_tmp": { + // "type": "object" + // }, + if len(definition) != 1 { + return false + } + return mappingParameter("type", definition) == "object" +} + +func isObject(definition map[string]any) bool { + // Example: + // "http": { + // "properties": { + // "request": { + // "properties": { + // "method": { + // "type": "keyword", + // "ignore_above": 1024 + // } + // } + // } + // } + // } + field, ok := definition["properties"] + if !ok { + return false + } + if _, ok = field.(map[string]any); !ok { + return false + } + return true +} + +func isObjectFullyDynamic(definition map[string]any) bool { + // Example: + // "labels": { + // "type": "object", + // "dynamic": "true" + // }, + fieldType := mappingParameter("type", definition) + fieldDynamic := mappingParameter("dynamic", definition) + + if fieldType != "object" { + return false + } + if fieldDynamic != "true" { + return false + } + + field, ok := definition["properties"] + if !ok { + return true + } + props, ok := field.(map[string]any) + if !ok { + return false + } + // It should not have properties + // https://www.elastic.co/guide/en/elasticsearch/reference/8.16/dynamic.html + if len(props) != 0 { + return false + } + return true +} + +func 
isMultiFields(definition map[string]any) bool { + // Example: + // "path": { + // "type": "keyword", + // "fields": { + // "text": { + // "type": "match_only_text" + // } + // } + // }, + fieldType := mappingParameter("type", definition) + if fieldType == "" { + return false + } + field, ok := definition["fields"] + if !ok { + return false + } + if _, ok = field.(map[string]any); !ok { + return false + } + return true +} + +func isNumberTypeField(previewType, actualType string) bool { + if slices.Contains([]string{"float", "long", "double"}, previewType) && slices.Contains([]string{"float", "long", "double"}, string(actualType)) { + return true + } + + return false +} + +func (v *MappingValidator) validateMappingInECSSchema(currentPath string, definition map[string]any) error { + found := FindElementDefinition(currentPath, v.Schema) + if found == nil { + return fmt.Errorf("missing definition for path") + } + + if found.External != "ecs" { + return fmt.Errorf("missing definition for path") + } + + actualType := mappingParameter("type", definition) + if found.Type == actualType { + return nil + } + + // exceptions related to numbers + if isNumberTypeField(found.Type, actualType) { + logger.Debugf("Allowed number fields with different types (ECS %s - actual %s)", string(found.Type), string(actualType)) + return nil + } + // any other field to validate here? + return fmt.Errorf("actual mapping type (%s) does not match with ECS definition type: %s", actualType, found.Type) +} + +// flattenMappings returns all the mapping definitions found at "path" flattened including +// specific entries for multi fields too. 
+func flattenMappings(path string, definition map[string]any) (map[string]any, error) { + newDefs := map[string]any{} + if isMultiFields(definition) { + multifields, err := getMappingDefinitionsField("fields", definition) + if err != nil { + return nil, multierror.Error{fmt.Errorf("invalid multi_field mapping %q: %w", path, err)} + } + + // Include also the definition itself + newDefs[path] = definition + + for key, object := range multifields { + currentPath := currentMappingPath(path, key) + def, ok := object.(map[string]any) + if !ok { + return nil, multierror.Error{fmt.Errorf("invalid multi_field mapping type: %q", path)} + } + newDefs[currentPath] = def + } + return newDefs, nil + } + + if !isObject(definition) { + newDefs[path] = definition + return newDefs, nil + } + + childMappings, ok := definition["properties"].(map[string]any) + if !ok { + // it should not happen, it is already checked above + return nil, fmt.Errorf("invalid type for properties in path: %s", path) + } + + for key, object := range childMappings { + currentPath := currentMappingPath(path, key) + // multi_fields are already managed above + // there is no need to manage that case here + value, ok := object.(map[string]any) + if ok { + other, err := flattenMappings(currentPath, value) + if err != nil { + return nil, err + } + for i, v := range other { + newDefs[i] = v + } + } + } + + return newDefs, nil +} + +func getMappingDefinitionsField(field string, definition map[string]any) (map[string]any, error) { + anyValue := definition[field] + object, ok := anyValue.(map[string]any) + if !ok { + return nil, fmt.Errorf("unexpected type found for %s: %T ", field, anyValue) + } + return object, nil +} + +func validateConstantKeywordField(path string, preview, actual map[string]any) (bool, error) { + isConstantKeyword := false + if mappingParameter("type", actual) != "constant_keyword" { + return isConstantKeyword, nil + } + isConstantKeyword = true + if mappingParameter("type", preview) != 
"constant_keyword" { + return isConstantKeyword, fmt.Errorf("invalid type for %q: no constant_keyword type set in preview mapping", path) + } + actualValue := mappingParameter("value", actual) + previewValue := mappingParameter("value", preview) + + if previewValue == "" { + // skip validating value if preview does not have that parameter defined + return isConstantKeyword, nil + } + + if previewValue != actualValue { + // This should also be detected by the failure storage (if available) + // or no documents being ingested + return isConstantKeyword, fmt.Errorf("constant_keyword value in preview %q does not match the actual mapping value %q for path: %q", previewValue, actualValue, path) + } + return isConstantKeyword, nil +} + +func (v *MappingValidator) compareMappings(path string, preview, actual map[string]any) multierror.Error { + var errs multierror.Error + isNestedParent := false + + isConstantKeywordType, err := validateConstantKeywordField(path, preview, actual) + if err != nil { + return multierror.Error{err} + } + if isConstantKeywordType { + return nil + } + + if v.specVersion.LessThan(semver3_0_1) { + if mappingParameter("type", actual) == "nested" { + logger.Warnf("Skip validation of nested object (spec version %s): %s", path, v.specVersion) + isNestedParent = true + } + } + + if isObjectFullyDynamic(actual) { + logger.Debugf("Dynamic object found but no fields ingested under path: %s.*", path) + return nil + } + + if isObject(actual) { + if isObjectFullyDynamic(preview) { + // TODO: Skip for now, it should be required to compare with dynamic templates + logger.Debugf("Pending to validate with the dynamic templates defined the path: %s", path) + return nil + } else if !isObject(preview) { + if isNestedParent { + logger.Warnf("skipped due to field of type \"nested\": not found properties in preview mappings for path %q", path) + return nil + } + errs = append(errs, fmt.Errorf("not found properties in preview mappings for path: %s", path)) + return 
errs.Unique() + } + previewProperties, err := getMappingDefinitionsField("properties", preview) + if err != nil { + errs = append(errs, fmt.Errorf("found invalid properties type in preview mappings for path %q: %w", path, err)) + } + actualProperties, err := getMappingDefinitionsField("properties", actual) + if err != nil { + errs = append(errs, fmt.Errorf("found invalid properties type in actual mappings for path %q: %w", path, err)) + } + compareErrors := v.compareMappings(path, previewProperties, actualProperties) + errs = append(errs, compareErrors...) + + if isNestedParent { + logger.Warnf("skip validation due to parent type nested:\n%s", errs.Unique().Error()) + return nil + } + + if len(errs) == 0 { + return nil + } + return errs.Unique() + } + + containsMultifield := isMultiFields(actual) + if containsMultifield { + if !isMultiFields(preview) { + errs = append(errs, fmt.Errorf("not found multi_fields in preview mappings for path: %s", path)) + return errs.Unique() + } + previewFields, err := getMappingDefinitionsField("fields", preview) + if err != nil { + errs = append(errs, fmt.Errorf("found invalid multi_fields type in preview mappings for path %q: %w", path, err)) + } + actualFields, err := getMappingDefinitionsField("fields", actual) + if err != nil { + errs = append(errs, fmt.Errorf("found invalid multi_fields type in actual mappings for path %q: %w", path, err)) + } + compareErrors := v.compareMappings(path, previewFields, actualFields) + errs = append(errs, compareErrors...) + // not returning here to keep validating the other fields of this object if any + } + + // Compare and validate the elements under "properties": objects or fields and its parameters + propertiesErrs := v.validateObjectProperties(path, containsMultifield, actual, preview) + errs = append(errs, propertiesErrs...) 
+ if isNestedParent { + logger.Warnf("skip validation due to parent type nested:\n%s", errs.Unique().Error()) + return nil + } + if len(errs) == 0 { + return nil + } + return errs.Unique() +} + +func (v *MappingValidator) validateObjectProperties(path string, containsMultifield bool, actual, preview map[string]any) multierror.Error { + var errs multierror.Error + for key, value := range actual { + if containsMultifield && key == "fields" { + // already checked + continue + } + currentPath := currentMappingPath(path, key) + if skipValidationForField(currentPath) { + logger.Debugf("Skipped mapping due to path being part of the skipped ones: %s", currentPath) + continue + } + + // This key (object) does not exist in the preview mapping + if _, ok := preview[key]; !ok { + if childField, ok := value.(map[string]any); ok { + if isEmptyObject(childField) { + // TODO: Should this be raised as an error instead? + logger.Debugf("field %q is an empty object and it does not exist in the preview", currentPath) + continue + } + ecsErrors := v.validateMappingsNotInPreview(currentPath, childField) + errs = append(errs, ecsErrors...) + } + + continue + } + + fieldErrs := v.validateObjectMappingAndParameters(preview[key], value, currentPath) + errs = append(errs, fieldErrs...) + } + if len(errs) == 0 { + return nil + } + return errs.Unique() +} + +// validateMappingsNotInPreview validates the object and the nested objects in the current path with other resources +// like ECS schema, dynamic templates or local fields defined in the package (type array). 
+func (v *MappingValidator) validateMappingsNotInPreview(currentPath string, childField map[string]any) multierror.Error { + var errs multierror.Error + logger.Debugf("Calculating flatten fields for %s", currentPath) + flattenFields, err := flattenMappings(currentPath, childField) + if err != nil { + errs = append(errs, err) + return errs + } + + for fieldPath, object := range flattenFields { + logger.Debugf("- %s", fieldPath) + + def, ok := object.(map[string]any) + if !ok { + errs = append(errs, fmt.Errorf("invalid field definition/mapping for path: %q", fieldPath)) + continue + } + + if isEmptyObject(def) { + logger.Debugf("Skip empty object path: %q", fieldPath) + continue + } + + if isLocalFieldTypeArray(fieldPath, v.Schema) && v.specVersion.LessThan(semver2_0_0) { + // Example: https://github.com/elastic/elastic-package/blob/25344b16c6eabe1478067fc55966258a59c769cd/test/packages/parallel/nginx/data_stream/access/fields/fields.yml#L5 + logger.Debugf("Found field definition with type array, skipping path: %q", fieldPath) + continue + } + + // TODO: validate mapping with dynamic templates first than validating with ECS + // just raise an error if both validation processes fail + + // are all fields under this key defined in ECS? + err = v.validateMappingInECSSchema(fieldPath, def) + if err != nil { + logger.Warnf("undefined path %q (pending to check dynamic templates)", fieldPath) + errs = append(errs, fmt.Errorf("field %q is undefined: %w", fieldPath, err)) + } + } + return errs.Unique() +} + +// validateObjectMappingAndParameters validates the current object or field parameter (currentPath) comparing the values +// in the actual mapping with the values in the preview mapping. 
+func (v *MappingValidator) validateObjectMappingAndParameters(previewValue, actualValue any, currentPath string) multierror.Error { + var errs multierror.Error + switch actualValue.(type) { + case map[string]any: + // there could be other objects nested under this key/path + previewField, ok := previewValue.(map[string]any) + if !ok { + errs = append(errs, fmt.Errorf("unexpected type in preview mappings for path: %q", currentPath)) + } + actualField, ok := actualValue.(map[string]any) + if !ok { + errs = append(errs, fmt.Errorf("unexpected type in actual mappings for path: %q", currentPath)) + } + logger.Debugf(">>>> Comparing Mappings map[string]any: path %s", currentPath) + errs = append(errs, v.compareMappings(currentPath, previewField, actualField)...) + case any: + // Validate each setting/parameter of the mapping + // If a mapping exist in both preview and actual, they should be the same. But forcing to compare each parameter just in case + if previewValue == actualValue { + return nil + } + // Get the string representation of the types via JSON Marshalling + previewData, err := json.Marshal(previewValue) + if err != nil { + errs = append(errs, fmt.Errorf("error marshalling preview value %s (path: %s): %w", previewValue, currentPath, err)) + return errs + } + + actualData, err := json.Marshal(actualValue) + if err != nil { + errs = append(errs, fmt.Errorf("error marshalling actual value %s (path: %s): %w", actualValue, currentPath, err)) + return errs + } + + // Strings from `json.Marshal` include double quotes, so they need to be removed (e.g. 
"\"float\"") + previewDataString := strings.ReplaceAll(string(previewData), "\"", "") + actualDataString := strings.ReplaceAll(string(actualData), "\"", "") + // exceptions related to numbers + // https://github.com/elastic/elastic-package/blob/8cc126ae5015dd336b22901c365e8c98db4e7c15/internal/fields/validate.go#L1234-L1247 + if isNumberTypeField(previewDataString, actualDataString) { + logger.Debugf("Allowed number fields with different types (preview %s - actual %s)", previewDataString, actualDataString) + return nil + } + + errs = append(errs, fmt.Errorf("unexpected value found in mapping for field %q: preview mappings value (%s) different from the actual mappings value (%s)", currentPath, string(previewData), string(actualData))) + } + return errs +} diff --git a/internal/fields/mappings_test.go b/internal/fields/mappings_test.go new file mode 100644 index 000000000..a0dca1bac --- /dev/null +++ b/internal/fields/mappings_test.go @@ -0,0 +1,657 @@ +// Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one +// or more contributor license agreements. Licensed under the Elastic License; +// you may not use this file except in compliance with the Elastic License. 
+ +package fields + +import ( + "testing" + + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" + + "github.com/elastic/elastic-package/internal/logger" +) + +func TestComparingMappings(t *testing.T) { + defaultSpecVersion := "3.3.0" + cases := []struct { + title string + preview map[string]any + actual map[string]any + schema []FieldDefinition + spec string + expectedErrors []string + }{ + { + title: "same mappings", + preview: map[string]any{ + "@timestamp": map[string]any{ + "type": "keyword", + }, + "host": map[string]any{ + "properties": map[string]any{ + "name": map[string]any{ + "type": "text", + }, + }, + }, + "file": map[string]any{ + "properties": map[string]any{ + "path": map[string]any{ + "type": "text", + }, + }, + }, + "foo": map[string]any{ + "type": "keyword", + "fields": map[string]any{ + "text": map[string]any{ + "type": "match_only_text", + }, + }, + }, + }, + actual: map[string]any{ + "@timestamp": map[string]any{ + "type": "keyword", + }, + "host": map[string]any{ + "properties": map[string]any{ + "name": map[string]any{ + "type": "text", + }, + }, + }, + "file": map[string]any{ + "properties": map[string]any{ + "path": map[string]any{ + "type": "text", + }, + }, + }, + "foo": map[string]any{ + "type": "keyword", + "fields": map[string]any{ + "text": map[string]any{ + "type": "match_only_text", + }, + }, + }, + }, + schema: []FieldDefinition{}, + expectedErrors: []string{}, + }, + { + title: "validate field with ECS", + preview: map[string]any{ + "foo": map[string]any{ + "type": "keyword", + }, + }, + actual: map[string]any{ + "bar": map[string]any{ + "type": "keyword", + }, + "metrics": map[string]any{ + "type": "long", + }, + "foo": map[string]any{ + "type": "keyword", + }, + }, + schema: []FieldDefinition{ + { + Name: "bar", + Type: "keyword", + External: "ecs", + }, + { + Name: "metrics", + Type: "keyword", + External: "ecs", + }, + { + Name: "user", + Type: "keyword", + External: "", + }, + }, + expectedErrors: 
[]string{ + `field "metrics" is undefined: actual mapping type (long) does not match with ECS definition type: keyword`, + }, + }, + { + title: "skip host group mappings", + preview: map[string]any{ + "@timestamp": map[string]any{ + "type": "keyword", + }, + "host": map[string]any{ + "properties": map[string]any{ + "name": map[string]any{ + "type": "text", + }, + }, + }, + }, + actual: map[string]any{ + "@timestamp": map[string]any{ + "type": "keyword", + }, + "host": map[string]any{ + "properties": map[string]any{ + "name": map[string]any{ + "type": "text", + }, + "os": map[string]any{ + "type": "text", + }, + }, + }, + }, + schema: []FieldDefinition{}, + // If this skip is not present, `host.os` would be undefined + expectedErrors: []string{}, + }, + { + title: "missing mappings", + preview: map[string]any{ + "@timestamp": map[string]any{ + "type": "keyword", + }, + }, + actual: map[string]any{ + "@timestamp": map[string]any{ + "type": "keyword", + }, + "foo": map[string]any{ + "type": "keyword", + }, + }, + schema: []FieldDefinition{}, + expectedErrors: []string{ + `field "foo" is undefined: missing definition for path`, + }, + }, + { + title: "validate constant_keyword value", + preview: map[string]any{ + "foo": map[string]any{ + "type": "constant_keyword", + "value": "example", + }, + }, + actual: map[string]any{ + "foo": map[string]any{ + "type": "constant_keyword", + "value": "bar", + }, + }, + schema: []FieldDefinition{}, + expectedErrors: []string{ + `constant_keyword value in preview "example" does not match the actual mapping value "bar" for path: "foo"`, + }, + }, + { + title: "skip constant_keyword value", + preview: map[string]any{ + "foo": map[string]any{ + "type": "constant_keyword", + }, + }, + actual: map[string]any{ + "foo": map[string]any{ + "type": "constant_keyword", + "value": "bar", + }, + }, + schema: []FieldDefinition{}, + expectedErrors: []string{}, + }, + { + title: "unexpected constant_keyword type", + preview: map[string]any{ + "foo": 
map[string]any{ + "type": "keyword", + }, + }, + actual: map[string]any{ + "foo": map[string]any{ + "type": "constant_keyword", + "value": "bar", + }, + }, + schema: []FieldDefinition{}, + expectedErrors: []string{ + `invalid type for "foo": no constant_keyword type set in preview mapping`, + }, + }, + { + title: "validate multifields failure", + preview: map[string]any{ + "foo": map[string]any{ + "type": "keyword", + "fields": map[string]any{ + "other": map[string]any{ + "type": "match_only_text", + }, + }, + }, + "bar": map[string]any{ + "properties": map[string]any{ + "type": map[string]any{ + "type": "constant_keyword", + }, + "fields": map[string]any{ + "type": "text", + "fields": map[string]any{ + "text": map[string]any{ + "type": "match_only_text", + }, + }, + }, + }, + }, + }, + actual: map[string]any{ + "foo": map[string]any{ + "type": "keyword", + "fields": map[string]any{ + "text": map[string]any{ + "type": "match_only_text", + }, + "other": map[string]any{ + "type": "match_only_text", + }, + }, + }, + "bar": map[string]any{ + "properties": map[string]any{ + "type": map[string]any{ + "type": "constant_keyword", + }, + "fields": map[string]any{ + "type": "text", + "fields": map[string]any{ + "text": map[string]any{ + "type": "match_only_text", + }, + }, + }, + }, + }, + }, + schema: []FieldDefinition{}, + expectedErrors: []string{ + `field "foo.text" is undefined: missing definition for path`, + }, + }, + { + title: "missing multifields", + preview: map[string]any{ + "foo": map[string]any{ + "type": "keyword", + }, + }, + actual: map[string]any{ + "foo": map[string]any{ + "type": "keyword", + "fields": map[string]any{ + "text": map[string]any{ + "type": "match_only_text", + }, + }, + }, + }, + schema: []FieldDefinition{}, + expectedErrors: []string{ + `not found multi_fields in preview mappings for path: foo`, + }, + }, + { + title: "validate nested object", + preview: map[string]any{ + "foo": map[string]any{ + "type": "keyword", + }, + "file": 
map[string]any{ + "properties": map[string]any{ + "size": map[string]any{ + "type": "double", + }, + }, + }, + }, + actual: map[string]any{ + "bar": map[string]any{ + "type": "keyword", + }, + "file": map[string]any{ + "properties": map[string]any{ + "path": map[string]any{ + "type": "text", + }, + }, + }, + }, + schema: []FieldDefinition{}, + expectedErrors: []string{ + `field "file.path" is undefined: missing definition for path`, + `field "bar" is undefined: missing definition for path`, + }, + }, + { + title: "empty objects", + preview: map[string]any{ + "@timestamp": map[string]any{ + "type": "keyword", + }, + }, + actual: map[string]any{ + "@timestamp": map[string]any{ + "type": "keyword", + }, + "_tmp": map[string]any{ + "type": "object", + }, + "nonexisting": map[string]any{ + "properties": map[string]any{ + "field": map[string]any{ + "type": "object", + }, + }, + }, + }, + schema: []FieldDefinition{}, + expectedErrors: []string{ + // TODO: there is an exception in the logic to not raise this error + // `field "_tmp" is undefined: missing definition for path`, + }, + }, + { + title: "skip dynamic objects", // TODO: should this be checked using dynamic templates? 
+ preview: map[string]any{ + "@timestamp": map[string]any{ + "type": "keyword", + }, + "sql": map[string]any{ + "properties": map[string]any{ + "metrics": map[string]any{ + "properties": map[string]any{ + "dynamic": "true", + "numeric": map[string]any{ + "type": "object", + "dynamic": "true", + }, + }, + }, + }, + }, + }, + actual: map[string]any{ + "@timestamp": map[string]any{ + "type": "keyword", + }, + "sql": map[string]any{ + "properties": map[string]any{ + "metrics": map[string]any{ + "properties": map[string]any{ + "dynamic": "true", + "numeric": map[string]any{ + "dynamic": "true", + "properties": map[string]any{ + "innodb_data_fsyncs": map[string]any{ + "type": "long", + }, + }, + }, + }, + }, + }, + }, + }, + schema: []FieldDefinition{}, + expectedErrors: []string{}, + }, + { + title: "compare all objects even dynamic true", // TODO: should this be checked using dynamic templates? + preview: map[string]any{ + "@timestamp": map[string]any{ + "type": "keyword", + }, + "sql": map[string]any{ + "properties": map[string]any{ + "metrics": map[string]any{ + "properties": map[string]any{ + "dynamic": "true", + "numeric": map[string]any{ + "type": "object", + "dynamic": "true", + }, + }, + }, + }, + }, + }, + actual: map[string]any{ + "@timestamp": map[string]any{ + "type": "keyword", + }, + "sql": map[string]any{ + "properties": map[string]any{ + "metrics": map[string]any{ + "properties": map[string]any{ + "dynamic": "true", + "numeric": map[string]any{ + "dynamic": "true", + "properties": map[string]any{ + "innodb_data_fsyncs": map[string]any{ + "type": "long", + }, + }, + }, + "example": map[string]any{ + "type": "keyword", + }, + }, + }, + }, + }, + }, + schema: []FieldDefinition{}, + expectedErrors: []string{ + `field "sql.metrics.example" is undefined: missing definition for path`, + }, + }, + { + title: "ignore local type array objects", + preview: map[string]any{ + "foo": map[string]any{ + "type": "constant_keyword", + "value": "example", + }, + }, + 
actual: map[string]any{ + "foo": map[string]any{ + "type": "constant_keyword", + "value": "example", + }, + "access": map[string]any{ + "properties": map[string]any{ + "field": map[string]any{ + "type": "keyword", + "ignore_above": 1024, + }, + }, + }, + "error": map[string]any{ + "properties": map[string]any{ + "field": map[string]any{ + "type": "keyword", + "ignore_above": 1024, + }, + }, + }, + "status": map[string]any{ + "properties": map[string]any{ + "field": map[string]any{ + "type": "keyword", + "ignore_above": 1024, + }, + }, + }, + }, + schema: []FieldDefinition{ + { + Name: "access.field", + Type: "array", + External: "", + }, + { + Name: "status.field", + Type: "array", + External: "ecs", + }, + }, + spec: "1.0.0", + expectedErrors: []string{ + `field "error.field" is undefined: missing definition for path`, + // should status.field return error ? or should it be ignored? + `field "status.field" is undefined: actual mapping type (keyword) does not match with ECS definition type: array`, + }, + }, + { + title: "properties and type as a fields", + preview: map[string]any{ + "foo": map[string]any{ + "properties": map[string]any{ + "type": map[string]any{ + "type": "keyword", + "ignore_above": 1024, + }, + "properties": map[string]any{ + "type": "keyword", + "ignore_above": 1024, + }, + }, + }, + }, + actual: map[string]any{ + "foo": map[string]any{ + "properties": map[string]any{ + "type": map[string]any{ + "type": "keyword", + "ignore_above": 1024, + }, + "properties": map[string]any{ + "type": "keyword", + "ignore_above": 1024, + }, + }, + }, + }, + schema: []FieldDefinition{}, + expectedErrors: []string{}, + }, + { + title: "different parameter values within an object", + preview: map[string]any{ + "foo": map[string]any{ + "properties": map[string]any{ + "type": map[string]any{ + "type": "keyword", + "ignore_above": 1024, + }, + }, + }, + }, + actual: map[string]any{ + "foo": map[string]any{ + "properties": map[string]any{ + "type": map[string]any{ + 
"type": "long", + "ignore_above": 2048, + }, + }, + }, + }, + schema: []FieldDefinition{}, + expectedErrors: []string{ + `unexpected value found in mapping for field "foo.type.type": preview mappings value ("keyword") different from the actual mappings value ("long")`, + `unexpected value found in mapping for field "foo.type.ignore_above": preview mappings value (1024) different from the actual mappings value (2048)`, + }, + }, + { + title: "different number types", + preview: map[string]any{ + "foo": map[string]any{ + "type": "float", + }, + }, + actual: map[string]any{ + "foo": map[string]any{ + "type": "long", + }, + "bar": map[string]any{ + "type": "long", + }, + }, + schema: []FieldDefinition{ + { + Name: "bar", + Type: "float", + External: "ecs", + }, + }, + expectedErrors: []string{}, + }, + { + title: "skip nested types before spec 3.0.1", + preview: map[string]any{ + "foo": map[string]any{ + "type": "nested", + }, + }, + actual: map[string]any{ + "foo": map[string]any{ + "type": "nested", + "properties": map[string]any{ + "bar": map[string]any{ + "type": "long", + }, + }, + }, + }, + spec: "3.0.0", + schema: []FieldDefinition{}, + expectedErrors: []string{}, + }, + } + + for _, c := range cases { + t.Run(c.title, func(t *testing.T) { + logger.EnableDebugMode() + specVersion := defaultSpecVersion + if c.spec != "" { + specVersion = c.spec + } + v, err := CreateValidatorForMappings("", nil, + WithMappingValidatorSpecVersion(specVersion), + WithMappingValidatorFallbackSchema(c.schema), + WithMappingValidatorDisabledDependencyManagement(), + ) + require.NoError(t, err) + + errs := v.compareMappings("", c.preview, c.actual) + if len(c.expectedErrors) > 0 { + assert.Len(t, errs, len(c.expectedErrors)) + for _, err := range errs { + assert.Contains(t, c.expectedErrors, err.Error()) + } + } else { + assert.Len(t, errs, 0) + } + }) + } +} diff --git a/internal/fields/validate.go b/internal/fields/validate.go index 20cde32a5..61cd51b32 100644 --- 
a/internal/fields/validate.go +++ b/internal/fields/validate.go @@ -342,9 +342,31 @@ func initDependencyManagement(packageRoot string, specVersion semver.Version, im // add the ecs@mappings component template. schema = appendECSMappingMultifields(schema, "") + // Force to set External as "ecs" in all these fields to be able to distinguish + // which fields come from ECS and which ones are loaded from the package directory + schema = setExternalAsECS(schema) + return fdm, schema, nil } +func setExternalAsECS(fields []FieldDefinition) []FieldDefinition { + for i := 0; i < len(fields); i++ { + f := &fields[i] + f.External = "ecs" + if len(f.MultiFields) > 0 { + for j := 0; j < len(f.MultiFields); j++ { + mf := &f.MultiFields[j] + mf.External = "ecs" + } + } + if len(f.Fields) > 0 { + f.Fields = setExternalAsECS(f.Fields) + } + } + + return fields +} + // supportsECSMappings check if all the versions of the stack the package can run on support ECS mappings. func supportsECSMappings(packageRoot string) (bool, error) { packageManifest, err := packages.ReadPackageManifestFromPackageRoot(packageRoot) diff --git a/internal/fields/validate_test.go b/internal/fields/validate_test.go index e52673430..49cbddb96 100644 --- a/internal/fields/validate_test.go +++ b/internal/fields/validate_test.go @@ -117,6 +117,58 @@ func TestValidate_WithStringNumberFields(t *testing.T) { require.Empty(t, errs) } +func TestValidate_SetExternalECS(t *testing.T) { + finder := packageRootTestFinder{"../../test/packages/other/imported_mappings_tests"} + + validator, err := createValidatorForDirectoryAndPackageRoot("../../test/packages/other/imported_mappings_tests/data_stream/first", + finder, + WithSpecVersion("2.3.0"), + WithEnabledImportAllECSSChema(true)) + require.NoError(t, err) + require.NotNil(t, validator) + + require.NotEmpty(t, validator.Schema) + + cases := []struct { + title string + field string + external string + exists bool + }{ + { + title: "field defined just in ECS", + field: 
"ecs.version", + external: "ecs", + exists: true, + }, + { + title: "field defined fields directory package", + field: "service.status.duration.histogram", + external: "", + exists: true, + }, + { + title: "undefined field", + field: "foo", + external: "", + exists: false, + }, + } + + for _, c := range cases { + t.Run(c.title, func(t *testing.T) { + found := FindElementDefinition(c.field, validator.Schema) + if !c.exists { + assert.Nil(t, found) + return + } + + require.NotNil(t, found) + assert.Equal(t, c.external, found.External) + }) + } +} + func TestValidate_WithEnabledImportAllECSSchema(t *testing.T) { finder := packageRootTestFinder{"../../test/packages/other/imported_mappings_tests"} diff --git a/internal/testrunner/runners/system/tester.go b/internal/testrunner/runners/system/tester.go index 309636007..23ee43423 100644 --- a/internal/testrunner/runners/system/tester.go +++ b/internal/testrunner/runners/system/tester.go @@ -133,10 +133,25 @@ var ( }, }, } - enableIndependentAgentsEnv = environment.WithElasticPackagePrefix("TEST_ENABLE_INDEPENDENT_AGENT") - dumpScenarioDocsEnv = environment.WithElasticPackagePrefix("TEST_DUMP_SCENARIO_DOCS") + enableIndependentAgentsEnv = environment.WithElasticPackagePrefix("TEST_ENABLE_INDEPENDENT_AGENT") + dumpScenarioDocsEnv = environment.WithElasticPackagePrefix("TEST_DUMP_SCENARIO_DOCS") + fieldValidationTestMethodEnv = environment.WithElasticPackagePrefix("FIELD_VALIDATION_TEST_METHOD") ) +type fieldValidationMethod int + +const ( + allMethods fieldValidationMethod = iota + fieldsMethod + mappingsMethod +) + +var validationMethods = map[string]fieldValidationMethod{ + "all": allMethods, + "fields": fieldsMethod, + "mappings": mappingsMethod, +} + type tester struct { profile *profile.Profile testFolder testrunner.TestFolder @@ -148,6 +163,8 @@ type tester struct { runIndependentElasticAgent bool + fieldValidationMethod fieldValidationMethod + deferCleanup time.Duration serviceVariant string configFileName string @@ 
-273,6 +290,17 @@ func NewSystemTester(options SystemTesterOptions) (*tester, error) { r.runIndependentElasticAgent = strings.ToLower(v) == "true" } + // default method using just fields + r.fieldValidationMethod = fieldsMethod + v, ok = os.LookupEnv(fieldValidationTestMethodEnv) + if ok { + method, ok := validationMethods[v] + if !ok { + return nil, fmt.Errorf("invalid field method option: %s", v) + } + r.fieldValidationMethod = method + } + return &r, nil } @@ -841,6 +869,7 @@ func (r *tester) getFailureStoreDocs(ctx context.Context, dataStream string) ([] type scenarioTest struct { dataStream string + indexTemplateName string policyTemplateName string kibanaDataStream kibana.PackageDataStream syntheticEnabled bool @@ -1070,10 +1099,14 @@ func (r *tester) prepareScenario(ctx context.Context, config *testConfig, svcInf dataStreamDataset = dataset } } - scenario.dataStream = fmt.Sprintf( - "%s-%s-%s", + scenario.indexTemplateName = fmt.Sprintf( + "%s-%s", ds.Inputs[0].Streams[0].DataStream.Type, dataStreamDataset, + ) + scenario.dataStream = fmt.Sprintf( + "%s-%s", + scenario.indexTemplateName, ds.Namespace, ) @@ -1460,11 +1493,14 @@ func (r *tester) validateTestScenario(ctx context.Context, result *testrunner.Re if err != nil { return result.WithErrorf("creating fields validator for data stream failed (path: %s): %w", r.dataStreamPath, err) } - if errs := validateFields(scenario.docs, fieldsValidator); len(errs) > 0 { - return result.WithError(testrunner.ErrTestCaseFailed{ - Reason: fmt.Sprintf("one or more errors found in documents stored in %s data stream", scenario.dataStream), - Details: errs.Error(), - }) + + if r.fieldValidationMethod == allMethods || r.fieldValidationMethod == fieldsMethod { + if errs := validateFields(scenario.docs, fieldsValidator); len(errs) > 0 { + return result.WithError(testrunner.ErrTestCaseFailed{ + Reason: fmt.Sprintf("one or more errors found in documents stored in %s data stream", scenario.dataStream), + Details: errs.Error(), + 
}) + } } err = validateIgnoredFields(r.stackVersion.Number, scenario, config) @@ -1472,6 +1508,27 @@ func (r *tester) validateTestScenario(ctx context.Context, result *testrunner.Re return result.WithError(err) } + if r.fieldValidationMethod == allMethods || r.fieldValidationMethod == mappingsMethod { + logger.Warn("Validate mappings found (technical preview)") + mappingsValidator, err := fields.CreateValidatorForMappings(r.dataStreamPath, r.esClient, + fields.WithMappingValidatorFallbackSchema(fieldsValidator.Schema), + fields.WithMappingValidatorIndexTemplate(scenario.indexTemplateName), + fields.WithMappingValidatorDataStream(scenario.dataStream), + fields.WithMappingValidatorSpecVersion(r.pkgManifest.SpecVersion), + fields.WithMappingValidatorEnabledImportAllECSSChema(true), + ) + if err != nil { + return result.WithErrorf("creating mappings validator for data stream failed (data stream: %s): %w", scenario.dataStream, err) + } + + if errs := validateMappings(ctx, mappingsValidator); len(errs) > 0 { + return result.WithError(testrunner.ErrTestCaseFailed{ + Reason: fmt.Sprintf("one or more errors found in mappings in %s index template", scenario.indexTemplateName), + Details: errs.Error(), + }) + } + } + docs := scenario.docs if scenario.syntheticEnabled { docs, err = fieldsValidator.SanitizeSyntheticSourceDocs(scenario.docs) @@ -2126,6 +2183,14 @@ func validateIgnoredFields(stackVersionString string, scenario *scenarioTest, co return nil } +func validateMappings(ctx context.Context, mappingsValidator *fields.MappingValidator) multierror.Error { + multiErr := mappingsValidator.ValidateIndexMappings(ctx) + if len(multiErr) > 0 { + return multiErr.Unique() + } + return nil +} + func assertHitCount(expected int, docs []common.MapStr) (pass bool, message string) { if expected != 0 { observed := len(docs) diff --git a/tools/readme/readme.md.tmpl b/tools/readme/readme.md.tmpl index 3c631f8b4..cae41abae 100644 --- a/tools/readme/readme.md.tmpl +++ 
b/tools/readme/readme.md.tmpl @@ -237,9 +237,16 @@ There are available some environment variables that could be used to change some - `ELASTIC_PACKAGE_DISABLE_ELASTIC_AGENT_WOLFI`: If set to `true`, the Elastic Agent image used for running agents will be using the Ubuntu docker images (e.g. `docker.elastic.co/elastic-agent/elastic-agent-complete`). If set to `false`, the Elastic Agent image used for the running agents will be based on the wolfi images (e.g. `docker.elastic.co/elastic-agent/elastic-agent-wolfi`). Default: `false`. - - `ELASTIC_PACKAGE_TEST_DUMP_SCENARIO_DOCS. If the variable is set, elastic-package will dump to a file the documents generated + - `ELASTIC_PACKAGE_TEST_DUMP_SCENARIO_DOCS`. If the variable is set, elastic-package will dump to a file the documents generated by system tests before they are verified. This is useful to know exactly what fields are being verified when investigating issues on this step. Documents are dumped to a file in the system temporary directory. It is disabled by default. + - `ELASTIC_PACKAGE_TEST_ENABLE_INDEPENDENT_AGENT`. If the variable is set to false, all system tests defined in the package will use + the Elastic Agent started along with the stack. If set to true, a new Elastic Agent will be started and enrolled for each test defined in the + package (and unenrolled at the end of each test). Default: `true`. + - `ELASTIC_PACKAGE_FIELD_VALIDATION_TEST_METHOD`. This variable can take one of these values: `all`, `mappings` or `fields`. If this + variable is set to `fields`, then validation of fields will be based on the documents ingested into Elasticsearch. If this is set to + `mappings`, then validation of fields will be based on the mappings generated when the documents are ingested into Elasticsearch. If + set to `all`, then validation will be based on both methods mentioned previously. Default option: `fields`. 
- To configure the Elastic stack to be used by `elastic-package`: - `ELASTIC_PACKAGE_ELASTICSEARCH_HOST`: Host of the elasticsearch (e.g. https://127.0.0.1:9200)