Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

All expectations fail if one fails with 'unrecognized condition_parser None for Spark execution engine' #10709

Open
iamamutt opened this issue Nov 26, 2024 · 1 comment
Labels
bug Bugs bugs bugs!

Comments

@iamamutt
Copy link

Describe the bug
Failing to specify `condition_parser` for a single expectation that uses a `row_condition` causes every expectation in the suite to fail — not just the misconfigured one.

To Reproduce

import tempfile

from pprint import pprint

import great_expectations as gx
import pandas as pd

# Sample table: an int column, a string column with one null, and a float
# column with one null (the null in col3 is what the row_condition targets).
data = dict(
    col1=[1, 2, 3, 4, 5],
    col2=['A', 'B', 'C', 'D', None],
    col3=[1.1, None, 3.3, 4.4, 5.5],
)


def validate(dir_path: str, file_name: str):
    """Run a two-expectation suite against a Spark filesystem CSV asset.

    Creates an ephemeral GX context, registers the suite and the data
    source/asset/batch definition, then runs a ValidationDefinition and
    returns its result.

    NOTE: the second expectation uses a ``row_condition`` but deliberately
    omits ``condition_parser`` (see the commented-out line) — this is the
    trigger for the bug being reported.
    """
    context = gx.get_context(mode='ephemeral')

    not_null_expectation = gx.expectations.ExpectColumnValuesToNotBeNull(
        column='col1', result_format='COMPLETE'
    )
    in_set_expectation = gx.expectations.ExpectColumnValuesToBeInSet(
        column='col2',
        value_set=['A', 'B', 'C'],
        row_condition='col3 IS NOT NULL',
        mostly=0.665,
        # condition_parser='spark',
        result_format='COMPLETE',
    )
    suite = context.suites.add(
        gx.ExpectationSuite(
            name='test-suite',
            expectations=[not_null_expectation, in_set_expectation],
        )
    )

    datasource = context.data_sources.add_spark_filesystem(
        name='test-spark-fs',
        base_directory=dir_path,
    )
    csv_asset = datasource.add_csv_asset(
        name='csv-asset',
        sep=',',
        header=True,
        infer_schema=True,
    )
    batch_definition = csv_asset.add_batch_definition_path(
        name='test-data',
        path=file_name,
    )

    validation = gx.ValidationDefinition(
        name='test-validation',
        data=batch_definition,
        suite=suite,
    )
    return validation.run()


# Write the sample data to a temporary CSV, run the validation, and pretty-print
# the JSON-serializable result that demonstrates the failure.
with tempfile.TemporaryDirectory() as dir_path:
    file_name = 'data.csv'
    csv_path = f'{dir_path}/{file_name}'
    pd.DataFrame(data).to_csv(csv_path, index=False)
    result = validate(dir_path, file_name)
    pprint(result.to_json_dict(), sort_dicts=False, width=100)

Expected behavior
Only the misconfigured expectation should fail and show exception info; the other expectations in the suite should still be evaluated and reported normally.

Environment (please complete the following information):

  • Operating System: Linux
  • Great Expectations Version: 1.2.4
  • Data Source: Spark file

Additional context

{
  "success": false,
  "results": [
    {
      "success": false,
      "expectation_config": {
        "type": "expect_column_values_to_not_be_null",
        "kwargs": {
          "result_format": "COMPLETE",
          "column": "col1",
          "batch_id": "test-spark-fs-csv-asset"
        },
        "meta": {},
        "id": "0e734de0-e872-43e7-adc7-07c868c689d7"
      },
      "result": {},
      "meta": {},
      "exception_info": {
        "('table.row_count', '0dfa72ce94f9f181a7dc04305a6c30f7', ())": {
          "exception_traceback": "Traceback (most recent call last):\n  File \"/home/jburling/.local/conda/envs/stonevalley/lib/python3.12/site-packages/great_expectations/execution_engine/execution_engine.py\", line 545, in _process_direct_and_bundled_metric_computation_configurations\n    self.resolve_metric_bundle(metric_fn_bundle=metric_fn_bundle_configurations)\n  File \"/home/jburling/.local/conda/envs/stonevalley/lib/python3.12/site-packages/great_expectations/execution_engine/sparkdf_execution_engine.py\", line 900, in resolve_metric_bundle\n    df: pyspark.DataFrame = self.get_domain_records(domain_kwargs=domain_kwargs)\n                            ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n  File \"/home/jburling/.local/conda/envs/stonevalley/lib/python3.12/site-packages/great_expectations/execution_engine/sparkdf_execution_engine.py\", line 681, in get_domain_records\n    raise GreatExpectationsError(  # noqa: TRY003\ngreat_expectations.exceptions.exceptions.GreatExpectationsError: unrecognized condition_parser None for Spark execution engine\n\nThe above exception was the direct cause of the following exception:\n\nTraceback (most recent call last):\n  File \"/home/jburling/.local/conda/envs/stonevalley/lib/python3.12/site-packages/great_expectations/validator/validation_graph.py\", line 276, in _resolve\n    self._execution_engine.resolve_metrics(\n  File \"/home/jburling/.local/conda/envs/stonevalley/lib/python3.12/site-packages/great_expectations/execution_engine/execution_engine.py\", line 279, in resolve_metrics\n    return self._process_direct_and_bundled_metric_computation_configurations(\n           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n  File \"/home/jburling/.local/conda/envs/stonevalley/lib/python3.12/site-packages/great_expectations/execution_engine/execution_engine.py\", line 549, in _process_direct_and_bundled_metric_computation_configurations\n    raise 
gx_exceptions.MetricResolutionError(\ngreat_expectations.exceptions.exceptions.MetricResolutionError: unrecognized condition_parser None for Spark execution engine\n",
          "exception_message": "unrecognized condition_parser None for Spark execution engine",
          "raised_exception": true
        }
      }
    },
    {
      "success": false,
      "expectation_config": {
        "type": "expect_column_values_to_be_in_set",
        "kwargs": {
          "result_format": "COMPLETE",
          "column": "col2",
          "mostly": 0.665,
          "row_condition": "col3 IS NOT NULL",
          "value_set": [
            "A",
            "B",
            "C"
          ],
          "batch_id": "test-spark-fs-csv-asset"
        },
        "meta": {},
        "id": "c648e872-154c-4374-9cf1-cb8751e1c6d2"
      },
      "result": {},
      "meta": {},
      "exception_info": {
        "('table.column_types', 'e48bc318d7e9c92e270e3f7ab807c1b8', 'include_nested=True')": {
          "exception_traceback": "Traceback (most recent call last):\n  File \"/home/jburling/.local/conda/envs/stonevalley/lib/python3.12/site-packages/great_expectations/execution_engine/execution_engine.py\", line 532, in _process_direct_and_bundled_metric_computation_configurations\n    metric_computation_configuration.metric_fn(  # type: ignore[misc] # F not callable\n  File \"/home/jburling/.local/conda/envs/stonevalley/lib/python3.12/site-packages/great_expectations/expectations/metrics/metric_provider.py\", line 60, in inner_func\n    return metric_fn(*args, **kwargs)\n           ^^^^^^^^^^^^^^^^^^^^^^^^^^\n  File \"/home/jburling/.local/conda/envs/stonevalley/lib/python3.12/site-packages/great_expectations/expectations/metrics/table_metrics/table_column_types.py\", line 81, in _spark\n    df, _, _ = execution_engine.get_compute_domain(\n               ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n  File \"/home/jburling/.local/conda/envs/stonevalley/lib/python3.12/site-packages/great_expectations/execution_engine/sparkdf_execution_engine.py\", line 800, in get_compute_domain\n    data: pyspark.DataFrame = self.get_domain_records(domain_kwargs=domain_kwargs)\n                              ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n  File \"/home/jburling/.local/conda/envs/stonevalley/lib/python3.12/site-packages/great_expectations/execution_engine/sparkdf_execution_engine.py\", line 681, in get_domain_records\n    raise GreatExpectationsError(  # noqa: TRY003\ngreat_expectations.exceptions.exceptions.GreatExpectationsError: unrecognized condition_parser None for Spark execution engine\n\nThe above exception was the direct cause of the following exception:\n\nTraceback (most recent call last):\n  File \"/home/jburling/.local/conda/envs/stonevalley/lib/python3.12/site-packages/great_expectations/validator/validation_graph.py\", line 276, in _resolve\n    self._execution_engine.resolve_metrics(\n  File 
\"/home/jburling/.local/conda/envs/stonevalley/lib/python3.12/site-packages/great_expectations/execution_engine/execution_engine.py\", line 279, in resolve_metrics\n    return self._process_direct_and_bundled_metric_computation_configurations(\n           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n  File \"/home/jburling/.local/conda/envs/stonevalley/lib/python3.12/site-packages/great_expectations/execution_engine/execution_engine.py\", line 537, in _process_direct_and_bundled_metric_computation_configurations\n    raise gx_exceptions.MetricResolutionError(\ngreat_expectations.exceptions.exceptions.MetricResolutionError: unrecognized condition_parser None for Spark execution engine\n",
          "exception_message": "unrecognized condition_parser None for Spark execution engine",
          "raised_exception": true
        },
        "('table.row_count', 'e48bc318d7e9c92e270e3f7ab807c1b8', ())": {
          "exception_traceback": "Traceback (most recent call last):\n  File \"/home/jburling/.local/conda/envs/stonevalley/lib/python3.12/site-packages/great_expectations/execution_engine/execution_engine.py\", line 545, in _process_direct_and_bundled_metric_computation_configurations\n    self.resolve_metric_bundle(metric_fn_bundle=metric_fn_bundle_configurations)\n  File \"/home/jburling/.local/conda/envs/stonevalley/lib/python3.12/site-packages/great_expectations/execution_engine/sparkdf_execution_engine.py\", line 900, in resolve_metric_bundle\n    df: pyspark.DataFrame = self.get_domain_records(domain_kwargs=domain_kwargs)\n                            ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n  File \"/home/jburling/.local/conda/envs/stonevalley/lib/python3.12/site-packages/great_expectations/execution_engine/sparkdf_execution_engine.py\", line 681, in get_domain_records\n    raise GreatExpectationsError(  # noqa: TRY003\ngreat_expectations.exceptions.exceptions.GreatExpectationsError: unrecognized condition_parser None for Spark execution engine\n\nThe above exception was the direct cause of the following exception:\n\nTraceback (most recent call last):\n  File \"/home/jburling/.local/conda/envs/stonevalley/lib/python3.12/site-packages/great_expectations/validator/validation_graph.py\", line 276, in _resolve\n    self._execution_engine.resolve_metrics(\n  File \"/home/jburling/.local/conda/envs/stonevalley/lib/python3.12/site-packages/great_expectations/execution_engine/execution_engine.py\", line 279, in resolve_metrics\n    return self._process_direct_and_bundled_metric_computation_configurations(\n           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n  File \"/home/jburling/.local/conda/envs/stonevalley/lib/python3.12/site-packages/great_expectations/execution_engine/execution_engine.py\", line 549, in _process_direct_and_bundled_metric_computation_configurations\n    raise 
gx_exceptions.MetricResolutionError(\ngreat_expectations.exceptions.exceptions.MetricResolutionError: unrecognized condition_parser None for Spark execution engine\n",
          "exception_message": "unrecognized condition_parser None for Spark execution engine",
          "raised_exception": true
        }
      }
    }
  ],
  "suite_name": "test-suite",
  "suite_parameters": {},
  "statistics": {
    "evaluated_expectations": 2,
    "successful_expectations": 0,
    "unsuccessful_expectations": 2,
    "success_percent": 0.0
  },
  "meta": {
    "great_expectations_version": "1.2.4",
    "batch_spec": {
      "path": "/tmp/tmpgf8032g5/data.csv",
      "reader_method": "csv",
      "reader_options": {
        "sep": ",",
        "header": true,
        "inferSchema": true
      }
    },
    "batch_markers": {
      "ge_load_time": "20241126T222652.157081Z"
    },
    "active_batch_definition": {
      "datasource_name": "test-spark-fs",
      "data_connector_name": "fluent",
      "data_asset_name": "csv-asset",
      "batch_identifiers": {
        "path": "data.csv"
      },
      "batching_regex": "(?P<path>data.csv)"
    },
    "validation_id": "cd188ea7-bedf-4f8a-9898-1cf823b69b5f",
    "checkpoint_id": null,
    "batch_parameters": null
  },
  "id": null
}
@adeola-ak adeola-ak moved this from To Do to In progress in GX Core Issues Board Dec 9, 2024
@adeola-ak adeola-ak added the bug Bugs bugs bugs! label Dec 9, 2024
@adeola-ak
Copy link
Contributor

Hi there, thank you for bringing this to our attention. I've shared it with the team and I will follow up with you once I have any updates

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Labels
bug Bugs bugs bugs!
Projects
Status: In progress
Development

No branches or pull requests

2 participants