Describe the bug

Failure to specify `condition_parser` for a single expectation results in all expectations failing.

To Reproduce
```python
import tempfile
from pprint import pprint

import great_expectations as gx
import pandas as pd

data = {
    'col1': [1, 2, 3, 4, 5],
    'col2': ['A', 'B', 'C', 'D', None],
    'col3': [1.1, None, 3.3, 4.4, 5.5],
}


def validate(dir_path: str, file_name: str):
    context = gx.get_context(mode='ephemeral')
    suite = context.suites.add(
        gx.ExpectationSuite(
            name='test-suite',
            expectations=[
                gx.expectations.ExpectColumnValuesToNotBeNull(
                    column='col1', result_format='COMPLETE'
                ),
                gx.expectations.ExpectColumnValuesToBeInSet(
                    column='col2',
                    value_set=['A', 'B', 'C'],
                    row_condition='col3 IS NOT NULL',
                    mostly=0.665,
                    # condition_parser='spark',
                    result_format='COMPLETE',
                ),
            ],
        )
    )
    return gx.ValidationDefinition(
        name='test-validation',
        data=(
            context.data_sources.add_spark_filesystem(
                name='test-spark-fs',
                base_directory=dir_path,
            )
            .add_csv_asset(
                name='csv-asset',
                sep=',',
                header=True,
                infer_schema=True,
            )
            .add_batch_definition_path(
                name='test-data',
                path=file_name,
            )
        ),
        suite=suite,
    ).run()


with tempfile.TemporaryDirectory() as dir_path:
    file_name = 'data.csv'
    pd.DataFrame(data).to_csv(f'{dir_path}/{file_name}', index=False)
    result = validate(dir_path, file_name)
    pprint(result.to_json_dict(), sort_dicts=False, width=100)
```
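For reference, the commented-out line above appears to be the intended configuration. Below is a sketch of the row-conditioned expectation with the parser named explicitly (an assumption based on that comment; the rest of the repro is unchanged), which I would expect to avoid the error shown further down:

```python
# Workaround sketch (assumption): name the condition parser explicitly so the
# Spark execution engine knows how to interpret `row_condition`. Only this
# expectation changes; everything else in the repro above stays the same.
gx.expectations.ExpectColumnValuesToBeInSet(
    column='col2',
    value_set=['A', 'B', 'C'],
    row_condition='col3 IS NOT NULL',
    condition_parser='spark',  # previously commented out, which triggers the failure
    mostly=0.665,
    result_format='COMPLETE',
)
```

The point of the report is that leaving the parser unspecified should only affect this expectation, not the unrelated `col1` expectation.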
Expected behavior

Only expectations with errors should fail and show exception info.
Environment (please complete the following information):
- Great Expectations 1.2.4 (per `great_expectations_version` in the output below)
- Python 3.12 with the Spark filesystem data source (per the tracebacks below)
Additional context
{ "success": false, "results": [ { "success": false, "expectation_config": { "type": "expect_column_values_to_not_be_null", "kwargs": { "result_format": "COMPLETE", "column": "col1", "batch_id": "test-spark-fs-csv-asset" }, "meta": {}, "id": "0e734de0-e872-43e7-adc7-07c868c689d7" }, "result": {}, "meta": {}, "exception_info": { "('table.row_count', '0dfa72ce94f9f181a7dc04305a6c30f7', ())": { "exception_traceback": "Traceback (most recent call last):\n File \"/home/jburling/.local/conda/envs/stonevalley/lib/python3.12/site-packages/great_expectations/execution_engine/execution_engine.py\", line 545, in _process_direct_and_bundled_metric_computation_configurations\n self.resolve_metric_bundle(metric_fn_bundle=metric_fn_bundle_configurations)\n File \"/home/jburling/.local/conda/envs/stonevalley/lib/python3.12/site-packages/great_expectations/execution_engine/sparkdf_execution_engine.py\", line 900, in resolve_metric_bundle\n df: pyspark.DataFrame = self.get_domain_records(domain_kwargs=domain_kwargs)\n ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n File \"/home/jburling/.local/conda/envs/stonevalley/lib/python3.12/site-packages/great_expectations/execution_engine/sparkdf_execution_engine.py\", line 681, in get_domain_records\n raise GreatExpectationsError( # noqa: TRY003\ngreat_expectations.exceptions.exceptions.GreatExpectationsError: unrecognized condition_parser None for Spark execution engine\n\nThe above exception was the direct cause of the following exception:\n\nTraceback (most recent call last):\n File \"/home/jburling/.local/conda/envs/stonevalley/lib/python3.12/site-packages/great_expectations/validator/validation_graph.py\", line 276, in _resolve\n self._execution_engine.resolve_metrics(\n File \"/home/jburling/.local/conda/envs/stonevalley/lib/python3.12/site-packages/great_expectations/execution_engine/execution_engine.py\", line 279, in resolve_metrics\n return self._process_direct_and_bundled_metric_computation_configurations(\n ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n File \"/home/jburling/.local/conda/envs/stonevalley/lib/python3.12/site-packages/great_expectations/execution_engine/execution_engine.py\", line 549, in _process_direct_and_bundled_metric_computation_configurations\n raise gx_exceptions.MetricResolutionError(\ngreat_expectations.exceptions.exceptions.MetricResolutionError: unrecognized condition_parser None for Spark execution engine\n", "exception_message": "unrecognized condition_parser None for Spark execution engine", "raised_exception": true } } }, { "success": false, "expectation_config": { "type": "expect_column_values_to_be_in_set", "kwargs": { "result_format": "COMPLETE", "column": "col2", "mostly": 0.665, "row_condition": "col3 IS NOT NULL", "value_set": [ "A", "B", "C" ], "batch_id": "test-spark-fs-csv-asset" }, "meta": {}, "id": "c648e872-154c-4374-9cf1-cb8751e1c6d2" }, "result": {}, "meta": {}, "exception_info": { "('table.column_types', 'e48bc318d7e9c92e270e3f7ab807c1b8', 'include_nested=True')": { "exception_traceback": "Traceback (most recent call last):\n File \"/home/jburling/.local/conda/envs/stonevalley/lib/python3.12/site-packages/great_expectations/execution_engine/execution_engine.py\", line 532, in _process_direct_and_bundled_metric_computation_configurations\n metric_computation_configuration.metric_fn( # type: ignore[misc] # F not callable\n File \"/home/jburling/.local/conda/envs/stonevalley/lib/python3.12/site-packages/great_expectations/expectations/metrics/metric_provider.py\", line 60, in 
inner_func\n return metric_fn(*args, **kwargs)\n ^^^^^^^^^^^^^^^^^^^^^^^^^^\n File \"/home/jburling/.local/conda/envs/stonevalley/lib/python3.12/site-packages/great_expectations/expectations/metrics/table_metrics/table_column_types.py\", line 81, in _spark\n df, _, _ = execution_engine.get_compute_domain(\n ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n File \"/home/jburling/.local/conda/envs/stonevalley/lib/python3.12/site-packages/great_expectations/execution_engine/sparkdf_execution_engine.py\", line 800, in get_compute_domain\n data: pyspark.DataFrame = self.get_domain_records(domain_kwargs=domain_kwargs)\n ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n File \"/home/jburling/.local/conda/envs/stonevalley/lib/python3.12/site-packages/great_expectations/execution_engine/sparkdf_execution_engine.py\", line 681, in get_domain_records\n raise GreatExpectationsError( # noqa: TRY003\ngreat_expectations.exceptions.exceptions.GreatExpectationsError: unrecognized condition_parser None for Spark execution engine\n\nThe above exception was the direct cause of the following exception:\n\nTraceback (most recent call last):\n File \"/home/jburling/.local/conda/envs/stonevalley/lib/python3.12/site-packages/great_expectations/validator/validation_graph.py\", line 276, in _resolve\n self._execution_engine.resolve_metrics(\n File \"/home/jburling/.local/conda/envs/stonevalley/lib/python3.12/site-packages/great_expectations/execution_engine/execution_engine.py\", line 279, in resolve_metrics\n return self._process_direct_and_bundled_metric_computation_configurations(\n ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n File \"/home/jburling/.local/conda/envs/stonevalley/lib/python3.12/site-packages/great_expectations/execution_engine/execution_engine.py\", line 537, in _process_direct_and_bundled_metric_computation_configurations\n raise gx_exceptions.MetricResolutionError(\ngreat_expectations.exceptions.exceptions.MetricResolutionError: unrecognized condition_parser None for Spark execution engine\n", "exception_message": "unrecognized condition_parser None for Spark execution engine", "raised_exception": true }, "('table.row_count', 'e48bc318d7e9c92e270e3f7ab807c1b8', ())": { "exception_traceback": "Traceback (most recent call last):\n File \"/home/jburling/.local/conda/envs/stonevalley/lib/python3.12/site-packages/great_expectations/execution_engine/execution_engine.py\", line 545, in _process_direct_and_bundled_metric_computation_configurations\n self.resolve_metric_bundle(metric_fn_bundle=metric_fn_bundle_configurations)\n File \"/home/jburling/.local/conda/envs/stonevalley/lib/python3.12/site-packages/great_expectations/execution_engine/sparkdf_execution_engine.py\", line 900, in resolve_metric_bundle\n df: pyspark.DataFrame = self.get_domain_records(domain_kwargs=domain_kwargs)\n ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n File \"/home/jburling/.local/conda/envs/stonevalley/lib/python3.12/site-packages/great_expectations/execution_engine/sparkdf_execution_engine.py\", line 681, in get_domain_records\n raise GreatExpectationsError( # noqa: TRY003\ngreat_expectations.exceptions.exceptions.GreatExpectationsError: unrecognized condition_parser None for Spark execution engine\n\nThe above exception was the direct cause of the following exception:\n\nTraceback (most recent call last):\n File \"/home/jburling/.local/conda/envs/stonevalley/lib/python3.12/site-packages/great_expectations/validator/validation_graph.py\", line 276, in _resolve\n 
self._execution_engine.resolve_metrics(\n File \"/home/jburling/.local/conda/envs/stonevalley/lib/python3.12/site-packages/great_expectations/execution_engine/execution_engine.py\", line 279, in resolve_metrics\n return self._process_direct_and_bundled_metric_computation_configurations(\n ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n File \"/home/jburling/.local/conda/envs/stonevalley/lib/python3.12/site-packages/great_expectations/execution_engine/execution_engine.py\", line 549, in _process_direct_and_bundled_metric_computation_configurations\n raise gx_exceptions.MetricResolutionError(\ngreat_expectations.exceptions.exceptions.MetricResolutionError: unrecognized condition_parser None for Spark execution engine\n", "exception_message": "unrecognized condition_parser None for Spark execution engine", "raised_exception": true } } } ], "suite_name": "test-suite", "suite_parameters": {}, "statistics": { "evaluated_expectations": 2, "successful_expectations": 0, "unsuccessful_expectations": 2, "success_percent": 0.0 }, "meta": { "great_expectations_version": "1.2.4", "batch_spec": { "path": "/tmp/tmpgf8032g5/data.csv", "reader_method": "csv", "reader_options": { "sep": ",", "header": true, "inferSchema": true } }, "batch_markers": { "ge_load_time": "20241126T222652.157081Z" }, "active_batch_definition": { "datasource_name": "test-spark-fs", "data_connector_name": "fluent", "data_asset_name": "csv-asset", "batch_identifiers": { "path": "data.csv" }, "batching_regex": "(?P<path>data.csv)" }, "validation_id": "cd188ea7-bedf-4f8a-9898-1cf823b69b5f", "checkpoint_id": null, "batch_parameters": null }, "id": null }
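To make the failure pattern easier to see, here is a small helper (a sketch that relies only on keys present in the serialized output above) that prints the exception message for each evaluated expectation:

```python
# Sketch: summarize per-expectation failures from the serialized validation result.
# Uses only keys that appear in the JSON output above.
def summarize_failures(result_dict: dict) -> None:
    for res in result_dict['results']:
        config = res['expectation_config']
        column = config['kwargs'].get('column')
        print(f"{config['type']} (column={column}): success={res['success']}")
        for metric_key, info in res.get('exception_info', {}).items():
            if info.get('raised_exception'):
                print(f"  {metric_key}: {info['exception_message']}")


summarize_failures(result.to_json_dict())
```

Both expectations report the same `unrecognized condition_parser None for Spark execution engine` message, even though only the `col2` expectation uses `row_condition`.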
Hi there, thank you for bringing this to our attention. I've shared it with the team and will follow up with you once I have any updates.