From 4d6488ccc73d683ca1511810191ba64668609e52 Mon Sep 17 00:00:00 2001 From: knassre-bodo Date: Thu, 9 Oct 2025 00:49:15 -0400 Subject: [PATCH 01/40] Initial implementations of candidate vs rewrite shuttle --- pydough/configs/session.py | 22 ++ .../mask_server_candidate_shuttle.py | 112 +++++++++ .../conversion/mask_server_rewrite_shuttle.py | 220 ++++++++++++++++++ pydough/conversion/masking_shuttles.py | 4 +- pydough/conversion/relational_converter.py | 15 +- .../masked_expression_function_operator.py | 9 + 6 files changed, 378 insertions(+), 4 deletions(-) create mode 100644 pydough/conversion/mask_server_candidate_shuttle.py create mode 100644 pydough/conversion/mask_server_rewrite_shuttle.py diff --git a/pydough/configs/session.py b/pydough/configs/session.py index 425f1f26d..0476d2fa7 100644 --- a/pydough/configs/session.py +++ b/pydough/configs/session.py @@ -26,6 +26,7 @@ load_database_context, ) from pydough.errors import PyDoughErrorBuilder +from pydough.mask_server import MaskServerInfo from pydough.metadata import GraphMetadata, parse_json_metadata_from_file from .pydough_configs import PyDoughConfigs @@ -50,6 +51,7 @@ def __init__(self) -> None: connection=empty_connection, dialect=DatabaseDialect.ANSI ) self._error_builder: PyDoughErrorBuilder = PyDoughErrorBuilder() + self._mask_server: MaskServerInfo | None = None @property def metadata(self) -> GraphMetadata | None: @@ -131,6 +133,26 @@ def error_builder(self, builder: PyDoughErrorBuilder) -> None: """ self._error_builder = builder + @property + def mask_server(self) -> MaskServerInfo | None: + """ + Get the active mask server information. + + Returns: + The active mask server information. + """ + return self._mask_server + + @mask_server.setter + def mask_server(self, server_info: MaskServerInfo | None) -> None: + """ + Set the active mask server information. + + Args: + The mask server information to set. 
+ """ + self._mask_server = server_info + def connect_database(self, database_name: str, **kwargs) -> DatabaseContext: """ Create a new DatabaseContext and register it in the session. This returns diff --git a/pydough/conversion/mask_server_candidate_shuttle.py b/pydough/conversion/mask_server_candidate_shuttle.py new file mode 100644 index 000000000..94dfefde3 --- /dev/null +++ b/pydough/conversion/mask_server_candidate_shuttle.py @@ -0,0 +1,112 @@ +""" +TODO +""" + +__all__ = ["MaskServerCandidateShuttle"] + +import pydough.pydough_operators as pydop +from pydough.relational import ( + CallExpression, + ColumnReference, + LiteralExpression, + RelationalExpression, + RelationalExpressionShuttle, + WindowCallExpression, +) + + +class MaskServerCandidateShuttle(RelationalExpressionShuttle): + """ + TODO + """ + + ALLOWED_MASK_OPERATORS: set[pydop.PyDoughExpressionOperator] = { + pydop.BAN, + pydop.BOR, + pydop.NOT, + pydop.EQU, + pydop.NEQ, + pydop.GRT, + pydop.GEQ, + pydop.LET, + pydop.LEQ, + pydop.NEQ, + pydop.ISIN, + pydop.STARTSWITH, + pydop.ENDSWITH, + pydop.LOWER, + pydop.UPPER, + pydop.MONOTONIC, + pydop.YEAR, + pydop.MONTH, + pydop.DAY, + pydop.ADD, + pydop.SUB, + pydop.MUL, + pydop.DIV, + } + + def __init__(self) -> None: + # TODO ADD COMMENTS + self.candidate_pool: dict[ + RelationalExpression, + tuple[pydop.MaskedExpressionFunctionOperator, RelationalExpression], + ] = {} + self.processed_candidates: set[RelationalExpression] = set() + self.stack: list[ + tuple[pydop.MaskedExpressionFunctionOperator, RelationalExpression] | None + ] = [] + + def reset(self): + self.stack.clear() + + def visit_call_expression(self, expr: CallExpression) -> RelationalExpression: + # TODO ADD COMMENTS + for arg in expr.inputs: + arg.accept_shuttle(self) + mask_ops: set[ + tuple[pydop.MaskedExpressionFunctionOperator, RelationalExpression] + ] = set() + for _ in range(len(expr.inputs)): + stack_term: ( + tuple[pydop.MaskedExpressionFunctionOperator, RelationalExpression] + | 
None + ) = self.stack.pop() + if stack_term is not None: + mask_ops.add(stack_term) + + if ( + isinstance(expr.op, pydop.MaskedExpressionFunctionOperator) + and expr.op.is_unmask + ): + self.stack.append((expr.op, expr.inputs[0])) + elif len(mask_ops) == 1 and expr.op in self.ALLOWED_MASK_OPERATORS: + input_term: tuple[ + pydop.MaskedExpressionFunctionOperator, RelationalExpression + ] = mask_ops.pop() + if expr not in self.processed_candidates: + self.candidate_pool[expr] = input_term + self.processed_candidates.add(expr) + self.stack.append(input_term) + else: + self.stack.append(None) + return expr + + def visit_column_reference( + self, column_reference: ColumnReference + ) -> RelationalExpression: + self.stack.append(None) + return column_reference + + def visit_literal(self, literal: LiteralExpression) -> RelationalExpression: + self.stack.append(None) + return literal + + def visit_window_expression( + self, window_expression: WindowCallExpression + ) -> RelationalExpression: + result: RelationalExpression = super().visit_window_expression( + window_expression + ) + self.stack.append(None) + return result diff --git a/pydough/conversion/mask_server_rewrite_shuttle.py b/pydough/conversion/mask_server_rewrite_shuttle.py new file mode 100644 index 000000000..e06c429f2 --- /dev/null +++ b/pydough/conversion/mask_server_rewrite_shuttle.py @@ -0,0 +1,220 @@ +""" +TODO +""" + +__all__ = ["MaskServerRewriteShuttle"] + +import pydough.pydough_operators as pydop +from pydough.mask_server import ( + MaskServerInfo, + MaskServerInput, + MaskServerOutput, + MaskServerResponse, +) +from pydough.relational import ( + CallExpression, + LiteralExpression, + RelationalExpression, + RelationalExpressionShuttle, +) +from pydough.types import ArrayType, BooleanType, UnknownType + +from .mask_server_candidate_shuttle import MaskServerCandidateShuttle + + +class MaskServerRewriteShuttle(RelationalExpressionShuttle): + """ + TODO + """ + + OPERATORS_TO_SERVER_NAMES: 
dict[pydop.PyDoughExpressionOperator, str] = { + pydop.BAN: "AND", + pydop.BOR: "OR", + pydop.NOT: "NOT", + pydop.EQU: "EQUAL", + pydop.NEQ: "NOT_EQUAL", + pydop.GRT: "GT", + pydop.GEQ: "GTE", + pydop.LET: "LT", + pydop.LEQ: "LTE", + pydop.ISIN: "IN", + pydop.STARTSWITH: "STARTSWITH", + pydop.ENDSWITH: "ENDSWITH", + pydop.LOWER: "LOWER", + pydop.UPPER: "UPPER", + pydop.MONOTONIC: "BETWEEN", + pydop.YEAR: "YEAR", + pydop.MONTH: "MONTH", + pydop.DAY: "DAY", + pydop.ADD: "ADD", + pydop.SUB: "SUB", + pydop.MUL: "MUL", + pydop.DIV: "DIV", + } + + def __init__( + self, server_info: MaskServerInfo, candidate_shuttle: MaskServerCandidateShuttle + ) -> None: + self.server_info: MaskServerInfo = server_info + self.candidate_shuttle: MaskServerCandidateShuttle = candidate_shuttle + self.responses: dict[ + RelationalExpression, tuple[RelationalExpression, MaskServerOutput] | None + ] = {} + + def visit_call_expression(self, expr: CallExpression) -> RelationalExpression: + # TODO: ADD COMMENTS + if expr in self.candidate_shuttle.candidate_pool: + self.process_batch() + + response: tuple[RelationalExpression, MaskServerOutput] | None = ( + self.responses.get(expr, None) + ) + if response is not None: + return self.convert_response_to_relational(*response) + return super().visit_call_expression(expr) + + def process_batch(self) -> None: + """ + TODO + """ + batch: list[MaskServerInput] = [] + ancillary_info: list[tuple[RelationalExpression, RelationalExpression]] = [] + for expr, ( + mask_op, + input_expr, + ) in self.candidate_shuttle.candidate_pool.items(): + ancillary_info.append((expr, input_expr)) + batch.append( + MaskServerInput( + table_path=mask_op.table_path, + column_name=mask_op.masking_metadata.column_name, + expression=self.convert_to_server_expression(expr), + ) + ) + responses: list[MaskServerOutput] = ( + self.server_info.simplify_simple_expression_batch(batch) + ) + assert len(responses) == len(ancillary_info) + for (expr, input_expr), response in 
zip(ancillary_info, responses): + if response.response_case != MaskServerResponse.UNSUPPORTED: + self.responses[expr] = (input_expr, response) + else: + self.responses[expr] = None + self.candidate_shuttle.processed_candidates.add(expr) + self.candidate_shuttle.candidate_pool.clear() + + def convert_literal_to_server_expression( + self, literal: LiteralExpression + ) -> list[str | int | float | None | bool]: + """ + TODO + """ + if literal.value is None: + return ["NULL"] + elif isinstance(literal.value, bool): + return ["TRUE" if literal.value else "FALSE"] + elif isinstance(literal.value, (int, float)): + return [literal.value] + elif isinstance(literal.value, str): + return [literal.value] + else: + raise ValueError( + f"Unsupported literal type for mask server conversion: {type(literal.value)}" + ) + + def convert_to_server_expression( + self, expr: RelationalExpression + ) -> list[str | int | float | None | bool]: + """ + TODO + """ + if isinstance(expr, LiteralExpression): + return self.convert_literal_to_server_expression(expr) + elif isinstance(expr, CallExpression): + if isinstance(expr.op, pydop.MaskedExpressionFunctionOperator): + return ["__col__"] + elif expr.op in self.OPERATORS_TO_SERVER_NAMES: + return self.convert_call_to_server_expression( + self.OPERATORS_TO_SERVER_NAMES[expr.op], expr.inputs + ) + elif expr.op == pydop.ISIN: + return self.convert_isin_call_to_server_expression(expr.inputs) + else: + raise ValueError( + f"Unsupported operator for mask server conversion: {expr.op}" + ) + else: + raise ValueError( + f"Unsupported expression type for mask server conversion: {type(expr)}" + ) + + def convert_call_to_server_expression( + self, operator_name: str, inputs: list[RelationalExpression] + ) -> list[str | int | float | None | bool]: + """ + TODO + """ + result: list[str | int | float | None | bool] = [operator_name] + result.append(len(inputs)) + for inp in inputs: + result.extend(self.convert_to_server_expression(inp)) + return result + + 
def convert_isin_call_to_server_expression( + self, inputs: list[RelationalExpression] + ) -> list[str | int | float | None | bool]: + """ + TODO + """ + if len(inputs) != 2: + raise ValueError("ISIN operator requires exactly two inputs.") + result: list[str | int | float | None | bool] = ["IN"] + args: list[str | int | float | None | bool] = self.convert_to_server_expression( + inputs[0] + ) + assert isinstance(inputs[1], LiteralExpression) and isinstance( + inputs[1].value, (list, tuple) + ), "ISIN right-hand side must be a list or tuple literal." + for v in inputs[1].value: + args.extend( + self.convert_literal_to_server_expression( + LiteralExpression(v, UnknownType()) + ) + ) + result.append(len(inputs[1].value)) + result.extend(args) + return result + + def convert_response_to_relational( + self, input_expr: RelationalExpression, response: MaskServerOutput + ) -> RelationalExpression: + """ + TODO + """ + result: RelationalExpression + match response.response_case: + case MaskServerResponse.IN_ARRAY | MaskServerResponse.NOT_IN_ARRAY: + result = self.build_in_array_expression(input_expr, response) + if response.response_case == MaskServerResponse.NOT_IN_ARRAY: + result = CallExpression(pydop.NOT, BooleanType(), [result]) + case _: + raise ValueError( + f"Unsupported mask server response case: {response.response_case}" + ) + return result + + def build_in_array_expression( + self, input_expr: RelationalExpression, response: MaskServerOutput + ) -> RelationalExpression: + """ + TODO + """ + assert response.response_case in ( + MaskServerResponse.IN_ARRAY, + MaskServerResponse.NOT_IN_ARRAY, + ) + assert isinstance(response.payload, list) + array_literal: LiteralExpression = LiteralExpression( + response.payload, ArrayType(UnknownType()) + ) + return CallExpression(pydop.ISIN, BooleanType(), [input_expr, array_literal]) diff --git a/pydough/conversion/masking_shuttles.py b/pydough/conversion/masking_shuttles.py index 53ce2097e..9cfcbb559 100644 --- 
a/pydough/conversion/masking_shuttles.py +++ b/pydough/conversion/masking_shuttles.py @@ -63,7 +63,7 @@ def rewrite_masked_literal_comparison( # literal in a call to MASK by toggling is_unmask to False. masked_literal = CallExpression( pydop.MaskedExpressionFunctionOperator( - call_arg.op.masking_metadata, False + call_arg.op.masking_metadata, call_arg.op.table_path, False ), call_arg.data_type, [literal_arg], @@ -83,7 +83,7 @@ def rewrite_masked_literal_comparison( [ CallExpression( pydop.MaskedExpressionFunctionOperator( - call_arg.op.masking_metadata, False + call_arg.op.masking_metadata, call_arg.op.table_path, False ), call_arg.data_type, [LiteralExpression(v, inner_type)], diff --git a/pydough/conversion/relational_converter.py b/pydough/conversion/relational_converter.py index a10e608ac..a01ec6053 100644 --- a/pydough/conversion/relational_converter.py +++ b/pydough/conversion/relational_converter.py @@ -85,6 +85,8 @@ ) from .hybrid_translator import HybridTranslator from .hybrid_tree import HybridTree +from .mask_server_candidate_shuttle import MaskServerCandidateShuttle +from .mask_server_rewrite_shuttle import MaskServerRewriteShuttle from .masking_shuttles import MaskLiteralComparisonShuttle from .merge_projects import merge_projects from .projection_pullup import pullup_projections @@ -857,7 +859,9 @@ def build_simple_table_scan( ) unmask_columns[name] = CallExpression( pydop.MaskedExpressionFunctionOperator( - hybrid_expr.column.column_property, True + hybrid_expr.column.column_property, + node.collection.collection.table_path, + True, ), hybrid_expr.column.column_property.unprotected_data_type, [ColumnReference(name, hybrid_expr.typ)], @@ -1664,8 +1668,15 @@ def convert_ast_to_relational( # Invoke the optimization procedures on the result to clean up the tree. additional_shuttles: list[RelationalExpressionShuttle] = [] # Add the mask literal comparison shuttle if the environment variable - # PYDOUGH_ENABLE_MASK_REWRITES is set to 1. 
+ # PYDOUGH_ENABLE_MASK_REWRITES is set to 1. If a masking rewrite server has + # been attached to the session, include the shuttles for that as well. if os.getenv("PYDOUGH_ENABLE_MASK_REWRITES") == "1": + if session.mask_server is not None: + candidate_shuttle: MaskServerCandidateShuttle = MaskServerCandidateShuttle() + additional_shuttles.append(candidate_shuttle) + additional_shuttles.append( + MaskServerRewriteShuttle(session.mask_server, candidate_shuttle) + ) additional_shuttles.append(MaskLiteralComparisonShuttle()) optimized_result: RelationalRoot = optimize_relational_tree( raw_result, session, additional_shuttles diff --git a/pydough/pydough_operators/expression_operators/masked_expression_function_operator.py b/pydough/pydough_operators/expression_operators/masked_expression_function_operator.py index 905361348..211a55c90 100644 --- a/pydough/pydough_operators/expression_operators/masked_expression_function_operator.py +++ b/pydough/pydough_operators/expression_operators/masked_expression_function_operator.py @@ -29,6 +29,7 @@ class MaskedExpressionFunctionOperator(ExpressionFunctionOperator): def __init__( self, masking_metadata: MaskedTableColumnMetadata, + table_path: str, is_unmask: bool, ): # Create a dummy verifier that requires exactly one argument, since all @@ -49,6 +50,7 @@ def __init__( "UNMASK" if is_unmask else "MASK", False, verifier, deducer, False ) self._masking_metadata: MaskedTableColumnMetadata = masking_metadata + self._table_path: str = table_path self._is_unmask: bool = is_unmask @property @@ -58,6 +60,13 @@ def masking_metadata(self) -> MaskedTableColumnMetadata: """ return self._masking_metadata + @property + def table_path(self) -> str: + """ + The fully qualified SQL table path for the masked column. 
+ """ + return self._table_path + @property def is_unmask(self) -> bool: """ From 53693791df3c9019ecf72219761eabc17a2ca27f Mon Sep 17 00:00:00 2001 From: knassre-bodo Date: Thu, 9 Oct 2025 12:21:03 -0400 Subject: [PATCH 02/40] Initial implementation of predicate server integration working on cryptbank_filter_count_01 --- .../mask_server_candidate_shuttle.py | 34 +++++++++------ .../conversion/mask_server_rewrite_shuttle.py | 43 ++++++++++++++++--- pydough/evaluation/evaluate_unqualified.py | 11 ++++- pydough/mask_server/mask_server.py | 1 - tests/conftest.py | 9 ++++ tests/mock_server/lookup_table.py | 7 +++ tests/test_masked_sqlite.py | 8 +++- .../cryptbank_filter_count_01_rewrite.txt | 2 +- ...yptbank_filter_count_01_rewrite_sqlite.sql | 2 +- tests/testing_utilities.py | 11 +++++ 10 files changed, 103 insertions(+), 25 deletions(-) diff --git a/pydough/conversion/mask_server_candidate_shuttle.py b/pydough/conversion/mask_server_candidate_shuttle.py index 94dfefde3..7754e8279 100644 --- a/pydough/conversion/mask_server_candidate_shuttle.py +++ b/pydough/conversion/mask_server_candidate_shuttle.py @@ -45,6 +45,9 @@ class MaskServerCandidateShuttle(RelationalExpressionShuttle): pydop.MUL, pydop.DIV, } + """ + TODO: ADD DESCRIPTION + """ def __init__(self) -> None: # TODO ADD COMMENTS @@ -54,7 +57,11 @@ def __init__(self) -> None: ] = {} self.processed_candidates: set[RelationalExpression] = set() self.stack: list[ - tuple[pydop.MaskedExpressionFunctionOperator, RelationalExpression] | None + tuple[ + tuple[pydop.MaskedExpressionFunctionOperator, RelationalExpression] + | None, + bool, + ] ] = [] def reset(self): @@ -67,19 +74,20 @@ def visit_call_expression(self, expr: CallExpression) -> RelationalExpression: mask_ops: set[ tuple[pydop.MaskedExpressionFunctionOperator, RelationalExpression] ] = set() + disallowed: bool = False for _ in range(len(expr.inputs)): - stack_term: ( - tuple[pydop.MaskedExpressionFunctionOperator, RelationalExpression] - | None - ) = 
self.stack.pop() + stack_term, arg_disallowed = self.stack.pop() if stack_term is not None: mask_ops.add(stack_term) + disallowed |= arg_disallowed if ( isinstance(expr.op, pydop.MaskedExpressionFunctionOperator) and expr.op.is_unmask ): - self.stack.append((expr.op, expr.inputs[0])) + self.stack.append(((expr.op, expr.inputs[0]), False)) + elif disallowed: + self.stack.append((None, True)) elif len(mask_ops) == 1 and expr.op in self.ALLOWED_MASK_OPERATORS: input_term: tuple[ pydop.MaskedExpressionFunctionOperator, RelationalExpression @@ -87,19 +95,21 @@ def visit_call_expression(self, expr: CallExpression) -> RelationalExpression: if expr not in self.processed_candidates: self.candidate_pool[expr] = input_term self.processed_candidates.add(expr) - self.stack.append(input_term) + self.stack.append((input_term, False)) else: - self.stack.append(None) + self.stack.append((None, True)) return expr def visit_column_reference( self, column_reference: ColumnReference ) -> RelationalExpression: - self.stack.append(None) + self.stack.append((None, True)) return column_reference - def visit_literal(self, literal: LiteralExpression) -> RelationalExpression: - self.stack.append(None) + def visit_literal_expression( + self, literal: LiteralExpression + ) -> RelationalExpression: + self.stack.append((None, False)) return literal def visit_window_expression( @@ -108,5 +118,5 @@ def visit_window_expression( result: RelationalExpression = super().visit_window_expression( window_expression ) - self.stack.append(None) + self.stack.append((None, True)) return result diff --git a/pydough/conversion/mask_server_rewrite_shuttle.py b/pydough/conversion/mask_server_rewrite_shuttle.py index e06c429f2..7a39d6166 100644 --- a/pydough/conversion/mask_server_rewrite_shuttle.py +++ b/pydough/conversion/mask_server_rewrite_shuttle.py @@ -37,7 +37,6 @@ class MaskServerRewriteShuttle(RelationalExpressionShuttle): pydop.GEQ: "GTE", pydop.LET: "LT", pydop.LEQ: "LTE", - pydop.ISIN: "IN", 
pydop.STARTSWITH: "STARTSWITH", pydop.ENDSWITH: "ENDSWITH", pydop.LOWER: "LOWER", @@ -51,6 +50,11 @@ class MaskServerRewriteShuttle(RelationalExpressionShuttle): pydop.MUL: "MUL", pydop.DIV: "DIV", } + """ + TODO: ADD DESCRIPTION + + NOTE: ISIN is handled separately. + """ def __init__( self, server_info: MaskServerInfo, candidate_shuttle: MaskServerCandidateShuttle @@ -195,8 +199,6 @@ def convert_response_to_relational( match response.response_case: case MaskServerResponse.IN_ARRAY | MaskServerResponse.NOT_IN_ARRAY: result = self.build_in_array_expression(input_expr, response) - if response.response_case == MaskServerResponse.NOT_IN_ARRAY: - result = CallExpression(pydop.NOT, BooleanType(), [result]) case _: raise ValueError( f"Unsupported mask server response case: {response.response_case}" @@ -214,7 +216,34 @@ def build_in_array_expression( MaskServerResponse.NOT_IN_ARRAY, ) assert isinstance(response.payload, list) - array_literal: LiteralExpression = LiteralExpression( - response.payload, ArrayType(UnknownType()) - ) - return CallExpression(pydop.ISIN, BooleanType(), [input_expr, array_literal]) + if len(response.payload) == 0: + # If the payload is empty, we can return a literal true/false + # depending on whether it is IN or NOT IN + return LiteralExpression( + response.response_case == MaskServerResponse.NOT_IN_ARRAY, BooleanType() + ) + elif len(response.payload) == 1: + # If the payload has one element, we can return a simple equality + # or inequality, depending on whether it is IN or NOT IN + return CallExpression( + pydop.EQU + if response.response_case == MaskServerResponse.IN_ARRAY + else pydop.NEQ, + BooleanType(), + [ + input_expr, + LiteralExpression(response.payload[0], UnknownType()), + ], + ) + else: + # Otherwise, we need to return an ISIN expression with an array + # literal, and if doing NOT IN then negate the whole thing. 
+ array_literal: LiteralExpression = LiteralExpression( + response.payload, ArrayType(UnknownType()) + ) + result: RelationalExpression = CallExpression( + pydop.ISIN, BooleanType(), [input_expr, array_literal] + ) + if response.response_case == MaskServerResponse.NOT_IN_ARRAY: + result = CallExpression(pydop.NOT, BooleanType(), [result]) + return result diff --git a/pydough/evaluation/evaluate_unqualified.py b/pydough/evaluation/evaluate_unqualified.py index 2ab99bffb..72a5c9aba 100644 --- a/pydough/evaluation/evaluate_unqualified.py +++ b/pydough/evaluation/evaluate_unqualified.py @@ -15,6 +15,7 @@ from pydough.errors import ( PyDoughSessionException, ) +from pydough.mask_server import MaskServerInfo from pydough.metadata import GraphMetadata from pydough.qdag import PyDoughCollectionQDAG, PyDoughQDAG from pydough.relational import RelationalRoot @@ -32,8 +33,8 @@ def _load_session_info(**kwargs) -> PyDoughSession: Load the session information from the active session unless it is found in the keyword arguments. The following variants are accepted: - If `session` is found, it is used directly. - - If `metadata`, `config` and/or `database` are found, they are used to - construct a new session. + - If `metadata`, `config`, `mask_server`, and/or `database` are found, they + are used to construct a new session. - If none of these are found, the active session is used. 
Args: @@ -88,6 +89,11 @@ def _load_session_info(**kwargs) -> PyDoughSession: database = kwargs.pop("database") else: database = pydough.active_session.database + mask_server: MaskServerInfo | None + if "mask_server" in kwargs: + mask_server = kwargs.pop("mask_server") + else: + mask_server = pydough.active_session.mask_server assert not kwargs, f"Unexpected keyword arguments: {kwargs}" # Construct the new session @@ -95,6 +101,7 @@ def _load_session_info(**kwargs) -> PyDoughSession: new_session._metadata = metadata new_session._config = config new_session._database = database + new_session._mask_server = mask_server return new_session diff --git a/pydough/mask_server/mask_server.py b/pydough/mask_server/mask_server.py index 38f50bd50..2e7c4eea0 100644 --- a/pydough/mask_server/mask_server.py +++ b/pydough/mask_server/mask_server.py @@ -145,7 +145,6 @@ def simplify_simple_expression_batch( method: RequestMethod = RequestMethod.POST request: ServerRequest = self.generate_request(batch, path, method) - response_json = self.connection.send_server_request(request) result: list[MaskServerOutput] = self.generate_result(response_json) diff --git a/tests/conftest.py b/tests/conftest.py index 518eddb72..693248f11 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -25,6 +25,7 @@ load_database_context, ) from pydough.errors import PyDoughTestingException +from pydough.mask_server import MaskServerInfo from pydough.metadata.graphs import GraphMetadata from pydough.qdag import AstNodeBuilder from tests.test_pydough_functions.tpch_outputs import ( @@ -1864,3 +1865,11 @@ def mock_server_setup(): # Cleanup after tests proc.terminate() proc.wait() + + +@pytest.fixture(scope="session") +def mock_server_info(mock_server_setup) -> MaskServerInfo: + """ + TODO: add description + """ + return MaskServerInfo(base_url="http://localhost:8000", token=None) diff --git a/tests/mock_server/lookup_table.py b/tests/mock_server/lookup_table.py index f132e4c74..582e160a7 100644 --- 
a/tests/mock_server/lookup_table.py +++ b/tests/mock_server/lookup_table.py @@ -94,4 +94,11 @@ "values": ['"Hello"', "HelloWorld", "SGVsbG9Xb3JsZA=="], "count": 3, }, + # CRYPTBANK hardcoded responses + ("CRBNK.CUSTOMERS.c_lname", ("EQUAL", 2, "__col__", "lee")): { + "type": "literal", + "operator": "IN", + "values": ["LEE"], + "count": 1, + }, } diff --git a/tests/test_masked_sqlite.py b/tests/test_masked_sqlite.py index cf5cd5dce..1d9af6976 100644 --- a/tests/test_masked_sqlite.py +++ b/tests/test_masked_sqlite.py @@ -9,6 +9,7 @@ import pytest from pydough.database_connectors import DatabaseContext, DatabaseDialect +from pydough.mask_server import MaskServerInfo from tests.testing_utilities import PyDoughPandasTest, graph_fetcher @@ -625,6 +626,7 @@ def test_pipeline_until_relational_cryptbank( get_plan_test_filename: Callable[[str], str], update_tests: bool, enable_mask_rewrites: str, + mock_server_info: MaskServerInfo, ) -> None: """ Tests the conversion of the PyDough queries on the custom cryptbank dataset @@ -634,7 +636,7 @@ def test_pipeline_until_relational_cryptbank( f"{cryptbank_pipeline_test_data.test_name}_{enable_mask_rewrites}" ) cryptbank_pipeline_test_data.run_relational_test( - masked_graphs, file_path, update_tests + masked_graphs, file_path, update_tests, mask_server=mock_server_info ) @@ -645,6 +647,7 @@ def test_pipeline_until_sql_cryptbank( get_sql_test_filename: Callable[[str, DatabaseDialect], str], update_tests: bool, enable_mask_rewrites: str, + mock_server_info: MaskServerInfo, ): """ Tests the conversion of the PyDough queries on the custom cryptbank dataset @@ -659,6 +662,7 @@ def test_pipeline_until_sql_cryptbank( file_path, update_tests, sqlite_tpch_db_context, + mask_server=mock_server_info, ) @@ -668,6 +672,7 @@ def test_pipeline_e2e_cryptbank( masked_graphs: graph_fetcher, sqlite_cryptbank_connection: DatabaseContext, enable_mask_rewrites: str, + mock_server_info: MaskServerInfo, ): """ Test executing the the custom queries with 
the custom cryptbank dataset @@ -676,4 +681,5 @@ def test_pipeline_e2e_cryptbank( cryptbank_pipeline_test_data.run_e2e_test( masked_graphs, sqlite_cryptbank_connection, + mask_server=mock_server_info, ) diff --git a/tests/test_plan_refsols/cryptbank_filter_count_01_rewrite.txt b/tests/test_plan_refsols/cryptbank_filter_count_01_rewrite.txt index f0cb980e1..bfeb90c10 100644 --- a/tests/test_plan_refsols/cryptbank_filter_count_01_rewrite.txt +++ b/tests/test_plan_refsols/cryptbank_filter_count_01_rewrite.txt @@ -1,4 +1,4 @@ ROOT(columns=[('n', n_rows)], orderings=[]) AGGREGATE(keys={}, aggregations={'n_rows': COUNT()}) - FILTER(condition=c_lname == MASK::(UPPER(['lee':string])), columns={}) + FILTER(condition=c_lname == 'LEE':unknown, columns={}) SCAN(table=CRBNK.CUSTOMERS, columns={'c_lname': c_lname}) diff --git a/tests/test_sql_refsols/cryptbank_filter_count_01_rewrite_sqlite.sql b/tests/test_sql_refsols/cryptbank_filter_count_01_rewrite_sqlite.sql index bffd9c7c0..298dbab4e 100644 --- a/tests/test_sql_refsols/cryptbank_filter_count_01_rewrite_sqlite.sql +++ b/tests/test_sql_refsols/cryptbank_filter_count_01_rewrite_sqlite.sql @@ -2,4 +2,4 @@ SELECT COUNT(*) AS n FROM crbnk.customers WHERE - c_lname = UPPER('lee') + c_lname = 'LEE' diff --git a/tests/testing_utilities.py b/tests/testing_utilities.py index f3b734241..d3253c63c 100644 --- a/tests/testing_utilities.py +++ b/tests/testing_utilities.py @@ -45,6 +45,7 @@ from pydough.database_connectors import DatabaseContext from pydough.errors import PyDoughTestingException from pydough.evaluation.evaluate_unqualified import _load_column_selection +from pydough.mask_server import MaskServerInfo from pydough.metadata import GraphMetadata from pydough.pydough_operators import get_operator_by_name from pydough.qdag import ( @@ -1148,6 +1149,7 @@ def run_relational_test( file_path: str, update: bool, config: PyDoughConfigs | None = None, + mask_server: MaskServerInfo | None = None, ) -> None: """ Runs a test on the 
relational plan code generated by the PyDough code, @@ -1162,6 +1164,7 @@ def run_relational_test( plan text, otherwise compares the generated relational plan text against the expected relational plan text in the file. `config`: The PyDough configuration to use for the test, if any. + `mask_server`: The mask server to use for the test, if any. """ # Skip if indicated. if self.skip_relational: @@ -1178,6 +1181,7 @@ def run_relational_test( session: PyDoughSession = PyDoughSession() session.metadata = graph session.config = config if config is not None else pydough.active_session.config + session.mask_server = mask_server qualified: PyDoughQDAG = qualify_node(root, session) assert isinstance(qualified, PyDoughCollectionQDAG), ( "Expected qualified answer to be a collection, not an expression" @@ -1205,6 +1209,7 @@ def run_sql_test( update: bool, database: DatabaseContext, config: PyDoughConfigs | None = None, + mask_server: MaskServerInfo | None = None, ) -> None: """ Runs a test on the SQL code generated by the PyDough code, @@ -1221,6 +1226,7 @@ def run_sql_test( `database`: The database context to determine what dialect of SQL to use when generating the SQL test. `config`: The PyDough configuration to use for the test, if any. + `mask_server`: The mask server to use for the test, if any. """ # Skip if indicated. 
if self.skip_sql: @@ -1238,6 +1244,8 @@ def run_sql_test( call_kwargs["config"] = config if self.columns is not None: call_kwargs["columns"] = self.columns + if mask_server is not None: + call_kwargs["mask_server"] = mask_server sql_text: str = to_sql(root, **call_kwargs) # Either update the reference solution, or compare the generated sql @@ -1259,6 +1267,7 @@ def run_e2e_test( config: PyDoughConfigs | None = None, display_sql: bool = False, coerce_types: bool = False, + mask_server: MaskServerInfo | None = None, ): """ Runs an end-to-end test using the data in the SQL comparison test, @@ -1289,6 +1298,8 @@ def run_e2e_test( call_kwargs["config"] = config if self.columns is not None: call_kwargs["columns"] = self.columns + if mask_server is not None: + call_kwargs["mask_server"] = mask_server result: pd.DataFrame = to_df(root, **call_kwargs) # Extract the reference solution from the function refsol: pd.DataFrame = self.pd_function() From 36cab6ee7e0d6d5999a0cc297020a48baa3dbbde Mon Sep 17 00:00:00 2001 From: knassre-bodo Date: Thu, 9 Oct 2025 12:47:11 -0400 Subject: [PATCH 03/40] WIP adding to lookup table --- .../conversion/mask_server_rewrite_shuttle.py | 4 + tests/mock_server/lookup_table.py | 147 ++++++++++++++++++ .../cryptbank_agg_01_rewrite.txt | 2 +- .../cryptbank_analysis_04_rewrite.txt | 4 +- .../cryptbank_filter_count_27_rewrite.txt | 2 +- .../cryptbank_filter_count_28_rewrite.txt | 2 +- .../cryptbank_agg_01_rewrite_sqlite.sql | 3 +- .../cryptbank_analysis_04_rewrite_sqlite.sql | 9 +- ...yptbank_filter_count_27_rewrite_sqlite.sql | 27 ++-- ...yptbank_filter_count_28_rewrite_sqlite.sql | 9 +- 10 files changed, 173 insertions(+), 36 deletions(-) diff --git a/pydough/conversion/mask_server_rewrite_shuttle.py b/pydough/conversion/mask_server_rewrite_shuttle.py index 7a39d6166..330ad0dcc 100644 --- a/pydough/conversion/mask_server_rewrite_shuttle.py +++ b/pydough/conversion/mask_server_rewrite_shuttle.py @@ -95,6 +95,10 @@ def process_batch(self) -> None: 
expression=self.convert_to_server_expression(expr), ) ) + print() + print( + f"BATCH ITEM: ({mask_op.table_path}.{mask_op.masking_metadata.column_name}): {batch[-1].expression}" + ) responses: list[MaskServerOutput] = ( self.server_info.simplify_simple_expression_batch(batch) ) diff --git a/tests/mock_server/lookup_table.py b/tests/mock_server/lookup_table.py index 582e160a7..15f159cbc 100644 --- a/tests/mock_server/lookup_table.py +++ b/tests/mock_server/lookup_table.py @@ -101,4 +101,151 @@ "values": ["LEE"], "count": 1, }, + ("CRBNK.CUSTOMERS.c_birthday", ("BETWEEN", 3, 1980, "YEAR", 1, "__col__", 1985)): { + "type": "literal", + "operator": "IN", + "values": [ + "1980-01-18", + "1981-07-21", + "1981-11-15", + "1982-11-07", + "1983-12-27", + ], + "count": 5, + }, + ("CRBNK.TRANSACTIONS.t_amount", ("GT", 2, "__col__", 9000.0)): { + "type": "literal", + "operator": "IN", + "values": [ + -8934.44, + -8881.98, + -8736.83, + -8717.7, + -8648.33, + -8639.5, + -8620.48, + -8593.09, + -8553.43, + -8527.34, + -8484.61, + -8480.79, + -8472.7, + -8457.49, + -8366.52, + -8361.27, + -8352.72, + -8308.42, + -8254.69, + -8077.89, + -8067.8, + ], + "count": 21, + }, + ( + "CRBNK.TRANSACTIONS.t_ts", + ( + "AND", + 2, + "EQUAL", + 2, + "MONTH", + 1, + "__col__", + 6, + "EQUAL", + 2, + "YEAR", + 1, + "__col__", + 2022, + ), + ): { + "type": "literal", + "operator": "IN", + "values": [ + "2022-06-03 05:08:58", + "2022-06-12 00:24:06", + "2022-06-13 05:50:39", + "2022-06-14 19:08:57", + "2022-06-16 03:15:13", + "2022-06-18 03:37:49", + "2022-06-27 06:08:04", + "2022-06-28 15:35:47", + "2022-06-29 05:40:38", + "2022-06-29 19:53:42", + ], + "count": 10, + }, + ( + "CRBNK.ACCOUNTS.a_type", + ( + "OR", + 2, + "EQUAL", + 2, + "__col__", + "retirement", + "EQUAL", + 2, + "__col__", + "savings", + ), + ): { + "type": "literal", + "operator": "IN", + "values": ["avingss", "etirementr"], + "count": 2, + }, + ("CRBNK.CUSTOMERS.c_phone", ("ENDSWITH", 2, "__col__", "5")): { + "type": "literal", 
+ "operator": "IN", + "values": ["555-091-2345", "555-901-2345"], + "count": 2, + }, + ( + "CRBNK.CUSTOMERS.c_fname", + ("OR", 2, "ENDSWITH", 2, "__col__", "a", "ENDSWITH", 2, "__col__", "e"), + ): { + "type": "literal", + "operator": "IN", + "values": ["ALICE", "GRACE", "LUKE", "MARIA", "OLIVIA", "QUEENIE", "SOPHIA"], + "count": 8, + }, + ("CRBNK.CUSTOMERS.c_fname", ("ENDSWITH", 2, "__col__", "s")): { + "type": "literal", + "operator": "IN", + "values": ["JAMES", "NICHOLAS", "THOMAS"], + "count": 3, + }, + ("CRBNK.CUSTOMERS.c_lname", ("NOT_EQUAL", 2, "__col__", "lopez")): { + "type": "literal", + "operator": "NOT_IN", + "values": ["LOPEZ"], + "count": 1, + }, } + +""" +DONE: +- agg_01 +- analysis_04 +- filter_count_27 +- filter_count_28 + +select c_birthday +from customers +where STRFTIME('%Y', DATE(c_birthday, '+472 days')) IN ('1980', '1981', '1982', '1983', '1984', '1985') +ORDER BY 1 +; + + +SELECT c_fname +FROM customers +WHERE c_fname LIKE '%A' OR c_fname LIKE '%E' +ORDER BY 1; + +SELECT c_fname +FROM customers +WHERE c_fname LIKE '%S' +ORDER BY 1; +""" diff --git a/tests/test_plan_refsols/cryptbank_agg_01_rewrite.txt b/tests/test_plan_refsols/cryptbank_agg_01_rewrite.txt index 1dad4af9a..f91b82c0b 100644 --- a/tests/test_plan_refsols/cryptbank_agg_01_rewrite.txt +++ b/tests/test_plan_refsols/cryptbank_agg_01_rewrite.txt @@ -1,4 +1,4 @@ ROOT(columns=[('n', ROUND(avg_unmask_t_amount, 2:numeric))], orderings=[]) AGGREGATE(keys={}, aggregations={'avg_unmask_t_amount': AVG(UNMASK::((1025.67 - ([t_amount]))))}) - FILTER(condition=MONTH(UNMASK::(DATETIME([t_ts], '+54321 seconds'))) == 6:numeric & YEAR(UNMASK::(DATETIME([t_ts], '+54321 seconds'))) == 2022:numeric, columns={'t_amount': t_amount}) + FILTER(condition=ISIN(t_ts, ['2022-06-03 05:08:58', '2022-06-12 00:24:06', '2022-06-13 05:50:39', '2022-06-14 19:08:57', '2022-06-16 03:15:13', '2022-06-18 03:37:49', '2022-06-27 06:08:04', '2022-06-28 15:35:47', '2022-06-29 05:40:38', '2022-06-29 
19:53:42']:array[unknown]), columns={'t_amount': t_amount}) SCAN(table=CRBNK.TRANSACTIONS, columns={'t_amount': t_amount, 't_ts': t_ts}) diff --git a/tests/test_plan_refsols/cryptbank_analysis_04_rewrite.txt b/tests/test_plan_refsols/cryptbank_analysis_04_rewrite.txt index 9f7bfc6df..ddf390cc7 100644 --- a/tests/test_plan_refsols/cryptbank_analysis_04_rewrite.txt +++ b/tests/test_plan_refsols/cryptbank_analysis_04_rewrite.txt @@ -2,8 +2,8 @@ ROOT(columns=[('key', UNMASK::(CASE WHEN [a_key] = 0 THEN 0 ELSE (CASE WHEN [a_k JOIN(condition=UNMASK::(CASE WHEN [t0.a_key] = 0 THEN 0 ELSE (CASE WHEN [t0.a_key] > 0 THEN 1 ELSE -1 END) * CAST(SUBSTRING([t0.a_key], 1 + INSTR([t0.a_key], '-'), LENGTH([t0.a_key]) / 2) AS INTEGER) END) == t1.t_sourceaccount, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'a_key': t0.a_key, 'c_fname': t0.c_fname, 'c_lname': t0.c_lname, 'n_rows': t1.n_rows}) JOIN(condition=t0.a_custkey == UNMASK::((42 - ([t1.c_key]))), type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'a_key': t0.a_key, 'c_fname': t1.c_fname, 'c_lname': t1.c_lname}) SCAN(table=CRBNK.ACCOUNTS, columns={'a_custkey': a_custkey, 'a_key': a_key}) - FILTER(condition=MONOTONIC(1980:numeric, YEAR(UNMASK::(DATE([c_birthday], '+472 days'))), 1985:numeric), columns={'c_fname': c_fname, 'c_key': c_key, 'c_lname': c_lname}) + FILTER(condition=ISIN(c_birthday, ['1980-01-18', '1981-07-21', '1981-11-15', '1982-11-07', '1983-12-27']:array[unknown]), columns={'c_fname': c_fname, 'c_key': c_key, 'c_lname': c_lname}) SCAN(table=CRBNK.CUSTOMERS, columns={'c_birthday': c_birthday, 'c_fname': c_fname, 'c_key': c_key, 'c_lname': c_lname}) AGGREGATE(keys={'t_sourceaccount': t_sourceaccount}, aggregations={'n_rows': COUNT()}) - FILTER(condition=UNMASK::((1025.67 - ([t_amount]))) > 9000.0:numeric, columns={'t_sourceaccount': t_sourceaccount}) + FILTER(condition=ISIN(t_amount, [-8934.44, -8881.98, -8736.83, -8717.7, -8648.33, -8639.5, 
-8620.48, -8593.09, -8553.43, -8527.34, -8484.61, -8480.79, -8472.7, -8457.49, -8366.52, -8361.27, -8352.72, -8308.42, -8254.69, -8077.89, -8067.8]:array[unknown]), columns={'t_sourceaccount': t_sourceaccount}) SCAN(table=CRBNK.TRANSACTIONS, columns={'t_amount': t_amount, 't_sourceaccount': t_sourceaccount}) diff --git a/tests/test_plan_refsols/cryptbank_filter_count_27_rewrite.txt b/tests/test_plan_refsols/cryptbank_filter_count_27_rewrite.txt index 6aac164ff..6de5d9627 100644 --- a/tests/test_plan_refsols/cryptbank_filter_count_27_rewrite.txt +++ b/tests/test_plan_refsols/cryptbank_filter_count_27_rewrite.txt @@ -1,4 +1,4 @@ ROOT(columns=[('n', n_rows)], orderings=[]) AGGREGATE(keys={}, aggregations={'n_rows': COUNT()}) - FILTER(condition=PRESENT(UNMASK::(SUBSTRING([c_addr], -1) || SUBSTRING([c_addr], 1, LENGTH([c_addr]) - 1))) & PRESENT(UNMASK::(DATE([c_birthday], '+472 days'))) & c_lname != MASK::(UPPER(['lopez':string])) & ENDSWITH(UNMASK::(LOWER([c_fname])), 'a':string) | ENDSWITH(UNMASK::(LOWER([c_fname])), 'e':string) | ENDSWITH(UNMASK::(LOWER([c_fname])), 's':string) | ABSENT(UNMASK::(DATE([c_birthday], '+472 days'))) & ENDSWITH(UNMASK::(REPLACE(REPLACE(REPLACE([c_phone], '9', '*'), '0', '9'), '*', '0')), '5':string), columns={}) + FILTER(condition=PRESENT(UNMASK::(SUBSTRING([c_addr], -1) || SUBSTRING([c_addr], 1, LENGTH([c_addr]) - 1))) & PRESENT(UNMASK::(DATE([c_birthday], '+472 days'))) & c_lname != 'LOPEZ':unknown & ISIN(c_fname, ['ALICE', 'GRACE', 'LUKE', 'MARIA', 'OLIVIA', 'QUEENIE', 'SOPHIA']:array[unknown]) | ISIN(c_fname, ['JAMES', 'NICHOLAS', 'THOMAS']:array[unknown]) | ABSENT(UNMASK::(DATE([c_birthday], '+472 days'))) & ISIN(c_phone, ['555-091-2345', '555-901-2345']:array[unknown]), columns={}) SCAN(table=CRBNK.CUSTOMERS, columns={'c_addr': c_addr, 'c_birthday': c_birthday, 'c_fname': c_fname, 'c_lname': c_lname, 'c_phone': c_phone}) diff --git a/tests/test_plan_refsols/cryptbank_filter_count_28_rewrite.txt 
b/tests/test_plan_refsols/cryptbank_filter_count_28_rewrite.txt index 9526425f5..319cf85ff 100644 --- a/tests/test_plan_refsols/cryptbank_filter_count_28_rewrite.txt +++ b/tests/test_plan_refsols/cryptbank_filter_count_28_rewrite.txt @@ -1,7 +1,7 @@ ROOT(columns=[('n', n_rows)], orderings=[]) AGGREGATE(keys={}, aggregations={'n_rows': COUNT()}) JOIN(condition=t0.a_custkey == UNMASK::((42 - ([t1.c_key]))), type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={}) - FILTER(condition=YEAR(UNMASK::(DATETIME([a_open_ts], '+123456789 seconds'))) < 2020:numeric & UNMASK::(SQRT([a_balance])) >= 5000:numeric & a_type == MASK::(SUBSTRING(['retirement':string], 2) || SUBSTRING(['retirement':string], 1, 1)) | a_type == MASK::(SUBSTRING(['savings':string], 2) || SUBSTRING(['savings':string], 1, 1)), columns={'a_custkey': a_custkey}) + FILTER(condition=YEAR(UNMASK::(DATETIME([a_open_ts], '+123456789 seconds'))) < 2020:numeric & UNMASK::(SQRT([a_balance])) >= 5000:numeric & ISIN(a_type, ['avingss', 'etirementr']:array[unknown]), columns={'a_custkey': a_custkey}) SCAN(table=CRBNK.ACCOUNTS, columns={'a_balance': a_balance, 'a_custkey': a_custkey, 'a_open_ts': a_open_ts, 'a_type': a_type}) FILTER(condition=CONTAINS(UNMASK::(SUBSTRING([c_email], -1) || SUBSTRING([c_email], 1, LENGTH([c_email]) - 1)), 'outlook':string) | CONTAINS(UNMASK::(SUBSTRING([c_email], -1) || SUBSTRING([c_email], 1, LENGTH([c_email]) - 1)), 'gmail':string), columns={'c_key': c_key}) SCAN(table=CRBNK.CUSTOMERS, columns={'c_email': c_email, 'c_key': c_key}) diff --git a/tests/test_sql_refsols/cryptbank_agg_01_rewrite_sqlite.sql b/tests/test_sql_refsols/cryptbank_agg_01_rewrite_sqlite.sql index ac0ae3093..e8bb433e6 100644 --- a/tests/test_sql_refsols/cryptbank_agg_01_rewrite_sqlite.sql +++ b/tests/test_sql_refsols/cryptbank_agg_01_rewrite_sqlite.sql @@ -4,5 +4,4 @@ SELECT )), 2) AS n FROM crbnk.transactions WHERE - CAST(STRFTIME('%Y', DATETIME(t_ts, '+54321 seconds')) AS INTEGER) = 
2022 - AND CAST(STRFTIME('%m', DATETIME(t_ts, '+54321 seconds')) AS INTEGER) = 6 + t_ts IN ('2022-06-03 05:08:58', '2022-06-12 00:24:06', '2022-06-13 05:50:39', '2022-06-14 19:08:57', '2022-06-16 03:15:13', '2022-06-18 03:37:49', '2022-06-27 06:08:04', '2022-06-28 15:35:47', '2022-06-29 05:40:38', '2022-06-29 19:53:42') diff --git a/tests/test_sql_refsols/cryptbank_analysis_04_rewrite_sqlite.sql b/tests/test_sql_refsols/cryptbank_analysis_04_rewrite_sqlite.sql index b21c053ef..babcf7c70 100644 --- a/tests/test_sql_refsols/cryptbank_analysis_04_rewrite_sqlite.sql +++ b/tests/test_sql_refsols/cryptbank_analysis_04_rewrite_sqlite.sql @@ -4,9 +4,7 @@ WITH _s3 AS ( COUNT(*) AS n_rows FROM crbnk.transactions WHERE - ( - 1025.67 - t_amount - ) > 9000.0 + t_amount IN (-8934.44, -8881.98, -8736.83, -8717.7, -8648.33, -8639.5, -8620.48, -8593.09, -8553.43, -8527.34, -8484.61, -8480.79, -8472.7, -8457.49, -8366.52, -8361.27, -8352.72, -8308.42, -8254.69, -8077.89, -8067.8) GROUP BY 1 ) @@ -24,11 +22,10 @@ SELECT _s3.n_rows AS n_trans FROM crbnk.accounts AS accounts JOIN crbnk.customers AS customers - ON CAST(STRFTIME('%Y', DATE(customers.c_birthday, '+472 days')) AS INTEGER) <= 1985 - AND CAST(STRFTIME('%Y', DATE(customers.c_birthday, '+472 days')) AS INTEGER) >= 1980 - AND accounts.a_custkey = ( + ON accounts.a_custkey = ( 42 - customers.c_key ) + AND customers.c_birthday IN ('1980-01-18', '1981-07-21', '1981-11-15', '1982-11-07', '1983-12-27') JOIN _s3 AS _s3 ON _s3.t_sourceaccount = CASE WHEN accounts.a_key = 0 diff --git a/tests/test_sql_refsols/cryptbank_filter_count_27_rewrite_sqlite.sql b/tests/test_sql_refsols/cryptbank_filter_count_27_rewrite_sqlite.sql index 8e7e601b6..f88bf4b3d 100644 --- a/tests/test_sql_refsols/cryptbank_filter_count_27_rewrite_sqlite.sql +++ b/tests/test_sql_refsols/cryptbank_filter_count_27_rewrite_sqlite.sql @@ -3,37 +3,34 @@ SELECT FROM crbnk.customers WHERE ( - DATE(c_birthday, '+472 days') IS NULL - OR LOWER(c_fname) LIKE '%a' - OR 
LOWER(c_fname) LIKE '%e' - OR LOWER(c_fname) LIKE '%s' - ) - AND ( DATE(c_birthday, '+472 days') IS NULL OR NOT ( SUBSTRING(c_addr, -1) || SUBSTRING(c_addr, 1, LENGTH(c_addr) - 1) ) IS NULL ) AND ( - DATE(c_birthday, '+472 days') IS NULL OR c_lname <> UPPER('lopez') + DATE(c_birthday, '+472 days') IS NULL + OR c_fname IN ('ALICE', 'GRACE', 'LUKE', 'MARIA', 'OLIVIA', 'QUEENIE', 'SOPHIA') + OR c_fname IN ('JAMES', 'NICHOLAS', 'THOMAS') ) AND ( - LOWER(c_fname) LIKE '%a' - OR LOWER(c_fname) LIKE '%e' - OR LOWER(c_fname) LIKE '%s' - OR REPLACE(REPLACE(REPLACE(c_phone, '9', '*'), '0', '9'), '*', '0') LIKE '%5' + DATE(c_birthday, '+472 days') IS NULL OR c_lname <> 'LOPEZ' ) AND ( NOT ( SUBSTRING(c_addr, -1) || SUBSTRING(c_addr, 1, LENGTH(c_addr) - 1) ) IS NULL - OR REPLACE(REPLACE(REPLACE(c_phone, '9', '*'), '0', '9'), '*', '0') LIKE '%5' + OR c_phone IN ('555-091-2345', '555-901-2345') ) AND ( NOT DATE(c_birthday, '+472 days') IS NULL - OR REPLACE(REPLACE(REPLACE(c_phone, '9', '*'), '0', '9'), '*', '0') LIKE '%5' + OR c_phone IN ('555-091-2345', '555-901-2345') + ) + AND ( + c_fname IN ('ALICE', 'GRACE', 'LUKE', 'MARIA', 'OLIVIA', 'QUEENIE', 'SOPHIA') + OR c_fname IN ('JAMES', 'NICHOLAS', 'THOMAS') + OR c_phone IN ('555-091-2345', '555-901-2345') ) AND ( - REPLACE(REPLACE(REPLACE(c_phone, '9', '*'), '0', '9'), '*', '0') LIKE '%5' - OR c_lname <> UPPER('lopez') + c_lname <> 'LOPEZ' OR c_phone IN ('555-091-2345', '555-901-2345') ) diff --git a/tests/test_sql_refsols/cryptbank_filter_count_28_rewrite_sqlite.sql b/tests/test_sql_refsols/cryptbank_filter_count_28_rewrite_sqlite.sql index 0be30d552..188eb8e4c 100644 --- a/tests/test_sql_refsols/cryptbank_filter_count_28_rewrite_sqlite.sql +++ b/tests/test_sql_refsols/cryptbank_filter_count_28_rewrite_sqlite.sql @@ -16,11 +16,4 @@ JOIN crbnk.customers AS customers WHERE CAST(STRFTIME('%Y', DATETIME(accounts.a_open_ts, '+123456789 seconds')) AS INTEGER) < 2020 AND SQRT(accounts.a_balance) >= 5000 - AND ( - accounts.a_type = ( - 
SUBSTRING('retirement', 2) || SUBSTRING('retirement', 1, 1) - ) - OR accounts.a_type = ( - SUBSTRING('savings', 2) || SUBSTRING('savings', 1, 1) - ) - ) + AND accounts.a_type IN ('avingss', 'etirementr') From ed6650c5d9c9c63e59e3ce03cb33133cd732ec13 Mon Sep 17 00:00:00 2001 From: knassre-bodo Date: Thu, 9 Oct 2025 14:14:36 -0400 Subject: [PATCH 04/40] Rewriting the rest of the filter count queries --- tests/mock_server/lookup_table.py | 149 +++++++++++++++--- .../cryptbank_filter_count_02_rewrite.txt | 2 +- .../cryptbank_filter_count_03_rewrite.txt | 2 +- .../cryptbank_filter_count_04_rewrite.txt | 2 +- .../cryptbank_filter_count_05_rewrite.txt | 2 +- .../cryptbank_filter_count_06_rewrite.txt | 2 +- .../cryptbank_filter_count_07_rewrite.txt | 2 +- .../cryptbank_filter_count_08_rewrite.txt | 2 +- .../cryptbank_filter_count_14_rewrite.txt | 2 +- .../cryptbank_filter_count_16_rewrite.txt | 2 +- .../cryptbank_filter_count_20_rewrite.txt | 2 +- .../cryptbank_filter_count_21_rewrite.txt | 2 +- .../cryptbank_filter_count_22_rewrite.txt | 2 +- .../cryptbank_filter_count_23_rewrite.txt | 2 +- .../cryptbank_filter_count_25_rewrite.txt | 2 +- ...yptbank_filter_count_02_rewrite_sqlite.sql | 2 +- ...yptbank_filter_count_03_rewrite_sqlite.sql | 2 +- ...yptbank_filter_count_04_rewrite_sqlite.sql | 2 +- ...yptbank_filter_count_05_rewrite_sqlite.sql | 2 +- ...yptbank_filter_count_06_rewrite_sqlite.sql | 4 +- ...yptbank_filter_count_07_rewrite_sqlite.sql | 2 +- ...yptbank_filter_count_08_rewrite_sqlite.sql | 2 +- ...yptbank_filter_count_14_rewrite_sqlite.sql | 2 +- ...yptbank_filter_count_16_rewrite_sqlite.sql | 7 +- ...yptbank_filter_count_20_rewrite_sqlite.sql | 2 +- ...yptbank_filter_count_21_rewrite_sqlite.sql | 2 +- ...yptbank_filter_count_22_rewrite_sqlite.sql | 2 +- ...yptbank_filter_count_23_rewrite_sqlite.sql | 2 +- ...yptbank_filter_count_25_rewrite_sqlite.sql | 3 +- 29 files changed, 152 insertions(+), 61 deletions(-) diff --git a/tests/mock_server/lookup_table.py 
b/tests/mock_server/lookup_table.py index 15f159cbc..7e821b33d 100644 --- a/tests/mock_server/lookup_table.py +++ b/tests/mock_server/lookup_table.py @@ -223,29 +223,128 @@ "values": ["LOPEZ"], "count": 1, }, + ("CRBNK.CUSTOMERS.c_lname", ("NOT_EQUAL", 2, "__col__", "lee")): { + "type": "literal", + "operator": "NOT_IN", + "values": ["LEE"], + "count": 1, + }, + ("CRBNK.CUSTOMERS.c_lname", ("IN", 3, "__col__", "lee", "smith", "rodriguez")): { + "type": "literal", + "operator": "IN", + "values": ["LEE", "SMITH", "RODRIGUEZ"], + "count": 3, + }, + ( + "CRBNK.CUSTOMERS.c_lname", + ("NOT", 1, "IN", 3, "__col__", "lee", "smith", "rodriguez"), + ): { + "type": "literal", + "operator": "NOT_IN", + "values": ["LEE", "SMITH", "RODRIGUEZ"], + "count": 3, + }, + ("CRBNK.CUSTOMERS.c_phone", ("STARTSWITH", 2, "__col__", "555-8")): { + "type": "literal", + "operator": "IN", + "values": ["555-809-1234", "555-870-9123"], + "count": 2, + }, + ("CRBNK.CUSTOMERS.c_email", ("ENDSWITH", 2, "__col__", "gmail.com")): { + "type": "literal", + "operator": "IN", + "values": [ + "livia.a22@gmail.como", + "ob.smith77@gmail.comb", + "ob_moore78@gmail.comr", + "opez.luke99@gmail.coml", + ], + "count": 4, + }, + ("CRBNK.CUSTOMERS.c_birthday", ("EQUAL", 2, "YEAR", 1, "__col__", 1978)): { + "type": "literal", + "operator": "IN", + "values": ["1976-10-27", "1976-12-02"], + "count": 2, + }, + ("CRBNK.CUSTOMERS.c_birthday", ("EQUAL", 2, "__col__", "1985-04-12")): { + "type": "literal", + "operator": "IN", + "values": ["1983-12-27"], + "count": 1, + }, + ("CRBNK.CUSTOMERS.c_fname", ("ENDSWITH", 2, "__col__", "e")): { + "type": "literal", + "operator": "IN", + "values": ["ALICE", "GRACE", "LUKE", "QUEENIE"], + "count": 4, + }, + ("CRBNK.CUSTOMERS.c_lname", ("ENDSWITH", 2, "__col__", "e")): { + "type": "literal", + "operator": "IN", + "values": ["LEE", "MOORE"], + "count": 2, + }, + ( + "CRBNK.ACCOUNTS.a_type", + ( + "AND", + 2, + "NOT_EQUAL", + 2, + "__col__", + "checking", + "NOT_EQUAL", + 2, + 
"__col__", + "savings", + ), + ): { + "type": "literal", + "operator": "NOT_IN", + "values": ["avingss", "heckingc"], + "count": 2, + }, + ("CRBNK.CUSTOMERS.c_birthday", ("NOT_EQUAL", 2, "__col__", "1991-11-15")): { + "type": "literal", + "operator": "NOT_IN", + "values": ["1990-07-31"], + "count": 1, + }, + ("CRBNK.CUSTOMERS.c_birthday", ("LTE", 2, "__col__", "1991-11-15")): { + "type": "literal", + "operator": "NOT_IN", + "values": ["1991-03-13", "1992-05-06", "1993-01-01", "1994-06-15"], + "count": 4, + }, + ("CRBNK.CUSTOMERS.c_birthday", ("GT", 2, "__col__", "1991-11-15")): { + "type": "literal", + "operator": "IN", + "values": ["1991-03-13", "1992-05-06", "1993-01-01", "1994-06-15"], + "count": 4, + }, + ("CRBNK.CUSTOMERS.c_birthday", ("LT", 2, "__col__", "1991-11-15")): { + "type": "literal", + "operator": "NOT_IN", + "values": [ + "1990-07-31", + "1991-03-13", + "1992-05-06", + "1993-01-01", + "1994-06-15", + ], + "count": 4, + }, + ("CRBNK.CUSTOMERS.c_birthday", ("GTE", 2, "__col__", "1991-11-15")): { + "type": "literal", + "operator": "IN", + "values": [ + "1990-07-31", + "1991-03-13", + "1992-05-06", + "1993-01-01", + "1994-06-15", + ], + "count": 4, + }, } - -""" -DONE: -- agg_01 -- analysis_04 -- filter_count_27 -- filter_count_28 - -select c_birthday -from customers -where STRFTIME('%Y', DATE(c_birthday, '+472 days')) IN ('1980', '1981', '1982', '1983', '1984', '1985') -ORDER BY 1 -; - - -SELECT c_fname -FROM customers -WHERE c_fname LIKE '%A' OR c_fname LIKE '%E' -ORDER BY 1; - -SELECT c_fname -FROM customers -WHERE c_fname LIKE '%S' -ORDER BY 1; -""" diff --git a/tests/test_plan_refsols/cryptbank_filter_count_02_rewrite.txt b/tests/test_plan_refsols/cryptbank_filter_count_02_rewrite.txt index 7558820f5..e771808ac 100644 --- a/tests/test_plan_refsols/cryptbank_filter_count_02_rewrite.txt +++ b/tests/test_plan_refsols/cryptbank_filter_count_02_rewrite.txt @@ -1,4 +1,4 @@ ROOT(columns=[('n', n_rows)], orderings=[]) AGGREGATE(keys={}, 
aggregations={'n_rows': COUNT()}) - FILTER(condition=c_lname != MASK::(UPPER(['lee':string])), columns={}) + FILTER(condition=c_lname != 'LEE':unknown, columns={}) SCAN(table=CRBNK.CUSTOMERS, columns={'c_lname': c_lname}) diff --git a/tests/test_plan_refsols/cryptbank_filter_count_03_rewrite.txt b/tests/test_plan_refsols/cryptbank_filter_count_03_rewrite.txt index 96ddb590e..81051fdfa 100644 --- a/tests/test_plan_refsols/cryptbank_filter_count_03_rewrite.txt +++ b/tests/test_plan_refsols/cryptbank_filter_count_03_rewrite.txt @@ -1,4 +1,4 @@ ROOT(columns=[('n', n_rows)], orderings=[]) AGGREGATE(keys={}, aggregations={'n_rows': COUNT()}) - FILTER(condition=ISIN(c_lname, [Call(op=MASK, inputs=[Literal(value='lee', type=UnknownType())], return_type=StringType()), Call(op=MASK, inputs=[Literal(value='smith', type=UnknownType())], return_type=StringType()), Call(op=MASK, inputs=[Literal(value='rodriguez', type=UnknownType())], return_type=StringType())]:bool), columns={}) + FILTER(condition=ISIN(c_lname, ['LEE', 'SMITH', 'RODRIGUEZ']:array[unknown]), columns={}) SCAN(table=CRBNK.CUSTOMERS, columns={'c_lname': c_lname}) diff --git a/tests/test_plan_refsols/cryptbank_filter_count_04_rewrite.txt b/tests/test_plan_refsols/cryptbank_filter_count_04_rewrite.txt index a8ebb4a29..733f70691 100644 --- a/tests/test_plan_refsols/cryptbank_filter_count_04_rewrite.txt +++ b/tests/test_plan_refsols/cryptbank_filter_count_04_rewrite.txt @@ -1,4 +1,4 @@ ROOT(columns=[('n', n_rows)], orderings=[]) AGGREGATE(keys={}, aggregations={'n_rows': COUNT()}) - FILTER(condition=NOT(ISIN(c_lname, [Call(op=MASK, inputs=[Literal(value='lee', type=UnknownType())], return_type=StringType()), Call(op=MASK, inputs=[Literal(value='smith', type=UnknownType())], return_type=StringType()), Call(op=MASK, inputs=[Literal(value='rodriguez', type=UnknownType())], return_type=StringType())]:bool)), columns={}) + FILTER(condition=NOT(ISIN(c_lname, ['LEE', 'SMITH', 'RODRIGUEZ']:array[unknown])), columns={}) 
SCAN(table=CRBNK.CUSTOMERS, columns={'c_lname': c_lname}) diff --git a/tests/test_plan_refsols/cryptbank_filter_count_05_rewrite.txt b/tests/test_plan_refsols/cryptbank_filter_count_05_rewrite.txt index 80d0ffef1..a8625f8fe 100644 --- a/tests/test_plan_refsols/cryptbank_filter_count_05_rewrite.txt +++ b/tests/test_plan_refsols/cryptbank_filter_count_05_rewrite.txt @@ -1,4 +1,4 @@ ROOT(columns=[('n', n_rows)], orderings=[]) AGGREGATE(keys={}, aggregations={'n_rows': COUNT()}) - FILTER(condition=STARTSWITH(UNMASK::(REPLACE(REPLACE(REPLACE([c_phone], '9', '*'), '0', '9'), '*', '0')), '555-8':string), columns={}) + FILTER(condition=ISIN(c_phone, ['555-809-1234', '555-870-9123']:array[unknown]), columns={}) SCAN(table=CRBNK.CUSTOMERS, columns={'c_phone': c_phone}) diff --git a/tests/test_plan_refsols/cryptbank_filter_count_06_rewrite.txt b/tests/test_plan_refsols/cryptbank_filter_count_06_rewrite.txt index 80671abda..889a1221e 100644 --- a/tests/test_plan_refsols/cryptbank_filter_count_06_rewrite.txt +++ b/tests/test_plan_refsols/cryptbank_filter_count_06_rewrite.txt @@ -1,4 +1,4 @@ ROOT(columns=[('n', n_rows)], orderings=[]) AGGREGATE(keys={}, aggregations={'n_rows': COUNT()}) - FILTER(condition=ENDSWITH(UNMASK::(SUBSTRING([c_email], -1) || SUBSTRING([c_email], 1, LENGTH([c_email]) - 1)), 'gmail.com':string), columns={}) + FILTER(condition=ISIN(c_email, ['livia.a22@gmail.como', 'ob.smith77@gmail.comb', 'ob_moore78@gmail.comr', 'opez.luke99@gmail.coml']:array[unknown]), columns={}) SCAN(table=CRBNK.CUSTOMERS, columns={'c_email': c_email}) diff --git a/tests/test_plan_refsols/cryptbank_filter_count_07_rewrite.txt b/tests/test_plan_refsols/cryptbank_filter_count_07_rewrite.txt index 1311df68a..e688c6561 100644 --- a/tests/test_plan_refsols/cryptbank_filter_count_07_rewrite.txt +++ b/tests/test_plan_refsols/cryptbank_filter_count_07_rewrite.txt @@ -1,4 +1,4 @@ ROOT(columns=[('n', n_rows)], orderings=[]) AGGREGATE(keys={}, aggregations={'n_rows': COUNT()}) - 
FILTER(condition=YEAR(UNMASK::(DATE([c_birthday], '+472 days'))) == 1978:numeric, columns={}) + FILTER(condition=ISIN(c_birthday, ['1976-10-27', '1976-12-02']:array[unknown]), columns={}) SCAN(table=CRBNK.CUSTOMERS, columns={'c_birthday': c_birthday}) diff --git a/tests/test_plan_refsols/cryptbank_filter_count_08_rewrite.txt b/tests/test_plan_refsols/cryptbank_filter_count_08_rewrite.txt index 4d56d54dc..d9a743560 100644 --- a/tests/test_plan_refsols/cryptbank_filter_count_08_rewrite.txt +++ b/tests/test_plan_refsols/cryptbank_filter_count_08_rewrite.txt @@ -1,4 +1,4 @@ ROOT(columns=[('n', n_rows)], orderings=[]) AGGREGATE(keys={}, aggregations={'n_rows': COUNT()}) - FILTER(condition=c_birthday == MASK::(DATE(['1985-04-12':string], '-472 days')), columns={}) + FILTER(condition=c_birthday == '1983-12-27':unknown, columns={}) SCAN(table=CRBNK.CUSTOMERS, columns={'c_birthday': c_birthday}) diff --git a/tests/test_plan_refsols/cryptbank_filter_count_14_rewrite.txt b/tests/test_plan_refsols/cryptbank_filter_count_14_rewrite.txt index b623ae10e..5e7f02291 100644 --- a/tests/test_plan_refsols/cryptbank_filter_count_14_rewrite.txt +++ b/tests/test_plan_refsols/cryptbank_filter_count_14_rewrite.txt @@ -1,4 +1,4 @@ ROOT(columns=[('n', n_rows)], orderings=[]) AGGREGATE(keys={}, aggregations={'n_rows': COUNT()}) - FILTER(condition=ENDSWITH(UNMASK::(LOWER([c_fname])), 'e':string) | ENDSWITH(UNMASK::(LOWER([c_lname])), 'e':string), columns={}) + FILTER(condition=ISIN(c_fname, ['ALICE', 'GRACE', 'LUKE', 'QUEENIE']:array[unknown]) | ISIN(c_lname, ['LEE', 'MOORE']:array[unknown]), columns={}) SCAN(table=CRBNK.CUSTOMERS, columns={'c_fname': c_fname, 'c_lname': c_lname}) diff --git a/tests/test_plan_refsols/cryptbank_filter_count_16_rewrite.txt b/tests/test_plan_refsols/cryptbank_filter_count_16_rewrite.txt index 1b2fcbc78..47ef8d92a 100644 --- a/tests/test_plan_refsols/cryptbank_filter_count_16_rewrite.txt +++ b/tests/test_plan_refsols/cryptbank_filter_count_16_rewrite.txt @@ -2,5 
+2,5 @@ ROOT(columns=[('n', n_rows)], orderings=[]) AGGREGATE(keys={}, aggregations={'n_rows': COUNT()}) JOIN(condition=UNMASK::((42 - ([t0.c_key]))) == t1.a_custkey, type=SEMI, columns={}) SCAN(table=CRBNK.CUSTOMERS, columns={'c_key': c_key}) - FILTER(condition=a_type != MASK::(SUBSTRING(['checking':string], 2) || SUBSTRING(['checking':string], 1, 1)) & a_type != MASK::(SUBSTRING(['savings':string], 2) || SUBSTRING(['savings':string], 1, 1)), columns={'a_custkey': a_custkey}) + FILTER(condition=NOT(ISIN(a_type, ['avingss', 'heckingc']:array[unknown])), columns={'a_custkey': a_custkey}) SCAN(table=CRBNK.ACCOUNTS, columns={'a_custkey': a_custkey, 'a_type': a_type}) diff --git a/tests/test_plan_refsols/cryptbank_filter_count_20_rewrite.txt b/tests/test_plan_refsols/cryptbank_filter_count_20_rewrite.txt index 462814855..34cb8907f 100644 --- a/tests/test_plan_refsols/cryptbank_filter_count_20_rewrite.txt +++ b/tests/test_plan_refsols/cryptbank_filter_count_20_rewrite.txt @@ -1,4 +1,4 @@ ROOT(columns=[('n', n_rows)], orderings=[]) AGGREGATE(keys={}, aggregations={'n_rows': COUNT()}) - FILTER(condition=UNMASK::(DATE([c_birthday], '+472 days')) > '1991-11-15':string, columns={}) + FILTER(condition=ISIN(c_birthday, ['1991-03-13', '1992-05-06', '1993-01-01', '1994-06-15']:array[unknown]), columns={}) SCAN(table=CRBNK.CUSTOMERS, columns={'c_birthday': c_birthday}) diff --git a/tests/test_plan_refsols/cryptbank_filter_count_21_rewrite.txt b/tests/test_plan_refsols/cryptbank_filter_count_21_rewrite.txt index 9742c2261..1e32a7730 100644 --- a/tests/test_plan_refsols/cryptbank_filter_count_21_rewrite.txt +++ b/tests/test_plan_refsols/cryptbank_filter_count_21_rewrite.txt @@ -1,4 +1,4 @@ ROOT(columns=[('n', n_rows)], orderings=[]) AGGREGATE(keys={}, aggregations={'n_rows': COUNT()}) - FILTER(condition=UNMASK::(DATE([c_birthday], '+472 days')) >= '1991-11-15':string, columns={}) + FILTER(condition=ISIN(c_birthday, ['1990-07-31', '1991-03-13', '1992-05-06', '1993-01-01', 
'1994-06-15']:array[unknown]), columns={}) SCAN(table=CRBNK.CUSTOMERS, columns={'c_birthday': c_birthday}) diff --git a/tests/test_plan_refsols/cryptbank_filter_count_22_rewrite.txt b/tests/test_plan_refsols/cryptbank_filter_count_22_rewrite.txt index 13dfd466f..df32b5a3a 100644 --- a/tests/test_plan_refsols/cryptbank_filter_count_22_rewrite.txt +++ b/tests/test_plan_refsols/cryptbank_filter_count_22_rewrite.txt @@ -1,4 +1,4 @@ ROOT(columns=[('n', n_rows)], orderings=[]) AGGREGATE(keys={}, aggregations={'n_rows': COUNT()}) - FILTER(condition=UNMASK::(DATE([c_birthday], '+472 days')) < '1991-11-15':string, columns={}) + FILTER(condition=NOT(ISIN(c_birthday, ['1990-07-31', '1991-03-13', '1992-05-06', '1993-01-01', '1994-06-15']:array[unknown])), columns={}) SCAN(table=CRBNK.CUSTOMERS, columns={'c_birthday': c_birthday}) diff --git a/tests/test_plan_refsols/cryptbank_filter_count_23_rewrite.txt b/tests/test_plan_refsols/cryptbank_filter_count_23_rewrite.txt index 56faca00f..3b1ff1ebe 100644 --- a/tests/test_plan_refsols/cryptbank_filter_count_23_rewrite.txt +++ b/tests/test_plan_refsols/cryptbank_filter_count_23_rewrite.txt @@ -1,4 +1,4 @@ ROOT(columns=[('n', n_rows)], orderings=[]) AGGREGATE(keys={}, aggregations={'n_rows': COUNT()}) - FILTER(condition=UNMASK::(DATE([c_birthday], '+472 days')) <= '1991-11-15':string, columns={}) + FILTER(condition=NOT(ISIN(c_birthday, ['1991-03-13', '1992-05-06', '1993-01-01', '1994-06-15']:array[unknown])), columns={}) SCAN(table=CRBNK.CUSTOMERS, columns={'c_birthday': c_birthday}) diff --git a/tests/test_plan_refsols/cryptbank_filter_count_25_rewrite.txt b/tests/test_plan_refsols/cryptbank_filter_count_25_rewrite.txt index 5face6b66..5890afa41 100644 --- a/tests/test_plan_refsols/cryptbank_filter_count_25_rewrite.txt +++ b/tests/test_plan_refsols/cryptbank_filter_count_25_rewrite.txt @@ -1,4 +1,4 @@ ROOT(columns=[('n', n_rows)], orderings=[]) AGGREGATE(keys={}, aggregations={'n_rows': COUNT()}) - 
FILTER(condition=ABSENT(UNMASK::(DATE([c_birthday], '+472 days'))) | c_birthday != MASK::(DATE(['1991-11-15':string], '-472 days')), columns={}) + FILTER(condition=ABSENT(UNMASK::(DATE([c_birthday], '+472 days'))) | c_birthday != '1990-07-31':unknown, columns={}) SCAN(table=CRBNK.CUSTOMERS, columns={'c_birthday': c_birthday}) diff --git a/tests/test_sql_refsols/cryptbank_filter_count_02_rewrite_sqlite.sql b/tests/test_sql_refsols/cryptbank_filter_count_02_rewrite_sqlite.sql index f1f7b1c78..ec3a44be4 100644 --- a/tests/test_sql_refsols/cryptbank_filter_count_02_rewrite_sqlite.sql +++ b/tests/test_sql_refsols/cryptbank_filter_count_02_rewrite_sqlite.sql @@ -2,4 +2,4 @@ SELECT COUNT(*) AS n FROM crbnk.customers WHERE - c_lname <> UPPER('lee') + c_lname <> 'LEE' diff --git a/tests/test_sql_refsols/cryptbank_filter_count_03_rewrite_sqlite.sql b/tests/test_sql_refsols/cryptbank_filter_count_03_rewrite_sqlite.sql index aa7550e49..a590ad01a 100644 --- a/tests/test_sql_refsols/cryptbank_filter_count_03_rewrite_sqlite.sql +++ b/tests/test_sql_refsols/cryptbank_filter_count_03_rewrite_sqlite.sql @@ -2,4 +2,4 @@ SELECT COUNT(*) AS n FROM crbnk.customers WHERE - c_lname IN (UPPER('lee'), UPPER('smith'), UPPER('rodriguez')) + c_lname IN ('LEE', 'SMITH', 'RODRIGUEZ') diff --git a/tests/test_sql_refsols/cryptbank_filter_count_04_rewrite_sqlite.sql b/tests/test_sql_refsols/cryptbank_filter_count_04_rewrite_sqlite.sql index 6b329065c..5dc20fbac 100644 --- a/tests/test_sql_refsols/cryptbank_filter_count_04_rewrite_sqlite.sql +++ b/tests/test_sql_refsols/cryptbank_filter_count_04_rewrite_sqlite.sql @@ -2,4 +2,4 @@ SELECT COUNT(*) AS n FROM crbnk.customers WHERE - NOT c_lname IN (UPPER('lee'), UPPER('smith'), UPPER('rodriguez')) + NOT c_lname IN ('LEE', 'SMITH', 'RODRIGUEZ') diff --git a/tests/test_sql_refsols/cryptbank_filter_count_05_rewrite_sqlite.sql b/tests/test_sql_refsols/cryptbank_filter_count_05_rewrite_sqlite.sql index 8205aea4b..983d9cffa 100644 --- 
a/tests/test_sql_refsols/cryptbank_filter_count_05_rewrite_sqlite.sql +++ b/tests/test_sql_refsols/cryptbank_filter_count_05_rewrite_sqlite.sql @@ -2,4 +2,4 @@ SELECT COUNT(*) AS n FROM crbnk.customers WHERE - REPLACE(REPLACE(REPLACE(c_phone, '9', '*'), '0', '9'), '*', '0') LIKE '555-8%' + c_phone IN ('555-809-1234', '555-870-9123') diff --git a/tests/test_sql_refsols/cryptbank_filter_count_06_rewrite_sqlite.sql b/tests/test_sql_refsols/cryptbank_filter_count_06_rewrite_sqlite.sql index 6e69fc127..868bad685 100644 --- a/tests/test_sql_refsols/cryptbank_filter_count_06_rewrite_sqlite.sql +++ b/tests/test_sql_refsols/cryptbank_filter_count_06_rewrite_sqlite.sql @@ -2,6 +2,4 @@ SELECT COUNT(*) AS n FROM crbnk.customers WHERE - ( - SUBSTRING(c_email, -1) || SUBSTRING(c_email, 1, LENGTH(c_email) - 1) - ) LIKE '%gmail.com' + c_email IN ('livia.a22@gmail.como', 'ob.smith77@gmail.comb', 'ob_moore78@gmail.comr', 'opez.luke99@gmail.coml') diff --git a/tests/test_sql_refsols/cryptbank_filter_count_07_rewrite_sqlite.sql b/tests/test_sql_refsols/cryptbank_filter_count_07_rewrite_sqlite.sql index 4d7c59588..c6210227a 100644 --- a/tests/test_sql_refsols/cryptbank_filter_count_07_rewrite_sqlite.sql +++ b/tests/test_sql_refsols/cryptbank_filter_count_07_rewrite_sqlite.sql @@ -2,4 +2,4 @@ SELECT COUNT(*) AS n FROM crbnk.customers WHERE - CAST(STRFTIME('%Y', DATE(c_birthday, '+472 days')) AS INTEGER) = 1978 + c_birthday IN ('1976-10-27', '1976-12-02') diff --git a/tests/test_sql_refsols/cryptbank_filter_count_08_rewrite_sqlite.sql b/tests/test_sql_refsols/cryptbank_filter_count_08_rewrite_sqlite.sql index fc4234022..f334ffdeb 100644 --- a/tests/test_sql_refsols/cryptbank_filter_count_08_rewrite_sqlite.sql +++ b/tests/test_sql_refsols/cryptbank_filter_count_08_rewrite_sqlite.sql @@ -2,4 +2,4 @@ SELECT COUNT(*) AS n FROM crbnk.customers WHERE - c_birthday = DATE('1985-04-12', '-472 days') + c_birthday = '1983-12-27' diff --git 
a/tests/test_sql_refsols/cryptbank_filter_count_14_rewrite_sqlite.sql b/tests/test_sql_refsols/cryptbank_filter_count_14_rewrite_sqlite.sql index d70b6decd..850d44d4a 100644 --- a/tests/test_sql_refsols/cryptbank_filter_count_14_rewrite_sqlite.sql +++ b/tests/test_sql_refsols/cryptbank_filter_count_14_rewrite_sqlite.sql @@ -2,4 +2,4 @@ SELECT COUNT(*) AS n FROM crbnk.customers WHERE - LOWER(c_fname) LIKE '%e' OR LOWER(c_lname) LIKE '%e' + c_fname IN ('ALICE', 'GRACE', 'LUKE', 'QUEENIE') OR c_lname IN ('LEE', 'MOORE') diff --git a/tests/test_sql_refsols/cryptbank_filter_count_16_rewrite_sqlite.sql b/tests/test_sql_refsols/cryptbank_filter_count_16_rewrite_sqlite.sql index 41580c0f1..d14b6d3f4 100644 --- a/tests/test_sql_refsols/cryptbank_filter_count_16_rewrite_sqlite.sql +++ b/tests/test_sql_refsols/cryptbank_filter_count_16_rewrite_sqlite.sql @@ -3,12 +3,7 @@ WITH _u_0 AS ( a_custkey AS _u_1 FROM crbnk.accounts WHERE - a_type <> ( - SUBSTRING('checking', 2) || SUBSTRING('checking', 1, 1) - ) - AND a_type <> ( - SUBSTRING('savings', 2) || SUBSTRING('savings', 1, 1) - ) + NOT a_type IN ('avingss', 'heckingc') GROUP BY 1 ) diff --git a/tests/test_sql_refsols/cryptbank_filter_count_20_rewrite_sqlite.sql b/tests/test_sql_refsols/cryptbank_filter_count_20_rewrite_sqlite.sql index 5bf9a250d..1a0d73758 100644 --- a/tests/test_sql_refsols/cryptbank_filter_count_20_rewrite_sqlite.sql +++ b/tests/test_sql_refsols/cryptbank_filter_count_20_rewrite_sqlite.sql @@ -2,4 +2,4 @@ SELECT COUNT(*) AS n FROM crbnk.customers WHERE - DATE(c_birthday, '+472 days') > DATE('1991-11-15') + c_birthday IN ('1991-03-13', '1992-05-06', '1993-01-01', '1994-06-15') diff --git a/tests/test_sql_refsols/cryptbank_filter_count_21_rewrite_sqlite.sql b/tests/test_sql_refsols/cryptbank_filter_count_21_rewrite_sqlite.sql index d20b706ad..24dcf01a2 100644 --- a/tests/test_sql_refsols/cryptbank_filter_count_21_rewrite_sqlite.sql +++ b/tests/test_sql_refsols/cryptbank_filter_count_21_rewrite_sqlite.sql @@ 
-2,4 +2,4 @@ SELECT COUNT(*) AS n FROM crbnk.customers WHERE - DATE(c_birthday, '+472 days') >= DATE('1991-11-15') + c_birthday IN ('1990-07-31', '1991-03-13', '1992-05-06', '1993-01-01', '1994-06-15') diff --git a/tests/test_sql_refsols/cryptbank_filter_count_22_rewrite_sqlite.sql b/tests/test_sql_refsols/cryptbank_filter_count_22_rewrite_sqlite.sql index b370979ce..7a4ae7954 100644 --- a/tests/test_sql_refsols/cryptbank_filter_count_22_rewrite_sqlite.sql +++ b/tests/test_sql_refsols/cryptbank_filter_count_22_rewrite_sqlite.sql @@ -2,4 +2,4 @@ SELECT COUNT(*) AS n FROM crbnk.customers WHERE - DATE(c_birthday, '+472 days') < DATE('1991-11-15') + NOT c_birthday IN ('1990-07-31', '1991-03-13', '1992-05-06', '1993-01-01', '1994-06-15') diff --git a/tests/test_sql_refsols/cryptbank_filter_count_23_rewrite_sqlite.sql b/tests/test_sql_refsols/cryptbank_filter_count_23_rewrite_sqlite.sql index 05f9bc494..156a1de25 100644 --- a/tests/test_sql_refsols/cryptbank_filter_count_23_rewrite_sqlite.sql +++ b/tests/test_sql_refsols/cryptbank_filter_count_23_rewrite_sqlite.sql @@ -2,4 +2,4 @@ SELECT COUNT(*) AS n FROM crbnk.customers WHERE - DATE(c_birthday, '+472 days') <= DATE('1991-11-15') + NOT c_birthday IN ('1991-03-13', '1992-05-06', '1993-01-01', '1994-06-15') diff --git a/tests/test_sql_refsols/cryptbank_filter_count_25_rewrite_sqlite.sql b/tests/test_sql_refsols/cryptbank_filter_count_25_rewrite_sqlite.sql index 2ef5a72af..5b28def34 100644 --- a/tests/test_sql_refsols/cryptbank_filter_count_25_rewrite_sqlite.sql +++ b/tests/test_sql_refsols/cryptbank_filter_count_25_rewrite_sqlite.sql @@ -2,5 +2,4 @@ SELECT COUNT(*) AS n FROM crbnk.customers WHERE - DATE(c_birthday, '+472 days') IS NULL - OR c_birthday <> DATE('1991-11-15', '-472 days') + DATE(c_birthday, '+472 days') IS NULL OR c_birthday <> '1990-07-31' From cc2bbed03f7c044e8e7e42ae67247997704bbe39 Mon Sep 17 00:00:00 2001 From: knassre-bodo Date: Thu, 9 Oct 2025 15:07:32 -0400 Subject: [PATCH 05/40] Moving server 
address into mask server info setup --- .../conversion/mask_server_rewrite_shuttle.py | 4 -- pydough/mask_server/mask_server.py | 9 ++-- tests/conftest.py | 4 +- tests/mock_server/lookup_table.py | 54 ++++++++++--------- tests/test_mock_mask_server.py | 32 +++++------ 5 files changed, 56 insertions(+), 47 deletions(-) diff --git a/pydough/conversion/mask_server_rewrite_shuttle.py b/pydough/conversion/mask_server_rewrite_shuttle.py index 330ad0dcc..7a39d6166 100644 --- a/pydough/conversion/mask_server_rewrite_shuttle.py +++ b/pydough/conversion/mask_server_rewrite_shuttle.py @@ -95,10 +95,6 @@ def process_batch(self) -> None: expression=self.convert_to_server_expression(expr), ) ) - print() - print( - f"BATCH ITEM: ({mask_op.table_path}.{mask_op.masking_metadata.column_name}): {batch[-1].expression}" - ) responses: list[MaskServerOutput] = ( self.server_info.simplify_simple_expression_batch(batch) ) diff --git a/pydough/mask_server/mask_server.py b/pydough/mask_server/mask_server.py index 2e7c4eea0..60d9f2a0c 100644 --- a/pydough/mask_server/mask_server.py +++ b/pydough/mask_server/mask_server.py @@ -88,22 +88,25 @@ class MaskServerOutput: class MaskServerInfo: """ - The MaskServeraInfo class is responsible for evaluating predicates against a + The MaskServerInfo class is responsible for evaluating predicates against a given table and column. It interacts with an external mask server to perform the evaluation. """ - def __init__(self, base_url: str, token: str | None = None): + def __init__(self, base_url: str, server_address: str, token: str | None = None): """ Initialize the MaskServerInfo with the given server URL. Args: `base_url`: The URL of the mask server. + `server_address`: The server address to place at the front of all + qualified table paths. `token`: Optional authentication token for the server. 
""" self.connection: ServerConnection = ServerConnection( base_url=base_url, token=token ) + self.server_address: str = server_address def get_server_response_case(self, server_case: str) -> MaskServerResponse: """ @@ -185,7 +188,7 @@ def generate_request( for item in batch: evaluate_request: dict = { - "column_reference": f"{item.table_path}.{item.column_name}", + "column_reference": f"{self.server_address}.{item.table_path}.{item.column_name}", "predicate": item.expression, "mode": "dynamic", "dry_run": False, diff --git a/tests/conftest.py b/tests/conftest.py index 693248f11..618e1e483 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -1872,4 +1872,6 @@ def mock_server_info(mock_server_setup) -> MaskServerInfo: """ TODO: add description """ - return MaskServerInfo(base_url="http://localhost:8000", token=None) + return MaskServerInfo( + base_url="http://localhost:8000", server_address="srv", token=None + ) diff --git a/tests/mock_server/lookup_table.py b/tests/mock_server/lookup_table.py index 7e821b33d..d168455ab 100644 --- a/tests/mock_server/lookup_table.py +++ b/tests/mock_server/lookup_table.py @@ -95,13 +95,16 @@ "count": 3, }, # CRYPTBANK hardcoded responses - ("CRBNK.CUSTOMERS.c_lname", ("EQUAL", 2, "__col__", "lee")): { + ("srv.CRBNK.CUSTOMERS.c_lname", ("EQUAL", 2, "__col__", "lee")): { "type": "literal", "operator": "IN", "values": ["LEE"], "count": 1, }, - ("CRBNK.CUSTOMERS.c_birthday", ("BETWEEN", 3, 1980, "YEAR", 1, "__col__", 1985)): { + ( + "srv.CRBNK.CUSTOMERS.c_birthday", + ("BETWEEN", 3, 1980, "YEAR", 1, "__col__", 1985), + ): { "type": "literal", "operator": "IN", "values": [ @@ -113,7 +116,7 @@ ], "count": 5, }, - ("CRBNK.TRANSACTIONS.t_amount", ("GT", 2, "__col__", 9000.0)): { + ("srv.CRBNK.TRANSACTIONS.t_amount", ("GT", 2, "__col__", 9000.0)): { "type": "literal", "operator": "IN", "values": [ @@ -142,7 +145,7 @@ "count": 21, }, ( - "CRBNK.TRANSACTIONS.t_ts", + "srv.CRBNK.TRANSACTIONS.t_ts", ( "AND", 2, @@ -177,7 +180,7 @@ "count": 
10, }, ( - "CRBNK.ACCOUNTS.a_type", + "srv.CRBNK.ACCOUNTS.a_type", ( "OR", 2, @@ -196,14 +199,14 @@ "values": ["avingss", "etirementr"], "count": 2, }, - ("CRBNK.CUSTOMERS.c_phone", ("ENDSWITH", 2, "__col__", "5")): { + ("srv.CRBNK.CUSTOMERS.c_phone", ("ENDSWITH", 2, "__col__", "5")): { "type": "literal", "operator": "IN", "values": ["555-091-2345", "555-901-2345"], "count": 2, }, ( - "CRBNK.CUSTOMERS.c_fname", + "srv.CRBNK.CUSTOMERS.c_fname", ("OR", 2, "ENDSWITH", 2, "__col__", "a", "ENDSWITH", 2, "__col__", "e"), ): { "type": "literal", @@ -211,32 +214,35 @@ "values": ["ALICE", "GRACE", "LUKE", "MARIA", "OLIVIA", "QUEENIE", "SOPHIA"], "count": 8, }, - ("CRBNK.CUSTOMERS.c_fname", ("ENDSWITH", 2, "__col__", "s")): { + ("srv.CRBNK.CUSTOMERS.c_fname", ("ENDSWITH", 2, "__col__", "s")): { "type": "literal", "operator": "IN", "values": ["JAMES", "NICHOLAS", "THOMAS"], "count": 3, }, - ("CRBNK.CUSTOMERS.c_lname", ("NOT_EQUAL", 2, "__col__", "lopez")): { + ("srv.CRBNK.CUSTOMERS.c_lname", ("NOT_EQUAL", 2, "__col__", "lopez")): { "type": "literal", "operator": "NOT_IN", "values": ["LOPEZ"], "count": 1, }, - ("CRBNK.CUSTOMERS.c_lname", ("NOT_EQUAL", 2, "__col__", "lee")): { + ("srv.CRBNK.CUSTOMERS.c_lname", ("NOT_EQUAL", 2, "__col__", "lee")): { "type": "literal", "operator": "NOT_IN", "values": ["LEE"], "count": 1, }, - ("CRBNK.CUSTOMERS.c_lname", ("IN", 3, "__col__", "lee", "smith", "rodriguez")): { + ( + "srv.CRBNK.CUSTOMERS.c_lname", + ("IN", 3, "__col__", "lee", "smith", "rodriguez"), + ): { "type": "literal", "operator": "IN", "values": ["LEE", "SMITH", "RODRIGUEZ"], "count": 3, }, ( - "CRBNK.CUSTOMERS.c_lname", + "srv.CRBNK.CUSTOMERS.c_lname", ("NOT", 1, "IN", 3, "__col__", "lee", "smith", "rodriguez"), ): { "type": "literal", @@ -244,13 +250,13 @@ "values": ["LEE", "SMITH", "RODRIGUEZ"], "count": 3, }, - ("CRBNK.CUSTOMERS.c_phone", ("STARTSWITH", 2, "__col__", "555-8")): { + ("srv.CRBNK.CUSTOMERS.c_phone", ("STARTSWITH", 2, "__col__", "555-8")): { "type": "literal", 
"operator": "IN", "values": ["555-809-1234", "555-870-9123"], "count": 2, }, - ("CRBNK.CUSTOMERS.c_email", ("ENDSWITH", 2, "__col__", "gmail.com")): { + ("srv.CRBNK.CUSTOMERS.c_email", ("ENDSWITH", 2, "__col__", "gmail.com")): { "type": "literal", "operator": "IN", "values": [ @@ -261,32 +267,32 @@ ], "count": 4, }, - ("CRBNK.CUSTOMERS.c_birthday", ("EQUAL", 2, "YEAR", 1, "__col__", 1978)): { + ("srv.CRBNK.CUSTOMERS.c_birthday", ("EQUAL", 2, "YEAR", 1, "__col__", 1978)): { "type": "literal", "operator": "IN", "values": ["1976-10-27", "1976-12-02"], "count": 2, }, - ("CRBNK.CUSTOMERS.c_birthday", ("EQUAL", 2, "__col__", "1985-04-12")): { + ("srv.CRBNK.CUSTOMERS.c_birthday", ("EQUAL", 2, "__col__", "1985-04-12")): { "type": "literal", "operator": "IN", "values": ["1983-12-27"], "count": 1, }, - ("CRBNK.CUSTOMERS.c_fname", ("ENDSWITH", 2, "__col__", "e")): { + ("srv.CRBNK.CUSTOMERS.c_fname", ("ENDSWITH", 2, "__col__", "e")): { "type": "literal", "operator": "IN", "values": ["ALICE", "GRACE", "LUKE", "QUEENIE"], "count": 4, }, - ("CRBNK.CUSTOMERS.c_lname", ("ENDSWITH", 2, "__col__", "e")): { + ("srv.CRBNK.CUSTOMERS.c_lname", ("ENDSWITH", 2, "__col__", "e")): { "type": "literal", "operator": "IN", "values": ["LEE", "MOORE"], "count": 2, }, ( - "CRBNK.ACCOUNTS.a_type", + "srv.CRBNK.ACCOUNTS.a_type", ( "AND", 2, @@ -305,25 +311,25 @@ "values": ["avingss", "heckingc"], "count": 2, }, - ("CRBNK.CUSTOMERS.c_birthday", ("NOT_EQUAL", 2, "__col__", "1991-11-15")): { + ("srv.CRBNK.CUSTOMERS.c_birthday", ("NOT_EQUAL", 2, "__col__", "1991-11-15")): { "type": "literal", "operator": "NOT_IN", "values": ["1990-07-31"], "count": 1, }, - ("CRBNK.CUSTOMERS.c_birthday", ("LTE", 2, "__col__", "1991-11-15")): { + ("srv.CRBNK.CUSTOMERS.c_birthday", ("LTE", 2, "__col__", "1991-11-15")): { "type": "literal", "operator": "NOT_IN", "values": ["1991-03-13", "1992-05-06", "1993-01-01", "1994-06-15"], "count": 4, }, - ("CRBNK.CUSTOMERS.c_birthday", ("GT", 2, "__col__", "1991-11-15")): { + 
("srv.CRBNK.CUSTOMERS.c_birthday", ("GT", 2, "__col__", "1991-11-15")): { "type": "literal", "operator": "IN", "values": ["1991-03-13", "1992-05-06", "1993-01-01", "1994-06-15"], "count": 4, }, - ("CRBNK.CUSTOMERS.c_birthday", ("LT", 2, "__col__", "1991-11-15")): { + ("srv.CRBNK.CUSTOMERS.c_birthday", ("LT", 2, "__col__", "1991-11-15")): { "type": "literal", "operator": "NOT_IN", "values": [ @@ -335,7 +341,7 @@ ], "count": 4, }, - ("CRBNK.CUSTOMERS.c_birthday", ("GTE", 2, "__col__", "1991-11-15")): { + ("srv.CRBNK.CUSTOMERS.c_birthday", ("GTE", 2, "__col__", "1991-11-15")): { "type": "literal", "operator": "IN", "values": [ diff --git a/tests/test_mock_mask_server.py b/tests/test_mock_mask_server.py index 4d0e44479..19bbbd42e 100644 --- a/tests/test_mock_mask_server.py +++ b/tests/test_mock_mask_server.py @@ -22,27 +22,27 @@ None, [ MaskServerInput( - table_path="srv.db.tbl", + table_path="db.tbl", column_name="col", expression=["EQUAL", 2, "__col__", 0], ), MaskServerInput( - table_path="srv.db.tbl", + table_path="db.tbl", column_name="col", expression=["OR", 2, "__col__", 5], ), MaskServerInput( - table_path="srv.db.orders", + table_path="db.orders", column_name="order_date", expression=["BETWEEN", 3, "__col__", "2025-01-01", "2025-02-01"], ), MaskServerInput( - table_path="srv.db.tbl", + table_path="db.tbl", column_name="col", expression=["GT", 2, "__col__", 45.67], ), MaskServerInput( - table_path="srv.db.tbl", + table_path="db.tbl", column_name="col", expression=["NOT_EQUAL", 2, "__col__", "LOWER", 1, "Smith"], ), @@ -84,7 +84,7 @@ None, [ MaskServerInput( - table_path="srv.db.tbl", + table_path="db.tbl", column_name="col", expression=["EQUAL", 2, "__col__", 0], ), @@ -105,7 +105,7 @@ None, [ MaskServerInput( - table_path="srv.db.tbl", + table_path="db.tbl", column_name="col", expression=["OR", 2, "__col__", 5], ), @@ -122,7 +122,7 @@ "test-token-123", [ MaskServerInput( - table_path="srv.db.orders", + table_path="db.orders", column_name="order_date", 
expression=["BETWEEN", 3, "__col__", "2025-01-01", "2025-02-01"], ), @@ -145,7 +145,7 @@ "test-token-123", [ MaskServerInput( - table_path="srv.db.tbl", + table_path="db.tbl", column_name="col", expression=["NOT_EQUAL", 2, "__col__", True], ), @@ -162,12 +162,12 @@ None, [ MaskServerInput( - table_path="srv.db.tbl", + table_path="db.tbl", column_name="col", expression=["LT", 2, "__col__", "123.654445"], ), MaskServerInput( - table_path="srv.db.tbl", + table_path="db.tbl", column_name="col", expression=[ "AND", @@ -199,7 +199,7 @@ None, [ MaskServerInput( - table_path="srv.db.tbl", + table_path="db.tbl", column_name="col", expression=[ "OR", @@ -243,7 +243,7 @@ def test_mock_mask_server( """ mask_server: MaskServerInfo = MaskServerInfo( - base_url="http://localhost:8000", token=token + base_url="http://localhost:8000", server_address="srv", token=token ) # Doing the request @@ -272,7 +272,7 @@ def test_mock_mask_server( "bad_token_123", [ MaskServerInput( - table_path="srv.db.tbl", + table_path="db.tbl", column_name="col", expression=["OR", 2, "__col__", 5], ) @@ -293,7 +293,9 @@ def test_mock_mask_server_errors( Testing that the MaskServer raises an exception with the expected error message """ with pytest.raises(Exception, match=re.escape(error_msg)): - mask_server: MaskServerInfo = MaskServerInfo(base_url=base_url, token=token) + mask_server: MaskServerInfo = MaskServerInfo( + base_url=base_url, server_address="srv", token=token + ) mask_server.connection.set_timeout(0.5) # Doing the request From a6d4b293e834306bbf3b510ce0d152c3740e5b1c Mon Sep 17 00:00:00 2001 From: knassre-bodo Date: Thu, 9 Oct 2025 15:07:45 -0400 Subject: [PATCH 06/40] [RUN ALL] From beadb153e000d851fed858f9157af1b5dd01e3b1 Mon Sep 17 00:00:00 2001 From: knassre-bodo Date: Fri, 10 Oct 2025 13:29:30 -0400 Subject: [PATCH 07/40] Adding more tests --- .../conversion/mask_server_rewrite_shuttle.py | 2 ++ tests/mock_server/lookup_table.py | 24 +++++++++++++++++++ tests/test_masked_sqlite.py | 19 
+++++++++++++++ .../cryptbank_agg_06_raw.txt | 3 +++ .../cryptbank_agg_06_rewrite.txt | 3 +++ .../cryptbank_filter_count_26_rewrite.txt | 4 ++-- .../cryptbank_filter_count_29_raw.txt | 4 ++++ .../cryptbank_filter_count_29_rewrite.txt | 4 ++++ .../cryptbank_agg_06_raw_sqlite.sql | 8 +++++++ .../cryptbank_agg_06_rewrite_sqlite.sql | 4 ++++ ...yptbank_filter_count_26_rewrite_sqlite.sql | 2 +- .../cryptbank_filter_count_29_raw_sqlite.sql | 5 ++++ ...yptbank_filter_count_29_rewrite_sqlite.sql | 5 ++++ 13 files changed, 84 insertions(+), 3 deletions(-) create mode 100644 tests/test_plan_refsols/cryptbank_agg_06_raw.txt create mode 100644 tests/test_plan_refsols/cryptbank_agg_06_rewrite.txt create mode 100644 tests/test_plan_refsols/cryptbank_filter_count_29_raw.txt create mode 100644 tests/test_plan_refsols/cryptbank_filter_count_29_rewrite.txt create mode 100644 tests/test_sql_refsols/cryptbank_agg_06_raw_sqlite.sql create mode 100644 tests/test_sql_refsols/cryptbank_agg_06_rewrite_sqlite.sql create mode 100644 tests/test_sql_refsols/cryptbank_filter_count_29_raw_sqlite.sql create mode 100644 tests/test_sql_refsols/cryptbank_filter_count_29_rewrite_sqlite.sql diff --git a/pydough/conversion/mask_server_rewrite_shuttle.py b/pydough/conversion/mask_server_rewrite_shuttle.py index 7a39d6166..324fa4d5c 100644 --- a/pydough/conversion/mask_server_rewrite_shuttle.py +++ b/pydough/conversion/mask_server_rewrite_shuttle.py @@ -88,6 +88,8 @@ def process_batch(self) -> None: input_expr, ) in self.candidate_shuttle.candidate_pool.items(): ancillary_info.append((expr, input_expr)) + print() + print(self.convert_to_server_expression(expr)) batch.append( MaskServerInput( table_path=mask_op.table_path, diff --git a/tests/mock_server/lookup_table.py b/tests/mock_server/lookup_table.py index d168455ab..872276215 100644 --- a/tests/mock_server/lookup_table.py +++ b/tests/mock_server/lookup_table.py @@ -353,4 +353,28 @@ ], "count": 4, }, + ("srv.CRBNK.TRANSACTIONS.t_amount", ("LT", 2, 
"__col__", 0)): { + "type": "literal", + "operator": "IN", + "values": [], + "count": 0, + }, + ("srv.CRBNK.TRANSACTIONS.t_amount", ("GT", 2, "__col__", 0)): { + "type": "literal", + "operator": "NOT_IN", + "values": [], + "count": 0, + }, + ("srv.CRBNK.CUSTOMERS.c_birthday", ("LTE", 2, "__col__", "1925-01-01")): { + "type": "literal", + "operator": "IN", + "values": [], + "count": 0, + }, + ("srv.CRBNK.CUSTOMERS.c_phone", ("EQUAL", 2, "__col__", "555-123-456")): { + "type": "literal", + "operator": "IN", + "values": [], + "count": 0, + }, } diff --git a/tests/test_masked_sqlite.py b/tests/test_masked_sqlite.py index 1d9af6976..6a6b437ba 100644 --- a/tests/test_masked_sqlite.py +++ b/tests/test_masked_sqlite.py @@ -395,6 +395,16 @@ ), id="cryptbank_filter_count_28", ), + pytest.param( + PyDoughPandasTest( + "selected_customers = customers.WHERE(birthday <= '1925-01-01')\n" + "result = CRYPTBANK.CALCULATE(n=COUNT(selected_customers))", + "CRYPTBANK", + lambda: pd.DataFrame({"n": [0]}), + "cryptbank_filter_count_29", + ), + id="cryptbank_filter_count_29", + ), pytest.param( PyDoughPandasTest( "selected_transactions = transactions.WHERE((YEAR(time_stamp) == 2022) & (MONTH(time_stamp) == 6))\n" @@ -497,6 +507,15 @@ ), id="cryptbank_agg_05", ), + pytest.param( + PyDoughPandasTest( + "result = CRYPTBANK.CALCULATE(n_neg=SUM(transactions.amount < 0), n_positive=SUM(transactions.amount > 0))", + "CRYPTBANK", + lambda: pd.DataFrame({"n_neg": [0], "n_positive": [300]}), + "cryptbank_agg_06", + ), + id="cryptbank_agg_06", + ), pytest.param( PyDoughPandasTest( "first_sent = accounts_held.transactions_sent.WHERE(receiver_account.branch.address[-5:] == '94105').BEST(per='accounts_held', by=time_stamp.ASC())\n" diff --git a/tests/test_plan_refsols/cryptbank_agg_06_raw.txt b/tests/test_plan_refsols/cryptbank_agg_06_raw.txt new file mode 100644 index 000000000..2815f14ad --- /dev/null +++ b/tests/test_plan_refsols/cryptbank_agg_06_raw.txt @@ -0,0 +1,3 @@ +ROOT(columns=[('n_neg', 
DEFAULT_TO(sum_expr, 0:numeric)), ('n_positive', DEFAULT_TO(sum_expr_3, 0:numeric))], orderings=[]) + AGGREGATE(keys={}, aggregations={'sum_expr': SUM(UNMASK::((1025.67 - ([t_amount]))) < 0:numeric), 'sum_expr_3': SUM(UNMASK::((1025.67 - ([t_amount]))) > 0:numeric)}) + SCAN(table=CRBNK.TRANSACTIONS, columns={'t_amount': t_amount}) diff --git a/tests/test_plan_refsols/cryptbank_agg_06_rewrite.txt b/tests/test_plan_refsols/cryptbank_agg_06_rewrite.txt new file mode 100644 index 000000000..c7c7e9e9a --- /dev/null +++ b/tests/test_plan_refsols/cryptbank_agg_06_rewrite.txt @@ -0,0 +1,3 @@ +ROOT(columns=[('n_neg', sum_expr), ('n_positive', sum_expr_3)], orderings=[]) + AGGREGATE(keys={}, aggregations={'sum_expr': SUM(False:bool), 'sum_expr_3': SUM(True:bool)}) + SCAN(table=CRBNK.TRANSACTIONS, columns={}) diff --git a/tests/test_plan_refsols/cryptbank_filter_count_26_rewrite.txt b/tests/test_plan_refsols/cryptbank_filter_count_26_rewrite.txt index 34e03d1e0..9a06dd87d 100644 --- a/tests/test_plan_refsols/cryptbank_filter_count_26_rewrite.txt +++ b/tests/test_plan_refsols/cryptbank_filter_count_26_rewrite.txt @@ -1,4 +1,4 @@ ROOT(columns=[('n', n_rows)], orderings=[]) AGGREGATE(keys={}, aggregations={'n_rows': COUNT()}) - FILTER(condition=c_phone == MASK::(REPLACE(REPLACE(REPLACE(['555-123-456':string], '0', '*'), '9', '0'), '*', '9')), columns={}) - SCAN(table=CRBNK.CUSTOMERS, columns={'c_phone': c_phone}) + FILTER(condition=False:bool, columns={}) + SCAN(table=CRBNK.CUSTOMERS, columns={}) diff --git a/tests/test_plan_refsols/cryptbank_filter_count_29_raw.txt b/tests/test_plan_refsols/cryptbank_filter_count_29_raw.txt new file mode 100644 index 000000000..a59cfd88f --- /dev/null +++ b/tests/test_plan_refsols/cryptbank_filter_count_29_raw.txt @@ -0,0 +1,4 @@ +ROOT(columns=[('n', n_rows)], orderings=[]) + AGGREGATE(keys={}, aggregations={'n_rows': COUNT()}) + FILTER(condition=UNMASK::(DATE([c_birthday], '+472 days')) <= '1925-01-01':string, columns={}) + 
SCAN(table=CRBNK.CUSTOMERS, columns={'c_birthday': c_birthday}) diff --git a/tests/test_plan_refsols/cryptbank_filter_count_29_rewrite.txt b/tests/test_plan_refsols/cryptbank_filter_count_29_rewrite.txt new file mode 100644 index 000000000..9a06dd87d --- /dev/null +++ b/tests/test_plan_refsols/cryptbank_filter_count_29_rewrite.txt @@ -0,0 +1,4 @@ +ROOT(columns=[('n', n_rows)], orderings=[]) + AGGREGATE(keys={}, aggregations={'n_rows': COUNT()}) + FILTER(condition=False:bool, columns={}) + SCAN(table=CRBNK.CUSTOMERS, columns={}) diff --git a/tests/test_sql_refsols/cryptbank_agg_06_raw_sqlite.sql b/tests/test_sql_refsols/cryptbank_agg_06_raw_sqlite.sql new file mode 100644 index 000000000..ed6bfdc6c --- /dev/null +++ b/tests/test_sql_refsols/cryptbank_agg_06_raw_sqlite.sql @@ -0,0 +1,8 @@ +SELECT + COALESCE(SUM(( + 1025.67 - t_amount + ) < 0), 0) AS n_neg, + COALESCE(SUM(( + 1025.67 - t_amount + ) > 0), 0) AS n_positive +FROM crbnk.transactions diff --git a/tests/test_sql_refsols/cryptbank_agg_06_rewrite_sqlite.sql b/tests/test_sql_refsols/cryptbank_agg_06_rewrite_sqlite.sql new file mode 100644 index 000000000..104442af6 --- /dev/null +++ b/tests/test_sql_refsols/cryptbank_agg_06_rewrite_sqlite.sql @@ -0,0 +1,4 @@ +SELECT + SUM(FALSE) AS n_neg, + SUM(TRUE) AS n_positive +FROM crbnk.transactions diff --git a/tests/test_sql_refsols/cryptbank_filter_count_26_rewrite_sqlite.sql b/tests/test_sql_refsols/cryptbank_filter_count_26_rewrite_sqlite.sql index 1b6629cc3..853eb7d65 100644 --- a/tests/test_sql_refsols/cryptbank_filter_count_26_rewrite_sqlite.sql +++ b/tests/test_sql_refsols/cryptbank_filter_count_26_rewrite_sqlite.sql @@ -2,4 +2,4 @@ SELECT COUNT(*) AS n FROM crbnk.customers WHERE - c_phone = REPLACE(REPLACE(REPLACE('555-123-456', '0', '*'), '9', '0'), '*', '9') + FALSE diff --git a/tests/test_sql_refsols/cryptbank_filter_count_29_raw_sqlite.sql b/tests/test_sql_refsols/cryptbank_filter_count_29_raw_sqlite.sql new file mode 100644 index 000000000..705cae931 --- 
/dev/null +++ b/tests/test_sql_refsols/cryptbank_filter_count_29_raw_sqlite.sql @@ -0,0 +1,5 @@ +SELECT + COUNT(*) AS n +FROM crbnk.customers +WHERE + DATE(c_birthday, '+472 days') <= DATE('1925-01-01') diff --git a/tests/test_sql_refsols/cryptbank_filter_count_29_rewrite_sqlite.sql b/tests/test_sql_refsols/cryptbank_filter_count_29_rewrite_sqlite.sql new file mode 100644 index 000000000..853eb7d65 --- /dev/null +++ b/tests/test_sql_refsols/cryptbank_filter_count_29_rewrite_sqlite.sql @@ -0,0 +1,5 @@ +SELECT + COUNT(*) AS n +FROM crbnk.customers +WHERE + FALSE From 5ea82f13d71c3f108ba8207f0885b4321d565a51 Mon Sep 17 00:00:00 2001 From: knassre-bodo Date: Wed, 15 Oct 2025 12:45:36 -0400 Subject: [PATCH 08/40] Switching up relational shuttle handling for simplification --- .../conversion/mask_server_rewrite_shuttle.py | 2 - pydough/conversion/relational_converter.py | 13 ++- .../conversion/relational_simplification.py | 49 +++++------- pydough/relational/__init__.py | 2 + .../relational/relational_nodes/__init__.py | 4 + ...elational_expression_shuttle_dispatcher.py | 80 +++++++++++++++++++ 6 files changed, 116 insertions(+), 34 deletions(-) create mode 100644 pydough/relational/relational_nodes/relational_expression_shuttle_dispatcher.py diff --git a/pydough/conversion/mask_server_rewrite_shuttle.py b/pydough/conversion/mask_server_rewrite_shuttle.py index 324fa4d5c..7a39d6166 100644 --- a/pydough/conversion/mask_server_rewrite_shuttle.py +++ b/pydough/conversion/mask_server_rewrite_shuttle.py @@ -88,8 +88,6 @@ def process_batch(self) -> None: input_expr, ) in self.candidate_shuttle.candidate_pool.items(): ancillary_info.append((expr, input_expr)) - print() - print(self.convert_to_server_expression(expr)) batch.append( MaskServerInput( table_path=mask_op.table_path, diff --git a/pydough/conversion/relational_converter.py b/pydough/conversion/relational_converter.py index a01ec6053..2a307fa64 100644 --- a/pydough/conversion/relational_converter.py +++ 
b/pydough/conversion/relational_converter.py @@ -45,6 +45,7 @@ Project, RelationalExpression, RelationalExpressionShuttle, + RelationalExpressionVisitor, RelationalNode, RelationalRoot, Scan, @@ -1525,7 +1526,9 @@ def confirm_root(node: RelationalNode) -> RelationalRoot: def optimize_relational_tree( root: RelationalRoot, session: PyDoughSession, - additional_shuttles: list[RelationalExpressionShuttle], + additional_shuttles: list[ + RelationalExpressionShuttle | RelationalExpressionVisitor + ], ) -> RelationalRoot: """ Runs optimize on the relational tree, including pushing down filters and @@ -1534,8 +1537,8 @@ def optimize_relational_tree( Args: `root`: the relational root to optimize. `configs`: PyDough session used during optimization. - `additional_shuttles`: additional relational expression shuttles to use - for expression simplification. + `additional_shuttles`: additional relational expression shuttles or + visitors to use for expression simplification. Returns: The optimized relational root. @@ -1666,7 +1669,9 @@ def convert_ast_to_relational( raw_result: RelationalRoot = postprocess_root(node, columns, hybrid, output) # Invoke the optimization procedures on the result to clean up the tree. - additional_shuttles: list[RelationalExpressionShuttle] = [] + additional_shuttles: list[ + RelationalExpressionShuttle | RelationalExpressionVisitor + ] = [] # Add the mask literal comparison shuttle if the environment variable # PYDOUGH_ENABLE_MASK_REWRITES is set to 1. If a masking rewrite server has # been attached to the session, include the shuttles for that as well. 
diff --git a/pydough/conversion/relational_simplification.py b/pydough/conversion/relational_simplification.py index 4eaf33e7a..2c87b77c2 100644 --- a/pydough/conversion/relational_simplification.py +++ b/pydough/conversion/relational_simplification.py @@ -30,7 +30,9 @@ LiteralExpression, Project, RelationalExpression, + RelationalExpressionDispatcher, RelationalExpressionShuttle, + RelationalExpressionVisitor, RelationalNode, RelationalRoot, RelationalVisitor, @@ -40,6 +42,9 @@ from pydough.relational.rel_util import ( add_input_name, ) +from pydough.relational.relational_nodes.relational_expression_shuttle_dispatcher import ( + RelationalExpressionShuttleDispatcher, +) from pydough.sqlglot.transform_bindings.sqlglot_transform_utils import ( DateTimeUnit, offset_pattern, @@ -1455,22 +1460,13 @@ class SimplificationVisitor(RelationalVisitor): the current node are placed on the stack. """ - def __init__( - self, - session: PyDoughSession, - additional_shuttles: list[RelationalExpressionShuttle], - ): + def __init__(self, session: PyDoughSession): self.stack: list[dict[RelationalExpression, PredicateSet]] = [] self.shuttle: SimplificationShuttle = SimplificationShuttle(session) - self.additional_shuttles: list[RelationalExpressionShuttle] = ( - additional_shuttles - ) def reset(self): self.stack.clear() self.shuttle.reset() - for shuttle in self.additional_shuttles: - shuttle.reset() def get_input_predicates( self, node: RelationalNode @@ -1535,8 +1531,6 @@ def generic_visit( ref_expr = ColumnReference(name, expr.data_type) expr = expr.accept_shuttle(self.shuttle) output_predicates[ref_expr] = self.shuttle.stack.pop() - for shuttle in self.additional_shuttles: - expr = expr.accept_shuttle(shuttle) node.columns[name] = expr return output_predicates @@ -1615,8 +1609,6 @@ def visit_filter(self, node: Filter) -> None: # Transform the filter condition in-place. 
node._condition = node.condition.accept_shuttle(self.shuttle) self.shuttle.stack.pop() - for shuttle in self.additional_shuttles: - node._condition = node.condition.accept_shuttle(shuttle) self.infer_null_predicates_from_condition( output_predicates, node.condition, @@ -1631,8 +1623,6 @@ def visit_join(self, node: Join) -> None: # Transform the join condition in-place. node._condition = node.condition.accept_shuttle(self.shuttle) self.shuttle.stack.pop() - for shuttle in self.additional_shuttles: - node._condition = node.condition.accept_shuttle(shuttle) # If the join is not an inner join, remove any not-null predicates # from the RHS of the join. if node.join_type != JoinType.INNER: @@ -1659,8 +1649,6 @@ def visit_limit(self, node: Limit) -> None: for ordering_expr in node.orderings: ordering_expr.expr = ordering_expr.expr.accept_shuttle(self.shuttle) self.shuttle.stack.pop() - for shuttle in self.additional_shuttles: - ordering_expr.expr = ordering_expr.expr.accept_shuttle(shuttle) self.stack.append(output_predicates) def visit_root(self, node: RelationalRoot) -> None: @@ -1674,8 +1662,6 @@ def visit_root(self, node: RelationalRoot) -> None: for ordering_expr in node.orderings: ordering_expr.expr = ordering_expr.expr.accept_shuttle(self.shuttle) self.shuttle.stack.pop() - for shuttle in self.additional_shuttles: - ordering_expr.expr = ordering_expr.expr.accept_shuttle(shuttle) self.stack.append(output_predicates) def visit_aggregate(self, node: Aggregate) -> None: @@ -1695,7 +1681,9 @@ def visit_aggregate(self, node: Aggregate) -> None: def simplify_expressions( node: RelationalNode, session: PyDoughSession, - additional_shuttles: list[RelationalExpressionShuttle], + additional_shuttles: list[ + RelationalExpressionShuttle | RelationalExpressionVisitor + ], ) -> None: """ Transforms the current node and all of its descendants in-place to simplify @@ -1704,12 +1692,17 @@ def simplify_expressions( Args: `node`: The relational node to perform simplification on. 
`session`: The PyDough session used during the simplification. - `additional_shuttles`: A list of additional shuttles to apply to the - expressions of the node and its descendants. These shuttles are applied - after the simplification shuttle, and can be used to perform additional - transformations on the expressions. + `additional_shuttles`: A list of additional shuttles or visitors to + apply to the expressions of the node and its descendants. These shuttles + and visitors are applied after the simplification shuttle, and can be + used to perform additional transformations on the expressions. """ - simplifier: SimplificationVisitor = SimplificationVisitor( - session, additional_shuttles - ) + simplifier: SimplificationVisitor = SimplificationVisitor(session) node.accept(simplifier) + + # Run all of the other shuttles/visitors over the entire tree. + for shuttle_or_visitor in additional_shuttles: + if isinstance(shuttle_or_visitor, RelationalExpressionShuttle): + node.accept(RelationalExpressionShuttleDispatcher(shuttle_or_visitor)) + else: + node.accept(RelationalExpressionDispatcher(shuttle_or_visitor, True)) diff --git a/pydough/relational/__init__.py b/pydough/relational/__init__.py index bbe146dbe..faf71a34b 100644 --- a/pydough/relational/__init__.py +++ b/pydough/relational/__init__.py @@ -19,6 +19,7 @@ "RelationalExpression", "RelationalExpressionDispatcher", "RelationalExpressionShuttle", + "RelationalExpressionShuttleDispatcher", "RelationalExpressionVisitor", "RelationalNode", "RelationalRoot", @@ -53,6 +54,7 @@ Limit, Project, RelationalExpressionDispatcher, + RelationalExpressionShuttleDispatcher, RelationalNode, RelationalRoot, RelationalShuttle, diff --git a/pydough/relational/relational_nodes/__init__.py b/pydough/relational/relational_nodes/__init__.py index 736656cf7..e867f847f 100644 --- a/pydough/relational/relational_nodes/__init__.py +++ b/pydough/relational/relational_nodes/__init__.py @@ -15,6 +15,7 @@ "Limit", "Project", 
"RelationalExpressionDispatcher", + "RelationalExpressionShuttleDispatcher", "RelationalNode", "RelationalRoot", "RelationalShuttle", @@ -31,6 +32,9 @@ from .limit import Limit from .project import Project from .relational_expression_dispatcher import RelationalExpressionDispatcher +from .relational_expression_shuttle_dispatcher import ( + RelationalExpressionShuttleDispatcher, +) from .relational_root import RelationalRoot from .relational_shuttle import RelationalShuttle from .relational_visitor import RelationalVisitor diff --git a/pydough/relational/relational_nodes/relational_expression_shuttle_dispatcher.py b/pydough/relational/relational_nodes/relational_expression_shuttle_dispatcher.py new file mode 100644 index 000000000..ec016d07d --- /dev/null +++ b/pydough/relational/relational_nodes/relational_expression_shuttle_dispatcher.py @@ -0,0 +1,80 @@ +""" +TODO +""" + +from pydough.relational.relational_expressions import ( + CallExpression, + RelationalExpressionShuttle, +) + +from .abstract_node import RelationalNode +from .aggregate import Aggregate +from .empty_singleton import EmptySingleton +from .filter import Filter +from .join import Join +from .limit import Limit +from .project import Project +from .relational_root import RelationalRoot +from .relational_visitor import RelationalVisitor +from .scan import Scan + +__all__ = ["RelationalExpressionShuttleDispatcher"] + + +class RelationalExpressionShuttleDispatcher(RelationalVisitor): + """ + Applies some expression shuttle to each expression in the relational tree. + """ + + def __init__(self, shuttle: RelationalExpressionShuttle) -> None: + self.shuttle: RelationalExpressionShuttle = shuttle + + def reset(self) -> None: + self.shuttle.reset() + + def visit_common(self, node: RelationalNode) -> None: + """ + Applies a visit common to each node. 
+ """ + self.visit_inputs(node) + for name, expr in node.columns.items(): + node.columns[name] = expr.accept_shuttle(self.shuttle) + + def visit_scan(self, scan: Scan) -> None: + self.visit_common(scan) + + def visit_join(self, join: Join) -> None: + self.visit_common(join) + join._condition = join.condition.accept_shuttle(self.shuttle) + + def visit_project(self, project: Project) -> None: + self.visit_common(project) + + def visit_filter(self, filter: Filter) -> None: + self.visit_common(filter) + filter._condition = filter.condition.accept_shuttle(self.shuttle) + + def visit_aggregate(self, aggregate: Aggregate) -> None: + self.visit_common(aggregate) + for key in aggregate.keys: + aggregate.keys[key] = aggregate.columns[key] + for agg in aggregate.aggregations: + aggregation = aggregate.aggregations[agg] + assert isinstance(aggregation, CallExpression) + aggregate.aggregations[agg] = aggregation + + def visit_limit(self, limit: Limit) -> None: + self.visit_common(limit) + limit._limit = limit.limit.accept_shuttle(self.shuttle) + for order in limit.orderings: + order.expr = order.expr.accept_shuttle(self.shuttle) + + def visit_empty_singleton(self, singleton: EmptySingleton) -> None: + pass + + def visit_root(self, root: RelationalRoot) -> None: + self.visit_common(root) + if root.limit is not None: + root._limit = root.limit.accept_shuttle(self.shuttle) + for order in root.orderings: + order.expr = order.expr.accept_shuttle(self.shuttle) From f0f512cb4c7f20e3a539daf92ff273357627724a Mon Sep 17 00:00:00 2001 From: knassre-bodo Date: Wed, 15 Oct 2025 12:48:53 -0400 Subject: [PATCH 09/40] Minor adjustments to file placement --- pydough/conversion/relational_converter.py | 12 ++++++++++-- .../conversion/relational_simplification.py | 19 ------------------- ...elational_expression_shuttle_dispatcher.py | 6 ++++-- 3 files changed, 14 insertions(+), 23 deletions(-) diff --git a/pydough/conversion/relational_converter.py b/pydough/conversion/relational_converter.py 
index 2a307fa64..fada8ee12 100644 --- a/pydough/conversion/relational_converter.py +++ b/pydough/conversion/relational_converter.py @@ -44,7 +44,9 @@ LiteralExpression, Project, RelationalExpression, + RelationalExpressionDispatcher, RelationalExpressionShuttle, + RelationalExpressionShuttleDispatcher, RelationalExpressionVisitor, RelationalNode, RelationalRoot, @@ -1594,7 +1596,7 @@ def optimize_relational_tree( # Run the following pipeline twice: # A: projection pullup - # B: expression simplification + # B: expression simplification (followed by additional shuttles) # C: filter pushdown # D: column pruning # This is done because pullup will create more opportunities for expression @@ -1604,7 +1606,13 @@ def optimize_relational_tree( # pullup and pushdown and so on. for _ in range(2): root = confirm_root(pullup_projections(root)) - simplify_expressions(root, session, additional_shuttles) + simplify_expressions(root, session) + # Run all of the other shuttles/visitors over the entire tree. 
+ for shuttle_or_visitor in additional_shuttles: + if isinstance(shuttle_or_visitor, RelationalExpressionShuttle): + root.accept(RelationalExpressionShuttleDispatcher(shuttle_or_visitor)) + else: + root.accept(RelationalExpressionDispatcher(shuttle_or_visitor, True)) root = confirm_root(push_filters(root, session)) root = pruner.prune_unused_columns(root) diff --git a/pydough/conversion/relational_simplification.py b/pydough/conversion/relational_simplification.py index 2c87b77c2..305e4948a 100644 --- a/pydough/conversion/relational_simplification.py +++ b/pydough/conversion/relational_simplification.py @@ -30,9 +30,7 @@ LiteralExpression, Project, RelationalExpression, - RelationalExpressionDispatcher, RelationalExpressionShuttle, - RelationalExpressionVisitor, RelationalNode, RelationalRoot, RelationalVisitor, @@ -42,9 +40,6 @@ from pydough.relational.rel_util import ( add_input_name, ) -from pydough.relational.relational_nodes.relational_expression_shuttle_dispatcher import ( - RelationalExpressionShuttleDispatcher, -) from pydough.sqlglot.transform_bindings.sqlglot_transform_utils import ( DateTimeUnit, offset_pattern, @@ -1681,9 +1676,6 @@ def visit_aggregate(self, node: Aggregate) -> None: def simplify_expressions( node: RelationalNode, session: PyDoughSession, - additional_shuttles: list[ - RelationalExpressionShuttle | RelationalExpressionVisitor - ], ) -> None: """ Transforms the current node and all of its descendants in-place to simplify @@ -1692,17 +1684,6 @@ def simplify_expressions( Args: `node`: The relational node to perform simplification on. `session`: The PyDough session used during the simplification. - `additional_shuttles`: A list of additional shuttles or visitors to - apply to the expressions of the node and its descendants. These shuttles - and visitors are applied after the simplification shuttle, and can be - used to perform additional transformations on the expressions. 
""" simplifier: SimplificationVisitor = SimplificationVisitor(session) node.accept(simplifier) - - # Run all of the other shuttles/visitors over the entire tree. - for shuttle_or_visitor in additional_shuttles: - if isinstance(shuttle_or_visitor, RelationalExpressionShuttle): - node.accept(RelationalExpressionShuttleDispatcher(shuttle_or_visitor)) - else: - node.accept(RelationalExpressionDispatcher(shuttle_or_visitor, True)) diff --git a/pydough/relational/relational_nodes/relational_expression_shuttle_dispatcher.py b/pydough/relational/relational_nodes/relational_expression_shuttle_dispatcher.py index ec016d07d..5bcd4bf9a 100644 --- a/pydough/relational/relational_nodes/relational_expression_shuttle_dispatcher.py +++ b/pydough/relational/relational_nodes/relational_expression_shuttle_dispatcher.py @@ -1,5 +1,6 @@ """ -TODO +Implementation of a visitor that works by applying a shuttle to every expression +for each node. """ from pydough.relational.relational_expressions import ( @@ -34,7 +35,8 @@ def reset(self) -> None: def visit_common(self, node: RelationalNode) -> None: """ - Applies a visit common to each node. + Applies the basic logic to transform all the expressions in a node's + column list, as well as transforming the inputs to the node. 
""" self.visit_inputs(node) for name, expr in node.columns.items(): From 54ecef14d45e256956c9c38ede15c8ec5a51b97f Mon Sep 17 00:00:00 2001 From: knassre-bodo Date: Wed, 15 Oct 2025 13:32:06 -0400 Subject: [PATCH 10/40] Moved some logic from rewrite shuttle to candidate visitor --- .../mask_server_candidate_shuttle.py | 122 ----------- .../mask_server_candidate_visitor.py | 205 ++++++++++++++++++ .../conversion/mask_server_rewrite_shuttle.py | 113 ++-------- pydough/conversion/relational_converter.py | 4 +- tests/conftest.py | 8 +- tests/mock_server/lookup_table.py | 4 +- 6 files changed, 230 insertions(+), 226 deletions(-) delete mode 100644 pydough/conversion/mask_server_candidate_shuttle.py create mode 100644 pydough/conversion/mask_server_candidate_visitor.py diff --git a/pydough/conversion/mask_server_candidate_shuttle.py b/pydough/conversion/mask_server_candidate_shuttle.py deleted file mode 100644 index 7754e8279..000000000 --- a/pydough/conversion/mask_server_candidate_shuttle.py +++ /dev/null @@ -1,122 +0,0 @@ -""" -TODO -""" - -__all__ = ["MaskServerCandidateShuttle"] - -import pydough.pydough_operators as pydop -from pydough.relational import ( - CallExpression, - ColumnReference, - LiteralExpression, - RelationalExpression, - RelationalExpressionShuttle, - WindowCallExpression, -) - - -class MaskServerCandidateShuttle(RelationalExpressionShuttle): - """ - TODO - """ - - ALLOWED_MASK_OPERATORS: set[pydop.PyDoughExpressionOperator] = { - pydop.BAN, - pydop.BOR, - pydop.NOT, - pydop.EQU, - pydop.NEQ, - pydop.GRT, - pydop.GEQ, - pydop.LET, - pydop.LEQ, - pydop.NEQ, - pydop.ISIN, - pydop.STARTSWITH, - pydop.ENDSWITH, - pydop.LOWER, - pydop.UPPER, - pydop.MONOTONIC, - pydop.YEAR, - pydop.MONTH, - pydop.DAY, - pydop.ADD, - pydop.SUB, - pydop.MUL, - pydop.DIV, - } - """ - TODO: ADD DESCRIPTION - """ - - def __init__(self) -> None: - # TODO ADD COMMENTS - self.candidate_pool: dict[ - RelationalExpression, - tuple[pydop.MaskedExpressionFunctionOperator, 
RelationalExpression], - ] = {} - self.processed_candidates: set[RelationalExpression] = set() - self.stack: list[ - tuple[ - tuple[pydop.MaskedExpressionFunctionOperator, RelationalExpression] - | None, - bool, - ] - ] = [] - - def reset(self): - self.stack.clear() - - def visit_call_expression(self, expr: CallExpression) -> RelationalExpression: - # TODO ADD COMMENTS - for arg in expr.inputs: - arg.accept_shuttle(self) - mask_ops: set[ - tuple[pydop.MaskedExpressionFunctionOperator, RelationalExpression] - ] = set() - disallowed: bool = False - for _ in range(len(expr.inputs)): - stack_term, arg_disallowed = self.stack.pop() - if stack_term is not None: - mask_ops.add(stack_term) - disallowed |= arg_disallowed - - if ( - isinstance(expr.op, pydop.MaskedExpressionFunctionOperator) - and expr.op.is_unmask - ): - self.stack.append(((expr.op, expr.inputs[0]), False)) - elif disallowed: - self.stack.append((None, True)) - elif len(mask_ops) == 1 and expr.op in self.ALLOWED_MASK_OPERATORS: - input_term: tuple[ - pydop.MaskedExpressionFunctionOperator, RelationalExpression - ] = mask_ops.pop() - if expr not in self.processed_candidates: - self.candidate_pool[expr] = input_term - self.processed_candidates.add(expr) - self.stack.append((input_term, False)) - else: - self.stack.append((None, True)) - return expr - - def visit_column_reference( - self, column_reference: ColumnReference - ) -> RelationalExpression: - self.stack.append((None, True)) - return column_reference - - def visit_literal_expression( - self, literal: LiteralExpression - ) -> RelationalExpression: - self.stack.append((None, False)) - return literal - - def visit_window_expression( - self, window_expression: WindowCallExpression - ) -> RelationalExpression: - result: RelationalExpression = super().visit_window_expression( - window_expression - ) - self.stack.append((None, True)) - return result diff --git a/pydough/conversion/mask_server_candidate_visitor.py 
b/pydough/conversion/mask_server_candidate_visitor.py new file mode 100644 index 000000000..925804ed7 --- /dev/null +++ b/pydough/conversion/mask_server_candidate_visitor.py @@ -0,0 +1,205 @@ +""" +Logic for the visitor that is run across all expressions to identify candidates +for Mask Server rewrite conversion. +""" + +__all__ = ["MaskServerCandidateVisitor"] + +import pydough.pydough_operators as pydop +from pydough.relational import ( + CallExpression, + ColumnReference, + CorrelatedReference, + LiteralExpression, + RelationalExpression, + RelationalExpressionVisitor, + WindowCallExpression, +) +from pydough.types import UnknownType + + +class MaskServerCandidateVisitor(RelationalExpressionVisitor): + """ + TODO + """ + + OPERATORS_TO_SERVER_NAMES: dict[pydop.PyDoughExpressionOperator, str] = { + pydop.BAN: "AND", + pydop.BOR: "OR", + pydop.NOT: "NOT", + pydop.EQU: "EQUAL", + pydop.NEQ: "NOT_EQUAL", + pydop.GRT: "GT", + pydop.GEQ: "GTE", + pydop.LET: "LT", + pydop.LEQ: "LTE", + pydop.STARTSWITH: "STARTSWITH", + pydop.ENDSWITH: "ENDSWITH", + pydop.LOWER: "LOWER", + pydop.UPPER: "UPPER", + pydop.MONOTONIC: "BETWEEN", + pydop.YEAR: "YEAR", + pydop.MONTH: "MONTH", + pydop.DAY: "DAY", + pydop.ADD: "ADD", + pydop.SUB: "SUB", + pydop.MUL: "MUL", + pydop.DIV: "DIV", + } + """ + TODO: ADD DESCRIPTION + """ + + def __init__(self) -> None: + self.candidate_pool: dict[ + RelationalExpression, + tuple[ + pydop.MaskedExpressionFunctionOperator, + RelationalExpression, + list[str | int | float | None | bool], + ], + ] = {} + """ + TODO: ADD COMMENTS + """ + + self.processed_candidates: set[RelationalExpression] = set() + """ + TODO: ADD COMMENTS + """ + + self.stack: list[ + tuple[ + tuple[pydop.MaskedExpressionFunctionOperator, RelationalExpression] + | None, + list[str | int | float | None | bool] | None, + ] + ] = [] + """ + TODO: ADD COMMENTS + """ + + def reset(self): + self.stack.clear() + + def visit_call_expression(self, expr: CallExpression) -> None: + # TODO: ADD 
COMMENTS + for arg in expr.inputs: + arg.accept_shuttle(self) + mask_ops: set[ + tuple[pydop.MaskedExpressionFunctionOperator, RelationalExpression] + ] = set() + arg_exprs: list[list[str | int | float | None | bool] | None] = [] + for _ in range(len(expr.inputs)): + stack_term, expression_list = self.stack.pop() + if stack_term is not None: + mask_ops.add(stack_term) + arg_exprs.append(expression_list) + arg_exprs.reverse() + + input_op: pydop.MaskedExpressionFunctionOperator + input_expr: RelationalExpression + combined_exprs: list[str | int | float | None | bool] | None + if ( + isinstance(expr.op, pydop.MaskedExpressionFunctionOperator) + and expr.op.is_unmask + ): + self.stack.append(((expr.op, expr.inputs[0]), ["__col__"])) + elif len(mask_ops) != 1: + self.stack.append((None, None)) + else: + input_op, input_expr = mask_ops.pop() + combined_exprs = self.convert_call_to_server_expression(expr, arg_exprs) + if combined_exprs is not None and expr not in self.processed_candidates: + self.candidate_pool[expr] = (input_op, input_expr, combined_exprs) + self.processed_candidates.add(expr) + self.stack.append(((input_op, input_expr), combined_exprs)) + + def visit_column_reference(self, column_reference: ColumnReference) -> None: + self.stack.append((None, None)) + + def visit_literal_expression(self, literal: LiteralExpression) -> None: + self.stack.append((None, self.convert_literal_to_server_expression(literal))) + + def visit_window_expression(self, window_expression: WindowCallExpression) -> None: + for arg in window_expression.inputs: + arg.accept_shuttle(self) + self.stack.pop() + for arg in window_expression.partition_inputs: + arg.accept_shuttle(self) + self.stack.pop() + for order in window_expression.order_inputs: + order.expr.accept_shuttle(self) + self.stack.pop() + self.stack.append((None, None)) + + def visit_correlated_reference(self, correlated_reference: CorrelatedReference): + pass + + def convert_call_to_server_expression( + self, + call: 
CallExpression, + input_exprs: list[list[str | int | float | None | bool] | None], + ) -> list[str | int | float | None | bool] | None: + """ + TODO: ADD COMMENTS + """ + result: list[str | int | float | None | bool] = [] + if call.op == pydop.ISIN and len(call.inputs) == 2: + return self.convert_isin_call_to_server_expression(call.inputs, input_exprs) + if call.op not in self.OPERATORS_TO_SERVER_NAMES: + return None + operator_name = self.OPERATORS_TO_SERVER_NAMES[call.op] + result.append(operator_name) + result.append(len(call.inputs)) + for inp in input_exprs: + if inp is None: + return None + result.extend(inp) + return result + + def convert_isin_call_to_server_expression( + self, + inputs: list[RelationalExpression], + input_exprs: list[list[str | int | float | None | bool] | None], + ) -> list[str | int | float | None | bool] | None: + """ + TODO: ADD COMMENTS + """ + if len(inputs) != 2: + raise ValueError("ISIN operator requires exactly two inputs.") + result: list[str | int | float | None | bool] = ["IN"] + if input_exprs[0] is None: + return None + assert isinstance(inputs[1], LiteralExpression) and isinstance( + inputs[1].value, (list, tuple) + ), "ISIN right-hand side must be a list or tuple literal." 
+ in_list: list[str | int | float | None | bool] = [] + for v in inputs[1].value: + literal_list: list[str | int | float | None | bool] | None = ( + self.convert_literal_to_server_expression( + LiteralExpression(v, UnknownType()) + ) + ) + if literal_list is None: + return None + in_list.extend(literal_list) + result.append(len(inputs[1].value) + 1) + result.extend(input_exprs[0]) + result.extend(in_list) + return result + + def convert_literal_to_server_expression( + self, literal: LiteralExpression + ) -> list[str | int | float | None | bool] | None: + """ + TODO: ADD COMMENTS + """ + if literal.value is None: + return ["NULL"] + elif isinstance(literal.value, bool): + return ["TRUE" if literal.value else "FALSE"] + elif isinstance(literal.value, (int, float, str)): + return [literal.value] + else: + return None diff --git a/pydough/conversion/mask_server_rewrite_shuttle.py b/pydough/conversion/mask_server_rewrite_shuttle.py index 7a39d6166..0977e4938 100644 --- a/pydough/conversion/mask_server_rewrite_shuttle.py +++ b/pydough/conversion/mask_server_rewrite_shuttle.py @@ -1,5 +1,6 @@ """ -TODO +Logic for the shuttle that performs Mask Server rewrite conversion on candidates +identified by the candidate visitor. """ __all__ = ["MaskServerRewriteShuttle"] @@ -19,12 +20,15 @@ ) from pydough.types import ArrayType, BooleanType, UnknownType -from .mask_server_candidate_shuttle import MaskServerCandidateShuttle +from .mask_server_candidate_visitor import MaskServerCandidateVisitor class MaskServerRewriteShuttle(RelationalExpressionShuttle): """ - TODO + A shuttle that rewrites candidate expressions for Mask Server conversion + identified by a `MaskServerCandidateVisitor`, by batching requests to the + Mask Server and replacing the candidate expressions with the appropriate + responses from the server. 
""" OPERATORS_TO_SERVER_NAMES: dict[pydop.PyDoughExpressionOperator, str] = { @@ -57,17 +61,17 @@ class MaskServerRewriteShuttle(RelationalExpressionShuttle): """ def __init__( - self, server_info: MaskServerInfo, candidate_shuttle: MaskServerCandidateShuttle + self, server_info: MaskServerInfo, candidate_visitor: MaskServerCandidateVisitor ) -> None: self.server_info: MaskServerInfo = server_info - self.candidate_shuttle: MaskServerCandidateShuttle = candidate_shuttle + self.candidate_visitor: MaskServerCandidateVisitor = candidate_visitor self.responses: dict[ RelationalExpression, tuple[RelationalExpression, MaskServerOutput] | None ] = {} def visit_call_expression(self, expr: CallExpression) -> RelationalExpression: # TODO: ADD COMMENTS - if expr in self.candidate_shuttle.candidate_pool: + if expr in self.candidate_visitor.candidate_pool: self.process_batch() response: tuple[RelationalExpression, MaskServerOutput] | None = ( @@ -79,20 +83,21 @@ def visit_call_expression(self, expr: CallExpression) -> RelationalExpression: def process_batch(self) -> None: """ - TODO + TODO: ADD COMMENTS """ batch: list[MaskServerInput] = [] ancillary_info: list[tuple[RelationalExpression, RelationalExpression]] = [] for expr, ( mask_op, input_expr, - ) in self.candidate_shuttle.candidate_pool.items(): + expression_list, + ) in self.candidate_visitor.candidate_pool.items(): ancillary_info.append((expr, input_expr)) batch.append( MaskServerInput( table_path=mask_op.table_path, column_name=mask_op.masking_metadata.column_name, - expression=self.convert_to_server_expression(expr), + expression=expression_list, ) ) responses: list[MaskServerOutput] = ( @@ -104,96 +109,14 @@ def process_batch(self) -> None: self.responses[expr] = (input_expr, response) else: self.responses[expr] = None - self.candidate_shuttle.processed_candidates.add(expr) - self.candidate_shuttle.candidate_pool.clear() - - def convert_literal_to_server_expression( - self, literal: LiteralExpression - ) -> list[str | 
int | float | None | bool]: - """ - TODO - """ - if literal.value is None: - return ["NULL"] - elif isinstance(literal.value, bool): - return ["TRUE" if literal.value else "FALSE"] - elif isinstance(literal.value, (int, float)): - return [literal.value] - elif isinstance(literal.value, str): - return [literal.value] - else: - raise ValueError( - f"Unsupported literal type for mask server conversion: {type(literal.value)}" - ) - - def convert_to_server_expression( - self, expr: RelationalExpression - ) -> list[str | int | float | None | bool]: - """ - TODO - """ - if isinstance(expr, LiteralExpression): - return self.convert_literal_to_server_expression(expr) - elif isinstance(expr, CallExpression): - if isinstance(expr.op, pydop.MaskedExpressionFunctionOperator): - return ["__col__"] - elif expr.op in self.OPERATORS_TO_SERVER_NAMES: - return self.convert_call_to_server_expression( - self.OPERATORS_TO_SERVER_NAMES[expr.op], expr.inputs - ) - elif expr.op == pydop.ISIN: - return self.convert_isin_call_to_server_expression(expr.inputs) - else: - raise ValueError( - f"Unsupported operator for mask server conversion: {expr.op}" - ) - else: - raise ValueError( - f"Unsupported expression type for mask server conversion: {type(expr)}" - ) - - def convert_call_to_server_expression( - self, operator_name: str, inputs: list[RelationalExpression] - ) -> list[str | int | float | None | bool]: - """ - TODO - """ - result: list[str | int | float | None | bool] = [operator_name] - result.append(len(inputs)) - for inp in inputs: - result.extend(self.convert_to_server_expression(inp)) - return result - - def convert_isin_call_to_server_expression( - self, inputs: list[RelationalExpression] - ) -> list[str | int | float | None | bool]: - """ - TODO - """ - if len(inputs) != 2: - raise ValueError("ISIN operator requires exactly two inputs.") - result: list[str | int | float | None | bool] = ["IN"] - args: list[str | int | float | None | bool] = self.convert_to_server_expression( - 
inputs[0] - ) - assert isinstance(inputs[1], LiteralExpression) and isinstance( - inputs[1].value, (list, tuple) - ), "ISIN right-hand side must be a list or tuple literal." - for v in inputs[1].value: - args.extend( - self.convert_literal_to_server_expression( - LiteralExpression(v, UnknownType()) - ) - ) - result.append(len(inputs[1].value)) - result.extend(args) - return result + self.candidate_visitor.processed_candidates.add(expr) + self.candidate_visitor.candidate_pool.clear() def convert_response_to_relational( self, input_expr: RelationalExpression, response: MaskServerOutput ) -> RelationalExpression: """ - TODO + TODO: ADD COMMENTS """ result: RelationalExpression match response.response_case: @@ -209,7 +132,7 @@ def build_in_array_expression( self, input_expr: RelationalExpression, response: MaskServerOutput ) -> RelationalExpression: """ - TODO + TODO: ADD COMMENTS """ assert response.response_case in ( MaskServerResponse.IN_ARRAY, diff --git a/pydough/conversion/relational_converter.py b/pydough/conversion/relational_converter.py index fada8ee12..3835a151c 100644 --- a/pydough/conversion/relational_converter.py +++ b/pydough/conversion/relational_converter.py @@ -88,7 +88,7 @@ ) from .hybrid_translator import HybridTranslator from .hybrid_tree import HybridTree -from .mask_server_candidate_shuttle import MaskServerCandidateShuttle +from .mask_server_candidate_visitor import MaskServerCandidateVisitor from .mask_server_rewrite_shuttle import MaskServerRewriteShuttle from .masking_shuttles import MaskLiteralComparisonShuttle from .merge_projects import merge_projects @@ -1685,7 +1685,7 @@ def convert_ast_to_relational( # been attached to the session, include the shuttles for that as well. 
if os.getenv("PYDOUGH_ENABLE_MASK_REWRITES") == "1": if session.mask_server is not None: - candidate_shuttle: MaskServerCandidateShuttle = MaskServerCandidateShuttle() + candidate_shuttle: MaskServerCandidateVisitor = MaskServerCandidateVisitor() additional_shuttles.append(candidate_shuttle) additional_shuttles.append( MaskServerRewriteShuttle(session.mask_server, candidate_shuttle) diff --git a/tests/conftest.py b/tests/conftest.py index 618e1e483..8614f194a 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -1868,10 +1868,8 @@ def mock_server_setup(): @pytest.fixture(scope="session") -def mock_server_info(mock_server_setup) -> MaskServerInfo: +def mock_server_info(mock_server_setup: str) -> MaskServerInfo: """ - TODO: add description + Returns the MaskServerInfo for the mock server. """ - return MaskServerInfo( - base_url="http://localhost:8000", server_address="srv", token=None - ) + return MaskServerInfo(base_url=mock_server_setup, server_address="srv", token=None) diff --git a/tests/mock_server/lookup_table.py b/tests/mock_server/lookup_table.py index 872276215..67cfc5f04 100644 --- a/tests/mock_server/lookup_table.py +++ b/tests/mock_server/lookup_table.py @@ -234,7 +234,7 @@ }, ( "srv.CRBNK.CUSTOMERS.c_lname", - ("IN", 3, "__col__", "lee", "smith", "rodriguez"), + ("IN", 4, "__col__", "lee", "smith", "rodriguez"), ): { "type": "literal", "operator": "IN", @@ -243,7 +243,7 @@ }, ( "srv.CRBNK.CUSTOMERS.c_lname", - ("NOT", 1, "IN", 3, "__col__", "lee", "smith", "rodriguez"), + ("NOT", 1, "IN", 4, "__col__", "lee", "smith", "rodriguez"), ): { "type": "literal", "operator": "NOT_IN", From 557aaeb27367da69b798dfb5affa727af487c73f Mon Sep 17 00:00:00 2001 From: knassre-bodo Date: Wed, 15 Oct 2025 14:04:22 -0400 Subject: [PATCH 11/40] Added more tests --- tests/mock_server/lookup_table.py | 57 ++++++++++++++++ tests/test_masked_sqlite.py | 65 +++++++++++++++++++ .../cryptbank_filter_count_30_raw.txt | 4 ++ .../cryptbank_filter_count_30_rewrite.txt | 4 ++ 
.../cryptbank_window_01_raw.txt | 3 + .../cryptbank_window_01_rewrite.txt | 3 + .../cryptbank_window_02_raw.txt | 6 ++ .../cryptbank_window_02_rewrite.txt | 6 ++ .../cryptbank_filter_count_30_raw_sqlite.sql | 10 +++ ...yptbank_filter_count_30_rewrite_sqlite.sql | 5 ++ .../cryptbank_window_01_raw_sqlite.sql | 26 ++++++++ .../cryptbank_window_01_rewrite_sqlite.sql | 21 ++++++ .../cryptbank_window_02_raw_sqlite.sql | 33 ++++++++++ .../cryptbank_window_02_rewrite_sqlite.sql | 33 ++++++++++ 14 files changed, 276 insertions(+) create mode 100644 tests/test_plan_refsols/cryptbank_filter_count_30_raw.txt create mode 100644 tests/test_plan_refsols/cryptbank_filter_count_30_rewrite.txt create mode 100644 tests/test_plan_refsols/cryptbank_window_01_raw.txt create mode 100644 tests/test_plan_refsols/cryptbank_window_01_rewrite.txt create mode 100644 tests/test_plan_refsols/cryptbank_window_02_raw.txt create mode 100644 tests/test_plan_refsols/cryptbank_window_02_rewrite.txt create mode 100644 tests/test_sql_refsols/cryptbank_filter_count_30_raw_sqlite.sql create mode 100644 tests/test_sql_refsols/cryptbank_filter_count_30_rewrite_sqlite.sql create mode 100644 tests/test_sql_refsols/cryptbank_window_01_raw_sqlite.sql create mode 100644 tests/test_sql_refsols/cryptbank_window_01_rewrite_sqlite.sql create mode 100644 tests/test_sql_refsols/cryptbank_window_02_raw_sqlite.sql create mode 100644 tests/test_sql_refsols/cryptbank_window_02_rewrite_sqlite.sql diff --git a/tests/mock_server/lookup_table.py b/tests/mock_server/lookup_table.py index 67cfc5f04..6f11ca254 100644 --- a/tests/mock_server/lookup_table.py +++ b/tests/mock_server/lookup_table.py @@ -377,4 +377,61 @@ "values": [], "count": 0, }, + ("srv.CRBNK.ACCOUNTS.a_open_ts", ("EQUAL", 2, "YEAR", 1, "__col__", 2021)): { + "type": "literal", + "operator": "IN", + "values": [ + "2017-02-11 10:59:51", + "2017-06-15 12:41:51", + "2017-07-07 14:26:51", + "2017-07-09 12:21:51", + "2017-09-15 11:26:51", + "2018-01-02 12:26:51", + ], 
+ "count": 6, + }, + ( + "srv.CRBNK.CUSTOMERS.c_birthday", + ( + "AND", + 2, + "IN", + 7, + "ADD", + 2, + "MONTH", + 1, + "__col__", + 1, + 2, + 4, + 6, + 8, + 10, + 12, + "IN", + 11, + "SUB", + 2, + "YEAR", + 1, + "__col__", + 2, + 1975, + 1977, + 1979, + 1981, + 1983, + 1985, + 1987, + 1989, + 1991, + 1993, + ), + ): { + "type": "literal", + "operator": "IN", + "values": ["1980-01-18", "1981-11-15", "1990-07-31", "1994-06-15"], + "count": 4, + }, } diff --git a/tests/test_masked_sqlite.py b/tests/test_masked_sqlite.py index 6a6b437ba..268fce4d5 100644 --- a/tests/test_masked_sqlite.py +++ b/tests/test_masked_sqlite.py @@ -405,6 +405,19 @@ ), id="cryptbank_filter_count_29", ), + pytest.param( + PyDoughPandasTest( + "selected_customers = customers.WHERE(" + " ISIN(YEAR(birthday) - 2, (1975, 1977, 1979, 1981, 1983, 1985, 1987, 1989, 1991, 1993))" + " & ISIN(MONTH(birthday) + 1, (2, 4, 6, 8, 10, 12))" + ")\n" + "result = CRYPTBANK.CALCULATE(n=COUNT(selected_customers))", + "CRYPTBANK", + lambda: pd.DataFrame({"n": [4]}), + "cryptbank_filter_count_30", + ), + id="cryptbank_filter_count_30", + ), pytest.param( PyDoughPandasTest( "selected_transactions = transactions.WHERE((YEAR(time_stamp) == 2022) & (MONTH(time_stamp) == 6))\n" @@ -516,6 +529,58 @@ ), id="cryptbank_agg_06", ), + pytest.param( + PyDoughPandasTest( + "result = (" + " accounts" + " .CALCULATE(partkey=(account_type == 'retirement') | (account_type == 'savings'))" + " .PARTITION(name='actyp', by=partkey)" + " .accounts" + " .BEST(per='actyp', by=balance.DESC())" + " .CALCULATE(account_type, key, balance)" + " .ORDER_BY(account_type.ASC())" + ")", + "CRYPTBANK", + lambda: pd.DataFrame( + { + "account_type": ["mma", "retirement"], + "key": [8, 28], + "balance": [5500.0, 25000.0], + } + ), + "cryptbank_window_01", + ), + id="cryptbank_window_01", + ), + pytest.param( + PyDoughPandasTest( + "result = (" + " branches" + " .WHERE(CONTAINS(address, ';CA;'))" + " .CALCULATE(branch_name=name)" + " 
.accounts_managed" + " .BEST(per='branches', by=((YEAR(creation_timestamp) == 2021).ASC(), key.ASC()))" + " .CALCULATE(branch_name, key, creation_timestamp)" + " .ORDER_BY(branch_name.ASC())" + ")", + "CRYPTBANK", + lambda: pd.DataFrame( + { + "branch_name": [ + "Downtown Los Angeles Branch", + "San Francisco Financial Branch", + ], + "key": [14, 8], + "creation_timestamp": [ + "2016-05-12 14:00:00", + "2018-07-19 14:10:00", + ], + } + ), + "cryptbank_window_02", + ), + id="cryptbank_window_02", + ), pytest.param( PyDoughPandasTest( "first_sent = accounts_held.transactions_sent.WHERE(receiver_account.branch.address[-5:] == '94105').BEST(per='accounts_held', by=time_stamp.ASC())\n" diff --git a/tests/test_plan_refsols/cryptbank_filter_count_30_raw.txt b/tests/test_plan_refsols/cryptbank_filter_count_30_raw.txt new file mode 100644 index 000000000..68486227f --- /dev/null +++ b/tests/test_plan_refsols/cryptbank_filter_count_30_raw.txt @@ -0,0 +1,4 @@ +ROOT(columns=[('n', n_rows)], orderings=[]) + AGGREGATE(keys={}, aggregations={'n_rows': COUNT()}) + FILTER(condition=ISIN(MONTH(UNMASK::(DATE([c_birthday], '+472 days'))) + 1:numeric, [2, 4, 6, 8, 10, 12]:array[unknown]) & ISIN(YEAR(UNMASK::(DATE([c_birthday], '+472 days'))) - 2:numeric, [1975, 1977, 1979, 1981, 1983, 1985, 1987, 1989, 1991, 1993]:array[unknown]), columns={}) + SCAN(table=CRBNK.CUSTOMERS, columns={'c_birthday': c_birthday}) diff --git a/tests/test_plan_refsols/cryptbank_filter_count_30_rewrite.txt b/tests/test_plan_refsols/cryptbank_filter_count_30_rewrite.txt new file mode 100644 index 000000000..cea80fa2c --- /dev/null +++ b/tests/test_plan_refsols/cryptbank_filter_count_30_rewrite.txt @@ -0,0 +1,4 @@ +ROOT(columns=[('n', n_rows)], orderings=[]) + AGGREGATE(keys={}, aggregations={'n_rows': COUNT()}) + FILTER(condition=ISIN(c_birthday, ['1980-01-18', '1981-11-15', '1990-07-31', '1994-06-15']:array[unknown]), columns={}) + SCAN(table=CRBNK.CUSTOMERS, columns={'c_birthday': c_birthday}) diff --git 
a/tests/test_plan_refsols/cryptbank_window_01_raw.txt b/tests/test_plan_refsols/cryptbank_window_01_raw.txt new file mode 100644 index 000000000..32267be3d --- /dev/null +++ b/tests/test_plan_refsols/cryptbank_window_01_raw.txt @@ -0,0 +1,3 @@ +ROOT(columns=[('account_type', UNMASK::(SUBSTRING([a_type], -1) || SUBSTRING([a_type], 1, LENGTH([a_type]) - 1))), ('key', UNMASK::(CASE WHEN [a_key] = 0 THEN 0 ELSE (CASE WHEN [a_key] > 0 THEN 1 ELSE -1 END) * CAST(SUBSTRING([a_key], 1 + INSTR([a_key], '-'), LENGTH([a_key]) / 2) AS INTEGER) END)), ('balance', UNMASK::(SQRT([a_balance])))], orderings=[(UNMASK::(SUBSTRING([a_type], -1) || SUBSTRING([a_type], 1, LENGTH([a_type]) - 1))):asc_first]) + FILTER(condition=RANKING(args=[], partition=[UNMASK::(SUBSTRING([a_type], -1) || SUBSTRING([a_type], 1, LENGTH([a_type]) - 1)) == 'retirement':string | UNMASK::(SUBSTRING([a_type], -1) || SUBSTRING([a_type], 1, LENGTH([a_type]) - 1)) == 'savings':string], order=[(UNMASK::(SQRT([a_balance]))):desc_first], allow_ties=False) == 1:numeric, columns={'a_balance': a_balance, 'a_key': a_key, 'a_type': a_type}) + SCAN(table=CRBNK.ACCOUNTS, columns={'a_balance': a_balance, 'a_key': a_key, 'a_type': a_type}) diff --git a/tests/test_plan_refsols/cryptbank_window_01_rewrite.txt b/tests/test_plan_refsols/cryptbank_window_01_rewrite.txt new file mode 100644 index 000000000..48b7f6c42 --- /dev/null +++ b/tests/test_plan_refsols/cryptbank_window_01_rewrite.txt @@ -0,0 +1,3 @@ +ROOT(columns=[('account_type', UNMASK::(SUBSTRING([a_type], -1) || SUBSTRING([a_type], 1, LENGTH([a_type]) - 1))), ('key', UNMASK::(CASE WHEN [a_key] = 0 THEN 0 ELSE (CASE WHEN [a_key] > 0 THEN 1 ELSE -1 END) * CAST(SUBSTRING([a_key], 1 + INSTR([a_key], '-'), LENGTH([a_key]) / 2) AS INTEGER) END)), ('balance', UNMASK::(SQRT([a_balance])))], orderings=[(UNMASK::(SUBSTRING([a_type], -1) || SUBSTRING([a_type], 1, LENGTH([a_type]) - 1))):asc_first]) + FILTER(condition=RANKING(args=[], partition=[ISIN(a_type, ['avingss', 
'etirementr']:array[unknown])], order=[(UNMASK::(SQRT([a_balance]))):desc_first], allow_ties=False) == 1:numeric, columns={'a_balance': a_balance, 'a_key': a_key, 'a_type': a_type}) + SCAN(table=CRBNK.ACCOUNTS, columns={'a_balance': a_balance, 'a_key': a_key, 'a_type': a_type}) diff --git a/tests/test_plan_refsols/cryptbank_window_02_raw.txt b/tests/test_plan_refsols/cryptbank_window_02_raw.txt new file mode 100644 index 000000000..dfeb1cc2a --- /dev/null +++ b/tests/test_plan_refsols/cryptbank_window_02_raw.txt @@ -0,0 +1,6 @@ +ROOT(columns=[('branch_name', b_name), ('key', UNMASK::(CASE WHEN [a_key] = 0 THEN 0 ELSE (CASE WHEN [a_key] > 0 THEN 1 ELSE -1 END) * CAST(SUBSTRING([a_key], 1 + INSTR([a_key], '-'), LENGTH([a_key]) / 2) AS INTEGER) END)), ('creation_timestamp', UNMASK::(DATETIME([a_open_ts], '+123456789 seconds')))], orderings=[(b_name):asc_first]) + FILTER(condition=RANKING(args=[], partition=[a_branchkey], order=[(YEAR(UNMASK::(DATETIME([a_open_ts], '+123456789 seconds'))) == 2021:numeric):asc_last, (UNMASK::(CASE WHEN [a_key] = 0 THEN 0 ELSE (CASE WHEN [a_key] > 0 THEN 1 ELSE -1 END) * CAST(SUBSTRING([a_key], 1 + INSTR([a_key], '-'), LENGTH([a_key]) / 2) AS INTEGER) END)):asc_last], allow_ties=False) == 1:numeric, columns={'a_key': a_key, 'a_open_ts': a_open_ts, 'b_name': b_name}) + JOIN(condition=t0.b_key == t1.a_branchkey, type=INNER, cardinality=PLURAL_ACCESS, reverse_cardinality=SINGULAR_FILTER, columns={'a_branchkey': t1.a_branchkey, 'a_key': t1.a_key, 'a_open_ts': t1.a_open_ts, 'b_name': t0.b_name}) + FILTER(condition=CONTAINS(b_addr, ';CA;':string), columns={'b_key': b_key, 'b_name': b_name}) + SCAN(table=CRBNK.BRANCHES, columns={'b_addr': b_addr, 'b_key': b_key, 'b_name': b_name}) + SCAN(table=CRBNK.ACCOUNTS, columns={'a_branchkey': a_branchkey, 'a_key': a_key, 'a_open_ts': a_open_ts}) diff --git a/tests/test_plan_refsols/cryptbank_window_02_rewrite.txt b/tests/test_plan_refsols/cryptbank_window_02_rewrite.txt new file mode 100644 index 
000000000..eeb528693 --- /dev/null +++ b/tests/test_plan_refsols/cryptbank_window_02_rewrite.txt @@ -0,0 +1,6 @@ +ROOT(columns=[('branch_name', b_name), ('key', UNMASK::(CASE WHEN [a_key] = 0 THEN 0 ELSE (CASE WHEN [a_key] > 0 THEN 1 ELSE -1 END) * CAST(SUBSTRING([a_key], 1 + INSTR([a_key], '-'), LENGTH([a_key]) / 2) AS INTEGER) END)), ('creation_timestamp', UNMASK::(DATETIME([a_open_ts], '+123456789 seconds')))], orderings=[(b_name):asc_first]) + FILTER(condition=RANKING(args=[], partition=[a_branchkey], order=[(ISIN(a_open_ts, ['2017-02-11 10:59:51', '2017-06-15 12:41:51', '2017-07-07 14:26:51', '2017-07-09 12:21:51', '2017-09-15 11:26:51', '2018-01-02 12:26:51']:array[unknown])):asc_last, (UNMASK::(CASE WHEN [a_key] = 0 THEN 0 ELSE (CASE WHEN [a_key] > 0 THEN 1 ELSE -1 END) * CAST(SUBSTRING([a_key], 1 + INSTR([a_key], '-'), LENGTH([a_key]) / 2) AS INTEGER) END)):asc_last], allow_ties=False) == 1:numeric, columns={'a_key': a_key, 'a_open_ts': a_open_ts, 'b_name': b_name}) + JOIN(condition=t0.b_key == t1.a_branchkey, type=INNER, cardinality=PLURAL_ACCESS, reverse_cardinality=SINGULAR_FILTER, columns={'a_branchkey': t1.a_branchkey, 'a_key': t1.a_key, 'a_open_ts': t1.a_open_ts, 'b_name': t0.b_name}) + FILTER(condition=CONTAINS(b_addr, ';CA;':string), columns={'b_key': b_key, 'b_name': b_name}) + SCAN(table=CRBNK.BRANCHES, columns={'b_addr': b_addr, 'b_key': b_key, 'b_name': b_name}) + SCAN(table=CRBNK.ACCOUNTS, columns={'a_branchkey': a_branchkey, 'a_key': a_key, 'a_open_ts': a_open_ts}) diff --git a/tests/test_sql_refsols/cryptbank_filter_count_30_raw_sqlite.sql b/tests/test_sql_refsols/cryptbank_filter_count_30_raw_sqlite.sql new file mode 100644 index 000000000..8567c4aab --- /dev/null +++ b/tests/test_sql_refsols/cryptbank_filter_count_30_raw_sqlite.sql @@ -0,0 +1,10 @@ +SELECT + COUNT(*) AS n +FROM crbnk.customers +WHERE + ( + CAST(STRFTIME('%Y', DATE(c_birthday, '+472 days')) AS INTEGER) - 2 + ) IN (1975, 1977, 1979, 1981, 1983, 1985, 1987, 1989, 1991, 1993) + 
AND ( + CAST(STRFTIME('%m', DATE(c_birthday, '+472 days')) AS INTEGER) + 1 + ) IN (2, 4, 6, 8, 10, 12) diff --git a/tests/test_sql_refsols/cryptbank_filter_count_30_rewrite_sqlite.sql b/tests/test_sql_refsols/cryptbank_filter_count_30_rewrite_sqlite.sql new file mode 100644 index 000000000..e10e5d7dd --- /dev/null +++ b/tests/test_sql_refsols/cryptbank_filter_count_30_rewrite_sqlite.sql @@ -0,0 +1,5 @@ +SELECT + COUNT(*) AS n +FROM crbnk.customers +WHERE + c_birthday IN ('1980-01-18', '1981-11-15', '1990-07-31', '1994-06-15') diff --git a/tests/test_sql_refsols/cryptbank_window_01_raw_sqlite.sql b/tests/test_sql_refsols/cryptbank_window_01_raw_sqlite.sql new file mode 100644 index 000000000..d394bcb5a --- /dev/null +++ b/tests/test_sql_refsols/cryptbank_window_01_raw_sqlite.sql @@ -0,0 +1,26 @@ +WITH _t AS ( + SELECT + a_balance, + a_key, + a_type, + ROW_NUMBER() OVER (PARTITION BY ( + SUBSTRING(a_type, -1) || SUBSTRING(a_type, 1, LENGTH(a_type) - 1) + ) = 'retirement' + OR ( + SUBSTRING(a_type, -1) || SUBSTRING(a_type, 1, LENGTH(a_type) - 1) + ) = 'savings' ORDER BY SQRT(a_balance) DESC) AS _w + FROM crbnk.accounts +) +SELECT + SUBSTRING(a_type, -1) || SUBSTRING(a_type, 1, LENGTH(a_type) - 1) AS account_type, + CASE + WHEN a_key = 0 + THEN 0 + ELSE CASE WHEN a_key > 0 THEN 1 ELSE -1 END * CAST(SUBSTRING(a_key, 1 + INSTR(a_key, '-'), CAST(LENGTH(a_key) AS REAL) / 2) AS INTEGER) + END AS key, + SQRT(a_balance) AS balance +FROM _t +WHERE + _w = 1 +ORDER BY + 1 diff --git a/tests/test_sql_refsols/cryptbank_window_01_rewrite_sqlite.sql b/tests/test_sql_refsols/cryptbank_window_01_rewrite_sqlite.sql new file mode 100644 index 000000000..8fa8b7309 --- /dev/null +++ b/tests/test_sql_refsols/cryptbank_window_01_rewrite_sqlite.sql @@ -0,0 +1,21 @@ +WITH _t AS ( + SELECT + a_balance, + a_key, + a_type, + ROW_NUMBER() OVER (PARTITION BY a_type IN ('avingss', 'etirementr') ORDER BY SQRT(a_balance) DESC) AS _w + FROM crbnk.accounts +) +SELECT + SUBSTRING(a_type, -1) || 
SUBSTRING(a_type, 1, LENGTH(a_type) - 1) AS account_type, + CASE + WHEN a_key = 0 + THEN 0 + ELSE CASE WHEN a_key > 0 THEN 1 ELSE -1 END * CAST(SUBSTRING(a_key, 1 + INSTR(a_key, '-'), CAST(LENGTH(a_key) AS REAL) / 2) AS INTEGER) + END AS key, + SQRT(a_balance) AS balance +FROM _t +WHERE + _w = 1 +ORDER BY + 1 diff --git a/tests/test_sql_refsols/cryptbank_window_02_raw_sqlite.sql b/tests/test_sql_refsols/cryptbank_window_02_raw_sqlite.sql new file mode 100644 index 000000000..f06faa358 --- /dev/null +++ b/tests/test_sql_refsols/cryptbank_window_02_raw_sqlite.sql @@ -0,0 +1,33 @@ +WITH _t AS ( + SELECT + accounts.a_key, + accounts.a_open_ts, + branches.b_name, + ROW_NUMBER() OVER (PARTITION BY accounts.a_branchkey ORDER BY CAST(STRFTIME('%Y', DATETIME(accounts.a_open_ts, '+123456789 seconds')) AS INTEGER) = 2021, CASE + WHEN accounts.a_key = 0 + THEN 0 + ELSE CASE WHEN accounts.a_key > 0 THEN 1 ELSE -1 END * CAST(SUBSTRING( + accounts.a_key, + 1 + INSTR(accounts.a_key, '-'), + CAST(LENGTH(accounts.a_key) AS REAL) / 2 + ) AS INTEGER) + END) AS _w + FROM crbnk.branches AS branches + JOIN crbnk.accounts AS accounts + ON accounts.a_branchkey = branches.b_key + WHERE + branches.b_addr LIKE '%;CA;%' +) +SELECT + b_name AS branch_name, + CASE + WHEN a_key = 0 + THEN 0 + ELSE CASE WHEN a_key > 0 THEN 1 ELSE -1 END * CAST(SUBSTRING(a_key, 1 + INSTR(a_key, '-'), CAST(LENGTH(a_key) AS REAL) / 2) AS INTEGER) + END AS key, + DATETIME(a_open_ts, '+123456789 seconds') AS creation_timestamp +FROM _t +WHERE + _w = 1 +ORDER BY + 1 diff --git a/tests/test_sql_refsols/cryptbank_window_02_rewrite_sqlite.sql b/tests/test_sql_refsols/cryptbank_window_02_rewrite_sqlite.sql new file mode 100644 index 000000000..718ec27bc --- /dev/null +++ b/tests/test_sql_refsols/cryptbank_window_02_rewrite_sqlite.sql @@ -0,0 +1,33 @@ +WITH _t AS ( + SELECT + accounts.a_key, + accounts.a_open_ts, + branches.b_name, + ROW_NUMBER() OVER (PARTITION BY accounts.a_branchkey ORDER BY accounts.a_open_ts IN 
('2017-02-11 10:59:51', '2017-06-15 12:41:51', '2017-07-07 14:26:51', '2017-07-09 12:21:51', '2017-09-15 11:26:51', '2018-01-02 12:26:51'), CASE + WHEN accounts.a_key = 0 + THEN 0 + ELSE CASE WHEN accounts.a_key > 0 THEN 1 ELSE -1 END * CAST(SUBSTRING( + accounts.a_key, + 1 + INSTR(accounts.a_key, '-'), + CAST(LENGTH(accounts.a_key) AS REAL) / 2 + ) AS INTEGER) + END) AS _w + FROM crbnk.branches AS branches + JOIN crbnk.accounts AS accounts + ON accounts.a_branchkey = branches.b_key + WHERE + branches.b_addr LIKE '%;CA;%' +) +SELECT + b_name AS branch_name, + CASE + WHEN a_key = 0 + THEN 0 + ELSE CASE WHEN a_key > 0 THEN 1 ELSE -1 END * CAST(SUBSTRING(a_key, 1 + INSTR(a_key, '-'), CAST(LENGTH(a_key) AS REAL) / 2) AS INTEGER) + END AS key, + DATETIME(a_open_ts, '+123456789 seconds') AS creation_timestamp +FROM _t +WHERE + _w = 1 +ORDER BY + 1 From 6b109d9a2fba2f32fe349b702bce2fb8b767eeff Mon Sep 17 00:00:00 2001 From: knassre-bodo Date: Thu, 16 Oct 2025 13:29:22 -0400 Subject: [PATCH 12/40] Added rewrite shuttle docstrings/comments --- .../conversion/mask_server_rewrite_shuttle.py | 93 +++++++++++++++---- 1 file changed, 75 insertions(+), 18 deletions(-) diff --git a/pydough/conversion/mask_server_rewrite_shuttle.py b/pydough/conversion/mask_server_rewrite_shuttle.py index 0977e4938..a51883647 100644 --- a/pydough/conversion/mask_server_rewrite_shuttle.py +++ b/pydough/conversion/mask_server_rewrite_shuttle.py @@ -55,9 +55,11 @@ class MaskServerRewriteShuttle(RelationalExpressionShuttle): pydop.DIV: "DIV", } """ - TODO: ADD DESCRIPTION + A mapping of all PyDough operators that can be handled by the Mask Server, + mapping each such operator to the string name used in the linear string + serialization format recognized by the Mask Server. - NOTE: ISIN is handled separately. + Note: ISIN is handled separately. 
""" def __init__( @@ -65,28 +67,48 @@ def __init__( ) -> None: self.server_info: MaskServerInfo = server_info self.candidate_visitor: MaskServerCandidateVisitor = candidate_visitor - self.responses: dict[ - RelationalExpression, tuple[RelationalExpression, MaskServerOutput] | None - ] = {} + self.responses: dict[RelationalExpression, RelationalExpression | None] = {} + """ + A mapping of relational expressions from the candidate visitor that have + been processed by the Mask Server. Each expression maps to either None + (if the server could not handle it) or the rewritten expression based on + the outcome of the server request. + """ def visit_call_expression(self, expr: CallExpression) -> RelationalExpression: - # TODO: ADD COMMENTS + # If this expression is in the candidate pool, process all of the + # candidates in the pool in a batch sent to the Mask Server. The + # candidate pool will then be cleared, preventing duplicate processing + # of the same expression. The responses will be stored in self.responses + # for later lookup. if expr in self.candidate_visitor.candidate_pool: self.process_batch() - response: tuple[RelationalExpression, MaskServerOutput] | None = ( - self.responses.get(expr, None) - ) + # If a Mask Server response has been stored for this expression, + # utilize it to convert the expression to its simplified form. + response: RelationalExpression | None = self.responses.get(expr, None) if response is not None: - return self.convert_response_to_relational(*response) + return response + + # Otherwise, use the regular process to recursively transform the inputs + # to the function call. return super().visit_call_expression(expr) def process_batch(self) -> None: """ - TODO: ADD COMMENTS + Invokes the logic to dump the contents of the candidate pool to the + Mask Server in a single batch, and process the responses to store them + in self.responses for later lookup. 
""" batch: list[MaskServerInput] = [] ancillary_info: list[tuple[RelationalExpression, RelationalExpression]] = [] + + # Loop over every candidate in the pool, building up the batch request + # by adding the MaskServerInput for each candidate, and storing the + # tuple of the original expression and the underlying input that is + # being unmasked for later use when processing the response. The two + # lists, the batch and ancillary info, remain in sync by index so they + # can be zipped together later. for expr, ( mask_op, input_expr, @@ -100,39 +122,74 @@ def process_batch(self) -> None: expression=expression_list, ) ) + + # Send the batch to the Mask Server, and process each response + # alongside the ancillary info. Afterwards, self.responses should + # contain an entry for every candidate that was in the pool, mapping it + # to None in the case of failure, or the rewritten expression in the + # case of success. responses: list[MaskServerOutput] = ( self.server_info.simplify_simple_expression_batch(batch) ) assert len(responses) == len(ancillary_info) for (expr, input_expr), response in zip(ancillary_info, responses): if response.response_case != MaskServerResponse.UNSUPPORTED: - self.responses[expr] = (input_expr, response) + self.responses[expr] = self.convert_response_to_relational( + input_expr, response + ) else: self.responses[expr] = None self.candidate_visitor.processed_candidates.add(expr) + + # Wipe the candidate pool to prevent duplicate processing, since every + # candidate already in the pool has now been added to self.responses. 
self.candidate_visitor.candidate_pool.clear() def convert_response_to_relational( self, input_expr: RelationalExpression, response: MaskServerOutput - ) -> RelationalExpression: + ) -> RelationalExpression | None: """ - TODO: ADD COMMENTS + Takes in the original input expression that is being unmasked within + a larger candidate expression for Mask Server rewrite, as well as the + response from the Mask Server, and converts it to a relational + expression that can be used to replace the original candidate + expression. + + Args: + `input_expr`: The original input expression that is being unmasked. + `response`: The response from the Mask Server for the candidate. + + Returns: + A relational expression that can be used to replace the original + candidate expression. Alternatively, returns None if the response + could not be converted (e.g. it is a pattern PyDough does not yet + support). """ result: RelationalExpression match response.response_case: case MaskServerResponse.IN_ARRAY | MaskServerResponse.NOT_IN_ARRAY: result = self.build_in_array_expression(input_expr, response) case _: - raise ValueError( - f"Unsupported mask server response case: {response.response_case}" - ) + return None return result def build_in_array_expression( self, input_expr: RelationalExpression, response: MaskServerOutput ) -> RelationalExpression: """ - TODO: ADD COMMENTS + Implements the logic of `convert_response_to_relational` specifically + for the case where the Mask Server response indicates that the original + expression, containing the input expression, can be replaced with an + IN or NOT IN expression with a list of literals. + + Args: + `input_expr`: The original input expression that is being unmasked. + `response`: The response from the Mask Server for the candidate. + This response is assumed to be of type IN_ARRAY or NOT_IN_ARRAY. + + Returns: + A relational expression that can be used to replace the original + candidate expression. 
""" assert response.response_case in ( MaskServerResponse.IN_ARRAY, From 13779163721bbec72d2319145f3359fb46e70994 Mon Sep 17 00:00:00 2001 From: knassre-bodo Date: Thu, 16 Oct 2025 15:05:10 -0400 Subject: [PATCH 13/40] Adding remaining documentation --- .../mask_server_candidate_visitor.py | 115 ++++++++++++++++-- .../conversion/mask_server_rewrite_shuttle.py | 31 ----- 2 files changed, 104 insertions(+), 42 deletions(-) diff --git a/pydough/conversion/mask_server_candidate_visitor.py b/pydough/conversion/mask_server_candidate_visitor.py index 925804ed7..dd98d8c00 100644 --- a/pydough/conversion/mask_server_candidate_visitor.py +++ b/pydough/conversion/mask_server_candidate_visitor.py @@ -47,7 +47,11 @@ class MaskServerCandidateVisitor(RelationalExpressionVisitor): pydop.DIV: "DIV", } """ - TODO: ADD DESCRIPTION + A mapping of all PyDough operators that can be handled by the Mask Server, + mapping each such operator to the string name used in the linear string + serialization format recognized by the Mask Server. + + Note: ISIN is handled separately. """ def __init__(self) -> None: @@ -60,12 +64,24 @@ def __init__(self) -> None: ], ] = {} """ - TODO: ADD COMMENTS + The internal data structure used to keep track of all candidate + expressions identified during a traversal of a relational tree. Each + candidate expression maps to a tuple of: + 1. The single unmasking operator contained within the expression. + 2. The input expression that is being unmasked. + 3. The linear serialization of the entire expression as a list, where + invocations of UNMASK(input_expr) are replaced with the token + "__col__". """ self.processed_candidates: set[RelationalExpression] = set() """ - TODO: ADD COMMENTS + The set of all relational expressions that have already been added to + the candidate pool at least once.
This is used to avoid adding the same + candidate multiple times if it is encountered multiple times during a + traversal of the relational tree, since the candidate pool will be + cleared once all of the candidates in the pool are processed in a batch + request to the mask server. """ self.stack: list[ @@ -76,7 +92,15 @@ def __init__(self) -> None: ] ] = [] """ - TODO: ADD COMMENTS + The stack is used to keep track of information relating to + sub-expressions of the current expression. When visiting an expression, + the stack will contain one entry for each input to the expression, + where each entry is a tuple of: + 1. Either None, or the single unmasking operator and input expression + contained within the input expression, if any. + 2. Either None, or the linear serialization of the input expression as + a list, where invocations of UNMASK(input_expr) are replaced with + the token "__col__". """ def reset(self): @@ -119,9 +143,14 @@ def visit_column_reference(self, column_reference: ColumnReference) -> None: self.stack.append((None, None)) def visit_literal_expression(self, literal: LiteralExpression) -> None: + # Literals do not contain the UNMASK operator, but can have a linear + # serialization that can be sent to the Mask Server, so we convert the + # literal to the appropriate list format and push that onto the stack. self.stack.append((None, self.convert_literal_to_server_expression(literal))) def visit_window_expression(self, window_expression: WindowCallExpression) -> None: + # Window functions cannot be sent to the mask server, but their inputs + # potentially can be. for arg in window_expression.inputs: arg.accept_shuttle(self) self.stack.pop() @@ -134,7 +163,8 @@ def visit_window_expression(self, window_expression: WindowCallExpression) -> No self.stack.append((None, None)) def visit_correlated_reference(self, correlated_reference: CorrelatedReference): - pass + # Correlated references cannot be sent to the mask server. 
+ self.stack.append((None, None)) def convert_call_to_server_expression( self, @@ -142,20 +172,49 @@ def convert_call_to_server_expression( input_exprs: list[list[str | int | float | None | bool] | None], ) -> list[str | int | float | None | bool] | None: """ - TODO: ADD COMMENTS + Converts a function call to the linear serialization format recognized + by the Mask Server, using the provided list of linear serializations for + each input to the function call. If the function call cannot be + converted, returns None. + + Args: + `call`: The function call to convert. + `input_exprs`: A list of linear serializations for each input to + the function call, where each input serialization is either a + list of strings/ints/floats/bools/None, or None if the input + could not be converted. + + Returns: + A list of strings/ints/floats/bools/None representing the linear + serialization of the function call, or None if the function call + could not be converted. """ - result: list[str | int | float | None | bool] = [] + + # If the function call is an ISIN, handle it separately since it has a + # different format than the other operators. if call.op == pydop.ISIN and len(call.inputs) == 2: return self.convert_isin_call_to_server_expression(call.inputs, input_exprs) - if call.op not in self.OPERATORS_TO_SERVER_NAMES: + + # Besides ISIN, if the function call is not one of the operators that + # can be handled by the Mask Server, return None since it cannot be + # converted. + elif call.op not in self.OPERATORS_TO_SERVER_NAMES: return None + + # Build up the list with the first two entries: the name of the function + # call operator, and the number of inputs to the function call. + result: list[str | int | float | None | bool] = [] operator_name = self.OPERATORS_TO_SERVER_NAMES[call.op] result.append(operator_name) result.append(len(call.inputs)) + + # For each input to the function call, append its linear serialization + # to the result list. 
If any input could not be converted, return None. for inp in input_exprs: if inp is None: return None result.extend(inp) + return result def convert_isin_call_to_server_expression( @@ -164,16 +223,32 @@ def convert_isin_call_to_server_expression( input_exprs: list[list[str | int | float | None | bool] | None], ) -> list[str | int | float | None | bool] | None: """ - TODO: ADD COMMENTS + Converts a relational expression for an ISIN call into the linear + serialization list format recognized by the Mask Server, using the + provided list of linear serializations for the first input, versus a + manual unfolding of the second input which must be a literal list. + + Args: + `inputs`: The two inputs to the ISIN call. + `input_exprs`: A list of linear serializations for each input to + the ISIN call, where each input serialization is either a + list of strings/ints/floats/bools/None, or None if the input + could not be converted. """ if len(inputs) != 2: raise ValueError("ISIN operator requires exactly two inputs.") - result: list[str | int | float | None | bool] = ["IN"] + + # If the first input could not be converted, the ISIN call cannot be + # serialized either, so return None. if input_exprs[0] is None: return None assert isinstance(inputs[1], LiteralExpression) and isinstance( inputs[1].value, (list, tuple) ), "ISIN right-hand side must be a list or tuple literal." + + # Unfold the second input, which must be a literal list, into the + # output list. If any element of the list cannot be converted, return + # None. in_list: list[str | int | float | None | bool] = [] for v in inputs[1].value: literal_list: list[str | int | float | None | bool] | None = ( @@ -184,6 +259,14 @@ def convert_isin_call_to_server_expression( if literal_list is None: return None in_list.extend(literal_list) + + # The result list is: + # 1. The operator name "IN" + # 2. The total number of arguments: the element being checked plus + # the number of elements in the list. + # 3.
The linear serialization of the first input expression. + # 4. The unfolded elements of the literal list from the second input. + result: list[str | int | float | None | bool] = ["IN"] result.append(len(inputs[1].value) + 1) result.extend(input_exprs[0]) result.extend(in_list) @@ -193,7 +276,17 @@ def convert_literal_to_server_expression( self, literal: LiteralExpression ) -> list[str | int | float | None | bool] | None: """ - TODO: ADD COMMENTS + Converts a literal expression to the linear serialization format + recognized by the Mask Server. If the literal cannot be converted, + returns None. + + Args: + `literal`: The literal expression to convert. + + Returns: + A list of strings/ints/floats/bools/None representing the linear + serialization of the literal, or None if the literal could not be + converted. """ if literal.value is None: return ["NULL"] diff --git a/pydough/conversion/mask_server_rewrite_shuttle.py b/pydough/conversion/mask_server_rewrite_shuttle.py index a51883647..0792db8c7 100644 --- a/pydough/conversion/mask_server_rewrite_shuttle.py +++ b/pydough/conversion/mask_server_rewrite_shuttle.py @@ -31,37 +31,6 @@ class MaskServerRewriteShuttle(RelationalExpressionShuttle): responses from the server. 
""" - OPERATORS_TO_SERVER_NAMES: dict[pydop.PyDoughExpressionOperator, str] = { - pydop.BAN: "AND", - pydop.BOR: "OR", - pydop.NOT: "NOT", - pydop.EQU: "EQUAL", - pydop.NEQ: "NOT_EQUAL", - pydop.GRT: "GT", - pydop.GEQ: "GTE", - pydop.LET: "LT", - pydop.LEQ: "LTE", - pydop.STARTSWITH: "STARTSWITH", - pydop.ENDSWITH: "ENDSWITH", - pydop.LOWER: "LOWER", - pydop.UPPER: "UPPER", - pydop.MONOTONIC: "BETWEEN", - pydop.YEAR: "YEAR", - pydop.MONTH: "MONTH", - pydop.DAY: "DAY", - pydop.ADD: "ADD", - pydop.SUB: "SUB", - pydop.MUL: "MUL", - pydop.DIV: "DIV", - } - """ - A mapping of all PyDough operators that can be handled by the Mask Server, - mapping each such operator to the string name used in the linear string - serialization format recognized by the Mask Server. - - Note: ISIN is handled separately. - """ - def __init__( self, server_info: MaskServerInfo, candidate_visitor: MaskServerCandidateVisitor ) -> None: From 891c472720e871708556803183391a183621f842 Mon Sep 17 00:00:00 2001 From: knassre-bodo Date: Thu, 16 Oct 2025 15:05:32 -0400 Subject: [PATCH 14/40] Removing dead rule --- pyproject.toml | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 74502bbe0..102f59181 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -60,9 +60,7 @@ lint.extend-select = [ "TID", # flake8-tidy-imports ] -lint.ignore = [ - "UP038", -] +lint.ignore = [] # Don't run ruff on any ipython notebooks as they may have # names that don't statically resolve. 
exclude = ["**/*.ipynb"] From 62db4bfa2d6084358a6185e0fe695bcf9fde5a96 Mon Sep 17 00:00:00 2001 From: knassre-bodo Date: Thu, 16 Oct 2025 15:09:02 -0400 Subject: [PATCH 15/40] [RUN ALL] From c9f6a5942b3016f7af5143dc60bc715ff409b4b7 Mon Sep 17 00:00:00 2001 From: knassre-bodo Date: Thu, 16 Oct 2025 15:19:22 -0400 Subject: [PATCH 16/40] [RUN ALL] --- .../conversion/mask_server_candidate_visitor.py | 16 +++++++++++++++- pyproject.toml | 4 +++- 2 files changed, 18 insertions(+), 2 deletions(-) diff --git a/pydough/conversion/mask_server_candidate_visitor.py b/pydough/conversion/mask_server_candidate_visitor.py index dd98d8c00..872b6ce94 100644 --- a/pydough/conversion/mask_server_candidate_visitor.py +++ b/pydough/conversion/mask_server_candidate_visitor.py @@ -20,7 +20,21 @@ class MaskServerCandidateVisitor(RelationalExpressionVisitor): """ - TODO + A relational expression visitor that identifies candidate expressions for + Mask Server rewrite conversion, and stores them in a candidate pool for + later processing by a `MaskServerRewriteShuttle`. The candidate pool + contains expressions with the following criteria, including both + atomic instances of the patterns, and larger expressions that contain + these patterns as sub-expressions: + 1. An expression that contains exactly one unique unmasking operator (i.e. a + `MaskedExpressionFunctionOperator` with `is_unmask=True`). The contents + of the unmasking operator can be any valid expression. + 2. Literals are allowed anywhere in the expression. + 3. No other expressions are allowed (outside the contents of the unmasking + operator) except for function calls used to combine other valid + expressions, where the function calls must be one of the operators + supported by the Mask Server (see `OPERATORS_TO_SERVER_NAMES`, as well as + the `ISIN` operator). 
""" OPERATORS_TO_SERVER_NAMES: dict[pydop.PyDoughExpressionOperator, str] = { diff --git a/pyproject.toml b/pyproject.toml index 102f59181..74502bbe0 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -60,7 +60,9 @@ lint.extend-select = [ "TID", # flake8-tidy-imports ] -lint.ignore = [] +lint.ignore = [ + "UP038", +] # Don't run ruff on any ipython notebooks as they may have # names that don't statically resolve. exclude = ["**/*.ipynb"] From 7c371102e41f229af4d525f76a9f3752a1d80ff2 Mon Sep 17 00:00:00 2001 From: knassre-bodo Date: Sun, 26 Oct 2025 09:25:02 -0400 Subject: [PATCH 17/40] Adding logging to keep track of the batch requests sent --- pydough/mask_server/mask_server.py | 11 +- tests/test_masked_sqlite.py | 207 ++++++++++++++++++++++++++--- tests/testing_utilities.py | 62 +++++++++ 3 files changed, 264 insertions(+), 16 deletions(-) diff --git a/pydough/mask_server/mask_server.py b/pydough/mask_server/mask_server.py index 60d9f2a0c..308c71aef 100644 --- a/pydough/mask_server/mask_server.py +++ b/pydough/mask_server/mask_server.py @@ -14,6 +14,7 @@ from enum import Enum from typing import Any +from pydough.logger import get_logger from pydough.mask_server.server_connection import ( RequestMethod, ServerConnection, @@ -142,11 +143,19 @@ def simplify_simple_expression_batch( Returns: An output list containing the response case and payload. """ + + # Log the batch request + pyd_logger = get_logger(__name__) + pyd_logger.info(f"Batch request to Mask Server ({len(batch)} items):") + for idx, item in enumerate(batch): + pyd_logger.info( + f"({idx + 1}) {item.table_path}.{item.column_name}: {item.expression}" + ) + assert batch != [], "Batch cannot be empty." 
path: str = "v1/predicates/batch-evaluate" method: RequestMethod = RequestMethod.POST - request: ServerRequest = self.generate_request(batch, path, method) response_json = self.connection.send_server_request(request) result: list[MaskServerOutput] = self.generate_result(response_json) diff --git a/tests/test_masked_sqlite.py b/tests/test_masked_sqlite.py index 268fce4d5..58a05ac93 100644 --- a/tests/test_masked_sqlite.py +++ b/tests/test_masked_sqlite.py @@ -3,14 +3,24 @@ CRYPTBANK sqlite database. """ +import io from collections.abc import Callable +from contextlib import redirect_stdout import pandas as pd import pytest +from pydough import to_sql from pydough.database_connectors import DatabaseContext, DatabaseDialect from pydough.mask_server import MaskServerInfo -from tests.testing_utilities import PyDoughPandasTest, graph_fetcher +from pydough.metadata import GraphMetadata +from pydough.unqualified import UnqualifiedNode +from tests.testing_utilities import ( + PyDoughPandasTest, + extract_batch_requests_from_logs, + graph_fetcher, + transform_and_exec_pydough, +) @pytest.fixture( @@ -719,9 +729,11 @@ def test_pipeline_until_relational_cryptbank( file_path: str = get_plan_test_filename( f"{cryptbank_pipeline_test_data.test_name}_{enable_mask_rewrites}" ) - cryptbank_pipeline_test_data.run_relational_test( - masked_graphs, file_path, update_tests, mask_server=mock_server_info - ) + # Capture stdout to avoid polluting the console with logging calls + with redirect_stdout(io.StringIO()): + cryptbank_pipeline_test_data.run_relational_test( + masked_graphs, file_path, update_tests, mask_server=mock_server_info + ) def test_pipeline_until_sql_cryptbank( @@ -741,13 +753,15 @@ def test_pipeline_until_sql_cryptbank( f"{cryptbank_pipeline_test_data.test_name}_{enable_mask_rewrites}", sqlite_tpch_db_context.dialect, ) - cryptbank_pipeline_test_data.run_sql_test( - masked_graphs, - file_path, - update_tests, - sqlite_tpch_db_context, - mask_server=mock_server_info, - ) + 
# Capture stdout to avoid polluting the console with logging calls + with redirect_stdout(io.StringIO()): + cryptbank_pipeline_test_data.run_sql_test( + masked_graphs, + file_path, + update_tests, + sqlite_tpch_db_context, + mask_server=mock_server_info, + ) @pytest.mark.execute @@ -762,8 +776,171 @@ def test_pipeline_e2e_cryptbank( Test executing the the custom queries with the custom cryptbank dataset against the refsol DataFrame. """ - cryptbank_pipeline_test_data.run_e2e_test( - masked_graphs, - sqlite_cryptbank_connection, - mask_server=mock_server_info, + # Capture stdout to avoid polluting the console with logging calls + with redirect_stdout(io.StringIO()): + cryptbank_pipeline_test_data.run_e2e_test( + masked_graphs, + sqlite_cryptbank_connection, + mask_server=mock_server_info, + ) + + +@pytest.mark.parametrize( + ["pydough_code", "batch_requests"], + [ + pytest.param( + "selected_customers = customers.WHERE(last_name == 'lee')\n" + "result = CRYPTBANK.CALCULATE(n=COUNT(selected_customers))", + [ + {"CRBNK.CUSTOMERS.c_lname: ['EQUAL', 2, '__col__', 'lee']"}, + ], + id="cryptbank_filter_count_01", + ), + pytest.param( + "selected_customers = customers.WHERE(ISIN(last_name, ('lee', 'smith', 'rodriguez')))\n" + "result = CRYPTBANK.CALCULATE(n=COUNT(selected_customers))", + [ + { + "CRBNK.CUSTOMERS.c_lname: ['IN', 4, '__col__', 'lee', 'smith', 'rodriguez']" + } + ], + id="cryptbank_filter_count_03", + ), + pytest.param( + "selected_customers = customers.WHERE(~ISIN(last_name, ('lee', 'smith', 'rodriguez')))\n" + "result = CRYPTBANK.CALCULATE(n=COUNT(selected_customers))", + [ + { + "CRBNK.CUSTOMERS.c_lname: ['IN', 4, '__col__', 'lee', 'smith', 'rodriguez']", + "CRBNK.CUSTOMERS.c_lname: ['NOT', 1, 'IN', 4, '__col__', 'lee', 'smith', 'rodriguez']", + } + ], + id="cryptbank_filter_count_04", + ), + pytest.param( + "selected_customers = customers.WHERE(" + " (" + " PRESENT(address) &" + " PRESENT(birthday) &" + " (last_name != 'lopez') &" + " 
(ENDSWITH(first_name, 'a') | ENDSWITH(first_name, 'e') | ENDSWITH(first_name, 's'))" + ") | (ABSENT(birthday) & ENDSWITH(phone_number, '5'))" + ")\n" + "result = CRYPTBANK.CALCULATE(n=COUNT(selected_customers))", + [ + { + "CRBNK.CUSTOMERS.c_fname: ['ENDSWITH', 2, '__col__', 'a']", + "CRBNK.CUSTOMERS.c_fname: ['ENDSWITH', 2, '__col__', 'e']", + "CRBNK.CUSTOMERS.c_fname: ['ENDSWITH', 2, '__col__', 's']", + "CRBNK.CUSTOMERS.c_fname: ['OR', 2, 'ENDSWITH', 2, '__col__', 'a', 'ENDSWITH', 2, '__col__', 'e']", + "CRBNK.CUSTOMERS.c_fname: ['OR', 2, 'OR', 2, 'ENDSWITH', 2, '__col__', 'a', 'ENDSWITH', 2, '__col__', 'e', 'ENDSWITH', 2, '__col__', 's']", + "CRBNK.CUSTOMERS.c_lname: ['NOT_EQUAL', 2, '__col__', 'lopez']", + "CRBNK.CUSTOMERS.c_phone: ['ENDSWITH', 2, '__col__', '5']", + } + ], + id="cryptbank_filter_count_27", + ), + pytest.param( + "selected_accounts = accounts.WHERE(" + + " & ".join( + [ + "((account_type == 'retirement') | (account_type == 'savings'))", + "(balance >= 5000)", + "(CONTAINS(account_holder.email, 'outlook') | CONTAINS(account_holder.email, 'gmail'))", + "(YEAR(creation_timestamp) < 2020)", + ] + ) + + ")\n" + "result = CRYPTBANK.CALCULATE(n=COUNT(selected_accounts))", + [ + { + "CRBNK.ACCOUNTS.a_balance: ['GTE', 2, '__col__', 5000]", + "CRBNK.ACCOUNTS.a_open_ts: ['LT', 2, 'YEAR', 1, '__col__', 2020]", + "CRBNK.ACCOUNTS.a_open_ts: ['YEAR', 1, '__col__']", + "CRBNK.ACCOUNTS.a_type: ['EQUAL', 2, '__col__', 'retirement']", + "CRBNK.ACCOUNTS.a_type: ['EQUAL', 2, '__col__', 'savings']", + "CRBNK.ACCOUNTS.a_type: ['OR', 2, 'EQUAL', 2, '__col__', 'retirement', 'EQUAL', 2, '__col__', 'savings']", + } + ], + id="cryptbank_filter_count_28", + ), + pytest.param( + "selected_customers = customers.WHERE(birthday <= '1925-01-01')\n" + "result = CRYPTBANK.CALCULATE(n=COUNT(selected_customers))", + [ + { + "CRBNK.CUSTOMERS.c_birthday: ['LTE', 2, '__col__', '1925-01-01']", + } + ], + id="cryptbank_filter_count_29", + ), + pytest.param( + "selected_customers = 
customers.WHERE(" + " ISIN(YEAR(birthday) - 2, (1975, 1977, 1979, 1981, 1983, 1985, 1987, 1989, 1991, 1993))" + " & ISIN(MONTH(birthday) + 1, (2, 4, 6, 8, 10, 12))" + ")\n" + "result = CRYPTBANK.CALCULATE(n=COUNT(selected_customers))", + [ + { + "CRBNK.CUSTOMERS.c_birthday: ['ADD', 2, 'MONTH', 1, '__col__', 1]", + "CRBNK.CUSTOMERS.c_birthday: ['AND', 2, 'IN', 7, 'ADD', 2, 'MONTH', 1, '__col__', 1, 2, 4, 6, 8, 10, 12, 'IN', 11, 'SUB', 2, 'YEAR', 1, '__col__', 2, 1975, 1977, 1979, 1981, 1983, 1985, 1987, 1989, 1991, 1993]", + "CRBNK.CUSTOMERS.c_birthday: ['IN', 11, 'SUB', 2, 'YEAR', 1, '__col__', 2, 1975, 1977, 1979, 1981, 1983, 1985, 1987, 1989, 1991, 1993]", + "CRBNK.CUSTOMERS.c_birthday: ['IN', 7, 'ADD', 2, 'MONTH', 1, '__col__', 1, 2, 4, 6, 8, 10, 12]", + "CRBNK.CUSTOMERS.c_birthday: ['MONTH', 1, '__col__']", + "CRBNK.CUSTOMERS.c_birthday: ['SUB', 2, 'YEAR', 1, '__col__', 2]", + "CRBNK.CUSTOMERS.c_birthday: ['YEAR', 1, '__col__']", + } + ], + id="cryptbank_filter_count_30", + ), + pytest.param( + "result = CRYPTBANK.CALCULATE(n_neg=SUM(transactions.amount < 0), n_positive=SUM(transactions.amount > 0))", + [ + { + "CRBNK.TRANSACTIONS.t_amount: ['LT', 2, '__col__', 0]", + "CRBNK.TRANSACTIONS.t_amount: ['GT', 2, '__col__', 0]", + } + ], + id="cryptbank_agg_06", + ), + ], +) +def test_cryptbank_mask_server_logging( + pydough_code: str, + batch_requests: list[set[str]], + masked_graphs: graph_fetcher, + enable_mask_rewrites: str, + mock_server_info: MaskServerInfo, + caplog, +): + """ + Tests whether, during the conversion of the PyDough queries on the custom + cryptbank dataset into SQL text, the correct logging calls are made + regarding batches sent to the mask server. + """ + # Obtain the graph and the unqualified node + graph: GraphMetadata = masked_graphs("CRYPTBANK") + root: UnqualifiedNode = transform_and_exec_pydough( + pydough_code, masked_graphs("CRYPTBANK"), {} ) + + # Convert the PyDough code to SQL text. 
+ to_sql(root, metadata=graph, mask_server=mock_server_info) + + # Retrieve the output from the captured logger output, while capturing + # stdout to avoid polluting the console with logging calls + with redirect_stdout(io.StringIO()): + batch_requests_made: list[set[str]] = extract_batch_requests_from_logs( + caplog.text + ) + + # If in raw mode, make sure no requests were made. Otherwise, compare the + # expected batch requests to those made. + if enable_mask_rewrites == "raw": + assert batch_requests_made == [], ( + "Expected no batch requests to be made in 'raw' mode." + ) + else: + assert batch_requests_made == batch_requests, ( + "The batch requests made do not match the expected batch requests." + ) diff --git a/tests/testing_utilities.py b/tests/testing_utilities.py index d3253c63c..a4556a48f 100644 --- a/tests/testing_utilities.py +++ b/tests/testing_utilities.py @@ -23,11 +23,13 @@ "TableCollectionInfo", "TopKInfo", "WhereInfo", + "extract_batch_requests_from_logs", "graph_fetcher", "map_over_dict_values", ] import datetime +import re from abc import ABC, abstractmethod from collections.abc import Callable from dataclasses import dataclass @@ -1496,3 +1498,63 @@ def run_e2e_error_test( if columns is not None: call_kwargs["columns"] = columns to_df(root, **call_kwargs) + + +def extract_batch_requests_from_logs(log_str: str) -> list[set[str]]: + """ + Extracts the batch requests made to a mask server from the provided log + string. 
Each batch request will have a corresponding sequence of log lines + in the following format: + + ``` + INFO pydough.mask_server.mask_server:mask_server.py:149 Batch request to Mask Server (2 items): + INFO pydough.mask_server.mask_server:mask_server.py:151 (1) CRBNK.CUSTOMERS.c_lname: ['EQUAL', 2, '__col__', 'lee'] + INFO pydough.mask_server.mask_server:mask_server.py:151 (2) CRBNK.CUSTOMERS.c_birthday: ['EQUAL', 2, 'YEAR', 1, '__col__', 1980] + ``` + + A log message string with those lines would return the following list of + sets: + + ``` + [ + { + "CRBNK.CUSTOMERS.c_lname: ['EQUAL', 2, '__col__', 'lee']", + "CRBNK.CUSTOMERS.c_birthday: ['EQUAL', 2, 'YEAR', 1, '__col__', 1980]", + } + ] + ``` + + Args: + `log_str`: The log string to extract batch requests from. + + Returns: + A list of sets, each set indicating one of the batch requests made to + the mask server during the conversion process and logged in the logger + that was dumped into the log string. The format for each set entry is + `db_name.table_name.column_name: [expression_list]`. + """ + header_pattern: re.Pattern = re.compile( + r"Batch request to Mask Server \((\d+) items?\):" + ) + entry_pattern: re.Pattern = re.compile(r"\(\d+\) (.+)") + result: list[set[str]] = [] + current_set: set[str] = set() + lines_remaining: int = 0 + for line in log_str.splitlines(): + header_match = re.findall(header_pattern, line) + if header_match: + assert lines_remaining == 0, ( + "Malformed log: new batch request started before previous one ended." + ) + current_set = set() + lines_remaining = int(header_match[0]) + result.append(current_set) + elif lines_remaining > 0: + entry_match = re.findall(entry_pattern, line) + assert entry_match, "Malformed log: expected batch request entry line." + current_set.add(entry_match[0]) + lines_remaining -= 1 + assert lines_remaining == 0, ( + "Malformed log: batch request did not have expected number of entries." 
+ ) + return result From 127244ff9a0d9ee037454a91719a16400326a910 Mon Sep 17 00:00:00 2001 From: knassre-bodo Date: Sun, 26 Oct 2025 09:33:30 -0400 Subject: [PATCH 18/40] Ensuring non-predicate sub-expressions are not sent to the server [RUN CI] --- .../mask_server_candidate_visitor.py | 47 ++++++++++++++++++- tests/test_masked_sqlite.py | 10 ++-- 2 files changed, 49 insertions(+), 8 deletions(-) diff --git a/pydough/conversion/mask_server_candidate_visitor.py b/pydough/conversion/mask_server_candidate_visitor.py index 872b6ce94..3cc5479be 100644 --- a/pydough/conversion/mask_server_candidate_visitor.py +++ b/pydough/conversion/mask_server_candidate_visitor.py @@ -68,6 +68,27 @@ class MaskServerCandidateVisitor(RelationalExpressionVisitor): Note: ISIN is handled separately. """ + PREDICATE_OPERATORS: set[str] = { + "EQUAL", + "NOT_EQUAL", + "GT", + "GTE", + "LT", + "LTE", + "STARTSWITH", + "ENDSWITH", + "IN", + "BETWEEN", + "AND", + "OR", + "NOT", + } + """ + The set of strings from `OPERATORS_TO_SERVER_NAMES` that correspond to + predicate operators. Only expressions whose outermost layer is a predicate + operator will be added to the candidate pool. + """ + def __init__(self) -> None: self.candidate_pool: dict[ RelationalExpression, @@ -121,7 +142,11 @@ def reset(self): self.stack.clear() def visit_call_expression(self, expr: CallExpression) -> None: - # TODO: ADD COMMENTS + # First, recursively visit all of the inputs to the function call, then + # extract the data from the stack to determine whether this expression + # is a candidate for Mask Server rewrite conversion. Reverse the order + # of the stack entries since they were pushed in order of visitation, + # but need to be processed in the original input order. 
for arg in expr.inputs: arg.accept_shuttle(self) mask_ops: set[ @@ -138,18 +163,36 @@ def visit_call_expression(self, expr: CallExpression) -> None: input_op: pydop.MaskedExpressionFunctionOperator input_expr: RelationalExpression combined_exprs: list[str | int | float | None | bool] | None + + # A call in the form `UNMASK(input_expr)` is the atomic `__col__` + # expression. if ( isinstance(expr.op, pydop.MaskedExpressionFunctionOperator) and expr.op.is_unmask ): self.stack.append(((expr.op, expr.inputs[0]), ["__col__"])) + + # If there are zero unmasking operators in the inputs, or more than + # one, this expression is not a candidate. elif len(mask_ops) != 1: self.stack.append((None, None)) + + # Otherwise, verify that the function call operator is one that can be + # handled by the Mask Server, and if so, build the linear serialization + # for the entire expression. If it cannot be handled, return None. else: input_op, input_expr = mask_ops.pop() combined_exprs = self.convert_call_to_server_expression(expr, arg_exprs) if combined_exprs is not None and expr not in self.processed_candidates: - self.candidate_pool[expr] = (input_op, input_expr, combined_exprs) + # Insert the expression and its corresponding data (the unmask + # operator, the input expression, and the linear serialization) + # into the candidate pool, but only if the expression's + # outermost layer is a predicate call. 
+ if ( + len(combined_exprs) > 0 + and combined_exprs[0] in self.PREDICATE_OPERATORS + ): + self.candidate_pool[expr] = (input_op, input_expr, combined_exprs) self.processed_candidates.add(expr) self.stack.append(((input_op, input_expr), combined_exprs)) diff --git a/tests/test_masked_sqlite.py b/tests/test_masked_sqlite.py index 58a05ac93..dae9f533b 100644 --- a/tests/test_masked_sqlite.py +++ b/tests/test_masked_sqlite.py @@ -856,7 +856,6 @@ def test_pipeline_e2e_cryptbank( { "CRBNK.ACCOUNTS.a_balance: ['GTE', 2, '__col__', 5000]", "CRBNK.ACCOUNTS.a_open_ts: ['LT', 2, 'YEAR', 1, '__col__', 2020]", - "CRBNK.ACCOUNTS.a_open_ts: ['YEAR', 1, '__col__']", "CRBNK.ACCOUNTS.a_type: ['EQUAL', 2, '__col__', 'retirement']", "CRBNK.ACCOUNTS.a_type: ['EQUAL', 2, '__col__', 'savings']", "CRBNK.ACCOUNTS.a_type: ['OR', 2, 'EQUAL', 2, '__col__', 'retirement', 'EQUAL', 2, '__col__', 'savings']", @@ -882,13 +881,9 @@ def test_pipeline_e2e_cryptbank( "result = CRYPTBANK.CALCULATE(n=COUNT(selected_customers))", [ { - "CRBNK.CUSTOMERS.c_birthday: ['ADD', 2, 'MONTH', 1, '__col__', 1]", "CRBNK.CUSTOMERS.c_birthday: ['AND', 2, 'IN', 7, 'ADD', 2, 'MONTH', 1, '__col__', 1, 2, 4, 6, 8, 10, 12, 'IN', 11, 'SUB', 2, 'YEAR', 1, '__col__', 2, 1975, 1977, 1979, 1981, 1983, 1985, 1987, 1989, 1991, 1993]", "CRBNK.CUSTOMERS.c_birthday: ['IN', 11, 'SUB', 2, 'YEAR', 1, '__col__', 2, 1975, 1977, 1979, 1981, 1983, 1985, 1987, 1989, 1991, 1993]", "CRBNK.CUSTOMERS.c_birthday: ['IN', 7, 'ADD', 2, 'MONTH', 1, '__col__', 1, 2, 4, 6, 8, 10, 12]", - "CRBNK.CUSTOMERS.c_birthday: ['MONTH', 1, '__col__']", - "CRBNK.CUSTOMERS.c_birthday: ['SUB', 2, 'YEAR', 1, '__col__', 2]", - "CRBNK.CUSTOMERS.c_birthday: ['YEAR', 1, '__col__']", } ], id="cryptbank_filter_count_30", @@ -916,7 +911,10 @@ def test_cryptbank_mask_server_logging( """ Tests whether, during the conversion of the PyDough queries on the custom cryptbank dataset into SQL text, the correct logging calls are made - regarding batches sent to the mask server. 
+ regarding batches sent to the mask server. This is to ensure that the calls + are being batched as expected, the right calls are being sent to the server, + and expressions that are non-predicates are not being sent, even if they are + a valid sub-expression of a predicate that can be sent. """ # Obtain the graph and the unqualified node graph: GraphMetadata = masked_graphs("CRYPTBANK") From 1f2dc6dbbbb20eadce11ae04b4eb3b0b70a9cf33 Mon Sep 17 00:00:00 2001 From: knassre-bodo Date: Sun, 26 Oct 2025 09:33:59 -0400 Subject: [PATCH 19/40] Ensuring non-predicate sub-expressions are not sent to the server [RUN CI] --- pydough/conversion/mask_server_candidate_visitor.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pydough/conversion/mask_server_candidate_visitor.py b/pydough/conversion/mask_server_candidate_visitor.py index 3cc5479be..3f5c58552 100644 --- a/pydough/conversion/mask_server_candidate_visitor.py +++ b/pydough/conversion/mask_server_candidate_visitor.py @@ -261,7 +261,7 @@ def convert_call_to_server_expression( # Build up the list with the first two entries: the name of the function # call operator, and the number of inputs to the function call. 
result: list[str | int | float | None | bool] = [] - operator_name = self.OPERATORS_TO_SERVER_NAMES[call.op] + operator_name: str = self.OPERATORS_TO_SERVER_NAMES[call.op] result.append(operator_name) result.append(len(call.inputs)) From b278f9bb0a59ce5f70b2da041fb8b3bc29441aa1 Mon Sep 17 00:00:00 2001 From: knassre-bodo Date: Sun, 26 Oct 2025 09:57:31 -0400 Subject: [PATCH 20/40] Adding date/datetime/timestamp literal handling tests [RUN CI] --- .../mask_server_candidate_visitor.py | 6 +++ tests/mock_server/lookup_table.py | 24 ++++++++++ tests/test_masked_sqlite.py | 47 ++++++++++++++++++- .../cryptbank_filter_count_31_raw.txt | 4 ++ .../cryptbank_filter_count_31_rewrite.txt | 4 ++ .../cryptbank_filter_count_32_raw.txt | 4 ++ .../cryptbank_filter_count_32_rewrite.txt | 4 ++ .../cryptbank_filter_count_31_raw_sqlite.sql | 5 ++ ...yptbank_filter_count_31_rewrite_sqlite.sql | 5 ++ .../cryptbank_filter_count_32_raw_sqlite.sql | 6 +++ ...yptbank_filter_count_32_rewrite_sqlite.sql | 5 ++ 11 files changed, 113 insertions(+), 1 deletion(-) create mode 100644 tests/test_plan_refsols/cryptbank_filter_count_31_raw.txt create mode 100644 tests/test_plan_refsols/cryptbank_filter_count_31_rewrite.txt create mode 100644 tests/test_plan_refsols/cryptbank_filter_count_32_raw.txt create mode 100644 tests/test_plan_refsols/cryptbank_filter_count_32_rewrite.txt create mode 100644 tests/test_sql_refsols/cryptbank_filter_count_31_raw_sqlite.sql create mode 100644 tests/test_sql_refsols/cryptbank_filter_count_31_rewrite_sqlite.sql create mode 100644 tests/test_sql_refsols/cryptbank_filter_count_32_raw_sqlite.sql create mode 100644 tests/test_sql_refsols/cryptbank_filter_count_32_rewrite_sqlite.sql diff --git a/pydough/conversion/mask_server_candidate_visitor.py b/pydough/conversion/mask_server_candidate_visitor.py index 3f5c58552..04cac9358 100644 --- a/pydough/conversion/mask_server_candidate_visitor.py +++ b/pydough/conversion/mask_server_candidate_visitor.py @@ -5,6 +5,8 @@ __all__ = 
["MaskServerCandidateVisitor"] +import datetime + import pydough.pydough_operators as pydop from pydough.relational import ( CallExpression, @@ -351,5 +353,9 @@ def convert_literal_to_server_expression( return ["TRUE" if literal.value else "FALSE"] elif isinstance(literal.value, (int, float, str)): return [literal.value] + elif isinstance(literal.value, datetime.datetime): + return [literal.value.strftime("%Y-%m-%d %H:%M:%S")] + elif isinstance(literal.value, datetime.date): + return [literal.value.isoformat()] else: return None diff --git a/tests/mock_server/lookup_table.py b/tests/mock_server/lookup_table.py index 6f11ca254..e98b49b0f 100644 --- a/tests/mock_server/lookup_table.py +++ b/tests/mock_server/lookup_table.py @@ -434,4 +434,28 @@ "values": ["1980-01-18", "1981-11-15", "1990-07-31", "1994-06-15"], "count": 4, }, + ( + "srv.CRBNK.CUSTOMERS.c_birthday", + ("IN", 5, "__col__", "1991-11-15", "1978-02-11", "2005-03-14", "1985-04-12"), + ): { + "type": "literal", + "operator": "IN", + "values": ["1990-07-31", "1976-10-27", "1983-12-27"], + "count": 3, + }, + ( + "srv.CRBNK.ACCOUNTS.a_open_ts", + ("BETWEEN", 3, "2020-03-28 09:20:00", "__col__", "2020-09-20 08:30:00"), + ): { + "type": "literal", + "operator": "IN", + "values": [ + "2016-04-29 11:46:51", + "2016-06-10 12:56:51", + "2016-07-20 15:46:51", + "2016-08-22 10:41:51", + "2016-09-03 12:01:51", + ], + "count": 5, + }, } diff --git a/tests/test_masked_sqlite.py b/tests/test_masked_sqlite.py index dae9f533b..207cf8016 100644 --- a/tests/test_masked_sqlite.py +++ b/tests/test_masked_sqlite.py @@ -3,6 +3,7 @@ CRYPTBANK sqlite database. 
""" +import datetime import io from collections.abc import Callable from contextlib import redirect_stdout @@ -428,6 +429,28 @@ ), id="cryptbank_filter_count_30", ), + pytest.param( + PyDoughPandasTest( + "selected_customers = customers.WHERE(ISIN(birthday, [datetime.date(1991, 11, 15), datetime.date(1978, 2, 11), datetime.date(2005, 3, 14), datetime.date(1985, 4, 12)]))\n" + "result = CRYPTBANK.CALCULATE(n=COUNT(selected_customers))", + "CRYPTBANK", + lambda: pd.DataFrame({"n": [3]}), + "cryptbank_filter_count_31", + kwargs={"datetime": datetime, "pd": pd}, + ), + id="cryptbank_filter_count_31", + ), + pytest.param( + PyDoughPandasTest( + "selected_accounts = accounts.WHERE(MONOTONIC(pd.Timestamp('2020-03-28 09:20:00'), creation_timestamp, datetime.datetime(2020, 9, 20, 8, 30, 0)))\n" + "result = CRYPTBANK.CALCULATE(n=COUNT(selected_accounts))", + "CRYPTBANK", + lambda: pd.DataFrame({"n": [5]}), + "cryptbank_filter_count_32", + kwargs={"datetime": datetime, "pd": pd}, + ), + id="cryptbank_filter_count_32", + ), pytest.param( PyDoughPandasTest( "selected_transactions = transactions.WHERE((YEAR(time_stamp) == 2022) & (MONTH(time_stamp) == 6))\n" @@ -888,6 +911,26 @@ def test_pipeline_e2e_cryptbank( ], id="cryptbank_filter_count_30", ), + pytest.param( + "selected_customers = customers.WHERE(ISIN(birthday, [datetime.date(1991, 11, 15), datetime.date(1978, 2, 11), datetime.date(2005, 3, 14), datetime.date(1985, 4, 12)]))\n" + "result = CRYPTBANK.CALCULATE(n=COUNT(selected_customers))", + [ + { + "CRBNK.CUSTOMERS.c_birthday: ['IN', 5, '__col__', '1991-11-15', '1978-02-11', '2005-03-14', '1985-04-12']", + } + ], + id="cryptbank_filter_count_31", + ), + pytest.param( + "selected_accounts = accounts.WHERE(MONOTONIC(pd.Timestamp('2020-03-28 09:20:00'), creation_timestamp, datetime.datetime(2020, 9, 20, 8, 30, 0)))\n" + "result = CRYPTBANK.CALCULATE(n=COUNT(selected_accounts))", + [ + { + "CRBNK.ACCOUNTS.a_open_ts: ['BETWEEN', 3, '2020-03-28 09:20:00', '__col__', 
'2020-09-20 08:30:00']", + } + ], + id="cryptbank_filter_count_32", + ), pytest.param( "result = CRYPTBANK.CALCULATE(n_neg=SUM(transactions.amount < 0), n_positive=SUM(transactions.amount > 0))", [ @@ -919,7 +962,9 @@ def test_cryptbank_mask_server_logging( # Obtain the graph and the unqualified node graph: GraphMetadata = masked_graphs("CRYPTBANK") root: UnqualifiedNode = transform_and_exec_pydough( - pydough_code, masked_graphs("CRYPTBANK"), {} + pydough_code, + masked_graphs("CRYPTBANK"), + {"datetime": datetime, "pd": pd}, ) # Convert the PyDough code to SQL text. diff --git a/tests/test_plan_refsols/cryptbank_filter_count_31_raw.txt b/tests/test_plan_refsols/cryptbank_filter_count_31_raw.txt new file mode 100644 index 000000000..bbfe21d7c --- /dev/null +++ b/tests/test_plan_refsols/cryptbank_filter_count_31_raw.txt @@ -0,0 +1,4 @@ +ROOT(columns=[('n', n_rows)], orderings=[]) + AGGREGATE(keys={}, aggregations={'n_rows': COUNT()}) + FILTER(condition=ISIN(UNMASK::(DATE([c_birthday], '+472 days')), [datetime.date(1991, 11, 15), datetime.date(1978, 2, 11), datetime.date(2005, 3, 14), datetime.date(1985, 4, 12)]:array[unknown]), columns={}) + SCAN(table=CRBNK.CUSTOMERS, columns={'c_birthday': c_birthday}) diff --git a/tests/test_plan_refsols/cryptbank_filter_count_31_rewrite.txt b/tests/test_plan_refsols/cryptbank_filter_count_31_rewrite.txt new file mode 100644 index 000000000..924660224 --- /dev/null +++ b/tests/test_plan_refsols/cryptbank_filter_count_31_rewrite.txt @@ -0,0 +1,4 @@ +ROOT(columns=[('n', n_rows)], orderings=[]) + AGGREGATE(keys={}, aggregations={'n_rows': COUNT()}) + FILTER(condition=ISIN(c_birthday, ['1990-07-31', '1976-10-27', '1983-12-27']:array[unknown]), columns={}) + SCAN(table=CRBNK.CUSTOMERS, columns={'c_birthday': c_birthday}) diff --git a/tests/test_plan_refsols/cryptbank_filter_count_32_raw.txt b/tests/test_plan_refsols/cryptbank_filter_count_32_raw.txt new file mode 100644 index 000000000..3d0ca83f6 --- /dev/null +++ 
b/tests/test_plan_refsols/cryptbank_filter_count_32_raw.txt @@ -0,0 +1,4 @@ +ROOT(columns=[('n', n_rows)], orderings=[]) + AGGREGATE(keys={}, aggregations={'n_rows': COUNT()}) + FILTER(condition=MONOTONIC(Timestamp('2020-03-28 09:20:00'):datetime, UNMASK::(DATETIME([a_open_ts], '+123456789 seconds')), datetime.datetime(2020, 9, 20, 8, 30):datetime), columns={}) + SCAN(table=CRBNK.ACCOUNTS, columns={'a_open_ts': a_open_ts}) diff --git a/tests/test_plan_refsols/cryptbank_filter_count_32_rewrite.txt b/tests/test_plan_refsols/cryptbank_filter_count_32_rewrite.txt new file mode 100644 index 000000000..506f379b7 --- /dev/null +++ b/tests/test_plan_refsols/cryptbank_filter_count_32_rewrite.txt @@ -0,0 +1,4 @@ +ROOT(columns=[('n', n_rows)], orderings=[]) + AGGREGATE(keys={}, aggregations={'n_rows': COUNT()}) + FILTER(condition=ISIN(a_open_ts, ['2016-04-29 11:46:51', '2016-06-10 12:56:51', '2016-07-20 15:46:51', '2016-08-22 10:41:51', '2016-09-03 12:01:51']:array[unknown]), columns={}) + SCAN(table=CRBNK.ACCOUNTS, columns={'a_open_ts': a_open_ts}) diff --git a/tests/test_sql_refsols/cryptbank_filter_count_31_raw_sqlite.sql b/tests/test_sql_refsols/cryptbank_filter_count_31_raw_sqlite.sql new file mode 100644 index 000000000..8c81871f2 --- /dev/null +++ b/tests/test_sql_refsols/cryptbank_filter_count_31_raw_sqlite.sql @@ -0,0 +1,5 @@ +SELECT + COUNT(*) AS n +FROM crbnk.customers +WHERE + DATE(c_birthday, '+472 days') IN ('1991-11-15', '1978-02-11', '2005-03-14', '1985-04-12') diff --git a/tests/test_sql_refsols/cryptbank_filter_count_31_rewrite_sqlite.sql b/tests/test_sql_refsols/cryptbank_filter_count_31_rewrite_sqlite.sql new file mode 100644 index 000000000..b82d51921 --- /dev/null +++ b/tests/test_sql_refsols/cryptbank_filter_count_31_rewrite_sqlite.sql @@ -0,0 +1,5 @@ +SELECT + COUNT(*) AS n +FROM crbnk.customers +WHERE + c_birthday IN ('1990-07-31', '1976-10-27', '1983-12-27') diff --git a/tests/test_sql_refsols/cryptbank_filter_count_32_raw_sqlite.sql 
b/tests/test_sql_refsols/cryptbank_filter_count_32_raw_sqlite.sql new file mode 100644 index 000000000..24ffe436b --- /dev/null +++ b/tests/test_sql_refsols/cryptbank_filter_count_32_raw_sqlite.sql @@ -0,0 +1,6 @@ +SELECT + COUNT(*) AS n +FROM crbnk.accounts +WHERE + DATETIME(a_open_ts, '+123456789 seconds') <= '2020-09-20 08:30:00' + AND DATETIME(a_open_ts, '+123456789 seconds') >= '2020-03-28 09:20:00' diff --git a/tests/test_sql_refsols/cryptbank_filter_count_32_rewrite_sqlite.sql b/tests/test_sql_refsols/cryptbank_filter_count_32_rewrite_sqlite.sql new file mode 100644 index 000000000..27e9db986 --- /dev/null +++ b/tests/test_sql_refsols/cryptbank_filter_count_32_rewrite_sqlite.sql @@ -0,0 +1,5 @@ +SELECT + COUNT(*) AS n +FROM crbnk.accounts +WHERE + a_open_ts IN ('2016-04-29 11:46:51', '2016-06-10 12:56:51', '2016-07-20 15:46:51', '2016-08-22 10:41:51', '2016-09-03 12:01:51') From dcbb69c36792b4a70c2aefa3e6175bf82d9f4099 Mon Sep 17 00:00:00 2001 From: knassre-bodo Date: Thu, 30 Oct 2025 17:04:20 -0400 Subject: [PATCH 21/40] Added new operators support, need to add new tests for datetime, quarter/hour/minute/second, coalesce,iff, join_strings, smallest/largest, and abs --- .../mask_server_candidate_visitor.py | 373 +++++++++++++++++- .../conversion/mask_server_rewrite_shuttle.py | 4 + tests/mock_server/lookup_table.py | 30 ++ tests/test_masked_sqlite.py | 38 ++ .../cryptbank_filter_count_18_rewrite.txt | 2 +- .../cryptbank_filter_count_19_rewrite.txt | 2 +- ...yptbank_filter_count_18_rewrite_sqlite.sql | 4 +- ...yptbank_filter_count_19_rewrite_sqlite.sql | 4 +- 8 files changed, 428 insertions(+), 29 deletions(-) diff --git a/pydough/conversion/mask_server_candidate_visitor.py b/pydough/conversion/mask_server_candidate_visitor.py index 04cac9358..99035059e 100644 --- a/pydough/conversion/mask_server_candidate_visitor.py +++ b/pydough/conversion/mask_server_candidate_visitor.py @@ -6,6 +6,7 @@ __all__ = ["MaskServerCandidateVisitor"] import datetime +import re 
import pydough.pydough_operators as pydop from pydough.relational import ( @@ -17,6 +18,12 @@ RelationalExpressionVisitor, WindowCallExpression, ) +from pydough.sqlglot.transform_bindings.sqlglot_transform_utils import ( + DateTimeUnit, + current_ts_pattern, + offset_pattern, + trunc_pattern, +) from pydough.types import UnknownType @@ -51,23 +58,38 @@ class MaskServerCandidateVisitor(RelationalExpressionVisitor): pydop.LEQ: "LTE", pydop.STARTSWITH: "STARTSWITH", pydop.ENDSWITH: "ENDSWITH", + pydop.CONTAINS: "CONTAINS", + pydop.LIKE: "LIKE", pydop.LOWER: "LOWER", pydop.UPPER: "UPPER", pydop.MONOTONIC: "BETWEEN", pydop.YEAR: "YEAR", pydop.MONTH: "MONTH", pydop.DAY: "DAY", + pydop.HOUR: "HOUR", + pydop.MINUTE: "MINUTE", + pydop.SECOND: "SECOND", pydop.ADD: "ADD", pydop.SUB: "SUB", pydop.MUL: "MUL", pydop.DIV: "DIV", + pydop.ABS: "ABS", + pydop.SMALLEST: "LEAST", + pydop.LARGEST: "GREATEST", + pydop.DEFAULT_TO: "COALESCE", + pydop.IFF: "IFF", } """ A mapping of all PyDough operators that can be handled by the Mask Server, mapping each such operator to the string name used in the linear string serialization format recognized by the Mask Server. - Note: ISIN is handled separately. + Note: the following operators are handled separately: + - `ISIN` + - `SLICE` + - `JOIN_STRINGS` + - `DATETIME` + - `DATEDIFF` """ PREDICATE_OPERATORS: set[str] = { @@ -79,6 +101,8 @@ class MaskServerCandidateVisitor(RelationalExpressionVisitor): "LTE", "STARTSWITH", "ENDSWITH", + "CONTAINS", + "LIKE", "IN", "BETWEEN", "AND", @@ -88,7 +112,10 @@ class MaskServerCandidateVisitor(RelationalExpressionVisitor): """ The set of strings from `OPERATORS_TO_SERVER_NAMES` that correspond to predicate operators. Only expressions whose outermost layer is a predicate - operator will be added to the candidate pool. + operator will be added to the candidate pool. 
This also includes other + operators from the mask server not used by `OPERATORS_TO_SERVER_NAMES` but + that are used by special handling cases, like how the `ISIN` operator + in PyDough becomes the `IN` operator in the mask server. """ def __init__(self) -> None: @@ -250,31 +277,47 @@ def convert_call_to_server_expression( """ # If the function call is an ISIN, handle it separately since it has a - # different format than the other operators. + # different format than the other operators, and we don't need the + # second input to be converted since it must be a literal list. if call.op == pydop.ISIN and len(call.inputs) == 2: return self.convert_isin_call_to_server_expression(call.inputs, input_exprs) - # Besides ISIN, if the function call is not one of the operators that - # can be handled by the Mask Server, return None since it cannot be - # converted. - elif call.op not in self.OPERATORS_TO_SERVER_NAMES: + # If any of the inputs were not able to be converted, return None since + # then the call cannot be converted. + if None in input_exprs: return None - # Build up the list with the first two entries: the name of the function - # call operator, and the number of inputs to the function call. - result: list[str | int | float | None | bool] = [] - operator_name: str = self.OPERATORS_TO_SERVER_NAMES[call.op] - result.append(operator_name) - result.append(len(call.inputs)) - - # For each input to the function call, append its linear serialization - # to the result list. If any input could not be converted, return None. - for inp in input_exprs: - if inp is None: + # Dispatch to the specified conversion method for each operator that + # has dedicated logic, besides ISIN which was already handled. 
+ match call.op: + case pydop.SLICE: + return self.convert_slice_call_to_server_expression(input_exprs) + case pydop.JOIN_STRINGS: + return self.convert_join_strings_call_to_server_expression(input_exprs) + case pydop.DATETIME: + return self.convert_datetime_call_to_server_expression(input_exprs) + case pydop.DATEDIFF: + return self.convert_datediff_call_to_server_expression(input_exprs) + case op if op in self.OPERATORS_TO_SERVER_NAMES: + # Default handling for all the remaining operators that are + # just translated 1:1 with from `OPERATORS_TO_SERVER_NAMES`. + # First, build up the list with the first two entries: the name + # of the function call operator, and the number of inputs to the + # function call. + result: list[str | int | float | None | bool] = [] + operator_name: str = self.OPERATORS_TO_SERVER_NAMES[call.op] + result.append(operator_name) + result.append(len(call.inputs)) + # For each input to the function call, append its linear + # serialization to the result list. We know they are not None + # from the earlier check. + for inp in input_exprs: + assert inp is not None + result.extend(inp) + return result + case _: + # Any other operator is unsupported. return None - result.extend(inp) - - return result def convert_isin_call_to_server_expression( self, @@ -331,6 +374,294 @@ def convert_isin_call_to_server_expression( result.extend(in_list) return result + def convert_slice_call_to_server_expression( + self, input_exprs: list[list[str | int | float | None | bool] | None] + ) -> list[str | int | float | None | bool] | None: + """ + Attempts to convert a PyDough SLICE operation to the linear + serialization format recognized by the Mask Server. This requires + converting the slice from Python form `input_expr[start:stop:step]` to + the more SQL-like form `SUBSTRING(input_expr, start, length)`, but + still using 0-based indexing for start (just like Python). 
+ + Args: + `input_exprs`: A list of linear serializations for each input to + the SLICE call, where each input serialization is either a + list of strings/ints/floats/bools/None, or None if the input + could not be converted. + + Returns: + A list of strings/ints/floats/bools/None representing the linear + serialization of the SLICE operation, or None if the SLICE + operation could not be converted. + """ + assert len(input_exprs) == 4, "SLICE operator requires exactly four inputs." + # Start by building the output list with the operator name, the number + # of arguments (3), and the linear serialization of the input + # expression. If the input expression could not be converted, return + # None. + result: list[str | int | float | None | bool] = ["SLICE", 3] + if input_exprs[0] is None: + return None + result.extend(input_exprs[0]) + + # Attempt to extract the start, stop, and step values from the remaining + # arguments to the slice operation and convert them to start vs length. + # For now, only supports the form where step is 1, and start/stop are + # both positive integer literals, with stop > start. Alternatively, + # allows taking a prefix since that case is similarly well defined. 
+ start_int: int + length_int: int + start_literal = input_exprs[1] + stop_literal = input_exprs[2] + step_literal = input_exprs[3] + if ( + start_literal is None + or stop_literal is None + or step_literal is None + or len(start_literal) != 1 + or len(stop_literal) != 1 + or len(step_literal) != 1 + ): + return None + match (start_literal[0], stop_literal[0], step_literal[0]): + case (int(start), int(stop), int(step)) if ( + start >= 0 and stop > start and step == 1 + ): + start_int = start + length_int = stop - start + case (int(start), int(stop), None) if start >= 0 and stop > start: + start_int = start + length_int = stop - start + case (None, int(stop), None) if stop > 0: + start_int = 0 + length_int = stop + case _: + return None + + result.append(start_int) + result.append(length_int) + return result + + def convert_join_strings_call_to_server_expression( + self, input_exprs: list[list[str | int | float | None | bool] | None] + ) -> list[str | int | float | None | bool] | None: + """ + Converts the JOIN_STRINGS PyDough operator to an equivalent variadic + CONCAT operation in the linear serialization format recognized by + the Mask Server: + + `JOIN_STRINGS('', a, b, c)` becomes `CONCAT(3, a, b, c)` + `JOIN_STRINGS(s, a, b, c)` becomes `CONCAT(5, a, s, b, s, c)` + """ + assert len(input_exprs) >= 3, ( + "JOIN_STRINGS operator requires at least three inputs." + ) + # If the delimiter expression could not be converted, return None. + delimiter_expr = input_exprs[0] + if delimiter_expr is None: + return None + + # Start building the result list with the operator name. + result: list[str | int | float | None | bool] = ["CONCAT"] + + # If the delimiter is the empty string, then the number of arguments + # is simply the number of input expressions minus one (the delimiter), + # and all of the remaining arguments should just be appended directly. 
+ remaining_args = input_exprs[1:] + if delimiter_expr == [""]: + result.append(len(remaining_args)) + for expr in remaining_args: + if expr is None: + return None + result.extend(expr) + return result + + # Otherwise, the remaining arguments are interleaved with the delimiter. + result.append(2 * len(remaining_args) - 1) + for i, expr in enumerate(remaining_args): + if expr is None: + return None + result.extend(expr) + if i < len(remaining_args) - 1: + result.extend(delimiter_expr) + return result + + def convert_datetime_call_to_server_expression( + self, input_exprs: list[list[str | int | float | None | bool] | None] + ) -> list[str | int | float | None | bool] | None: + """ + Attempts to convert a PyDough DATETIME operation to the linear + serialization format recognized by the Mask Server. The DATETIME + operation is treated as a series of transformations on an initial + input expression, where each transformation is either a truncation + (DATETRUNC) or an addition (DATEADD). + + Args: + `input_exprs`: A list of linear serializations for each input to + the DATETIME call, where each input serialization is either a + list of strings/ints/floats/bools/None, or None if the input + could not be converted. The first input is the seed expression, + and each subsequent input is a string representing either a + truncation or addition operation. + + Returns: + A list of strings/ints/floats/bools/None representing the linear + serialization of the DATETIME operation, or None if the DATETIME + operation could not be converted. + """ + # Skip cases where DATETIME is called on an argument just to cast it. + if len(input_exprs) < 2: + return None + + # Start with the input argument, then iteratively apply each phase of + # the transformation with DATETIME as either a truncation or addition. + # Reject if the seed is a literal indicating the current timestamp. 
+ result: list[str | int | float | None | bool] + if input_exprs[0] is None or ( + len(input_exprs[0]) == 1 + and isinstance(input_exprs[0][0], str) + and current_ts_pattern.fullmatch(input_exprs[0][0]) + ): + return None + else: + result = input_exprs[0] + for arg in input_exprs[1:]: + if arg is None or len(arg) != 1 or not isinstance(arg[0], str): + return None + # Use regex to determine if this is a truncation or addition, + # and dispatch to the appropriate conversion method. If it is + # neither, or the conversion method failed, return None. + # Otherwise, the result becomes the new input to the next phase. + trunc_match: re.Match | None = trunc_pattern.fullmatch(arg[0]) + offset_match: re.Match | None = offset_pattern.fullmatch(arg[0]) + new_result: list[str | int | float | None | bool] | None = None + if trunc_match is not None: + new_result = self.convert_datetrunc_call_to_server_expression( + result, str(trunc_match.group(1)) + ) + elif offset_match is not None: + new_result = self.convert_dateadd_call_to_server_expression( + result, + str(offset_match.group(1)), + int(offset_match.group(2)), + str(offset_match.group(3)), + ) + if new_result is None: + return None + result = new_result + + return result + + def convert_datetrunc_call_to_server_expression( + self, input_expr: list[str | int | float | None | bool], unit_str: str + ) -> list[str | int | float | None | bool] | None: + """ + Attempt to convert a DATETRUNC call to the linear serialization format + recognized by the Mask Server. + + Args: + `input_expr`: A linear serialization for the input to the + DATETRUNC call, as a list of strings/ints/floats/bools/None. + `unit_str`: The string representing the unit to truncate to. + + Returns: + A list of strings/ints/floats/bools/None representing the linear + serialization of the DATETRUNC operation, or None if the DATETRUNC + operation could not be converted. 
+ """ + unit = DateTimeUnit.from_string(unit_str) + # Reject if the unit is not recognized, or is a WEEK (for now). + if unit is None or unit == DateTimeUnit.WEEK: + return None + result: list[str | int | float | None | bool] = ["DATETRUNC", 2] + result.append(unit.value) + result.extend(input_expr) + return result + + def convert_dateadd_call_to_server_expression( + self, + input_expr: list[str | int | float | None | bool], + sign_str: str, + amount: int, + unit_str: str, + ) -> list[str | int | float | None | bool] | None: + """ + Attempt to convert a DATEADD call to the linear serialization format + recognized by the Mask Server. + + Args: + `input_expr`: A linear serialization for the input to the + DATEADD call, as a list of strings/ints/floats/bools/None. + `sign_str`: The string representing the sign of the amount to add ( + either "+", "-", or "", with empty being the same as "+"). + `amount`: The integer amount to add (can be negative). + `unit_str`: The string representing the unit to add. + """ + unit = DateTimeUnit.from_string(unit_str) + if unit is None: + return None + result: list[str | int | float | None | bool] = ["DATEADD", 3] + if sign_str == "-": + amount = -amount + result.append(amount) + result.append(unit.value + "s") + result.extend(input_expr) + return result + + def convert_datediff_call_to_server_expression( + self, input_exprs: list[list[str | int | float | None | bool] | None] + ) -> list[str | int | float | None | bool] | None: + """ + Attempt to convert a DATEDIFF call to the linear serialization format + recognized by the Mask Server. The datediff is transformed by having + its first argument, the units, normalized into one of the following: + - "years" + - "quarters" + - "months" + - "days" + - "hours" + - "minutes" + - "seconds" + + Weeks are ignored for now. 
+ + Args: + `input_exprs`: A list of linear serializations for each input to + the DATEDIFF call, where each input serialization is either a + list of strings/ints/floats/bools/None, or None if the input + could not be converted. + + Returns: + A list of strings/ints/floats/bools/None representing the linear + serialization of the DATEDIFF operation, or None if the DATEDIFF + operation could not be converted. + """ + result: list[str | int | float | None | bool] = ["DATEDIFF", 3] + assert len(input_exprs) == 3, "DATEDIFF operator requires exactly three inputs." + + # Extract and normalize the unit argument, rejecting weeks for now. + unit_expr = input_exprs[0] + if ( + unit_expr is None + or len(unit_expr) != 1 + or not isinstance(unit_expr[0], str) + ): + return None + unit = DateTimeUnit.from_string(unit_expr[0]) + if unit is None or unit == DateTimeUnit.WEEK: + return None + result.append(unit.value + "s") + + # Append the linear serializations for the start and end expressions. + start_expr = input_exprs[1] + end_expr = input_exprs[2] + if start_expr is None or end_expr is None: + return None + result.extend(start_expr) + result.extend(end_expr) + return result + def convert_literal_to_server_expression( self, literal: LiteralExpression ) -> list[str | int | float | None | bool] | None: diff --git a/pydough/conversion/mask_server_rewrite_shuttle.py b/pydough/conversion/mask_server_rewrite_shuttle.py index 0792db8c7..99734845c 100644 --- a/pydough/conversion/mask_server_rewrite_shuttle.py +++ b/pydough/conversion/mask_server_rewrite_shuttle.py @@ -84,6 +84,10 @@ def process_batch(self) -> None: expression_list, ) in self.candidate_visitor.candidate_pool.items(): ancillary_info.append((expr, input_expr)) + print( + f"srv.{mask_op.table_path}.{mask_op.masking_metadata.column_name}", + tuple(expression_list), + ) batch.append( MaskServerInput( table_path=mask_op.table_path, diff --git a/tests/mock_server/lookup_table.py b/tests/mock_server/lookup_table.py index 
e98b49b0f..44f1ded46 100644 --- a/tests/mock_server/lookup_table.py +++ b/tests/mock_server/lookup_table.py @@ -458,4 +458,34 @@ ], "count": 5, }, + ("srv.CRBNK.CUSTOMERS.c_email", ("CONTAINS", 2, "__col__", "mail")): { + "type": "literal", + "operator": "NOT_IN", + "values": [ + "homasl@outlook.comt", + "ueenie.t@outlook.netq", + ".hernandez@icloud.comk", + "martinez94@outlook.orgj", + "sa.rodriguez@zoho.comi", + ".brown88@yahoo.comd", + ".lee@outlook.comc", + "lice_j@example.orga", + ], + "count": 8, + }, + ("srv.CRBNK.CUSTOMERS.c_email", ("LIKE", 2, "__col__", "%.%@%mail%")): { + "type": "literal", + "operator": "IN", + "values": [ + "ophia.jackson@mail.org", + "livia.a22@gmail.como", + ".gonzalez@ymail.comm", + "opez.luke99@gmail.coml", + "enry.g@fastmail.comh", + "rank.k@protonmail.comf", + "mily.jones@mail.come", + "ob.smith77@gmail.comb", + ], + "count": 8, + }, } diff --git a/tests/test_masked_sqlite.py b/tests/test_masked_sqlite.py index 207cf8016..bfb4ffa95 100644 --- a/tests/test_masked_sqlite.py +++ b/tests/test_masked_sqlite.py @@ -23,6 +23,41 @@ transform_and_exec_pydough, ) +""" +ADD TESTS FOR: +- DATETIME - BAD (singleton) +- DATETIME - BAD (using current ts) +- DATETIME - BAD (nested ) +- DATETIME - BAD (add week) +- DATETIME - BAD (trunc week) +- DATETIME - GOOD (nested) +- DATETIME - GOOD (add year) +- DATETIME - GOOD (trunc year) +- DATETIME - GOOD (add quarter) +- DATETIME - GOOD (trunc quarter) +- DATETIME - GOOD (add month) +- DATETIME - GOOD (trunc month) +- DATETIME - GOOD (add day) +- DATETIME - GOOD (trunc day) +- DATETIME - GOOD (add hour) +- DATETIME - GOOD (trunc hour) +- DATETIME - GOOD (add minute) +- DATETIME - GOOD (trunc minute) +- DATETIME - GOOD (add second) +- DATETIME - GOOD (trunc second) +- QUARTER +- HOUR +- MINUTE +- SECOND +- COALESCE +- IFF +- JOIN_STRINGS (empty) +- JOIN_STRINGS (nonempty) +- SMALLEST +- LARGEST +- ABS +""" + @pytest.fixture( params=[ @@ -881,7 +916,10 @@ def test_pipeline_e2e_cryptbank( 
"CRBNK.ACCOUNTS.a_open_ts: ['LT', 2, 'YEAR', 1, '__col__', 2020]", "CRBNK.ACCOUNTS.a_type: ['EQUAL', 2, '__col__', 'retirement']", "CRBNK.ACCOUNTS.a_type: ['EQUAL', 2, '__col__', 'savings']", + "CRBNK.CUSTOMERS.c_email: ['CONTAINS', 2, '__col__', 'gmail']", + "CRBNK.CUSTOMERS.c_email: ['CONTAINS', 2, '__col__', 'outlook']", "CRBNK.ACCOUNTS.a_type: ['OR', 2, 'EQUAL', 2, '__col__', 'retirement', 'EQUAL', 2, '__col__', 'savings']", + "CRBNK.CUSTOMERS.c_email: ['OR', 2, 'CONTAINS', 2, '__col__', 'outlook', 'CONTAINS', 2, '__col__', 'gmail']", } ], id="cryptbank_filter_count_28", diff --git a/tests/test_plan_refsols/cryptbank_filter_count_18_rewrite.txt b/tests/test_plan_refsols/cryptbank_filter_count_18_rewrite.txt index 935e582ed..98bf079c7 100644 --- a/tests/test_plan_refsols/cryptbank_filter_count_18_rewrite.txt +++ b/tests/test_plan_refsols/cryptbank_filter_count_18_rewrite.txt @@ -1,4 +1,4 @@ ROOT(columns=[('n', n_rows)], orderings=[]) AGGREGATE(keys={}, aggregations={'n_rows': COUNT()}) - FILTER(condition=LIKE(UNMASK::(SUBSTRING([c_email], -1) || SUBSTRING([c_email], 1, LENGTH([c_email]) - 1)), '%.%@%mail%':string), columns={}) + FILTER(condition=ISIN(c_email, ['ophia.jackson@mail.org', 'livia.a22@gmail.como', '.gonzalez@ymail.comm', 'opez.luke99@gmail.coml', 'enry.g@fastmail.comh', 'rank.k@protonmail.comf', 'mily.jones@mail.come', 'ob.smith77@gmail.comb']:array[unknown]), columns={}) SCAN(table=CRBNK.CUSTOMERS, columns={'c_email': c_email}) diff --git a/tests/test_plan_refsols/cryptbank_filter_count_19_rewrite.txt b/tests/test_plan_refsols/cryptbank_filter_count_19_rewrite.txt index 3da1c4ddd..f32b625c3 100644 --- a/tests/test_plan_refsols/cryptbank_filter_count_19_rewrite.txt +++ b/tests/test_plan_refsols/cryptbank_filter_count_19_rewrite.txt @@ -1,4 +1,4 @@ ROOT(columns=[('n', n_rows)], orderings=[]) AGGREGATE(keys={}, aggregations={'n_rows': COUNT()}) - FILTER(condition=CONTAINS(UNMASK::(SUBSTRING([c_email], -1) || SUBSTRING([c_email], 1, LENGTH([c_email]) - 
1)), 'mail':string), columns={}) + FILTER(condition=NOT(ISIN(c_email, ['homasl@outlook.comt', 'ueenie.t@outlook.netq', '.hernandez@icloud.comk', 'martinez94@outlook.orgj', 'sa.rodriguez@zoho.comi', '.brown88@yahoo.comd', '.lee@outlook.comc', 'lice_j@example.orga']:array[unknown])), columns={}) SCAN(table=CRBNK.CUSTOMERS, columns={'c_email': c_email}) diff --git a/tests/test_sql_refsols/cryptbank_filter_count_18_rewrite_sqlite.sql b/tests/test_sql_refsols/cryptbank_filter_count_18_rewrite_sqlite.sql index 598be0fcf..5396be61f 100644 --- a/tests/test_sql_refsols/cryptbank_filter_count_18_rewrite_sqlite.sql +++ b/tests/test_sql_refsols/cryptbank_filter_count_18_rewrite_sqlite.sql @@ -2,6 +2,4 @@ SELECT COUNT(*) AS n FROM crbnk.customers WHERE - ( - SUBSTRING(c_email, -1) || SUBSTRING(c_email, 1, LENGTH(c_email) - 1) - ) LIKE '%.%@%mail%' + c_email IN ('ophia.jackson@mail.org', 'livia.a22@gmail.como', '.gonzalez@ymail.comm', 'opez.luke99@gmail.coml', 'enry.g@fastmail.comh', 'rank.k@protonmail.comf', 'mily.jones@mail.come', 'ob.smith77@gmail.comb') diff --git a/tests/test_sql_refsols/cryptbank_filter_count_19_rewrite_sqlite.sql b/tests/test_sql_refsols/cryptbank_filter_count_19_rewrite_sqlite.sql index 565b89e92..25e1ef8f2 100644 --- a/tests/test_sql_refsols/cryptbank_filter_count_19_rewrite_sqlite.sql +++ b/tests/test_sql_refsols/cryptbank_filter_count_19_rewrite_sqlite.sql @@ -2,6 +2,4 @@ SELECT COUNT(*) AS n FROM crbnk.customers WHERE - ( - SUBSTRING(c_email, -1) || SUBSTRING(c_email, 1, LENGTH(c_email) - 1) - ) LIKE '%mail%' + NOT c_email IN ('homasl@outlook.comt', 'ueenie.t@outlook.netq', '.hernandez@icloud.comk', 'martinez94@outlook.orgj', 'sa.rodriguez@zoho.comi', '.brown88@yahoo.comd', '.lee@outlook.comc', 'lice_j@example.orga') From feabd8af05c6f02a7a4b3eb63c7bd1a6d07d45da Mon Sep 17 00:00:00 2001 From: knassre-bodo Date: Thu, 30 Oct 2025 18:49:35 -0400 Subject: [PATCH 22/40] Added more tests, handled predicate pushdown bug with least/greatest, handled cases 
where the in/not in list contains a NULL --- .../mask_server_candidate_visitor.py | 1 + .../conversion/mask_server_rewrite_shuttle.py | 73 +++++- .../sqlglot/override_pushdown_predicates.py | 128 ++++++++- tests/mock_server/lookup_table.py | 243 +++++++++++++++++ tests/test_masked_sqlite.py | 248 ++++++++++++++++-- .../cryptbank_filter_count_33_raw.txt | 4 + .../cryptbank_filter_count_33_rewrite.txt | 4 + .../cryptbank_filter_count_34_raw.txt | 4 + .../cryptbank_filter_count_34_rewrite.txt | 4 + .../cryptbank_filter_count_35_raw.txt | 4 + .../cryptbank_filter_count_35_rewrite.txt | 4 + .../cryptbank_filter_count_36_raw.txt | 4 + .../cryptbank_filter_count_36_rewrite.txt | 4 + .../cryptbank_filter_count_37_raw.txt | 4 + .../cryptbank_filter_count_37_rewrite.txt | 4 + .../cryptbank_filter_count_38_raw.txt | 4 + .../cryptbank_filter_count_38_rewrite.txt | 4 + .../cryptbank_filter_count_39_raw.txt | 4 + .../cryptbank_filter_count_39_rewrite.txt | 4 + .../cryptbank_filter_count_40_raw.txt | 4 + .../cryptbank_filter_count_40_rewrite.txt | 3 + .../cryptbank_filter_count_41_raw.txt | 4 + .../cryptbank_filter_count_41_rewrite.txt | 3 + .../cryptbank_filter_count_42_raw.txt | 4 + .../cryptbank_filter_count_42_rewrite.txt | 4 + .../cryptbank_filter_count_43_raw.txt | 4 + .../cryptbank_filter_count_43_rewrite.txt | 4 + .../cryptbank_filter_count_44_raw.txt | 4 + .../cryptbank_filter_count_44_rewrite.txt | 4 + .../cryptbank_filter_count_45_raw.txt | 4 + .../cryptbank_filter_count_45_rewrite.txt | 4 + .../cryptbank_filter_count_46_raw.txt | 4 + .../cryptbank_filter_count_46_rewrite.txt | 4 + .../cryptbank_filter_count_47_raw.txt | 4 + .../cryptbank_filter_count_47_rewrite.txt | 4 + .../cryptbank_filter_count_48_raw.txt | 4 + .../cryptbank_filter_count_48_rewrite.txt | 4 + .../cryptbank_filter_count_49_raw.txt | 4 + .../cryptbank_filter_count_49_rewrite.txt | 4 + .../cryptbank_filter_count_33_raw_sqlite.sql | 5 + ...yptbank_filter_count_33_rewrite_sqlite.sql | 5 + 
.../cryptbank_filter_count_34_raw_sqlite.sql | 18 ++ ...yptbank_filter_count_34_rewrite_sqlite.sql | 5 + .../cryptbank_filter_count_35_raw_sqlite.sql | 6 + ...yptbank_filter_count_35_rewrite_sqlite.sql | 5 + .../cryptbank_filter_count_36_raw_sqlite.sql | 5 + ...yptbank_filter_count_36_rewrite_sqlite.sql | 5 + .../cryptbank_filter_count_37_raw_sqlite.sql | 5 + ...yptbank_filter_count_37_rewrite_sqlite.sql | 5 + .../cryptbank_filter_count_38_raw_sqlite.sql | 14 + ...yptbank_filter_count_38_rewrite_sqlite.sql | 5 + .../cryptbank_filter_count_39_raw_sqlite.sql | 13 + ...yptbank_filter_count_39_rewrite_sqlite.sql | 5 + .../cryptbank_filter_count_40_raw_sqlite.sql | 5 + ...yptbank_filter_count_40_rewrite_sqlite.sql | 3 + .../cryptbank_filter_count_41_raw_sqlite.sql | 5 + ...yptbank_filter_count_41_rewrite_sqlite.sql | 3 + .../cryptbank_filter_count_42_raw_sqlite.sql | 5 + ...yptbank_filter_count_42_rewrite_sqlite.sql | 5 + .../cryptbank_filter_count_43_raw_sqlite.sql | 5 + ...yptbank_filter_count_43_rewrite_sqlite.sql | 5 + .../cryptbank_filter_count_44_raw_sqlite.sql | 5 + ...yptbank_filter_count_44_rewrite_sqlite.sql | 5 + .../cryptbank_filter_count_45_raw_sqlite.sql | 5 + ...yptbank_filter_count_45_rewrite_sqlite.sql | 5 + .../cryptbank_filter_count_46_raw_sqlite.sql | 5 + ...yptbank_filter_count_46_rewrite_sqlite.sql | 5 + .../cryptbank_filter_count_47_raw_sqlite.sql | 5 + ...yptbank_filter_count_47_rewrite_sqlite.sql | 5 + .../cryptbank_filter_count_48_raw_sqlite.sql | 5 + ...yptbank_filter_count_48_rewrite_sqlite.sql | 5 + .../cryptbank_filter_count_49_raw_sqlite.sql | 5 + ...yptbank_filter_count_49_rewrite_sqlite.sql | 5 + 73 files changed, 995 insertions(+), 29 deletions(-) create mode 100644 tests/test_plan_refsols/cryptbank_filter_count_33_raw.txt create mode 100644 tests/test_plan_refsols/cryptbank_filter_count_33_rewrite.txt create mode 100644 tests/test_plan_refsols/cryptbank_filter_count_34_raw.txt create mode 100644 
tests/test_plan_refsols/cryptbank_filter_count_34_rewrite.txt create mode 100644 tests/test_plan_refsols/cryptbank_filter_count_35_raw.txt create mode 100644 tests/test_plan_refsols/cryptbank_filter_count_35_rewrite.txt create mode 100644 tests/test_plan_refsols/cryptbank_filter_count_36_raw.txt create mode 100644 tests/test_plan_refsols/cryptbank_filter_count_36_rewrite.txt create mode 100644 tests/test_plan_refsols/cryptbank_filter_count_37_raw.txt create mode 100644 tests/test_plan_refsols/cryptbank_filter_count_37_rewrite.txt create mode 100644 tests/test_plan_refsols/cryptbank_filter_count_38_raw.txt create mode 100644 tests/test_plan_refsols/cryptbank_filter_count_38_rewrite.txt create mode 100644 tests/test_plan_refsols/cryptbank_filter_count_39_raw.txt create mode 100644 tests/test_plan_refsols/cryptbank_filter_count_39_rewrite.txt create mode 100644 tests/test_plan_refsols/cryptbank_filter_count_40_raw.txt create mode 100644 tests/test_plan_refsols/cryptbank_filter_count_40_rewrite.txt create mode 100644 tests/test_plan_refsols/cryptbank_filter_count_41_raw.txt create mode 100644 tests/test_plan_refsols/cryptbank_filter_count_41_rewrite.txt create mode 100644 tests/test_plan_refsols/cryptbank_filter_count_42_raw.txt create mode 100644 tests/test_plan_refsols/cryptbank_filter_count_42_rewrite.txt create mode 100644 tests/test_plan_refsols/cryptbank_filter_count_43_raw.txt create mode 100644 tests/test_plan_refsols/cryptbank_filter_count_43_rewrite.txt create mode 100644 tests/test_plan_refsols/cryptbank_filter_count_44_raw.txt create mode 100644 tests/test_plan_refsols/cryptbank_filter_count_44_rewrite.txt create mode 100644 tests/test_plan_refsols/cryptbank_filter_count_45_raw.txt create mode 100644 tests/test_plan_refsols/cryptbank_filter_count_45_rewrite.txt create mode 100644 tests/test_plan_refsols/cryptbank_filter_count_46_raw.txt create mode 100644 tests/test_plan_refsols/cryptbank_filter_count_46_rewrite.txt create mode 100644 
tests/test_plan_refsols/cryptbank_filter_count_47_raw.txt create mode 100644 tests/test_plan_refsols/cryptbank_filter_count_47_rewrite.txt create mode 100644 tests/test_plan_refsols/cryptbank_filter_count_48_raw.txt create mode 100644 tests/test_plan_refsols/cryptbank_filter_count_48_rewrite.txt create mode 100644 tests/test_plan_refsols/cryptbank_filter_count_49_raw.txt create mode 100644 tests/test_plan_refsols/cryptbank_filter_count_49_rewrite.txt create mode 100644 tests/test_sql_refsols/cryptbank_filter_count_33_raw_sqlite.sql create mode 100644 tests/test_sql_refsols/cryptbank_filter_count_33_rewrite_sqlite.sql create mode 100644 tests/test_sql_refsols/cryptbank_filter_count_34_raw_sqlite.sql create mode 100644 tests/test_sql_refsols/cryptbank_filter_count_34_rewrite_sqlite.sql create mode 100644 tests/test_sql_refsols/cryptbank_filter_count_35_raw_sqlite.sql create mode 100644 tests/test_sql_refsols/cryptbank_filter_count_35_rewrite_sqlite.sql create mode 100644 tests/test_sql_refsols/cryptbank_filter_count_36_raw_sqlite.sql create mode 100644 tests/test_sql_refsols/cryptbank_filter_count_36_rewrite_sqlite.sql create mode 100644 tests/test_sql_refsols/cryptbank_filter_count_37_raw_sqlite.sql create mode 100644 tests/test_sql_refsols/cryptbank_filter_count_37_rewrite_sqlite.sql create mode 100644 tests/test_sql_refsols/cryptbank_filter_count_38_raw_sqlite.sql create mode 100644 tests/test_sql_refsols/cryptbank_filter_count_38_rewrite_sqlite.sql create mode 100644 tests/test_sql_refsols/cryptbank_filter_count_39_raw_sqlite.sql create mode 100644 tests/test_sql_refsols/cryptbank_filter_count_39_rewrite_sqlite.sql create mode 100644 tests/test_sql_refsols/cryptbank_filter_count_40_raw_sqlite.sql create mode 100644 tests/test_sql_refsols/cryptbank_filter_count_40_rewrite_sqlite.sql create mode 100644 tests/test_sql_refsols/cryptbank_filter_count_41_raw_sqlite.sql create mode 100644 tests/test_sql_refsols/cryptbank_filter_count_41_rewrite_sqlite.sql create mode 
100644 tests/test_sql_refsols/cryptbank_filter_count_42_raw_sqlite.sql create mode 100644 tests/test_sql_refsols/cryptbank_filter_count_42_rewrite_sqlite.sql create mode 100644 tests/test_sql_refsols/cryptbank_filter_count_43_raw_sqlite.sql create mode 100644 tests/test_sql_refsols/cryptbank_filter_count_43_rewrite_sqlite.sql create mode 100644 tests/test_sql_refsols/cryptbank_filter_count_44_raw_sqlite.sql create mode 100644 tests/test_sql_refsols/cryptbank_filter_count_44_rewrite_sqlite.sql create mode 100644 tests/test_sql_refsols/cryptbank_filter_count_45_raw_sqlite.sql create mode 100644 tests/test_sql_refsols/cryptbank_filter_count_45_rewrite_sqlite.sql create mode 100644 tests/test_sql_refsols/cryptbank_filter_count_46_raw_sqlite.sql create mode 100644 tests/test_sql_refsols/cryptbank_filter_count_46_rewrite_sqlite.sql create mode 100644 tests/test_sql_refsols/cryptbank_filter_count_47_raw_sqlite.sql create mode 100644 tests/test_sql_refsols/cryptbank_filter_count_47_rewrite_sqlite.sql create mode 100644 tests/test_sql_refsols/cryptbank_filter_count_48_raw_sqlite.sql create mode 100644 tests/test_sql_refsols/cryptbank_filter_count_48_rewrite_sqlite.sql create mode 100644 tests/test_sql_refsols/cryptbank_filter_count_49_raw_sqlite.sql create mode 100644 tests/test_sql_refsols/cryptbank_filter_count_49_rewrite_sqlite.sql diff --git a/pydough/conversion/mask_server_candidate_visitor.py b/pydough/conversion/mask_server_candidate_visitor.py index 99035059e..840ea0366 100644 --- a/pydough/conversion/mask_server_candidate_visitor.py +++ b/pydough/conversion/mask_server_candidate_visitor.py @@ -64,6 +64,7 @@ class MaskServerCandidateVisitor(RelationalExpressionVisitor): pydop.UPPER: "UPPER", pydop.MONOTONIC: "BETWEEN", pydop.YEAR: "YEAR", + pydop.QUARTER: "QUARTER", pydop.MONTH: "MONTH", pydop.DAY: "DAY", pydop.HOUR: "HOUR", diff --git a/pydough/conversion/mask_server_rewrite_shuttle.py b/pydough/conversion/mask_server_rewrite_shuttle.py index 99734845c..574079c77 
100644 --- a/pydough/conversion/mask_server_rewrite_shuttle.py +++ b/pydough/conversion/mask_server_rewrite_shuttle.py @@ -169,34 +169,83 @@ def build_in_array_expression( MaskServerResponse.NOT_IN_ARRAY, ) assert isinstance(response.payload, list) - if len(response.payload) == 0: + # Extract the list of literals from the response payload. If the list + # contains a NULL, remove it since SQL IN lists cannot contain NULLs, + # then mark it as such so we can add the null check later. + in_list: list = response.payload + contains_null: bool = None in in_list + while None in in_list: + in_list.remove(None) + result: RelationalExpression + if len(in_list) == 0: # If the payload is empty, we can return a literal true/false - # depending on whether it is IN or NOT IN - return LiteralExpression( - response.response_case == MaskServerResponse.NOT_IN_ARRAY, BooleanType() - ) - elif len(response.payload) == 1: + # depending on whether it is IN or NOT IN. If there was a null, then + # instead we just check if the expression is/isn't null. + if contains_null: + result = CallExpression( + pydop.ABSENT + if response.response_case == MaskServerResponse.IN_ARRAY + else pydop.PRESENT, + BooleanType(), + [input_expr], + ) + else: + result = LiteralExpression( + response.response_case == MaskServerResponse.NOT_IN_ARRAY, + BooleanType(), + ) + elif len(in_list) == 1: # If the payload has one element, we can return a simple equality - # or inequality, depending on whether it is IN or NOT IN - return CallExpression( + # or inequality, depending on whether it is IN or NOT IN. + result = CallExpression( pydop.EQU if response.response_case == MaskServerResponse.IN_ARRAY else pydop.NEQ, BooleanType(), [ input_expr, - LiteralExpression(response.payload[0], UnknownType()), + LiteralExpression(in_list[0], UnknownType()), ], ) else: # Otherwise, we need to return an ISIN expression with an array # literal, and if doing NOT IN then negate the whole thing. 
array_literal: LiteralExpression = LiteralExpression( - response.payload, ArrayType(UnknownType()) + in_list, ArrayType(UnknownType()) ) - result: RelationalExpression = CallExpression( + result = CallExpression( pydop.ISIN, BooleanType(), [input_expr, array_literal] ) if response.response_case == MaskServerResponse.NOT_IN_ARRAY: result = CallExpression(pydop.NOT, BooleanType(), [result]) - return result + + # If the original payload contained a NULL, we need to add an extra + # check to the result to account for that, since SQL IN lists cannot + # contain NULLs. + # - If the list is empty after removing nulls, then the present/absent + # check has already been added. + # - Otherwise, if doing IN -> `ABSENT(x) OR ISIN(x, ...)`. + # - Otherwise, if doing NOT_IN -> `PRESENT(x) AND NOT(ISIN(x, ...))`. + if contains_null and len(in_list) > 0: + null_op = ( + pydop.ABSENT + if response.response_case == MaskServerResponse.IN_ARRAY + else pydop.PRESENT + ) + bool_op = ( + pydop.BOR + if response.response_case == MaskServerResponse.IN_ARRAY + else pydop.BAN + ) + is_null_check: CallExpression = CallExpression( + null_op, + BooleanType(), + [input_expr], + ) + result = CallExpression( + bool_op, + BooleanType(), + [is_null_check, result], + ) + + return result diff --git a/pydough/sqlglot/override_pushdown_predicates.py b/pydough/sqlglot/override_pushdown_predicates.py index d4fdbc810..b15f67bfe 100644 --- a/pydough/sqlglot/override_pushdown_predicates.py +++ b/pydough/sqlglot/override_pushdown_predicates.py @@ -3,9 +3,33 @@ """ from sqlglot import exp -from sqlglot.optimizer.pushdown_predicates import pushdown +from sqlglot.optimizer.normalize import normalized +from sqlglot.optimizer.pushdown_predicates import nodes_for_predicate, replace_aliases +from sqlglot.optimizer.simplify import simplify +from sqlglot.optimizer.scope import find_all_in_scope from sqlglot.optimizer.scope import build_scope +# ruff: noqa +# mypy: ignore-errors +# ruff & mypy should not try to 
typecheck or verify any of this + + +def contains_real_aggregate(expression) -> bool: + """ + Check if the expression contains a real aggregate function (e.g. SUM, AVG), + as opposed to MAX(a, b) which is a form of the LEAST/GREATEST function. This + is created by PyDough to account for such an edge case when pushing down + predicates. + """ + for agg_expr in find_all_in_scope(expression, exp.AggFunc, bfs=True): + if ( + isinstance(agg_expr, (exp.Max, exp.Min)) + and len(agg_expr.args["expressions"]) > 0 + ): + continue + return True + return False + def pushdown_predicates(expression, dialect=None): """ @@ -71,3 +95,105 @@ def pushdown_predicates(expression, dialect=None): ) return expression + + +def pushdown(condition, sources, scope_ref_count, dialect, join_index=None): + if not condition: + return + + condition = condition.replace(simplify(condition, dialect=dialect)) + cnf_like = normalized(condition) or not normalized(condition, dnf=True) + + predicates = list( + condition.flatten() + if isinstance(condition, exp.And if cnf_like else exp.Or) + else [condition] + ) + + if cnf_like: + pushdown_cnf(predicates, sources, scope_ref_count, join_index=join_index) + else: + pushdown_dnf(predicates, sources, scope_ref_count) + + +def pushdown_cnf(predicates, sources, scope_ref_count, join_index=None): + """ + If the predicates are in CNF like form, we can simply replace each block in the parent. 
+ """ + join_index = join_index or {} + for predicate in predicates: + for node in nodes_for_predicate(predicate, sources, scope_ref_count).values(): + if isinstance(node, exp.Join): + name = node.alias_or_name + predicate_tables = exp.column_table_names(predicate, name) + + # Don't push the predicate if it references tables that appear in later joins + this_index = join_index[name] + if all( + join_index.get(table, -1) < this_index for table in predicate_tables + ): + predicate.replace(exp.true()) + node.on(predicate, copy=False) + break + if isinstance(node, exp.Select): + predicate.replace(exp.true()) + inner_predicate = replace_aliases(node, predicate) + # PyDough Change: stop using `find_in_scope(inner_predicate, exp.AggFunc)` + # since this will fail if the predicate is MIN/MAX with 2+ args. + if contains_real_aggregate(inner_predicate): + node.having(inner_predicate, copy=False) + else: + node.where(inner_predicate, copy=False) + + +def pushdown_dnf(predicates, sources, scope_ref_count): + """ + If the predicates are in DNF form, we can only push down conditions that are in all blocks. + Additionally, we can't remove predicates from their original form. 
+ """ + # find all the tables that can be pushdown too + # these are tables that are referenced in all blocks of a DNF + # (a.x AND b.x) OR (a.y AND c.y) + # only table a can be push down + pushdown_tables = set() + + for a in predicates: + a_tables = exp.column_table_names(a) + + for b in predicates: + a_tables &= exp.column_table_names(b) + + pushdown_tables.update(a_tables) + + conditions = {} + + # pushdown all predicates to their respective nodes + for table in sorted(pushdown_tables): + for predicate in predicates: + nodes = nodes_for_predicate(predicate, sources, scope_ref_count) + + if table not in nodes: + continue + + conditions[table] = ( + exp.or_(conditions[table], predicate) + if table in conditions + else predicate + ) + + for name, node in nodes.items(): + if name not in conditions: + continue + + predicate = conditions[name] + + if isinstance(node, exp.Join): + node.on(predicate, copy=False) + elif isinstance(node, exp.Select): + inner_predicate = replace_aliases(node, predicate) + # PyDough Change: stop using `find_in_scope(inner_predicate, exp.AggFunc)` + # since this will fail if the predicate is MIN/MAX with 2+ args. 
+ if contains_real_aggregate(inner_predicate): + node.having(inner_predicate, copy=False) + else: + node.where(inner_predicate, copy=False) diff --git a/tests/mock_server/lookup_table.py b/tests/mock_server/lookup_table.py index 44f1ded46..3bcf36e6d 100644 --- a/tests/mock_server/lookup_table.py +++ b/tests/mock_server/lookup_table.py @@ -488,4 +488,247 @@ ], "count": 8, }, + ("srv.CRBNK.ACCOUNTS.a_open_ts", ("IN", 4, "MONTH", 1, "__col__", 1, 2, 3)): { + "type": "literal", + "operator": "IN", + "values": [ + "2013-04-22 11:37:51", + "2017-02-11 10:59:51", + "2011-04-30 15:16:51", + "2016-03-23 12:41:51", + "2013-02-15 12:46:51", + "2018-03-15 10:36:51", + "2014-04-07 14:21:51", + "2015-02-08 17:26:51", + "2016-04-29 11:46:51", + "2012-03-22 12:16:51", + "2015-04-06 13:46:51", + ], + "count": 8, + }, + ( + "srv.CRBNK.ACCOUNTS.a_open_ts", + ("EQUAL", 2, "QUARTER", 1, "__col__", "DAY", 1, "__col__"), + ): { + "type": "literal", + "operator": "IN", + "values": ["2015-05-04 18:01:51"], + "count": 1, + }, + ( + "srv.CRBNK.ACCOUNTS.a_open_ts", + ( + "AND", + 2, + "LT", + 2, + "HOUR", + 1, + "__col__", + 10, + "LT", + 2, + "MINUTE", + 1, + "__col__", + 20, + ), + ): { + "type": "literal", + "operator": "IN", + "values": [ + "2013-04-22 11:37:51", + "2017-09-15 11:26:51", + "2018-03-15 10:36:51", + "2014-05-23 11:31:51", + "2016-08-22 10:41:51", + "2014-08-15 11:31:51", + ], + "count": 6, + }, + ("srv.CRBNK.TRANSACTIONS.t_ts", ("EQUAL", 2, "SECOND", 1, "__col__", 23)): { + "type": "literal", + "operator": "IN", + "values": [ + "2020-11-11 09:03:02", + "2023-09-15 09:00:02", + "2024-07-21 23:24:02", + ], + "count": 3, + }, + ( + "srv.CRBNK.ACCOUNTS.a_balance", + ("BETWEEN", 3, 200, "ABS", 1, "SUB", 2, "__col__", 7250, 600), + ): { + "type": "literal", + "operator": "IN", + "values": [ + 46240000.0, + 57760000.0, + ], + "count": 2, + }, + ( + "srv.CRBNK.ACCOUNTS.a_open_ts", + ( + "EQUAL", + 2, + "GREATEST", + 3, + "HOUR", + 1, + "__col__", + "MINUTE", + 1, + "__col__", + 
"SECOND", + 1, + "__col__", + 10, + ), + ): { + "type": "literal", + "operator": "IN", + "values": [ + "2018-03-15 10:36:51", + "2018-01-02 12:26:51", + ], + "count": 2, + }, + ( + "srv.CRBNK.ACCOUNTS.a_open_ts", + ("EQUAL", 2, "LEAST", 2, "HOUR", 1, "__col__", "MINUTE", 1, "__col__", 15), + ): { + "type": "literal", + "operator": "IN", + "values": [ + "2015-08-10 18:11:51", + "2015-05-04 18:01:51", + "2015-10-19 18:11:51", + "2014-10-03 17:41:51", + ], + "count": 4, + }, + ( + "srv.CRBNK.CUSTOMERS.c_phone", + ("CONTAINS", 2, "CONCAT", 2, "1-", "__col__", "1-5"), + ): { + "type": "literal", + "operator": "NOT_IN", + "values": [], + "count": 0, + }, + ( + "srv.CRBNK.CUSTOMERS.c_phone", + ("CONTAINS", 2, "CONCAT", 3, "1", "-", "__col__", "1-5"), + ): { + "type": "literal", + "operator": "NOT_IN", + "values": [], + "count": 0, + }, + ( + "srv.CRBNK.CUSTOMERS.c_phone", + ("CONTAINS", 2, "CONCAT", 5, "1", "-", "__col__", "-", "1", "5-1"), + ): { + "type": "literal", + "operator": "IN", + "values": [ + "555-112-3456", + "555-901-2345", + "555-091-2345", + "555-123-4567", + ], + "count": 4, + }, + ( + "srv.CRBNK.CUSTOMERS.c_birthday", + ("IN", 3, "COALESCE", 2, "YEAR", 1, "__col__", 1990, 1990, 1991), + ): { + "type": "literal", + "operator": "IN", + "values": [ + "1990-07-31", + "1989-04-07", + None, + ], + "count": 3, + }, + ( + "srv.CRBNK.CUSTOMERS.c_birthday", + ("IN", 3, "COALESCE", 2, "YEAR", 1, "__col__", 1990, 1990, 2005), + ): { + "type": "literal", + "operator": "IN", + "values": [ + "1989-04-07", + None, + ], + "count": 2, + }, + ( + "srv.CRBNK.CUSTOMERS.c_birthday", + ("IN", 3, "COALESCE", 2, "YEAR", 1, "__col__", 2005, 2005, 2006), + ): { + "type": "literal", + "operator": "IN", + "values": [ + None, + ], + "count": 1, + }, + ( + "srv.CRBNK.CUSTOMERS.c_birthday", + ("NOT", 1, "IN", 3, "COALESCE", 2, "YEAR", 1, "__col__", 1990, 1990, 1991), + ): { + "type": "literal", + "operator": "NOT_IN", + "values": [ + "1990-07-31", + "1989-04-07", + None, + ], + "count": 
3, + }, + ( + "srv.CRBNK.CUSTOMERS.c_birthday", + ("NOT", 1, "IN", 3, "COALESCE", 2, "YEAR", 1, "__col__", 1990, 1990, 2005), + ): { + "type": "literal", + "operator": "NOT_IN", + "values": [ + "1989-04-07", + None, + ], + "count": 2, + }, + ( + "srv.CRBNK.CUSTOMERS.c_birthday", + ("NOT", 1, "IN", 3, "COALESCE", 2, "YEAR", 1, "__col__", 2005, 2005, 2006), + ): { + "type": "literal", + "operator": "NOT_IN", + "values": [ + None, + ], + "count": 1, + }, } + + +""" +SELECT c_birthday, DATE(c_birthday, '+472 days') +FROM customers +WHERE STRFTIME('%Y', COALESCE(DATE(c_birthday, '+472 days'), '1990-01-01')) IN ('1990', '1991') +; + +SELECT c_birthday, DATE(c_birthday, '+472 days') +FROM customers +WHERE STRFTIME('%Y', COALESCE(DATE(c_birthday, '+472 days'), '1990-01-01')) IN ('1990') +; + +SELECT c_birthday, DATE(c_birthday, '+472 days') +FROM customers +WHERE STRFTIME('%Y', COALESCE(DATE(c_birthday, '+472 days'), '1990-01-01')) IN ('2005') +; +""" diff --git a/tests/test_masked_sqlite.py b/tests/test_masked_sqlite.py index bfb4ffa95..360ce1cfa 100644 --- a/tests/test_masked_sqlite.py +++ b/tests/test_masked_sqlite.py @@ -45,17 +45,7 @@ - DATETIME - GOOD (trunc minute) - DATETIME - GOOD (add second) - DATETIME - GOOD (trunc second) -- QUARTER -- HOUR -- MINUTE -- SECOND -- COALESCE - IFF -- JOIN_STRINGS (empty) -- JOIN_STRINGS (nonempty) -- SMALLEST -- LARGEST -- ABS """ @@ -486,6 +476,186 @@ ), id="cryptbank_filter_count_32", ), + pytest.param( + PyDoughPandasTest( + "selected_accounts = accounts.WHERE(QUARTER(creation_timestamp) == 1)\n" + "result = CRYPTBANK.CALCULATE(n=COUNT(selected_accounts))", + "CRYPTBANK", + lambda: pd.DataFrame({"n": [11]}), + "cryptbank_filter_count_33", + ), + id="cryptbank_filter_count_33", + ), + pytest.param( + PyDoughPandasTest( + "selected_accounts = accounts.WHERE(QUARTER(creation_timestamp) == DAY(creation_timestamp))\n" + "result = CRYPTBANK.CALCULATE(n=COUNT(selected_accounts))", + "CRYPTBANK", + lambda: pd.DataFrame({"n": [1]}), + 
"cryptbank_filter_count_34", + ), + id="cryptbank_filter_count_34", + ), + pytest.param( + PyDoughPandasTest( + "selected_accounts = accounts.WHERE((HOUR(creation_timestamp) < 10) & (MINUTE(creation_timestamp) < 20))\n" + "result = CRYPTBANK.CALCULATE(n=COUNT(selected_accounts))", + "CRYPTBANK", + lambda: pd.DataFrame({"n": [6]}), + "cryptbank_filter_count_35", + ), + id="cryptbank_filter_count_35", + ), + pytest.param( + PyDoughPandasTest( + "selected_transactions = transactions.WHERE(SECOND(time_stamp) == 23)\n" + "result = CRYPTBANK.CALCULATE(n=COUNT(selected_transactions))", + "CRYPTBANK", + lambda: pd.DataFrame({"n": [3]}), + "cryptbank_filter_count_36", + ), + id="cryptbank_filter_count_36", + ), + pytest.param( + PyDoughPandasTest( + "selected_accounts = accounts.WHERE(MONOTONIC(200, ABS(balance - 7250), 600))\n" + "result = CRYPTBANK.CALCULATE(n=COUNT(selected_accounts))", + "CRYPTBANK", + lambda: pd.DataFrame({"n": [2]}), + "cryptbank_filter_count_37", + ), + id="cryptbank_filter_count_37", + ), + pytest.param( + PyDoughPandasTest( + "selected_accounts = accounts.WHERE(LARGEST(HOUR(creation_timestamp), MINUTE(creation_timestamp), SECOND(creation_timestamp)) == 10)\n" + "result = CRYPTBANK.CALCULATE(n=COUNT(selected_accounts))", + "CRYPTBANK", + lambda: pd.DataFrame({"n": [2]}), + "cryptbank_filter_count_38", + ), + id="cryptbank_filter_count_38", + ), + pytest.param( + PyDoughPandasTest( + "selected_accounts = accounts.WHERE(SMALLEST(HOUR(creation_timestamp), MINUTE(creation_timestamp)) == 15)\n" + "result = CRYPTBANK.CALCULATE(n=COUNT(selected_accounts))", + "CRYPTBANK", + lambda: pd.DataFrame({"n": [4]}), + "cryptbank_filter_count_39", + ), + id="cryptbank_filter_count_39", + ), + pytest.param( + PyDoughPandasTest( + "selected_customers = customers.WHERE(CONTAINS(JOIN_STRINGS('', '1-', phone_number), '1-5'))\n" + "result = CRYPTBANK.CALCULATE(n=COUNT(selected_customers))", + "CRYPTBANK", + lambda: pd.DataFrame({"n": [20]}), + "cryptbank_filter_count_40", 
+ ), + id="cryptbank_filter_count_40", + ), + pytest.param( + PyDoughPandasTest( + "selected_customers = customers.WHERE(CONTAINS(JOIN_STRINGS('-', '1', phone_number), '1-5'))\n" + "result = CRYPTBANK.CALCULATE(n=COUNT(selected_customers))", + "CRYPTBANK", + lambda: pd.DataFrame({"n": [20]}), + "cryptbank_filter_count_41", + ), + id="cryptbank_filter_count_41", + ), + pytest.param( + PyDoughPandasTest( + "selected_customers = customers.WHERE(CONTAINS(JOIN_STRINGS('-', '1', phone_number, '1'), '5-1'))\n" + "result = CRYPTBANK.CALCULATE(n=COUNT(selected_customers))", + "CRYPTBANK", + lambda: pd.DataFrame({"n": [4]}), + "cryptbank_filter_count_42", + ), + id="cryptbank_filter_count_42", + ), + pytest.param( + PyDoughPandasTest( + "selected_customers = customers.WHERE(JOIN_STRINGS(' ', first_name, last_name) == 'olivia anderson')\n" + "result = CRYPTBANK.CALCULATE(n=COUNT(selected_customers))", + "CRYPTBANK", + lambda: pd.DataFrame({"n": [1]}), + "cryptbank_filter_count_43", + ), + id="cryptbank_filter_count_43", + ), + pytest.param( + PyDoughPandasTest( + "selected_customers = customers.WHERE(YEAR(COALESCE(birthday)) == )\n" + "result = CRYPTBANK.CALCULATE(n=COUNT(selected_customers))", + "CRYPTBANK", + lambda: pd.DataFrame({"n": [1]}), + "cryptbank_filter_count_43", + ), + id="cryptbank_filter_count_43", + ), + pytest.param( + PyDoughPandasTest( + "selected_customers = customers.WHERE(ISIN(DEFAULT_TO(YEAR(birthday), 1990), (1990, 1991)))\n" + "result = CRYPTBANK.CALCULATE(n=COUNT(selected_customers))", + "CRYPTBANK", + lambda: pd.DataFrame({"n": [4]}), + "cryptbank_filter_count_44", + ), + id="cryptbank_filter_count_44", + ), + pytest.param( + PyDoughPandasTest( + "selected_customers = customers.WHERE(ISIN(DEFAULT_TO(YEAR(birthday), 1990), (1990, 2005)))\n" + "result = CRYPTBANK.CALCULATE(n=COUNT(selected_customers))", + "CRYPTBANK", + lambda: pd.DataFrame({"n": [3]}), + "cryptbank_filter_count_45", + ), + id="cryptbank_filter_count_45", + ), + pytest.param( + 
PyDoughPandasTest( + "selected_customers = customers.WHERE(ISIN(DEFAULT_TO(YEAR(birthday), 2005), (2005, 2006)))\n" + "result = CRYPTBANK.CALCULATE(n=COUNT(selected_customers))", + "CRYPTBANK", + lambda: pd.DataFrame({"n": [2]}), + "cryptbank_filter_count_46", + ), + id="cryptbank_filter_count_46", + ), + pytest.param( + PyDoughPandasTest( + "selected_customers = customers.WHERE(~ISIN(DEFAULT_TO(YEAR(birthday), 1990), (1990, 1991)))\n" + "result = CRYPTBANK.CALCULATE(n=COUNT(selected_customers))", + "CRYPTBANK", + lambda: pd.DataFrame({"n": [16]}), + "cryptbank_filter_count_47", + ), + id="cryptbank_filter_count_47", + ), + pytest.param( + PyDoughPandasTest( + "selected_customers = customers.WHERE(~ISIN(DEFAULT_TO(YEAR(birthday), 1990), (1990, 2005)))\n" + "result = CRYPTBANK.CALCULATE(n=COUNT(selected_customers))", + "CRYPTBANK", + lambda: pd.DataFrame({"n": [17]}), + "cryptbank_filter_count_48", + ), + id="cryptbank_filter_count_48", + ), + pytest.param( + PyDoughPandasTest( + "selected_customers = customers.WHERE(~ISIN(DEFAULT_TO(YEAR(birthday), 2005), (2005, 2006)))\n" + "result = CRYPTBANK.CALCULATE(n=COUNT(selected_customers))", + "CRYPTBANK", + lambda: pd.DataFrame({"n": [18]}), + "cryptbank_filter_count_49", + ), + id="cryptbank_filter_count_49", + ), pytest.param( PyDoughPandasTest( "selected_transactions = transactions.WHERE((YEAR(time_stamp) == 2022) & (MONTH(time_stamp) == 6))\n" @@ -835,12 +1005,12 @@ def test_pipeline_e2e_cryptbank( against the refsol DataFrame. 
""" # Capture stdout to avoid polluting the console with logging calls - with redirect_stdout(io.StringIO()): - cryptbank_pipeline_test_data.run_e2e_test( - masked_graphs, - sqlite_cryptbank_connection, - mask_server=mock_server_info, - ) + # with redirect_stdout(io.StringIO()): + cryptbank_pipeline_test_data.run_e2e_test( + masked_graphs, + sqlite_cryptbank_connection, + mask_server=mock_server_info, + ) @pytest.mark.parametrize( @@ -979,6 +1149,52 @@ def test_pipeline_e2e_cryptbank( ], id="cryptbank_agg_06", ), + pytest.param( + "selected_accounts = accounts.WHERE(QUARTER(creation_timestamp) == DAY(creation_timestamp))\n" + "result = CRYPTBANK.CALCULATE(n=COUNT(selected_accounts))", + [ + { + "CRBNK.ACCOUNTS.a_open_ts: ['EQUAL', 2, 'QUARTER', 1, '__col__', 'DAY', 1, '__col__']" + } + ], + id="cryptbank_filter_count_34", + ), + pytest.param( + "selected_customers = customers.WHERE(CONTAINS(JOIN_STRINGS('', '1-', phone_number), '1-5'))\n" + "result = CRYPTBANK.CALCULATE(n=COUNT(selected_customers))", + [ + { + "CRBNK.CUSTOMERS.c_phone: ['CONTAINS', 2, 'CONCAT', 2, '1-', '__col__', '1-5']" + } + ], + id="cryptbank_filter_count_40", + ), + pytest.param( + "selected_customers = customers.WHERE(CONTAINS(JOIN_STRINGS('-', '1', phone_number), '1-5'))\n" + "result = CRYPTBANK.CALCULATE(n=COUNT(selected_customers))", + [ + { + "CRBNK.CUSTOMERS.c_phone: ['CONTAINS', 2, 'CONCAT', 3, '1', '-', '__col__', '1-5']" + } + ], + id="cryptbank_filter_count_41", + ), + pytest.param( + "selected_customers = customers.WHERE(CONTAINS(JOIN_STRINGS('-', '1', phone_number, '1'), '5-1'))\n" + "result = CRYPTBANK.CALCULATE(n=COUNT(selected_customers))", + [ + { + "CRBNK.CUSTOMERS.c_phone: ['CONTAINS', 2, 'CONCAT', 5, '1', '-', '__col__', '-', '1', '5-1']" + } + ], + id="cryptbank_filter_count_42", + ), + pytest.param( + "selected_customers = customers.WHERE(JOIN_STRINGS(' ', first_name, last_name) == 'olivia anderson')\n" + "result = CRYPTBANK.CALCULATE(n=COUNT(selected_customers))", + [], + 
id="cryptbank_filter_count_43", + ), ], ) def test_cryptbank_mask_server_logging( diff --git a/tests/test_plan_refsols/cryptbank_filter_count_33_raw.txt b/tests/test_plan_refsols/cryptbank_filter_count_33_raw.txt new file mode 100644 index 000000000..35df42fd5 --- /dev/null +++ b/tests/test_plan_refsols/cryptbank_filter_count_33_raw.txt @@ -0,0 +1,4 @@ +ROOT(columns=[('n', n_rows)], orderings=[]) + AGGREGATE(keys={}, aggregations={'n_rows': COUNT()}) + FILTER(condition=ISIN(MONTH(UNMASK::(DATETIME([a_open_ts], '+123456789 seconds'))), [1, 2, 3]:array[numeric]), columns={}) + SCAN(table=CRBNK.ACCOUNTS, columns={'a_open_ts': a_open_ts}) diff --git a/tests/test_plan_refsols/cryptbank_filter_count_33_rewrite.txt b/tests/test_plan_refsols/cryptbank_filter_count_33_rewrite.txt new file mode 100644 index 000000000..1e78cde11 --- /dev/null +++ b/tests/test_plan_refsols/cryptbank_filter_count_33_rewrite.txt @@ -0,0 +1,4 @@ +ROOT(columns=[('n', n_rows)], orderings=[]) + AGGREGATE(keys={}, aggregations={'n_rows': COUNT()}) + FILTER(condition=ISIN(a_open_ts, ['2013-04-22 11:37:51', '2017-02-11 10:59:51', '2011-04-30 15:16:51', '2016-03-23 12:41:51', '2013-02-15 12:46:51', '2018-03-15 10:36:51', '2014-04-07 14:21:51', '2015-02-08 17:26:51', '2016-04-29 11:46:51', '2012-03-22 12:16:51', '2015-04-06 13:46:51']:array[unknown]), columns={}) + SCAN(table=CRBNK.ACCOUNTS, columns={'a_open_ts': a_open_ts}) diff --git a/tests/test_plan_refsols/cryptbank_filter_count_34_raw.txt b/tests/test_plan_refsols/cryptbank_filter_count_34_raw.txt new file mode 100644 index 000000000..3ee213c72 --- /dev/null +++ b/tests/test_plan_refsols/cryptbank_filter_count_34_raw.txt @@ -0,0 +1,4 @@ +ROOT(columns=[('n', n_rows)], orderings=[]) + AGGREGATE(keys={}, aggregations={'n_rows': COUNT()}) + FILTER(condition=QUARTER(UNMASK::(DATETIME([a_open_ts], '+123456789 seconds'))) == DAY(UNMASK::(DATETIME([a_open_ts], '+123456789 seconds'))), columns={}) + SCAN(table=CRBNK.ACCOUNTS, columns={'a_open_ts': 
a_open_ts}) diff --git a/tests/test_plan_refsols/cryptbank_filter_count_34_rewrite.txt b/tests/test_plan_refsols/cryptbank_filter_count_34_rewrite.txt new file mode 100644 index 000000000..7aeed4c0f --- /dev/null +++ b/tests/test_plan_refsols/cryptbank_filter_count_34_rewrite.txt @@ -0,0 +1,4 @@ +ROOT(columns=[('n', n_rows)], orderings=[]) + AGGREGATE(keys={}, aggregations={'n_rows': COUNT()}) + FILTER(condition=a_open_ts == '2015-05-04 18:01:51':unknown, columns={}) + SCAN(table=CRBNK.ACCOUNTS, columns={'a_open_ts': a_open_ts}) diff --git a/tests/test_plan_refsols/cryptbank_filter_count_35_raw.txt b/tests/test_plan_refsols/cryptbank_filter_count_35_raw.txt new file mode 100644 index 000000000..a07aea681 --- /dev/null +++ b/tests/test_plan_refsols/cryptbank_filter_count_35_raw.txt @@ -0,0 +1,4 @@ +ROOT(columns=[('n', n_rows)], orderings=[]) + AGGREGATE(keys={}, aggregations={'n_rows': COUNT()}) + FILTER(condition=HOUR(UNMASK::(DATETIME([a_open_ts], '+123456789 seconds'))) < 10:numeric & MINUTE(UNMASK::(DATETIME([a_open_ts], '+123456789 seconds'))) < 20:numeric, columns={}) + SCAN(table=CRBNK.ACCOUNTS, columns={'a_open_ts': a_open_ts}) diff --git a/tests/test_plan_refsols/cryptbank_filter_count_35_rewrite.txt b/tests/test_plan_refsols/cryptbank_filter_count_35_rewrite.txt new file mode 100644 index 000000000..4dd3363ae --- /dev/null +++ b/tests/test_plan_refsols/cryptbank_filter_count_35_rewrite.txt @@ -0,0 +1,4 @@ +ROOT(columns=[('n', n_rows)], orderings=[]) + AGGREGATE(keys={}, aggregations={'n_rows': COUNT()}) + FILTER(condition=ISIN(a_open_ts, ['2013-04-22 11:37:51', '2017-09-15 11:26:51', '2018-03-15 10:36:51', '2014-05-23 11:31:51', '2016-08-22 10:41:51', '2014-08-15 11:31:51']:array[unknown]), columns={}) + SCAN(table=CRBNK.ACCOUNTS, columns={'a_open_ts': a_open_ts}) diff --git a/tests/test_plan_refsols/cryptbank_filter_count_36_raw.txt b/tests/test_plan_refsols/cryptbank_filter_count_36_raw.txt new file mode 100644 index 000000000..59a065082 --- /dev/null 
+++ b/tests/test_plan_refsols/cryptbank_filter_count_36_raw.txt @@ -0,0 +1,4 @@ +ROOT(columns=[('n', n_rows)], orderings=[]) + AGGREGATE(keys={}, aggregations={'n_rows': COUNT()}) + FILTER(condition=SECOND(UNMASK::(DATETIME([t_ts], '+54321 seconds'))) == 23:numeric, columns={}) + SCAN(table=CRBNK.TRANSACTIONS, columns={'t_ts': t_ts}) diff --git a/tests/test_plan_refsols/cryptbank_filter_count_36_rewrite.txt b/tests/test_plan_refsols/cryptbank_filter_count_36_rewrite.txt new file mode 100644 index 000000000..60678bf05 --- /dev/null +++ b/tests/test_plan_refsols/cryptbank_filter_count_36_rewrite.txt @@ -0,0 +1,4 @@ +ROOT(columns=[('n', n_rows)], orderings=[]) + AGGREGATE(keys={}, aggregations={'n_rows': COUNT()}) + FILTER(condition=ISIN(t_ts, ['2020-11-11 09:03:02', '2023-09-15 09:00:02', '2024-07-21 23:24:02']:array[unknown]), columns={}) + SCAN(table=CRBNK.TRANSACTIONS, columns={'t_ts': t_ts}) diff --git a/tests/test_plan_refsols/cryptbank_filter_count_37_raw.txt b/tests/test_plan_refsols/cryptbank_filter_count_37_raw.txt new file mode 100644 index 000000000..3b9aa3400 --- /dev/null +++ b/tests/test_plan_refsols/cryptbank_filter_count_37_raw.txt @@ -0,0 +1,4 @@ +ROOT(columns=[('n', n_rows)], orderings=[]) + AGGREGATE(keys={}, aggregations={'n_rows': COUNT()}) + FILTER(condition=MONOTONIC(200:numeric, ABS(UNMASK::(SQRT([a_balance])) - 7250:numeric), 600:numeric), columns={}) + SCAN(table=CRBNK.ACCOUNTS, columns={'a_balance': a_balance}) diff --git a/tests/test_plan_refsols/cryptbank_filter_count_37_rewrite.txt b/tests/test_plan_refsols/cryptbank_filter_count_37_rewrite.txt new file mode 100644 index 000000000..38eceea46 --- /dev/null +++ b/tests/test_plan_refsols/cryptbank_filter_count_37_rewrite.txt @@ -0,0 +1,4 @@ +ROOT(columns=[('n', n_rows)], orderings=[]) + AGGREGATE(keys={}, aggregations={'n_rows': COUNT()}) + FILTER(condition=ISIN(a_balance, [46240000.0, 57760000.0]:array[unknown]), columns={}) + SCAN(table=CRBNK.ACCOUNTS, columns={'a_balance': a_balance}) 
diff --git a/tests/test_plan_refsols/cryptbank_filter_count_38_raw.txt b/tests/test_plan_refsols/cryptbank_filter_count_38_raw.txt new file mode 100644 index 000000000..7b0df0c11 --- /dev/null +++ b/tests/test_plan_refsols/cryptbank_filter_count_38_raw.txt @@ -0,0 +1,4 @@ +ROOT(columns=[('n', n_rows)], orderings=[]) + AGGREGATE(keys={}, aggregations={'n_rows': COUNT()}) + FILTER(condition=LARGEST(HOUR(UNMASK::(DATETIME([a_open_ts], '+123456789 seconds'))), MINUTE(UNMASK::(DATETIME([a_open_ts], '+123456789 seconds'))), SECOND(UNMASK::(DATETIME([a_open_ts], '+123456789 seconds')))) == 10:numeric, columns={}) + SCAN(table=CRBNK.ACCOUNTS, columns={'a_open_ts': a_open_ts}) diff --git a/tests/test_plan_refsols/cryptbank_filter_count_38_rewrite.txt b/tests/test_plan_refsols/cryptbank_filter_count_38_rewrite.txt new file mode 100644 index 000000000..10b036d4c --- /dev/null +++ b/tests/test_plan_refsols/cryptbank_filter_count_38_rewrite.txt @@ -0,0 +1,4 @@ +ROOT(columns=[('n', n_rows)], orderings=[]) + AGGREGATE(keys={}, aggregations={'n_rows': COUNT()}) + FILTER(condition=ISIN(a_open_ts, ['2018-03-15 10:36:51', '2018-01-02 12:26:51']:array[unknown]), columns={}) + SCAN(table=CRBNK.ACCOUNTS, columns={'a_open_ts': a_open_ts}) diff --git a/tests/test_plan_refsols/cryptbank_filter_count_39_raw.txt b/tests/test_plan_refsols/cryptbank_filter_count_39_raw.txt new file mode 100644 index 000000000..4fdbe5082 --- /dev/null +++ b/tests/test_plan_refsols/cryptbank_filter_count_39_raw.txt @@ -0,0 +1,4 @@ +ROOT(columns=[('n', n_rows)], orderings=[]) + AGGREGATE(keys={}, aggregations={'n_rows': COUNT()}) + FILTER(condition=SMALLEST(HOUR(UNMASK::(DATETIME([a_open_ts], '+123456789 seconds'))), MINUTE(UNMASK::(DATETIME([a_open_ts], '+123456789 seconds')))) == 15:numeric, columns={}) + SCAN(table=CRBNK.ACCOUNTS, columns={'a_open_ts': a_open_ts}) diff --git a/tests/test_plan_refsols/cryptbank_filter_count_39_rewrite.txt b/tests/test_plan_refsols/cryptbank_filter_count_39_rewrite.txt new file 
mode 100644 index 000000000..223ee5fac --- /dev/null +++ b/tests/test_plan_refsols/cryptbank_filter_count_39_rewrite.txt @@ -0,0 +1,4 @@ +ROOT(columns=[('n', n_rows)], orderings=[]) + AGGREGATE(keys={}, aggregations={'n_rows': COUNT()}) + FILTER(condition=ISIN(a_open_ts, ['2015-08-10 18:11:51', '2015-05-04 18:01:51', '2015-10-19 18:11:51', '2014-10-03 17:41:51']:array[unknown]), columns={}) + SCAN(table=CRBNK.ACCOUNTS, columns={'a_open_ts': a_open_ts}) diff --git a/tests/test_plan_refsols/cryptbank_filter_count_40_raw.txt b/tests/test_plan_refsols/cryptbank_filter_count_40_raw.txt new file mode 100644 index 000000000..9bf88a215 --- /dev/null +++ b/tests/test_plan_refsols/cryptbank_filter_count_40_raw.txt @@ -0,0 +1,4 @@ +ROOT(columns=[('n', n_rows)], orderings=[]) + AGGREGATE(keys={}, aggregations={'n_rows': COUNT()}) + FILTER(condition=CONTAINS(JOIN_STRINGS('':string, '1-':string, UNMASK::(REPLACE(REPLACE(REPLACE([c_phone], '9', '*'), '0', '9'), '*', '0'))), '1-5':string), columns={}) + SCAN(table=CRBNK.CUSTOMERS, columns={'c_phone': c_phone}) diff --git a/tests/test_plan_refsols/cryptbank_filter_count_40_rewrite.txt b/tests/test_plan_refsols/cryptbank_filter_count_40_rewrite.txt new file mode 100644 index 000000000..174a826e7 --- /dev/null +++ b/tests/test_plan_refsols/cryptbank_filter_count_40_rewrite.txt @@ -0,0 +1,3 @@ +ROOT(columns=[('n', n_rows)], orderings=[]) + AGGREGATE(keys={}, aggregations={'n_rows': COUNT()}) + SCAN(table=CRBNK.CUSTOMERS, columns={}) diff --git a/tests/test_plan_refsols/cryptbank_filter_count_41_raw.txt b/tests/test_plan_refsols/cryptbank_filter_count_41_raw.txt new file mode 100644 index 000000000..e12d9e85b --- /dev/null +++ b/tests/test_plan_refsols/cryptbank_filter_count_41_raw.txt @@ -0,0 +1,4 @@ +ROOT(columns=[('n', n_rows)], orderings=[]) + AGGREGATE(keys={}, aggregations={'n_rows': COUNT()}) + FILTER(condition=CONTAINS(JOIN_STRINGS('-':string, '1':string, UNMASK::(REPLACE(REPLACE(REPLACE([c_phone], '9', '*'), '0', '9'), '*', 
'0'))), '1-5':string), columns={}) + SCAN(table=CRBNK.CUSTOMERS, columns={'c_phone': c_phone}) diff --git a/tests/test_plan_refsols/cryptbank_filter_count_41_rewrite.txt b/tests/test_plan_refsols/cryptbank_filter_count_41_rewrite.txt new file mode 100644 index 000000000..174a826e7 --- /dev/null +++ b/tests/test_plan_refsols/cryptbank_filter_count_41_rewrite.txt @@ -0,0 +1,3 @@ +ROOT(columns=[('n', n_rows)], orderings=[]) + AGGREGATE(keys={}, aggregations={'n_rows': COUNT()}) + SCAN(table=CRBNK.CUSTOMERS, columns={}) diff --git a/tests/test_plan_refsols/cryptbank_filter_count_42_raw.txt b/tests/test_plan_refsols/cryptbank_filter_count_42_raw.txt new file mode 100644 index 000000000..44f03c593 --- /dev/null +++ b/tests/test_plan_refsols/cryptbank_filter_count_42_raw.txt @@ -0,0 +1,4 @@ +ROOT(columns=[('n', n_rows)], orderings=[]) + AGGREGATE(keys={}, aggregations={'n_rows': COUNT()}) + FILTER(condition=CONTAINS(JOIN_STRINGS('-':string, '1':string, UNMASK::(REPLACE(REPLACE(REPLACE([c_phone], '9', '*'), '0', '9'), '*', '0')), '1':string), '5-1':string), columns={}) + SCAN(table=CRBNK.CUSTOMERS, columns={'c_phone': c_phone}) diff --git a/tests/test_plan_refsols/cryptbank_filter_count_42_rewrite.txt b/tests/test_plan_refsols/cryptbank_filter_count_42_rewrite.txt new file mode 100644 index 000000000..d88bab6e1 --- /dev/null +++ b/tests/test_plan_refsols/cryptbank_filter_count_42_rewrite.txt @@ -0,0 +1,4 @@ +ROOT(columns=[('n', n_rows)], orderings=[]) + AGGREGATE(keys={}, aggregations={'n_rows': COUNT()}) + FILTER(condition=ISIN(c_phone, ['555-112-3456', '555-901-2345', '555-091-2345', '555-123-4567']:array[unknown]), columns={}) + SCAN(table=CRBNK.CUSTOMERS, columns={'c_phone': c_phone}) diff --git a/tests/test_plan_refsols/cryptbank_filter_count_43_raw.txt b/tests/test_plan_refsols/cryptbank_filter_count_43_raw.txt new file mode 100644 index 000000000..5ee34a718 --- /dev/null +++ b/tests/test_plan_refsols/cryptbank_filter_count_43_raw.txt @@ -0,0 +1,4 @@ 
+ROOT(columns=[('n', n_rows)], orderings=[]) + AGGREGATE(keys={}, aggregations={'n_rows': COUNT()}) + FILTER(condition=JOIN_STRINGS(' ':string, UNMASK::(LOWER([c_fname])), UNMASK::(LOWER([c_lname]))) == 'olivia anderson':string, columns={}) + SCAN(table=CRBNK.CUSTOMERS, columns={'c_fname': c_fname, 'c_lname': c_lname}) diff --git a/tests/test_plan_refsols/cryptbank_filter_count_43_rewrite.txt b/tests/test_plan_refsols/cryptbank_filter_count_43_rewrite.txt new file mode 100644 index 000000000..5ee34a718 --- /dev/null +++ b/tests/test_plan_refsols/cryptbank_filter_count_43_rewrite.txt @@ -0,0 +1,4 @@ +ROOT(columns=[('n', n_rows)], orderings=[]) + AGGREGATE(keys={}, aggregations={'n_rows': COUNT()}) + FILTER(condition=JOIN_STRINGS(' ':string, UNMASK::(LOWER([c_fname])), UNMASK::(LOWER([c_lname]))) == 'olivia anderson':string, columns={}) + SCAN(table=CRBNK.CUSTOMERS, columns={'c_fname': c_fname, 'c_lname': c_lname}) diff --git a/tests/test_plan_refsols/cryptbank_filter_count_44_raw.txt b/tests/test_plan_refsols/cryptbank_filter_count_44_raw.txt new file mode 100644 index 000000000..e2012c8b7 --- /dev/null +++ b/tests/test_plan_refsols/cryptbank_filter_count_44_raw.txt @@ -0,0 +1,4 @@ +ROOT(columns=[('n', n_rows)], orderings=[]) + AGGREGATE(keys={}, aggregations={'n_rows': COUNT()}) + FILTER(condition=ISIN(DEFAULT_TO(YEAR(UNMASK::(DATE([c_birthday], '+472 days'))), 1990:numeric), [1990, 1991]:array[unknown]), columns={}) + SCAN(table=CRBNK.CUSTOMERS, columns={'c_birthday': c_birthday}) diff --git a/tests/test_plan_refsols/cryptbank_filter_count_44_rewrite.txt b/tests/test_plan_refsols/cryptbank_filter_count_44_rewrite.txt new file mode 100644 index 000000000..e09882c8f --- /dev/null +++ b/tests/test_plan_refsols/cryptbank_filter_count_44_rewrite.txt @@ -0,0 +1,4 @@ +ROOT(columns=[('n', n_rows)], orderings=[]) + AGGREGATE(keys={}, aggregations={'n_rows': COUNT()}) + FILTER(condition=ABSENT(c_birthday) | ISIN(c_birthday, ['1990-07-31', '1989-04-07']:array[unknown]), 
columns={}) + SCAN(table=CRBNK.CUSTOMERS, columns={'c_birthday': c_birthday}) diff --git a/tests/test_plan_refsols/cryptbank_filter_count_45_raw.txt b/tests/test_plan_refsols/cryptbank_filter_count_45_raw.txt new file mode 100644 index 000000000..84908b7f1 --- /dev/null +++ b/tests/test_plan_refsols/cryptbank_filter_count_45_raw.txt @@ -0,0 +1,4 @@ +ROOT(columns=[('n', n_rows)], orderings=[]) + AGGREGATE(keys={}, aggregations={'n_rows': COUNT()}) + FILTER(condition=ISIN(DEFAULT_TO(YEAR(UNMASK::(DATE([c_birthday], '+472 days'))), 1990:numeric), [1990, 2005]:array[unknown]), columns={}) + SCAN(table=CRBNK.CUSTOMERS, columns={'c_birthday': c_birthday}) diff --git a/tests/test_plan_refsols/cryptbank_filter_count_45_rewrite.txt b/tests/test_plan_refsols/cryptbank_filter_count_45_rewrite.txt new file mode 100644 index 000000000..0112adac3 --- /dev/null +++ b/tests/test_plan_refsols/cryptbank_filter_count_45_rewrite.txt @@ -0,0 +1,4 @@ +ROOT(columns=[('n', n_rows)], orderings=[]) + AGGREGATE(keys={}, aggregations={'n_rows': COUNT()}) + FILTER(condition=ABSENT(c_birthday) | c_birthday == '1989-04-07':unknown, columns={}) + SCAN(table=CRBNK.CUSTOMERS, columns={'c_birthday': c_birthday}) diff --git a/tests/test_plan_refsols/cryptbank_filter_count_46_raw.txt b/tests/test_plan_refsols/cryptbank_filter_count_46_raw.txt new file mode 100644 index 000000000..3f3b65f41 --- /dev/null +++ b/tests/test_plan_refsols/cryptbank_filter_count_46_raw.txt @@ -0,0 +1,4 @@ +ROOT(columns=[('n', n_rows)], orderings=[]) + AGGREGATE(keys={}, aggregations={'n_rows': COUNT()}) + FILTER(condition=ISIN(DEFAULT_TO(YEAR(UNMASK::(DATE([c_birthday], '+472 days'))), 2005:numeric), [2005, 2006]:array[unknown]), columns={}) + SCAN(table=CRBNK.CUSTOMERS, columns={'c_birthday': c_birthday}) diff --git a/tests/test_plan_refsols/cryptbank_filter_count_46_rewrite.txt b/tests/test_plan_refsols/cryptbank_filter_count_46_rewrite.txt new file mode 100644 index 000000000..b9d6157ba --- /dev/null +++ 
b/tests/test_plan_refsols/cryptbank_filter_count_46_rewrite.txt @@ -0,0 +1,4 @@ +ROOT(columns=[('n', n_rows)], orderings=[]) + AGGREGATE(keys={}, aggregations={'n_rows': COUNT()}) + FILTER(condition=ABSENT(c_birthday), columns={}) + SCAN(table=CRBNK.CUSTOMERS, columns={'c_birthday': c_birthday}) diff --git a/tests/test_plan_refsols/cryptbank_filter_count_47_raw.txt b/tests/test_plan_refsols/cryptbank_filter_count_47_raw.txt new file mode 100644 index 000000000..ccfac3d44 --- /dev/null +++ b/tests/test_plan_refsols/cryptbank_filter_count_47_raw.txt @@ -0,0 +1,4 @@ +ROOT(columns=[('n', n_rows)], orderings=[]) + AGGREGATE(keys={}, aggregations={'n_rows': COUNT()}) + FILTER(condition=NOT(ISIN(DEFAULT_TO(YEAR(UNMASK::(DATE([c_birthday], '+472 days'))), 1990:numeric), [1990, 1991]:array[unknown])), columns={}) + SCAN(table=CRBNK.CUSTOMERS, columns={'c_birthday': c_birthday}) diff --git a/tests/test_plan_refsols/cryptbank_filter_count_47_rewrite.txt b/tests/test_plan_refsols/cryptbank_filter_count_47_rewrite.txt new file mode 100644 index 000000000..b1c4218ca --- /dev/null +++ b/tests/test_plan_refsols/cryptbank_filter_count_47_rewrite.txt @@ -0,0 +1,4 @@ +ROOT(columns=[('n', n_rows)], orderings=[]) + AGGREGATE(keys={}, aggregations={'n_rows': COUNT()}) + FILTER(condition=NOT(ISIN(c_birthday, ['1990-07-31', '1989-04-07']:array[unknown])) & PRESENT(c_birthday), columns={}) + SCAN(table=CRBNK.CUSTOMERS, columns={'c_birthday': c_birthday}) diff --git a/tests/test_plan_refsols/cryptbank_filter_count_48_raw.txt b/tests/test_plan_refsols/cryptbank_filter_count_48_raw.txt new file mode 100644 index 000000000..8937c18e3 --- /dev/null +++ b/tests/test_plan_refsols/cryptbank_filter_count_48_raw.txt @@ -0,0 +1,4 @@ +ROOT(columns=[('n', n_rows)], orderings=[]) + AGGREGATE(keys={}, aggregations={'n_rows': COUNT()}) + FILTER(condition=NOT(ISIN(DEFAULT_TO(YEAR(UNMASK::(DATE([c_birthday], '+472 days'))), 1990:numeric), [1990, 2005]:array[unknown])), columns={}) + 
SCAN(table=CRBNK.CUSTOMERS, columns={'c_birthday': c_birthday}) diff --git a/tests/test_plan_refsols/cryptbank_filter_count_48_rewrite.txt b/tests/test_plan_refsols/cryptbank_filter_count_48_rewrite.txt new file mode 100644 index 000000000..8fdc5ff76 --- /dev/null +++ b/tests/test_plan_refsols/cryptbank_filter_count_48_rewrite.txt @@ -0,0 +1,4 @@ +ROOT(columns=[('n', n_rows)], orderings=[]) + AGGREGATE(keys={}, aggregations={'n_rows': COUNT()}) + FILTER(condition=c_birthday != '1989-04-07':unknown & PRESENT(c_birthday), columns={}) + SCAN(table=CRBNK.CUSTOMERS, columns={'c_birthday': c_birthday}) diff --git a/tests/test_plan_refsols/cryptbank_filter_count_49_raw.txt b/tests/test_plan_refsols/cryptbank_filter_count_49_raw.txt new file mode 100644 index 000000000..c7d2ebb89 --- /dev/null +++ b/tests/test_plan_refsols/cryptbank_filter_count_49_raw.txt @@ -0,0 +1,4 @@ +ROOT(columns=[('n', n_rows)], orderings=[]) + AGGREGATE(keys={}, aggregations={'n_rows': COUNT()}) + FILTER(condition=NOT(ISIN(DEFAULT_TO(YEAR(UNMASK::(DATE([c_birthday], '+472 days'))), 2005:numeric), [2005, 2006]:array[unknown])), columns={}) + SCAN(table=CRBNK.CUSTOMERS, columns={'c_birthday': c_birthday}) diff --git a/tests/test_plan_refsols/cryptbank_filter_count_49_rewrite.txt b/tests/test_plan_refsols/cryptbank_filter_count_49_rewrite.txt new file mode 100644 index 000000000..429f74d7b --- /dev/null +++ b/tests/test_plan_refsols/cryptbank_filter_count_49_rewrite.txt @@ -0,0 +1,4 @@ +ROOT(columns=[('n', n_rows)], orderings=[]) + AGGREGATE(keys={}, aggregations={'n_rows': COUNT()}) + FILTER(condition=PRESENT(c_birthday), columns={}) + SCAN(table=CRBNK.CUSTOMERS, columns={'c_birthday': c_birthday}) diff --git a/tests/test_sql_refsols/cryptbank_filter_count_33_raw_sqlite.sql b/tests/test_sql_refsols/cryptbank_filter_count_33_raw_sqlite.sql new file mode 100644 index 000000000..bd44fe280 --- /dev/null +++ b/tests/test_sql_refsols/cryptbank_filter_count_33_raw_sqlite.sql @@ -0,0 +1,5 @@ +SELECT + 
COUNT(*) AS n +FROM crbnk.accounts +WHERE + CAST(STRFTIME('%m', DATETIME(a_open_ts, '+123456789 seconds')) AS INTEGER) IN (1, 2, 3) diff --git a/tests/test_sql_refsols/cryptbank_filter_count_33_rewrite_sqlite.sql b/tests/test_sql_refsols/cryptbank_filter_count_33_rewrite_sqlite.sql new file mode 100644 index 000000000..2e606aacb --- /dev/null +++ b/tests/test_sql_refsols/cryptbank_filter_count_33_rewrite_sqlite.sql @@ -0,0 +1,5 @@ +SELECT + COUNT(*) AS n +FROM crbnk.accounts +WHERE + a_open_ts IN ('2013-04-22 11:37:51', '2017-02-11 10:59:51', '2011-04-30 15:16:51', '2016-03-23 12:41:51', '2013-02-15 12:46:51', '2018-03-15 10:36:51', '2014-04-07 14:21:51', '2015-02-08 17:26:51', '2016-04-29 11:46:51', '2012-03-22 12:16:51', '2015-04-06 13:46:51') diff --git a/tests/test_sql_refsols/cryptbank_filter_count_34_raw_sqlite.sql b/tests/test_sql_refsols/cryptbank_filter_count_34_raw_sqlite.sql new file mode 100644 index 000000000..d750b8bd4 --- /dev/null +++ b/tests/test_sql_refsols/cryptbank_filter_count_34_raw_sqlite.sql @@ -0,0 +1,18 @@ +SELECT + COUNT(*) AS n +FROM crbnk.accounts +WHERE + CASE + WHEN CAST(STRFTIME('%m', DATETIME(a_open_ts, '+123456789 seconds')) AS INTEGER) <= 3 + AND CAST(STRFTIME('%m', DATETIME(a_open_ts, '+123456789 seconds')) AS INTEGER) >= 1 + THEN 1 + WHEN CAST(STRFTIME('%m', DATETIME(a_open_ts, '+123456789 seconds')) AS INTEGER) <= 6 + AND CAST(STRFTIME('%m', DATETIME(a_open_ts, '+123456789 seconds')) AS INTEGER) >= 4 + THEN 2 + WHEN CAST(STRFTIME('%m', DATETIME(a_open_ts, '+123456789 seconds')) AS INTEGER) <= 9 + AND CAST(STRFTIME('%m', DATETIME(a_open_ts, '+123456789 seconds')) AS INTEGER) >= 7 + THEN 3 + WHEN CAST(STRFTIME('%m', DATETIME(a_open_ts, '+123456789 seconds')) AS INTEGER) <= 12 + AND CAST(STRFTIME('%m', DATETIME(a_open_ts, '+123456789 seconds')) AS INTEGER) >= 10 + THEN 4 + END = CAST(STRFTIME('%d', DATETIME(a_open_ts, '+123456789 seconds')) AS INTEGER) diff --git 
a/tests/test_sql_refsols/cryptbank_filter_count_34_rewrite_sqlite.sql b/tests/test_sql_refsols/cryptbank_filter_count_34_rewrite_sqlite.sql new file mode 100644 index 000000000..30fcdcb3b --- /dev/null +++ b/tests/test_sql_refsols/cryptbank_filter_count_34_rewrite_sqlite.sql @@ -0,0 +1,5 @@ +SELECT + COUNT(*) AS n +FROM crbnk.accounts +WHERE + a_open_ts = '2015-05-04 18:01:51' diff --git a/tests/test_sql_refsols/cryptbank_filter_count_35_raw_sqlite.sql b/tests/test_sql_refsols/cryptbank_filter_count_35_raw_sqlite.sql new file mode 100644 index 000000000..00039a869 --- /dev/null +++ b/tests/test_sql_refsols/cryptbank_filter_count_35_raw_sqlite.sql @@ -0,0 +1,6 @@ +SELECT + COUNT(*) AS n +FROM crbnk.accounts +WHERE + CAST(STRFTIME('%H', DATETIME(a_open_ts, '+123456789 seconds')) AS INTEGER) < 10 + AND CAST(STRFTIME('%M', DATETIME(a_open_ts, '+123456789 seconds')) AS INTEGER) < 20 diff --git a/tests/test_sql_refsols/cryptbank_filter_count_35_rewrite_sqlite.sql b/tests/test_sql_refsols/cryptbank_filter_count_35_rewrite_sqlite.sql new file mode 100644 index 000000000..600292e56 --- /dev/null +++ b/tests/test_sql_refsols/cryptbank_filter_count_35_rewrite_sqlite.sql @@ -0,0 +1,5 @@ +SELECT + COUNT(*) AS n +FROM crbnk.accounts +WHERE + a_open_ts IN ('2013-04-22 11:37:51', '2017-09-15 11:26:51', '2018-03-15 10:36:51', '2014-05-23 11:31:51', '2016-08-22 10:41:51', '2014-08-15 11:31:51') diff --git a/tests/test_sql_refsols/cryptbank_filter_count_36_raw_sqlite.sql b/tests/test_sql_refsols/cryptbank_filter_count_36_raw_sqlite.sql new file mode 100644 index 000000000..b88b20918 --- /dev/null +++ b/tests/test_sql_refsols/cryptbank_filter_count_36_raw_sqlite.sql @@ -0,0 +1,5 @@ +SELECT + COUNT(*) AS n +FROM crbnk.transactions +WHERE + CAST(STRFTIME('%S', DATETIME(t_ts, '+54321 seconds')) AS INTEGER) = 23 diff --git a/tests/test_sql_refsols/cryptbank_filter_count_36_rewrite_sqlite.sql b/tests/test_sql_refsols/cryptbank_filter_count_36_rewrite_sqlite.sql new file mode 100644 index 
000000000..66a0fd2a5 --- /dev/null +++ b/tests/test_sql_refsols/cryptbank_filter_count_36_rewrite_sqlite.sql @@ -0,0 +1,5 @@ +SELECT + COUNT(*) AS n +FROM crbnk.transactions +WHERE + t_ts IN ('2020-11-11 09:03:02', '2023-09-15 09:00:02', '2024-07-21 23:24:02') diff --git a/tests/test_sql_refsols/cryptbank_filter_count_37_raw_sqlite.sql b/tests/test_sql_refsols/cryptbank_filter_count_37_raw_sqlite.sql new file mode 100644 index 000000000..063179e95 --- /dev/null +++ b/tests/test_sql_refsols/cryptbank_filter_count_37_raw_sqlite.sql @@ -0,0 +1,5 @@ +SELECT + COUNT(*) AS n +FROM crbnk.accounts +WHERE + ABS(SQRT(a_balance) - 7250) <= 600 AND ABS(SQRT(a_balance) - 7250) >= 200 diff --git a/tests/test_sql_refsols/cryptbank_filter_count_37_rewrite_sqlite.sql b/tests/test_sql_refsols/cryptbank_filter_count_37_rewrite_sqlite.sql new file mode 100644 index 000000000..c80f475e1 --- /dev/null +++ b/tests/test_sql_refsols/cryptbank_filter_count_37_rewrite_sqlite.sql @@ -0,0 +1,5 @@ +SELECT + COUNT(*) AS n +FROM crbnk.accounts +WHERE + a_balance IN (46240000.0, 57760000.0) diff --git a/tests/test_sql_refsols/cryptbank_filter_count_38_raw_sqlite.sql b/tests/test_sql_refsols/cryptbank_filter_count_38_raw_sqlite.sql new file mode 100644 index 000000000..d5c4aed7f --- /dev/null +++ b/tests/test_sql_refsols/cryptbank_filter_count_38_raw_sqlite.sql @@ -0,0 +1,14 @@ +WITH _t1 AS ( + SELECT + a_open_ts + FROM crbnk.accounts + WHERE + MAX( + CAST(STRFTIME('%H', DATETIME(a_open_ts, '+123456789 seconds')) AS INTEGER), + CAST(STRFTIME('%M', DATETIME(a_open_ts, '+123456789 seconds')) AS INTEGER), + CAST(STRFTIME('%S', DATETIME(a_open_ts, '+123456789 seconds')) AS INTEGER) + ) = 10 +) +SELECT + COUNT(*) AS n +FROM _t1 diff --git a/tests/test_sql_refsols/cryptbank_filter_count_38_rewrite_sqlite.sql b/tests/test_sql_refsols/cryptbank_filter_count_38_rewrite_sqlite.sql new file mode 100644 index 000000000..d56c032b4 --- /dev/null +++ 
b/tests/test_sql_refsols/cryptbank_filter_count_38_rewrite_sqlite.sql @@ -0,0 +1,5 @@ +SELECT + COUNT(*) AS n +FROM crbnk.accounts +WHERE + a_open_ts IN ('2018-03-15 10:36:51', '2018-01-02 12:26:51') diff --git a/tests/test_sql_refsols/cryptbank_filter_count_39_raw_sqlite.sql b/tests/test_sql_refsols/cryptbank_filter_count_39_raw_sqlite.sql new file mode 100644 index 000000000..5aab54336 --- /dev/null +++ b/tests/test_sql_refsols/cryptbank_filter_count_39_raw_sqlite.sql @@ -0,0 +1,13 @@ +WITH _t1 AS ( + SELECT + a_open_ts + FROM crbnk.accounts + WHERE + MIN( + CAST(STRFTIME('%H', DATETIME(a_open_ts, '+123456789 seconds')) AS INTEGER), + CAST(STRFTIME('%M', DATETIME(a_open_ts, '+123456789 seconds')) AS INTEGER) + ) = 15 +) +SELECT + COUNT(*) AS n +FROM _t1 diff --git a/tests/test_sql_refsols/cryptbank_filter_count_39_rewrite_sqlite.sql b/tests/test_sql_refsols/cryptbank_filter_count_39_rewrite_sqlite.sql new file mode 100644 index 000000000..dad8534d8 --- /dev/null +++ b/tests/test_sql_refsols/cryptbank_filter_count_39_rewrite_sqlite.sql @@ -0,0 +1,5 @@ +SELECT + COUNT(*) AS n +FROM crbnk.accounts +WHERE + a_open_ts IN ('2015-08-10 18:11:51', '2015-05-04 18:01:51', '2015-10-19 18:11:51', '2014-10-03 17:41:51') diff --git a/tests/test_sql_refsols/cryptbank_filter_count_40_raw_sqlite.sql b/tests/test_sql_refsols/cryptbank_filter_count_40_raw_sqlite.sql new file mode 100644 index 000000000..f1c7b44c0 --- /dev/null +++ b/tests/test_sql_refsols/cryptbank_filter_count_40_raw_sqlite.sql @@ -0,0 +1,5 @@ +SELECT + COUNT(*) AS n +FROM crbnk.customers +WHERE + CONCAT_WS('', '1-', REPLACE(REPLACE(REPLACE(c_phone, '9', '*'), '0', '9'), '*', '0')) LIKE '%1-5%' diff --git a/tests/test_sql_refsols/cryptbank_filter_count_40_rewrite_sqlite.sql b/tests/test_sql_refsols/cryptbank_filter_count_40_rewrite_sqlite.sql new file mode 100644 index 000000000..c9724c3af --- /dev/null +++ b/tests/test_sql_refsols/cryptbank_filter_count_40_rewrite_sqlite.sql @@ -0,0 +1,3 @@ +SELECT + COUNT(*) AS 
n +FROM crbnk.customers diff --git a/tests/test_sql_refsols/cryptbank_filter_count_41_raw_sqlite.sql b/tests/test_sql_refsols/cryptbank_filter_count_41_raw_sqlite.sql new file mode 100644 index 000000000..3d4f0db7b --- /dev/null +++ b/tests/test_sql_refsols/cryptbank_filter_count_41_raw_sqlite.sql @@ -0,0 +1,5 @@ +SELECT + COUNT(*) AS n +FROM crbnk.customers +WHERE + CONCAT_WS('-', '1', REPLACE(REPLACE(REPLACE(c_phone, '9', '*'), '0', '9'), '*', '0')) LIKE '%1-5%' diff --git a/tests/test_sql_refsols/cryptbank_filter_count_41_rewrite_sqlite.sql b/tests/test_sql_refsols/cryptbank_filter_count_41_rewrite_sqlite.sql new file mode 100644 index 000000000..c9724c3af --- /dev/null +++ b/tests/test_sql_refsols/cryptbank_filter_count_41_rewrite_sqlite.sql @@ -0,0 +1,3 @@ +SELECT + COUNT(*) AS n +FROM crbnk.customers diff --git a/tests/test_sql_refsols/cryptbank_filter_count_42_raw_sqlite.sql b/tests/test_sql_refsols/cryptbank_filter_count_42_raw_sqlite.sql new file mode 100644 index 000000000..f5b1f84a4 --- /dev/null +++ b/tests/test_sql_refsols/cryptbank_filter_count_42_raw_sqlite.sql @@ -0,0 +1,5 @@ +SELECT + COUNT(*) AS n +FROM crbnk.customers +WHERE + CONCAT_WS('-', '1', REPLACE(REPLACE(REPLACE(c_phone, '9', '*'), '0', '9'), '*', '0'), '1') LIKE '%5-1%' diff --git a/tests/test_sql_refsols/cryptbank_filter_count_42_rewrite_sqlite.sql b/tests/test_sql_refsols/cryptbank_filter_count_42_rewrite_sqlite.sql new file mode 100644 index 000000000..2013f54c7 --- /dev/null +++ b/tests/test_sql_refsols/cryptbank_filter_count_42_rewrite_sqlite.sql @@ -0,0 +1,5 @@ +SELECT + COUNT(*) AS n +FROM crbnk.customers +WHERE + c_phone IN ('555-112-3456', '555-901-2345', '555-091-2345', '555-123-4567') diff --git a/tests/test_sql_refsols/cryptbank_filter_count_43_raw_sqlite.sql b/tests/test_sql_refsols/cryptbank_filter_count_43_raw_sqlite.sql new file mode 100644 index 000000000..54de7672d --- /dev/null +++ b/tests/test_sql_refsols/cryptbank_filter_count_43_raw_sqlite.sql @@ -0,0 +1,5 @@ 
+SELECT + COUNT(*) AS n +FROM crbnk.customers +WHERE + CONCAT_WS(' ', LOWER(c_fname), LOWER(c_lname)) = 'olivia anderson' diff --git a/tests/test_sql_refsols/cryptbank_filter_count_43_rewrite_sqlite.sql b/tests/test_sql_refsols/cryptbank_filter_count_43_rewrite_sqlite.sql new file mode 100644 index 000000000..54de7672d --- /dev/null +++ b/tests/test_sql_refsols/cryptbank_filter_count_43_rewrite_sqlite.sql @@ -0,0 +1,5 @@ +SELECT + COUNT(*) AS n +FROM crbnk.customers +WHERE + CONCAT_WS(' ', LOWER(c_fname), LOWER(c_lname)) = 'olivia anderson' diff --git a/tests/test_sql_refsols/cryptbank_filter_count_44_raw_sqlite.sql b/tests/test_sql_refsols/cryptbank_filter_count_44_raw_sqlite.sql new file mode 100644 index 000000000..8fd719c0c --- /dev/null +++ b/tests/test_sql_refsols/cryptbank_filter_count_44_raw_sqlite.sql @@ -0,0 +1,5 @@ +SELECT + COUNT(*) AS n +FROM crbnk.customers +WHERE + COALESCE(CAST(STRFTIME('%Y', DATE(c_birthday, '+472 days')) AS INTEGER), 1990) IN (1990, 1991) diff --git a/tests/test_sql_refsols/cryptbank_filter_count_44_rewrite_sqlite.sql b/tests/test_sql_refsols/cryptbank_filter_count_44_rewrite_sqlite.sql new file mode 100644 index 000000000..c51703ad0 --- /dev/null +++ b/tests/test_sql_refsols/cryptbank_filter_count_44_rewrite_sqlite.sql @@ -0,0 +1,5 @@ +SELECT + COUNT(*) AS n +FROM crbnk.customers +WHERE + c_birthday IN ('1990-07-31', '1989-04-07') OR c_birthday IS NULL diff --git a/tests/test_sql_refsols/cryptbank_filter_count_45_raw_sqlite.sql b/tests/test_sql_refsols/cryptbank_filter_count_45_raw_sqlite.sql new file mode 100644 index 000000000..919cc9063 --- /dev/null +++ b/tests/test_sql_refsols/cryptbank_filter_count_45_raw_sqlite.sql @@ -0,0 +1,5 @@ +SELECT + COUNT(*) AS n +FROM crbnk.customers +WHERE + COALESCE(CAST(STRFTIME('%Y', DATE(c_birthday, '+472 days')) AS INTEGER), 1990) IN (1990, 2005) diff --git a/tests/test_sql_refsols/cryptbank_filter_count_45_rewrite_sqlite.sql 
b/tests/test_sql_refsols/cryptbank_filter_count_45_rewrite_sqlite.sql new file mode 100644 index 000000000..14d064359 --- /dev/null +++ b/tests/test_sql_refsols/cryptbank_filter_count_45_rewrite_sqlite.sql @@ -0,0 +1,5 @@ +SELECT + COUNT(*) AS n +FROM crbnk.customers +WHERE + c_birthday = '1989-04-07' OR c_birthday IS NULL diff --git a/tests/test_sql_refsols/cryptbank_filter_count_46_raw_sqlite.sql b/tests/test_sql_refsols/cryptbank_filter_count_46_raw_sqlite.sql new file mode 100644 index 000000000..0142b992f --- /dev/null +++ b/tests/test_sql_refsols/cryptbank_filter_count_46_raw_sqlite.sql @@ -0,0 +1,5 @@ +SELECT + COUNT(*) AS n +FROM crbnk.customers +WHERE + COALESCE(CAST(STRFTIME('%Y', DATE(c_birthday, '+472 days')) AS INTEGER), 2005) IN (2005, 2006) diff --git a/tests/test_sql_refsols/cryptbank_filter_count_46_rewrite_sqlite.sql b/tests/test_sql_refsols/cryptbank_filter_count_46_rewrite_sqlite.sql new file mode 100644 index 000000000..344e0ac38 --- /dev/null +++ b/tests/test_sql_refsols/cryptbank_filter_count_46_rewrite_sqlite.sql @@ -0,0 +1,5 @@ +SELECT + COUNT(*) AS n +FROM crbnk.customers +WHERE + c_birthday IS NULL diff --git a/tests/test_sql_refsols/cryptbank_filter_count_47_raw_sqlite.sql b/tests/test_sql_refsols/cryptbank_filter_count_47_raw_sqlite.sql new file mode 100644 index 000000000..1be1a96ad --- /dev/null +++ b/tests/test_sql_refsols/cryptbank_filter_count_47_raw_sqlite.sql @@ -0,0 +1,5 @@ +SELECT + COUNT(*) AS n +FROM crbnk.customers +WHERE + NOT COALESCE(CAST(STRFTIME('%Y', DATE(c_birthday, '+472 days')) AS INTEGER), 1990) IN (1990, 1991) diff --git a/tests/test_sql_refsols/cryptbank_filter_count_47_rewrite_sqlite.sql b/tests/test_sql_refsols/cryptbank_filter_count_47_rewrite_sqlite.sql new file mode 100644 index 000000000..de41664e9 --- /dev/null +++ b/tests/test_sql_refsols/cryptbank_filter_count_47_rewrite_sqlite.sql @@ -0,0 +1,5 @@ +SELECT + COUNT(*) AS n +FROM crbnk.customers +WHERE + NOT c_birthday IN ('1990-07-31', '1989-04-07') AND 
NOT c_birthday IS NULL diff --git a/tests/test_sql_refsols/cryptbank_filter_count_48_raw_sqlite.sql b/tests/test_sql_refsols/cryptbank_filter_count_48_raw_sqlite.sql new file mode 100644 index 000000000..fe7d8295f --- /dev/null +++ b/tests/test_sql_refsols/cryptbank_filter_count_48_raw_sqlite.sql @@ -0,0 +1,5 @@ +SELECT + COUNT(*) AS n +FROM crbnk.customers +WHERE + NOT COALESCE(CAST(STRFTIME('%Y', DATE(c_birthday, '+472 days')) AS INTEGER), 1990) IN (1990, 2005) diff --git a/tests/test_sql_refsols/cryptbank_filter_count_48_rewrite_sqlite.sql b/tests/test_sql_refsols/cryptbank_filter_count_48_rewrite_sqlite.sql new file mode 100644 index 000000000..95c463ed7 --- /dev/null +++ b/tests/test_sql_refsols/cryptbank_filter_count_48_rewrite_sqlite.sql @@ -0,0 +1,5 @@ +SELECT + COUNT(*) AS n +FROM crbnk.customers +WHERE + NOT c_birthday IS NULL AND c_birthday <> '1989-04-07' diff --git a/tests/test_sql_refsols/cryptbank_filter_count_49_raw_sqlite.sql b/tests/test_sql_refsols/cryptbank_filter_count_49_raw_sqlite.sql new file mode 100644 index 000000000..57cc1e62a --- /dev/null +++ b/tests/test_sql_refsols/cryptbank_filter_count_49_raw_sqlite.sql @@ -0,0 +1,5 @@ +SELECT + COUNT(*) AS n +FROM crbnk.customers +WHERE + NOT COALESCE(CAST(STRFTIME('%Y', DATE(c_birthday, '+472 days')) AS INTEGER), 2005) IN (2005, 2006) diff --git a/tests/test_sql_refsols/cryptbank_filter_count_49_rewrite_sqlite.sql b/tests/test_sql_refsols/cryptbank_filter_count_49_rewrite_sqlite.sql new file mode 100644 index 000000000..2dfd1a393 --- /dev/null +++ b/tests/test_sql_refsols/cryptbank_filter_count_49_rewrite_sqlite.sql @@ -0,0 +1,5 @@ +SELECT + COUNT(*) AS n +FROM crbnk.customers +WHERE + NOT c_birthday IS NULL From 940dd16e40821d92a85fb47073f22ae4f4068954 Mon Sep 17 00:00:00 2001 From: knassre-bodo Date: Fri, 31 Oct 2025 05:29:52 -0400 Subject: [PATCH 23/40] Added remaining tests [RUN CI] --- .../mask_server_candidate_visitor.py | 17 +- .../conversion/mask_server_rewrite_shuttle.py | 4 - 
tests/mock_server/lookup_table.py | 353 +++++++++++++++++- tests/test_masked_sqlite.py | 234 ++++++++++-- .../cryptbank_agg_07_raw.txt | 3 + .../cryptbank_agg_07_rewrite.txt | 3 + .../cryptbank_filter_count_18_rewrite.txt | 2 +- .../cryptbank_filter_count_50_raw.txt | 4 + .../cryptbank_filter_count_50_rewrite.txt | 4 + .../cryptbank_filter_count_51_raw.txt | 4 + .../cryptbank_filter_count_51_rewrite.txt | 4 + .../cryptbank_filter_count_52_raw.txt | 4 + .../cryptbank_filter_count_52_rewrite.txt | 4 + .../cryptbank_filter_count_53_raw.txt | 4 + .../cryptbank_filter_count_53_rewrite.txt | 4 + .../cryptbank_filter_count_54_raw.txt | 4 + .../cryptbank_filter_count_54_rewrite.txt | 4 + .../cryptbank_filter_count_55_raw.txt | 4 + .../cryptbank_filter_count_55_rewrite.txt | 4 + .../cryptbank_filter_count_56_raw.txt | 4 + .../cryptbank_filter_count_56_rewrite.txt | 4 + .../cryptbank_filter_count_57_raw.txt | 4 + .../cryptbank_filter_count_57_rewrite.txt | 4 + .../cryptbank_filter_count_58_raw.txt | 4 + .../cryptbank_filter_count_58_rewrite.txt | 4 + .../cryptbank_agg_07_raw_sqlite.sql | 113 ++++++ .../cryptbank_agg_07_rewrite_sqlite.sql | 66 ++++ ...yptbank_filter_count_18_rewrite_sqlite.sql | 2 +- .../cryptbank_filter_count_50_raw_sqlite.sql | 5 + ...yptbank_filter_count_50_rewrite_sqlite.sql | 5 + .../cryptbank_filter_count_51_raw_sqlite.sql | 5 + ...yptbank_filter_count_51_rewrite_sqlite.sql | 5 + .../cryptbank_filter_count_52_raw_sqlite.sql | 7 + ...yptbank_filter_count_52_rewrite_sqlite.sql | 5 + .../cryptbank_filter_count_53_raw_sqlite.sql | 5 + ...yptbank_filter_count_53_rewrite_sqlite.sql | 5 + .../cryptbank_filter_count_54_raw_sqlite.sql | 16 + ...yptbank_filter_count_54_rewrite_sqlite.sql | 16 + .../cryptbank_filter_count_55_raw_sqlite.sql | 5 + ...yptbank_filter_count_55_rewrite_sqlite.sql | 5 + .../cryptbank_filter_count_56_raw_sqlite.sql | 33 ++ ...yptbank_filter_count_56_rewrite_sqlite.sql | 33 ++ .../cryptbank_filter_count_57_raw_sqlite.sql | 17 + 
...yptbank_filter_count_57_rewrite_sqlite.sql | 17 + .../cryptbank_filter_count_58_raw_sqlite.sql | 25 ++ ...yptbank_filter_count_58_rewrite_sqlite.sql | 25 ++ 46 files changed, 1026 insertions(+), 77 deletions(-) create mode 100644 tests/test_plan_refsols/cryptbank_agg_07_raw.txt create mode 100644 tests/test_plan_refsols/cryptbank_agg_07_rewrite.txt create mode 100644 tests/test_plan_refsols/cryptbank_filter_count_50_raw.txt create mode 100644 tests/test_plan_refsols/cryptbank_filter_count_50_rewrite.txt create mode 100644 tests/test_plan_refsols/cryptbank_filter_count_51_raw.txt create mode 100644 tests/test_plan_refsols/cryptbank_filter_count_51_rewrite.txt create mode 100644 tests/test_plan_refsols/cryptbank_filter_count_52_raw.txt create mode 100644 tests/test_plan_refsols/cryptbank_filter_count_52_rewrite.txt create mode 100644 tests/test_plan_refsols/cryptbank_filter_count_53_raw.txt create mode 100644 tests/test_plan_refsols/cryptbank_filter_count_53_rewrite.txt create mode 100644 tests/test_plan_refsols/cryptbank_filter_count_54_raw.txt create mode 100644 tests/test_plan_refsols/cryptbank_filter_count_54_rewrite.txt create mode 100644 tests/test_plan_refsols/cryptbank_filter_count_55_raw.txt create mode 100644 tests/test_plan_refsols/cryptbank_filter_count_55_rewrite.txt create mode 100644 tests/test_plan_refsols/cryptbank_filter_count_56_raw.txt create mode 100644 tests/test_plan_refsols/cryptbank_filter_count_56_rewrite.txt create mode 100644 tests/test_plan_refsols/cryptbank_filter_count_57_raw.txt create mode 100644 tests/test_plan_refsols/cryptbank_filter_count_57_rewrite.txt create mode 100644 tests/test_plan_refsols/cryptbank_filter_count_58_raw.txt create mode 100644 tests/test_plan_refsols/cryptbank_filter_count_58_rewrite.txt create mode 100644 tests/test_sql_refsols/cryptbank_agg_07_raw_sqlite.sql create mode 100644 tests/test_sql_refsols/cryptbank_agg_07_rewrite_sqlite.sql create mode 100644 
tests/test_sql_refsols/cryptbank_filter_count_50_raw_sqlite.sql create mode 100644 tests/test_sql_refsols/cryptbank_filter_count_50_rewrite_sqlite.sql create mode 100644 tests/test_sql_refsols/cryptbank_filter_count_51_raw_sqlite.sql create mode 100644 tests/test_sql_refsols/cryptbank_filter_count_51_rewrite_sqlite.sql create mode 100644 tests/test_sql_refsols/cryptbank_filter_count_52_raw_sqlite.sql create mode 100644 tests/test_sql_refsols/cryptbank_filter_count_52_rewrite_sqlite.sql create mode 100644 tests/test_sql_refsols/cryptbank_filter_count_53_raw_sqlite.sql create mode 100644 tests/test_sql_refsols/cryptbank_filter_count_53_rewrite_sqlite.sql create mode 100644 tests/test_sql_refsols/cryptbank_filter_count_54_raw_sqlite.sql create mode 100644 tests/test_sql_refsols/cryptbank_filter_count_54_rewrite_sqlite.sql create mode 100644 tests/test_sql_refsols/cryptbank_filter_count_55_raw_sqlite.sql create mode 100644 tests/test_sql_refsols/cryptbank_filter_count_55_rewrite_sqlite.sql create mode 100644 tests/test_sql_refsols/cryptbank_filter_count_56_raw_sqlite.sql create mode 100644 tests/test_sql_refsols/cryptbank_filter_count_56_rewrite_sqlite.sql create mode 100644 tests/test_sql_refsols/cryptbank_filter_count_57_raw_sqlite.sql create mode 100644 tests/test_sql_refsols/cryptbank_filter_count_57_rewrite_sqlite.sql create mode 100644 tests/test_sql_refsols/cryptbank_filter_count_58_raw_sqlite.sql create mode 100644 tests/test_sql_refsols/cryptbank_filter_count_58_rewrite_sqlite.sql diff --git a/pydough/conversion/mask_server_candidate_visitor.py b/pydough/conversion/mask_server_candidate_visitor.py index 840ea0366..225017cef 100644 --- a/pydough/conversion/mask_server_candidate_visitor.py +++ b/pydough/conversion/mask_server_candidate_visitor.py @@ -419,22 +419,17 @@ def convert_slice_call_to_server_expression( if ( start_literal is None or stop_literal is None - or step_literal is None or len(start_literal) != 1 or len(stop_literal) != 1 - or len(step_literal) 
!= 1 + or step_literal not in ([1], ["NULL"]) ): return None - match (start_literal[0], stop_literal[0], step_literal[0]): - case (int(start), int(stop), int(step)) if ( - start >= 0 and stop > start and step == 1 - ): + print(start_literal, stop_literal, step_literal) + match (start_literal[0], stop_literal[0]): + case (int(start), int(stop)) if start >= 0 and stop > start: start_int = start length_int = stop - start - case (int(start), int(stop), None) if start >= 0 and stop > start: - start_int = start - length_int = stop - start - case (None, int(stop), None) if stop > 0: + case ("NULL", int(stop)) if stop > 0: start_int = 0 length_int = stop case _: @@ -600,7 +595,7 @@ def convert_dateadd_call_to_server_expression( `unit_str`: The string representing the unit to add. """ unit = DateTimeUnit.from_string(unit_str) - if unit is None: + if unit is None or unit == DateTimeUnit.WEEK: return None result: list[str | int | float | None | bool] = ["DATEADD", 3] if sign_str == "-": diff --git a/pydough/conversion/mask_server_rewrite_shuttle.py b/pydough/conversion/mask_server_rewrite_shuttle.py index 574079c77..19721b902 100644 --- a/pydough/conversion/mask_server_rewrite_shuttle.py +++ b/pydough/conversion/mask_server_rewrite_shuttle.py @@ -84,10 +84,6 @@ def process_batch(self) -> None: expression_list, ) in self.candidate_visitor.candidate_pool.items(): ancillary_info.append((expr, input_expr)) - print( - f"srv.{mask_op.table_path}.{mask_op.masking_metadata.column_name}", - tuple(expression_list), - ) batch.append( MaskServerInput( table_path=mask_op.table_path, diff --git a/tests/mock_server/lookup_table.py b/tests/mock_server/lookup_table.py index 3bcf36e6d..2032fa06a 100644 --- a/tests/mock_server/lookup_table.py +++ b/tests/mock_server/lookup_table.py @@ -477,7 +477,7 @@ "type": "literal", "operator": "IN", "values": [ - "ophia.jackson@mail.org", + "ophia.jackson@mail.orgs", "livia.a22@gmail.como", ".gonzalez@ymail.comm", "opez.luke99@gmail.coml", @@ -713,22 
+713,337 @@ ], "count": 1, }, + ( + "srv.CRBNK.CUSTOMERS.c_fname", + ("IN", 4, "SLICE", 3, "__col__", 0, 1, "q", "r", "s"), + ): {"type": "literal", "operator": "IN", "values": ["QUEENIE", "ROBERT", "SOPHIA"]}, + ( + "srv.CRBNK.CUSTOMERS.c_lname", + ( + "CONTAINS", + 2, + "__col__", + "CONCAT", + 2, + "e", + "IFF", + 3, + "IN", + 4, + "SLICE", + 3, + "__col__", + 0, + 1, + "q", + "r", + "s", + "z", + "e", + ), + ): { + "type": "literal", + "operator": "IN", + "values": ["LEE", "RODRIGUEZ"], + "count": 2, + }, + ("srv.CRBNK.CUSTOMERS.c_fname", ("EQUAL", 2, "SLICE", 3, "__col__", 0, 1, "i")): { + "type": "literal", + "operator": "IN", + "values": ["ISABEL"], + "count": 1, + }, + ( + "srv.CRBNK.CUSTOMERS.c_fname", + ("IN", 6, "SLICE", 3, "__col__", 1, 2, "ar", "li", "ra", "to", "am"), + ): { + "type": "literal", + "operator": "IN", + "values": [ + "ALICE", + "CAROL", + "FRANK", + "GRACE", + "JAMES", + "KAREN", + "MARIA", + "OLIVIA", + ], + "count": 5, + }, + ( + "srv.CRBNK.TRANSACTIONS.t_ts", + ("EQUAL", 2, "DATETRUNC", 2, "year", "__col__", "2023-01-01"), + ): { + "type": "literal", + "operator": "IN", + "values": [ + "2022-12-31 17:42:54", + "2023-01-04 12:05:15", + "2023-01-07 22:11:27", + "2023-01-20 04:38:03", + "2023-01-20 16:40:54", + "2023-01-27 15:13:18", + "2023-01-30 19:58:26", + "2023-02-02 19:12:58", + "2023-02-11 11:13:53", + "2023-02-11 12:32:55", + "2023-02-15 21:54:29", + "2023-02-16 14:18:36", + "2023-02-28 07:11:29", + "2023-03-07 01:26:10", + "2023-03-08 18:58:18", + "2023-03-14 14:23:33", + "2023-03-16 06:17:44", + "2023-03-17 08:48:16", + "2023-03-24 03:33:40", + "2023-03-26 06:52:52", + "2023-04-18 00:35:40", + "2023-04-25 18:54:26", + "2023-04-29 04:58:30", + "2023-05-04 23:30:10", + "2023-05-12 04:42:28", + "2023-05-17 18:54:12", + "2023-05-19 10:10:44", + "2023-05-21 13:52:14", + "2023-05-24 03:51:10", + "2023-06-01 13:50:10", + "2023-06-01 13:50:14", + "2023-06-04 10:35:26", + "2023-06-11 21:53:04", + "2023-06-25 15:06:06", + "2023-06-25 
21:58:37", + "2023-06-27 03:21:19", + "2023-06-27 10:34:20", + "2023-06-30 15:27:03", + "2023-07-07 15:17:47", + "2023-07-17 03:23:15", + "2023-07-18 14:41:26", + "2023-08-03 20:24:35", + "2023-08-11 20:25:39", + "2023-08-29 03:07:18", + "2023-09-01 16:50:48", + "2023-09-08 09:30:23", + "2023-09-13 06:42:39", + "2023-09-15 09:00:02", + "2023-09-30 08:57:30", + "2023-10-15 02:47:04", + "2023-10-19 09:40:06", + "2023-10-30 00:20:45", + "2023-11-08 12:52:24", + "2023-11-10 17:20:29", + "2023-11-16 11:30:24", + "2023-11-21 15:17:10", + "2023-11-28 06:34:03", + "2023-12-07 14:11:33", + "2023-12-15 05:57:23", + "2023-12-16 00:51:23", + "2023-12-23 07:54:22", + ], + "count": 61, + }, + ( + "srv.CRBNK.TRANSACTIONS.t_ts", + ("EQUAL", 2, "DATETRUNC", 2, "quarter", "__col__", "2023-04-01"), + ): { + "type": "literal", + "operator": "IN", + "values": [ + "2023-04-18 00:35:40", + "2023-04-25 18:54:26", + "2023-04-29 04:58:30", + "2023-05-04 23:30:10", + "2023-05-12 04:42:28", + "2023-05-17 18:54:12", + "2023-05-19 10:10:44", + "2023-05-21 13:52:14", + "2023-05-24 03:51:10", + "2023-06-01 13:50:10", + "2023-06-01 13:50:14", + "2023-06-04 10:35:26", + "2023-06-11 21:53:04", + "2023-06-25 15:06:06", + "2023-06-25 21:58:37", + "2023-06-27 03:21:19", + "2023-06-27 10:34:20", + ], + "count": 17, + }, + ( + "srv.CRBNK.TRANSACTIONS.t_ts", + ("EQUAL", 2, "DATETRUNC", 2, "month", "__col__", "2023-06-01"), + ): { + "type": "literal", + "operator": "IN", + "values": [ + "2023-06-01 13:50:10", + "2023-06-01 13:50:14", + "2023-06-04 10:35:26", + "2023-06-11 21:53:04", + "2023-06-25 15:06:06", + "2023-06-25 21:58:37", + "2023-06-27 03:21:19", + "2023-06-27 10:34:20", + ], + "count": 8, + }, + ( + "srv.CRBNK.TRANSACTIONS.t_ts", + ("EQUAL", 2, "DATETRUNC", 2, "day", "__col__", "2023-06-02"), + ): { + "type": "literal", + "operator": "IN", + "values": [ + "2023-06-01 13:50:10", + "2023-06-01 13:50:14", + ], + "count": 2, + }, + ( + "srv.CRBNK.TRANSACTIONS.t_ts", + ("EQUAL", 2, "DATETRUNC", 2, 
"hour", "__col__", "2023-06-02 04:00:00"), + ): { + "type": "literal", + "operator": "IN", + "values": [ + "2023-06-01 13:50:10", + "2023-06-01 13:50:14", + ], + "count": 2, + }, + ( + "srv.CRBNK.TRANSACTIONS.t_ts", + ("EQUAL", 2, "DATETRUNC", 2, "minute", "__col__", "2023-06-02 04:55:00"), + ): { + "type": "literal", + "operator": "IN", + "values": [ + "2023-06-01 13:50:10", + "2023-06-01 13:50:14", + ], + "count": 2, + }, + ( + "srv.CRBNK.TRANSACTIONS.t_ts", + ("EQUAL", 2, "DATETRUNC", 2, "second", "__col__", "2023-06-02 04:55:31"), + ): { + "type": "literal", + "operator": "IN", + "values": [ + "2023-06-01 13:50:10", + ], + "count": 1, + }, + ( + "srv.CRBNK.TRANSACTIONS.t_ts", + ("EQUAL", 2, "DATEADD", 3, 1, "years", "__col__", "2020-11-11 18:00:52"), + ): { + "type": "literal", + "operator": "IN", + "values": [ + "2019-11-11 02:55:31", + ], + "count": 1, + }, + ( + "srv.CRBNK.TRANSACTIONS.t_ts", + ("EQUAL", 2, "DATEADD", 3, 2, "quarters", "__col__", "2020-05-11 18:00:52"), + ): { + "type": "literal", + "operator": "IN", + "values": [ + "2019-11-11 02:55:31", + ], + "count": 1, + }, + ( + "srv.CRBNK.TRANSACTIONS.t_ts", + ("EQUAL", 2, "DATEADD", 3, -5, "months", "__col__", "2019-06-11 18:00:52"), + ): { + "type": "literal", + "operator": "IN", + "values": [ + "2019-11-11 02:55:31", + ], + "count": 1, + }, + ( + "srv.CRBNK.TRANSACTIONS.t_ts", + ("EQUAL", 2, "DATEADD", 3, 10, "days", "__col__", "2019-11-21 18:00:52"), + ): { + "type": "literal", + "operator": "IN", + "values": [ + "2019-11-11 02:55:31", + ], + "count": 1, + }, + ( + "srv.CRBNK.TRANSACTIONS.t_ts", + ("EQUAL", 2, "DATEADD", 3, 1000, "hours", "__col__", "2019-12-23 10:00:52"), + ): { + "type": "literal", + "operator": "IN", + "values": [ + "2019-11-11 02:55:31", + ], + "count": 1, + }, + ( + "srv.CRBNK.TRANSACTIONS.t_ts", + ("EQUAL", 2, "DATEADD", 3, 10000, "minutes", "__col__", "2019-11-18 16:40:52"), + ): { + "type": "literal", + "operator": "IN", + "values": [ + "2019-11-11 02:55:31", + ], + 
"count": 1, + }, + ( + "srv.CRBNK.TRANSACTIONS.t_ts", + ( + "EQUAL", + 2, + "DATEADD", + 3, + -1000000, + "seconds", + "__col__", + "2019-10-31 04:14:12", + ), + ): { + "type": "literal", + "operator": "IN", + "values": [ + "2019-11-11 02:55:31", + ], + "count": 1, + }, + ( + "srv.CRBNK.TRANSACTIONS.t_ts", + ( + "EQUAL", + 2, + "DATEADD", + 3, + -1, + "days", + "DATETRUNC", + 2, + "month", + "__col__", + "2019-10-31", + ), + ): { + "type": "literal", + "operator": "IN", + "values": [ + "2019-11-02 11:58:37", + "2019-11-02 12:54:09", + "2019-11-11 02:55:31", + "2019-11-11 15:44:22", + ], + "count": 4, + }, } - - -""" -SELECT c_birthday, DATE(c_birthday, '+472 days') -FROM customers -WHERE STRFTIME('%Y', COALESCE(DATE(c_birthday, '+472 days'), '1990-01-01')) IN ('1990', '1991') -; - -SELECT c_birthday, DATE(c_birthday, '+472 days') -FROM customers -WHERE STRFTIME('%Y', COALESCE(DATE(c_birthday, '+472 days'), '1990-01-01')) IN ('1990') -; - -SELECT c_birthday, DATE(c_birthday, '+472 days') -FROM customers -WHERE STRFTIME('%Y', COALESCE(DATE(c_birthday, '+472 days'), '1990-01-01')) IN ('2005') -; -""" diff --git a/tests/test_masked_sqlite.py b/tests/test_masked_sqlite.py index 360ce1cfa..04b7e2189 100644 --- a/tests/test_masked_sqlite.py +++ b/tests/test_masked_sqlite.py @@ -23,31 +23,6 @@ transform_and_exec_pydough, ) -""" -ADD TESTS FOR: -- DATETIME - BAD (singleton) -- DATETIME - BAD (using current ts) -- DATETIME - BAD (nested ) -- DATETIME - BAD (add week) -- DATETIME - BAD (trunc week) -- DATETIME - GOOD (nested) -- DATETIME - GOOD (add year) -- DATETIME - GOOD (trunc year) -- DATETIME - GOOD (add quarter) -- DATETIME - GOOD (trunc quarter) -- DATETIME - GOOD (add month) -- DATETIME - GOOD (trunc month) -- DATETIME - GOOD (add day) -- DATETIME - GOOD (trunc day) -- DATETIME - GOOD (add hour) -- DATETIME - GOOD (trunc hour) -- DATETIME - GOOD (add minute) -- DATETIME - GOOD (trunc minute) -- DATETIME - GOOD (add second) -- DATETIME - GOOD (trunc second) -- IFF 
-""" - @pytest.fixture( params=[ @@ -586,16 +561,6 @@ ), id="cryptbank_filter_count_43", ), - pytest.param( - PyDoughPandasTest( - "selected_customers = customers.WHERE(YEAR(COALESCE(birthday)) == )\n" - "result = CRYPTBANK.CALCULATE(n=COUNT(selected_customers))", - "CRYPTBANK", - lambda: pd.DataFrame({"n": [1]}), - "cryptbank_filter_count_43", - ), - id="cryptbank_filter_count_43", - ), pytest.param( PyDoughPandasTest( "selected_customers = customers.WHERE(ISIN(DEFAULT_TO(YEAR(birthday), 1990), (1990, 1991)))\n" @@ -656,6 +621,96 @@ ), id="cryptbank_filter_count_49", ), + pytest.param( + PyDoughPandasTest( + "selected_customers = customers.WHERE(~ISIN(DEFAULT_TO(YEAR(birthday), 2005), (2005, 2006)))\n" + "result = CRYPTBANK.CALCULATE(n=COUNT(selected_customers))", + "CRYPTBANK", + lambda: pd.DataFrame({"n": [18]}), + "cryptbank_filter_count_50", + ), + id="cryptbank_filter_count_50", + ), + pytest.param( + PyDoughPandasTest( + "selected_customers = customers.WHERE(CONTAINS(IFF(ISIN(first_name[:1], ('q', 'r', 's')), first_name, last_name), 'ee'))\n" + "result = CRYPTBANK.CALCULATE(n=COUNT(selected_customers))", + "CRYPTBANK", + lambda: pd.DataFrame({"n": [4]}), + "cryptbank_filter_count_51", + ), + id="cryptbank_filter_count_51", + ), + pytest.param( + PyDoughPandasTest( + "selected_customers = customers.WHERE(CONTAINS(last_name, JOIN_STRINGS('', 'e', IFF(ISIN(last_name[:1], ('q', 'r', 's')), 'z', 'e'))))\n" + "result = CRYPTBANK.CALCULATE(n=COUNT(selected_customers))", + "CRYPTBANK", + lambda: pd.DataFrame({"n": [5]}), + "cryptbank_filter_count_52", + ), + id="cryptbank_filter_count_52", + ), + pytest.param( + PyDoughPandasTest( + "selected_customers = customers.WHERE(first_name[0:1] == 'i')\n" + "result = CRYPTBANK.CALCULATE(n=COUNT(selected_customers))", + "CRYPTBANK", + lambda: pd.DataFrame({"n": [1]}), + "cryptbank_filter_count_53", + ), + id="cryptbank_filter_count_53", + ), + pytest.param( + PyDoughPandasTest( + "selected_customers = 
customers.WHERE(ISIN(first_name[-1:], list('aeiou')))\n" + "result = CRYPTBANK.CALCULATE(n=COUNT(selected_customers))", + "CRYPTBANK", + lambda: pd.DataFrame({"n": [7]}), + "cryptbank_filter_count_54", + ), + id="cryptbank_filter_count_54", + ), + pytest.param( + PyDoughPandasTest( + "selected_customers = customers.WHERE(ISIN(first_name[1:3], ['ar', 'li', 'ra', 'to', 'am']))\n" + "result = CRYPTBANK.CALCULATE(n=COUNT(selected_customers))", + "CRYPTBANK", + lambda: pd.DataFrame({"n": [8]}), + "cryptbank_filter_count_55", + ), + id="cryptbank_filter_count_55", + ), + pytest.param( + PyDoughPandasTest( + "selected_customers = customers.WHERE(ISIN(first_name[-2:-1], ['a', 'c', 'l']))\n" + "result = CRYPTBANK.CALCULATE(n=COUNT(selected_customers))", + "CRYPTBANK", + lambda: pd.DataFrame({"n": [5]}), + "cryptbank_filter_count_56", + ), + id="cryptbank_filter_count_56", + ), + pytest.param( + PyDoughPandasTest( + "selected_customers = customers.WHERE(CONTAINS(first_name[:-1], 'e'))\n" + "result = CRYPTBANK.CALCULATE(n=COUNT(selected_customers))", + "CRYPTBANK", + lambda: pd.DataFrame({"n": [8]}), + "cryptbank_filter_count_57", + ), + id="cryptbank_filter_count_57", + ), + pytest.param( + PyDoughPandasTest( + "selected_customers = customers.WHERE(CONTAINS(first_name[1:-1], 'e'))\n" + "result = CRYPTBANK.CALCULATE(n=COUNT(selected_customers))", + "CRYPTBANK", + lambda: pd.DataFrame({"n": [7]}), + "cryptbank_filter_count_58", + ), + id="cryptbank_filter_count_58", + ), pytest.param( PyDoughPandasTest( "selected_transactions = transactions.WHERE((YEAR(time_stamp) == 2022) & (MONTH(time_stamp) == 6))\n" @@ -767,6 +822,59 @@ ), id="cryptbank_agg_06", ), + pytest.param( + PyDoughPandasTest( + "result = CRYPTBANK.CALCULATE(" + " n_yr=SUM(DATETIME(transactions.time_stamp, 'start of year') == '2023-01-01')," + " n_qu=SUM(DATETIME(transactions.time_stamp, 'start of quarter') == '2023-04-01')," + " n_mo=SUM(DATETIME(transactions.time_stamp, 'start of month') == '2023-06-01')," + " 
n_we=SUM(DATETIME(transactions.time_stamp, 'start of week') == '2023-05-28')," + " n_da=SUM(DATETIME(transactions.time_stamp, 'start of day') == '2023-06-02')," + " n_ho=SUM(DATETIME(transactions.time_stamp, 'start of hour') == '2023-06-02 04:00:00')," + " n_mi=SUM(DATETIME(transactions.time_stamp, 'start of minute') == '2023-06-02 04:55:00')," + " n_se=SUM(DATETIME(transactions.time_stamp, 'start of second') == '2023-06-02 04:55:31')," + " n_cts=SUM(transactions.time_stamp == DATETIME('now', 'start of day'))," + " n_dts=SUM(transactions.time_stamp == DATETIME(JOIN_STRINGS('-', '2025', '12', '31')))," + " n_nst=SUM(DATETIME(transactions.time_stamp, 'start of week', '+3 days') == '2023-05-31')," + " n_ayr=SUM(DATETIME(transactions.time_stamp, '+1 Y') == '2020-11-11 18:00:52')," + " n_aqu=SUM(DATETIME(transactions.time_stamp, '+2 q') == '2020-05-11 18:00:52')," + " n_amo=SUM(DATETIME(transactions.time_stamp, '-5 Mm') == '2019-06-11 18:00:52')," + " n_awe=SUM(DATETIME(transactions.time_stamp, 'start of day', '+1 week') == '2023-06-09')," + " n_ada=SUM(DATETIME(transactions.time_stamp, '+10 DAYS') == '2019-11-21 18:00:52')," + " n_aho=SUM(DATETIME(transactions.time_stamp, '+1000 hour') == '2019-12-23 10:00:52')," + " n_ami=SUM(DATETIME(transactions.time_stamp, '+10000 minute') == '2019-11-18 16:40:52')," + " n_ase=SUM(DATETIME(transactions.time_stamp, '-1000000 s') == '2019-10-31 04:14:12')," + " n_ldm=SUM(DATETIME(transactions.time_stamp, 'start of month', '-1 day') == '2019-10-31')," + ")", + "CRYPTBANK", + lambda: pd.DataFrame( + { + "n_yr": [61], + "n_qu": [17], + "n_mo": [8], + "n_we": [2], + "n_da": [2], + "n_ho": [2], + "n_mi": [2], + "n_se": [1], + "n_cts": [0], + "n_dts": [0], + "n_nst": [2], + "n_ayr": [1], + "n_aqu": [1], + "n_amo": [1], + "n_awe": [2], + "n_ada": [1], + "n_aho": [1], + "n_ami": [1], + "n_ase": [1], + "n_ldm": [4], + } + ), + "cryptbank_agg_07", + ), + id="cryptbank_agg_07", + ), pytest.param( PyDoughPandasTest( "result = (" @@ -1005,12 
+1113,12 @@ def test_pipeline_e2e_cryptbank( against the refsol DataFrame. """ # Capture stdout to avoid polluting the console with logging calls - # with redirect_stdout(io.StringIO()): - cryptbank_pipeline_test_data.run_e2e_test( - masked_graphs, - sqlite_cryptbank_connection, - mask_server=mock_server_info, - ) + with redirect_stdout(io.StringIO()): + cryptbank_pipeline_test_data.run_e2e_test( + masked_graphs, + sqlite_cryptbank_connection, + mask_server=mock_server_info, + ) @pytest.mark.parametrize( @@ -1149,6 +1257,50 @@ def test_pipeline_e2e_cryptbank( ], id="cryptbank_agg_06", ), + pytest.param( + "result = CRYPTBANK.CALCULATE(" + " n_yr=SUM(DATETIME(transactions.time_stamp, 'start of year') == '2023-01-01')," + " n_qu=SUM(DATETIME(transactions.time_stamp, 'start of quarter') == '2023-04-01')," + " n_mo=SUM(DATETIME(transactions.time_stamp, 'start of month') == '2023-06-01')," + " n_we=SUM(DATETIME(transactions.time_stamp, 'start of week') == '2023-05-28')," + " n_da=SUM(DATETIME(transactions.time_stamp, 'start of day') == '2023-06-02')," + " n_ho=SUM(DATETIME(transactions.time_stamp, 'start of hour') == '2023-06-02 04:00:00')," + " n_mi=SUM(DATETIME(transactions.time_stamp, 'start of minute') == '2023-06-02 04:55:00')," + " n_se=SUM(DATETIME(transactions.time_stamp, 'start of second') == '2023-06-02 04:55:31')," + " n_cts=SUM(transactions.time_stamp == DATETIME('now', 'start of day'))," + " n_dts=SUM(transactions.time_stamp == DATETIME(JOIN_STRINGS('-', '2025', '12', '31')))," + " n_nst=SUM(DATETIME(transactions.time_stamp, 'start of week', '+3 days') == '2023-05-31')," + " n_ayr=SUM(DATETIME(transactions.time_stamp, '+1 Y') == '2020-11-11 18:00:52')," + " n_aqu=SUM(DATETIME(transactions.time_stamp, '+2 q') == '2020-05-11 18:00:52')," + " n_amo=SUM(DATETIME(transactions.time_stamp, '-5 Mm') == '2019-06-11 18:00:52')," + " n_awe=SUM(DATETIME(transactions.time_stamp, 'start of day', '+1 week') == '2023-06-09')," + " 
n_ada=SUM(DATETIME(transactions.time_stamp, '+10 DAYS') == '2019-11-21 18:00:52')," + " n_aho=SUM(DATETIME(transactions.time_stamp, '+1000 hour') == '2019-12-23 10:00:52')," + " n_ami=SUM(DATETIME(transactions.time_stamp, '+10000 minute') == '2019-11-18 16:40:52')," + " n_ase=SUM(DATETIME(transactions.time_stamp, '-1000000 s') == '2019-10-31 04:14:12')," + " n_ldm=SUM(DATETIME(transactions.time_stamp, 'start of month', '-1 day') == '2019-10-31')," + ")", + [ + { + "CRBNK.TRANSACTIONS.t_ts: ['EQUAL', 2, 'DATETRUNC', 2, 'day', '__col__', '2023-06-02']", + "CRBNK.TRANSACTIONS.t_ts: ['EQUAL', 2, 'DATETRUNC', 2, 'hour', '__col__', '2023-06-02 04:00:00']", + "CRBNK.TRANSACTIONS.t_ts: ['EQUAL', 2, 'DATETRUNC', 2, 'minute', '__col__', '2023-06-02 04:55:00']", + "CRBNK.TRANSACTIONS.t_ts: ['EQUAL', 2, 'DATETRUNC', 2, 'month', '__col__', '2023-06-01']", + "CRBNK.TRANSACTIONS.t_ts: ['EQUAL', 2, 'DATETRUNC', 2, 'quarter', '__col__', '2023-04-01']", + "CRBNK.TRANSACTIONS.t_ts: ['EQUAL', 2, 'DATETRUNC', 2, 'second', '__col__', '2023-06-02 04:55:31']", + "CRBNK.TRANSACTIONS.t_ts: ['EQUAL', 2, 'DATETRUNC', 2, 'year', '__col__', '2023-01-01']", + "CRBNK.TRANSACTIONS.t_ts: ['EQUAL', 2, 'DATEADD', 3, 1, 'years', '__col__', '2020-11-11 18:00:52']", + "CRBNK.TRANSACTIONS.t_ts: ['EQUAL', 2, 'DATEADD', 3, 2, 'quarters', '__col__', '2020-05-11 18:00:52']", + "CRBNK.TRANSACTIONS.t_ts: ['EQUAL', 2, 'DATEADD', 3, -5, 'months', '__col__', '2019-06-11 18:00:52']", + "CRBNK.TRANSACTIONS.t_ts: ['EQUAL', 2, 'DATEADD', 3, 10, 'days', '__col__', '2019-11-21 18:00:52']", + "CRBNK.TRANSACTIONS.t_ts: ['EQUAL', 2, 'DATEADD', 3, 1000, 'hours', '__col__', '2019-12-23 10:00:52']", + "CRBNK.TRANSACTIONS.t_ts: ['EQUAL', 2, 'DATEADD', 3, 10000, 'minutes', '__col__', '2019-11-18 16:40:52']", + "CRBNK.TRANSACTIONS.t_ts: ['EQUAL', 2, 'DATEADD', 3, -1000000, 'seconds', '__col__', '2019-10-31 04:14:12']", + "CRBNK.TRANSACTIONS.t_ts: ['EQUAL', 2, 'DATEADD', 3, -1, 'days', 'DATETRUNC', 2, 'month', '__col__', 
'2019-10-31']", + } + ], + id="cryptbank_agg_07", + ), pytest.param( "selected_accounts = accounts.WHERE(QUARTER(creation_timestamp) == DAY(creation_timestamp))\n" "result = CRYPTBANK.CALCULATE(n=COUNT(selected_accounts))", diff --git a/tests/test_plan_refsols/cryptbank_agg_07_raw.txt b/tests/test_plan_refsols/cryptbank_agg_07_raw.txt new file mode 100644 index 000000000..e5ad88ef3 --- /dev/null +++ b/tests/test_plan_refsols/cryptbank_agg_07_raw.txt @@ -0,0 +1,3 @@ +ROOT(columns=[('n_yr', DEFAULT_TO(sum_expr_31, 0:numeric)), ('n_qu', DEFAULT_TO(sum_expr_28, 0:numeric)), ('n_mo', DEFAULT_TO(sum_expr_26, 0:numeric)), ('n_we', DEFAULT_TO(sum_expr_30, 0:numeric)), ('n_da', DEFAULT_TO(sum_expr_39, 0:numeric)), ('n_ho', DEFAULT_TO(sum_expr_23, 0:numeric)), ('n_mi', DEFAULT_TO(sum_expr_25, 0:numeric)), ('n_se', DEFAULT_TO(sum_expr_29, 0:numeric)), ('n_cts', DEFAULT_TO(sum_expr_38, 0:numeric)), ('n_dts', DEFAULT_TO(sum_expr_22, 0:numeric)), ('n_nst', DEFAULT_TO(sum_expr_27, 0:numeric)), ('n_ayr', DEFAULT_TO(sum_expr_37, 0:numeric)), ('n_aqu', DEFAULT_TO(sum_expr_34, 0:numeric)), ('n_amo', DEFAULT_TO(sum_expr_33, 0:numeric)), ('n_awe', DEFAULT_TO(sum_expr_36, 0:numeric)), ('n_ada', DEFAULT_TO(sum_expr, 0:numeric)), ('n_aho', DEFAULT_TO(sum_expr_21, 0:numeric)), ('n_ami', DEFAULT_TO(sum_expr_32, 0:numeric)), ('n_ase', DEFAULT_TO(sum_expr_35, 0:numeric)), ('n_ldm', DEFAULT_TO(sum_expr_24, 0:numeric))], orderings=[]) + AGGREGATE(keys={}, aggregations={'sum_expr': SUM(DATETIME(UNMASK::(DATETIME([t_ts], '+54321 seconds')), '+10 DAYS':string) == '2019-11-21 18:00:52':string), 'sum_expr_21': SUM(DATETIME(UNMASK::(DATETIME([t_ts], '+54321 seconds')), '+1000 hour':string) == '2019-12-23 10:00:52':string), 'sum_expr_22': SUM(UNMASK::(DATETIME([t_ts], '+54321 seconds')) == DATETIME(JOIN_STRINGS('-':string, '2025':string, '12':string, '31':string))), 'sum_expr_23': SUM(DATETIME(UNMASK::(DATETIME([t_ts], '+54321 seconds')), 'start of hour':string) == '2023-06-02 04:00:00':string), 
'sum_expr_24': SUM(DATETIME(UNMASK::(DATETIME([t_ts], '+54321 seconds')), 'start of month':string, '-1 day':string) == '2019-10-31':string), 'sum_expr_25': SUM(DATETIME(UNMASK::(DATETIME([t_ts], '+54321 seconds')), 'start of minute':string) == '2023-06-02 04:55:00':string), 'sum_expr_26': SUM(DATETIME(UNMASK::(DATETIME([t_ts], '+54321 seconds')), 'start of month':string) == '2023-06-01':string), 'sum_expr_27': SUM(DATETIME(UNMASK::(DATETIME([t_ts], '+54321 seconds')), 'start of week':string, '+3 days':string) == '2023-05-31':string), 'sum_expr_28': SUM(DATETIME(UNMASK::(DATETIME([t_ts], '+54321 seconds')), 'start of quarter':string) == '2023-04-01':string), 'sum_expr_29': SUM(DATETIME(UNMASK::(DATETIME([t_ts], '+54321 seconds')), 'start of second':string) == '2023-06-02 04:55:31':string), 'sum_expr_30': SUM(DATETIME(UNMASK::(DATETIME([t_ts], '+54321 seconds')), 'start of week':string) == '2023-05-28':string), 'sum_expr_31': SUM(DATETIME(UNMASK::(DATETIME([t_ts], '+54321 seconds')), 'start of year':string) == '2023-01-01':string), 'sum_expr_32': SUM(DATETIME(UNMASK::(DATETIME([t_ts], '+54321 seconds')), '+10000 minute':string) == '2019-11-18 16:40:52':string), 'sum_expr_33': SUM(DATETIME(UNMASK::(DATETIME([t_ts], '+54321 seconds')), '-5 Mm':string) == '2019-06-11 18:00:52':string), 'sum_expr_34': SUM(DATETIME(UNMASK::(DATETIME([t_ts], '+54321 seconds')), '+2 q':string) == '2020-05-11 18:00:52':string), 'sum_expr_35': SUM(DATETIME(UNMASK::(DATETIME([t_ts], '+54321 seconds')), '-1000000 s':string) == '2019-10-31 04:14:12':string), 'sum_expr_36': SUM(DATETIME(UNMASK::(DATETIME([t_ts], '+54321 seconds')), 'start of day':string, '+1 week':string) == '2023-06-09':string), 'sum_expr_37': SUM(DATETIME(UNMASK::(DATETIME([t_ts], '+54321 seconds')), '+1 Y':string) == '2020-11-11 18:00:52':string), 'sum_expr_38': SUM(UNMASK::(DATETIME([t_ts], '+54321 seconds')) == DATETIME('now':string, 'start of day':string)), 'sum_expr_39': SUM(DATETIME(UNMASK::(DATETIME([t_ts], '+54321 
seconds')), 'start of day':string) == '2023-06-02':string)}) + SCAN(table=CRBNK.TRANSACTIONS, columns={'t_ts': t_ts}) diff --git a/tests/test_plan_refsols/cryptbank_agg_07_rewrite.txt b/tests/test_plan_refsols/cryptbank_agg_07_rewrite.txt new file mode 100644 index 000000000..ed2497185 --- /dev/null +++ b/tests/test_plan_refsols/cryptbank_agg_07_rewrite.txt @@ -0,0 +1,3 @@ +ROOT(columns=[('n_yr', DEFAULT_TO(sum_expr_31, 0:numeric)), ('n_qu', DEFAULT_TO(sum_expr_28, 0:numeric)), ('n_mo', DEFAULT_TO(sum_expr_26, 0:numeric)), ('n_we', DEFAULT_TO(sum_expr_30, 0:numeric)), ('n_da', DEFAULT_TO(sum_expr_39, 0:numeric)), ('n_ho', DEFAULT_TO(sum_expr_39, 0:numeric)), ('n_mi', DEFAULT_TO(sum_expr_39, 0:numeric)), ('n_se', DEFAULT_TO(sum_expr_29, 0:numeric)), ('n_cts', DEFAULT_TO(sum_expr_38, 0:numeric)), ('n_dts', DEFAULT_TO(sum_expr_22, 0:numeric)), ('n_nst', DEFAULT_TO(sum_expr_27, 0:numeric)), ('n_ayr', DEFAULT_TO(sum_expr, 0:numeric)), ('n_aqu', DEFAULT_TO(sum_expr, 0:numeric)), ('n_amo', DEFAULT_TO(sum_expr, 0:numeric)), ('n_awe', DEFAULT_TO(sum_expr_36, 0:numeric)), ('n_ada', DEFAULT_TO(sum_expr, 0:numeric)), ('n_aho', DEFAULT_TO(sum_expr, 0:numeric)), ('n_ami', DEFAULT_TO(sum_expr, 0:numeric)), ('n_ase', DEFAULT_TO(sum_expr, 0:numeric)), ('n_ldm', DEFAULT_TO(sum_expr_24, 0:numeric))], orderings=[]) + AGGREGATE(keys={}, aggregations={'sum_expr': SUM(t_ts == '2019-11-11 02:55:31':unknown), 'sum_expr_22': SUM(UNMASK::(DATETIME([t_ts], '+54321 seconds')) == DATETIME(JOIN_STRINGS('-':string, '2025':string, '12':string, '31':string))), 'sum_expr_24': SUM(ISIN(t_ts, ['2019-11-02 11:58:37', '2019-11-02 12:54:09', '2019-11-11 02:55:31', '2019-11-11 15:44:22']:array[unknown])), 'sum_expr_26': SUM(ISIN(t_ts, ['2023-06-01 13:50:10', '2023-06-01 13:50:14', '2023-06-04 10:35:26', '2023-06-11 21:53:04', '2023-06-25 15:06:06', '2023-06-25 21:58:37', '2023-06-27 03:21:19', '2023-06-27 10:34:20']:array[unknown])), 'sum_expr_27': SUM(DATETIME(UNMASK::(DATETIME([t_ts], '+54321 
seconds')), 'start of week':string, '+3 days':string) == '2023-05-31':string), 'sum_expr_28': SUM(ISIN(t_ts, ['2023-04-18 00:35:40', '2023-04-25 18:54:26', '2023-04-29 04:58:30', '2023-05-04 23:30:10', '2023-05-12 04:42:28', '2023-05-17 18:54:12', '2023-05-19 10:10:44', '2023-05-21 13:52:14', '2023-05-24 03:51:10', '2023-06-01 13:50:10', '2023-06-01 13:50:14', '2023-06-04 10:35:26', '2023-06-11 21:53:04', '2023-06-25 15:06:06', '2023-06-25 21:58:37', '2023-06-27 03:21:19', '2023-06-27 10:34:20']:array[unknown])), 'sum_expr_29': SUM(t_ts == '2023-06-01 13:50:10':unknown), 'sum_expr_30': SUM(DATETIME(UNMASK::(DATETIME([t_ts], '+54321 seconds')), 'start of week':string) == '2023-05-28':string), 'sum_expr_31': SUM(ISIN(t_ts, ['2022-12-31 17:42:54', '2023-01-04 12:05:15', '2023-01-07 22:11:27', '2023-01-20 04:38:03', '2023-01-20 16:40:54', '2023-01-27 15:13:18', '2023-01-30 19:58:26', '2023-02-02 19:12:58', '2023-02-11 11:13:53', '2023-02-11 12:32:55', '2023-02-15 21:54:29', '2023-02-16 14:18:36', '2023-02-28 07:11:29', '2023-03-07 01:26:10', '2023-03-08 18:58:18', '2023-03-14 14:23:33', '2023-03-16 06:17:44', '2023-03-17 08:48:16', '2023-03-24 03:33:40', '2023-03-26 06:52:52', '2023-04-18 00:35:40', '2023-04-25 18:54:26', '2023-04-29 04:58:30', '2023-05-04 23:30:10', '2023-05-12 04:42:28', '2023-05-17 18:54:12', '2023-05-19 10:10:44', '2023-05-21 13:52:14', '2023-05-24 03:51:10', '2023-06-01 13:50:10', '2023-06-01 13:50:14', '2023-06-04 10:35:26', '2023-06-11 21:53:04', '2023-06-25 15:06:06', '2023-06-25 21:58:37', '2023-06-27 03:21:19', '2023-06-27 10:34:20', '2023-06-30 15:27:03', '2023-07-07 15:17:47', '2023-07-17 03:23:15', '2023-07-18 14:41:26', '2023-08-03 20:24:35', '2023-08-11 20:25:39', '2023-08-29 03:07:18', '2023-09-01 16:50:48', '2023-09-08 09:30:23', '2023-09-13 06:42:39', '2023-09-15 09:00:02', '2023-09-30 08:57:30', '2023-10-15 02:47:04', '2023-10-19 09:40:06', '2023-10-30 00:20:45', '2023-11-08 12:52:24', '2023-11-10 17:20:29', '2023-11-16 11:30:24', 
'2023-11-21 15:17:10', '2023-11-28 06:34:03', '2023-12-07 14:11:33', '2023-12-15 05:57:23', '2023-12-16 00:51:23', '2023-12-23 07:54:22']:array[unknown])), 'sum_expr_36': SUM(DATETIME(UNMASK::(DATETIME([t_ts], '+54321 seconds')), 'start of day':string, '+1 week':string) == '2023-06-09':string), 'sum_expr_38': SUM(UNMASK::(DATETIME([t_ts], '+54321 seconds')) == DATETIME('now':string, 'start of day':string)), 'sum_expr_39': SUM(ISIN(t_ts, ['2023-06-01 13:50:10', '2023-06-01 13:50:14']:array[unknown]))}) + SCAN(table=CRBNK.TRANSACTIONS, columns={'t_ts': t_ts}) diff --git a/tests/test_plan_refsols/cryptbank_filter_count_18_rewrite.txt b/tests/test_plan_refsols/cryptbank_filter_count_18_rewrite.txt index 98bf079c7..0ff9c1100 100644 --- a/tests/test_plan_refsols/cryptbank_filter_count_18_rewrite.txt +++ b/tests/test_plan_refsols/cryptbank_filter_count_18_rewrite.txt @@ -1,4 +1,4 @@ ROOT(columns=[('n', n_rows)], orderings=[]) AGGREGATE(keys={}, aggregations={'n_rows': COUNT()}) - FILTER(condition=ISIN(c_email, ['ophia.jackson@mail.org', 'livia.a22@gmail.como', '.gonzalez@ymail.comm', 'opez.luke99@gmail.coml', 'enry.g@fastmail.comh', 'rank.k@protonmail.comf', 'mily.jones@mail.come', 'ob.smith77@gmail.comb']:array[unknown]), columns={}) + FILTER(condition=ISIN(c_email, ['ophia.jackson@mail.orgs', 'livia.a22@gmail.como', '.gonzalez@ymail.comm', 'opez.luke99@gmail.coml', 'enry.g@fastmail.comh', 'rank.k@protonmail.comf', 'mily.jones@mail.come', 'ob.smith77@gmail.comb']:array[unknown]), columns={}) SCAN(table=CRBNK.CUSTOMERS, columns={'c_email': c_email}) diff --git a/tests/test_plan_refsols/cryptbank_filter_count_50_raw.txt b/tests/test_plan_refsols/cryptbank_filter_count_50_raw.txt new file mode 100644 index 000000000..c7d2ebb89 --- /dev/null +++ b/tests/test_plan_refsols/cryptbank_filter_count_50_raw.txt @@ -0,0 +1,4 @@ +ROOT(columns=[('n', n_rows)], orderings=[]) + AGGREGATE(keys={}, aggregations={'n_rows': COUNT()}) + 
FILTER(condition=NOT(ISIN(DEFAULT_TO(YEAR(UNMASK::(DATE([c_birthday], '+472 days'))), 2005:numeric), [2005, 2006]:array[unknown])), columns={}) + SCAN(table=CRBNK.CUSTOMERS, columns={'c_birthday': c_birthday}) diff --git a/tests/test_plan_refsols/cryptbank_filter_count_50_rewrite.txt b/tests/test_plan_refsols/cryptbank_filter_count_50_rewrite.txt new file mode 100644 index 000000000..429f74d7b --- /dev/null +++ b/tests/test_plan_refsols/cryptbank_filter_count_50_rewrite.txt @@ -0,0 +1,4 @@ +ROOT(columns=[('n', n_rows)], orderings=[]) + AGGREGATE(keys={}, aggregations={'n_rows': COUNT()}) + FILTER(condition=PRESENT(c_birthday), columns={}) + SCAN(table=CRBNK.CUSTOMERS, columns={'c_birthday': c_birthday}) diff --git a/tests/test_plan_refsols/cryptbank_filter_count_51_raw.txt b/tests/test_plan_refsols/cryptbank_filter_count_51_raw.txt new file mode 100644 index 000000000..3aecc6a52 --- /dev/null +++ b/tests/test_plan_refsols/cryptbank_filter_count_51_raw.txt @@ -0,0 +1,4 @@ +ROOT(columns=[('n', n_rows)], orderings=[]) + AGGREGATE(keys={}, aggregations={'n_rows': COUNT()}) + FILTER(condition=CONTAINS(IFF(ISIN(SLICE(UNMASK::(LOWER([c_fname])), None:unknown, 1:numeric, None:unknown), ['q', 'r', 's']:array[unknown]), UNMASK::(LOWER([c_fname])), UNMASK::(LOWER([c_lname]))), 'ee':string), columns={}) + SCAN(table=CRBNK.CUSTOMERS, columns={'c_fname': c_fname, 'c_lname': c_lname}) diff --git a/tests/test_plan_refsols/cryptbank_filter_count_51_rewrite.txt b/tests/test_plan_refsols/cryptbank_filter_count_51_rewrite.txt new file mode 100644 index 000000000..9abbfccd9 --- /dev/null +++ b/tests/test_plan_refsols/cryptbank_filter_count_51_rewrite.txt @@ -0,0 +1,4 @@ +ROOT(columns=[('n', n_rows)], orderings=[]) + AGGREGATE(keys={}, aggregations={'n_rows': COUNT()}) + FILTER(condition=CONTAINS(IFF(ISIN(c_fname, ['QUEENIE', 'ROBERT', 'SOPHIA']:array[unknown]), UNMASK::(LOWER([c_fname])), UNMASK::(LOWER([c_lname]))), 'ee':string), columns={}) + SCAN(table=CRBNK.CUSTOMERS, 
columns={'c_fname': c_fname, 'c_lname': c_lname}) diff --git a/tests/test_plan_refsols/cryptbank_filter_count_52_raw.txt b/tests/test_plan_refsols/cryptbank_filter_count_52_raw.txt new file mode 100644 index 000000000..be00df52e --- /dev/null +++ b/tests/test_plan_refsols/cryptbank_filter_count_52_raw.txt @@ -0,0 +1,4 @@ +ROOT(columns=[('n', n_rows)], orderings=[]) + AGGREGATE(keys={}, aggregations={'n_rows': COUNT()}) + FILTER(condition=CONTAINS(UNMASK::(LOWER([c_lname])), JOIN_STRINGS('':string, 'e':string, IFF(ISIN(SLICE(UNMASK::(LOWER([c_lname])), None:unknown, 1:numeric, None:unknown), ['q', 'r', 's']:array[unknown]), 'z':string, 'e':string))), columns={}) + SCAN(table=CRBNK.CUSTOMERS, columns={'c_lname': c_lname}) diff --git a/tests/test_plan_refsols/cryptbank_filter_count_52_rewrite.txt b/tests/test_plan_refsols/cryptbank_filter_count_52_rewrite.txt new file mode 100644 index 000000000..2945a7768 --- /dev/null +++ b/tests/test_plan_refsols/cryptbank_filter_count_52_rewrite.txt @@ -0,0 +1,4 @@ +ROOT(columns=[('n', n_rows)], orderings=[]) + AGGREGATE(keys={}, aggregations={'n_rows': COUNT()}) + FILTER(condition=ISIN(c_lname, ['LEE', 'RODRIGUEZ']:array[unknown]), columns={}) + SCAN(table=CRBNK.CUSTOMERS, columns={'c_lname': c_lname}) diff --git a/tests/test_plan_refsols/cryptbank_filter_count_53_raw.txt b/tests/test_plan_refsols/cryptbank_filter_count_53_raw.txt new file mode 100644 index 000000000..425d5b7d0 --- /dev/null +++ b/tests/test_plan_refsols/cryptbank_filter_count_53_raw.txt @@ -0,0 +1,4 @@ +ROOT(columns=[('n', n_rows)], orderings=[]) + AGGREGATE(keys={}, aggregations={'n_rows': COUNT()}) + FILTER(condition=SLICE(UNMASK::(LOWER([c_fname])), 0:numeric, 1:numeric, None:unknown) == 'i':string, columns={}) + SCAN(table=CRBNK.CUSTOMERS, columns={'c_fname': c_fname}) diff --git a/tests/test_plan_refsols/cryptbank_filter_count_53_rewrite.txt b/tests/test_plan_refsols/cryptbank_filter_count_53_rewrite.txt new file mode 100644 index 000000000..6110686f1 --- 
/dev/null +++ b/tests/test_plan_refsols/cryptbank_filter_count_53_rewrite.txt @@ -0,0 +1,4 @@ +ROOT(columns=[('n', n_rows)], orderings=[]) + AGGREGATE(keys={}, aggregations={'n_rows': COUNT()}) + FILTER(condition=c_fname == 'ISABEL':unknown, columns={}) + SCAN(table=CRBNK.CUSTOMERS, columns={'c_fname': c_fname}) diff --git a/tests/test_plan_refsols/cryptbank_filter_count_54_raw.txt b/tests/test_plan_refsols/cryptbank_filter_count_54_raw.txt new file mode 100644 index 000000000..ac00003be --- /dev/null +++ b/tests/test_plan_refsols/cryptbank_filter_count_54_raw.txt @@ -0,0 +1,4 @@ +ROOT(columns=[('n', n_rows)], orderings=[]) + AGGREGATE(keys={}, aggregations={'n_rows': COUNT()}) + FILTER(condition=ISIN(SLICE(UNMASK::(LOWER([c_fname])), -1:numeric, None:unknown, None:unknown), ['a', 'e', 'i', 'o', 'u']:array[unknown]), columns={}) + SCAN(table=CRBNK.CUSTOMERS, columns={'c_fname': c_fname}) diff --git a/tests/test_plan_refsols/cryptbank_filter_count_54_rewrite.txt b/tests/test_plan_refsols/cryptbank_filter_count_54_rewrite.txt new file mode 100644 index 000000000..ac00003be --- /dev/null +++ b/tests/test_plan_refsols/cryptbank_filter_count_54_rewrite.txt @@ -0,0 +1,4 @@ +ROOT(columns=[('n', n_rows)], orderings=[]) + AGGREGATE(keys={}, aggregations={'n_rows': COUNT()}) + FILTER(condition=ISIN(SLICE(UNMASK::(LOWER([c_fname])), -1:numeric, None:unknown, None:unknown), ['a', 'e', 'i', 'o', 'u']:array[unknown]), columns={}) + SCAN(table=CRBNK.CUSTOMERS, columns={'c_fname': c_fname}) diff --git a/tests/test_plan_refsols/cryptbank_filter_count_55_raw.txt b/tests/test_plan_refsols/cryptbank_filter_count_55_raw.txt new file mode 100644 index 000000000..2565c41de --- /dev/null +++ b/tests/test_plan_refsols/cryptbank_filter_count_55_raw.txt @@ -0,0 +1,4 @@ +ROOT(columns=[('n', n_rows)], orderings=[]) + AGGREGATE(keys={}, aggregations={'n_rows': COUNT()}) + FILTER(condition=ISIN(SLICE(UNMASK::(LOWER([c_fname])), 1:numeric, 3:numeric, None:unknown), ['ar', 'li', 'ra', 'to', 
'am']:array[unknown]), columns={}) + SCAN(table=CRBNK.CUSTOMERS, columns={'c_fname': c_fname}) diff --git a/tests/test_plan_refsols/cryptbank_filter_count_55_rewrite.txt b/tests/test_plan_refsols/cryptbank_filter_count_55_rewrite.txt new file mode 100644 index 000000000..66493452f --- /dev/null +++ b/tests/test_plan_refsols/cryptbank_filter_count_55_rewrite.txt @@ -0,0 +1,4 @@ +ROOT(columns=[('n', n_rows)], orderings=[]) + AGGREGATE(keys={}, aggregations={'n_rows': COUNT()}) + FILTER(condition=ISIN(c_fname, ['ALICE', 'CAROL', 'FRANK', 'GRACE', 'JAMES', 'KAREN', 'MARIA', 'OLIVIA']:array[unknown]), columns={}) + SCAN(table=CRBNK.CUSTOMERS, columns={'c_fname': c_fname}) diff --git a/tests/test_plan_refsols/cryptbank_filter_count_56_raw.txt b/tests/test_plan_refsols/cryptbank_filter_count_56_raw.txt new file mode 100644 index 000000000..93d44675d --- /dev/null +++ b/tests/test_plan_refsols/cryptbank_filter_count_56_raw.txt @@ -0,0 +1,4 @@ +ROOT(columns=[('n', n_rows)], orderings=[]) + AGGREGATE(keys={}, aggregations={'n_rows': COUNT()}) + FILTER(condition=ISIN(SLICE(UNMASK::(LOWER([c_fname])), -2:numeric, -1:numeric, None:unknown), ['a', 'c', 'l']:array[unknown]), columns={}) + SCAN(table=CRBNK.CUSTOMERS, columns={'c_fname': c_fname}) diff --git a/tests/test_plan_refsols/cryptbank_filter_count_56_rewrite.txt b/tests/test_plan_refsols/cryptbank_filter_count_56_rewrite.txt new file mode 100644 index 000000000..93d44675d --- /dev/null +++ b/tests/test_plan_refsols/cryptbank_filter_count_56_rewrite.txt @@ -0,0 +1,4 @@ +ROOT(columns=[('n', n_rows)], orderings=[]) + AGGREGATE(keys={}, aggregations={'n_rows': COUNT()}) + FILTER(condition=ISIN(SLICE(UNMASK::(LOWER([c_fname])), -2:numeric, -1:numeric, None:unknown), ['a', 'c', 'l']:array[unknown]), columns={}) + SCAN(table=CRBNK.CUSTOMERS, columns={'c_fname': c_fname}) diff --git a/tests/test_plan_refsols/cryptbank_filter_count_57_raw.txt b/tests/test_plan_refsols/cryptbank_filter_count_57_raw.txt new file mode 100644 index 
000000000..7a469fc93 --- /dev/null +++ b/tests/test_plan_refsols/cryptbank_filter_count_57_raw.txt @@ -0,0 +1,4 @@ +ROOT(columns=[('n', n_rows)], orderings=[]) + AGGREGATE(keys={}, aggregations={'n_rows': COUNT()}) + FILTER(condition=CONTAINS(SLICE(UNMASK::(LOWER([c_fname])), None:unknown, -1:numeric, None:unknown), 'e':string), columns={}) + SCAN(table=CRBNK.CUSTOMERS, columns={'c_fname': c_fname}) diff --git a/tests/test_plan_refsols/cryptbank_filter_count_57_rewrite.txt b/tests/test_plan_refsols/cryptbank_filter_count_57_rewrite.txt new file mode 100644 index 000000000..7a469fc93 --- /dev/null +++ b/tests/test_plan_refsols/cryptbank_filter_count_57_rewrite.txt @@ -0,0 +1,4 @@ +ROOT(columns=[('n', n_rows)], orderings=[]) + AGGREGATE(keys={}, aggregations={'n_rows': COUNT()}) + FILTER(condition=CONTAINS(SLICE(UNMASK::(LOWER([c_fname])), None:unknown, -1:numeric, None:unknown), 'e':string), columns={}) + SCAN(table=CRBNK.CUSTOMERS, columns={'c_fname': c_fname}) diff --git a/tests/test_plan_refsols/cryptbank_filter_count_58_raw.txt b/tests/test_plan_refsols/cryptbank_filter_count_58_raw.txt new file mode 100644 index 000000000..497690ba2 --- /dev/null +++ b/tests/test_plan_refsols/cryptbank_filter_count_58_raw.txt @@ -0,0 +1,4 @@ +ROOT(columns=[('n', n_rows)], orderings=[]) + AGGREGATE(keys={}, aggregations={'n_rows': COUNT()}) + FILTER(condition=CONTAINS(SLICE(UNMASK::(LOWER([c_fname])), 1:numeric, -1:numeric, None:unknown), 'e':string), columns={}) + SCAN(table=CRBNK.CUSTOMERS, columns={'c_fname': c_fname}) diff --git a/tests/test_plan_refsols/cryptbank_filter_count_58_rewrite.txt b/tests/test_plan_refsols/cryptbank_filter_count_58_rewrite.txt new file mode 100644 index 000000000..497690ba2 --- /dev/null +++ b/tests/test_plan_refsols/cryptbank_filter_count_58_rewrite.txt @@ -0,0 +1,4 @@ +ROOT(columns=[('n', n_rows)], orderings=[]) + AGGREGATE(keys={}, aggregations={'n_rows': COUNT()}) + FILTER(condition=CONTAINS(SLICE(UNMASK::(LOWER([c_fname])), 1:numeric, 
-1:numeric, None:unknown), 'e':string), columns={}) + SCAN(table=CRBNK.CUSTOMERS, columns={'c_fname': c_fname}) diff --git a/tests/test_sql_refsols/cryptbank_agg_07_raw_sqlite.sql b/tests/test_sql_refsols/cryptbank_agg_07_raw_sqlite.sql new file mode 100644 index 000000000..d389c0bff --- /dev/null +++ b/tests/test_sql_refsols/cryptbank_agg_07_raw_sqlite.sql @@ -0,0 +1,113 @@ +SELECT + COALESCE( + SUM(DATE(DATETIME(t_ts, '+54321 seconds'), 'start of year') = DATE('2023-01-01')), + 0 + ) AS n_yr, + COALESCE( + SUM( + DATE( + DATETIME(t_ts, '+54321 seconds'), + 'start of month', + '-' || CAST(( + ( + CAST(STRFTIME('%m', DATETIME(DATETIME(t_ts, '+54321 seconds'))) AS INTEGER) - 1 + ) % 3 + ) AS TEXT) || ' months' + ) = DATE('2023-04-01') + ), + 0 + ) AS n_qu, + COALESCE( + SUM(DATE(DATETIME(t_ts, '+54321 seconds'), 'start of month') = DATE('2023-06-01')), + 0 + ) AS n_mo, + COALESCE( + SUM( + DATE( + DATETIME(t_ts, '+54321 seconds'), + '-' || CAST(CAST(STRFTIME('%w', DATETIME(DATETIME(t_ts, '+54321 seconds'))) AS INTEGER) AS TEXT) || ' days', + 'start of day' + ) = DATE('2023-05-28') + ), + 0 + ) AS n_we, + COALESCE( + SUM(DATE(DATETIME(t_ts, '+54321 seconds'), 'start of day') = DATE('2023-06-02')), + 0 + ) AS n_da, + COALESCE( + SUM( + STRFTIME('%Y-%m-%d %H:00:00', DATETIME(DATETIME(t_ts, '+54321 seconds'))) = '2023-06-02 04:00:00' + ), + 0 + ) AS n_ho, + COALESCE( + SUM( + STRFTIME('%Y-%m-%d %H:%M:00', DATETIME(DATETIME(t_ts, '+54321 seconds'))) = '2023-06-02 04:55:00' + ), + 0 + ) AS n_mi, + COALESCE( + SUM( + STRFTIME('%Y-%m-%d %H:%M:%S', DATETIME(DATETIME(t_ts, '+54321 seconds'))) = '2023-06-02 04:55:31' + ), + 0 + ) AS n_se, + COALESCE(SUM(DATE('now', 'start of day') = DATETIME(t_ts, '+54321 seconds')), 0) AS n_cts, + COALESCE(SUM(DATETIME('2025-12-31') = DATETIME(t_ts, '+54321 seconds')), 0) AS n_dts, + COALESCE( + SUM( + DATE( + DATETIME(t_ts, '+54321 seconds'), + '-' || CAST(CAST(STRFTIME('%w', DATETIME(DATETIME(t_ts, '+54321 seconds'))) AS INTEGER) AS TEXT) 
|| ' days', + 'start of day', + '3 day' + ) = DATE('2023-05-31') + ), + 0 + ) AS n_nst, + COALESCE( + SUM(DATETIME(DATETIME(t_ts, '+54321 seconds'), '1 year') = '2020-11-11 18:00:52'), + 0 + ) AS n_ayr, + COALESCE( + SUM(DATETIME(DATETIME(t_ts, '+54321 seconds'), '6 month') = '2020-05-11 18:00:52'), + 0 + ) AS n_aqu, + COALESCE( + SUM(DATETIME(DATETIME(t_ts, '+54321 seconds'), '-5 month') = '2019-06-11 18:00:52'), + 0 + ) AS n_amo, + COALESCE( + SUM( + DATE(DATETIME(t_ts, '+54321 seconds'), 'start of day', '7 day') = DATE('2023-06-09') + ), + 0 + ) AS n_awe, + COALESCE( + SUM(DATETIME(DATETIME(t_ts, '+54321 seconds'), '10 day') = '2019-11-21 18:00:52'), + 0 + ) AS n_ada, + COALESCE( + SUM(DATETIME(DATETIME(t_ts, '+54321 seconds'), '1000 hour') = '2019-12-23 10:00:52'), + 0 + ) AS n_aho, + COALESCE( + SUM( + DATETIME(DATETIME(t_ts, '+54321 seconds'), '10000 minute') = '2019-11-18 16:40:52' + ), + 0 + ) AS n_ami, + COALESCE( + SUM( + DATETIME(DATETIME(t_ts, '+54321 seconds'), '-1000000 second') = '2019-10-31 04:14:12' + ), + 0 + ) AS n_ase, + COALESCE( + SUM( + DATE(DATETIME(t_ts, '+54321 seconds'), 'start of month', '-1 day') = DATE('2019-10-31') + ), + 0 + ) AS n_ldm +FROM crbnk.transactions diff --git a/tests/test_sql_refsols/cryptbank_agg_07_rewrite_sqlite.sql b/tests/test_sql_refsols/cryptbank_agg_07_rewrite_sqlite.sql new file mode 100644 index 000000000..85a40572e --- /dev/null +++ b/tests/test_sql_refsols/cryptbank_agg_07_rewrite_sqlite.sql @@ -0,0 +1,66 @@ +SELECT + COALESCE( + SUM( + t_ts IN ('2022-12-31 17:42:54', '2023-01-04 12:05:15', '2023-01-07 22:11:27', '2023-01-20 04:38:03', '2023-01-20 16:40:54', '2023-01-27 15:13:18', '2023-01-30 19:58:26', '2023-02-02 19:12:58', '2023-02-11 11:13:53', '2023-02-11 12:32:55', '2023-02-15 21:54:29', '2023-02-16 14:18:36', '2023-02-28 07:11:29', '2023-03-07 01:26:10', '2023-03-08 18:58:18', '2023-03-14 14:23:33', '2023-03-16 06:17:44', '2023-03-17 08:48:16', '2023-03-24 03:33:40', '2023-03-26 06:52:52', '2023-04-18 
00:35:40', '2023-04-25 18:54:26', '2023-04-29 04:58:30', '2023-05-04 23:30:10', '2023-05-12 04:42:28', '2023-05-17 18:54:12', '2023-05-19 10:10:44', '2023-05-21 13:52:14', '2023-05-24 03:51:10', '2023-06-01 13:50:10', '2023-06-01 13:50:14', '2023-06-04 10:35:26', '2023-06-11 21:53:04', '2023-06-25 15:06:06', '2023-06-25 21:58:37', '2023-06-27 03:21:19', '2023-06-27 10:34:20', '2023-06-30 15:27:03', '2023-07-07 15:17:47', '2023-07-17 03:23:15', '2023-07-18 14:41:26', '2023-08-03 20:24:35', '2023-08-11 20:25:39', '2023-08-29 03:07:18', '2023-09-01 16:50:48', '2023-09-08 09:30:23', '2023-09-13 06:42:39', '2023-09-15 09:00:02', '2023-09-30 08:57:30', '2023-10-15 02:47:04', '2023-10-19 09:40:06', '2023-10-30 00:20:45', '2023-11-08 12:52:24', '2023-11-10 17:20:29', '2023-11-16 11:30:24', '2023-11-21 15:17:10', '2023-11-28 06:34:03', '2023-12-07 14:11:33', '2023-12-15 05:57:23', '2023-12-16 00:51:23', '2023-12-23 07:54:22') + ), + 0 + ) AS n_yr, + COALESCE( + SUM( + t_ts IN ('2023-04-18 00:35:40', '2023-04-25 18:54:26', '2023-04-29 04:58:30', '2023-05-04 23:30:10', '2023-05-12 04:42:28', '2023-05-17 18:54:12', '2023-05-19 10:10:44', '2023-05-21 13:52:14', '2023-05-24 03:51:10', '2023-06-01 13:50:10', '2023-06-01 13:50:14', '2023-06-04 10:35:26', '2023-06-11 21:53:04', '2023-06-25 15:06:06', '2023-06-25 21:58:37', '2023-06-27 03:21:19', '2023-06-27 10:34:20') + ), + 0 + ) AS n_qu, + COALESCE( + SUM( + t_ts IN ('2023-06-01 13:50:10', '2023-06-01 13:50:14', '2023-06-04 10:35:26', '2023-06-11 21:53:04', '2023-06-25 15:06:06', '2023-06-25 21:58:37', '2023-06-27 03:21:19', '2023-06-27 10:34:20') + ), + 0 + ) AS n_mo, + COALESCE( + SUM( + DATE( + DATETIME(t_ts, '+54321 seconds'), + '-' || CAST(CAST(STRFTIME('%w', DATETIME(DATETIME(t_ts, '+54321 seconds'))) AS INTEGER) AS TEXT) || ' days', + 'start of day' + ) = DATE('2023-05-28') + ), + 0 + ) AS n_we, + COALESCE(SUM(t_ts IN ('2023-06-01 13:50:10', '2023-06-01 13:50:14')), 0) AS n_da, + COALESCE(SUM(t_ts IN ('2023-06-01 
13:50:10', '2023-06-01 13:50:14')), 0) AS n_ho, + COALESCE(SUM(t_ts IN ('2023-06-01 13:50:10', '2023-06-01 13:50:14')), 0) AS n_mi, + COALESCE(SUM(t_ts = '2023-06-01 13:50:10'), 0) AS n_se, + COALESCE(SUM(DATE('now', 'start of day') = DATETIME(t_ts, '+54321 seconds')), 0) AS n_cts, + COALESCE(SUM(DATETIME('2025-12-31') = DATETIME(t_ts, '+54321 seconds')), 0) AS n_dts, + COALESCE( + SUM( + DATE( + DATETIME(t_ts, '+54321 seconds'), + '-' || CAST(CAST(STRFTIME('%w', DATETIME(DATETIME(t_ts, '+54321 seconds'))) AS INTEGER) AS TEXT) || ' days', + 'start of day', + '3 day' + ) = DATE('2023-05-31') + ), + 0 + ) AS n_nst, + COALESCE(SUM(t_ts = '2019-11-11 02:55:31'), 0) AS n_ayr, + COALESCE(SUM(t_ts = '2019-11-11 02:55:31'), 0) AS n_aqu, + COALESCE(SUM(t_ts = '2019-11-11 02:55:31'), 0) AS n_amo, + COALESCE( + SUM( + DATE(DATETIME(t_ts, '+54321 seconds'), 'start of day', '7 day') = DATE('2023-06-09') + ), + 0 + ) AS n_awe, + COALESCE(SUM(t_ts = '2019-11-11 02:55:31'), 0) AS n_ada, + COALESCE(SUM(t_ts = '2019-11-11 02:55:31'), 0) AS n_aho, + COALESCE(SUM(t_ts = '2019-11-11 02:55:31'), 0) AS n_ami, + COALESCE(SUM(t_ts = '2019-11-11 02:55:31'), 0) AS n_ase, + COALESCE( + SUM( + t_ts IN ('2019-11-02 11:58:37', '2019-11-02 12:54:09', '2019-11-11 02:55:31', '2019-11-11 15:44:22') + ), + 0 + ) AS n_ldm +FROM crbnk.transactions diff --git a/tests/test_sql_refsols/cryptbank_filter_count_18_rewrite_sqlite.sql b/tests/test_sql_refsols/cryptbank_filter_count_18_rewrite_sqlite.sql index 5396be61f..e27ab0f98 100644 --- a/tests/test_sql_refsols/cryptbank_filter_count_18_rewrite_sqlite.sql +++ b/tests/test_sql_refsols/cryptbank_filter_count_18_rewrite_sqlite.sql @@ -2,4 +2,4 @@ SELECT COUNT(*) AS n FROM crbnk.customers WHERE - c_email IN ('ophia.jackson@mail.org', 'livia.a22@gmail.como', '.gonzalez@ymail.comm', 'opez.luke99@gmail.coml', 'enry.g@fastmail.comh', 'rank.k@protonmail.comf', 'mily.jones@mail.come', 'ob.smith77@gmail.comb') + c_email IN ('ophia.jackson@mail.orgs', 
'livia.a22@gmail.como', '.gonzalez@ymail.comm', 'opez.luke99@gmail.coml', 'enry.g@fastmail.comh', 'rank.k@protonmail.comf', 'mily.jones@mail.come', 'ob.smith77@gmail.comb') diff --git a/tests/test_sql_refsols/cryptbank_filter_count_50_raw_sqlite.sql b/tests/test_sql_refsols/cryptbank_filter_count_50_raw_sqlite.sql new file mode 100644 index 000000000..57cc1e62a --- /dev/null +++ b/tests/test_sql_refsols/cryptbank_filter_count_50_raw_sqlite.sql @@ -0,0 +1,5 @@ +SELECT + COUNT(*) AS n +FROM crbnk.customers +WHERE + NOT COALESCE(CAST(STRFTIME('%Y', DATE(c_birthday, '+472 days')) AS INTEGER), 2005) IN (2005, 2006) diff --git a/tests/test_sql_refsols/cryptbank_filter_count_50_rewrite_sqlite.sql b/tests/test_sql_refsols/cryptbank_filter_count_50_rewrite_sqlite.sql new file mode 100644 index 000000000..2dfd1a393 --- /dev/null +++ b/tests/test_sql_refsols/cryptbank_filter_count_50_rewrite_sqlite.sql @@ -0,0 +1,5 @@ +SELECT + COUNT(*) AS n +FROM crbnk.customers +WHERE + NOT c_birthday IS NULL diff --git a/tests/test_sql_refsols/cryptbank_filter_count_51_raw_sqlite.sql b/tests/test_sql_refsols/cryptbank_filter_count_51_raw_sqlite.sql new file mode 100644 index 000000000..9a09159a5 --- /dev/null +++ b/tests/test_sql_refsols/cryptbank_filter_count_51_raw_sqlite.sql @@ -0,0 +1,5 @@ +SELECT + COUNT(*) AS n +FROM crbnk.customers +WHERE + IIF(SUBSTRING(LOWER(c_fname), 1, 1) IN ('q', 'r', 's'), LOWER(c_fname), LOWER(c_lname)) LIKE '%ee%' diff --git a/tests/test_sql_refsols/cryptbank_filter_count_51_rewrite_sqlite.sql b/tests/test_sql_refsols/cryptbank_filter_count_51_rewrite_sqlite.sql new file mode 100644 index 000000000..f11735f74 --- /dev/null +++ b/tests/test_sql_refsols/cryptbank_filter_count_51_rewrite_sqlite.sql @@ -0,0 +1,5 @@ +SELECT + COUNT(*) AS n +FROM crbnk.customers +WHERE + IIF(c_fname IN ('QUEENIE', 'ROBERT', 'SOPHIA'), LOWER(c_fname), LOWER(c_lname)) LIKE '%ee%' diff --git a/tests/test_sql_refsols/cryptbank_filter_count_52_raw_sqlite.sql 
b/tests/test_sql_refsols/cryptbank_filter_count_52_raw_sqlite.sql new file mode 100644 index 000000000..43699c0d4 --- /dev/null +++ b/tests/test_sql_refsols/cryptbank_filter_count_52_raw_sqlite.sql @@ -0,0 +1,7 @@ +SELECT + COUNT(*) AS n +FROM crbnk.customers +WHERE + LOWER(c_lname) LIKE ( + '%' || CONCAT_WS('', 'e', IIF(SUBSTRING(LOWER(c_lname), 1, 1) IN ('q', 'r', 's'), 'z', 'e')) || '%' + ) diff --git a/tests/test_sql_refsols/cryptbank_filter_count_52_rewrite_sqlite.sql b/tests/test_sql_refsols/cryptbank_filter_count_52_rewrite_sqlite.sql new file mode 100644 index 000000000..c896fffa0 --- /dev/null +++ b/tests/test_sql_refsols/cryptbank_filter_count_52_rewrite_sqlite.sql @@ -0,0 +1,5 @@ +SELECT + COUNT(*) AS n +FROM crbnk.customers +WHERE + c_lname IN ('LEE', 'RODRIGUEZ') diff --git a/tests/test_sql_refsols/cryptbank_filter_count_53_raw_sqlite.sql b/tests/test_sql_refsols/cryptbank_filter_count_53_raw_sqlite.sql new file mode 100644 index 000000000..d91509f71 --- /dev/null +++ b/tests/test_sql_refsols/cryptbank_filter_count_53_raw_sqlite.sql @@ -0,0 +1,5 @@ +SELECT + COUNT(*) AS n +FROM crbnk.customers +WHERE + SUBSTRING(LOWER(c_fname), 1, 1) = 'i' diff --git a/tests/test_sql_refsols/cryptbank_filter_count_53_rewrite_sqlite.sql b/tests/test_sql_refsols/cryptbank_filter_count_53_rewrite_sqlite.sql new file mode 100644 index 000000000..3db7a0281 --- /dev/null +++ b/tests/test_sql_refsols/cryptbank_filter_count_53_rewrite_sqlite.sql @@ -0,0 +1,5 @@ +SELECT + COUNT(*) AS n +FROM crbnk.customers +WHERE + c_fname = 'ISABEL' diff --git a/tests/test_sql_refsols/cryptbank_filter_count_54_raw_sqlite.sql b/tests/test_sql_refsols/cryptbank_filter_count_54_raw_sqlite.sql new file mode 100644 index 000000000..7c261ae4b --- /dev/null +++ b/tests/test_sql_refsols/cryptbank_filter_count_54_raw_sqlite.sql @@ -0,0 +1,16 @@ +SELECT + COUNT(*) AS n +FROM crbnk.customers +WHERE + SUBSTRING( + LOWER(c_fname), + CASE + WHEN ( + LENGTH(LOWER(c_fname)) + 0 + ) < 1 + THEN 1 + ELSE ( + 
LENGTH(LOWER(c_fname)) + 0 + ) + END + ) IN ('a', 'e', 'i', 'o', 'u') diff --git a/tests/test_sql_refsols/cryptbank_filter_count_54_rewrite_sqlite.sql b/tests/test_sql_refsols/cryptbank_filter_count_54_rewrite_sqlite.sql new file mode 100644 index 000000000..7c261ae4b --- /dev/null +++ b/tests/test_sql_refsols/cryptbank_filter_count_54_rewrite_sqlite.sql @@ -0,0 +1,16 @@ +SELECT + COUNT(*) AS n +FROM crbnk.customers +WHERE + SUBSTRING( + LOWER(c_fname), + CASE + WHEN ( + LENGTH(LOWER(c_fname)) + 0 + ) < 1 + THEN 1 + ELSE ( + LENGTH(LOWER(c_fname)) + 0 + ) + END + ) IN ('a', 'e', 'i', 'o', 'u') diff --git a/tests/test_sql_refsols/cryptbank_filter_count_55_raw_sqlite.sql b/tests/test_sql_refsols/cryptbank_filter_count_55_raw_sqlite.sql new file mode 100644 index 000000000..e7a88d0e3 --- /dev/null +++ b/tests/test_sql_refsols/cryptbank_filter_count_55_raw_sqlite.sql @@ -0,0 +1,5 @@ +SELECT + COUNT(*) AS n +FROM crbnk.customers +WHERE + SUBSTRING(LOWER(c_fname), 2, 2) IN ('ar', 'li', 'ra', 'to', 'am') diff --git a/tests/test_sql_refsols/cryptbank_filter_count_55_rewrite_sqlite.sql b/tests/test_sql_refsols/cryptbank_filter_count_55_rewrite_sqlite.sql new file mode 100644 index 000000000..81431699b --- /dev/null +++ b/tests/test_sql_refsols/cryptbank_filter_count_55_rewrite_sqlite.sql @@ -0,0 +1,5 @@ +SELECT + COUNT(*) AS n +FROM crbnk.customers +WHERE + c_fname IN ('ALICE', 'CAROL', 'FRANK', 'GRACE', 'JAMES', 'KAREN', 'MARIA', 'OLIVIA') diff --git a/tests/test_sql_refsols/cryptbank_filter_count_56_raw_sqlite.sql b/tests/test_sql_refsols/cryptbank_filter_count_56_raw_sqlite.sql new file mode 100644 index 000000000..61af191d7 --- /dev/null +++ b/tests/test_sql_refsols/cryptbank_filter_count_56_raw_sqlite.sql @@ -0,0 +1,33 @@ +SELECT + COUNT(*) AS n +FROM crbnk.customers +WHERE + SUBSTRING( + LOWER(c_fname), + CASE + WHEN ( + LENGTH(LOWER(c_fname)) + -1 + ) < 1 + THEN 1 + ELSE ( + LENGTH(LOWER(c_fname)) + -1 + ) + END, + CASE + WHEN ( + LENGTH(LOWER(c_fname)) + 0 + ) < 1 + 
THEN 0 + ELSE ( + LENGTH(LOWER(c_fname)) + 0 + ) - CASE + WHEN ( + LENGTH(LOWER(c_fname)) + -1 + ) < 1 + THEN 1 + ELSE ( + LENGTH(LOWER(c_fname)) + -1 + ) + END + END + ) IN ('a', 'c', 'l') diff --git a/tests/test_sql_refsols/cryptbank_filter_count_56_rewrite_sqlite.sql b/tests/test_sql_refsols/cryptbank_filter_count_56_rewrite_sqlite.sql new file mode 100644 index 000000000..61af191d7 --- /dev/null +++ b/tests/test_sql_refsols/cryptbank_filter_count_56_rewrite_sqlite.sql @@ -0,0 +1,33 @@ +SELECT + COUNT(*) AS n +FROM crbnk.customers +WHERE + SUBSTRING( + LOWER(c_fname), + CASE + WHEN ( + LENGTH(LOWER(c_fname)) + -1 + ) < 1 + THEN 1 + ELSE ( + LENGTH(LOWER(c_fname)) + -1 + ) + END, + CASE + WHEN ( + LENGTH(LOWER(c_fname)) + 0 + ) < 1 + THEN 0 + ELSE ( + LENGTH(LOWER(c_fname)) + 0 + ) - CASE + WHEN ( + LENGTH(LOWER(c_fname)) + -1 + ) < 1 + THEN 1 + ELSE ( + LENGTH(LOWER(c_fname)) + -1 + ) + END + END + ) IN ('a', 'c', 'l') diff --git a/tests/test_sql_refsols/cryptbank_filter_count_57_raw_sqlite.sql b/tests/test_sql_refsols/cryptbank_filter_count_57_raw_sqlite.sql new file mode 100644 index 000000000..47c6438a4 --- /dev/null +++ b/tests/test_sql_refsols/cryptbank_filter_count_57_raw_sqlite.sql @@ -0,0 +1,17 @@ +SELECT + COUNT(*) AS n +FROM crbnk.customers +WHERE + SUBSTRING( + LOWER(c_fname), + 1, + CASE + WHEN ( + LENGTH(LOWER(c_fname)) + -1 + ) < 0 + THEN 0 + ELSE ( + LENGTH(LOWER(c_fname)) + -1 + ) + END + ) LIKE '%e%' diff --git a/tests/test_sql_refsols/cryptbank_filter_count_57_rewrite_sqlite.sql b/tests/test_sql_refsols/cryptbank_filter_count_57_rewrite_sqlite.sql new file mode 100644 index 000000000..47c6438a4 --- /dev/null +++ b/tests/test_sql_refsols/cryptbank_filter_count_57_rewrite_sqlite.sql @@ -0,0 +1,17 @@ +SELECT + COUNT(*) AS n +FROM crbnk.customers +WHERE + SUBSTRING( + LOWER(c_fname), + 1, + CASE + WHEN ( + LENGTH(LOWER(c_fname)) + -1 + ) < 0 + THEN 0 + ELSE ( + LENGTH(LOWER(c_fname)) + -1 + ) + END + ) LIKE '%e%' diff --git 
a/tests/test_sql_refsols/cryptbank_filter_count_58_raw_sqlite.sql b/tests/test_sql_refsols/cryptbank_filter_count_58_raw_sqlite.sql new file mode 100644 index 000000000..f2068d2ab --- /dev/null +++ b/tests/test_sql_refsols/cryptbank_filter_count_58_raw_sqlite.sql @@ -0,0 +1,25 @@ +SELECT + COUNT(*) AS n +FROM crbnk.customers +WHERE + SUBSTRING( + LOWER(c_fname), + 2, + CASE + WHEN ( + LENGTH(LOWER(c_fname)) + 0 + ) < 1 + THEN 0 + ELSE CASE + WHEN ( + ( + LENGTH(LOWER(c_fname)) + 0 + ) - 2 + ) <= 0 + THEN 0 + ELSE ( + LENGTH(LOWER(c_fname)) + 0 + ) - 2 + END + END + ) LIKE '%e%' diff --git a/tests/test_sql_refsols/cryptbank_filter_count_58_rewrite_sqlite.sql b/tests/test_sql_refsols/cryptbank_filter_count_58_rewrite_sqlite.sql new file mode 100644 index 000000000..f2068d2ab --- /dev/null +++ b/tests/test_sql_refsols/cryptbank_filter_count_58_rewrite_sqlite.sql @@ -0,0 +1,25 @@ +SELECT + COUNT(*) AS n +FROM crbnk.customers +WHERE + SUBSTRING( + LOWER(c_fname), + 2, + CASE + WHEN ( + LENGTH(LOWER(c_fname)) + 0 + ) < 1 + THEN 0 + ELSE CASE + WHEN ( + ( + LENGTH(LOWER(c_fname)) + 0 + ) - 2 + ) <= 0 + THEN 0 + ELSE ( + LENGTH(LOWER(c_fname)) + 0 + ) - 2 + END + END + ) LIKE '%e%' From a6f6a37bd88996a562d0d60dd829bf8700b0d6c2 Mon Sep 17 00:00:00 2001 From: knassre-bodo Date: Wed, 5 Nov 2025 13:46:46 -0500 Subject: [PATCH 24/40] Predicate server revisions with new API --- .../conversion/mask_server_rewrite_shuttle.py | 2 +- pydough/mask_server/mask_server.py | 44 ++++++++++++++----- tests/mock_server/api_mock_server.py | 14 ++++-- 3 files changed, 43 insertions(+), 17 deletions(-) diff --git a/pydough/conversion/mask_server_rewrite_shuttle.py b/pydough/conversion/mask_server_rewrite_shuttle.py index 19721b902..8ff32c3ec 100644 --- a/pydough/conversion/mask_server_rewrite_shuttle.py +++ b/pydough/conversion/mask_server_rewrite_shuttle.py @@ -98,7 +98,7 @@ def process_batch(self) -> None: # to None in the case of failure, or the rewritten expression in the # case of success. 
responses: list[MaskServerOutput] = ( - self.server_info.simplify_simple_expression_batch(batch) + self.server_info.simplify_simple_expression_batch(batch, False) ) assert len(responses) == len(ancillary_info) for (expr, input_expr), response in zip(ancillary_info, responses): diff --git a/pydough/mask_server/mask_server.py b/pydough/mask_server/mask_server.py index 308c71aef..1b68734f4 100644 --- a/pydough/mask_server/mask_server.py +++ b/pydough/mask_server/mask_server.py @@ -127,7 +127,9 @@ def get_server_response_case(self, server_case: str) -> MaskServerResponse: return MaskServerResponse.UNSUPPORTED def simplify_simple_expression_batch( - self, batch: list[MaskServerInput] + self, + batch: list[MaskServerInput], + dry_run: bool, ) -> list[MaskServerOutput]: """ Sends a batch of predicate expressions to the mask server for evaluation. @@ -139,6 +141,7 @@ def simplify_simple_expression_batch( Args: `batch`: The list of inputs to be sent to the server. + `dry_run`: Whether to perform a dry run or not. Returns: An output list containing the response case and payload. @@ -156,38 +159,48 @@ def simplify_simple_expression_batch( path: str = "v1/predicates/batch-evaluate" method: RequestMethod = RequestMethod.POST - request: ServerRequest = self.generate_request(batch, path, method) + request: ServerRequest = self.generate_request(batch, path, method, dry_run) response_json = self.connection.send_server_request(request) result: list[MaskServerOutput] = self.generate_result(response_json) return result def generate_request( - self, batch: list[MaskServerInput], path: str, method: RequestMethod + self, + batch: list[MaskServerInput], + path: str, + method: RequestMethod, + dry_run: bool, ) -> ServerRequest: """ Generate a server request from the given batch of server inputs and path. Args: `batch`: A list of MaskServerInput objects. - `path`: The API endpoint path. + `path`: The server path for the request. + `method`: The HTTP method for the request. 
+ `dry_run`: Whether the request is a dry run or not. Returns: A server request including payload to be sent. Example payload: + ``` { "items": [ { - "column_reference": "srv.db.tbl.col", + "column_ref": {"kind": "fqn", "value": "srv.db.schema.table.name"}, "predicate": ["EQUAL", 2, "__col__", 1], "mode": "dynamic", - "dry_run": false + "predicate_format": "linear_with_arity", + "output_mode": "cell_encrypted", + "dry_run": true/false, }, ... ], "expression_format": {"name": "linear", "version": "0.2.0"} } + ``` """ payload: dict = { @@ -197,10 +210,14 @@ def generate_request( for item in batch: evaluate_request: dict = { - "column_reference": f"{self.server_address}.{item.table_path}.{item.column_name}", + "column_ref": { + "kind": "fqn", + "value": f"{self.server_address}.{item.table_path}.{item.column_name}", + }, "predicate": item.expression, + "output_mode": "cell_encrypted", "mode": "dynamic", - "dry_run": False, + "dry_run": dry_run, } payload["items"].append(evaluate_request) @@ -208,15 +225,14 @@ def generate_request( def generate_result(self, response: dict) -> list[MaskServerOutput]: """ - Generate a list of server outputs from the server response. + Generate a list of server outputs from the server response of a + non-dry-run request. Args: `response`: The response from the mask server. - Returns: - A list of server outputs objects. - Example response: + ``` { "result": "SUCCESS", "items": [ @@ -236,6 +252,10 @@ def generate_result(self, response: dict) -> list[MaskServerOutput]: ... ] } + ``` + + Returns: + A list of server outputs objects. 
""" result: list[MaskServerOutput] = [] diff --git a/tests/mock_server/api_mock_server.py b/tests/mock_server/api_mock_server.py index 2a1b125d9..5e08ca4a4 100644 --- a/tests/mock_server/api_mock_server.py +++ b/tests/mock_server/api_mock_server.py @@ -17,10 +17,11 @@ class EvaluateRequest(BaseModel): - column_reference: str + column_ref: dict[str, str] predicate: list[str | int | float | None | bool] - mode: str = "dynamic" - dry_run: bool = False + output_mode: str + mode: str + dry_run: bool class RequestPayload(BaseModel): @@ -48,7 +49,12 @@ def batch_evaluate( ): responses: list[dict] = [] for item in payload.items: - key = (item.column_reference, tuple(item.predicate)) + assert set(item.column_ref.keys()) == { + "kind", + "value", + }, f"Invalid column_reference format in mock: {item.column_ref!r}." + assert item.column_ref["kind"] == "fqn", "Only FQN kind is supported in mock." + key = (item.column_ref["value"], tuple(item.predicate)) materialization: dict = LOOKUP_TABLE.get(key, {}) response: dict = { From af10c5bd852ec61a07d2eb069ebfc0ade0d10fe4 Mon Sep 17 00:00:00 2001 From: knassre-bodo Date: Sat, 15 Nov 2025 21:19:21 -0500 Subject: [PATCH 25/40] JSON request/response reformatting WIP --- pydough/configs/session.py | 10 +- pydough/conversion/relational_converter.py | 4 +- pydough/mask_server/__init__.py | 4 + pydough/mask_server/mask_server.py | 73 +- .../mask_server_candidate_visitor.py | 0 .../mask_server_rewrite_shuttle.py | 14 +- tests/mock_server/api_mock_server.py | 58 +- tests/mock_server/lookup_table.py | 765 +++++++----------- tests/test_mock_mask_server.py | 4 + 9 files changed, 432 insertions(+), 500 deletions(-) rename pydough/{conversion => mask_server}/mask_server_candidate_visitor.py (100%) rename pydough/{conversion => mask_server}/mask_server_rewrite_shuttle.py (99%) diff --git a/pydough/configs/session.py b/pydough/configs/session.py index 0476d2fa7..7485752ef 100644 --- a/pydough/configs/session.py +++ b/pydough/configs/session.py @@ 
-19,6 +19,8 @@ existing state. """ +from typing import TYPE_CHECKING, Union + from pydough.database_connectors import ( DatabaseContext, DatabaseDialect, @@ -26,11 +28,13 @@ load_database_context, ) from pydough.errors import PyDoughErrorBuilder -from pydough.mask_server import MaskServerInfo from pydough.metadata import GraphMetadata, parse_json_metadata_from_file from .pydough_configs import PyDoughConfigs +if TYPE_CHECKING: + from pydough.mask_server import MaskServerInfo + class PyDoughSession: """ @@ -134,7 +138,7 @@ def error_builder(self, builder: PyDoughErrorBuilder) -> None: self._error_builder = builder @property - def mask_server(self) -> MaskServerInfo | None: + def mask_server(self) -> Union["MaskServerInfo", None]: """ Get the active mask server information. @@ -144,7 +148,7 @@ def mask_server(self) -> MaskServerInfo | None: return self._mask_server @mask_server.setter - def mask_server(self, server_info: MaskServerInfo | None) -> None: + def mask_server(self, server_info: Union["MaskServerInfo", None]) -> None: """ Set the active mask server information. 
diff --git a/pydough/conversion/relational_converter.py b/pydough/conversion/relational_converter.py index 3835a151c..d284602d0 100644 --- a/pydough/conversion/relational_converter.py +++ b/pydough/conversion/relational_converter.py @@ -12,6 +12,8 @@ import pydough.pydough_operators as pydop from pydough.configs import PyDoughSession +from pydough.mask_server.mask_server_candidate_visitor import MaskServerCandidateVisitor +from pydough.mask_server.mask_server_rewrite_shuttle import MaskServerRewriteShuttle from pydough.metadata import ( CartesianProductMetadata, GeneralJoinMetadata, @@ -88,8 +90,6 @@ ) from .hybrid_translator import HybridTranslator from .hybrid_tree import HybridTree -from .mask_server_candidate_visitor import MaskServerCandidateVisitor -from .mask_server_rewrite_shuttle import MaskServerRewriteShuttle from .masking_shuttles import MaskLiteralComparisonShuttle from .merge_projects import merge_projects from .projection_pullup import pullup_projections diff --git a/pydough/mask_server/__init__.py b/pydough/mask_server/__init__.py index 4ec78b406..5cbdccda7 100644 --- a/pydough/mask_server/__init__.py +++ b/pydough/mask_server/__init__.py @@ -3,10 +3,12 @@ """ __all__ = [ + "MaskServerCandidateVisitor", "MaskServerInfo", "MaskServerInput", "MaskServerOutput", "MaskServerResponse", + "MaskServerRewriteShuttle", "RequestMethod", "ServerConnection", "ServerRequest", @@ -18,6 +20,8 @@ MaskServerOutput, MaskServerResponse, ) +from .mask_server_candidate_visitor import MaskServerCandidateVisitor +from .mask_server_rewrite_shuttle import MaskServerRewriteShuttle from .server_connection import ( RequestMethod, ServerConnection, diff --git a/pydough/mask_server/mask_server.py b/pydough/mask_server/mask_server.py index 1b68734f4..e48cbf3e8 100644 --- a/pydough/mask_server/mask_server.py +++ b/pydough/mask_server/mask_server.py @@ -130,6 +130,7 @@ def simplify_simple_expression_batch( self, batch: list[MaskServerInput], dry_run: bool, + hard_limit: int, ) -> 
list[MaskServerOutput]: """ Sends a batch of predicate expressions to the mask server for evaluation. @@ -142,6 +143,8 @@ def simplify_simple_expression_batch( Args: `batch`: The list of inputs to be sent to the server. `dry_run`: Whether to perform a dry run or not. + `hard_limit`: The maximum number of items that can be returned for + each predicate. Returns: An output list containing the response case and payload. @@ -159,7 +162,9 @@ def simplify_simple_expression_batch( path: str = "v1/predicates/batch-evaluate" method: RequestMethod = RequestMethod.POST - request: ServerRequest = self.generate_request(batch, path, method, dry_run) + request: ServerRequest = self.generate_request( + batch, path, method, dry_run, hard_limit + ) response_json = self.connection.send_server_request(request) result: list[MaskServerOutput] = self.generate_result(response_json) @@ -171,6 +176,7 @@ def generate_request( path: str, method: RequestMethod, dry_run: bool, + hard_limit: int, ) -> ServerRequest: """ Generate a server request from the given batch of server inputs and path. @@ -180,6 +186,8 @@ def generate_request( `path`: The server path for the request. `method`: The HTTP method for the request. `dry_run`: Whether the request is a dry run or not. + `hard_limit`: The maximum number of items that can be returned for + each predicate. Returns: A server request including payload to be sent. @@ -194,11 +202,12 @@ def generate_request( "mode": "dynamic", "predicate_format": "linear_with_arity", "output_mode": "cell_encrypted", - "dry_run": true/false, + "dry_run": true, }, ... 
], "expression_format": {"name": "linear", "version": "0.2.0"} + "hard_limit": 1000, } ``` """ @@ -206,6 +215,7 @@ def generate_request( payload: dict = { "items": [], "expression_format": {"name": "linear", "version": "0.2.0"}, + "hard_limit": hard_limit, } for item in batch: @@ -223,13 +233,13 @@ def generate_request( return ServerRequest(path=path, payload=payload, method=method) - def generate_result(self, response: dict) -> list[MaskServerOutput]: + def generate_result(self, response_dict: dict) -> list[MaskServerOutput]: """ Generate a list of server outputs from the server response of a non-dry-run request. Args: - `response`: The response from the mask server. + `response_dict`: The response from the mask server. Example response: ``` @@ -239,14 +249,28 @@ def generate_result(self, response: dict) -> list[MaskServerOutput]: { "index": 0, "result": "SUCCESS", - "decision": {"strategy": "values", "reason": "mock"}, - "predicate_hash": "hash0", - "encryption_mode": "clear", - "materialization": { - "type": "literal", - "operator": "IN", - "values": [0], - "count": 1 + "response": { + "strategy": ..., + + "records": [ + { + "mode": "cell_encrypted", + "cell_encrypted": "abcE1dsa", + } + ], + + "count": ..., + + "stats": ..., + + "column_stats": ..., + + "next_cursor": ..., + + "metadata": { + "dynamic_operator": "IN", + ... + } } }, ... 
@@ -259,7 +283,7 @@ def generate_result(self, response: dict) -> list[MaskServerOutput]: """ result: list[MaskServerOutput] = [] - for item in response.get("items", []): + for item in response_dict.get("items", []): """ Case on whether operator is ERROR or not If ERROR, then response_case is unsupported and payload is None @@ -273,10 +297,20 @@ def generate_result(self, response: dict) -> list[MaskServerOutput]: ) ) else: - materialization: dict = item.get("materialization", {}) - response_case: MaskServerResponse = self.get_server_response_case( - materialization.get("operator", "ERROR") - ) + response: dict = item.get("response", None) + if response is None: + # In this case, use a dummy value as a default to indicate + # the dry run was successful + result.append( + MaskServerOutput( + response_case=MaskServerResponse.IN_ARRAY, + payload=None, + ) + ) + else: + response_case: MaskServerResponse = self.get_server_response_case( + response["metadata"]["dynamic_operator"] + ) payload: Any = None @@ -284,7 +318,10 @@ def generate_result(self, response: dict) -> list[MaskServerOutput]: MaskServerResponse.IN_ARRAY, MaskServerResponse.NOT_IN_ARRAY, ): - payload = materialization.get("values", []) + payload = [ + record.get("cell_encrypted") + for record in response.get("records", []) + ] result.append( MaskServerOutput( diff --git a/pydough/conversion/mask_server_candidate_visitor.py b/pydough/mask_server/mask_server_candidate_visitor.py similarity index 100% rename from pydough/conversion/mask_server_candidate_visitor.py rename to pydough/mask_server/mask_server_candidate_visitor.py diff --git a/pydough/conversion/mask_server_rewrite_shuttle.py b/pydough/mask_server/mask_server_rewrite_shuttle.py similarity index 99% rename from pydough/conversion/mask_server_rewrite_shuttle.py rename to pydough/mask_server/mask_server_rewrite_shuttle.py index 8ff32c3ec..64c28f041 100644 --- a/pydough/conversion/mask_server_rewrite_shuttle.py +++ 
b/pydough/mask_server/mask_server_rewrite_shuttle.py @@ -6,12 +6,6 @@ __all__ = ["MaskServerRewriteShuttle"] import pydough.pydough_operators as pydop -from pydough.mask_server import ( - MaskServerInfo, - MaskServerInput, - MaskServerOutput, - MaskServerResponse, -) from pydough.relational import ( CallExpression, LiteralExpression, @@ -20,6 +14,12 @@ ) from pydough.types import ArrayType, BooleanType, UnknownType +from .mask_server import ( + MaskServerInfo, + MaskServerInput, + MaskServerOutput, + MaskServerResponse, +) from .mask_server_candidate_visitor import MaskServerCandidateVisitor @@ -98,7 +98,7 @@ def process_batch(self) -> None: # to None in the case of failure, or the rewritten expression in the # case of success. responses: list[MaskServerOutput] = ( - self.server_info.simplify_simple_expression_batch(batch, False) + self.server_info.simplify_simple_expression_batch(batch, False, 1000) ) assert len(responses) == len(ancillary_info) for (expr, input_expr), response in zip(ancillary_info, responses): diff --git a/tests/mock_server/api_mock_server.py b/tests/mock_server/api_mock_server.py index 5e08ca4a4..6268f33bf 100644 --- a/tests/mock_server/api_mock_server.py +++ b/tests/mock_server/api_mock_server.py @@ -48,6 +48,8 @@ def batch_evaluate( request: Request, payload: RequestPayload, authorized: bool = Depends(verify_token) ): responses: list[dict] = [] + successful_responses: int = 0 + # Process each item in the batch for item in payload.items: assert set(item.column_ref.keys()) == { "kind", @@ -55,17 +57,49 @@ def batch_evaluate( }, f"Invalid column_reference format in mock: {item.column_ref!r}." assert item.column_ref["kind"] == "fqn", "Only FQN kind is supported in mock." 
key = (item.column_ref["value"], tuple(item.predicate)) - materialization: dict = LOOKUP_TABLE.get(key, {}) - - response: dict = { + table_result: tuple[str, list] | None = LOOKUP_TABLE.get(key, None) + out_item: dict = { "index": payload.items.index(item) + 1, - "result": "SUCCESS" if materialization != {} else "UNSUPPORTED", - "decision": {"strategy": "values", "reason": "mock"}, - "predicate_hash": "hash1", - "encryption_mode": "clear", - "materialization": materialization, } - # Adding the index - responses.append(response) - - return {"result": "SUCCESS", "items": responses} + if table_result is None: + out_item["result"] = "ERROR" + else: + output_case, output_list = table_result + out_item["SUCCESS"] = "ERROR" + out_item["response"] = { + "strategy": "early_stop", + "records": [ + { + "mode": "cell_encrypted", + "cell_encrypted": elem, + } + for elem in output_list + ], + "count": len(output_list), + "stats": {"execution_time_ms": 42}, + "column_stats": None, + "next_cursor": None, + "metadata": { + "requested_output_mode": "cell_encrypted", + "actual_output_mode": "cell_encrypted", + "available_output_modes": ["cell_encrypted"], + "encryption_mode": None, + "dynamic_operator": output_case, + }, + } + # Don't include response in dry run case + if item.dry_run: + out_item.pop("response") + successful_responses += 1 + + # Adding the new item to the batch output + responses.append(out_item) + + result: str + if successful_responses == len(payload.items): + result = "SUCCESS" + elif successful_responses == 0: + result = "ERROR" + else: + result = "PARTIAL_FAILURE" + return {"result": result, "items": responses} diff --git a/tests/mock_server/lookup_table.py b/tests/mock_server/lookup_table.py index 2032fa06a..1894f441c 100644 --- a/tests/mock_server/lookup_table.py +++ b/tests/mock_server/lookup_table.py @@ -3,70 +3,57 @@ request column reference and predicate. 
""" -LOOKUP_TABLE: dict = { +LOOKUP_TABLE: dict[tuple[str, tuple], tuple[str, list]] = { # key: (column_reference, tuple(predicate)) - ("srv.db.tbl.col", ("EQUAL", 2, "__col__", 0)): { - "type": "literal", - "operator": "NOT_IN", - "values": [ + # value: (response_case, payload) + ("srv.db.tbl.col", ("EQUAL", 2, "__col__", 0)): ( + "NOT_IN", + [ "value1", "value2", "value3", ], - "count": 3, - }, + ), ( "srv.db.orders.order_date", ("BETWEEN", 3, "__col__", "2025-01-01", "2025-02-01"), - ): { - "type": "literal", - "operator": "IN", - "values": [ + ): ( + "IN", + [ "2025-01-01", "2025-01-02", "2025-01-03", "2025-01-04", "2025-01-05", ], - "count": 5, - }, - ("srv.db.tbl.col", ("NOT_EQUAL", 2, "__col__", "LOWER", 1, "Smith")): { - "type": "literal", - "operator": "NOT_IN", - "values": ["smith"], - "count": 1, - }, + ), + ("srv.db.tbl.col", ("NOT_EQUAL", 2, "__col__", "LOWER", 1, "Smith")): ( + "NOT_IN", + ["smith"], + ), # booleans, - ("srv.db.tbl.col", ("NOT_EQUAL", 2, "__col__", True)): { - "type": "literal", - "operator": "IN", - "values": [False], - "count": 1, - }, + ("srv.db.tbl.col", ("NOT_EQUAL", 2, "__col__", True)): ( + "IN", + [False], + ), # decimals (string format) - ("srv.db.tbl.col", ("LT", 2, "__col__", "123.654445")): { - "type": "literal", - "operator": "IN", - "values": ["123.121123", "123.654444", "123.654445"], - "count": 3, - }, + ("srv.db.tbl.col", ("LT", 2, "__col__", "123.654445")): ( + "IN", + ["123.121123", "123.654444", "123.654445"], + ), # json embedded - ("srv.db.tbl.col", ("NOT_EQUAL", 2, "__col__", '{"key": "value"}')): { - "type": "literal", - "operator": "NOT_IN", - "values": ['{"key": "value"}'], - "count": 1, - }, + ("srv.db.tbl.col", ("NOT_EQUAL", 2, "__col__", '("key": "value")')): ( + "NOT_IN", + ['("key": "value")'], + ), # NULLs and Money ( "srv.db.tbl.col", ("AND", 2, "NOT_EQUAL", 2, "__col__", None, "GT", 2, "__col__", "$45.00"), - ): { - "type": "literal", - "operator": "NOT_IN", - "values": [None, "$44.50", "$43.20", 
"$44.99"], - "count": 4, - }, + ): ( + "NOT_IN", + [None, "$44.50", "$43.20", "$44.99"], + ), # Result with Regex, Bytea, Backslash in really nested expression. ( "srv.db.tbl.col", @@ -88,38 +75,31 @@ "__col__", '"Hello World"', ), - ): { - "type": "literal", - "operator": "IN", - "values": ['"Hello"', "HelloWorld", "SGVsbG9Xb3JsZA=="], - "count": 3, - }, + ): ( + "IN", + ['"Hello"', "HelloWorld", "SGVsbG9Xb3JsZA=="], + ), # CRYPTBANK hardcoded responses - ("srv.CRBNK.CUSTOMERS.c_lname", ("EQUAL", 2, "__col__", "lee")): { - "type": "literal", - "operator": "IN", - "values": ["LEE"], - "count": 1, - }, + ("srv.CRBNK.CUSTOMERS.c_lname", ("EQUAL", 2, "__col__", "lee")): ( + "IN", + ["LEE"], + ), ( "srv.CRBNK.CUSTOMERS.c_birthday", ("BETWEEN", 3, 1980, "YEAR", 1, "__col__", 1985), - ): { - "type": "literal", - "operator": "IN", - "values": [ + ): ( + "IN", + [ "1980-01-18", "1981-07-21", "1981-11-15", "1982-11-07", "1983-12-27", ], - "count": 5, - }, - ("srv.CRBNK.TRANSACTIONS.t_amount", ("GT", 2, "__col__", 9000.0)): { - "type": "literal", - "operator": "IN", - "values": [ + ), + ("srv.CRBNK.TRANSACTIONS.t_amount", ("GT", 2, "__col__", 9000.0)): ( + "IN", + [ -8934.44, -8881.98, -8736.83, @@ -142,8 +122,7 @@ -8077.89, -8067.8, ], - "count": 21, - }, + ), ( "srv.CRBNK.TRANSACTIONS.t_ts", ( @@ -162,10 +141,9 @@ "__col__", 2022, ), - ): { - "type": "literal", - "operator": "IN", - "values": [ + ): ( + "IN", + [ "2022-06-03 05:08:58", "2022-06-12 00:24:06", "2022-06-13 05:50:39", @@ -177,8 +155,7 @@ "2022-06-29 05:40:38", "2022-06-29 19:53:42", ], - "count": 10, - }, + ), ( "srv.CRBNK.ACCOUNTS.a_type", ( @@ -193,104 +170,76 @@ "__col__", "savings", ), - ): { - "type": "literal", - "operator": "IN", - "values": ["avingss", "etirementr"], - "count": 2, - }, - ("srv.CRBNK.CUSTOMERS.c_phone", ("ENDSWITH", 2, "__col__", "5")): { - "type": "literal", - "operator": "IN", - "values": ["555-091-2345", "555-901-2345"], - "count": 2, - }, + ): ( + "IN", + ["avingss", "etirementr"], 
+ ), + ("srv.CRBNK.CUSTOMERS.c_phone", ("ENDSWITH", 2, "__col__", "5")): ( + "IN", + ["555-091-2345", "555-901-2345"], + ), ( "srv.CRBNK.CUSTOMERS.c_fname", ("OR", 2, "ENDSWITH", 2, "__col__", "a", "ENDSWITH", 2, "__col__", "e"), - ): { - "type": "literal", - "operator": "IN", - "values": ["ALICE", "GRACE", "LUKE", "MARIA", "OLIVIA", "QUEENIE", "SOPHIA"], - "count": 8, - }, - ("srv.CRBNK.CUSTOMERS.c_fname", ("ENDSWITH", 2, "__col__", "s")): { - "type": "literal", - "operator": "IN", - "values": ["JAMES", "NICHOLAS", "THOMAS"], - "count": 3, - }, - ("srv.CRBNK.CUSTOMERS.c_lname", ("NOT_EQUAL", 2, "__col__", "lopez")): { - "type": "literal", - "operator": "NOT_IN", - "values": ["LOPEZ"], - "count": 1, - }, - ("srv.CRBNK.CUSTOMERS.c_lname", ("NOT_EQUAL", 2, "__col__", "lee")): { - "type": "literal", - "operator": "NOT_IN", - "values": ["LEE"], - "count": 1, - }, + ): ( + "IN", + ["ALICE", "GRACE", "LUKE", "MARIA", "OLIVIA", "QUEENIE", "SOPHIA"], + ), + ("srv.CRBNK.CUSTOMERS.c_fname", ("ENDSWITH", 2, "__col__", "s")): ( + "IN", + ["JAMES", "NICHOLAS", "THOMAS"], + ), + ("srv.CRBNK.CUSTOMERS.c_lname", ("NOT_EQUAL", 2, "__col__", "lopez")): ( + "NOT_IN", + ["LOPEZ"], + ), + ("srv.CRBNK.CUSTOMERS.c_lname", ("NOT_EQUAL", 2, "__col__", "lee")): ( + "NOT_IN", + ["LEE"], + ), ( "srv.CRBNK.CUSTOMERS.c_lname", ("IN", 4, "__col__", "lee", "smith", "rodriguez"), - ): { - "type": "literal", - "operator": "IN", - "values": ["LEE", "SMITH", "RODRIGUEZ"], - "count": 3, - }, + ): ( + "IN", + ["LEE", "SMITH", "RODRIGUEZ"], + ), ( "srv.CRBNK.CUSTOMERS.c_lname", ("NOT", 1, "IN", 4, "__col__", "lee", "smith", "rodriguez"), - ): { - "type": "literal", - "operator": "NOT_IN", - "values": ["LEE", "SMITH", "RODRIGUEZ"], - "count": 3, - }, - ("srv.CRBNK.CUSTOMERS.c_phone", ("STARTSWITH", 2, "__col__", "555-8")): { - "type": "literal", - "operator": "IN", - "values": ["555-809-1234", "555-870-9123"], - "count": 2, - }, - ("srv.CRBNK.CUSTOMERS.c_email", ("ENDSWITH", 2, "__col__", "gmail.com")): 
{ - "type": "literal", - "operator": "IN", - "values": [ + ): ( + "NOT_IN", + ["LEE", "SMITH", "RODRIGUEZ"], + ), + ("srv.CRBNK.CUSTOMERS.c_phone", ("STARTSWITH", 2, "__col__", "555-8")): ( + "IN", + ["555-809-1234", "555-870-9123"], + ), + ("srv.CRBNK.CUSTOMERS.c_email", ("ENDSWITH", 2, "__col__", "gmail.com")): ( + "IN", + [ "livia.a22@gmail.como", "ob.smith77@gmail.comb", "ob_moore78@gmail.comr", "opez.luke99@gmail.coml", ], - "count": 4, - }, - ("srv.CRBNK.CUSTOMERS.c_birthday", ("EQUAL", 2, "YEAR", 1, "__col__", 1978)): { - "type": "literal", - "operator": "IN", - "values": ["1976-10-27", "1976-12-02"], - "count": 2, - }, - ("srv.CRBNK.CUSTOMERS.c_birthday", ("EQUAL", 2, "__col__", "1985-04-12")): { - "type": "literal", - "operator": "IN", - "values": ["1983-12-27"], - "count": 1, - }, - ("srv.CRBNK.CUSTOMERS.c_fname", ("ENDSWITH", 2, "__col__", "e")): { - "type": "literal", - "operator": "IN", - "values": ["ALICE", "GRACE", "LUKE", "QUEENIE"], - "count": 4, - }, - ("srv.CRBNK.CUSTOMERS.c_lname", ("ENDSWITH", 2, "__col__", "e")): { - "type": "literal", - "operator": "IN", - "values": ["LEE", "MOORE"], - "count": 2, - }, + ), + ("srv.CRBNK.CUSTOMERS.c_birthday", ("EQUAL", 2, "YEAR", 1, "__col__", 1978)): ( + "IN", + ["1976-10-27", "1976-12-02"], + ), + ("srv.CRBNK.CUSTOMERS.c_birthday", ("EQUAL", 2, "__col__", "1985-04-12")): ( + "IN", + ["1983-12-27"], + ), + ("srv.CRBNK.CUSTOMERS.c_fname", ("ENDSWITH", 2, "__col__", "e")): ( + "IN", + ["ALICE", "GRACE", "LUKE", "QUEENIE"], + ), + ("srv.CRBNK.CUSTOMERS.c_lname", ("ENDSWITH", 2, "__col__", "e")): ( + "IN", + ["LEE", "MOORE"], + ), ( "srv.CRBNK.ACCOUNTS.a_type", ( @@ -305,82 +254,61 @@ "__col__", "savings", ), - ): { - "type": "literal", - "operator": "NOT_IN", - "values": ["avingss", "heckingc"], - "count": 2, - }, - ("srv.CRBNK.CUSTOMERS.c_birthday", ("NOT_EQUAL", 2, "__col__", "1991-11-15")): { - "type": "literal", - "operator": "NOT_IN", - "values": ["1990-07-31"], - "count": 1, - }, - 
("srv.CRBNK.CUSTOMERS.c_birthday", ("LTE", 2, "__col__", "1991-11-15")): { - "type": "literal", - "operator": "NOT_IN", - "values": ["1991-03-13", "1992-05-06", "1993-01-01", "1994-06-15"], - "count": 4, - }, - ("srv.CRBNK.CUSTOMERS.c_birthday", ("GT", 2, "__col__", "1991-11-15")): { - "type": "literal", - "operator": "IN", - "values": ["1991-03-13", "1992-05-06", "1993-01-01", "1994-06-15"], - "count": 4, - }, - ("srv.CRBNK.CUSTOMERS.c_birthday", ("LT", 2, "__col__", "1991-11-15")): { - "type": "literal", - "operator": "NOT_IN", - "values": [ + ): ( + "NOT_IN", + ["avingss", "heckingc"], + ), + ("srv.CRBNK.CUSTOMERS.c_birthday", ("NOT_EQUAL", 2, "__col__", "1991-11-15")): ( + "NOT_IN", + ["1990-07-31"], + ), + ("srv.CRBNK.CUSTOMERS.c_birthday", ("LTE", 2, "__col__", "1991-11-15")): ( + "NOT_IN", + ["1991-03-13", "1992-05-06", "1993-01-01", "1994-06-15"], + ), + ("srv.CRBNK.CUSTOMERS.c_birthday", ("GT", 2, "__col__", "1991-11-15")): ( + "IN", + ["1991-03-13", "1992-05-06", "1993-01-01", "1994-06-15"], + ), + ("srv.CRBNK.CUSTOMERS.c_birthday", ("LT", 2, "__col__", "1991-11-15")): ( + "NOT_IN", + [ "1990-07-31", "1991-03-13", "1992-05-06", "1993-01-01", "1994-06-15", ], - "count": 4, - }, - ("srv.CRBNK.CUSTOMERS.c_birthday", ("GTE", 2, "__col__", "1991-11-15")): { - "type": "literal", - "operator": "IN", - "values": [ + ), + ("srv.CRBNK.CUSTOMERS.c_birthday", ("GTE", 2, "__col__", "1991-11-15")): ( + "IN", + [ "1990-07-31", "1991-03-13", "1992-05-06", "1993-01-01", "1994-06-15", ], - "count": 4, - }, - ("srv.CRBNK.TRANSACTIONS.t_amount", ("LT", 2, "__col__", 0)): { - "type": "literal", - "operator": "IN", - "values": [], - "count": 0, - }, - ("srv.CRBNK.TRANSACTIONS.t_amount", ("GT", 2, "__col__", 0)): { - "type": "literal", - "operator": "NOT_IN", - "values": [], - "count": 0, - }, - ("srv.CRBNK.CUSTOMERS.c_birthday", ("LTE", 2, "__col__", "1925-01-01")): { - "type": "literal", - "operator": "IN", - "values": [], - "count": 0, - }, - ("srv.CRBNK.CUSTOMERS.c_phone", 
("EQUAL", 2, "__col__", "555-123-456")): { - "type": "literal", - "operator": "IN", - "values": [], - "count": 0, - }, - ("srv.CRBNK.ACCOUNTS.a_open_ts", ("EQUAL", 2, "YEAR", 1, "__col__", 2021)): { - "type": "literal", - "operator": "IN", - "values": [ + ), + ("srv.CRBNK.TRANSACTIONS.t_amount", ("LT", 2, "__col__", 0)): ( + "IN", + [], + ), + ("srv.CRBNK.TRANSACTIONS.t_amount", ("GT", 2, "__col__", 0)): ( + "NOT_IN", + [], + ), + ("srv.CRBNK.CUSTOMERS.c_birthday", ("LTE", 2, "__col__", "1925-01-01")): ( + "IN", + [], + ), + ("srv.CRBNK.CUSTOMERS.c_phone", ("EQUAL", 2, "__col__", "555-123-456")): ( + "IN", + [], + ), + ("srv.CRBNK.ACCOUNTS.a_open_ts", ("EQUAL", 2, "YEAR", 1, "__col__", 2021)): ( + "IN", + [ "2017-02-11 10:59:51", "2017-06-15 12:41:51", "2017-07-07 14:26:51", @@ -388,8 +316,7 @@ "2017-09-15 11:26:51", "2018-01-02 12:26:51", ], - "count": 6, - }, + ), ( "srv.CRBNK.CUSTOMERS.c_birthday", ( @@ -428,40 +355,33 @@ 1991, 1993, ), - ): { - "type": "literal", - "operator": "IN", - "values": ["1980-01-18", "1981-11-15", "1990-07-31", "1994-06-15"], - "count": 4, - }, + ): ( + "IN", + ["1980-01-18", "1981-11-15", "1990-07-31", "1994-06-15"], + ), ( "srv.CRBNK.CUSTOMERS.c_birthday", ("IN", 5, "__col__", "1991-11-15", "1978-02-11", "2005-03-14", "1985-04-12"), - ): { - "type": "literal", - "operator": "IN", - "values": ["1990-07-31", "1976-10-27", "1983-12-27"], - "count": 3, - }, + ): ( + "IN", + ["1990-07-31", "1976-10-27", "1983-12-27"], + ), ( "srv.CRBNK.ACCOUNTS.a_open_ts", ("BETWEEN", 3, "2020-03-28 09:20:00", "__col__", "2020-09-20 08:30:00"), - ): { - "type": "literal", - "operator": "IN", - "values": [ + ): ( + "IN", + [ "2016-04-29 11:46:51", "2016-06-10 12:56:51", "2016-07-20 15:46:51", "2016-08-22 10:41:51", "2016-09-03 12:01:51", ], - "count": 5, - }, - ("srv.CRBNK.CUSTOMERS.c_email", ("CONTAINS", 2, "__col__", "mail")): { - "type": "literal", - "operator": "NOT_IN", - "values": [ + ), + ("srv.CRBNK.CUSTOMERS.c_email", ("CONTAINS", 2, "__col__", 
"mail")): ( + "NOT_IN", + [ "homasl@outlook.comt", "ueenie.t@outlook.netq", ".hernandez@icloud.comk", @@ -471,12 +391,10 @@ ".lee@outlook.comc", "lice_j@example.orga", ], - "count": 8, - }, - ("srv.CRBNK.CUSTOMERS.c_email", ("LIKE", 2, "__col__", "%.%@%mail%")): { - "type": "literal", - "operator": "IN", - "values": [ + ), + ("srv.CRBNK.CUSTOMERS.c_email", ("LIKE", 2, "__col__", "%.%@%mail%")): ( + "IN", + [ "ophia.jackson@mail.orgs", "livia.a22@gmail.como", ".gonzalez@ymail.comm", @@ -486,12 +404,10 @@ "mily.jones@mail.come", "ob.smith77@gmail.comb", ], - "count": 8, - }, - ("srv.CRBNK.ACCOUNTS.a_open_ts", ("IN", 4, "MONTH", 1, "__col__", 1, 2, 3)): { - "type": "literal", - "operator": "IN", - "values": [ + ), + ("srv.CRBNK.ACCOUNTS.a_open_ts", ("IN", 4, "MONTH", 1, "__col__", 1, 2, 3)): ( + "IN", + [ "2013-04-22 11:37:51", "2017-02-11 10:59:51", "2011-04-30 15:16:51", @@ -504,17 +420,14 @@ "2012-03-22 12:16:51", "2015-04-06 13:46:51", ], - "count": 8, - }, + ), ( "srv.CRBNK.ACCOUNTS.a_open_ts", ("EQUAL", 2, "QUARTER", 1, "__col__", "DAY", 1, "__col__"), - ): { - "type": "literal", - "operator": "IN", - "values": ["2015-05-04 18:01:51"], - "count": 1, - }, + ): ( + "IN", + ["2015-05-04 18:01:51"], + ), ( "srv.CRBNK.ACCOUNTS.a_open_ts", ( @@ -533,10 +446,9 @@ "__col__", 20, ), - ): { - "type": "literal", - "operator": "IN", - "values": [ + ): ( + "IN", + [ "2013-04-22 11:37:51", "2017-09-15 11:26:51", "2018-03-15 10:36:51", @@ -544,30 +456,25 @@ "2016-08-22 10:41:51", "2014-08-15 11:31:51", ], - "count": 6, - }, - ("srv.CRBNK.TRANSACTIONS.t_ts", ("EQUAL", 2, "SECOND", 1, "__col__", 23)): { - "type": "literal", - "operator": "IN", - "values": [ + ), + ("srv.CRBNK.TRANSACTIONS.t_ts", ("EQUAL", 2, "SECOND", 1, "__col__", 23)): ( + "IN", + [ "2020-11-11 09:03:02", "2023-09-15 09:00:02", "2024-07-21 23:24:02", ], - "count": 3, - }, + ), ( "srv.CRBNK.ACCOUNTS.a_balance", ("BETWEEN", 3, 200, "ABS", 1, "SUB", 2, "__col__", 7250, 600), - ): { - "type": "literal", - 
"operator": "IN", - "values": [ + ): ( + "IN", + [ 46240000.0, 57760000.0, ], - "count": 2, - }, + ), ( "srv.CRBNK.ACCOUNTS.a_open_ts", ( @@ -586,137 +493,115 @@ "__col__", 10, ), - ): { - "type": "literal", - "operator": "IN", - "values": [ + ): ( + "IN", + [ "2018-03-15 10:36:51", "2018-01-02 12:26:51", ], - "count": 2, - }, + ), ( "srv.CRBNK.ACCOUNTS.a_open_ts", ("EQUAL", 2, "LEAST", 2, "HOUR", 1, "__col__", "MINUTE", 1, "__col__", 15), - ): { - "type": "literal", - "operator": "IN", - "values": [ + ): ( + "IN", + [ "2015-08-10 18:11:51", "2015-05-04 18:01:51", "2015-10-19 18:11:51", "2014-10-03 17:41:51", ], - "count": 4, - }, + ), ( "srv.CRBNK.CUSTOMERS.c_phone", ("CONTAINS", 2, "CONCAT", 2, "1-", "__col__", "1-5"), - ): { - "type": "literal", - "operator": "NOT_IN", - "values": [], - "count": 0, - }, + ): ( + "NOT_IN", + [], + ), ( "srv.CRBNK.CUSTOMERS.c_phone", ("CONTAINS", 2, "CONCAT", 3, "1", "-", "__col__", "1-5"), - ): { - "type": "literal", - "operator": "NOT_IN", - "values": [], - "count": 0, - }, + ): ( + "NOT_IN", + [], + ), ( "srv.CRBNK.CUSTOMERS.c_phone", ("CONTAINS", 2, "CONCAT", 5, "1", "-", "__col__", "-", "1", "5-1"), - ): { - "type": "literal", - "operator": "IN", - "values": [ + ): ( + "IN", + [ "555-112-3456", "555-901-2345", "555-091-2345", "555-123-4567", ], - "count": 4, - }, + ), ( "srv.CRBNK.CUSTOMERS.c_birthday", ("IN", 3, "COALESCE", 2, "YEAR", 1, "__col__", 1990, 1990, 1991), - ): { - "type": "literal", - "operator": "IN", - "values": [ + ): ( + "IN", + [ "1990-07-31", "1989-04-07", None, ], - "count": 3, - }, + ), ( "srv.CRBNK.CUSTOMERS.c_birthday", ("IN", 3, "COALESCE", 2, "YEAR", 1, "__col__", 1990, 1990, 2005), - ): { - "type": "literal", - "operator": "IN", - "values": [ + ): ( + "IN", + [ "1989-04-07", None, ], - "count": 2, - }, + ), ( "srv.CRBNK.CUSTOMERS.c_birthday", ("IN", 3, "COALESCE", 2, "YEAR", 1, "__col__", 2005, 2005, 2006), - ): { - "type": "literal", - "operator": "IN", - "values": [ + ): ( + "IN", + [ None, ], - 
"count": 1, - }, + ), ( "srv.CRBNK.CUSTOMERS.c_birthday", ("NOT", 1, "IN", 3, "COALESCE", 2, "YEAR", 1, "__col__", 1990, 1990, 1991), - ): { - "type": "literal", - "operator": "NOT_IN", - "values": [ + ): ( + "NOT_IN", + [ "1990-07-31", "1989-04-07", None, ], - "count": 3, - }, + ), ( "srv.CRBNK.CUSTOMERS.c_birthday", ("NOT", 1, "IN", 3, "COALESCE", 2, "YEAR", 1, "__col__", 1990, 1990, 2005), - ): { - "type": "literal", - "operator": "NOT_IN", - "values": [ + ): ( + "NOT_IN", + [ "1989-04-07", None, ], - "count": 2, - }, + ), ( "srv.CRBNK.CUSTOMERS.c_birthday", ("NOT", 1, "IN", 3, "COALESCE", 2, "YEAR", 1, "__col__", 2005, 2005, 2006), - ): { - "type": "literal", - "operator": "NOT_IN", - "values": [ + ): ( + "NOT_IN", + [ None, ], - "count": 1, - }, + ), ( "srv.CRBNK.CUSTOMERS.c_fname", ("IN", 4, "SLICE", 3, "__col__", 0, 1, "q", "r", "s"), - ): {"type": "literal", "operator": "IN", "values": ["QUEENIE", "ROBERT", "SOPHIA"]}, + ): ("IN", ["QUEENIE", "ROBERT", "SOPHIA"]), ( "srv.CRBNK.CUSTOMERS.c_lname", ( @@ -741,25 +626,20 @@ "z", "e", ), - ): { - "type": "literal", - "operator": "IN", - "values": ["LEE", "RODRIGUEZ"], - "count": 2, - }, - ("srv.CRBNK.CUSTOMERS.c_fname", ("EQUAL", 2, "SLICE", 3, "__col__", 0, 1, "i")): { - "type": "literal", - "operator": "IN", - "values": ["ISABEL"], - "count": 1, - }, + ): ( + "IN", + ["LEE", "RODRIGUEZ"], + ), + ("srv.CRBNK.CUSTOMERS.c_fname", ("EQUAL", 2, "SLICE", 3, "__col__", 0, 1, "i")): ( + "IN", + ["ISABEL"], + ), ( "srv.CRBNK.CUSTOMERS.c_fname", ("IN", 6, "SLICE", 3, "__col__", 1, 2, "ar", "li", "ra", "to", "am"), - ): { - "type": "literal", - "operator": "IN", - "values": [ + ): ( + "IN", + [ "ALICE", "CAROL", "FRANK", @@ -769,15 +649,13 @@ "MARIA", "OLIVIA", ], - "count": 5, - }, + ), ( "srv.CRBNK.TRANSACTIONS.t_ts", ("EQUAL", 2, "DATETRUNC", 2, "year", "__col__", "2023-01-01"), - ): { - "type": "literal", - "operator": "IN", - "values": [ + ): ( + "IN", + [ "2022-12-31 17:42:54", "2023-01-04 12:05:15", "2023-01-07 
22:11:27", @@ -840,15 +718,13 @@ "2023-12-16 00:51:23", "2023-12-23 07:54:22", ], - "count": 61, - }, + ), ( "srv.CRBNK.TRANSACTIONS.t_ts", ("EQUAL", 2, "DATETRUNC", 2, "quarter", "__col__", "2023-04-01"), - ): { - "type": "literal", - "operator": "IN", - "values": [ + ): ( + "IN", + [ "2023-04-18 00:35:40", "2023-04-25 18:54:26", "2023-04-29 04:58:30", @@ -867,15 +743,13 @@ "2023-06-27 03:21:19", "2023-06-27 10:34:20", ], - "count": 17, - }, + ), ( "srv.CRBNK.TRANSACTIONS.t_ts", ("EQUAL", 2, "DATETRUNC", 2, "month", "__col__", "2023-06-01"), - ): { - "type": "literal", - "operator": "IN", - "values": [ + ): ( + "IN", + [ "2023-06-01 13:50:10", "2023-06-01 13:50:14", "2023-06-04 10:35:26", @@ -885,121 +759,100 @@ "2023-06-27 03:21:19", "2023-06-27 10:34:20", ], - "count": 8, - }, + ), ( "srv.CRBNK.TRANSACTIONS.t_ts", ("EQUAL", 2, "DATETRUNC", 2, "day", "__col__", "2023-06-02"), - ): { - "type": "literal", - "operator": "IN", - "values": [ + ): ( + "IN", + [ "2023-06-01 13:50:10", "2023-06-01 13:50:14", ], - "count": 2, - }, + ), ( "srv.CRBNK.TRANSACTIONS.t_ts", ("EQUAL", 2, "DATETRUNC", 2, "hour", "__col__", "2023-06-02 04:00:00"), - ): { - "type": "literal", - "operator": "IN", - "values": [ + ): ( + "IN", + [ "2023-06-01 13:50:10", "2023-06-01 13:50:14", ], - "count": 2, - }, + ), ( "srv.CRBNK.TRANSACTIONS.t_ts", ("EQUAL", 2, "DATETRUNC", 2, "minute", "__col__", "2023-06-02 04:55:00"), - ): { - "type": "literal", - "operator": "IN", - "values": [ + ): ( + "IN", + [ "2023-06-01 13:50:10", "2023-06-01 13:50:14", ], - "count": 2, - }, + ), ( "srv.CRBNK.TRANSACTIONS.t_ts", ("EQUAL", 2, "DATETRUNC", 2, "second", "__col__", "2023-06-02 04:55:31"), - ): { - "type": "literal", - "operator": "IN", - "values": [ + ): ( + "IN", + [ "2023-06-01 13:50:10", ], - "count": 1, - }, + ), ( "srv.CRBNK.TRANSACTIONS.t_ts", ("EQUAL", 2, "DATEADD", 3, 1, "years", "__col__", "2020-11-11 18:00:52"), - ): { - "type": "literal", - "operator": "IN", - "values": [ + ): ( + "IN", + [ 
"2019-11-11 02:55:31", ], - "count": 1, - }, + ), ( "srv.CRBNK.TRANSACTIONS.t_ts", ("EQUAL", 2, "DATEADD", 3, 2, "quarters", "__col__", "2020-05-11 18:00:52"), - ): { - "type": "literal", - "operator": "IN", - "values": [ + ): ( + "IN", + [ "2019-11-11 02:55:31", ], - "count": 1, - }, + ), ( "srv.CRBNK.TRANSACTIONS.t_ts", ("EQUAL", 2, "DATEADD", 3, -5, "months", "__col__", "2019-06-11 18:00:52"), - ): { - "type": "literal", - "operator": "IN", - "values": [ + ): ( + "IN", + [ "2019-11-11 02:55:31", ], - "count": 1, - }, + ), ( "srv.CRBNK.TRANSACTIONS.t_ts", ("EQUAL", 2, "DATEADD", 3, 10, "days", "__col__", "2019-11-21 18:00:52"), - ): { - "type": "literal", - "operator": "IN", - "values": [ + ): ( + "IN", + [ "2019-11-11 02:55:31", ], - "count": 1, - }, + ), ( "srv.CRBNK.TRANSACTIONS.t_ts", ("EQUAL", 2, "DATEADD", 3, 1000, "hours", "__col__", "2019-12-23 10:00:52"), - ): { - "type": "literal", - "operator": "IN", - "values": [ + ): ( + "IN", + [ "2019-11-11 02:55:31", ], - "count": 1, - }, + ), ( "srv.CRBNK.TRANSACTIONS.t_ts", ("EQUAL", 2, "DATEADD", 3, 10000, "minutes", "__col__", "2019-11-18 16:40:52"), - ): { - "type": "literal", - "operator": "IN", - "values": [ + ): ( + "IN", + [ "2019-11-11 02:55:31", ], - "count": 1, - }, + ), ( "srv.CRBNK.TRANSACTIONS.t_ts", ( @@ -1012,14 +865,12 @@ "__col__", "2019-10-31 04:14:12", ), - ): { - "type": "literal", - "operator": "IN", - "values": [ + ): ( + "IN", + [ "2019-11-11 02:55:31", ], - "count": 1, - }, + ), ( "srv.CRBNK.TRANSACTIONS.t_ts", ( @@ -1035,15 +886,13 @@ "__col__", "2019-10-31", ), - ): { - "type": "literal", - "operator": "IN", - "values": [ + ): ( + "IN", + [ "2019-11-02 11:58:37", "2019-11-02 12:54:09", "2019-11-11 02:55:31", "2019-11-11 15:44:22", ], - "count": 4, - }, + ), } diff --git a/tests/test_mock_mask_server.py b/tests/test_mock_mask_server.py index 19bbbd42e..d1baa4604 100644 --- a/tests/test_mock_mask_server.py +++ b/tests/test_mock_mask_server.py @@ -249,6 +249,8 @@ def test_mock_mask_server( 
# Doing the request response: list[MaskServerOutput] = mask_server.simplify_simple_expression_batch( batch=batch, + dry_run=False, + hard_limit=1000, ) assert response == answer, ( @@ -301,4 +303,6 @@ def test_mock_mask_server_errors( # Doing the request mask_server.simplify_simple_expression_batch( batch=batch, + dry_run=False, + hard_limit=1000, ) From 0371ec515e0e59e73edd94a1ea7b1d9600950068 Mon Sep 17 00:00:00 2001 From: knassre-bodo Date: Wed, 19 Nov 2025 11:45:14 -0500 Subject: [PATCH 26/40] Adding four-phase algorithm, need to implement step #3 --- pydough/mask_server/mask_server.py | 35 +++-- .../mask_server_rewrite_shuttle.py | 47 ++++++- tests/test_masked_sqlite.py | 128 +++++++++++++++--- tests/testing_utilities.py | 11 +- 4 files changed, 177 insertions(+), 44 deletions(-) diff --git a/pydough/mask_server/mask_server.py b/pydough/mask_server/mask_server.py index e48cbf3e8..c2b025271 100644 --- a/pydough/mask_server/mask_server.py +++ b/pydough/mask_server/mask_server.py @@ -152,7 +152,12 @@ def simplify_simple_expression_batch( # Log the batch request pyd_logger = get_logger(__name__) - pyd_logger.info(f"Batch request to Mask Server ({len(batch)} items):") + if dry_run: + pyd_logger.info( + f"Batch request (dry run) to Mask Server ({len(batch)} items):" + ) + else: + pyd_logger.info(f"Batch request to Mask Server ({len(batch)} items):") for idx, item in enumerate(batch): pyd_logger.info( f"({idx + 1}) {item.table_path}.{item.column_name}: {item.expression}" @@ -312,22 +317,22 @@ def generate_result(self, response_dict: dict) -> list[MaskServerOutput]: response["metadata"]["dynamic_operator"] ) - payload: Any = None + payload: Any = None - if response_case in ( - MaskServerResponse.IN_ARRAY, - MaskServerResponse.NOT_IN_ARRAY, - ): - payload = [ - record.get("cell_encrypted") - for record in response.get("records", []) - ] + if response_case in ( + MaskServerResponse.IN_ARRAY, + MaskServerResponse.NOT_IN_ARRAY, + ): + payload = [ + 
record.get("cell_encrypted") + for record in response.get("records", []) + ] - result.append( - MaskServerOutput( - response_case=response_case, - payload=payload, + result.append( + MaskServerOutput( + response_case=response_case, + payload=payload, + ) ) - ) return result diff --git a/pydough/mask_server/mask_server_rewrite_shuttle.py b/pydough/mask_server/mask_server_rewrite_shuttle.py index 64c28f041..dbc107e72 100644 --- a/pydough/mask_server/mask_server_rewrite_shuttle.py +++ b/pydough/mask_server/mask_server_rewrite_shuttle.py @@ -91,6 +91,25 @@ def process_batch(self) -> None: expression=expression_list, ) ) + self.candidate_visitor.processed_candidates.add(expr) + + # Wipe the candidate pool to prevent duplicate processing, since every + # candidate already in the pool has now been handled. + self.candidate_visitor.candidate_pool.clear() + + # First, send the dry response batch to the Mask Server to identify + # which predicates can be re-written. + dry_run_results: list[MaskServerOutput] = ( + self.server_info.simplify_simple_expression_batch(batch, True, 1000) + ) + + batch, ancillary_info = self.identify_predicates_to_send( + dry_run_results, batch, ancillary_info + ) + + # Abort if the batch is now empty after filtering. + if len(batch) == 0: + return # Send the batch to the Mask Server, and process each response # alongside the ancillary info. Afterwards, self.responses should @@ -108,11 +127,31 @@ def process_batch(self) -> None: ) else: self.responses[expr] = None - self.candidate_visitor.processed_candidates.add(expr) - # Wipe the candidate pool to prevent duplicate processing, since every - # candidate already in the pool has now been added to self.responses. 
- self.candidate_visitor.candidate_pool.clear() + def identify_predicates_to_send( + self, + dry_run_results: list[MaskServerOutput], + batch: list[MaskServerInput], + ancillary_info: list[tuple[RelationalExpression, RelationalExpression]], + ) -> tuple[ + list[MaskServerInput], list[tuple[RelationalExpression, RelationalExpression]] + ]: + """ + TODO + """ + keep_idxs: set[int] = set(range(len(dry_run_results))) + + for idx, dry_run_result in enumerate(dry_run_results): + if dry_run_result.response_case != MaskServerResponse.UNSUPPORTED: + keep_idxs.add(idx) + + new_batch: list[MaskServerInput] = [ + elem for idx, elem in enumerate(batch) if idx in keep_idxs + ] + new_ancillary_info: list[tuple[RelationalExpression, RelationalExpression]] = [ + anc_elem for idx, anc_elem in enumerate(ancillary_info) if idx in keep_idxs + ] + return new_batch, new_ancillary_info def convert_response_to_relational( self, input_expr: RelationalExpression, response: MaskServerOutput diff --git a/tests/test_masked_sqlite.py b/tests/test_masked_sqlite.py index 04b7e2189..700acb282 100644 --- a/tests/test_masked_sqlite.py +++ b/tests/test_masked_sqlite.py @@ -1128,6 +1128,7 @@ def test_pipeline_e2e_cryptbank( "selected_customers = customers.WHERE(last_name == 'lee')\n" "result = CRYPTBANK.CALCULATE(n=COUNT(selected_customers))", [ + {"CRBNK.CUSTOMERS.c_lname: ['EQUAL', 2, '__col__', 'lee']", "DRY_RUN"}, {"CRBNK.CUSTOMERS.c_lname: ['EQUAL', 2, '__col__', 'lee']"}, ], id="cryptbank_filter_count_01", @@ -1136,9 +1137,13 @@ def test_pipeline_e2e_cryptbank( "selected_customers = customers.WHERE(ISIN(last_name, ('lee', 'smith', 'rodriguez')))\n" "result = CRYPTBANK.CALCULATE(n=COUNT(selected_customers))", [ + { + "CRBNK.CUSTOMERS.c_lname: ['IN', 4, '__col__', 'lee', 'smith', 'rodriguez']", + "DRY_RUN", + }, { "CRBNK.CUSTOMERS.c_lname: ['IN', 4, '__col__', 'lee', 'smith', 'rodriguez']" - } + }, ], id="cryptbank_filter_count_03", ), @@ -1149,7 +1154,12 @@ def test_pipeline_e2e_cryptbank( { 
"CRBNK.CUSTOMERS.c_lname: ['IN', 4, '__col__', 'lee', 'smith', 'rodriguez']", "CRBNK.CUSTOMERS.c_lname: ['NOT', 1, 'IN', 4, '__col__', 'lee', 'smith', 'rodriguez']", - } + "DRY_RUN", + }, + { + "CRBNK.CUSTOMERS.c_lname: ['IN', 4, '__col__', 'lee', 'smith', 'rodriguez']", + "CRBNK.CUSTOMERS.c_lname: ['NOT', 1, 'IN', 4, '__col__', 'lee', 'smith', 'rodriguez']", + }, ], id="cryptbank_filter_count_04", ), @@ -1172,7 +1182,17 @@ def test_pipeline_e2e_cryptbank( "CRBNK.CUSTOMERS.c_fname: ['OR', 2, 'OR', 2, 'ENDSWITH', 2, '__col__', 'a', 'ENDSWITH', 2, '__col__', 'e', 'ENDSWITH', 2, '__col__', 's']", "CRBNK.CUSTOMERS.c_lname: ['NOT_EQUAL', 2, '__col__', 'lopez']", "CRBNK.CUSTOMERS.c_phone: ['ENDSWITH', 2, '__col__', '5']", - } + "DRY_RUN", + }, + { + "CRBNK.CUSTOMERS.c_fname: ['ENDSWITH', 2, '__col__', 'a']", + "CRBNK.CUSTOMERS.c_fname: ['ENDSWITH', 2, '__col__', 'e']", + "CRBNK.CUSTOMERS.c_fname: ['ENDSWITH', 2, '__col__', 's']", + "CRBNK.CUSTOMERS.c_fname: ['OR', 2, 'ENDSWITH', 2, '__col__', 'a', 'ENDSWITH', 2, '__col__', 'e']", + "CRBNK.CUSTOMERS.c_fname: ['OR', 2, 'OR', 2, 'ENDSWITH', 2, '__col__', 'a', 'ENDSWITH', 2, '__col__', 'e', 'ENDSWITH', 2, '__col__', 's']", + "CRBNK.CUSTOMERS.c_lname: ['NOT_EQUAL', 2, '__col__', 'lopez']", + "CRBNK.CUSTOMERS.c_phone: ['ENDSWITH', 2, '__col__', '5']", + }, ], id="cryptbank_filter_count_27", ), @@ -1198,7 +1218,18 @@ def test_pipeline_e2e_cryptbank( "CRBNK.CUSTOMERS.c_email: ['CONTAINS', 2, '__col__', 'outlook']", "CRBNK.ACCOUNTS.a_type: ['OR', 2, 'EQUAL', 2, '__col__', 'retirement', 'EQUAL', 2, '__col__', 'savings']", "CRBNK.CUSTOMERS.c_email: ['OR', 2, 'CONTAINS', 2, '__col__', 'outlook', 'CONTAINS', 2, '__col__', 'gmail']", - } + "DRY_RUN", + }, + { + "CRBNK.ACCOUNTS.a_balance: ['GTE', 2, '__col__', 5000]", + "CRBNK.ACCOUNTS.a_open_ts: ['LT', 2, 'YEAR', 1, '__col__', 2020]", + "CRBNK.ACCOUNTS.a_type: ['EQUAL', 2, '__col__', 'retirement']", + "CRBNK.ACCOUNTS.a_type: ['EQUAL', 2, '__col__', 'savings']", + 
"CRBNK.CUSTOMERS.c_email: ['CONTAINS', 2, '__col__', 'gmail']", + "CRBNK.CUSTOMERS.c_email: ['CONTAINS', 2, '__col__', 'outlook']", + "CRBNK.ACCOUNTS.a_type: ['OR', 2, 'EQUAL', 2, '__col__', 'retirement', 'EQUAL', 2, '__col__', 'savings']", + "CRBNK.CUSTOMERS.c_email: ['OR', 2, 'CONTAINS', 2, '__col__', 'outlook', 'CONTAINS', 2, '__col__', 'gmail']", + }, ], id="cryptbank_filter_count_28", ), @@ -1208,7 +1239,11 @@ def test_pipeline_e2e_cryptbank( [ { "CRBNK.CUSTOMERS.c_birthday: ['LTE', 2, '__col__', '1925-01-01']", - } + "DRY_RUN", + }, + { + "CRBNK.CUSTOMERS.c_birthday: ['LTE', 2, '__col__', '1925-01-01']", + }, ], id="cryptbank_filter_count_29", ), @@ -1223,7 +1258,13 @@ def test_pipeline_e2e_cryptbank( "CRBNK.CUSTOMERS.c_birthday: ['AND', 2, 'IN', 7, 'ADD', 2, 'MONTH', 1, '__col__', 1, 2, 4, 6, 8, 10, 12, 'IN', 11, 'SUB', 2, 'YEAR', 1, '__col__', 2, 1975, 1977, 1979, 1981, 1983, 1985, 1987, 1989, 1991, 1993]", "CRBNK.CUSTOMERS.c_birthday: ['IN', 11, 'SUB', 2, 'YEAR', 1, '__col__', 2, 1975, 1977, 1979, 1981, 1983, 1985, 1987, 1989, 1991, 1993]", "CRBNK.CUSTOMERS.c_birthday: ['IN', 7, 'ADD', 2, 'MONTH', 1, '__col__', 1, 2, 4, 6, 8, 10, 12]", - } + "DRY_RUN", + }, + { + "CRBNK.CUSTOMERS.c_birthday: ['AND', 2, 'IN', 7, 'ADD', 2, 'MONTH', 1, '__col__', 1, 2, 4, 6, 8, 10, 12, 'IN', 11, 'SUB', 2, 'YEAR', 1, '__col__', 2, 1975, 1977, 1979, 1981, 1983, 1985, 1987, 1989, 1991, 1993]", + "CRBNK.CUSTOMERS.c_birthday: ['IN', 11, 'SUB', 2, 'YEAR', 1, '__col__', 2, 1975, 1977, 1979, 1981, 1983, 1985, 1987, 1989, 1991, 1993]", + "CRBNK.CUSTOMERS.c_birthday: ['IN', 7, 'ADD', 2, 'MONTH', 1, '__col__', 1, 2, 4, 6, 8, 10, 12]", + }, ], id="cryptbank_filter_count_30", ), @@ -1233,7 +1274,11 @@ def test_pipeline_e2e_cryptbank( [ { "CRBNK.CUSTOMERS.c_birthday: ['IN', 5, '__col__', '1991-11-15', '1978-02-11', '2005-03-14', '1985-04-12']", - } + "DRY_RUN", + }, + { + "CRBNK.CUSTOMERS.c_birthday: ['IN', 5, '__col__', '1991-11-15', '1978-02-11', '2005-03-14', '1985-04-12']", + }, ], 
id="cryptbank_filter_count_31", ), @@ -1243,7 +1288,11 @@ def test_pipeline_e2e_cryptbank( [ { "CRBNK.ACCOUNTS.a_open_ts: ['BETWEEN', 3, '2020-03-28 09:20:00', '__col__', '2020-09-20 08:30:00']", - } + "DRY_RUN", + }, + { + "CRBNK.ACCOUNTS.a_open_ts: ['BETWEEN', 3, '2020-03-28 09:20:00', '__col__', '2020-09-20 08:30:00']", + }, ], id="cryptbank_filter_count_32", ), @@ -1253,7 +1302,12 @@ def test_pipeline_e2e_cryptbank( { "CRBNK.TRANSACTIONS.t_amount: ['LT', 2, '__col__', 0]", "CRBNK.TRANSACTIONS.t_amount: ['GT', 2, '__col__', 0]", - } + "DRY_RUN", + }, + { + "CRBNK.TRANSACTIONS.t_amount: ['LT', 2, '__col__', 0]", + "CRBNK.TRANSACTIONS.t_amount: ['GT', 2, '__col__', 0]", + }, ], id="cryptbank_agg_06", ), @@ -1297,7 +1351,25 @@ def test_pipeline_e2e_cryptbank( "CRBNK.TRANSACTIONS.t_ts: ['EQUAL', 2, 'DATEADD', 3, 10000, 'minutes', '__col__', '2019-11-18 16:40:52']", "CRBNK.TRANSACTIONS.t_ts: ['EQUAL', 2, 'DATEADD', 3, -1000000, 'seconds', '__col__', '2019-10-31 04:14:12']", "CRBNK.TRANSACTIONS.t_ts: ['EQUAL', 2, 'DATEADD', 3, -1, 'days', 'DATETRUNC', 2, 'month', '__col__', '2019-10-31']", - } + "DRY_RUN", + }, + { + "CRBNK.TRANSACTIONS.t_ts: ['EQUAL', 2, 'DATETRUNC', 2, 'day', '__col__', '2023-06-02']", + "CRBNK.TRANSACTIONS.t_ts: ['EQUAL', 2, 'DATETRUNC', 2, 'hour', '__col__', '2023-06-02 04:00:00']", + "CRBNK.TRANSACTIONS.t_ts: ['EQUAL', 2, 'DATETRUNC', 2, 'minute', '__col__', '2023-06-02 04:55:00']", + "CRBNK.TRANSACTIONS.t_ts: ['EQUAL', 2, 'DATETRUNC', 2, 'month', '__col__', '2023-06-01']", + "CRBNK.TRANSACTIONS.t_ts: ['EQUAL', 2, 'DATETRUNC', 2, 'quarter', '__col__', '2023-04-01']", + "CRBNK.TRANSACTIONS.t_ts: ['EQUAL', 2, 'DATETRUNC', 2, 'second', '__col__', '2023-06-02 04:55:31']", + "CRBNK.TRANSACTIONS.t_ts: ['EQUAL', 2, 'DATETRUNC', 2, 'year', '__col__', '2023-01-01']", + "CRBNK.TRANSACTIONS.t_ts: ['EQUAL', 2, 'DATEADD', 3, 1, 'years', '__col__', '2020-11-11 18:00:52']", + "CRBNK.TRANSACTIONS.t_ts: ['EQUAL', 2, 'DATEADD', 3, 2, 'quarters', '__col__', 
'2020-05-11 18:00:52']", + "CRBNK.TRANSACTIONS.t_ts: ['EQUAL', 2, 'DATEADD', 3, -5, 'months', '__col__', '2019-06-11 18:00:52']", + "CRBNK.TRANSACTIONS.t_ts: ['EQUAL', 2, 'DATEADD', 3, 10, 'days', '__col__', '2019-11-21 18:00:52']", + "CRBNK.TRANSACTIONS.t_ts: ['EQUAL', 2, 'DATEADD', 3, 1000, 'hours', '__col__', '2019-12-23 10:00:52']", + "CRBNK.TRANSACTIONS.t_ts: ['EQUAL', 2, 'DATEADD', 3, 10000, 'minutes', '__col__', '2019-11-18 16:40:52']", + "CRBNK.TRANSACTIONS.t_ts: ['EQUAL', 2, 'DATEADD', 3, -1000000, 'seconds', '__col__', '2019-10-31 04:14:12']", + "CRBNK.TRANSACTIONS.t_ts: ['EQUAL', 2, 'DATEADD', 3, -1, 'days', 'DATETRUNC', 2, 'month', '__col__', '2019-10-31']", + }, ], id="cryptbank_agg_07", ), @@ -1305,9 +1377,13 @@ def test_pipeline_e2e_cryptbank( "selected_accounts = accounts.WHERE(QUARTER(creation_timestamp) == DAY(creation_timestamp))\n" "result = CRYPTBANK.CALCULATE(n=COUNT(selected_accounts))", [ + { + "CRBNK.ACCOUNTS.a_open_ts: ['EQUAL', 2, 'QUARTER', 1, '__col__', 'DAY', 1, '__col__']", + "DRY_RUN", + }, { "CRBNK.ACCOUNTS.a_open_ts: ['EQUAL', 2, 'QUARTER', 1, '__col__', 'DAY', 1, '__col__']" - } + }, ], id="cryptbank_filter_count_34", ), @@ -1315,9 +1391,13 @@ def test_pipeline_e2e_cryptbank( "selected_customers = customers.WHERE(CONTAINS(JOIN_STRINGS('', '1-', phone_number), '1-5'))\n" "result = CRYPTBANK.CALCULATE(n=COUNT(selected_customers))", [ + { + "CRBNK.CUSTOMERS.c_phone: ['CONTAINS', 2, 'CONCAT', 2, '1-', '__col__', '1-5']", + "DRY_RUN", + }, { "CRBNK.CUSTOMERS.c_phone: ['CONTAINS', 2, 'CONCAT', 2, '1-', '__col__', '1-5']" - } + }, ], id="cryptbank_filter_count_40", ), @@ -1325,9 +1405,13 @@ def test_pipeline_e2e_cryptbank( "selected_customers = customers.WHERE(CONTAINS(JOIN_STRINGS('-', '1', phone_number), '1-5'))\n" "result = CRYPTBANK.CALCULATE(n=COUNT(selected_customers))", [ + { + "CRBNK.CUSTOMERS.c_phone: ['CONTAINS', 2, 'CONCAT', 3, '1', '-', '__col__', '1-5']", + "DRY_RUN", + }, { "CRBNK.CUSTOMERS.c_phone: ['CONTAINS', 2, 
'CONCAT', 3, '1', '-', '__col__', '1-5']" - } + }, ], id="cryptbank_filter_count_41", ), @@ -1335,9 +1419,13 @@ def test_pipeline_e2e_cryptbank( "selected_customers = customers.WHERE(CONTAINS(JOIN_STRINGS('-', '1', phone_number, '1'), '5-1'))\n" "result = CRYPTBANK.CALCULATE(n=COUNT(selected_customers))", [ + { + "CRBNK.CUSTOMERS.c_phone: ['CONTAINS', 2, 'CONCAT', 5, '1', '-', '__col__', '-', '1', '5-1']", + "DRY_RUN", + }, { "CRBNK.CUSTOMERS.c_phone: ['CONTAINS', 2, 'CONCAT', 5, '1', '-', '__col__', '-', '1', '5-1']" - } + }, ], id="cryptbank_filter_count_42", ), @@ -1373,15 +1461,13 @@ def test_cryptbank_mask_server_logging( {"datetime": datetime, "pd": pd}, ) - # Convert the PyDough code to SQL text. - to_sql(root, metadata=graph, mask_server=mock_server_info) - - # Retrieve the output from the captured logger output, while capturing + # Convert the PyDough code to SQL text, while capturing # stdout to avoid polluting the console with logging calls with redirect_stdout(io.StringIO()): - batch_requests_made: list[set[str]] = extract_batch_requests_from_logs( - caplog.text - ) + to_sql(root, metadata=graph, mask_server=mock_server_info) + + # Retrieve the output from the captured logger output + batch_requests_made: list[set[str]] = extract_batch_requests_from_logs(caplog.text) # If in raw mode, make sure no requests were made. Otherwise, compare the # expected batch requests to those made. diff --git a/tests/testing_utilities.py b/tests/testing_utilities.py index a4556a48f..1c770906c 100644 --- a/tests/testing_utilities.py +++ b/tests/testing_utilities.py @@ -1504,7 +1504,8 @@ def extract_batch_requests_from_logs(log_str: str) -> list[set[str]]: """ Extracts the batch requests made to a mask server from the provided log string. 
Each batch request will have a corresponding sequence of log lines - in the following format: + in the following format (the phrase "Batch request" sometimes followed by + the text "(dry run)" if the mask server is in dry run mode): ``` INFO pydough.mask_server.mask_server:mask_server.py:149 Batch request to Mask Server (2 items): @@ -1513,7 +1514,7 @@ def extract_batch_requests_from_logs(log_str: str) -> list[set[str]]: ``` A log message string with those lines would return the following list of - sets: + sets (if doing a dry run, then "DRY_RUN" is also included in the set): ``` [ @@ -1534,7 +1535,7 @@ def extract_batch_requests_from_logs(log_str: str) -> list[set[str]]: `db_name.table_name.column_name: [expression_list]`. """ header_pattern: re.Pattern = re.compile( - r"Batch request to Mask Server \((\d+) items?\):" + r"Batch request( \(dry run\))? to Mask Server \((\d+) items?\):" ) entry_pattern: re.Pattern = re.compile(r"\(\d+\) (.+)") result: list[set[str]] = [] @@ -1547,7 +1548,9 @@ def extract_batch_requests_from_logs(log_str: str) -> list[set[str]]: "Malformed log: new batch request started before previous one ended." 
) current_set = set() - lines_remaining = int(header_match[0]) + if bool(header_match[0][0]): + current_set.add("DRY_RUN") + lines_remaining = int(header_match[0][1]) result.append(current_set) elif lines_remaining > 0: entry_match = re.findall(entry_pattern, line) From 3996ced1f8f58c686db9327c486b86eba6d68a8e Mon Sep 17 00:00:00 2001 From: knassre-bodo Date: Wed, 19 Nov 2025 12:36:01 -0500 Subject: [PATCH 27/40] Updating rewrite handling, need to add DP algorithm --- pydough/mask_server/mask_server_rewrite_shuttle.py | 2 +- tests/test_masked_sqlite.py | 11 ----------- 2 files changed, 1 insertion(+), 12 deletions(-) diff --git a/pydough/mask_server/mask_server_rewrite_shuttle.py b/pydough/mask_server/mask_server_rewrite_shuttle.py index dbc107e72..f9840cc20 100644 --- a/pydough/mask_server/mask_server_rewrite_shuttle.py +++ b/pydough/mask_server/mask_server_rewrite_shuttle.py @@ -139,7 +139,7 @@ def identify_predicates_to_send( """ TODO """ - keep_idxs: set[int] = set(range(len(dry_run_results))) + keep_idxs: set[int] = set() for idx, dry_run_result in enumerate(dry_run_results): if dry_run_result.response_case != MaskServerResponse.UNSUPPORTED: diff --git a/tests/test_masked_sqlite.py b/tests/test_masked_sqlite.py index 700acb282..fd4f4c997 100644 --- a/tests/test_masked_sqlite.py +++ b/tests/test_masked_sqlite.py @@ -1185,11 +1185,9 @@ def test_pipeline_e2e_cryptbank( "DRY_RUN", }, { - "CRBNK.CUSTOMERS.c_fname: ['ENDSWITH', 2, '__col__', 'a']", "CRBNK.CUSTOMERS.c_fname: ['ENDSWITH', 2, '__col__', 'e']", "CRBNK.CUSTOMERS.c_fname: ['ENDSWITH', 2, '__col__', 's']", "CRBNK.CUSTOMERS.c_fname: ['OR', 2, 'ENDSWITH', 2, '__col__', 'a', 'ENDSWITH', 2, '__col__', 'e']", - "CRBNK.CUSTOMERS.c_fname: ['OR', 2, 'OR', 2, 'ENDSWITH', 2, '__col__', 'a', 'ENDSWITH', 2, '__col__', 'e', 'ENDSWITH', 2, '__col__', 's']", "CRBNK.CUSTOMERS.c_lname: ['NOT_EQUAL', 2, '__col__', 'lopez']", "CRBNK.CUSTOMERS.c_phone: ['ENDSWITH', 2, '__col__', '5']", }, @@ -1221,14 +1219,7 @@ def 
test_pipeline_e2e_cryptbank( "DRY_RUN", }, { - "CRBNK.ACCOUNTS.a_balance: ['GTE', 2, '__col__', 5000]", - "CRBNK.ACCOUNTS.a_open_ts: ['LT', 2, 'YEAR', 1, '__col__', 2020]", - "CRBNK.ACCOUNTS.a_type: ['EQUAL', 2, '__col__', 'retirement']", - "CRBNK.ACCOUNTS.a_type: ['EQUAL', 2, '__col__', 'savings']", - "CRBNK.CUSTOMERS.c_email: ['CONTAINS', 2, '__col__', 'gmail']", - "CRBNK.CUSTOMERS.c_email: ['CONTAINS', 2, '__col__', 'outlook']", "CRBNK.ACCOUNTS.a_type: ['OR', 2, 'EQUAL', 2, '__col__', 'retirement', 'EQUAL', 2, '__col__', 'savings']", - "CRBNK.CUSTOMERS.c_email: ['OR', 2, 'CONTAINS', 2, '__col__', 'outlook', 'CONTAINS', 2, '__col__', 'gmail']", }, ], id="cryptbank_filter_count_28", @@ -1262,8 +1253,6 @@ def test_pipeline_e2e_cryptbank( }, { "CRBNK.CUSTOMERS.c_birthday: ['AND', 2, 'IN', 7, 'ADD', 2, 'MONTH', 1, '__col__', 1, 2, 4, 6, 8, 10, 12, 'IN', 11, 'SUB', 2, 'YEAR', 1, '__col__', 2, 1975, 1977, 1979, 1981, 1983, 1985, 1987, 1989, 1991, 1993]", - "CRBNK.CUSTOMERS.c_birthday: ['IN', 11, 'SUB', 2, 'YEAR', 1, '__col__', 2, 1975, 1977, 1979, 1981, 1983, 1985, 1987, 1989, 1991, 1993]", - "CRBNK.CUSTOMERS.c_birthday: ['IN', 7, 'ADD', 2, 'MONTH', 1, '__col__', 1, 2, 4, 6, 8, 10, 12]", }, ], id="cryptbank_filter_count_30", From 29e0e3fde75d804ffb97a8b3f6c35c676033c338 Mon Sep 17 00:00:00 2001 From: knassre-bodo Date: Fri, 21 Nov 2025 10:29:00 -0800 Subject: [PATCH 28/40] Finishing implementation of min cover set --- .../mask_server_candidate_visitor.py | 17 +++++++- .../mask_server_rewrite_shuttle.py | 22 +++++++--- pydough/mask_server/min_cover_set.py | 43 +++++++++++++++++++ tests/test_masked_sqlite.py | 2 - 4 files changed, 75 insertions(+), 9 deletions(-) create mode 100644 pydough/mask_server/min_cover_set.py diff --git a/pydough/mask_server/mask_server_candidate_visitor.py b/pydough/mask_server/mask_server_candidate_visitor.py index 225017cef..2fddf0df7 100644 --- a/pydough/mask_server/mask_server_candidate_visitor.py +++ 
b/pydough/mask_server/mask_server_candidate_visitor.py @@ -168,8 +168,19 @@ def __init__(self) -> None: the token "__col__". """ + self.heritage_tree: dict[ + RelationalExpression, set[RelationalExpression | None] + ] = {} + """ + TODO + """ + + self.ancestry_stack: list[RelationalExpression | None] = [None] + def reset(self): self.stack.clear() + self.heritage_tree.clear() + self.ancestry_stack = [None] def visit_call_expression(self, expr: CallExpression) -> None: # First, recursively visit all of the inputs to the function call, then @@ -177,8 +188,10 @@ def visit_call_expression(self, expr: CallExpression) -> None: # is a candidate for Mask Server rewrite conversion. Reverse the order # of the stack entries since they were pushed in order of visitation, # but need to be processed in the original input order. + self.ancestry_stack.append(expr) for arg in expr.inputs: arg.accept_shuttle(self) + self.ancestry_stack.pop() mask_ops: set[ tuple[pydop.MaskedExpressionFunctionOperator, RelationalExpression] ] = set() @@ -190,6 +203,9 @@ def visit_call_expression(self, expr: CallExpression) -> None: arg_exprs.append(expression_list) arg_exprs.reverse() + self.heritage_tree[expr] = self.heritage_tree.get(expr, set()) + self.heritage_tree[expr].add(self.ancestry_stack[-1]) + input_op: pydop.MaskedExpressionFunctionOperator input_expr: RelationalExpression combined_exprs: list[str | int | float | None | bool] | None @@ -424,7 +440,6 @@ def convert_slice_call_to_server_expression( or step_literal not in ([1], ["NULL"]) ): return None - print(start_literal, stop_literal, step_literal) match (start_literal[0], stop_literal[0]): case (int(start), int(stop)) if start >= 0 and stop > start: start_int = start diff --git a/pydough/mask_server/mask_server_rewrite_shuttle.py b/pydough/mask_server/mask_server_rewrite_shuttle.py index f9840cc20..a8ea9e31e 100644 --- a/pydough/mask_server/mask_server_rewrite_shuttle.py +++ b/pydough/mask_server/mask_server_rewrite_shuttle.py @@ -21,6 
+21,7 @@ MaskServerResponse, ) from .mask_server_candidate_visitor import MaskServerCandidateVisitor +from .min_cover_set import choose_minimal_covering_set class MaskServerRewriteShuttle(RelationalExpressionShuttle): @@ -104,8 +105,12 @@ def process_batch(self) -> None: ) batch, ancillary_info = self.identify_predicates_to_send( - dry_run_results, batch, ancillary_info + dry_run_results, + batch, + ancillary_info, + heritage_tree=self.candidate_visitor.heritage_tree, ) + self.candidate_visitor.heritage_tree.clear() # Abort if the batch is now empty after filtering. if len(batch) == 0: @@ -133,17 +138,22 @@ def identify_predicates_to_send( dry_run_results: list[MaskServerOutput], batch: list[MaskServerInput], ancillary_info: list[tuple[RelationalExpression, RelationalExpression]], + heritage_tree: dict[RelationalExpression, set[RelationalExpression | None]], ) -> tuple[ list[MaskServerInput], list[tuple[RelationalExpression, RelationalExpression]] ]: """ TODO """ - keep_idxs: set[int] = set() - - for idx, dry_run_result in enumerate(dry_run_results): - if dry_run_result.response_case != MaskServerResponse.UNSUPPORTED: - keep_idxs.add(idx) + expressions = [expr for expr, _ in ancillary_info] + successes = [ + idx + for idx, result in enumerate(dry_run_results) + if result.response_case != MaskServerResponse.UNSUPPORTED + ] + keep_idxs: set[int] = choose_minimal_covering_set( + expressions, successes, heritage_tree + ) new_batch: list[MaskServerInput] = [ elem for idx, elem in enumerate(batch) if idx in keep_idxs diff --git a/pydough/mask_server/min_cover_set.py b/pydough/mask_server/min_cover_set.py new file mode 100644 index 000000000..d3f25ed63 --- /dev/null +++ b/pydough/mask_server/min_cover_set.py @@ -0,0 +1,43 @@ +""" +TODO +""" + +__all__ = ["choose_minimal_covering_set"] + +from pydough.relational import RelationalExpression + + +def choose_minimal_covering_set( + expressions: list[RelationalExpression], + successful_idxs: list[int], + heritage_tree: 
dict[RelationalExpression, set[RelationalExpression | None]], +) -> set[int]: + """ + TODO: ADD DESCRIPTION + """ + supported: set[RelationalExpression] = {expressions[idx] for idx in successful_idxs} + not_needed: set[RelationalExpression] = set() + include: set[RelationalExpression] = set() + visited: set[RelationalExpression] = set() + + def traverse(expr: RelationalExpression): + if expr in visited: + return + visited.add(expr) + parents: set[RelationalExpression | None] = heritage_tree.get(expr, set()) + unnecessary: bool = True + for parent in parents: + if parent is not None: + traverse(parent) + if parent is None or (parent not in supported and parent not in not_needed): + unnecessary = False + if expr in supported: + include.add(expr) + if unnecessary: + not_needed.add(expr) + + for expr in expressions: + traverse(expr) + + result: set[int] = {idx for idx in successful_idxs if expressions[idx] in include} + return result diff --git a/tests/test_masked_sqlite.py b/tests/test_masked_sqlite.py index fd4f4c997..039506fea 100644 --- a/tests/test_masked_sqlite.py +++ b/tests/test_masked_sqlite.py @@ -1157,7 +1157,6 @@ def test_pipeline_e2e_cryptbank( "DRY_RUN", }, { - "CRBNK.CUSTOMERS.c_lname: ['IN', 4, '__col__', 'lee', 'smith', 'rodriguez']", "CRBNK.CUSTOMERS.c_lname: ['NOT', 1, 'IN', 4, '__col__', 'lee', 'smith', 'rodriguez']", }, ], @@ -1185,7 +1184,6 @@ def test_pipeline_e2e_cryptbank( "DRY_RUN", }, { - "CRBNK.CUSTOMERS.c_fname: ['ENDSWITH', 2, '__col__', 'e']", "CRBNK.CUSTOMERS.c_fname: ['ENDSWITH', 2, '__col__', 's']", "CRBNK.CUSTOMERS.c_fname: ['OR', 2, 'ENDSWITH', 2, '__col__', 'a', 'ENDSWITH', 2, '__col__', 'e']", "CRBNK.CUSTOMERS.c_lname: ['NOT_EQUAL', 2, '__col__', 'lopez']", From f9c05b2b9cb084768886cbe4c6084fa98d6febef Mon Sep 17 00:00:00 2001 From: knassre-bodo Date: Fri, 21 Nov 2025 10:58:51 -0800 Subject: [PATCH 29/40] Added edge case tests for selection algorithm --- tests/mock_server/lookup_table.py | 165 ++++++++++++++++++++++ 
tests/test_masked_sqlite.py | 223 ++++++++++++++++++++++++++++++ 2 files changed, 388 insertions(+) diff --git a/tests/mock_server/lookup_table.py b/tests/mock_server/lookup_table.py index 1894f441c..8a8c8ea0f 100644 --- a/tests/mock_server/lookup_table.py +++ b/tests/mock_server/lookup_table.py @@ -895,4 +895,169 @@ "2019-11-11 15:44:22", ], ), + ("srv.CRBNK.CUSTOMERS.c_fname", ("CONTAINS", 2, "__col__", "a")): ( + "NOT_IN", + ["BOB", "EMILY", "HENRY", "LUKE", "PETER", "QUEENIE", "ROBERT"], + ), + ("srv.CRBNK.CUSTOMERS.c_fname", ("CONTAINS", 2, "__col__", "e")): ( + "NOT_IN", + [ + "BOB", + "CAROL", + "DAVID", + "FRANK", + "MARIA", + "NICHOLAS", + "OLIVIA", + "SOPHIA", + "THOMAS", + ], + ), + ("srv.CRBNK.CUSTOMERS.c_fname", ("CONTAINS", 2, "__col__", "i")): ( + "IN", + [ + "ALICE", + "DAVID", + "EMILY", + "ISABEL", + "MARIA", + "NICHOLAS", + "OLIVIA", + "QUEENIE", + "SOPHIA", + ], + ), + ("srv.CRBNK.CUSTOMERS.c_fname", ("CONTAINS", 2, "__col__", "o")): ( + "IN", + ["BOB", "CAROL", "NICHOLAS", "OLIVIA", "ROBERT", "SOPHIA", "THOMAS"], + ), + ("srv.CRBNK.CUSTOMERS.c_fname", ("CONTAINS", 2, "__col__", "u")): ( + "IN", + ["LUKE", "QUEENIE"], + ), + ( + "srv.CRBNK.CUSTOMERS.c_fname", + ("AND", 2, "CONTAINS", 2, "__col__", "a", "CONTAINS", 2, "__col__", "e"), + ): ( + "IN", + ["ALICE", "GRACE", "ISABEL", "JAMES", "KAREN"], + ), + ( + "srv.CRBNK.CUSTOMERS.c_fname", + ("AND", 2, "CONTAINS", 2, "__col__", "e", "CONTAINS", 2, "__col__", "i"), + ): ( + "IN", + ["ALICE", "EMILY", "ISABEL", "QUEENIE"], + ), + ( + "srv.CRBNK.CUSTOMERS.c_fname", + ("AND", 2, "CONTAINS", 2, "__col__", "i", "CONTAINS", 2, "__col__", "o"), + ): ( + "IN", + ["NICHOLAS", "OLIVIA", "SOPHIA"], + ), + ( + "srv.CRBNK.CUSTOMERS.c_fname", + ("AND", 2, "CONTAINS", 2, "__col__", "o", "CONTAINS", 2, "__col__", "u"), + ): ( + "IN", + [], + ), + ( + "srv.CRBNK.CUSTOMERS.c_fname", + ("AND", 2, "CONTAINS", 2, "__col__", "u", "CONTAINS", 2, "__col__", "a"), + ): ( + "IN", + [], + ), + ( + 
"srv.CRBNK.CUSTOMERS.c_fname", + ( + "AND", + 3, + "CONTAINS", + 2, + "__col__", + "a", + "CONTAINS", + 2, + "__col__", + "e", + "CONTAINS", + 2, + "__col__", + "i", + ), + ): ( + "IN", + ["ALICE", "ISABEL"], + ), + ( + "srv.CRBNK.CUSTOMERS.c_fname", + ( + "AND", + 3, + "CONTAINS", + 2, + "__col__", + "e", + "CONTAINS", + 2, + "__col__", + "i", + "CONTAINS", + 2, + "__col__", + "o", + ), + ): ( + "IN", + [], + ), + ( + "srv.CRBNK.CUSTOMERS.c_fname", + ( + "NOT", + 1, + "AND", + 2, + "CONTAINS", + 2, + "__col__", + "i", + "CONTAINS", + 2, + "__col__", + "o", + ), + ): ( + "NOT_IN", + ["NICHOLAS", "OLIVIA", "SOPHIA"], + ), + ( + "srv.CRBNK.CUSTOMERS.c_fname", + ( + "AND", + 2, + "CONTAINS", + 2, + "__col__", + "i", + "NOT", + 1, + "AND", + 2, + "CONTAINS", + 2, + "__col__", + "a", + "CONTAINS", + 2, + "__col__", + "e", + ), + ): ( + "IN", + [], + ), } diff --git a/tests/test_masked_sqlite.py b/tests/test_masked_sqlite.py index 039506fea..351cbbf54 100644 --- a/tests/test_masked_sqlite.py +++ b/tests/test_masked_sqlite.py @@ -1422,6 +1422,229 @@ def test_pipeline_e2e_cryptbank( [], id="cryptbank_filter_count_43", ), + pytest.param( + "result = CRYPTBANK.CALCULATE(" + + ", ".join( + f"n{idx}=COUNT(customers.WHERE({cond}))" + for idx, cond in enumerate( + [ + "CONTAINS(first_name, 'a')", + "CONTAINS(first_name, 'e')", + "CONTAINS(first_name, 'i')", + "CONTAINS(first_name, 'o')", + "CONTAINS(first_name, 'u')", + ] + ) + ) + + ")", + [ + { + "CRBNK.CUSTOMERS.c_fname: ['CONTAINS', 2, '__col__', 'a']", + "CRBNK.CUSTOMERS.c_fname: ['CONTAINS', 2, '__col__', 'e']", + "CRBNK.CUSTOMERS.c_fname: ['CONTAINS', 2, '__col__', 'i']", + "CRBNK.CUSTOMERS.c_fname: ['CONTAINS', 2, '__col__', 'o']", + "CRBNK.CUSTOMERS.c_fname: ['CONTAINS', 2, '__col__', 'u']", + "DRY_RUN", + }, + { + "CRBNK.CUSTOMERS.c_fname: ['CONTAINS', 2, '__col__', 'a']", + "CRBNK.CUSTOMERS.c_fname: ['CONTAINS', 2, '__col__', 'e']", + "CRBNK.CUSTOMERS.c_fname: ['CONTAINS', 2, '__col__', 'i']", + 
"CRBNK.CUSTOMERS.c_fname: ['CONTAINS', 2, '__col__', 'o']", + "CRBNK.CUSTOMERS.c_fname: ['CONTAINS', 2, '__col__', 'u']", + }, + ], + id="cryptbank_multi_fcount_01", + ), + pytest.param( + "result = CRYPTBANK.CALCULATE(" + + ", ".join( + f"n{idx}=COUNT(customers.WHERE({cond}))" + for idx, cond in enumerate( + [ + "CONTAINS(first_name, 'a') & CONTAINS(first_name, 'e')", + "CONTAINS(first_name, 'e') & CONTAINS(first_name, 'i')", + "CONTAINS(first_name, 'i') & CONTAINS(first_name, 'o')", + "CONTAINS(first_name, 'o') & CONTAINS(first_name, 'u')", + "CONTAINS(first_name, 'u') & CONTAINS(first_name, 'a')", + ] + ) + ) + + ")", + [ + { + "CRBNK.CUSTOMERS.c_fname: ['AND', 2, 'CONTAINS', 2, '__col__', 'a', " + "'CONTAINS', 2, '__col__', 'e']", + "CRBNK.CUSTOMERS.c_fname: ['AND', 2, 'CONTAINS', 2, '__col__', 'a', " + "'CONTAINS', 2, '__col__', 'u']", + "CRBNK.CUSTOMERS.c_fname: ['AND', 2, 'CONTAINS', 2, '__col__', 'e', " + "'CONTAINS', 2, '__col__', 'i']", + "CRBNK.CUSTOMERS.c_fname: ['AND', 2, 'CONTAINS', 2, '__col__', 'i', " + "'CONTAINS', 2, '__col__', 'o']", + "CRBNK.CUSTOMERS.c_fname: ['AND', 2, 'CONTAINS', 2, '__col__', 'o', " + "'CONTAINS', 2, '__col__', 'u']", + "CRBNK.CUSTOMERS.c_fname: ['CONTAINS', 2, '__col__', 'a']", + "CRBNK.CUSTOMERS.c_fname: ['CONTAINS', 2, '__col__', 'e']", + "CRBNK.CUSTOMERS.c_fname: ['CONTAINS', 2, '__col__', 'i']", + "CRBNK.CUSTOMERS.c_fname: ['CONTAINS', 2, '__col__', 'o']", + "CRBNK.CUSTOMERS.c_fname: ['CONTAINS', 2, '__col__', 'u']", + "DRY_RUN", + }, + { + "CRBNK.CUSTOMERS.c_fname: ['AND', 2, 'CONTAINS', 2, '__col__', 'a', 'CONTAINS', 2, '__col__', 'e']", + "CRBNK.CUSTOMERS.c_fname: ['AND', 2, 'CONTAINS', 2, '__col__', 'e', 'CONTAINS', 2, '__col__', 'i']", + "CRBNK.CUSTOMERS.c_fname: ['AND', 2, 'CONTAINS', 2, '__col__', 'i', 'CONTAINS', 2, '__col__', 'o']", + "CRBNK.CUSTOMERS.c_fname: ['AND', 2, 'CONTAINS', 2, '__col__', 'o', 'CONTAINS', 2, '__col__', 'u']", + "CRBNK.CUSTOMERS.c_fname: ['CONTAINS', 2, '__col__', 'a']", + 
"CRBNK.CUSTOMERS.c_fname: ['CONTAINS', 2, '__col__', 'u']", + }, + ], + id="cryptbank_multi_fcount_02", + ), + pytest.param( + "result = CRYPTBANK.CALCULATE(" + + ", ".join( + f"n{idx}=COUNT(customers.WHERE({cond}))" + for idx, cond in enumerate( + [ + "CONTAINS(first_name, 'a') & CONTAINS(first_name, 'e')", + "CONTAINS(first_name, 'a') & CONTAINS(first_name, 'i')", + "CONTAINS(first_name, 'a') & CONTAINS(first_name, 'o')", + "CONTAINS(first_name, 'a') & CONTAINS(first_name, 'u')", + "CONTAINS(first_name, 'a')", + ] + ) + ) + + ")", + [ + { + "CRBNK.CUSTOMERS.c_fname: ['AND', 2, 'CONTAINS', 2, '__col__', 'a', 'CONTAINS', 2, '__col__', 'e']", + "CRBNK.CUSTOMERS.c_fname: ['AND', 2, 'CONTAINS', 2, '__col__', 'a', 'CONTAINS', 2, '__col__', 'i']", + "CRBNK.CUSTOMERS.c_fname: ['AND', 2, 'CONTAINS', 2, '__col__', 'a', 'CONTAINS', 2, '__col__', 'o']", + "CRBNK.CUSTOMERS.c_fname: ['AND', 2, 'CONTAINS', 2, '__col__', 'a', 'CONTAINS', 2, '__col__', 'u']", + "CRBNK.CUSTOMERS.c_fname: ['CONTAINS', 2, '__col__', 'a']", + "CRBNK.CUSTOMERS.c_fname: ['CONTAINS', 2, '__col__', 'e']", + "CRBNK.CUSTOMERS.c_fname: ['CONTAINS', 2, '__col__', 'i']", + "CRBNK.CUSTOMERS.c_fname: ['CONTAINS', 2, '__col__', 'o']", + "CRBNK.CUSTOMERS.c_fname: ['CONTAINS', 2, '__col__', 'u']", + "DRY_RUN", + }, + { + "CRBNK.CUSTOMERS.c_fname: ['AND', 2, 'CONTAINS', 2, '__col__', 'a', 'CONTAINS', 2, '__col__', 'e']", + "CRBNK.CUSTOMERS.c_fname: ['CONTAINS', 2, '__col__', 'a']", + "CRBNK.CUSTOMERS.c_fname: ['CONTAINS', 2, '__col__', 'i']", + "CRBNK.CUSTOMERS.c_fname: ['CONTAINS', 2, '__col__', 'o']", + "CRBNK.CUSTOMERS.c_fname: ['CONTAINS', 2, '__col__', 'u']", + }, + ], + id="cryptbank_multi_fcount_03", + ), + pytest.param( + "result = CRYPTBANK.CALCULATE(" + + ", ".join( + f"n{idx}=COUNT(customers.WHERE({cond}))" + for idx, cond in enumerate( + [ + "CONTAINS(first_name, 'a') & CONTAINS(first_name, 'e')", + "CONTAINS(first_name, 'e') & CONTAINS(first_name, 'i')", + "CONTAINS(first_name, 'i') & 
CONTAINS(first_name, 'u')", + "CONTAINS(first_name, 'a') & CONTAINS(first_name, 'i')", + ] + ) + ) + + ")", + [ + { + "CRBNK.CUSTOMERS.c_fname: ['AND', 2, 'CONTAINS', 2, '__col__', 'a', 'CONTAINS', 2, '__col__', 'e']", + "CRBNK.CUSTOMERS.c_fname: ['AND', 2, 'CONTAINS', 2, '__col__', 'a', 'CONTAINS', 2, '__col__', 'i']", + "CRBNK.CUSTOMERS.c_fname: ['AND', 2, 'CONTAINS', 2, '__col__', 'e', 'CONTAINS', 2, '__col__', 'i']", + "CRBNK.CUSTOMERS.c_fname: ['AND', 2, 'CONTAINS', 2, '__col__', 'i', 'CONTAINS', 2, '__col__', 'u']", + "CRBNK.CUSTOMERS.c_fname: ['CONTAINS', 2, '__col__', 'a']", + "CRBNK.CUSTOMERS.c_fname: ['CONTAINS', 2, '__col__', 'e']", + "CRBNK.CUSTOMERS.c_fname: ['CONTAINS', 2, '__col__', 'i']", + "CRBNK.CUSTOMERS.c_fname: ['CONTAINS', 2, '__col__', 'u']", + "DRY_RUN", + }, + { + "CRBNK.CUSTOMERS.c_fname: ['AND', 2, 'CONTAINS', 2, '__col__', 'a', 'CONTAINS', 2, '__col__', 'e']", + "CRBNK.CUSTOMERS.c_fname: ['AND', 2, 'CONTAINS', 2, '__col__', 'e', 'CONTAINS', 2, '__col__', 'i']", + "CRBNK.CUSTOMERS.c_fname: ['CONTAINS', 2, '__col__', 'a']", + "CRBNK.CUSTOMERS.c_fname: ['CONTAINS', 2, '__col__', 'i']", + "CRBNK.CUSTOMERS.c_fname: ['CONTAINS', 2, '__col__', 'u']", + }, + ], + id="cryptbank_multi_fcount_04", + ), + pytest.param( + "result = CRYPTBANK.CALCULATE(" + + ", ".join( + f"n{idx}=COUNT(customers.WHERE({cond}))" + for idx, cond in enumerate( + [ + "CONTAINS(first_name, 'a') & CONTAINS(first_name, 'e') & CONTAINS(first_name, 'i')", + "CONTAINS(first_name, 'e') & CONTAINS(first_name, 'i') & CONTAINS(first_name, 'o')", + "CONTAINS(first_name, 'i') & CONTAINS(first_name, 'o') & CONTAINS(first_name, 'u')", + ] + ) + ) + + ")", + [ + { + "CRBNK.CUSTOMERS.c_fname: ['AND', 3, 'CONTAINS', 2, '__col__', 'a', 'CONTAINS', 2, '__col__', 'e', 'CONTAINS', 2, '__col__', 'i']", + "CRBNK.CUSTOMERS.c_fname: ['AND', 3, 'CONTAINS', 2, '__col__', 'e', 'CONTAINS', 2, '__col__', 'i', 'CONTAINS', 2, '__col__', 'o']", + "CRBNK.CUSTOMERS.c_fname: ['AND', 3, 'CONTAINS', 2, 
'__col__', 'i', 'CONTAINS', 2, '__col__', 'o', 'CONTAINS', 2, '__col__', 'u']", + "CRBNK.CUSTOMERS.c_fname: ['CONTAINS', 2, '__col__', 'a']", + "CRBNK.CUSTOMERS.c_fname: ['CONTAINS', 2, '__col__', 'e']", + "CRBNK.CUSTOMERS.c_fname: ['CONTAINS', 2, '__col__', 'i']", + "CRBNK.CUSTOMERS.c_fname: ['CONTAINS', 2, '__col__', 'o']", + "CRBNK.CUSTOMERS.c_fname: ['CONTAINS', 2, '__col__', 'u']", + "DRY_RUN", + }, + { + "CRBNK.CUSTOMERS.c_fname: ['AND', 3, 'CONTAINS', 2, '__col__', 'a', 'CONTAINS', 2, '__col__', 'e', 'CONTAINS', 2, '__col__', 'i']", + "CRBNK.CUSTOMERS.c_fname: ['AND', 3, 'CONTAINS', 2, '__col__', 'e', 'CONTAINS', 2, '__col__', 'i', 'CONTAINS', 2, '__col__', 'o']", + "CRBNK.CUSTOMERS.c_fname: ['CONTAINS', 2, '__col__', 'i']", + "CRBNK.CUSTOMERS.c_fname: ['CONTAINS', 2, '__col__', 'o']", + "CRBNK.CUSTOMERS.c_fname: ['CONTAINS', 2, '__col__', 'u']", + }, + ], + id="cryptbank_multi_fcount_05", + ), + pytest.param( + "result = CRYPTBANK.CALCULATE(" + + ", ".join( + f"n{idx}=COUNT(customers.WHERE({cond}))" + for idx, cond in enumerate( + [ + "~(CONTAINS(first_name, 'a') & CONTAINS(first_name, 'e')) & CONTAINS(first_name, 'i')", + "~(CONTAINS(first_name, 'e') & CONTAINS(first_name, 'i')) & CONTAINS(first_name, 'o')", + ] + ) + ) + + ")", + [ + { + "CRBNK.CUSTOMERS.c_fname: ['AND', 2, 'CONTAINS', 2, '__col__', 'a', 'CONTAINS', 2, '__col__', 'e']", + "CRBNK.CUSTOMERS.c_fname: ['AND', 2, 'CONTAINS', 2, '__col__', 'e', 'CONTAINS', 2, '__col__', 'i']", + "CRBNK.CUSTOMERS.c_fname: ['AND', 2, 'CONTAINS', 2, '__col__', 'i', 'NOT', 1, 'AND', 2, 'CONTAINS', 2, '__col__', 'a', 'CONTAINS', 2, '__col__', 'e']", + "CRBNK.CUSTOMERS.c_fname: ['AND', 2, 'CONTAINS', 2, '__col__', 'o', " + "'NOT', 1, 'AND', 2, 'CONTAINS', 2, '__col__', 'e', 'CONTAINS', 2, '__col__', 'i']", + "CRBNK.CUSTOMERS.c_fname: ['CONTAINS', 2, '__col__', 'a']", + "CRBNK.CUSTOMERS.c_fname: ['CONTAINS', 2, '__col__', 'e']", + "CRBNK.CUSTOMERS.c_fname: ['CONTAINS', 2, '__col__', 'i']", + "CRBNK.CUSTOMERS.c_fname: 
['CONTAINS', 2, '__col__', 'o']", + "CRBNK.CUSTOMERS.c_fname: ['NOT', 1, 'AND', 2, 'CONTAINS', 2, '__col__', 'a', 'CONTAINS', 2, '__col__', 'e']", + "CRBNK.CUSTOMERS.c_fname: ['NOT', 1, 'AND', 2, 'CONTAINS', 2, '__col__', 'e', 'CONTAINS', 2, '__col__', 'i']", + "DRY_RUN", + }, + { + "CRBNK.CUSTOMERS.c_fname: ['AND', 2, 'CONTAINS', 2, '__col__', 'e', 'CONTAINS', 2, '__col__', 'i']", + "CRBNK.CUSTOMERS.c_fname: ['AND', 2, 'CONTAINS', 2, '__col__', 'i', 'NOT', 1, 'AND', 2, 'CONTAINS', 2, '__col__', 'a', 'CONTAINS', 2, '__col__', 'e']", + "CRBNK.CUSTOMERS.c_fname: ['CONTAINS', 2, '__col__', 'o']", + }, + ], + id="cryptbank_multi_fcount_06", + ), ], ) def test_cryptbank_mask_server_logging( From 4f274fd9f2920205d0c3afd562cdc5dc47eac8e9 Mon Sep 17 00:00:00 2001 From: knassre-bodo Date: Fri, 21 Nov 2025 11:00:26 -0800 Subject: [PATCH 30/40] Minor test adjustment --- tests/mock_server/lookup_table.py | 20 ++++++++++++++++++++ tests/test_masked_sqlite.py | 2 +- 2 files changed, 21 insertions(+), 1 deletion(-) diff --git a/tests/mock_server/lookup_table.py b/tests/mock_server/lookup_table.py index 8a8c8ea0f..3c494d044 100644 --- a/tests/mock_server/lookup_table.py +++ b/tests/mock_server/lookup_table.py @@ -1060,4 +1060,24 @@ "IN", [], ), + ( + "srv.CRBNK.CUSTOMERS.c_fname", + ( + "NOT", + 1, + "AND", + 2, + "CONTAINS", + 2, + "__col__", + "e", + "CONTAINS", + 2, + "__col__", + "i", + ), + ): ( + "NOT_IN", + ["ALICE", "EMILY", "ISABEL", "QUEENIE"], + ), } diff --git a/tests/test_masked_sqlite.py b/tests/test_masked_sqlite.py index 351cbbf54..13444cdca 100644 --- a/tests/test_masked_sqlite.py +++ b/tests/test_masked_sqlite.py @@ -1638,9 +1638,9 @@ def test_pipeline_e2e_cryptbank( "DRY_RUN", }, { - "CRBNK.CUSTOMERS.c_fname: ['AND', 2, 'CONTAINS', 2, '__col__', 'e', 'CONTAINS', 2, '__col__', 'i']", "CRBNK.CUSTOMERS.c_fname: ['AND', 2, 'CONTAINS', 2, '__col__', 'i', 'NOT', 1, 'AND', 2, 'CONTAINS', 2, '__col__', 'a', 'CONTAINS', 2, '__col__', 'e']", "CRBNK.CUSTOMERS.c_fname: 
['CONTAINS', 2, '__col__', 'o']", + "CRBNK.CUSTOMERS.c_fname: ['NOT', 1, 'AND', 2, 'CONTAINS', 2, '__col__', 'e', 'CONTAINS', 2, '__col__', 'i']", }, ], id="cryptbank_multi_fcount_06", From 18379efa490590352b29a15698f413d6f8e50d25 Mon Sep 17 00:00:00 2001 From: knassre-bodo Date: Fri, 21 Nov 2025 11:01:15 -0800 Subject: [PATCH 31/40] Minor test adjustment --- tests/test_masked_sqlite.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/tests/test_masked_sqlite.py b/tests/test_masked_sqlite.py index 13444cdca..e5a68d66e 100644 --- a/tests/test_masked_sqlite.py +++ b/tests/test_masked_sqlite.py @@ -1627,8 +1627,7 @@ def test_pipeline_e2e_cryptbank( "CRBNK.CUSTOMERS.c_fname: ['AND', 2, 'CONTAINS', 2, '__col__', 'a', 'CONTAINS', 2, '__col__', 'e']", "CRBNK.CUSTOMERS.c_fname: ['AND', 2, 'CONTAINS', 2, '__col__', 'e', 'CONTAINS', 2, '__col__', 'i']", "CRBNK.CUSTOMERS.c_fname: ['AND', 2, 'CONTAINS', 2, '__col__', 'i', 'NOT', 1, 'AND', 2, 'CONTAINS', 2, '__col__', 'a', 'CONTAINS', 2, '__col__', 'e']", - "CRBNK.CUSTOMERS.c_fname: ['AND', 2, 'CONTAINS', 2, '__col__', 'o', " - "'NOT', 1, 'AND', 2, 'CONTAINS', 2, '__col__', 'e', 'CONTAINS', 2, '__col__', 'i']", + "CRBNK.CUSTOMERS.c_fname: ['AND', 2, 'CONTAINS', 2, '__col__', 'o', 'NOT', 1, 'AND', 2, 'CONTAINS', 2, '__col__', 'e', 'CONTAINS', 2, '__col__', 'i']", "CRBNK.CUSTOMERS.c_fname: ['CONTAINS', 2, '__col__', 'a']", "CRBNK.CUSTOMERS.c_fname: ['CONTAINS', 2, '__col__', 'e']", "CRBNK.CUSTOMERS.c_fname: ['CONTAINS', 2, '__col__', 'i']", From b728348df0e2e51b7d40832dcabe2a9a4349b999 Mon Sep 17 00:00:00 2001 From: knassre-bodo Date: Wed, 26 Nov 2025 11:32:14 -0800 Subject: [PATCH 32/40] Added the FQN slash handling --- pydough/mask_server/mask_server.py | 43 +++- tests/mock_server/lookup_table.py | 168 +++++++-------- tests/test_masked_sqlite.py | 316 ++++++++++++++--------------- tests/testing_utilities.py | 8 +- 4 files changed, 287 insertions(+), 248 deletions(-) diff --git 
a/pydough/mask_server/mask_server.py b/pydough/mask_server/mask_server.py index c2b025271..103b13960 100644 --- a/pydough/mask_server/mask_server.py +++ b/pydough/mask_server/mask_server.py @@ -14,6 +14,9 @@ from enum import Enum from typing import Any +import sqlglot.expressions as exp +from sqlglot import parse_one + from pydough.logger import get_logger from pydough.mask_server.server_connection import ( RequestMethod, @@ -65,6 +68,42 @@ class MaskServerInput: The linear serialization of the predicate expression. """ + def fully_qualified_name(self) -> str: + """ + Returns the fully qualified name of the column in the format + 'table_path/column_name', with `/` as the separator used to modify the + `table_path` appropriately. + """ + table_path_chunks: list[str] = [] + parsed: exp.Expression = parse_one(self.table_path, dialect="mysql") + self.dump_identifier_chunks(parsed, table_path_chunks) + return f"{'/'.join(table_path_chunks)}/{self.column_name}" + + def dump_identifier_chunks( + self, + expression: exp.Expression, + chunks: list[str], + ) -> None: + """ + Recursively dumps the identifier chunks from the parsed SQL expression. + + Args: + `expression`: The parsed SQL expression. + `chunks`: The list to append the identifier chunks to. 
+ """ + match expression: + case exp.Identifier(): + chunks.append(expression.sql()) + case exp.Literal() if expression.is_string: + chunks.append(expression.sql()) + case exp.Column() | exp.Dot(): + for part in expression.parts: + self.dump_identifier_chunks(part, chunks) + case _: + raise ValueError( + f"Unexpected expression type in table path parse tree: {expression.__class__.__name__}" + ) + @dataclass class MaskServerOutput: @@ -160,7 +199,7 @@ def simplify_simple_expression_batch( pyd_logger.info(f"Batch request to Mask Server ({len(batch)} items):") for idx, item in enumerate(batch): pyd_logger.info( - f"({idx + 1}) {item.table_path}.{item.column_name}: {item.expression}" + f"({idx + 1}) {item.fully_qualified_name()}: {item.expression}" ) assert batch != [], "Batch cannot be empty." @@ -227,7 +266,7 @@ def generate_request( evaluate_request: dict = { "column_ref": { "kind": "fqn", - "value": f"{self.server_address}.{item.table_path}.{item.column_name}", + "value": f"{self.server_address}/{item.fully_qualified_name()}", }, "predicate": item.expression, "output_mode": "cell_encrypted", diff --git a/tests/mock_server/lookup_table.py b/tests/mock_server/lookup_table.py index 3c494d044..ca66092be 100644 --- a/tests/mock_server/lookup_table.py +++ b/tests/mock_server/lookup_table.py @@ -80,12 +80,12 @@ ['"Hello"', "HelloWorld", "SGVsbG9Xb3JsZA=="], ), # CRYPTBANK hardcoded responses - ("srv.CRBNK.CUSTOMERS.c_lname", ("EQUAL", 2, "__col__", "lee")): ( + ("srv/CRBNK/CUSTOMERS/c_lname", ("EQUAL", 2, "__col__", "lee")): ( "IN", ["LEE"], ), ( - "srv.CRBNK.CUSTOMERS.c_birthday", + "srv/CRBNK/CUSTOMERS/c_birthday", ("BETWEEN", 3, 1980, "YEAR", 1, "__col__", 1985), ): ( "IN", @@ -97,7 +97,7 @@ "1983-12-27", ], ), - ("srv.CRBNK.TRANSACTIONS.t_amount", ("GT", 2, "__col__", 9000.0)): ( + ("srv/CRBNK/TRANSACTIONS/t_amount", ("GT", 2, "__col__", 9000.0)): ( "IN", [ -8934.44, @@ -124,7 +124,7 @@ ], ), ( - "srv.CRBNK.TRANSACTIONS.t_ts", + "srv/CRBNK/TRANSACTIONS/t_ts", ( "AND", 2, 
@@ -157,7 +157,7 @@ ], ), ( - "srv.CRBNK.ACCOUNTS.a_type", + "srv/CRBNK/ACCOUNTS/a_type", ( "OR", 2, @@ -174,48 +174,48 @@ "IN", ["avingss", "etirementr"], ), - ("srv.CRBNK.CUSTOMERS.c_phone", ("ENDSWITH", 2, "__col__", "5")): ( + ("srv/CRBNK/CUSTOMERS/c_phone", ("ENDSWITH", 2, "__col__", "5")): ( "IN", ["555-091-2345", "555-901-2345"], ), ( - "srv.CRBNK.CUSTOMERS.c_fname", + "srv/CRBNK/CUSTOMERS/c_fname", ("OR", 2, "ENDSWITH", 2, "__col__", "a", "ENDSWITH", 2, "__col__", "e"), ): ( "IN", ["ALICE", "GRACE", "LUKE", "MARIA", "OLIVIA", "QUEENIE", "SOPHIA"], ), - ("srv.CRBNK.CUSTOMERS.c_fname", ("ENDSWITH", 2, "__col__", "s")): ( + ("srv/CRBNK/CUSTOMERS/c_fname", ("ENDSWITH", 2, "__col__", "s")): ( "IN", ["JAMES", "NICHOLAS", "THOMAS"], ), - ("srv.CRBNK.CUSTOMERS.c_lname", ("NOT_EQUAL", 2, "__col__", "lopez")): ( + ("srv/CRBNK/CUSTOMERS/c_lname", ("NOT_EQUAL", 2, "__col__", "lopez")): ( "NOT_IN", ["LOPEZ"], ), - ("srv.CRBNK.CUSTOMERS.c_lname", ("NOT_EQUAL", 2, "__col__", "lee")): ( + ("srv/CRBNK/CUSTOMERS/c_lname", ("NOT_EQUAL", 2, "__col__", "lee")): ( "NOT_IN", ["LEE"], ), ( - "srv.CRBNK.CUSTOMERS.c_lname", + "srv/CRBNK/CUSTOMERS/c_lname", ("IN", 4, "__col__", "lee", "smith", "rodriguez"), ): ( "IN", ["LEE", "SMITH", "RODRIGUEZ"], ), ( - "srv.CRBNK.CUSTOMERS.c_lname", + "srv/CRBNK/CUSTOMERS/c_lname", ("NOT", 1, "IN", 4, "__col__", "lee", "smith", "rodriguez"), ): ( "NOT_IN", ["LEE", "SMITH", "RODRIGUEZ"], ), - ("srv.CRBNK.CUSTOMERS.c_phone", ("STARTSWITH", 2, "__col__", "555-8")): ( + ("srv/CRBNK/CUSTOMERS/c_phone", ("STARTSWITH", 2, "__col__", "555-8")): ( "IN", ["555-809-1234", "555-870-9123"], ), - ("srv.CRBNK.CUSTOMERS.c_email", ("ENDSWITH", 2, "__col__", "gmail.com")): ( + ("srv/CRBNK/CUSTOMERS/c_email", ("ENDSWITH", 2, "__col__", "gmail.com")): ( "IN", [ "livia.a22@gmail.como", @@ -224,24 +224,24 @@ "opez.luke99@gmail.coml", ], ), - ("srv.CRBNK.CUSTOMERS.c_birthday", ("EQUAL", 2, "YEAR", 1, "__col__", 1978)): ( + ("srv/CRBNK/CUSTOMERS/c_birthday", ("EQUAL", 2, 
"YEAR", 1, "__col__", 1978)): ( "IN", ["1976-10-27", "1976-12-02"], ), - ("srv.CRBNK.CUSTOMERS.c_birthday", ("EQUAL", 2, "__col__", "1985-04-12")): ( + ("srv/CRBNK/CUSTOMERS/c_birthday", ("EQUAL", 2, "__col__", "1985-04-12")): ( "IN", ["1983-12-27"], ), - ("srv.CRBNK.CUSTOMERS.c_fname", ("ENDSWITH", 2, "__col__", "e")): ( + ("srv/CRBNK/CUSTOMERS/c_fname", ("ENDSWITH", 2, "__col__", "e")): ( "IN", ["ALICE", "GRACE", "LUKE", "QUEENIE"], ), - ("srv.CRBNK.CUSTOMERS.c_lname", ("ENDSWITH", 2, "__col__", "e")): ( + ("srv/CRBNK/CUSTOMERS/c_lname", ("ENDSWITH", 2, "__col__", "e")): ( "IN", ["LEE", "MOORE"], ), ( - "srv.CRBNK.ACCOUNTS.a_type", + "srv/CRBNK/ACCOUNTS/a_type", ( "AND", 2, @@ -258,19 +258,19 @@ "NOT_IN", ["avingss", "heckingc"], ), - ("srv.CRBNK.CUSTOMERS.c_birthday", ("NOT_EQUAL", 2, "__col__", "1991-11-15")): ( + ("srv/CRBNK/CUSTOMERS/c_birthday", ("NOT_EQUAL", 2, "__col__", "1991-11-15")): ( "NOT_IN", ["1990-07-31"], ), - ("srv.CRBNK.CUSTOMERS.c_birthday", ("LTE", 2, "__col__", "1991-11-15")): ( + ("srv/CRBNK/CUSTOMERS/c_birthday", ("LTE", 2, "__col__", "1991-11-15")): ( "NOT_IN", ["1991-03-13", "1992-05-06", "1993-01-01", "1994-06-15"], ), - ("srv.CRBNK.CUSTOMERS.c_birthday", ("GT", 2, "__col__", "1991-11-15")): ( + ("srv/CRBNK/CUSTOMERS/c_birthday", ("GT", 2, "__col__", "1991-11-15")): ( "IN", ["1991-03-13", "1992-05-06", "1993-01-01", "1994-06-15"], ), - ("srv.CRBNK.CUSTOMERS.c_birthday", ("LT", 2, "__col__", "1991-11-15")): ( + ("srv/CRBNK/CUSTOMERS/c_birthday", ("LT", 2, "__col__", "1991-11-15")): ( "NOT_IN", [ "1990-07-31", @@ -280,7 +280,7 @@ "1994-06-15", ], ), - ("srv.CRBNK.CUSTOMERS.c_birthday", ("GTE", 2, "__col__", "1991-11-15")): ( + ("srv/CRBNK/CUSTOMERS/c_birthday", ("GTE", 2, "__col__", "1991-11-15")): ( "IN", [ "1990-07-31", @@ -290,23 +290,23 @@ "1994-06-15", ], ), - ("srv.CRBNK.TRANSACTIONS.t_amount", ("LT", 2, "__col__", 0)): ( + ("srv/CRBNK/TRANSACTIONS/t_amount", ("LT", 2, "__col__", 0)): ( "IN", [], ), - 
("srv.CRBNK.TRANSACTIONS.t_amount", ("GT", 2, "__col__", 0)): ( + ("srv/CRBNK/TRANSACTIONS/t_amount", ("GT", 2, "__col__", 0)): ( "NOT_IN", [], ), - ("srv.CRBNK.CUSTOMERS.c_birthday", ("LTE", 2, "__col__", "1925-01-01")): ( + ("srv/CRBNK/CUSTOMERS/c_birthday", ("LTE", 2, "__col__", "1925-01-01")): ( "IN", [], ), - ("srv.CRBNK.CUSTOMERS.c_phone", ("EQUAL", 2, "__col__", "555-123-456")): ( + ("srv/CRBNK/CUSTOMERS/c_phone", ("EQUAL", 2, "__col__", "555-123-456")): ( "IN", [], ), - ("srv.CRBNK.ACCOUNTS.a_open_ts", ("EQUAL", 2, "YEAR", 1, "__col__", 2021)): ( + ("srv/CRBNK/ACCOUNTS/a_open_ts", ("EQUAL", 2, "YEAR", 1, "__col__", 2021)): ( "IN", [ "2017-02-11 10:59:51", @@ -318,7 +318,7 @@ ], ), ( - "srv.CRBNK.CUSTOMERS.c_birthday", + "srv/CRBNK/CUSTOMERS/c_birthday", ( "AND", 2, @@ -360,14 +360,14 @@ ["1980-01-18", "1981-11-15", "1990-07-31", "1994-06-15"], ), ( - "srv.CRBNK.CUSTOMERS.c_birthday", + "srv/CRBNK/CUSTOMERS/c_birthday", ("IN", 5, "__col__", "1991-11-15", "1978-02-11", "2005-03-14", "1985-04-12"), ): ( "IN", ["1990-07-31", "1976-10-27", "1983-12-27"], ), ( - "srv.CRBNK.ACCOUNTS.a_open_ts", + "srv/CRBNK/ACCOUNTS/a_open_ts", ("BETWEEN", 3, "2020-03-28 09:20:00", "__col__", "2020-09-20 08:30:00"), ): ( "IN", @@ -379,7 +379,7 @@ "2016-09-03 12:01:51", ], ), - ("srv.CRBNK.CUSTOMERS.c_email", ("CONTAINS", 2, "__col__", "mail")): ( + ("srv/CRBNK/CUSTOMERS/c_email", ("CONTAINS", 2, "__col__", "mail")): ( "NOT_IN", [ "homasl@outlook.comt", @@ -392,7 +392,7 @@ "lice_j@example.orga", ], ), - ("srv.CRBNK.CUSTOMERS.c_email", ("LIKE", 2, "__col__", "%.%@%mail%")): ( + ("srv/CRBNK/CUSTOMERS/c_email", ("LIKE", 2, "__col__", "%.%@%mail%")): ( "IN", [ "ophia.jackson@mail.orgs", @@ -405,7 +405,7 @@ "ob.smith77@gmail.comb", ], ), - ("srv.CRBNK.ACCOUNTS.a_open_ts", ("IN", 4, "MONTH", 1, "__col__", 1, 2, 3)): ( + ("srv/CRBNK/ACCOUNTS/a_open_ts", ("IN", 4, "MONTH", 1, "__col__", 1, 2, 3)): ( "IN", [ "2013-04-22 11:37:51", @@ -422,14 +422,14 @@ ], ), ( - 
"srv.CRBNK.ACCOUNTS.a_open_ts", + "srv/CRBNK/ACCOUNTS/a_open_ts", ("EQUAL", 2, "QUARTER", 1, "__col__", "DAY", 1, "__col__"), ): ( "IN", ["2015-05-04 18:01:51"], ), ( - "srv.CRBNK.ACCOUNTS.a_open_ts", + "srv/CRBNK/ACCOUNTS/a_open_ts", ( "AND", 2, @@ -457,7 +457,7 @@ "2014-08-15 11:31:51", ], ), - ("srv.CRBNK.TRANSACTIONS.t_ts", ("EQUAL", 2, "SECOND", 1, "__col__", 23)): ( + ("srv/CRBNK/TRANSACTIONS/t_ts", ("EQUAL", 2, "SECOND", 1, "__col__", 23)): ( "IN", [ "2020-11-11 09:03:02", @@ -466,7 +466,7 @@ ], ), ( - "srv.CRBNK.ACCOUNTS.a_balance", + "srv/CRBNK/ACCOUNTS/a_balance", ("BETWEEN", 3, 200, "ABS", 1, "SUB", 2, "__col__", 7250, 600), ): ( "IN", @@ -476,7 +476,7 @@ ], ), ( - "srv.CRBNK.ACCOUNTS.a_open_ts", + "srv/CRBNK/ACCOUNTS/a_open_ts", ( "EQUAL", 2, @@ -501,7 +501,7 @@ ], ), ( - "srv.CRBNK.ACCOUNTS.a_open_ts", + "srv/CRBNK/ACCOUNTS/a_open_ts", ("EQUAL", 2, "LEAST", 2, "HOUR", 1, "__col__", "MINUTE", 1, "__col__", 15), ): ( "IN", @@ -513,21 +513,21 @@ ], ), ( - "srv.CRBNK.CUSTOMERS.c_phone", + "srv/CRBNK/CUSTOMERS/c_phone", ("CONTAINS", 2, "CONCAT", 2, "1-", "__col__", "1-5"), ): ( "NOT_IN", [], ), ( - "srv.CRBNK.CUSTOMERS.c_phone", + "srv/CRBNK/CUSTOMERS/c_phone", ("CONTAINS", 2, "CONCAT", 3, "1", "-", "__col__", "1-5"), ): ( "NOT_IN", [], ), ( - "srv.CRBNK.CUSTOMERS.c_phone", + "srv/CRBNK/CUSTOMERS/c_phone", ("CONTAINS", 2, "CONCAT", 5, "1", "-", "__col__", "-", "1", "5-1"), ): ( "IN", @@ -539,7 +539,7 @@ ], ), ( - "srv.CRBNK.CUSTOMERS.c_birthday", + "srv/CRBNK/CUSTOMERS/c_birthday", ("IN", 3, "COALESCE", 2, "YEAR", 1, "__col__", 1990, 1990, 1991), ): ( "IN", @@ -550,7 +550,7 @@ ], ), ( - "srv.CRBNK.CUSTOMERS.c_birthday", + "srv/CRBNK/CUSTOMERS/c_birthday", ("IN", 3, "COALESCE", 2, "YEAR", 1, "__col__", 1990, 1990, 2005), ): ( "IN", @@ -560,7 +560,7 @@ ], ), ( - "srv.CRBNK.CUSTOMERS.c_birthday", + "srv/CRBNK/CUSTOMERS/c_birthday", ("IN", 3, "COALESCE", 2, "YEAR", 1, "__col__", 2005, 2005, 2006), ): ( "IN", @@ -569,7 +569,7 @@ ], ), ( - 
"srv.CRBNK.CUSTOMERS.c_birthday", + "srv/CRBNK/CUSTOMERS/c_birthday", ("NOT", 1, "IN", 3, "COALESCE", 2, "YEAR", 1, "__col__", 1990, 1990, 1991), ): ( "NOT_IN", @@ -580,7 +580,7 @@ ], ), ( - "srv.CRBNK.CUSTOMERS.c_birthday", + "srv/CRBNK/CUSTOMERS/c_birthday", ("NOT", 1, "IN", 3, "COALESCE", 2, "YEAR", 1, "__col__", 1990, 1990, 2005), ): ( "NOT_IN", @@ -590,7 +590,7 @@ ], ), ( - "srv.CRBNK.CUSTOMERS.c_birthday", + "srv/CRBNK/CUSTOMERS/c_birthday", ("NOT", 1, "IN", 3, "COALESCE", 2, "YEAR", 1, "__col__", 2005, 2005, 2006), ): ( "NOT_IN", @@ -599,11 +599,11 @@ ], ), ( - "srv.CRBNK.CUSTOMERS.c_fname", + "srv/CRBNK/CUSTOMERS/c_fname", ("IN", 4, "SLICE", 3, "__col__", 0, 1, "q", "r", "s"), ): ("IN", ["QUEENIE", "ROBERT", "SOPHIA"]), ( - "srv.CRBNK.CUSTOMERS.c_lname", + "srv/CRBNK/CUSTOMERS/c_lname", ( "CONTAINS", 2, @@ -630,12 +630,12 @@ "IN", ["LEE", "RODRIGUEZ"], ), - ("srv.CRBNK.CUSTOMERS.c_fname", ("EQUAL", 2, "SLICE", 3, "__col__", 0, 1, "i")): ( + ("srv/CRBNK/CUSTOMERS/c_fname", ("EQUAL", 2, "SLICE", 3, "__col__", 0, 1, "i")): ( "IN", ["ISABEL"], ), ( - "srv.CRBNK.CUSTOMERS.c_fname", + "srv/CRBNK/CUSTOMERS/c_fname", ("IN", 6, "SLICE", 3, "__col__", 1, 2, "ar", "li", "ra", "to", "am"), ): ( "IN", @@ -651,7 +651,7 @@ ], ), ( - "srv.CRBNK.TRANSACTIONS.t_ts", + "srv/CRBNK/TRANSACTIONS/t_ts", ("EQUAL", 2, "DATETRUNC", 2, "year", "__col__", "2023-01-01"), ): ( "IN", @@ -720,7 +720,7 @@ ], ), ( - "srv.CRBNK.TRANSACTIONS.t_ts", + "srv/CRBNK/TRANSACTIONS/t_ts", ("EQUAL", 2, "DATETRUNC", 2, "quarter", "__col__", "2023-04-01"), ): ( "IN", @@ -745,7 +745,7 @@ ], ), ( - "srv.CRBNK.TRANSACTIONS.t_ts", + "srv/CRBNK/TRANSACTIONS/t_ts", ("EQUAL", 2, "DATETRUNC", 2, "month", "__col__", "2023-06-01"), ): ( "IN", @@ -761,7 +761,7 @@ ], ), ( - "srv.CRBNK.TRANSACTIONS.t_ts", + "srv/CRBNK/TRANSACTIONS/t_ts", ("EQUAL", 2, "DATETRUNC", 2, "day", "__col__", "2023-06-02"), ): ( "IN", @@ -771,7 +771,7 @@ ], ), ( - "srv.CRBNK.TRANSACTIONS.t_ts", + "srv/CRBNK/TRANSACTIONS/t_ts", ("EQUAL", 2, 
"DATETRUNC", 2, "hour", "__col__", "2023-06-02 04:00:00"), ): ( "IN", @@ -781,7 +781,7 @@ ], ), ( - "srv.CRBNK.TRANSACTIONS.t_ts", + "srv/CRBNK/TRANSACTIONS/t_ts", ("EQUAL", 2, "DATETRUNC", 2, "minute", "__col__", "2023-06-02 04:55:00"), ): ( "IN", @@ -791,7 +791,7 @@ ], ), ( - "srv.CRBNK.TRANSACTIONS.t_ts", + "srv/CRBNK/TRANSACTIONS/t_ts", ("EQUAL", 2, "DATETRUNC", 2, "second", "__col__", "2023-06-02 04:55:31"), ): ( "IN", @@ -800,7 +800,7 @@ ], ), ( - "srv.CRBNK.TRANSACTIONS.t_ts", + "srv/CRBNK/TRANSACTIONS/t_ts", ("EQUAL", 2, "DATEADD", 3, 1, "years", "__col__", "2020-11-11 18:00:52"), ): ( "IN", @@ -809,7 +809,7 @@ ], ), ( - "srv.CRBNK.TRANSACTIONS.t_ts", + "srv/CRBNK/TRANSACTIONS/t_ts", ("EQUAL", 2, "DATEADD", 3, 2, "quarters", "__col__", "2020-05-11 18:00:52"), ): ( "IN", @@ -818,7 +818,7 @@ ], ), ( - "srv.CRBNK.TRANSACTIONS.t_ts", + "srv/CRBNK/TRANSACTIONS/t_ts", ("EQUAL", 2, "DATEADD", 3, -5, "months", "__col__", "2019-06-11 18:00:52"), ): ( "IN", @@ -827,7 +827,7 @@ ], ), ( - "srv.CRBNK.TRANSACTIONS.t_ts", + "srv/CRBNK/TRANSACTIONS/t_ts", ("EQUAL", 2, "DATEADD", 3, 10, "days", "__col__", "2019-11-21 18:00:52"), ): ( "IN", @@ -836,7 +836,7 @@ ], ), ( - "srv.CRBNK.TRANSACTIONS.t_ts", + "srv/CRBNK/TRANSACTIONS/t_ts", ("EQUAL", 2, "DATEADD", 3, 1000, "hours", "__col__", "2019-12-23 10:00:52"), ): ( "IN", @@ -845,7 +845,7 @@ ], ), ( - "srv.CRBNK.TRANSACTIONS.t_ts", + "srv/CRBNK/TRANSACTIONS/t_ts", ("EQUAL", 2, "DATEADD", 3, 10000, "minutes", "__col__", "2019-11-18 16:40:52"), ): ( "IN", @@ -854,7 +854,7 @@ ], ), ( - "srv.CRBNK.TRANSACTIONS.t_ts", + "srv/CRBNK/TRANSACTIONS/t_ts", ( "EQUAL", 2, @@ -872,7 +872,7 @@ ], ), ( - "srv.CRBNK.TRANSACTIONS.t_ts", + "srv/CRBNK/TRANSACTIONS/t_ts", ( "EQUAL", 2, @@ -895,11 +895,11 @@ "2019-11-11 15:44:22", ], ), - ("srv.CRBNK.CUSTOMERS.c_fname", ("CONTAINS", 2, "__col__", "a")): ( + ("srv/CRBNK/CUSTOMERS/c_fname", ("CONTAINS", 2, "__col__", "a")): ( "NOT_IN", ["BOB", "EMILY", "HENRY", "LUKE", "PETER", "QUEENIE", "ROBERT"], 
), - ("srv.CRBNK.CUSTOMERS.c_fname", ("CONTAINS", 2, "__col__", "e")): ( + ("srv/CRBNK/CUSTOMERS/c_fname", ("CONTAINS", 2, "__col__", "e")): ( "NOT_IN", [ "BOB", @@ -913,7 +913,7 @@ "THOMAS", ], ), - ("srv.CRBNK.CUSTOMERS.c_fname", ("CONTAINS", 2, "__col__", "i")): ( + ("srv/CRBNK/CUSTOMERS/c_fname", ("CONTAINS", 2, "__col__", "i")): ( "IN", [ "ALICE", @@ -927,51 +927,51 @@ "SOPHIA", ], ), - ("srv.CRBNK.CUSTOMERS.c_fname", ("CONTAINS", 2, "__col__", "o")): ( + ("srv/CRBNK/CUSTOMERS/c_fname", ("CONTAINS", 2, "__col__", "o")): ( "IN", ["BOB", "CAROL", "NICHOLAS", "OLIVIA", "ROBERT", "SOPHIA", "THOMAS"], ), - ("srv.CRBNK.CUSTOMERS.c_fname", ("CONTAINS", 2, "__col__", "u")): ( + ("srv/CRBNK/CUSTOMERS/c_fname", ("CONTAINS", 2, "__col__", "u")): ( "IN", ["LUKE", "QUEENIE"], ), ( - "srv.CRBNK.CUSTOMERS.c_fname", + "srv/CRBNK/CUSTOMERS/c_fname", ("AND", 2, "CONTAINS", 2, "__col__", "a", "CONTAINS", 2, "__col__", "e"), ): ( "IN", ["ALICE", "GRACE", "ISABEL", "JAMES", "KAREN"], ), ( - "srv.CRBNK.CUSTOMERS.c_fname", + "srv/CRBNK/CUSTOMERS/c_fname", ("AND", 2, "CONTAINS", 2, "__col__", "e", "CONTAINS", 2, "__col__", "i"), ): ( "IN", ["ALICE", "EMILY", "ISABEL", "QUEENIE"], ), ( - "srv.CRBNK.CUSTOMERS.c_fname", + "srv/CRBNK/CUSTOMERS/c_fname", ("AND", 2, "CONTAINS", 2, "__col__", "i", "CONTAINS", 2, "__col__", "o"), ): ( "IN", ["NICHOLAS", "OLIVIA", "SOPHIA"], ), ( - "srv.CRBNK.CUSTOMERS.c_fname", + "srv/CRBNK/CUSTOMERS/c_fname", ("AND", 2, "CONTAINS", 2, "__col__", "o", "CONTAINS", 2, "__col__", "u"), ): ( "IN", [], ), ( - "srv.CRBNK.CUSTOMERS.c_fname", + "srv/CRBNK/CUSTOMERS/c_fname", ("AND", 2, "CONTAINS", 2, "__col__", "u", "CONTAINS", 2, "__col__", "a"), ): ( "IN", [], ), ( - "srv.CRBNK.CUSTOMERS.c_fname", + "srv/CRBNK/CUSTOMERS/c_fname", ( "AND", 3, @@ -993,7 +993,7 @@ ["ALICE", "ISABEL"], ), ( - "srv.CRBNK.CUSTOMERS.c_fname", + "srv/CRBNK/CUSTOMERS/c_fname", ( "AND", 3, @@ -1015,7 +1015,7 @@ [], ), ( - "srv.CRBNK.CUSTOMERS.c_fname", + "srv/CRBNK/CUSTOMERS/c_fname", ( 
"NOT", 1, @@ -1035,7 +1035,7 @@ ["NICHOLAS", "OLIVIA", "SOPHIA"], ), ( - "srv.CRBNK.CUSTOMERS.c_fname", + "srv/CRBNK/CUSTOMERS/c_fname", ( "AND", 2, @@ -1061,7 +1061,7 @@ [], ), ( - "srv.CRBNK.CUSTOMERS.c_fname", + "srv/CRBNK/CUSTOMERS/c_fname", ( "NOT", 1, diff --git a/tests/test_masked_sqlite.py b/tests/test_masked_sqlite.py index e5a68d66e..6b7c9074e 100644 --- a/tests/test_masked_sqlite.py +++ b/tests/test_masked_sqlite.py @@ -1128,8 +1128,8 @@ def test_pipeline_e2e_cryptbank( "selected_customers = customers.WHERE(last_name == 'lee')\n" "result = CRYPTBANK.CALCULATE(n=COUNT(selected_customers))", [ - {"CRBNK.CUSTOMERS.c_lname: ['EQUAL', 2, '__col__', 'lee']", "DRY_RUN"}, - {"CRBNK.CUSTOMERS.c_lname: ['EQUAL', 2, '__col__', 'lee']"}, + {"CRBNK/CUSTOMERS/c_lname: ['EQUAL', 2, '__col__', 'lee']", "DRY_RUN"}, + {"CRBNK/CUSTOMERS/c_lname: ['EQUAL', 2, '__col__', 'lee']"}, ], id="cryptbank_filter_count_01", ), @@ -1138,11 +1138,11 @@ def test_pipeline_e2e_cryptbank( "result = CRYPTBANK.CALCULATE(n=COUNT(selected_customers))", [ { - "CRBNK.CUSTOMERS.c_lname: ['IN', 4, '__col__', 'lee', 'smith', 'rodriguez']", + "CRBNK/CUSTOMERS/c_lname: ['IN', 4, '__col__', 'lee', 'smith', 'rodriguez']", "DRY_RUN", }, { - "CRBNK.CUSTOMERS.c_lname: ['IN', 4, '__col__', 'lee', 'smith', 'rodriguez']" + "CRBNK/CUSTOMERS/c_lname: ['IN', 4, '__col__', 'lee', 'smith', 'rodriguez']" }, ], id="cryptbank_filter_count_03", @@ -1152,12 +1152,12 @@ def test_pipeline_e2e_cryptbank( "result = CRYPTBANK.CALCULATE(n=COUNT(selected_customers))", [ { - "CRBNK.CUSTOMERS.c_lname: ['IN', 4, '__col__', 'lee', 'smith', 'rodriguez']", - "CRBNK.CUSTOMERS.c_lname: ['NOT', 1, 'IN', 4, '__col__', 'lee', 'smith', 'rodriguez']", + "CRBNK/CUSTOMERS/c_lname: ['IN', 4, '__col__', 'lee', 'smith', 'rodriguez']", + "CRBNK/CUSTOMERS/c_lname: ['NOT', 1, 'IN', 4, '__col__', 'lee', 'smith', 'rodriguez']", "DRY_RUN", }, { - "CRBNK.CUSTOMERS.c_lname: ['NOT', 1, 'IN', 4, '__col__', 'lee', 'smith', 'rodriguez']", + 
"CRBNK/CUSTOMERS/c_lname: ['NOT', 1, 'IN', 4, '__col__', 'lee', 'smith', 'rodriguez']", }, ], id="cryptbank_filter_count_04", @@ -1174,20 +1174,20 @@ def test_pipeline_e2e_cryptbank( "result = CRYPTBANK.CALCULATE(n=COUNT(selected_customers))", [ { - "CRBNK.CUSTOMERS.c_fname: ['ENDSWITH', 2, '__col__', 'a']", - "CRBNK.CUSTOMERS.c_fname: ['ENDSWITH', 2, '__col__', 'e']", - "CRBNK.CUSTOMERS.c_fname: ['ENDSWITH', 2, '__col__', 's']", - "CRBNK.CUSTOMERS.c_fname: ['OR', 2, 'ENDSWITH', 2, '__col__', 'a', 'ENDSWITH', 2, '__col__', 'e']", - "CRBNK.CUSTOMERS.c_fname: ['OR', 2, 'OR', 2, 'ENDSWITH', 2, '__col__', 'a', 'ENDSWITH', 2, '__col__', 'e', 'ENDSWITH', 2, '__col__', 's']", - "CRBNK.CUSTOMERS.c_lname: ['NOT_EQUAL', 2, '__col__', 'lopez']", - "CRBNK.CUSTOMERS.c_phone: ['ENDSWITH', 2, '__col__', '5']", + "CRBNK/CUSTOMERS/c_fname: ['ENDSWITH', 2, '__col__', 'a']", + "CRBNK/CUSTOMERS/c_fname: ['ENDSWITH', 2, '__col__', 'e']", + "CRBNK/CUSTOMERS/c_fname: ['ENDSWITH', 2, '__col__', 's']", + "CRBNK/CUSTOMERS/c_fname: ['OR', 2, 'ENDSWITH', 2, '__col__', 'a', 'ENDSWITH', 2, '__col__', 'e']", + "CRBNK/CUSTOMERS/c_fname: ['OR', 2, 'OR', 2, 'ENDSWITH', 2, '__col__', 'a', 'ENDSWITH', 2, '__col__', 'e', 'ENDSWITH', 2, '__col__', 's']", + "CRBNK/CUSTOMERS/c_lname: ['NOT_EQUAL', 2, '__col__', 'lopez']", + "CRBNK/CUSTOMERS/c_phone: ['ENDSWITH', 2, '__col__', '5']", "DRY_RUN", }, { - "CRBNK.CUSTOMERS.c_fname: ['ENDSWITH', 2, '__col__', 's']", - "CRBNK.CUSTOMERS.c_fname: ['OR', 2, 'ENDSWITH', 2, '__col__', 'a', 'ENDSWITH', 2, '__col__', 'e']", - "CRBNK.CUSTOMERS.c_lname: ['NOT_EQUAL', 2, '__col__', 'lopez']", - "CRBNK.CUSTOMERS.c_phone: ['ENDSWITH', 2, '__col__', '5']", + "CRBNK/CUSTOMERS/c_fname: ['ENDSWITH', 2, '__col__', 's']", + "CRBNK/CUSTOMERS/c_fname: ['OR', 2, 'ENDSWITH', 2, '__col__', 'a', 'ENDSWITH', 2, '__col__', 'e']", + "CRBNK/CUSTOMERS/c_lname: ['NOT_EQUAL', 2, '__col__', 'lopez']", + "CRBNK/CUSTOMERS/c_phone: ['ENDSWITH', 2, '__col__', '5']", }, ], 
id="cryptbank_filter_count_27", @@ -1206,18 +1206,18 @@ def test_pipeline_e2e_cryptbank( "result = CRYPTBANK.CALCULATE(n=COUNT(selected_accounts))", [ { - "CRBNK.ACCOUNTS.a_balance: ['GTE', 2, '__col__', 5000]", - "CRBNK.ACCOUNTS.a_open_ts: ['LT', 2, 'YEAR', 1, '__col__', 2020]", - "CRBNK.ACCOUNTS.a_type: ['EQUAL', 2, '__col__', 'retirement']", - "CRBNK.ACCOUNTS.a_type: ['EQUAL', 2, '__col__', 'savings']", - "CRBNK.CUSTOMERS.c_email: ['CONTAINS', 2, '__col__', 'gmail']", - "CRBNK.CUSTOMERS.c_email: ['CONTAINS', 2, '__col__', 'outlook']", - "CRBNK.ACCOUNTS.a_type: ['OR', 2, 'EQUAL', 2, '__col__', 'retirement', 'EQUAL', 2, '__col__', 'savings']", - "CRBNK.CUSTOMERS.c_email: ['OR', 2, 'CONTAINS', 2, '__col__', 'outlook', 'CONTAINS', 2, '__col__', 'gmail']", + "CRBNK/ACCOUNTS/a_balance: ['GTE', 2, '__col__', 5000]", + "CRBNK/ACCOUNTS/a_open_ts: ['LT', 2, 'YEAR', 1, '__col__', 2020]", + "CRBNK/ACCOUNTS/a_type: ['EQUAL', 2, '__col__', 'retirement']", + "CRBNK/ACCOUNTS/a_type: ['EQUAL', 2, '__col__', 'savings']", + "CRBNK/CUSTOMERS/c_email: ['CONTAINS', 2, '__col__', 'gmail']", + "CRBNK/CUSTOMERS/c_email: ['CONTAINS', 2, '__col__', 'outlook']", + "CRBNK/ACCOUNTS/a_type: ['OR', 2, 'EQUAL', 2, '__col__', 'retirement', 'EQUAL', 2, '__col__', 'savings']", + "CRBNK/CUSTOMERS/c_email: ['OR', 2, 'CONTAINS', 2, '__col__', 'outlook', 'CONTAINS', 2, '__col__', 'gmail']", "DRY_RUN", }, { - "CRBNK.ACCOUNTS.a_type: ['OR', 2, 'EQUAL', 2, '__col__', 'retirement', 'EQUAL', 2, '__col__', 'savings']", + "CRBNK/ACCOUNTS/a_type: ['OR', 2, 'EQUAL', 2, '__col__', 'retirement', 'EQUAL', 2, '__col__', 'savings']", }, ], id="cryptbank_filter_count_28", @@ -1227,11 +1227,11 @@ def test_pipeline_e2e_cryptbank( "result = CRYPTBANK.CALCULATE(n=COUNT(selected_customers))", [ { - "CRBNK.CUSTOMERS.c_birthday: ['LTE', 2, '__col__', '1925-01-01']", + "CRBNK/CUSTOMERS/c_birthday: ['LTE', 2, '__col__', '1925-01-01']", "DRY_RUN", }, { - "CRBNK.CUSTOMERS.c_birthday: ['LTE', 2, '__col__', '1925-01-01']", + 
"CRBNK/CUSTOMERS/c_birthday: ['LTE', 2, '__col__', '1925-01-01']", }, ], id="cryptbank_filter_count_29", @@ -1244,13 +1244,13 @@ def test_pipeline_e2e_cryptbank( "result = CRYPTBANK.CALCULATE(n=COUNT(selected_customers))", [ { - "CRBNK.CUSTOMERS.c_birthday: ['AND', 2, 'IN', 7, 'ADD', 2, 'MONTH', 1, '__col__', 1, 2, 4, 6, 8, 10, 12, 'IN', 11, 'SUB', 2, 'YEAR', 1, '__col__', 2, 1975, 1977, 1979, 1981, 1983, 1985, 1987, 1989, 1991, 1993]", - "CRBNK.CUSTOMERS.c_birthday: ['IN', 11, 'SUB', 2, 'YEAR', 1, '__col__', 2, 1975, 1977, 1979, 1981, 1983, 1985, 1987, 1989, 1991, 1993]", - "CRBNK.CUSTOMERS.c_birthday: ['IN', 7, 'ADD', 2, 'MONTH', 1, '__col__', 1, 2, 4, 6, 8, 10, 12]", + "CRBNK/CUSTOMERS/c_birthday: ['AND', 2, 'IN', 7, 'ADD', 2, 'MONTH', 1, '__col__', 1, 2, 4, 6, 8, 10, 12, 'IN', 11, 'SUB', 2, 'YEAR', 1, '__col__', 2, 1975, 1977, 1979, 1981, 1983, 1985, 1987, 1989, 1991, 1993]", + "CRBNK/CUSTOMERS/c_birthday: ['IN', 11, 'SUB', 2, 'YEAR', 1, '__col__', 2, 1975, 1977, 1979, 1981, 1983, 1985, 1987, 1989, 1991, 1993]", + "CRBNK/CUSTOMERS/c_birthday: ['IN', 7, 'ADD', 2, 'MONTH', 1, '__col__', 1, 2, 4, 6, 8, 10, 12]", "DRY_RUN", }, { - "CRBNK.CUSTOMERS.c_birthday: ['AND', 2, 'IN', 7, 'ADD', 2, 'MONTH', 1, '__col__', 1, 2, 4, 6, 8, 10, 12, 'IN', 11, 'SUB', 2, 'YEAR', 1, '__col__', 2, 1975, 1977, 1979, 1981, 1983, 1985, 1987, 1989, 1991, 1993]", + "CRBNK/CUSTOMERS/c_birthday: ['AND', 2, 'IN', 7, 'ADD', 2, 'MONTH', 1, '__col__', 1, 2, 4, 6, 8, 10, 12, 'IN', 11, 'SUB', 2, 'YEAR', 1, '__col__', 2, 1975, 1977, 1979, 1981, 1983, 1985, 1987, 1989, 1991, 1993]", }, ], id="cryptbank_filter_count_30", @@ -1260,11 +1260,11 @@ def test_pipeline_e2e_cryptbank( "result = CRYPTBANK.CALCULATE(n=COUNT(selected_customers))", [ { - "CRBNK.CUSTOMERS.c_birthday: ['IN', 5, '__col__', '1991-11-15', '1978-02-11', '2005-03-14', '1985-04-12']", + "CRBNK/CUSTOMERS/c_birthday: ['IN', 5, '__col__', '1991-11-15', '1978-02-11', '2005-03-14', '1985-04-12']", "DRY_RUN", }, { - 
"CRBNK.CUSTOMERS.c_birthday: ['IN', 5, '__col__', '1991-11-15', '1978-02-11', '2005-03-14', '1985-04-12']", + "CRBNK/CUSTOMERS/c_birthday: ['IN', 5, '__col__', '1991-11-15', '1978-02-11', '2005-03-14', '1985-04-12']", }, ], id="cryptbank_filter_count_31", @@ -1274,11 +1274,11 @@ def test_pipeline_e2e_cryptbank( "result = CRYPTBANK.CALCULATE(n=COUNT(selected_accounts))", [ { - "CRBNK.ACCOUNTS.a_open_ts: ['BETWEEN', 3, '2020-03-28 09:20:00', '__col__', '2020-09-20 08:30:00']", + "CRBNK/ACCOUNTS/a_open_ts: ['BETWEEN', 3, '2020-03-28 09:20:00', '__col__', '2020-09-20 08:30:00']", "DRY_RUN", }, { - "CRBNK.ACCOUNTS.a_open_ts: ['BETWEEN', 3, '2020-03-28 09:20:00', '__col__', '2020-09-20 08:30:00']", + "CRBNK/ACCOUNTS/a_open_ts: ['BETWEEN', 3, '2020-03-28 09:20:00', '__col__', '2020-09-20 08:30:00']", }, ], id="cryptbank_filter_count_32", @@ -1287,13 +1287,13 @@ def test_pipeline_e2e_cryptbank( "result = CRYPTBANK.CALCULATE(n_neg=SUM(transactions.amount < 0), n_positive=SUM(transactions.amount > 0))", [ { - "CRBNK.TRANSACTIONS.t_amount: ['LT', 2, '__col__', 0]", - "CRBNK.TRANSACTIONS.t_amount: ['GT', 2, '__col__', 0]", + "CRBNK/TRANSACTIONS/t_amount: ['LT', 2, '__col__', 0]", + "CRBNK/TRANSACTIONS/t_amount: ['GT', 2, '__col__', 0]", "DRY_RUN", }, { - "CRBNK.TRANSACTIONS.t_amount: ['LT', 2, '__col__', 0]", - "CRBNK.TRANSACTIONS.t_amount: ['GT', 2, '__col__', 0]", + "CRBNK/TRANSACTIONS/t_amount: ['LT', 2, '__col__', 0]", + "CRBNK/TRANSACTIONS/t_amount: ['GT', 2, '__col__', 0]", }, ], id="cryptbank_agg_06", @@ -1323,39 +1323,39 @@ def test_pipeline_e2e_cryptbank( ")", [ { - "CRBNK.TRANSACTIONS.t_ts: ['EQUAL', 2, 'DATETRUNC', 2, 'day', '__col__', '2023-06-02']", - "CRBNK.TRANSACTIONS.t_ts: ['EQUAL', 2, 'DATETRUNC', 2, 'hour', '__col__', '2023-06-02 04:00:00']", - "CRBNK.TRANSACTIONS.t_ts: ['EQUAL', 2, 'DATETRUNC', 2, 'minute', '__col__', '2023-06-02 04:55:00']", - "CRBNK.TRANSACTIONS.t_ts: ['EQUAL', 2, 'DATETRUNC', 2, 'month', '__col__', '2023-06-01']", - 
"CRBNK.TRANSACTIONS.t_ts: ['EQUAL', 2, 'DATETRUNC', 2, 'quarter', '__col__', '2023-04-01']", - "CRBNK.TRANSACTIONS.t_ts: ['EQUAL', 2, 'DATETRUNC', 2, 'second', '__col__', '2023-06-02 04:55:31']", - "CRBNK.TRANSACTIONS.t_ts: ['EQUAL', 2, 'DATETRUNC', 2, 'year', '__col__', '2023-01-01']", - "CRBNK.TRANSACTIONS.t_ts: ['EQUAL', 2, 'DATEADD', 3, 1, 'years', '__col__', '2020-11-11 18:00:52']", - "CRBNK.TRANSACTIONS.t_ts: ['EQUAL', 2, 'DATEADD', 3, 2, 'quarters', '__col__', '2020-05-11 18:00:52']", - "CRBNK.TRANSACTIONS.t_ts: ['EQUAL', 2, 'DATEADD', 3, -5, 'months', '__col__', '2019-06-11 18:00:52']", - "CRBNK.TRANSACTIONS.t_ts: ['EQUAL', 2, 'DATEADD', 3, 10, 'days', '__col__', '2019-11-21 18:00:52']", - "CRBNK.TRANSACTIONS.t_ts: ['EQUAL', 2, 'DATEADD', 3, 1000, 'hours', '__col__', '2019-12-23 10:00:52']", - "CRBNK.TRANSACTIONS.t_ts: ['EQUAL', 2, 'DATEADD', 3, 10000, 'minutes', '__col__', '2019-11-18 16:40:52']", - "CRBNK.TRANSACTIONS.t_ts: ['EQUAL', 2, 'DATEADD', 3, -1000000, 'seconds', '__col__', '2019-10-31 04:14:12']", - "CRBNK.TRANSACTIONS.t_ts: ['EQUAL', 2, 'DATEADD', 3, -1, 'days', 'DATETRUNC', 2, 'month', '__col__', '2019-10-31']", + "CRBNK/TRANSACTIONS/t_ts: ['EQUAL', 2, 'DATETRUNC', 2, 'day', '__col__', '2023-06-02']", + "CRBNK/TRANSACTIONS/t_ts: ['EQUAL', 2, 'DATETRUNC', 2, 'hour', '__col__', '2023-06-02 04:00:00']", + "CRBNK/TRANSACTIONS/t_ts: ['EQUAL', 2, 'DATETRUNC', 2, 'minute', '__col__', '2023-06-02 04:55:00']", + "CRBNK/TRANSACTIONS/t_ts: ['EQUAL', 2, 'DATETRUNC', 2, 'month', '__col__', '2023-06-01']", + "CRBNK/TRANSACTIONS/t_ts: ['EQUAL', 2, 'DATETRUNC', 2, 'quarter', '__col__', '2023-04-01']", + "CRBNK/TRANSACTIONS/t_ts: ['EQUAL', 2, 'DATETRUNC', 2, 'second', '__col__', '2023-06-02 04:55:31']", + "CRBNK/TRANSACTIONS/t_ts: ['EQUAL', 2, 'DATETRUNC', 2, 'year', '__col__', '2023-01-01']", + "CRBNK/TRANSACTIONS/t_ts: ['EQUAL', 2, 'DATEADD', 3, 1, 'years', '__col__', '2020-11-11 18:00:52']", + "CRBNK/TRANSACTIONS/t_ts: ['EQUAL', 2, 'DATEADD', 3, 2, 
'quarters', '__col__', '2020-05-11 18:00:52']", + "CRBNK/TRANSACTIONS/t_ts: ['EQUAL', 2, 'DATEADD', 3, -5, 'months', '__col__', '2019-06-11 18:00:52']", + "CRBNK/TRANSACTIONS/t_ts: ['EQUAL', 2, 'DATEADD', 3, 10, 'days', '__col__', '2019-11-21 18:00:52']", + "CRBNK/TRANSACTIONS/t_ts: ['EQUAL', 2, 'DATEADD', 3, 1000, 'hours', '__col__', '2019-12-23 10:00:52']", + "CRBNK/TRANSACTIONS/t_ts: ['EQUAL', 2, 'DATEADD', 3, 10000, 'minutes', '__col__', '2019-11-18 16:40:52']", + "CRBNK/TRANSACTIONS/t_ts: ['EQUAL', 2, 'DATEADD', 3, -1000000, 'seconds', '__col__', '2019-10-31 04:14:12']", + "CRBNK/TRANSACTIONS/t_ts: ['EQUAL', 2, 'DATEADD', 3, -1, 'days', 'DATETRUNC', 2, 'month', '__col__', '2019-10-31']", "DRY_RUN", }, { - "CRBNK.TRANSACTIONS.t_ts: ['EQUAL', 2, 'DATETRUNC', 2, 'day', '__col__', '2023-06-02']", - "CRBNK.TRANSACTIONS.t_ts: ['EQUAL', 2, 'DATETRUNC', 2, 'hour', '__col__', '2023-06-02 04:00:00']", - "CRBNK.TRANSACTIONS.t_ts: ['EQUAL', 2, 'DATETRUNC', 2, 'minute', '__col__', '2023-06-02 04:55:00']", - "CRBNK.TRANSACTIONS.t_ts: ['EQUAL', 2, 'DATETRUNC', 2, 'month', '__col__', '2023-06-01']", - "CRBNK.TRANSACTIONS.t_ts: ['EQUAL', 2, 'DATETRUNC', 2, 'quarter', '__col__', '2023-04-01']", - "CRBNK.TRANSACTIONS.t_ts: ['EQUAL', 2, 'DATETRUNC', 2, 'second', '__col__', '2023-06-02 04:55:31']", - "CRBNK.TRANSACTIONS.t_ts: ['EQUAL', 2, 'DATETRUNC', 2, 'year', '__col__', '2023-01-01']", - "CRBNK.TRANSACTIONS.t_ts: ['EQUAL', 2, 'DATEADD', 3, 1, 'years', '__col__', '2020-11-11 18:00:52']", - "CRBNK.TRANSACTIONS.t_ts: ['EQUAL', 2, 'DATEADD', 3, 2, 'quarters', '__col__', '2020-05-11 18:00:52']", - "CRBNK.TRANSACTIONS.t_ts: ['EQUAL', 2, 'DATEADD', 3, -5, 'months', '__col__', '2019-06-11 18:00:52']", - "CRBNK.TRANSACTIONS.t_ts: ['EQUAL', 2, 'DATEADD', 3, 10, 'days', '__col__', '2019-11-21 18:00:52']", - "CRBNK.TRANSACTIONS.t_ts: ['EQUAL', 2, 'DATEADD', 3, 1000, 'hours', '__col__', '2019-12-23 10:00:52']", - "CRBNK.TRANSACTIONS.t_ts: ['EQUAL', 2, 'DATEADD', 3, 10000, 'minutes', 
'__col__', '2019-11-18 16:40:52']", - "CRBNK.TRANSACTIONS.t_ts: ['EQUAL', 2, 'DATEADD', 3, -1000000, 'seconds', '__col__', '2019-10-31 04:14:12']", - "CRBNK.TRANSACTIONS.t_ts: ['EQUAL', 2, 'DATEADD', 3, -1, 'days', 'DATETRUNC', 2, 'month', '__col__', '2019-10-31']", + "CRBNK/TRANSACTIONS/t_ts: ['EQUAL', 2, 'DATETRUNC', 2, 'day', '__col__', '2023-06-02']", + "CRBNK/TRANSACTIONS/t_ts: ['EQUAL', 2, 'DATETRUNC', 2, 'hour', '__col__', '2023-06-02 04:00:00']", + "CRBNK/TRANSACTIONS/t_ts: ['EQUAL', 2, 'DATETRUNC', 2, 'minute', '__col__', '2023-06-02 04:55:00']", + "CRBNK/TRANSACTIONS/t_ts: ['EQUAL', 2, 'DATETRUNC', 2, 'month', '__col__', '2023-06-01']", + "CRBNK/TRANSACTIONS/t_ts: ['EQUAL', 2, 'DATETRUNC', 2, 'quarter', '__col__', '2023-04-01']", + "CRBNK/TRANSACTIONS/t_ts: ['EQUAL', 2, 'DATETRUNC', 2, 'second', '__col__', '2023-06-02 04:55:31']", + "CRBNK/TRANSACTIONS/t_ts: ['EQUAL', 2, 'DATETRUNC', 2, 'year', '__col__', '2023-01-01']", + "CRBNK/TRANSACTIONS/t_ts: ['EQUAL', 2, 'DATEADD', 3, 1, 'years', '__col__', '2020-11-11 18:00:52']", + "CRBNK/TRANSACTIONS/t_ts: ['EQUAL', 2, 'DATEADD', 3, 2, 'quarters', '__col__', '2020-05-11 18:00:52']", + "CRBNK/TRANSACTIONS/t_ts: ['EQUAL', 2, 'DATEADD', 3, -5, 'months', '__col__', '2019-06-11 18:00:52']", + "CRBNK/TRANSACTIONS/t_ts: ['EQUAL', 2, 'DATEADD', 3, 10, 'days', '__col__', '2019-11-21 18:00:52']", + "CRBNK/TRANSACTIONS/t_ts: ['EQUAL', 2, 'DATEADD', 3, 1000, 'hours', '__col__', '2019-12-23 10:00:52']", + "CRBNK/TRANSACTIONS/t_ts: ['EQUAL', 2, 'DATEADD', 3, 10000, 'minutes', '__col__', '2019-11-18 16:40:52']", + "CRBNK/TRANSACTIONS/t_ts: ['EQUAL', 2, 'DATEADD', 3, -1000000, 'seconds', '__col__', '2019-10-31 04:14:12']", + "CRBNK/TRANSACTIONS/t_ts: ['EQUAL', 2, 'DATEADD', 3, -1, 'days', 'DATETRUNC', 2, 'month', '__col__', '2019-10-31']", }, ], id="cryptbank_agg_07", @@ -1365,11 +1365,11 @@ def test_pipeline_e2e_cryptbank( "result = CRYPTBANK.CALCULATE(n=COUNT(selected_accounts))", [ { - "CRBNK.ACCOUNTS.a_open_ts: ['EQUAL', 2, 
'QUARTER', 1, '__col__', 'DAY', 1, '__col__']", + "CRBNK/ACCOUNTS/a_open_ts: ['EQUAL', 2, 'QUARTER', 1, '__col__', 'DAY', 1, '__col__']", "DRY_RUN", }, { - "CRBNK.ACCOUNTS.a_open_ts: ['EQUAL', 2, 'QUARTER', 1, '__col__', 'DAY', 1, '__col__']" + "CRBNK/ACCOUNTS/a_open_ts: ['EQUAL', 2, 'QUARTER', 1, '__col__', 'DAY', 1, '__col__']" }, ], id="cryptbank_filter_count_34", @@ -1379,11 +1379,11 @@ def test_pipeline_e2e_cryptbank( "result = CRYPTBANK.CALCULATE(n=COUNT(selected_customers))", [ { - "CRBNK.CUSTOMERS.c_phone: ['CONTAINS', 2, 'CONCAT', 2, '1-', '__col__', '1-5']", + "CRBNK/CUSTOMERS/c_phone: ['CONTAINS', 2, 'CONCAT', 2, '1-', '__col__', '1-5']", "DRY_RUN", }, { - "CRBNK.CUSTOMERS.c_phone: ['CONTAINS', 2, 'CONCAT', 2, '1-', '__col__', '1-5']" + "CRBNK/CUSTOMERS/c_phone: ['CONTAINS', 2, 'CONCAT', 2, '1-', '__col__', '1-5']" }, ], id="cryptbank_filter_count_40", @@ -1393,11 +1393,11 @@ def test_pipeline_e2e_cryptbank( "result = CRYPTBANK.CALCULATE(n=COUNT(selected_customers))", [ { - "CRBNK.CUSTOMERS.c_phone: ['CONTAINS', 2, 'CONCAT', 3, '1', '-', '__col__', '1-5']", + "CRBNK/CUSTOMERS/c_phone: ['CONTAINS', 2, 'CONCAT', 3, '1', '-', '__col__', '1-5']", "DRY_RUN", }, { - "CRBNK.CUSTOMERS.c_phone: ['CONTAINS', 2, 'CONCAT', 3, '1', '-', '__col__', '1-5']" + "CRBNK/CUSTOMERS/c_phone: ['CONTAINS', 2, 'CONCAT', 3, '1', '-', '__col__', '1-5']" }, ], id="cryptbank_filter_count_41", @@ -1407,11 +1407,11 @@ def test_pipeline_e2e_cryptbank( "result = CRYPTBANK.CALCULATE(n=COUNT(selected_customers))", [ { - "CRBNK.CUSTOMERS.c_phone: ['CONTAINS', 2, 'CONCAT', 5, '1', '-', '__col__', '-', '1', '5-1']", + "CRBNK/CUSTOMERS/c_phone: ['CONTAINS', 2, 'CONCAT', 5, '1', '-', '__col__', '-', '1', '5-1']", "DRY_RUN", }, { - "CRBNK.CUSTOMERS.c_phone: ['CONTAINS', 2, 'CONCAT', 5, '1', '-', '__col__', '-', '1', '5-1']" + "CRBNK/CUSTOMERS/c_phone: ['CONTAINS', 2, 'CONCAT', 5, '1', '-', '__col__', '-', '1', '5-1']" }, ], id="cryptbank_filter_count_42", @@ -1439,19 +1439,19 @@ def 
test_pipeline_e2e_cryptbank( + ")", [ { - "CRBNK.CUSTOMERS.c_fname: ['CONTAINS', 2, '__col__', 'a']", - "CRBNK.CUSTOMERS.c_fname: ['CONTAINS', 2, '__col__', 'e']", - "CRBNK.CUSTOMERS.c_fname: ['CONTAINS', 2, '__col__', 'i']", - "CRBNK.CUSTOMERS.c_fname: ['CONTAINS', 2, '__col__', 'o']", - "CRBNK.CUSTOMERS.c_fname: ['CONTAINS', 2, '__col__', 'u']", + "CRBNK/CUSTOMERS/c_fname: ['CONTAINS', 2, '__col__', 'a']", + "CRBNK/CUSTOMERS/c_fname: ['CONTAINS', 2, '__col__', 'e']", + "CRBNK/CUSTOMERS/c_fname: ['CONTAINS', 2, '__col__', 'i']", + "CRBNK/CUSTOMERS/c_fname: ['CONTAINS', 2, '__col__', 'o']", + "CRBNK/CUSTOMERS/c_fname: ['CONTAINS', 2, '__col__', 'u']", "DRY_RUN", }, { - "CRBNK.CUSTOMERS.c_fname: ['CONTAINS', 2, '__col__', 'a']", - "CRBNK.CUSTOMERS.c_fname: ['CONTAINS', 2, '__col__', 'e']", - "CRBNK.CUSTOMERS.c_fname: ['CONTAINS', 2, '__col__', 'i']", - "CRBNK.CUSTOMERS.c_fname: ['CONTAINS', 2, '__col__', 'o']", - "CRBNK.CUSTOMERS.c_fname: ['CONTAINS', 2, '__col__', 'u']", + "CRBNK/CUSTOMERS/c_fname: ['CONTAINS', 2, '__col__', 'a']", + "CRBNK/CUSTOMERS/c_fname: ['CONTAINS', 2, '__col__', 'e']", + "CRBNK/CUSTOMERS/c_fname: ['CONTAINS', 2, '__col__', 'i']", + "CRBNK/CUSTOMERS/c_fname: ['CONTAINS', 2, '__col__', 'o']", + "CRBNK/CUSTOMERS/c_fname: ['CONTAINS', 2, '__col__', 'u']", }, ], id="cryptbank_multi_fcount_01", @@ -1473,30 +1473,30 @@ def test_pipeline_e2e_cryptbank( + ")", [ { - "CRBNK.CUSTOMERS.c_fname: ['AND', 2, 'CONTAINS', 2, '__col__', 'a', " + "CRBNK/CUSTOMERS/c_fname: ['AND', 2, 'CONTAINS', 2, '__col__', 'a', " "'CONTAINS', 2, '__col__', 'e']", - "CRBNK.CUSTOMERS.c_fname: ['AND', 2, 'CONTAINS', 2, '__col__', 'a', " + "CRBNK/CUSTOMERS/c_fname: ['AND', 2, 'CONTAINS', 2, '__col__', 'a', " "'CONTAINS', 2, '__col__', 'u']", - "CRBNK.CUSTOMERS.c_fname: ['AND', 2, 'CONTAINS', 2, '__col__', 'e', " + "CRBNK/CUSTOMERS/c_fname: ['AND', 2, 'CONTAINS', 2, '__col__', 'e', " "'CONTAINS', 2, '__col__', 'i']", - "CRBNK.CUSTOMERS.c_fname: ['AND', 2, 'CONTAINS', 2, 
'__col__', 'i', " + "CRBNK/CUSTOMERS/c_fname: ['AND', 2, 'CONTAINS', 2, '__col__', 'i', " "'CONTAINS', 2, '__col__', 'o']", - "CRBNK.CUSTOMERS.c_fname: ['AND', 2, 'CONTAINS', 2, '__col__', 'o', " + "CRBNK/CUSTOMERS/c_fname: ['AND', 2, 'CONTAINS', 2, '__col__', 'o', " "'CONTAINS', 2, '__col__', 'u']", - "CRBNK.CUSTOMERS.c_fname: ['CONTAINS', 2, '__col__', 'a']", - "CRBNK.CUSTOMERS.c_fname: ['CONTAINS', 2, '__col__', 'e']", - "CRBNK.CUSTOMERS.c_fname: ['CONTAINS', 2, '__col__', 'i']", - "CRBNK.CUSTOMERS.c_fname: ['CONTAINS', 2, '__col__', 'o']", - "CRBNK.CUSTOMERS.c_fname: ['CONTAINS', 2, '__col__', 'u']", + "CRBNK/CUSTOMERS/c_fname: ['CONTAINS', 2, '__col__', 'a']", + "CRBNK/CUSTOMERS/c_fname: ['CONTAINS', 2, '__col__', 'e']", + "CRBNK/CUSTOMERS/c_fname: ['CONTAINS', 2, '__col__', 'i']", + "CRBNK/CUSTOMERS/c_fname: ['CONTAINS', 2, '__col__', 'o']", + "CRBNK/CUSTOMERS/c_fname: ['CONTAINS', 2, '__col__', 'u']", "DRY_RUN", }, { - "CRBNK.CUSTOMERS.c_fname: ['AND', 2, 'CONTAINS', 2, '__col__', 'a', 'CONTAINS', 2, '__col__', 'e']", - "CRBNK.CUSTOMERS.c_fname: ['AND', 2, 'CONTAINS', 2, '__col__', 'e', 'CONTAINS', 2, '__col__', 'i']", - "CRBNK.CUSTOMERS.c_fname: ['AND', 2, 'CONTAINS', 2, '__col__', 'i', 'CONTAINS', 2, '__col__', 'o']", - "CRBNK.CUSTOMERS.c_fname: ['AND', 2, 'CONTAINS', 2, '__col__', 'o', 'CONTAINS', 2, '__col__', 'u']", - "CRBNK.CUSTOMERS.c_fname: ['CONTAINS', 2, '__col__', 'a']", - "CRBNK.CUSTOMERS.c_fname: ['CONTAINS', 2, '__col__', 'u']", + "CRBNK/CUSTOMERS/c_fname: ['AND', 2, 'CONTAINS', 2, '__col__', 'a', 'CONTAINS', 2, '__col__', 'e']", + "CRBNK/CUSTOMERS/c_fname: ['AND', 2, 'CONTAINS', 2, '__col__', 'e', 'CONTAINS', 2, '__col__', 'i']", + "CRBNK/CUSTOMERS/c_fname: ['AND', 2, 'CONTAINS', 2, '__col__', 'i', 'CONTAINS', 2, '__col__', 'o']", + "CRBNK/CUSTOMERS/c_fname: ['AND', 2, 'CONTAINS', 2, '__col__', 'o', 'CONTAINS', 2, '__col__', 'u']", + "CRBNK/CUSTOMERS/c_fname: ['CONTAINS', 2, '__col__', 'a']", + "CRBNK/CUSTOMERS/c_fname: ['CONTAINS', 2, 
'__col__', 'u']", }, ], id="cryptbank_multi_fcount_02", @@ -1518,23 +1518,23 @@ def test_pipeline_e2e_cryptbank( + ")", [ { - "CRBNK.CUSTOMERS.c_fname: ['AND', 2, 'CONTAINS', 2, '__col__', 'a', 'CONTAINS', 2, '__col__', 'e']", - "CRBNK.CUSTOMERS.c_fname: ['AND', 2, 'CONTAINS', 2, '__col__', 'a', 'CONTAINS', 2, '__col__', 'i']", - "CRBNK.CUSTOMERS.c_fname: ['AND', 2, 'CONTAINS', 2, '__col__', 'a', 'CONTAINS', 2, '__col__', 'o']", - "CRBNK.CUSTOMERS.c_fname: ['AND', 2, 'CONTAINS', 2, '__col__', 'a', 'CONTAINS', 2, '__col__', 'u']", - "CRBNK.CUSTOMERS.c_fname: ['CONTAINS', 2, '__col__', 'a']", - "CRBNK.CUSTOMERS.c_fname: ['CONTAINS', 2, '__col__', 'e']", - "CRBNK.CUSTOMERS.c_fname: ['CONTAINS', 2, '__col__', 'i']", - "CRBNK.CUSTOMERS.c_fname: ['CONTAINS', 2, '__col__', 'o']", - "CRBNK.CUSTOMERS.c_fname: ['CONTAINS', 2, '__col__', 'u']", + "CRBNK/CUSTOMERS/c_fname: ['AND', 2, 'CONTAINS', 2, '__col__', 'a', 'CONTAINS', 2, '__col__', 'e']", + "CRBNK/CUSTOMERS/c_fname: ['AND', 2, 'CONTAINS', 2, '__col__', 'a', 'CONTAINS', 2, '__col__', 'i']", + "CRBNK/CUSTOMERS/c_fname: ['AND', 2, 'CONTAINS', 2, '__col__', 'a', 'CONTAINS', 2, '__col__', 'o']", + "CRBNK/CUSTOMERS/c_fname: ['AND', 2, 'CONTAINS', 2, '__col__', 'a', 'CONTAINS', 2, '__col__', 'u']", + "CRBNK/CUSTOMERS/c_fname: ['CONTAINS', 2, '__col__', 'a']", + "CRBNK/CUSTOMERS/c_fname: ['CONTAINS', 2, '__col__', 'e']", + "CRBNK/CUSTOMERS/c_fname: ['CONTAINS', 2, '__col__', 'i']", + "CRBNK/CUSTOMERS/c_fname: ['CONTAINS', 2, '__col__', 'o']", + "CRBNK/CUSTOMERS/c_fname: ['CONTAINS', 2, '__col__', 'u']", "DRY_RUN", }, { - "CRBNK.CUSTOMERS.c_fname: ['AND', 2, 'CONTAINS', 2, '__col__', 'a', 'CONTAINS', 2, '__col__', 'e']", - "CRBNK.CUSTOMERS.c_fname: ['CONTAINS', 2, '__col__', 'a']", - "CRBNK.CUSTOMERS.c_fname: ['CONTAINS', 2, '__col__', 'i']", - "CRBNK.CUSTOMERS.c_fname: ['CONTAINS', 2, '__col__', 'o']", - "CRBNK.CUSTOMERS.c_fname: ['CONTAINS', 2, '__col__', 'u']", + "CRBNK/CUSTOMERS/c_fname: ['AND', 2, 'CONTAINS', 2, '__col__', 
'a', 'CONTAINS', 2, '__col__', 'e']", + "CRBNK/CUSTOMERS/c_fname: ['CONTAINS', 2, '__col__', 'a']", + "CRBNK/CUSTOMERS/c_fname: ['CONTAINS', 2, '__col__', 'i']", + "CRBNK/CUSTOMERS/c_fname: ['CONTAINS', 2, '__col__', 'o']", + "CRBNK/CUSTOMERS/c_fname: ['CONTAINS', 2, '__col__', 'u']", }, ], id="cryptbank_multi_fcount_03", @@ -1555,22 +1555,22 @@ def test_pipeline_e2e_cryptbank( + ")", [ { - "CRBNK.CUSTOMERS.c_fname: ['AND', 2, 'CONTAINS', 2, '__col__', 'a', 'CONTAINS', 2, '__col__', 'e']", - "CRBNK.CUSTOMERS.c_fname: ['AND', 2, 'CONTAINS', 2, '__col__', 'a', 'CONTAINS', 2, '__col__', 'i']", - "CRBNK.CUSTOMERS.c_fname: ['AND', 2, 'CONTAINS', 2, '__col__', 'e', 'CONTAINS', 2, '__col__', 'i']", - "CRBNK.CUSTOMERS.c_fname: ['AND', 2, 'CONTAINS', 2, '__col__', 'i', 'CONTAINS', 2, '__col__', 'u']", - "CRBNK.CUSTOMERS.c_fname: ['CONTAINS', 2, '__col__', 'a']", - "CRBNK.CUSTOMERS.c_fname: ['CONTAINS', 2, '__col__', 'e']", - "CRBNK.CUSTOMERS.c_fname: ['CONTAINS', 2, '__col__', 'i']", - "CRBNK.CUSTOMERS.c_fname: ['CONTAINS', 2, '__col__', 'u']", + "CRBNK/CUSTOMERS/c_fname: ['AND', 2, 'CONTAINS', 2, '__col__', 'a', 'CONTAINS', 2, '__col__', 'e']", + "CRBNK/CUSTOMERS/c_fname: ['AND', 2, 'CONTAINS', 2, '__col__', 'a', 'CONTAINS', 2, '__col__', 'i']", + "CRBNK/CUSTOMERS/c_fname: ['AND', 2, 'CONTAINS', 2, '__col__', 'e', 'CONTAINS', 2, '__col__', 'i']", + "CRBNK/CUSTOMERS/c_fname: ['AND', 2, 'CONTAINS', 2, '__col__', 'i', 'CONTAINS', 2, '__col__', 'u']", + "CRBNK/CUSTOMERS/c_fname: ['CONTAINS', 2, '__col__', 'a']", + "CRBNK/CUSTOMERS/c_fname: ['CONTAINS', 2, '__col__', 'e']", + "CRBNK/CUSTOMERS/c_fname: ['CONTAINS', 2, '__col__', 'i']", + "CRBNK/CUSTOMERS/c_fname: ['CONTAINS', 2, '__col__', 'u']", "DRY_RUN", }, { - "CRBNK.CUSTOMERS.c_fname: ['AND', 2, 'CONTAINS', 2, '__col__', 'a', 'CONTAINS', 2, '__col__', 'e']", - "CRBNK.CUSTOMERS.c_fname: ['AND', 2, 'CONTAINS', 2, '__col__', 'e', 'CONTAINS', 2, '__col__', 'i']", - "CRBNK.CUSTOMERS.c_fname: ['CONTAINS', 2, '__col__', 'a']", - 
"CRBNK.CUSTOMERS.c_fname: ['CONTAINS', 2, '__col__', 'i']", - "CRBNK.CUSTOMERS.c_fname: ['CONTAINS', 2, '__col__', 'u']", + "CRBNK/CUSTOMERS/c_fname: ['AND', 2, 'CONTAINS', 2, '__col__', 'a', 'CONTAINS', 2, '__col__', 'e']", + "CRBNK/CUSTOMERS/c_fname: ['AND', 2, 'CONTAINS', 2, '__col__', 'e', 'CONTAINS', 2, '__col__', 'i']", + "CRBNK/CUSTOMERS/c_fname: ['CONTAINS', 2, '__col__', 'a']", + "CRBNK/CUSTOMERS/c_fname: ['CONTAINS', 2, '__col__', 'i']", + "CRBNK/CUSTOMERS/c_fname: ['CONTAINS', 2, '__col__', 'u']", }, ], id="cryptbank_multi_fcount_04", @@ -1590,22 +1590,22 @@ def test_pipeline_e2e_cryptbank( + ")", [ { - "CRBNK.CUSTOMERS.c_fname: ['AND', 3, 'CONTAINS', 2, '__col__', 'a', 'CONTAINS', 2, '__col__', 'e', 'CONTAINS', 2, '__col__', 'i']", - "CRBNK.CUSTOMERS.c_fname: ['AND', 3, 'CONTAINS', 2, '__col__', 'e', 'CONTAINS', 2, '__col__', 'i', 'CONTAINS', 2, '__col__', 'o']", - "CRBNK.CUSTOMERS.c_fname: ['AND', 3, 'CONTAINS', 2, '__col__', 'i', 'CONTAINS', 2, '__col__', 'o', 'CONTAINS', 2, '__col__', 'u']", - "CRBNK.CUSTOMERS.c_fname: ['CONTAINS', 2, '__col__', 'a']", - "CRBNK.CUSTOMERS.c_fname: ['CONTAINS', 2, '__col__', 'e']", - "CRBNK.CUSTOMERS.c_fname: ['CONTAINS', 2, '__col__', 'i']", - "CRBNK.CUSTOMERS.c_fname: ['CONTAINS', 2, '__col__', 'o']", - "CRBNK.CUSTOMERS.c_fname: ['CONTAINS', 2, '__col__', 'u']", + "CRBNK/CUSTOMERS/c_fname: ['AND', 3, 'CONTAINS', 2, '__col__', 'a', 'CONTAINS', 2, '__col__', 'e', 'CONTAINS', 2, '__col__', 'i']", + "CRBNK/CUSTOMERS/c_fname: ['AND', 3, 'CONTAINS', 2, '__col__', 'e', 'CONTAINS', 2, '__col__', 'i', 'CONTAINS', 2, '__col__', 'o']", + "CRBNK/CUSTOMERS/c_fname: ['AND', 3, 'CONTAINS', 2, '__col__', 'i', 'CONTAINS', 2, '__col__', 'o', 'CONTAINS', 2, '__col__', 'u']", + "CRBNK/CUSTOMERS/c_fname: ['CONTAINS', 2, '__col__', 'a']", + "CRBNK/CUSTOMERS/c_fname: ['CONTAINS', 2, '__col__', 'e']", + "CRBNK/CUSTOMERS/c_fname: ['CONTAINS', 2, '__col__', 'i']", + "CRBNK/CUSTOMERS/c_fname: ['CONTAINS', 2, '__col__', 'o']", + 
"CRBNK/CUSTOMERS/c_fname: ['CONTAINS', 2, '__col__', 'u']", "DRY_RUN", }, { - "CRBNK.CUSTOMERS.c_fname: ['AND', 3, 'CONTAINS', 2, '__col__', 'a', 'CONTAINS', 2, '__col__', 'e', 'CONTAINS', 2, '__col__', 'i']", - "CRBNK.CUSTOMERS.c_fname: ['AND', 3, 'CONTAINS', 2, '__col__', 'e', 'CONTAINS', 2, '__col__', 'i', 'CONTAINS', 2, '__col__', 'o']", - "CRBNK.CUSTOMERS.c_fname: ['CONTAINS', 2, '__col__', 'i']", - "CRBNK.CUSTOMERS.c_fname: ['CONTAINS', 2, '__col__', 'o']", - "CRBNK.CUSTOMERS.c_fname: ['CONTAINS', 2, '__col__', 'u']", + "CRBNK/CUSTOMERS/c_fname: ['AND', 3, 'CONTAINS', 2, '__col__', 'a', 'CONTAINS', 2, '__col__', 'e', 'CONTAINS', 2, '__col__', 'i']", + "CRBNK/CUSTOMERS/c_fname: ['AND', 3, 'CONTAINS', 2, '__col__', 'e', 'CONTAINS', 2, '__col__', 'i', 'CONTAINS', 2, '__col__', 'o']", + "CRBNK/CUSTOMERS/c_fname: ['CONTAINS', 2, '__col__', 'i']", + "CRBNK/CUSTOMERS/c_fname: ['CONTAINS', 2, '__col__', 'o']", + "CRBNK/CUSTOMERS/c_fname: ['CONTAINS', 2, '__col__', 'u']", }, ], id="cryptbank_multi_fcount_05", @@ -1624,22 +1624,22 @@ def test_pipeline_e2e_cryptbank( + ")", [ { - "CRBNK.CUSTOMERS.c_fname: ['AND', 2, 'CONTAINS', 2, '__col__', 'a', 'CONTAINS', 2, '__col__', 'e']", - "CRBNK.CUSTOMERS.c_fname: ['AND', 2, 'CONTAINS', 2, '__col__', 'e', 'CONTAINS', 2, '__col__', 'i']", - "CRBNK.CUSTOMERS.c_fname: ['AND', 2, 'CONTAINS', 2, '__col__', 'i', 'NOT', 1, 'AND', 2, 'CONTAINS', 2, '__col__', 'a', 'CONTAINS', 2, '__col__', 'e']", - "CRBNK.CUSTOMERS.c_fname: ['AND', 2, 'CONTAINS', 2, '__col__', 'o', 'NOT', 1, 'AND', 2, 'CONTAINS', 2, '__col__', 'e', 'CONTAINS', 2, '__col__', 'i']", - "CRBNK.CUSTOMERS.c_fname: ['CONTAINS', 2, '__col__', 'a']", - "CRBNK.CUSTOMERS.c_fname: ['CONTAINS', 2, '__col__', 'e']", - "CRBNK.CUSTOMERS.c_fname: ['CONTAINS', 2, '__col__', 'i']", - "CRBNK.CUSTOMERS.c_fname: ['CONTAINS', 2, '__col__', 'o']", - "CRBNK.CUSTOMERS.c_fname: ['NOT', 1, 'AND', 2, 'CONTAINS', 2, '__col__', 'a', 'CONTAINS', 2, '__col__', 'e']", - "CRBNK.CUSTOMERS.c_fname: 
['NOT', 1, 'AND', 2, 'CONTAINS', 2, '__col__', 'e', 'CONTAINS', 2, '__col__', 'i']", + "CRBNK/CUSTOMERS/c_fname: ['AND', 2, 'CONTAINS', 2, '__col__', 'a', 'CONTAINS', 2, '__col__', 'e']", + "CRBNK/CUSTOMERS/c_fname: ['AND', 2, 'CONTAINS', 2, '__col__', 'e', 'CONTAINS', 2, '__col__', 'i']", + "CRBNK/CUSTOMERS/c_fname: ['AND', 2, 'CONTAINS', 2, '__col__', 'i', 'NOT', 1, 'AND', 2, 'CONTAINS', 2, '__col__', 'a', 'CONTAINS', 2, '__col__', 'e']", + "CRBNK/CUSTOMERS/c_fname: ['AND', 2, 'CONTAINS', 2, '__col__', 'o', 'NOT', 1, 'AND', 2, 'CONTAINS', 2, '__col__', 'e', 'CONTAINS', 2, '__col__', 'i']", + "CRBNK/CUSTOMERS/c_fname: ['CONTAINS', 2, '__col__', 'a']", + "CRBNK/CUSTOMERS/c_fname: ['CONTAINS', 2, '__col__', 'e']", + "CRBNK/CUSTOMERS/c_fname: ['CONTAINS', 2, '__col__', 'i']", + "CRBNK/CUSTOMERS/c_fname: ['CONTAINS', 2, '__col__', 'o']", + "CRBNK/CUSTOMERS/c_fname: ['NOT', 1, 'AND', 2, 'CONTAINS', 2, '__col__', 'a', 'CONTAINS', 2, '__col__', 'e']", + "CRBNK/CUSTOMERS/c_fname: ['NOT', 1, 'AND', 2, 'CONTAINS', 2, '__col__', 'e', 'CONTAINS', 2, '__col__', 'i']", "DRY_RUN", }, { - "CRBNK.CUSTOMERS.c_fname: ['AND', 2, 'CONTAINS', 2, '__col__', 'i', 'NOT', 1, 'AND', 2, 'CONTAINS', 2, '__col__', 'a', 'CONTAINS', 2, '__col__', 'e']", - "CRBNK.CUSTOMERS.c_fname: ['CONTAINS', 2, '__col__', 'o']", - "CRBNK.CUSTOMERS.c_fname: ['NOT', 1, 'AND', 2, 'CONTAINS', 2, '__col__', 'e', 'CONTAINS', 2, '__col__', 'i']", + "CRBNK/CUSTOMERS/c_fname: ['AND', 2, 'CONTAINS', 2, '__col__', 'i', 'NOT', 1, 'AND', 2, 'CONTAINS', 2, '__col__', 'a', 'CONTAINS', 2, '__col__', 'e']", + "CRBNK/CUSTOMERS/c_fname: ['CONTAINS', 2, '__col__', 'o']", + "CRBNK/CUSTOMERS/c_fname: ['NOT', 1, 'AND', 2, 'CONTAINS', 2, '__col__', 'e', 'CONTAINS', 2, '__col__', 'i']", }, ], id="cryptbank_multi_fcount_06", diff --git a/tests/testing_utilities.py b/tests/testing_utilities.py index 1c770906c..dd353f92b 100644 --- a/tests/testing_utilities.py +++ b/tests/testing_utilities.py @@ -1509,8 +1509,8 @@ def 
extract_batch_requests_from_logs(log_str: str) -> list[set[str]]: ``` INFO pydough.mask_server.mask_server:mask_server.py:149 Batch request to Mask Server (2 items): - INFO pydough.mask_server.mask_server:mask_server.py:151 (1) CRBNK.CUSTOMERS.c_lname: ['EQUAL', 2, '__col__', 'lee'] - INFO pydough.mask_server.mask_server:mask_server.py:151 (2) CRBNK.CUSTOMERS.c_birthday: ['EQUAL', 2, 'YEAR', 1, '__col__', 1980] + INFO pydough.mask_server.mask_server:mask_server.py:151 (1) CRBNK/CUSTOMERS/c_lname: ['EQUAL', 2, '__col__', 'lee'] + INFO pydough.mask_server.mask_server:mask_server.py:151 (2) CRBNK/CUSTOMERS/c_birthday: ['EQUAL', 2, 'YEAR', 1, '__col__', 1980] ``` A log message string with those lines would return the following list of @@ -1519,8 +1519,8 @@ def extract_batch_requests_from_logs(log_str: str) -> list[set[str]]: ``` [ { - "CRBNK.CUSTOMERS.c_lname: ['EQUAL', 2, '__col__', 'lee']", - "CRBNK.CUSTOMERS.c_birthday: ['EQUAL', 2, 'YEAR', 1, '__col__', 1980]", + "CRBNK/CUSTOMERS/c_lname: ['EQUAL', 2, '__col__', 'lee']", + "CRBNK/CUSTOMERS/c_birthday: ['EQUAL', 2, 'YEAR', 1, '__col__', 1980]", } ] ``` From 8e03b04cd9a04bee3cb22eac0be23ecf7a77e2a7 Mon Sep 17 00:00:00 2001 From: knassre-bodo Date: Tue, 2 Dec 2025 11:14:36 -0800 Subject: [PATCH 33/40] Revisions, QUOTE operator handling, docstrings/documentation [RUN ALL] --- pydough/errors/error_utils.py | 3 +- pydough/mask_server/mask_server.py | 97 ++++++++----------- .../mask_server_candidate_visitor.py | 42 +++++++- .../mask_server_rewrite_shuttle.py | 38 ++++++-- pydough/mask_server/min_cover_set.py | 52 +++++++++- tests/mock_server/api_mock_server.py | 4 +- tests/mock_server/lookup_table.py | 20 ++-- tests/test_masked_sqlite.py | 35 +++++++ tests/test_mock_mask_server.py | 2 - .../cryptbank_filter_count_59_raw.txt | 4 + .../cryptbank_filter_count_59_rewrite.txt | 4 + .../cryptbank_filter_count_59_raw_sqlite.sql | 7 ++ ...yptbank_filter_count_59_rewrite_sqlite.sql | 5 + 13 files changed, 228 insertions(+), 85 
deletions(-) create mode 100644 tests/test_plan_refsols/cryptbank_filter_count_59_raw.txt create mode 100644 tests/test_plan_refsols/cryptbank_filter_count_59_rewrite.txt create mode 100644 tests/test_sql_refsols/cryptbank_filter_count_59_raw_sqlite.sql create mode 100644 tests/test_sql_refsols/cryptbank_filter_count_59_rewrite_sqlite.sql diff --git a/pydough/errors/error_utils.py b/pydough/errors/error_utils.py index 6afd449f7..fd9f276a1 100644 --- a/pydough/errors/error_utils.py +++ b/pydough/errors/error_utils.py @@ -203,7 +203,8 @@ def __init__(self): "sql_keyword": "must have a SQL name that is not a reserved word", } - def _split_identifier(self, name: str) -> list[str]: + @staticmethod + def _split_identifier(name: str) -> list[str]: """ Split a potentially qualified SQL identifier into parts. diff --git a/pydough/mask_server/mask_server.py b/pydough/mask_server/mask_server.py index 103b13960..28f162db8 100644 --- a/pydough/mask_server/mask_server.py +++ b/pydough/mask_server/mask_server.py @@ -10,13 +10,12 @@ "MaskServerResponse", ] +import os from dataclasses import dataclass from enum import Enum from typing import Any -import sqlglot.expressions as exp -from sqlglot import parse_one - +from pydough.errors.error_utils import ValidSQLName from pydough.logger import get_logger from pydough.mask_server.server_connection import ( RequestMethod, @@ -68,42 +67,16 @@ class MaskServerInput: The linear serialization of the predicate expression. """ + @property def fully_qualified_name(self) -> str: """ Returns the fully qualified name of the column in the format 'table_path/column_name', with `/` as the separator used to modify the `table_path` appropriately. 
""" - table_path_chunks: list[str] = [] - parsed: exp.Expression = parse_one(self.table_path, dialect="mysql") - self.dump_identifier_chunks(parsed, table_path_chunks) + table_path_chunks: list[str] = ValidSQLName._split_identifier(self.table_path) return f"{'/'.join(table_path_chunks)}/{self.column_name}" - def dump_identifier_chunks( - self, - expression: exp.Expression, - chunks: list[str], - ) -> None: - """ - Recursively dumps the identifier chunks from the parsed SQL expression. - - Args: - `expression`: The parsed SQL expression. - `chunks`: The list to append the identifier chunks to. - """ - match expression: - case exp.Identifier(): - chunks.append(expression.sql()) - case exp.Literal() if expression.is_string: - chunks.append(expression.sql()) - case exp.Column() | exp.Dot(): - for part in expression.parts: - self.dump_identifier_chunks(part, chunks) - case _: - raise ValueError( - f"Unexpected expression type in table path parse tree: {expression.__class__.__name__}" - ) - @dataclass class MaskServerOutput: @@ -133,6 +106,11 @@ class MaskServerInfo: perform the evaluation. """ + batch_evaluate_api_path: str = "v1/predicates/batch-evaluate" + """ + The API path for batch evaluating predicates on the mask server. + """ + def __init__(self, base_url: str, server_address: str, token: str | None = None): """ Initialize the MaskServerInfo with the given server URL. @@ -169,7 +147,6 @@ def simplify_simple_expression_batch( self, batch: list[MaskServerInput], dry_run: bool, - hard_limit: int, ) -> list[MaskServerOutput]: """ Sends a batch of predicate expressions to the mask server for evaluation. @@ -182,13 +159,20 @@ def simplify_simple_expression_batch( Args: `batch`: The list of inputs to be sent to the server. `dry_run`: Whether to perform a dry run or not. - `hard_limit`: The maximum number of items that can be returned for - each predicate. Returns: An output list containing the response case and payload. 
""" + # Obtain the `hard_limit` (the maximum number of items that can be + # returned for each predicate) from the environment variable. Set the + # default to 1000 if the variable is not set or invalid. + hard_limit: int + try: + hard_limit = int(os.environ.get("PYDOUGH_MASK_SERVER_HARD_LIMIT", "1000")) + except Exception: + hard_limit = 1000 + # Log the batch request pyd_logger = get_logger(__name__) if dry_run: @@ -203,12 +187,7 @@ def simplify_simple_expression_batch( ) assert batch != [], "Batch cannot be empty." - - path: str = "v1/predicates/batch-evaluate" - method: RequestMethod = RequestMethod.POST - request: ServerRequest = self.generate_request( - batch, path, method, dry_run, hard_limit - ) + request: ServerRequest = self.generate_request(batch, dry_run, hard_limit) response_json = self.connection.send_server_request(request) result: list[MaskServerOutput] = self.generate_result(response_json) @@ -217,18 +196,14 @@ def simplify_simple_expression_batch( def generate_request( self, batch: list[MaskServerInput], - path: str, - method: RequestMethod, dry_run: bool, hard_limit: int, ) -> ServerRequest: """ - Generate a server request from the given batch of server inputs and path. + Generate a server request from the given batch of server inputs. Args: `batch`: A list of MaskServerInput objects. - `path`: The server path for the request. - `method`: The HTTP method for the request. `dry_run`: Whether the request is a dry run or not. `hard_limit`: The maximum number of items that can be returned for each predicate. 
@@ -241,7 +216,7 @@ def generate_request( { "items": [ { - "column_ref": {"kind": "fqn", "value": "srv.db.schema.table.name"}, + "column_ref": {"kind": "fqn", "value": "srv/db/schema/table/name"}, "predicate": ["EQUAL", 2, "__col__", 1], "mode": "dynamic", "predicate_format": "linear_with_arity", @@ -275,12 +250,17 @@ def generate_request( } payload["items"].append(evaluate_request) - return ServerRequest(path=path, payload=payload, method=method) + return ServerRequest( + path=self.batch_evaluate_api_path, + payload=payload, + method=RequestMethod.POST, + ) def generate_result(self, response_dict: dict) -> list[MaskServerOutput]: """ - Generate a list of server outputs from the server response of a - non-dry-run request. + Generate a list of server outputs from the server response of a batch + request, either for a dry run or a normal run. On dry run requests, the + `records` field will be absent. Args: `response_dict`: The response from the mask server. @@ -328,11 +308,11 @@ def generate_result(self, response_dict: dict) -> list[MaskServerOutput]: result: list[MaskServerOutput] = [] for item in response_dict.get("items", []): - """ - Case on whether operator is ERROR or not - If ERROR, then response_case is unsupported and payload is None - Otherwise, call self.get_server_response(operator) to get the enum, store in a variable, then case on this variable to obtain the payload (use item.get("materialization", {}).get("values", []) if it is IN_ARRAY or NOT_IN_ARRAY, otherwise None) - """ + # Case on whether operator is ERROR or not. + # If ERROR, then response_case is unsupported and payload is None. + # Otherwise, call self.get_server_response(operator) to get the + # enum, store in a variable, then case on this variable to obtain + # the payload. 
if item.get("result") == "ERROR": result.append( MaskServerOutput( @@ -341,10 +321,10 @@ def generate_result(self, response_dict: dict) -> list[MaskServerOutput]: ) ) else: - response: dict = item.get("response", None) - if response is None: - # In this case, use a dummy value as a default to indicate - # the dry run was successful + response: dict = item["response"] + if response.get("records", None) is None: + # In this case, it was a dry-run, and use a dummy value to + # indicate that it was successful. result.append( MaskServerOutput( response_case=MaskServerResponse.IN_ARRAY, @@ -352,6 +332,7 @@ def generate_result(self, response_dict: dict) -> list[MaskServerOutput]: ) ) else: + # In this case, parse the response normally. response_case: MaskServerResponse = self.get_server_response_case( response["metadata"]["dynamic_operator"] ) diff --git a/pydough/mask_server/mask_server_candidate_visitor.py b/pydough/mask_server/mask_server_candidate_visitor.py index 2fddf0df7..8bd102b08 100644 --- a/pydough/mask_server/mask_server_candidate_visitor.py +++ b/pydough/mask_server/mask_server_candidate_visitor.py @@ -119,6 +119,23 @@ class MaskServerCandidateVisitor(RelationalExpressionVisitor): in PyDough becomes the `IN` operator in the mask server. """ + SERVER_OPERATOR_NAMES: set[str] = { + *OPERATORS_TO_SERVER_NAMES.values(), + "NOT_ININ", + "SLICE", + "CONCAT", + "DATETIME", + "DATEDIFF", + "DATETRUNC", + "REGEXP", + } + """ + The set of all operator names recognized by the Mask Server in its linear + serialization format, needed because when a string literal is used that + matches one of these reserved names, it must be wrapped in the QUOTE + function to avoid confusion. + """ + def __init__(self) -> None: self.candidate_pool: dict[ RelationalExpression, @@ -172,10 +189,21 @@ def __init__(self) -> None: RelationalExpression, set[RelationalExpression | None] ] = {} """ - TODO + A mapping of each expression to its set of parent expressions in the + relational tree. 
`None` is also included in the set if the expression + ever appears standalone (i.e., as the root of a relational expression in + the tree). This is used later as a core part of the algorithm for + `choose_minimal_covering_set`. Each expression can map to multiple + parents since the same expression instance can appear in multiple places + within the relational tree. """ self.ancestry_stack: list[RelationalExpression | None] = [None] + """ + A stack used to keep track of the ancestry of the current expression + being visited. The top of the stack is always the parent of the current + expression. This is used to build the `heritage_tree` mapping. + """ def reset(self): self.stack.clear() @@ -581,7 +609,7 @@ def convert_datetrunc_call_to_server_expression( serialization of the DATETRUNC operation, or None if the DATETRUNC operation could not be converted. """ - unit = DateTimeUnit.from_string(unit_str) + unit: DateTimeUnit | None = DateTimeUnit.from_string(unit_str) # Reject if the unit is not recognized, or is a WEEK (for now). if unit is None or unit == DateTimeUnit.WEEK: return None @@ -609,7 +637,7 @@ def convert_dateadd_call_to_server_expression( `amount`: The integer amount to add (can be negative). `unit_str`: The string representing the unit to add. 
""" - unit = DateTimeUnit.from_string(unit_str) + unit: DateTimeUnit | None = DateTimeUnit.from_string(unit_str) if unit is None or unit == DateTimeUnit.WEEK: return None result: list[str | int | float | None | bool] = ["DATEADD", 3] @@ -659,7 +687,7 @@ def convert_datediff_call_to_server_expression( or not isinstance(unit_expr[0], str) ): return None - unit = DateTimeUnit.from_string(unit_expr[0]) + unit: DateTimeUnit | None = DateTimeUnit.from_string(unit_expr[0]) if unit is None or unit == DateTimeUnit.WEEK: return None result.append(unit.value + "s") @@ -693,6 +721,12 @@ def convert_literal_to_server_expression( return ["NULL"] elif isinstance(literal.value, bool): return ["TRUE" if literal.value else "FALSE"] + elif ( + isinstance(literal.value, str) + and literal.value.upper() + in MaskServerCandidateVisitor.SERVER_OPERATOR_NAMES + ): + return ["QUOTE", 1, literal.value] elif isinstance(literal.value, (int, float, str)): return [literal.value] elif isinstance(literal.value, datetime.datetime): diff --git a/pydough/mask_server/mask_server_rewrite_shuttle.py b/pydough/mask_server/mask_server_rewrite_shuttle.py index a8ea9e31e..4f6bd2e74 100644 --- a/pydough/mask_server/mask_server_rewrite_shuttle.py +++ b/pydough/mask_server/mask_server_rewrite_shuttle.py @@ -101,7 +101,7 @@ def process_batch(self) -> None: # First, send the dry response batch to the Mask Server to identify # which predicates can be re-written. dry_run_results: list[MaskServerOutput] = ( - self.server_info.simplify_simple_expression_batch(batch, True, 1000) + self.server_info.simplify_simple_expression_batch(batch, True) ) batch, ancillary_info = self.identify_predicates_to_send( @@ -122,7 +122,7 @@ def process_batch(self) -> None: # to None in the case of failure, or the rewritten expression in the # case of success. 
responses: list[MaskServerOutput] = ( - self.server_info.simplify_simple_expression_batch(batch, False, 1000) + self.server_info.simplify_simple_expression_batch(batch, False) ) assert len(responses) == len(ancillary_info) for (expr, input_expr), response in zip(ancillary_info, responses): @@ -143,18 +143,42 @@ def identify_predicates_to_send( list[MaskServerInput], list[tuple[RelationalExpression, RelationalExpression]] ]: """ - TODO + Takes in the results of a dry run to the Mask Server, and identifies + which predicates should actually be sent to the server for processing in + order to minimize the total number of requests while still ensuring + that all necessary predicates are covered. + + Args: + `dry_run_results`: The results from the dry run to the Mask Server. + `batch`: The original batch of Mask Server inputs sent in the dry + run. + `ancillary_info`: The original ancillary info sent in the dry run. + `heritage_tree`: A mapping of each expression to its set of parent + expressions in the relational tree. `None` is also included in the + set if the expression ever appears standalone without a parent. + + Returns: + A tuple containing the new batch of Mask Server inputs to send, and + the new ancillary info corresponding to that batch. """ - expressions = [expr for expr, _ in ancillary_info] - successes = [ + # Extract the underlying expressions from the ancillary info, and + # identify the indices of the expressions that were successful in the + # dry run by checking the response cases. + expressions: list[RelationalExpression] = [expr for expr, _ in ancillary_info] + successes: list[int] = [ idx for idx, result in enumerate(dry_run_results) if result.response_case != MaskServerResponse.UNSUPPORTED ] + + # Run the algorithm to identify the indices of which successful dry run + # responses from the list should be kept. 
keep_idxs: set[int] = choose_minimal_covering_set( expressions, successes, heritage_tree ) + # Build the new batch and ancillary info lists by filtering to only + # those indices. new_batch: list[MaskServerInput] = [ elem for idx, elem in enumerate(batch) if idx in keep_idxs ] @@ -272,12 +296,12 @@ def build_in_array_expression( # - Otherwise, if doing IN -> `ABSENT(x) OR ISIN(x, ...)`. # - Otherwise, if doing NOT_IN -> `PRESENT(x) AND NOT(ISIN(x, ...))`. if contains_null and len(in_list) > 0: - null_op = ( + null_op: pydop.PyDoughExpressionOperator = ( pydop.ABSENT if response.response_case == MaskServerResponse.IN_ARRAY else pydop.PRESENT ) - bool_op = ( + bool_op: pydop.PyDoughExpressionOperator = ( pydop.BOR if response.response_case == MaskServerResponse.IN_ARRAY else pydop.BAN diff --git a/pydough/mask_server/min_cover_set.py b/pydough/mask_server/min_cover_set.py index d3f25ed63..bc544b4e9 100644 --- a/pydough/mask_server/min_cover_set.py +++ b/pydough/mask_server/min_cover_set.py @@ -1,5 +1,7 @@ """ -TODO +Logic for choosing the minimal set of expressions out of a list such that only +expressions marked as "successful" are included, and every expression from the +list is either included or has an ancestor that is included. """ __all__ = ["choose_minimal_covering_set"] @@ -13,31 +15,75 @@ def choose_minimal_covering_set( heritage_tree: dict[RelationalExpression, set[RelationalExpression | None]], ) -> set[int]: """ - TODO: ADD DESCRIPTION + Identifies the minimal set of indices from `successful_idxs` such that every + expression in `expressions` is either included in the set or has an ancestor + that is included. + + Args: + `expressions`: The list of expressions to cover. + `successful_idxs`: The list of indices into `expressions` that are + marked as successful. + `heritage_tree`: A mapping of each expression to its set of parent + expressions in the relational tree. 
`None` is also included in the set + if the expression ever appears standalone (i.e., as the root of a + relational expression in the tree). Each expression maps to a set since + an expression can appear in multiple places within the relational tree. + + Returns: + The set of indices from `successful_idxs` that form the minimal covering + set. """ + + # Build the following datastructures: + # 1. Set of expressions that are marked as successful. + # 2. Set of expressions that are not needed (i.e., every ancestor is either + # included in the answer, or is also not needed). + # 3. Set of expressions to include in the final answer. + # 4. Set of expressions already visited during traversal (to ensure dynamic + # programming principles are upheld to avoid redundant work). supported: set[RelationalExpression] = {expressions[idx] for idx in successful_idxs} not_needed: set[RelationalExpression] = set() include: set[RelationalExpression] = set() visited: set[RelationalExpression] = set() + # Run a DFS traversal for each expression, walking through the full forest + # from `expressions`. def traverse(expr: RelationalExpression): + # Abort if already visited, then mark the node as visited. if expr in visited: return visited.add(expr) - parents: set[RelationalExpression | None] = heritage_tree.get(expr, set()) + + # Extract all parents of the expression from the heritage tree. A + # `None` parent indicates that the current expression appears + # standalone. For each non-None parent, traverse it recursively. The + # expression starts out as unecessary, but loses that distinction if + # any of the parents indicate otherwise. + parents: set[RelationalExpression | None] = heritage_tree.get(expr, {None}) unnecessary: bool = True for parent in parents: if parent is not None: traverse(parent) + + # The expression loses its unecessary distinction if it appears + # standalone, or if any of its parents are simultaneously + # unsupported and necessary. 
if parent is None or (parent not in supported and parent not in not_needed): unnecessary = False + # If the current expression loses the unnecessary distinction, + # add it in the inclusion set, but only if it is supported. if expr in supported: include.add(expr) + + # If the expression was marked as unnecessary, add it to the + # `not_needed` set. if unnecessary: not_needed.add(expr) for expr in expressions: traverse(expr) + # Return the set of indices from `successful_idxs` that correspond to + # expressions that were placed in `include` during the DFS forest run. result: set[int] = {idx for idx in successful_idxs if expressions[idx] in include} return result diff --git a/tests/mock_server/api_mock_server.py b/tests/mock_server/api_mock_server.py index 6268f33bf..95121311b 100644 --- a/tests/mock_server/api_mock_server.py +++ b/tests/mock_server/api_mock_server.py @@ -65,7 +65,7 @@ def batch_evaluate( out_item["result"] = "ERROR" else: output_case, output_list = table_result - out_item["SUCCESS"] = "ERROR" + out_item["SUCCESS"] = "SUCCESS" out_item["response"] = { "strategy": "early_stop", "records": [ @@ -89,7 +89,7 @@ def batch_evaluate( } # Don't include response in dry run case if item.dry_run: - out_item.pop("response") + out_item["response"].pop("records") successful_responses += 1 # Adding the new item to the batch output diff --git a/tests/mock_server/lookup_table.py b/tests/mock_server/lookup_table.py index ca66092be..a3dd40838 100644 --- a/tests/mock_server/lookup_table.py +++ b/tests/mock_server/lookup_table.py @@ -6,7 +6,7 @@ LOOKUP_TABLE: dict[tuple[str, tuple], tuple[str, list]] = { # key: (column_reference, tuple(predicate)) # value: (response_case, payload) - ("srv.db.tbl.col", ("EQUAL", 2, "__col__", 0)): ( + ("srv/db/tbl/col", ("EQUAL", 2, "__col__", 0)): ( "NOT_IN", [ "value1", @@ -15,7 +15,7 @@ ], ), ( - "srv.db.orders.order_date", + "srv/db/tbl/order_date", ("BETWEEN", 3, "__col__", "2025-01-01", "2025-02-01"), ): ( "IN", @@ -27,28 +27,28 @@ 
"2025-01-05", ], ), - ("srv.db.tbl.col", ("NOT_EQUAL", 2, "__col__", "LOWER", 1, "Smith")): ( + ("srv/db/tbl/col", ("NOT_EQUAL", 2, "__col__", "LOWER", 1, "Smith")): ( "NOT_IN", ["smith"], ), # booleans, - ("srv.db.tbl.col", ("NOT_EQUAL", 2, "__col__", True)): ( + ("srv/db/tbl/col", ("NOT_EQUAL", 2, "__col__", True)): ( "IN", [False], ), # decimals (string format) - ("srv.db.tbl.col", ("LT", 2, "__col__", "123.654445")): ( + ("srv/db/tbl/col", ("LT", 2, "__col__", "123.654445")): ( "IN", ["123.121123", "123.654444", "123.654445"], ), # json embedded - ("srv.db.tbl.col", ("NOT_EQUAL", 2, "__col__", '("key": "value")')): ( + ("srv/db/tbl/col", ("NOT_EQUAL", 2, "__col__", '("key": "value")')): ( "NOT_IN", ['("key": "value")'], ), # NULLs and Money ( - "srv.db.tbl.col", + "srv/db/tbl/col", ("AND", 2, "NOT_EQUAL", 2, "__col__", None, "GT", 2, "__col__", "$45.00"), ): ( "NOT_IN", @@ -56,7 +56,7 @@ ), # Result with Regex, Bytea, Backslash in really nested expression. ( - "srv.db.tbl.col", + "srv/db/tbl/col", ( "OR", 2, @@ -1080,4 +1080,8 @@ "NOT_IN", ["ALICE", "EMILY", "ISABEL", "QUEENIE"], ), + ( + "srv/CRBNK/CUSTOMERS/c_fname", + ("CONTAINS", 2, "QUOTE", 1, "SLICE", "UPPER", 1, "SLICE", 3, "__col__", 0, 1), + ): ("IN", ["CAROL", "EMILY", "ISABEL", "LUKE", "SOPHIA"]), } diff --git a/tests/test_masked_sqlite.py b/tests/test_masked_sqlite.py index 6b7c9074e..0d905c6cc 100644 --- a/tests/test_masked_sqlite.py +++ b/tests/test_masked_sqlite.py @@ -711,6 +711,16 @@ ), id="cryptbank_filter_count_58", ), + pytest.param( + PyDoughPandasTest( + "selected_customers = customers.WHERE(CONTAINS('SLICE', UPPER(first_name[:1])))\n" + "result = CRYPTBANK.CALCULATE(n=COUNT(selected_customers))", + "CRYPTBANK", + lambda: pd.DataFrame({"n": [5]}), + "cryptbank_filter_count_59", + ), + id="cryptbank_filter_count_59", + ), pytest.param( PyDoughPandasTest( "selected_transactions = transactions.WHERE((YEAR(time_stamp) == 2022) & (MONTH(time_stamp) == 6))\n" @@ -1422,6 +1432,31 @@ def 
test_pipeline_e2e_cryptbank( [], id="cryptbank_filter_count_43", ), + pytest.param( + "selected_customers = customers.WHERE(CONTAINS('SLICE', UPPER(first_name[:1])))\n" + "result = CRYPTBANK.CALCULATE(n=COUNT(selected_customers))", + [ + { + "CRBNK/CUSTOMERS/c_fname: ['CONTAINS', 2, 'QUOTE', 1, 'SLICE', 'UPPER', 1, 'SLICE', 3, '__col__', 0, 1]", + "DRY_RUN", + }, + { + "CRBNK/CUSTOMERS/c_fname: ['CONTAINS', 2, 'QUOTE', 1, 'SLICE', 'UPPER', 1, 'SLICE', 3, '__col__', 0, 1]" + }, + ], + id="cryptbank_filter_count_59", + ), + pytest.param( + "selected_customers = customers.WHERE(ISIN(first_name, ['Datediff', 'YEAR', 'IN', 'NOT IN', 'NEQ', 'NOT_EQUAL', 'lower']))\n" + "result = CRYPTBANK.CALCULATE(n=COUNT(selected_customers))", + [ + { + "CRBNK/CUSTOMERS/c_fname: ['IN', 8, '__col__', 'QUOTE', 1, 'Datediff', 'QUOTE', 1, 'YEAR', 'IN', 'NOT IN', 'NEQ', 'QUOTE', 1, 'NOT_EQUAL', 'QUOTE', 1, 'lower']", + "DRY_RUN", + }, + ], + id="cryptbank_quote_list", + ), pytest.param( "result = CRYPTBANK.CALCULATE(" + ", ".join( diff --git a/tests/test_mock_mask_server.py b/tests/test_mock_mask_server.py index d1baa4604..62ae8f27c 100644 --- a/tests/test_mock_mask_server.py +++ b/tests/test_mock_mask_server.py @@ -250,7 +250,6 @@ def test_mock_mask_server( response: list[MaskServerOutput] = mask_server.simplify_simple_expression_batch( batch=batch, dry_run=False, - hard_limit=1000, ) assert response == answer, ( @@ -304,5 +303,4 @@ def test_mock_mask_server_errors( mask_server.simplify_simple_expression_batch( batch=batch, dry_run=False, - hard_limit=1000, ) diff --git a/tests/test_plan_refsols/cryptbank_filter_count_59_raw.txt b/tests/test_plan_refsols/cryptbank_filter_count_59_raw.txt new file mode 100644 index 000000000..9fecff9db --- /dev/null +++ b/tests/test_plan_refsols/cryptbank_filter_count_59_raw.txt @@ -0,0 +1,4 @@ +ROOT(columns=[('n', n_rows)], orderings=[]) + AGGREGATE(keys={}, aggregations={'n_rows': COUNT()}) + FILTER(condition=CONTAINS('SLICE':string, 
UPPER(SLICE(UNMASK::(LOWER([c_fname])), None:unknown, 1:numeric, None:unknown))), columns={}) + SCAN(table=CRBNK.CUSTOMERS, columns={'c_fname': c_fname}) diff --git a/tests/test_plan_refsols/cryptbank_filter_count_59_rewrite.txt b/tests/test_plan_refsols/cryptbank_filter_count_59_rewrite.txt new file mode 100644 index 000000000..1c5d4dd33 --- /dev/null +++ b/tests/test_plan_refsols/cryptbank_filter_count_59_rewrite.txt @@ -0,0 +1,4 @@ +ROOT(columns=[('n', n_rows)], orderings=[]) + AGGREGATE(keys={}, aggregations={'n_rows': COUNT()}) + FILTER(condition=ISIN(c_fname, ['CAROL', 'EMILY', 'ISABEL', 'LUKE', 'SOPHIA']:array[unknown]), columns={}) + SCAN(table=CRBNK.CUSTOMERS, columns={'c_fname': c_fname}) diff --git a/tests/test_sql_refsols/cryptbank_filter_count_59_raw_sqlite.sql b/tests/test_sql_refsols/cryptbank_filter_count_59_raw_sqlite.sql new file mode 100644 index 000000000..16eff450d --- /dev/null +++ b/tests/test_sql_refsols/cryptbank_filter_count_59_raw_sqlite.sql @@ -0,0 +1,7 @@ +SELECT + COUNT(*) AS n +FROM crbnk.customers +WHERE + 'SLICE' LIKE ( + '%' || UPPER(SUBSTRING(LOWER(c_fname), 1, 1)) || '%' + ) diff --git a/tests/test_sql_refsols/cryptbank_filter_count_59_rewrite_sqlite.sql b/tests/test_sql_refsols/cryptbank_filter_count_59_rewrite_sqlite.sql new file mode 100644 index 000000000..e35ec65cc --- /dev/null +++ b/tests/test_sql_refsols/cryptbank_filter_count_59_rewrite_sqlite.sql @@ -0,0 +1,5 @@ +SELECT + COUNT(*) AS n +FROM crbnk.customers +WHERE + c_fname IN ('CAROL', 'EMILY', 'ISABEL', 'LUKE', 'SOPHIA') From a3c79cf0898abbdcd0bc6b88b76a31b7a5470c6c Mon Sep 17 00:00:00 2001 From: knassre-bodo Date: Wed, 3 Dec 2025 10:02:13 -0800 Subject: [PATCH 34/40] Fixing mask server tests [RUN ALL] --- tests/mock_server/lookup_table.py | 2 +- tests/test_mock_mask_server.py | 18 +++++++++++------- 2 files changed, 12 insertions(+), 8 deletions(-) diff --git a/tests/mock_server/lookup_table.py b/tests/mock_server/lookup_table.py index a3dd40838..25e352c03 100644 --- 
a/tests/mock_server/lookup_table.py +++ b/tests/mock_server/lookup_table.py @@ -15,7 +15,7 @@ ], ), ( - "srv/db/tbl/order_date", + "srv/db/orders/order_date", ("BETWEEN", 3, "__col__", "2025-01-01", "2025-02-01"), ): ( "IN", diff --git a/tests/test_mock_mask_server.py b/tests/test_mock_mask_server.py index 62ae8f27c..aad814529 100644 --- a/tests/test_mock_mask_server.py +++ b/tests/test_mock_mask_server.py @@ -2,7 +2,9 @@ Unit tests for the PyDough mask server module. """ +import io import re +from contextlib import redirect_stdout import pytest @@ -32,12 +34,12 @@ expression=["OR", 2, "__col__", 5], ), MaskServerInput( - table_path="db.orders", + table_path="db/orders", column_name="order_date", expression=["BETWEEN", 3, "__col__", "2025-01-01", "2025-02-01"], ), MaskServerInput( - table_path="db.tbl", + table_path="db/tbl", column_name="col", expression=["GT", 2, "__col__", 45.67], ), @@ -246,11 +248,13 @@ def test_mock_mask_server( base_url="http://localhost:8000", server_address="srv", token=token ) - # Doing the request - response: list[MaskServerOutput] = mask_server.simplify_simple_expression_batch( - batch=batch, - dry_run=False, - ) + # Capture stdout to avoid polluting the console with logging calls + with redirect_stdout(io.StringIO()): + # Doing the request + response: list[MaskServerOutput] = mask_server.simplify_simple_expression_batch( + batch=batch, + dry_run=False, + ) assert response == answer, ( f"Mismatch between the response {response!r} and the answer {answer!r}" From 32d7ee27552654c4b3ebf4c9a959339d9e846352 Mon Sep 17 00:00:00 2001 From: knassre-bodo Date: Wed, 10 Dec 2025 11:47:00 -0800 Subject: [PATCH 35/40] API-based revisions overhaul WIP --- pydough/mask_server/mask_server.py | 17 +- .../mask_server_candidate_visitor.py | 41 +- .../mask_server_rewrite_shuttle.py | 3 + .../masked_table_column_metadata.py | 25 ++ tests/conftest.py | 13 +- tests/mock_server/api_mock_server.py | 3 +- tests/mock_server/lookup_table.py | 372 +++++++++++++----- 
tests/test_masked_sf.py | 16 +- tests/test_masked_sqlite.py | 4 +- tests/test_metadata/masked_graphs.json | 13 + tests/test_metadata/sf_masked_examples.json | 35 ++ tests/test_mock_mask_server.py | 32 +- tests/testing_utilities.py | 36 ++ 13 files changed, 490 insertions(+), 120 deletions(-) diff --git a/pydough/mask_server/mask_server.py b/pydough/mask_server/mask_server.py index 28f162db8..4d2e32a8c 100644 --- a/pydough/mask_server/mask_server.py +++ b/pydough/mask_server/mask_server.py @@ -52,6 +52,11 @@ class MaskServerInput: Input data structure for the MaskServer. """ + dataset_id: str + """ + The dataset ID to use when querying the mask server. + """ + table_path: str """ The fully qualified SQL table path, given from the metadata. @@ -111,20 +116,17 @@ class MaskServerInfo: The API path for batch evaluating predicates on the mask server. """ - def __init__(self, base_url: str, server_address: str, token: str | None = None): + def __init__(self, base_url: str, token: str | None = None): """ Initialize the MaskServerInfo with the given server URL. Args: `base_url`: The URL of the mask server. - `server_address`: The server address to place at the front of all - qualified table paths. `token`: Optional authentication token for the server. 
""" self.connection: ServerConnection = ServerConnection( base_url=base_url, token=token ) - self.server_address: str = server_address def get_server_response_case(self, server_case: str) -> MaskServerResponse: """ @@ -216,7 +218,8 @@ def generate_request( { "items": [ { - "column_ref": {"kind": "fqn", "value": "srv/db/schema/table/name"}, + "dataset_id": "snowflake.bodo.blah_blah_blah", + "column_ref": {"kind": "fqn", "value": "db/schema/table/name"}, "predicate": ["EQUAL", 2, "__col__", 1], "mode": "dynamic", "predicate_format": "linear_with_arity", @@ -239,13 +242,15 @@ def generate_request( for item in batch: evaluate_request: dict = { + "dataset_id": item.dataset_id, "column_ref": { "kind": "fqn", - "value": f"{self.server_address}/{item.fully_qualified_name}", + "value": item.fully_qualified_name, }, "predicate": item.expression, "output_mode": "cell_encrypted", "mode": "dynamic", + "predicate_format": "linear_with_arity", "dry_run": dry_run, } payload["items"].append(evaluate_request) diff --git a/pydough/mask_server/mask_server_candidate_visitor.py b/pydough/mask_server/mask_server_candidate_visitor.py index 8bd102b08..18f68c51f 100644 --- a/pydough/mask_server/mask_server_candidate_visitor.py +++ b/pydough/mask_server/mask_server_candidate_visitor.py @@ -62,7 +62,6 @@ class MaskServerCandidateVisitor(RelationalExpressionVisitor): pydop.LIKE: "LIKE", pydop.LOWER: "LOWER", pydop.UPPER: "UPPER", - pydop.MONOTONIC: "BETWEEN", pydop.YEAR: "YEAR", pydop.QUARTER: "QUARTER", pydop.MONTH: "MONTH", @@ -91,6 +90,7 @@ class MaskServerCandidateVisitor(RelationalExpressionVisitor): - `JOIN_STRINGS` - `DATETIME` - `DATEDIFF` + - `MONOTONIC` """ PREDICATE_OPERATORS: set[str] = { @@ -105,7 +105,6 @@ class MaskServerCandidateVisitor(RelationalExpressionVisitor): "CONTAINS", "LIKE", "IN", - "BETWEEN", "AND", "OR", "NOT", @@ -121,7 +120,7 @@ class MaskServerCandidateVisitor(RelationalExpressionVisitor): SERVER_OPERATOR_NAMES: set[str] = { *OPERATORS_TO_SERVER_NAMES.values(), 
- "NOT_ININ", + "NOT_IN", "SLICE", "CONCAT", "DATETIME", @@ -239,10 +238,13 @@ def visit_call_expression(self, expr: CallExpression) -> None: combined_exprs: list[str | int | float | None | bool] | None # A call in the form `UNMASK(input_expr)` is the atomic `__col__` - # expression. + # expression that forms the base case for all candidate expressions, if + # the column is server-masked. if ( isinstance(expr.op, pydop.MaskedExpressionFunctionOperator) and expr.op.is_unmask + and expr.op.masking_metadata.server_masked + and expr.op.masking_metadata.server_dataset_id is not None ): self.stack.append(((expr.op, expr.inputs[0]), ["__col__"])) @@ -335,6 +337,8 @@ def convert_call_to_server_expression( # Dispatch to the specified conversion method for each operator that # has dedicated logic, besides ISIN which was already handled. match call.op: + case pydop.MONOTONIC: + return self.convert_monotonic_call_to_server_expression(input_exprs) case pydop.SLICE: return self.convert_slice_call_to_server_expression(input_exprs) case pydop.JOIN_STRINGS: @@ -419,6 +423,35 @@ def convert_isin_call_to_server_expression( result.extend(in_list) return result + def convert_monotonic_call_to_server_expression( + self, input_exprs: list[list[str | int | float | None | bool] | None] + ) -> list[str | int | float | None | bool] | None: + """ + Converts a PyDough MONOTONIC operation to the linear serialization + format recognized by the Mask Server. MONOTONIC(a, b, c) is converted to + be equivalent to `(a <= b) AND (b <= c)`. + + Args: + `input_exprs`: A list of linear serializations for each input to + the MONOTONIC call, where each input serialization is either a + list of strings/ints/floats/bools/None, or None if the input + could not be converted. + + Returns: + A list of strings/ints/floats/bools/None representing the linear + serialization of the MONOTONIC operation, or None if the MONOTONIC + operation could not be converted. 
+ """ + assert len(input_exprs) == 3, ( + "MONOTONIC operator requires exactly three inputs." + ) + if input_exprs[0] is None or input_exprs[1] is None or input_exprs[2] is None: + return None + arg0: list[str | int | float | None | bool] = input_exprs[0] + arg1: list[str | int | float | None | bool] = input_exprs[1] + arg2: list[str | int | float | None | bool] = input_exprs[2] + return ["AND", 2, "LTE", 2, *arg0, *arg1, "LTE", 2, *arg1, *arg2] + def convert_slice_call_to_server_expression( self, input_exprs: list[list[str | int | float | None | bool] | None] ) -> list[str | int | float | None | bool] | None: diff --git a/pydough/mask_server/mask_server_rewrite_shuttle.py b/pydough/mask_server/mask_server_rewrite_shuttle.py index 4f6bd2e74..1744608ec 100644 --- a/pydough/mask_server/mask_server_rewrite_shuttle.py +++ b/pydough/mask_server/mask_server_rewrite_shuttle.py @@ -85,8 +85,11 @@ def process_batch(self) -> None: expression_list, ) in self.candidate_visitor.candidate_pool.items(): ancillary_info.append((expr, input_expr)) + assert mask_op.masking_metadata.server_masked + assert mask_op.masking_metadata.server_dataset_id is not None batch.append( MaskServerInput( + dataset_id=mask_op.masking_metadata.server_dataset_id, table_path=mask_op.table_path, column_name=mask_op.masking_metadata.column_name, expression=expression_list, diff --git a/pydough/metadata/properties/masked_table_column_metadata.py b/pydough/metadata/properties/masked_table_column_metadata.py index a30c4aaa7..1c787aa95 100644 --- a/pydough/metadata/properties/masked_table_column_metadata.py +++ b/pydough/metadata/properties/masked_table_column_metadata.py @@ -36,6 +36,7 @@ class MaskedTableColumnMetadata(TableColumnMetadata): "protect protocol", "unprotect protocol", "server masked", + "server dataset id", } def __init__( @@ -48,7 +49,11 @@ def __init__( unprotect_protocol: str, protect_protocol: str, server_masked: bool, + server_dataset_id: str | None, sample_values: list | None = None, + 
description: str | None = None, + synonyms: list[str] | None = None, + extra_semantic_info: dict | None = None, ): super().__init__( name, @@ -56,11 +61,15 @@ def __init__( protected_data_type, column_name, sample_values, + description, + synonyms, + extra_semantic_info, ) self._unprotected_data_type: PyDoughType = data_type self._unprotect_protocol: str = unprotect_protocol self._protect_protocol: str = protect_protocol self._server_masked: bool = server_masked + self._server_dataset_id: str | None = server_dataset_id @property def unprotected_data_type(self) -> PyDoughType: @@ -95,6 +104,14 @@ def server_masked(self) -> bool: """ return self._server_masked + @property + def server_dataset_id(self) -> str | None: + """ + Returns the dataset ID to use when querying the mask server for this + column, if any. + """ + return self._server_dataset_id + @staticmethod def create_error_name(name: str, collection_error_name: str) -> str: return f"masked table column property {name!r} of {collection_error_name}" @@ -106,6 +123,7 @@ def components(self) -> list: comp.append(self.unprotect_protocol) comp.append(self.protect_protocol) comp.append(self.server_masked) + comp.append(self.server_dataset_id) return comp @staticmethod @@ -158,6 +176,12 @@ def parse_from_json( if "server masked" in property_json: server_masked = extract_bool(property_json, "server masked", error_name) + server_dataset_id: str | None = None + if "server dataset id" in property_json: + server_dataset_id = extract_string( + property_json, "server dataset id", error_name + ) + NoExtraKeys(MaskedTableColumnMetadata.allowed_fields).verify( property_json, error_name ) @@ -172,6 +196,7 @@ def parse_from_json( unprotect_protocol, protect_protocol, server_masked, + server_dataset_id, ) # Parse the optional common semantic properties like the description. 
property.parse_optional_properties(property_json) diff --git a/tests/conftest.py b/tests/conftest.py index 95dd874f0..84f8d5c67 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -2015,4 +2015,15 @@ def mock_server_info(mock_server_setup: str) -> MaskServerInfo: """ Returns the MaskServerInfo for the mock server. """ - return MaskServerInfo(base_url=mock_server_setup, server_address="srv", token=None) + return MaskServerInfo(base_url=mock_server_setup, token=None) + + +@pytest.fixture(scope="session") +def true_mask_server_info() -> MaskServerInfo: + """ + Returns the MaskServerInfo for the true Mask server. + """ + if not os.getenv("PYDOUGH_MASK_SERVER_PATH"): + raise RuntimeError("PYDOUGH_MASK_SERVER_PATH environment variable is not set") + + return MaskServerInfo(base_url=os.environ["PYDOUGH_MASK_SERVER_PATH"], token=None) diff --git a/tests/mock_server/api_mock_server.py b/tests/mock_server/api_mock_server.py index 95121311b..a810d765f 100644 --- a/tests/mock_server/api_mock_server.py +++ b/tests/mock_server/api_mock_server.py @@ -17,6 +17,7 @@ class EvaluateRequest(BaseModel): + dataset_id: str column_ref: dict[str, str] predicate: list[str | int | float | None | bool] output_mode: str @@ -56,7 +57,7 @@ def batch_evaluate( "value", }, f"Invalid column_reference format in mock: {item.column_ref!r}." assert item.column_ref["kind"] == "fqn", "Only FQN kind is supported in mock." - key = (item.column_ref["value"], tuple(item.predicate)) + key = (item.dataset_id, item.column_ref["value"], tuple(item.predicate)) table_result: tuple[str, list] | None = LOOKUP_TABLE.get(key, None) out_item: dict = { "index": payload.items.index(item) + 1, diff --git a/tests/mock_server/lookup_table.py b/tests/mock_server/lookup_table.py index 25e352c03..7673f2dce 100644 --- a/tests/mock_server/lookup_table.py +++ b/tests/mock_server/lookup_table.py @@ -3,10 +3,10 @@ request column reference and predicate. 
""" -LOOKUP_TABLE: dict[tuple[str, tuple], tuple[str, list]] = { - # key: (column_reference, tuple(predicate)) +LOOKUP_TABLE: dict[tuple[str, str, tuple], tuple[str, list]] = { + # key: (dataset_id, fully_qualified_column_name, tuple(predicate)) # value: (response_case, payload) - ("srv/db/tbl/col", ("EQUAL", 2, "__col__", 0)): ( + ("dummy_server", "db/tbl/col", ("EQUAL", 2, "__col__", 0)): ( "NOT_IN", [ "value1", @@ -15,8 +15,20 @@ ], ), ( - "srv/db/orders/order_date", - ("BETWEEN", 3, "__col__", "2025-01-01", "2025-02-01"), + "dummy_server", + "db/orders/order_date", + ( + "AND", + 2, + "LTE", + 2, + "2025-01-01", + "__col__", + "LTE", + 2, + "__col__", + "2025-02-01", + ), ): ( "IN", [ @@ -27,28 +39,29 @@ "2025-01-05", ], ), - ("srv/db/tbl/col", ("NOT_EQUAL", 2, "__col__", "LOWER", 1, "Smith")): ( + ("dummy_server", "db/tbl/col", ("NOT_EQUAL", 2, "__col__", "LOWER", 1, "Smith")): ( "NOT_IN", ["smith"], ), # booleans, - ("srv/db/tbl/col", ("NOT_EQUAL", 2, "__col__", True)): ( + ("dummy_server", "db/tbl/col", ("NOT_EQUAL", 2, "__col__", True)): ( "IN", [False], ), # decimals (string format) - ("srv/db/tbl/col", ("LT", 2, "__col__", "123.654445")): ( + ("dummy_server", "db/tbl/col", ("LT", 2, "__col__", "123.654445")): ( "IN", ["123.121123", "123.654444", "123.654445"], ), # json embedded - ("srv/db/tbl/col", ("NOT_EQUAL", 2, "__col__", '("key": "value")')): ( + ("dummy_server", "db/tbl/col", ("NOT_EQUAL", 2, "__col__", '("key": "value")')): ( "NOT_IN", ['("key": "value")'], ), # NULLs and Money ( - "srv/db/tbl/col", + "dummy_server", + "db/tbl/col", ("AND", 2, "NOT_EQUAL", 2, "__col__", None, "GT", 2, "__col__", "$45.00"), ): ( "NOT_IN", @@ -56,7 +69,8 @@ ), # Result with Regex, Bytea, Backslash in really nested expression. 
( - "srv/db/tbl/col", + "dummy_server", + "db/tbl/col", ( "OR", 2, @@ -80,13 +94,29 @@ ['"Hello"', "HelloWorld", "SGVsbG9Xb3JsZA=="], ), # CRYPTBANK hardcoded responses - ("srv/CRBNK/CUSTOMERS/c_lname", ("EQUAL", 2, "__col__", "lee")): ( + ("dummy_server", "CRBNK/CUSTOMERS/c_lname", ("EQUAL", 2, "__col__", "lee")): ( "IN", ["LEE"], ), ( - "srv/CRBNK/CUSTOMERS/c_birthday", - ("BETWEEN", 3, 1980, "YEAR", 1, "__col__", 1985), + "dummy_server", + "CRBNK/CUSTOMERS/c_birthday", + ( + "AND", + 2, + "LTE", + 2, + 1980, + "YEAR", + 1, + "__col__", + "LTE", + 2, + "YEAR", + 1, + "__col__", + 1985, + ), ): ( "IN", [ @@ -97,7 +127,7 @@ "1983-12-27", ], ), - ("srv/CRBNK/TRANSACTIONS/t_amount", ("GT", 2, "__col__", 9000.0)): ( + ("dummy_server", "CRBNK/TRANSACTIONS/t_amount", ("GT", 2, "__col__", 9000.0)): ( "IN", [ -8934.44, @@ -124,7 +154,8 @@ ], ), ( - "srv/CRBNK/TRANSACTIONS/t_ts", + "dummy_server", + "CRBNK/TRANSACTIONS/t_ts", ( "AND", 2, @@ -157,7 +188,8 @@ ], ), ( - "srv/CRBNK/ACCOUNTS/a_type", + "dummy_server", + "CRBNK/ACCOUNTS/a_type", ( "OR", 2, @@ -174,48 +206,59 @@ "IN", ["avingss", "etirementr"], ), - ("srv/CRBNK/CUSTOMERS/c_phone", ("ENDSWITH", 2, "__col__", "5")): ( + ("dummy_server", "CRBNK/CUSTOMERS/c_phone", ("ENDSWITH", 2, "__col__", "5")): ( "IN", ["555-091-2345", "555-901-2345"], ), ( - "srv/CRBNK/CUSTOMERS/c_fname", + "dummy_server", + "CRBNK/CUSTOMERS/c_fname", ("OR", 2, "ENDSWITH", 2, "__col__", "a", "ENDSWITH", 2, "__col__", "e"), ): ( "IN", ["ALICE", "GRACE", "LUKE", "MARIA", "OLIVIA", "QUEENIE", "SOPHIA"], ), - ("srv/CRBNK/CUSTOMERS/c_fname", ("ENDSWITH", 2, "__col__", "s")): ( + ("dummy_server", "CRBNK/CUSTOMERS/c_fname", ("ENDSWITH", 2, "__col__", "s")): ( "IN", ["JAMES", "NICHOLAS", "THOMAS"], ), - ("srv/CRBNK/CUSTOMERS/c_lname", ("NOT_EQUAL", 2, "__col__", "lopez")): ( + ("dummy_server", "CRBNK/CUSTOMERS/c_lname", ("NOT_EQUAL", 2, "__col__", "lopez")): ( "NOT_IN", ["LOPEZ"], ), - ("srv/CRBNK/CUSTOMERS/c_lname", ("NOT_EQUAL", 2, "__col__", "lee")): 
( + ("dummy_server", "CRBNK/CUSTOMERS/c_lname", ("NOT_EQUAL", 2, "__col__", "lee")): ( "NOT_IN", ["LEE"], ), ( - "srv/CRBNK/CUSTOMERS/c_lname", + "dummy_server", + "CRBNK/CUSTOMERS/c_lname", ("IN", 4, "__col__", "lee", "smith", "rodriguez"), ): ( "IN", ["LEE", "SMITH", "RODRIGUEZ"], ), ( - "srv/CRBNK/CUSTOMERS/c_lname", + "dummy_server", + "CRBNK/CUSTOMERS/c_lname", ("NOT", 1, "IN", 4, "__col__", "lee", "smith", "rodriguez"), ): ( "NOT_IN", ["LEE", "SMITH", "RODRIGUEZ"], ), - ("srv/CRBNK/CUSTOMERS/c_phone", ("STARTSWITH", 2, "__col__", "555-8")): ( + ( + "dummy_server", + "CRBNK/CUSTOMERS/c_phone", + ("STARTSWITH", 2, "__col__", "555-8"), + ): ( "IN", ["555-809-1234", "555-870-9123"], ), - ("srv/CRBNK/CUSTOMERS/c_email", ("ENDSWITH", 2, "__col__", "gmail.com")): ( + ( + "dummy_server", + "CRBNK/CUSTOMERS/c_email", + ("ENDSWITH", 2, "__col__", "gmail.com"), + ): ( "IN", [ "livia.a22@gmail.como", @@ -224,24 +267,33 @@ "opez.luke99@gmail.coml", ], ), - ("srv/CRBNK/CUSTOMERS/c_birthday", ("EQUAL", 2, "YEAR", 1, "__col__", 1978)): ( + ( + "dummy_server", + "CRBNK/CUSTOMERS/c_birthday", + ("EQUAL", 2, "YEAR", 1, "__col__", 1978), + ): ( "IN", ["1976-10-27", "1976-12-02"], ), - ("srv/CRBNK/CUSTOMERS/c_birthday", ("EQUAL", 2, "__col__", "1985-04-12")): ( + ( + "dummy_server", + "CRBNK/CUSTOMERS/c_birthday", + ("EQUAL", 2, "__col__", "1985-04-12"), + ): ( "IN", ["1983-12-27"], ), - ("srv/CRBNK/CUSTOMERS/c_fname", ("ENDSWITH", 2, "__col__", "e")): ( + ("dummy_server", "CRBNK/CUSTOMERS/c_fname", ("ENDSWITH", 2, "__col__", "e")): ( "IN", ["ALICE", "GRACE", "LUKE", "QUEENIE"], ), - ("srv/CRBNK/CUSTOMERS/c_lname", ("ENDSWITH", 2, "__col__", "e")): ( + ("dummy_server", "CRBNK/CUSTOMERS/c_lname", ("ENDSWITH", 2, "__col__", "e")): ( "IN", ["LEE", "MOORE"], ), ( - "srv/CRBNK/ACCOUNTS/a_type", + "dummy_server", + "CRBNK/ACCOUNTS/a_type", ( "AND", 2, @@ -258,19 +310,35 @@ "NOT_IN", ["avingss", "heckingc"], ), - ("srv/CRBNK/CUSTOMERS/c_birthday", ("NOT_EQUAL", 2, "__col__", 
"1991-11-15")): ( + ( + "dummy_server", + "CRBNK/CUSTOMERS/c_birthday", + ("NOT_EQUAL", 2, "__col__", "1991-11-15"), + ): ( "NOT_IN", ["1990-07-31"], ), - ("srv/CRBNK/CUSTOMERS/c_birthday", ("LTE", 2, "__col__", "1991-11-15")): ( + ( + "dummy_server", + "CRBNK/CUSTOMERS/c_birthday", + ("LTE", 2, "__col__", "1991-11-15"), + ): ( "NOT_IN", ["1991-03-13", "1992-05-06", "1993-01-01", "1994-06-15"], ), - ("srv/CRBNK/CUSTOMERS/c_birthday", ("GT", 2, "__col__", "1991-11-15")): ( + ( + "dummy_server", + "CRBNK/CUSTOMERS/c_birthday", + ("GT", 2, "__col__", "1991-11-15"), + ): ( "IN", ["1991-03-13", "1992-05-06", "1993-01-01", "1994-06-15"], ), - ("srv/CRBNK/CUSTOMERS/c_birthday", ("LT", 2, "__col__", "1991-11-15")): ( + ( + "dummy_server", + "CRBNK/CUSTOMERS/c_birthday", + ("LT", 2, "__col__", "1991-11-15"), + ): ( "NOT_IN", [ "1990-07-31", @@ -280,7 +348,11 @@ "1994-06-15", ], ), - ("srv/CRBNK/CUSTOMERS/c_birthday", ("GTE", 2, "__col__", "1991-11-15")): ( + ( + "dummy_server", + "CRBNK/CUSTOMERS/c_birthday", + ("GTE", 2, "__col__", "1991-11-15"), + ): ( "IN", [ "1990-07-31", @@ -290,23 +362,35 @@ "1994-06-15", ], ), - ("srv/CRBNK/TRANSACTIONS/t_amount", ("LT", 2, "__col__", 0)): ( + ("dummy_server", "CRBNK/TRANSACTIONS/t_amount", ("LT", 2, "__col__", 0)): ( "IN", [], ), - ("srv/CRBNK/TRANSACTIONS/t_amount", ("GT", 2, "__col__", 0)): ( + ("dummy_server", "CRBNK/TRANSACTIONS/t_amount", ("GT", 2, "__col__", 0)): ( "NOT_IN", [], ), - ("srv/CRBNK/CUSTOMERS/c_birthday", ("LTE", 2, "__col__", "1925-01-01")): ( + ( + "dummy_server", + "CRBNK/CUSTOMERS/c_birthday", + ("LTE", 2, "__col__", "1925-01-01"), + ): ( "IN", [], ), - ("srv/CRBNK/CUSTOMERS/c_phone", ("EQUAL", 2, "__col__", "555-123-456")): ( + ( + "dummy_server", + "CRBNK/CUSTOMERS/c_phone", + ("EQUAL", 2, "__col__", "555-123-456"), + ): ( "IN", [], ), - ("srv/CRBNK/ACCOUNTS/a_open_ts", ("EQUAL", 2, "YEAR", 1, "__col__", 2021)): ( + ( + "dummy_server", + "CRBNK/ACCOUNTS/a_open_ts", + ("EQUAL", 2, "YEAR", 1, "__col__", 2021), 
+ ): ( "IN", [ "2017-02-11 10:59:51", @@ -318,7 +402,8 @@ ], ), ( - "srv/CRBNK/CUSTOMERS/c_birthday", + "dummy_server", + "CRBNK/CUSTOMERS/c_birthday", ( "AND", 2, @@ -360,15 +445,28 @@ ["1980-01-18", "1981-11-15", "1990-07-31", "1994-06-15"], ), ( - "srv/CRBNK/CUSTOMERS/c_birthday", + "dummy_server", + "CRBNK/CUSTOMERS/c_birthday", ("IN", 5, "__col__", "1991-11-15", "1978-02-11", "2005-03-14", "1985-04-12"), ): ( "IN", ["1990-07-31", "1976-10-27", "1983-12-27"], ), ( - "srv/CRBNK/ACCOUNTS/a_open_ts", - ("BETWEEN", 3, "2020-03-28 09:20:00", "__col__", "2020-09-20 08:30:00"), + "dummy_server", + "CRBNK/ACCOUNTS/a_open_ts", + ( + "AND", + 2, + "LTE", + 2, + "2020-03-28 09:20:00", + "__col__", + "LTE", + 2, + "__col__", + "2020-09-20 08:30:00", + ), ): ( "IN", [ @@ -379,7 +477,7 @@ "2016-09-03 12:01:51", ], ), - ("srv/CRBNK/CUSTOMERS/c_email", ("CONTAINS", 2, "__col__", "mail")): ( + ("dummy_server", "CRBNK/CUSTOMERS/c_email", ("CONTAINS", 2, "__col__", "mail")): ( "NOT_IN", [ "homasl@outlook.comt", @@ -392,7 +490,7 @@ "lice_j@example.orga", ], ), - ("srv/CRBNK/CUSTOMERS/c_email", ("LIKE", 2, "__col__", "%.%@%mail%")): ( + ("dummy_server", "CRBNK/CUSTOMERS/c_email", ("LIKE", 2, "__col__", "%.%@%mail%")): ( "IN", [ "ophia.jackson@mail.orgs", @@ -405,7 +503,11 @@ "ob.smith77@gmail.comb", ], ), - ("srv/CRBNK/ACCOUNTS/a_open_ts", ("IN", 4, "MONTH", 1, "__col__", 1, 2, 3)): ( + ( + "dummy_server", + "CRBNK/ACCOUNTS/a_open_ts", + ("IN", 4, "MONTH", 1, "__col__", 1, 2, 3), + ): ( "IN", [ "2013-04-22 11:37:51", @@ -422,14 +524,16 @@ ], ), ( - "srv/CRBNK/ACCOUNTS/a_open_ts", + "dummy_server", + "CRBNK/ACCOUNTS/a_open_ts", ("EQUAL", 2, "QUARTER", 1, "__col__", "DAY", 1, "__col__"), ): ( "IN", ["2015-05-04 18:01:51"], ), ( - "srv/CRBNK/ACCOUNTS/a_open_ts", + "dummy_server", + "CRBNK/ACCOUNTS/a_open_ts", ( "AND", 2, @@ -457,7 +561,11 @@ "2014-08-15 11:31:51", ], ), - ("srv/CRBNK/TRANSACTIONS/t_ts", ("EQUAL", 2, "SECOND", 1, "__col__", 23)): ( + ( + "dummy_server", + 
"CRBNK/TRANSACTIONS/t_ts", + ("EQUAL", 2, "SECOND", 1, "__col__", 23), + ): ( "IN", [ "2020-11-11 09:03:02", @@ -466,8 +574,30 @@ ], ), ( - "srv/CRBNK/ACCOUNTS/a_balance", - ("BETWEEN", 3, 200, "ABS", 1, "SUB", 2, "__col__", 7250, 600), + "dummy_server", + "CRBNK/ACCOUNTS/a_balance", + ( + "AND", + 2, + "LTE", + 2, + 200, + "ABS", + 1, + "SUB", + 2, + "__col__", + 7250, + "LTE", + 2, + "ABS", + 1, + "SUB", + 2, + "__col__", + 7250, + 600, + ), ): ( "IN", [ @@ -476,7 +606,8 @@ ], ), ( - "srv/CRBNK/ACCOUNTS/a_open_ts", + "dummy_server", + "CRBNK/ACCOUNTS/a_open_ts", ( "EQUAL", 2, @@ -501,7 +632,8 @@ ], ), ( - "srv/CRBNK/ACCOUNTS/a_open_ts", + "dummy_server", + "CRBNK/ACCOUNTS/a_open_ts", ("EQUAL", 2, "LEAST", 2, "HOUR", 1, "__col__", "MINUTE", 1, "__col__", 15), ): ( "IN", @@ -513,21 +645,24 @@ ], ), ( - "srv/CRBNK/CUSTOMERS/c_phone", + "dummy_server", + "CRBNK/CUSTOMERS/c_phone", ("CONTAINS", 2, "CONCAT", 2, "1-", "__col__", "1-5"), ): ( "NOT_IN", [], ), ( - "srv/CRBNK/CUSTOMERS/c_phone", + "dummy_server", + "CRBNK/CUSTOMERS/c_phone", ("CONTAINS", 2, "CONCAT", 3, "1", "-", "__col__", "1-5"), ): ( "NOT_IN", [], ), ( - "srv/CRBNK/CUSTOMERS/c_phone", + "dummy_server", + "CRBNK/CUSTOMERS/c_phone", ("CONTAINS", 2, "CONCAT", 5, "1", "-", "__col__", "-", "1", "5-1"), ): ( "IN", @@ -539,7 +674,8 @@ ], ), ( - "srv/CRBNK/CUSTOMERS/c_birthday", + "dummy_server", + "CRBNK/CUSTOMERS/c_birthday", ("IN", 3, "COALESCE", 2, "YEAR", 1, "__col__", 1990, 1990, 1991), ): ( "IN", @@ -550,7 +686,8 @@ ], ), ( - "srv/CRBNK/CUSTOMERS/c_birthday", + "dummy_server", + "CRBNK/CUSTOMERS/c_birthday", ("IN", 3, "COALESCE", 2, "YEAR", 1, "__col__", 1990, 1990, 2005), ): ( "IN", @@ -560,7 +697,8 @@ ], ), ( - "srv/CRBNK/CUSTOMERS/c_birthday", + "dummy_server", + "CRBNK/CUSTOMERS/c_birthday", ("IN", 3, "COALESCE", 2, "YEAR", 1, "__col__", 2005, 2005, 2006), ): ( "IN", @@ -569,7 +707,8 @@ ], ), ( - "srv/CRBNK/CUSTOMERS/c_birthday", + "dummy_server", + "CRBNK/CUSTOMERS/c_birthday", ("NOT", 1, "IN", 3, 
"COALESCE", 2, "YEAR", 1, "__col__", 1990, 1990, 1991), ): ( "NOT_IN", @@ -580,7 +719,8 @@ ], ), ( - "srv/CRBNK/CUSTOMERS/c_birthday", + "dummy_server", + "CRBNK/CUSTOMERS/c_birthday", ("NOT", 1, "IN", 3, "COALESCE", 2, "YEAR", 1, "__col__", 1990, 1990, 2005), ): ( "NOT_IN", @@ -590,7 +730,8 @@ ], ), ( - "srv/CRBNK/CUSTOMERS/c_birthday", + "dummy_server", + "CRBNK/CUSTOMERS/c_birthday", ("NOT", 1, "IN", 3, "COALESCE", 2, "YEAR", 1, "__col__", 2005, 2005, 2006), ): ( "NOT_IN", @@ -599,11 +740,13 @@ ], ), ( - "srv/CRBNK/CUSTOMERS/c_fname", + "dummy_server", + "CRBNK/CUSTOMERS/c_fname", ("IN", 4, "SLICE", 3, "__col__", 0, 1, "q", "r", "s"), ): ("IN", ["QUEENIE", "ROBERT", "SOPHIA"]), ( - "srv/CRBNK/CUSTOMERS/c_lname", + "dummy_server", + "CRBNK/CUSTOMERS/c_lname", ( "CONTAINS", 2, @@ -630,12 +773,17 @@ "IN", ["LEE", "RODRIGUEZ"], ), - ("srv/CRBNK/CUSTOMERS/c_fname", ("EQUAL", 2, "SLICE", 3, "__col__", 0, 1, "i")): ( + ( + "dummy_server", + "CRBNK/CUSTOMERS/c_fname", + ("EQUAL", 2, "SLICE", 3, "__col__", 0, 1, "i"), + ): ( "IN", ["ISABEL"], ), ( - "srv/CRBNK/CUSTOMERS/c_fname", + "dummy_server", + "CRBNK/CUSTOMERS/c_fname", ("IN", 6, "SLICE", 3, "__col__", 1, 2, "ar", "li", "ra", "to", "am"), ): ( "IN", @@ -651,7 +799,8 @@ ], ), ( - "srv/CRBNK/TRANSACTIONS/t_ts", + "dummy_server", + "CRBNK/TRANSACTIONS/t_ts", ("EQUAL", 2, "DATETRUNC", 2, "year", "__col__", "2023-01-01"), ): ( "IN", @@ -720,7 +869,8 @@ ], ), ( - "srv/CRBNK/TRANSACTIONS/t_ts", + "dummy_server", + "CRBNK/TRANSACTIONS/t_ts", ("EQUAL", 2, "DATETRUNC", 2, "quarter", "__col__", "2023-04-01"), ): ( "IN", @@ -745,7 +895,8 @@ ], ), ( - "srv/CRBNK/TRANSACTIONS/t_ts", + "dummy_server", + "CRBNK/TRANSACTIONS/t_ts", ("EQUAL", 2, "DATETRUNC", 2, "month", "__col__", "2023-06-01"), ): ( "IN", @@ -761,7 +912,8 @@ ], ), ( - "srv/CRBNK/TRANSACTIONS/t_ts", + "dummy_server", + "CRBNK/TRANSACTIONS/t_ts", ("EQUAL", 2, "DATETRUNC", 2, "day", "__col__", "2023-06-02"), ): ( "IN", @@ -771,7 +923,8 @@ ], ), ( - 
"srv/CRBNK/TRANSACTIONS/t_ts", + "dummy_server", + "CRBNK/TRANSACTIONS/t_ts", ("EQUAL", 2, "DATETRUNC", 2, "hour", "__col__", "2023-06-02 04:00:00"), ): ( "IN", @@ -781,7 +934,8 @@ ], ), ( - "srv/CRBNK/TRANSACTIONS/t_ts", + "dummy_server", + "CRBNK/TRANSACTIONS/t_ts", ("EQUAL", 2, "DATETRUNC", 2, "minute", "__col__", "2023-06-02 04:55:00"), ): ( "IN", @@ -791,7 +945,8 @@ ], ), ( - "srv/CRBNK/TRANSACTIONS/t_ts", + "dummy_server", + "CRBNK/TRANSACTIONS/t_ts", ("EQUAL", 2, "DATETRUNC", 2, "second", "__col__", "2023-06-02 04:55:31"), ): ( "IN", @@ -800,7 +955,8 @@ ], ), ( - "srv/CRBNK/TRANSACTIONS/t_ts", + "dummy_server", + "CRBNK/TRANSACTIONS/t_ts", ("EQUAL", 2, "DATEADD", 3, 1, "years", "__col__", "2020-11-11 18:00:52"), ): ( "IN", @@ -809,7 +965,8 @@ ], ), ( - "srv/CRBNK/TRANSACTIONS/t_ts", + "dummy_server", + "CRBNK/TRANSACTIONS/t_ts", ("EQUAL", 2, "DATEADD", 3, 2, "quarters", "__col__", "2020-05-11 18:00:52"), ): ( "IN", @@ -818,7 +975,8 @@ ], ), ( - "srv/CRBNK/TRANSACTIONS/t_ts", + "dummy_server", + "CRBNK/TRANSACTIONS/t_ts", ("EQUAL", 2, "DATEADD", 3, -5, "months", "__col__", "2019-06-11 18:00:52"), ): ( "IN", @@ -827,7 +985,8 @@ ], ), ( - "srv/CRBNK/TRANSACTIONS/t_ts", + "dummy_server", + "CRBNK/TRANSACTIONS/t_ts", ("EQUAL", 2, "DATEADD", 3, 10, "days", "__col__", "2019-11-21 18:00:52"), ): ( "IN", @@ -836,7 +995,8 @@ ], ), ( - "srv/CRBNK/TRANSACTIONS/t_ts", + "dummy_server", + "CRBNK/TRANSACTIONS/t_ts", ("EQUAL", 2, "DATEADD", 3, 1000, "hours", "__col__", "2019-12-23 10:00:52"), ): ( "IN", @@ -845,7 +1005,8 @@ ], ), ( - "srv/CRBNK/TRANSACTIONS/t_ts", + "dummy_server", + "CRBNK/TRANSACTIONS/t_ts", ("EQUAL", 2, "DATEADD", 3, 10000, "minutes", "__col__", "2019-11-18 16:40:52"), ): ( "IN", @@ -854,7 +1015,8 @@ ], ), ( - "srv/CRBNK/TRANSACTIONS/t_ts", + "dummy_server", + "CRBNK/TRANSACTIONS/t_ts", ( "EQUAL", 2, @@ -872,7 +1034,8 @@ ], ), ( - "srv/CRBNK/TRANSACTIONS/t_ts", + "dummy_server", + "CRBNK/TRANSACTIONS/t_ts", ( "EQUAL", 2, @@ -895,11 +1058,11 @@ 
"2019-11-11 15:44:22", ], ), - ("srv/CRBNK/CUSTOMERS/c_fname", ("CONTAINS", 2, "__col__", "a")): ( + ("dummy_server", "CRBNK/CUSTOMERS/c_fname", ("CONTAINS", 2, "__col__", "a")): ( "NOT_IN", ["BOB", "EMILY", "HENRY", "LUKE", "PETER", "QUEENIE", "ROBERT"], ), - ("srv/CRBNK/CUSTOMERS/c_fname", ("CONTAINS", 2, "__col__", "e")): ( + ("dummy_server", "CRBNK/CUSTOMERS/c_fname", ("CONTAINS", 2, "__col__", "e")): ( "NOT_IN", [ "BOB", @@ -913,7 +1076,7 @@ "THOMAS", ], ), - ("srv/CRBNK/CUSTOMERS/c_fname", ("CONTAINS", 2, "__col__", "i")): ( + ("dummy_server", "CRBNK/CUSTOMERS/c_fname", ("CONTAINS", 2, "__col__", "i")): ( "IN", [ "ALICE", @@ -927,51 +1090,57 @@ "SOPHIA", ], ), - ("srv/CRBNK/CUSTOMERS/c_fname", ("CONTAINS", 2, "__col__", "o")): ( + ("dummy_server", "CRBNK/CUSTOMERS/c_fname", ("CONTAINS", 2, "__col__", "o")): ( "IN", ["BOB", "CAROL", "NICHOLAS", "OLIVIA", "ROBERT", "SOPHIA", "THOMAS"], ), - ("srv/CRBNK/CUSTOMERS/c_fname", ("CONTAINS", 2, "__col__", "u")): ( + ("dummy_server", "CRBNK/CUSTOMERS/c_fname", ("CONTAINS", 2, "__col__", "u")): ( "IN", ["LUKE", "QUEENIE"], ), ( - "srv/CRBNK/CUSTOMERS/c_fname", + "dummy_server", + "CRBNK/CUSTOMERS/c_fname", ("AND", 2, "CONTAINS", 2, "__col__", "a", "CONTAINS", 2, "__col__", "e"), ): ( "IN", ["ALICE", "GRACE", "ISABEL", "JAMES", "KAREN"], ), ( - "srv/CRBNK/CUSTOMERS/c_fname", + "dummy_server", + "CRBNK/CUSTOMERS/c_fname", ("AND", 2, "CONTAINS", 2, "__col__", "e", "CONTAINS", 2, "__col__", "i"), ): ( "IN", ["ALICE", "EMILY", "ISABEL", "QUEENIE"], ), ( - "srv/CRBNK/CUSTOMERS/c_fname", + "dummy_server", + "CRBNK/CUSTOMERS/c_fname", ("AND", 2, "CONTAINS", 2, "__col__", "i", "CONTAINS", 2, "__col__", "o"), ): ( "IN", ["NICHOLAS", "OLIVIA", "SOPHIA"], ), ( - "srv/CRBNK/CUSTOMERS/c_fname", + "dummy_server", + "CRBNK/CUSTOMERS/c_fname", ("AND", 2, "CONTAINS", 2, "__col__", "o", "CONTAINS", 2, "__col__", "u"), ): ( "IN", [], ), ( - "srv/CRBNK/CUSTOMERS/c_fname", + "dummy_server", + "CRBNK/CUSTOMERS/c_fname", ("AND", 2, "CONTAINS", 
2, "__col__", "u", "CONTAINS", 2, "__col__", "a"), ): ( "IN", [], ), ( - "srv/CRBNK/CUSTOMERS/c_fname", + "dummy_server", + "CRBNK/CUSTOMERS/c_fname", ( "AND", 3, @@ -993,7 +1162,8 @@ ["ALICE", "ISABEL"], ), ( - "srv/CRBNK/CUSTOMERS/c_fname", + "dummy_server", + "CRBNK/CUSTOMERS/c_fname", ( "AND", 3, @@ -1015,7 +1185,8 @@ [], ), ( - "srv/CRBNK/CUSTOMERS/c_fname", + "dummy_server", + "CRBNK/CUSTOMERS/c_fname", ( "NOT", 1, @@ -1035,7 +1206,8 @@ ["NICHOLAS", "OLIVIA", "SOPHIA"], ), ( - "srv/CRBNK/CUSTOMERS/c_fname", + "dummy_server", + "CRBNK/CUSTOMERS/c_fname", ( "AND", 2, @@ -1061,7 +1233,8 @@ [], ), ( - "srv/CRBNK/CUSTOMERS/c_fname", + "dummy_server", + "CRBNK/CUSTOMERS/c_fname", ( "NOT", 1, @@ -1081,7 +1254,8 @@ ["ALICE", "EMILY", "ISABEL", "QUEENIE"], ), ( - "srv/CRBNK/CUSTOMERS/c_fname", + "dummy_server", + "CRBNK/CUSTOMERS/c_fname", ("CONTAINS", 2, "QUOTE", 1, "SLICE", "UPPER", 1, "SLICE", 3, "__col__", 0, 1), ): ("IN", ["CAROL", "EMILY", "ISABEL", "LUKE", "SOPHIA"]), } diff --git a/tests/test_masked_sf.py b/tests/test_masked_sf.py index d94b1007f..35496202a 100644 --- a/tests/test_masked_sf.py +++ b/tests/test_masked_sf.py @@ -5,7 +5,8 @@ import pytest from pydough.database_connectors import DatabaseContext, DatabaseDialect -from tests.testing_utilities import graph_fetcher +from pydough.mask_server import MaskServerInfo +from tests.testing_utilities import graph_fetcher, temp_env_override from .testing_sf_masked_utilities import ( PyDoughSnowflakeMaskedTest, @@ -825,6 +826,7 @@ def sf_masked_test_data( return request.param +@temp_env_override({"PYDOUGH_MASK_SERVER_HARD_LIMIT": "20"}) @pytest.mark.sf_masked def test_pipeline_until_relational_masked_sf( sf_masked_test_data: PyDoughSnowflakeMaskedTest, @@ -832,6 +834,7 @@ def test_pipeline_until_relational_masked_sf( get_plan_test_filename: Callable[[str], str], update_tests: bool, enable_mask_rewrites: str, + true_mask_server_info: MaskServerInfo, ) -> None: """ Tests the conversion of the PyDough queries on 
the masked dataset @@ -841,10 +844,14 @@ def test_pipeline_until_relational_masked_sf( f"{sf_masked_test_data.test_name}_{enable_mask_rewrites}" ) sf_masked_test_data.run_relational_test( - get_sf_masked_graphs, file_path, update_tests + get_sf_masked_graphs, + file_path, + update_tests, + mask_server=true_mask_server_info, ) +@temp_env_override({"PYDOUGH_MASK_SERVER_HARD_LIMIT": "20"}) @pytest.mark.sf_masked def test_pipeline_until_sql_masked_sf( sf_masked_test_data: PyDoughSnowflakeMaskedTest, @@ -853,6 +860,7 @@ def test_pipeline_until_sql_masked_sf( get_sql_test_filename: Callable[[str, DatabaseDialect], str], update_tests: bool, enable_mask_rewrites: str, + true_mask_server_info: MaskServerInfo, ): """ Tests the conversion of the PyDough queries on the custom masked dataset @@ -867,9 +875,11 @@ def test_pipeline_until_sql_masked_sf( file_path, update_tests, sf_data, + mask_server=true_mask_server_info, ) +@temp_env_override({"PYDOUGH_MASK_SERVER_HARD_LIMIT": "20"}) @pytest.mark.execute @pytest.mark.sf_masked @pytest.mark.parametrize("account_type", ["NONE", "PARTIAL", "FULL"]) @@ -879,6 +889,7 @@ def test_pipeline_e2e_masked_sf( get_sf_masked_graphs: graph_fetcher, # noqa: F811 sf_masked_context: Callable[[str, str, str], DatabaseContext], # noqa: F811 enable_mask_rewrites: str, # noqa: F811 + true_mask_server_info: MaskServerInfo, ) -> None: """ End-to-end test for Snowflake with masked columns. 
@@ -890,4 +901,5 @@ def test_pipeline_e2e_masked_sf( get_sf_masked_graphs, sf_masked_context("BODO", sf_masked_test_data.graph_name, account_type), coerce_types=True, + mask_server=true_mask_server_info, ) diff --git a/tests/test_masked_sqlite.py b/tests/test_masked_sqlite.py index 0d905c6cc..97c94144f 100644 --- a/tests/test_masked_sqlite.py +++ b/tests/test_masked_sqlite.py @@ -1284,11 +1284,11 @@ def test_pipeline_e2e_cryptbank( "result = CRYPTBANK.CALCULATE(n=COUNT(selected_accounts))", [ { - "CRBNK/ACCOUNTS/a_open_ts: ['BETWEEN', 3, '2020-03-28 09:20:00', '__col__', '2020-09-20 08:30:00']", + "CRBNK/ACCOUNTS/a_open_ts: ['AND', 2, 'LTE', 2, '2020-03-28 09:20:00', '__col__', 'LTE', 2, '__col__', '2020-09-20 08:30:00']", "DRY_RUN", }, { - "CRBNK/ACCOUNTS/a_open_ts: ['BETWEEN', 3, '2020-03-28 09:20:00', '__col__', '2020-09-20 08:30:00']", + "CRBNK/ACCOUNTS/a_open_ts: ['AND', 2, 'LTE', 2, '2020-03-28 09:20:00', '__col__', 'LTE', 2, '__col__', '2020-09-20 08:30:00']", }, ], id="cryptbank_filter_count_32", diff --git a/tests/test_metadata/masked_graphs.json b/tests/test_metadata/masked_graphs.json index ef9680335..7ff27c758 100644 --- a/tests/test_metadata/masked_graphs.json +++ b/tests/test_metadata/masked_graphs.json @@ -17,6 +17,7 @@ "unprotect protocol": "(42 - ({}))", "protect protocol": "(42 - ({}))", "server masked": true, + "server dataset id": "dummy_server", "description": "The unique key for the customer", "sample values": [1, 2, 10, 18], "synonyms": ["customer id"] @@ -29,6 +30,7 @@ "unprotect protocol": "LOWER({})", "protect protocol": "UPPER({})", "server masked": true, + "server dataset id": "dummy_server", "description": "The first name of the customer", "sample values": ["alice", "olivia", "queenie", "james"], "synonyms": ["customer name"] @@ -41,6 +43,7 @@ "unprotect protocol": "LOWER({})", "protect protocol": "UPPER({})", "server masked": true, + "server dataset id": "dummy_server", "description": "The last name of the customer", "sample values": 
["smith", "johnson", "lee"], "synonyms": ["surname"] @@ -53,6 +56,7 @@ "unprotect protocol": "REPLACE(REPLACE(REPLACE({}, '9', '*'), '0', '9'), '*', '0')", "protect protocol": "REPLACE(REPLACE(REPLACE({}, '0', '*'), '9', '0'), '*', '9')", "server masked": true, + "server dataset id": "dummy_server", "description": "The phone number of the customer", "sample values": ["555-123-4567", "555-768-9012"], "synonyms": ["cell number", "contact phone number"] @@ -65,6 +69,7 @@ "unprotect protocol": "SUBSTRING({0}, -1) || SUBSTRING({0}, 1, LENGTH({0}) - 1)", "protect protocol": "SUBSTRING({0}, 2) || SUBSTRING({0}, 1, 1)", "server masked": true, + "server dataset id": "dummy_server", "description": "The email address of the customer", "sample values": ["alice_j@example.org", "m.gonzalez@ymail.com"], "synonyms": ["email address", "contact email"] @@ -77,6 +82,7 @@ "unprotect protocol": "SUBSTRING({0}, -1) || SUBSTRING({0}, 1, LENGTH({0}) - 1)", "protect protocol": "SUBSTRING({0}, 2) || SUBSTRING({0}, 1, 1)", "server masked": true, + "server dataset id": "dummy_server", "description": "The address of the customer, in the format 'street address;city;state;zip'", "sample values": ["123 Maple St;Portland;OR;97205", "654 Cedar Blvd;San Diego;CA;92101"], "synonyms": ["home address", "residential address", "residence", "location", "home"] @@ -89,6 +95,7 @@ "unprotect protocol": "DATE({}, '+472 days')", "protect protocol": "DATE({}, '-472 days')", "server masked": true, + "server dataset id": "dummy_server", "description": "The date the customer was born on.", "synonyms": ["birth date", "date of birth", "DOB"] } @@ -147,6 +154,7 @@ "unprotect protocol": "CASE WHEN {0} = 0 THEN 0 ELSE (CASE WHEN {0} > 0 THEN 1 ELSE -1 END) * CAST(SUBSTRING({0}, 1 + INSTR({0}, '-'), LENGTH({0}) / 2) AS INTEGER) END", "protect protocol": "CAST(STRING({0}) || STRING(ABS({0})) AS INTEGER)", "server masked": true, + "server dataset id": "dummy_server", "description": "The unique key for the account", 
"sample values": [5, 17, 35, 58], "synonyms": ["account id", "account number"] @@ -177,6 +185,7 @@ "unprotect protocol": "SQRT({})", "protect protocol": "({0} * {0})", "server masked": true, + "server dataset id": "dummy_server", "description": "The current balance of the account", "sample values": [2450.75, 520.10, 22500.00], "synonyms": ["money in account", "account balance", "funds available"] @@ -189,6 +198,7 @@ "unprotect protocol": "SUBSTRING({0}, -1) || SUBSTRING({0}, 1, LENGTH({0}) - 1)", "protect protocol": "SUBSTRING({0}, 2) || SUBSTRING({0}, 1, 1)", "server masked": true, + "server dataset id": "dummy_server", "description": "The type of account, which is one of: checking, savings, retirement, business, or mma", "sample values": ["checking", "savings", "retirement", "business", "mma"], "synonyms": ["category"] @@ -201,6 +211,7 @@ "unprotect protocol": "DATETIME({}, '+123456789 seconds')", "protect protocol": "DATETIME({}, '-123456789 seconds')", "server masked": true, + "server dataset id": "dummy_server", "description": "The timestamp when the account was opened at the branch", "synonyms": ["datetime of opening", "account creation date", "account open timestamp"] } @@ -249,6 +260,7 @@ "unprotect protocol": "(1025.67 - ({}))", "protect protocol": "(1025.67 - ({}))", "server masked": true, + "server dataset id": "dummy_server", "description": "The amount of money transferred in the transaction", "sample values": [2753.92, 322.67, 5278.45], "synonyms": ["amount wired", "transaction amount", "money transferred"] @@ -261,6 +273,7 @@ "unprotect protocol": "DATETIME({}, '+54321 seconds')", "protect protocol": "DATETIME({}, '-54321 seconds')", "server masked": true, + "server dataset id": "dummy_server", "description": "The timestamp when the transaction occurred", "synonyms": ["transaction datetime", "transaction time", "datetime of transfer"] } diff --git a/tests/test_metadata/sf_masked_examples.json b/tests/test_metadata/sf_masked_examples.json index 
4232f56bd..227ba31b6 100644 --- a/tests/test_metadata/sf_masked_examples.json +++ b/tests/test_metadata/sf_masked_examples.json @@ -24,6 +24,7 @@ "column name": "customerid", "data type": "numeric", "server masked": true, + "server dataset id": "snowflake.bodo.fsi.protected_accounts", "unprotect protocol": "PTY_UNPROTECT_ACCOUNT({})", "protect protocol": "PTY_PROTECT_ACCOUNT({})", "description": "ID of the customer who owns the account", @@ -36,6 +37,7 @@ "column name": "accounttype", "data type": "string", "server masked": true, + "server dataset id": "snowflake.bodo.fsi.protected_accounts", "unprotect protocol": "PTY_UNPROTECT({}, 'deAccount')", "protect protocol": "PTY_PROTECT({}, 'deAccount')", "description": "Type of the account (either 'Checking' or 'Savings')", @@ -57,6 +59,7 @@ "column name": "currency", "data type": "string", "server masked": true, + "server dataset id": "snowflake.bodo.fsi.protected_accounts", "unprotect protocol": "PTY_UNPROTECT_ACCOUNT({})", "protect protocol": "PTY_PROTECT({}, 'deAccount')", "description": "Currency of the account (either 'USD', 'EUR', or 'GBP')", @@ -69,6 +72,7 @@ "column name": "createddate", "data type": "datetime", "server masked": true, + "server dataset id": "snowflake.bodo.fsi.protected_accounts", "unprotect protocol": "PTY_UNPROTECT_DOB({})", "protect protocol": "PTY_PROTECT_DOB({})", "description": "The date the account was created", @@ -81,6 +85,7 @@ "column name": "status", "data type": "string", "server masked": true, + "server dataset id": "snowflake.bodo.fsi.protected_accounts", "unprotect protocol": "PTY_UNPROTECT_ACCOUNT({})", "protect protocol": "PTY_PROTECT({}, 'deAccount')", "description": "The current status of the account (either 'Active' or 'Inactive')", @@ -205,6 +210,7 @@ "column name": "customerid", "data type": "numeric", "server masked": true, + "server dataset id": "snowflake.bodo.fsi.protected_customers", "unprotect protocol": "PTY_UNPROTECT({}, 'deAccount')", "protect protocol": 
"PTY_PROTECT_ACCOUNT({})", "description": "The unique id for the customer", @@ -217,6 +223,7 @@ "column name": "firstname", "data type": "string", "server masked": true, + "server dataset id": "snowflake.bodo.fsi.protected_customers", "unprotect protocol": "PTY_UNPROTECT({}, 'deName')", "protect protocol": "PTY_PROTECT({}, 'deName')", "description": "The first name of the customer", @@ -229,6 +236,7 @@ "column name": "lastname", "data type": "string", "server masked": true, + "server dataset id": "snowflake.bodo.fsi.protected_customers", "unprotect protocol": "PTY_UNPROTECT({}, 'deName')", "protect protocol": "PTY_PROTECT_NAME({})", "description": "The last name of the customer", @@ -241,6 +249,7 @@ "column name": "address", "data type": "string", "server masked": true, + "server dataset id": "snowflake.bodo.fsi.protected_customers", "unprotect protocol": "PTY_UNPROTECT_ADDRESS({})", "protect protocol": "PTY_PROTECT({}, 'deAddress')", "description": "The address of the customer", @@ -255,6 +264,7 @@ "column name": "city", "data type": "string", "server masked": true, + "server dataset id": "snowflake.bodo.fsi.protected_customers", "unprotect protocol": "PTY_UNPROTECT_ADDRESS({})", "protect protocol": "PTY_PROTECT_ADDRESS({})", "description": "The city where the customer resides", @@ -267,6 +277,7 @@ "column name": "state", "data type": "string", "server masked": true, + "server dataset id": "snowflake.bodo.fsi.protected_customers", "unprotect protocol": "PTY_UNPROTECT({}, 'deAddress')", "protect protocol": "PTY_PROTECT({}, 'deAddress')", "description": "The state where the customer resides", @@ -288,6 +299,7 @@ "column name": "email", "data type": "string", "server masked": true, + "server dataset id": "snowflake.bodo.fsi.protected_customers", "unprotect protocol": "PTY_UNPROTECT({}, 'deEmail')", "protect protocol": "PTY_PROTECT_EMAIL({})", "description": "The email address of the customer", @@ -301,6 +313,7 @@ "column name": "phonenumber", "data type": "string", 
"server masked": true, + "server dataset id": "snowflake.bodo.fsi.protected_customers", "unprotect protocol": "PTY_UNPROTECT_PHONE({})", "protect protocol": "PTY_PROTECT({}, 'dePhone')", "description": "The phone number of the customer", @@ -313,6 +326,7 @@ "column name": "dob", "data type": "datetime", "server masked": true, + "server dataset id": "snowflake.bodo.fsi.protected_customers", "unprotect protocol": "PTY_UNPROTECT({}, 'deDOB')", "protect protocol": "PTY_PROTECT({}, 'deDOB')", "description": "The date of birth of the customer", @@ -361,6 +375,7 @@ "column name": "creditcardnumber", "data type": "numeric", "server masked": true, + "server dataset id": "snowflake.bodo.fsi.protected_customers", "unprotect protocol": "PTY_UNPROTECT_CCN({})", "protect protocol": "PTY_PROTECT_CCN({})", "description": "The credit card number of the customer", @@ -571,6 +586,7 @@ "column name": "patient_id", "data type": "numeric", "server masked": true, + "server dataset id": "snowflake.bodo.health.protected_claims", "unprotect protocol": "PTY_UNPROTECT({}, 'deAccount')", "protect protocol": "PTY_PROTECT_ACCOUNT({})", "description": "The id of the patient who filed the claim", @@ -583,6 +599,7 @@ "column name": "claim_date", "data type": "datetime", "server masked": true, + "server dataset id": "snowflake.bodo.health.protected_claims", "unprotect protocol": "PTY_UNPROTECT_DOB({})", "protect protocol": "PTY_PROTECT({}, 'deDOB')", "description": "The date when the claim was filed", @@ -595,6 +612,7 @@ "column name": "provider_name", "data type": "string", "server masked": true, + "server dataset id": "snowflake.bodo.health.protected_claims", "unprotect protocol": "PTY_UNPROTECT({}, 'deName')", "protect protocol": "PTY_PROTECT({}, 'deName')", "description": "The name of the healthcare provider", @@ -643,6 +661,7 @@ "column name": "claim_status", "data type": "string", "server masked": true, + "server dataset id": "snowflake.bodo.health.protected_claims", "unprotect protocol": 
"PTY_UNPROTECT({}, 'deAccount')", "protect protocol": "PTY_PROTECT_ACCOUNT({})", "description": "The current status of the claim (either 'Pending', 'Approved', or 'Denied')", @@ -735,6 +754,7 @@ "column name": "patient_id", "data type": "numeric", "server masked": true, + "server dataset id": "snowflake.bodo.health.protected_patients", "unprotect protocol": "PTY_UNPROTECT_ACCOUNT({})", "protect protocol": "PTY_PROTECT_ACCOUNT({})", "description": "The unique id for the patient", @@ -747,6 +767,7 @@ "column name": "first_name", "data type": "string", "server masked": true, + "server dataset id": "snowflake.bodo.health.protected_patients", "unprotect protocol": "PTY_UNPROTECT_NAME({})", "protect protocol": "PTY_PROTECT({}, 'deName')", "description": "The first name of the patient", @@ -759,6 +780,7 @@ "column name": "last_name", "data type": "string", "server masked": true, + "server dataset id": "snowflake.bodo.health.protected_patients", "unprotect protocol": "PTY_UNPROTECT({}, 'deName')", "protect protocol": "PTY_PROTECT({}, 'deName')", "description": "The last name of the patient", @@ -771,6 +793,7 @@ "column name": "date_of_birth", "data type": "datetime", "server masked": true, + "server dataset id": "snowflake.bodo.health.protected_patients", "unprotect protocol": "PTY_UNPROTECT({}, 'deDOB')", "protect protocol": "PTY_PROTECT_DOB({})", "description": "The date of birth of the patient", @@ -792,6 +815,7 @@ "column name": "ssn", "data type": "string", "server masked": true, + "server dataset id": "snowflake.bodo.health.protected_patients", "unprotect protocol": "PTY_UNPROTECT_SSN({})", "protect protocol": "PTY_PROTECT_SSN({})", "description": "The social security number of the patient", @@ -804,6 +828,7 @@ "column name": "address", "data type": "string", "server masked": true, + "server dataset id": "snowflake.bodo.health.protected_patients", "unprotect protocol": "PTY_UNPROTECT_ADDRESS({})", "protect protocol": "PTY_PROTECT({}, 'deAddress')", "description": 
"The address of the patient", @@ -818,6 +843,7 @@ "column name": "phone_number", "data type": "string", "server masked": true, + "server dataset id": "snowflake.bodo.health.protected_patients", "unprotect protocol": "PTY_UNPROTECT({}, 'dePhone')", "protect protocol": "PTY_PROTECT({}, 'dePhone')", "description": "The phone number of the patient", @@ -830,6 +856,7 @@ "column name": "email", "data type": "string", "server masked": true, + "server dataset id": "snowflake.bodo.health.protected_patients", "unprotect protocol": "PTY_UNPROTECT({}, 'deEmail')", "protect protocol": "PTY_PROTECT({}, 'deEmail')", "description": "The email address of the patient", @@ -933,6 +960,7 @@ "column name": "first_name", "data type": "string", "server masked": true, + "server dataset id": "snowflake.bodo.retail.protected_loyalty_members", "unprotect protocol": "PTY_UNPROTECT({}, 'deName')", "protect protocol": "PTY_PROTECT_NAME({})", "description": "The first name of the loyalty member", @@ -945,6 +973,7 @@ "column name": "last_name", "data type": "string", "server masked": true, + "server dataset id": "snowflake.bodo.retail.protected_loyalty_members", "unprotect protocol": "PTY_UNPROTECT_NAME({})", "protect protocol": "PTY_PROTECT({}, 'deName')", "description": "The last name of the loyalty member", @@ -957,6 +986,7 @@ "column name": "email", "data type": "string", "server masked": true, + "server dataset id": "snowflake.bodo.retail.protected_loyalty_members", "unprotect protocol": "PTY_UNPROTECT_EMAIL({})", "protect protocol": "PTY_PROTECT_EMAIL({})", "description": "The email address of the loyalty member", @@ -978,6 +1008,7 @@ "column name": "address", "data type": "string", "server masked": true, + "server dataset id": "snowflake.bodo.retail.protected_loyalty_members", "unprotect protocol": "PTY_UNPROTECT({}, 'deAddress')", "protect protocol": "PTY_PROTECT({}, 'deAddress')", "description": "The address of the loyalty member", @@ -992,6 +1023,7 @@ "column name": "date_of_birth", 
"data type": "datetime", "server masked": true, + "server dataset id": "snowflake.bodo.retail.protected_loyalty_members", "unprotect protocol": "PTY_UNPROTECT({}, 'deDOB')", "protect protocol": "PTY_PROTECT({}, 'deDOB')", "description": "Birthdate of the loyalty member", @@ -1062,6 +1094,7 @@ "column name": "transaction_date", "data type": "datetime", "server masked": true, + "server dataset id": "snowflake.bodo.retail.protected_transactions", "unprotect protocol": "PTY_UNPROTECT_TS({})", "protect protocol": "PTY_PROTECT_TS({})", "description": "The date when the transaction occurred", @@ -1074,6 +1107,7 @@ "column name": "store_location", "data type": "string", "server masked": true, + "server dataset id": "snowflake.bodo.retail.protected_transactions", "unprotect protocol": "PTY_UNPROTECT_ADDRESS({})", "protect protocol": "PTY_PROTECT({}, 'deAddress')", "description": "The location of the store where the transaction took place", @@ -1095,6 +1129,7 @@ "column name": "payment_method", "data type": "string", "server masked": true, + "server dataset id": "snowflake.bodo.retail.protected_transactions", "unprotect protocol": "PTY_UNPROTECT_ACCOUNT({})", "protect protocol": "PTY_PROTECT_ACCOUNT({})", "description": "The method used for payment (either 'Cash', 'Credit Card', 'Gift Card', or 'Mobile Payment')", diff --git a/tests/test_mock_mask_server.py b/tests/test_mock_mask_server.py index aad814529..e1c9bde7b 100644 --- a/tests/test_mock_mask_server.py +++ b/tests/test_mock_mask_server.py @@ -24,26 +24,42 @@ None, [ MaskServerInput( + dataset_id="dummy_server", table_path="db.tbl", column_name="col", expression=["EQUAL", 2, "__col__", 0], ), MaskServerInput( + dataset_id="dummy_server", table_path="db.tbl", column_name="col", expression=["OR", 2, "__col__", 5], ), MaskServerInput( + dataset_id="dummy_server", table_path="db/orders", column_name="order_date", - expression=["BETWEEN", 3, "__col__", "2025-01-01", "2025-02-01"], + expression=[ + "AND", + 2, + "LTE", + 2, 
+ "2025-01-01", + "__col__", + "LTE", + 2, + "__col__", + "2025-02-01", + ], ), MaskServerInput( + dataset_id="dummy_server", table_path="db/tbl", column_name="col", expression=["GT", 2, "__col__", 45.67], ), MaskServerInput( + dataset_id="dummy_server", table_path="db.tbl", column_name="col", expression=["NOT_EQUAL", 2, "__col__", "LOWER", 1, "Smith"], @@ -86,6 +102,7 @@ None, [ MaskServerInput( + dataset_id="dummy_server", table_path="db.tbl", column_name="col", expression=["EQUAL", 2, "__col__", 0], @@ -107,6 +124,7 @@ None, [ MaskServerInput( + dataset_id="dummy_server", table_path="db.tbl", column_name="col", expression=["OR", 2, "__col__", 5], @@ -124,6 +142,7 @@ "test-token-123", [ MaskServerInput( + dataset_id="dummy_server", table_path="db.orders", column_name="order_date", expression=["BETWEEN", 3, "__col__", "2025-01-01", "2025-02-01"], @@ -147,6 +166,7 @@ "test-token-123", [ MaskServerInput( + dataset_id="dummy_server", table_path="db.tbl", column_name="col", expression=["NOT_EQUAL", 2, "__col__", True], @@ -164,11 +184,13 @@ None, [ MaskServerInput( + dataset_id="dummy_server", table_path="db.tbl", column_name="col", expression=["LT", 2, "__col__", "123.654445"], ), MaskServerInput( + dataset_id="dummy_server", table_path="db.tbl", column_name="col", expression=[ @@ -201,6 +223,7 @@ None, [ MaskServerInput( + dataset_id="dummy_server", table_path="db.tbl", column_name="col", expression=[ @@ -245,7 +268,7 @@ def test_mock_mask_server( """ mask_server: MaskServerInfo = MaskServerInfo( - base_url="http://localhost:8000", server_address="srv", token=token + base_url="http://localhost:8000", token=token ) # Capture stdout to avoid polluting the console with logging calls @@ -277,6 +300,7 @@ def test_mock_mask_server( "bad_token_123", [ MaskServerInput( + dataset_id="dummy_server", table_path="db.tbl", column_name="col", expression=["OR", 2, "__col__", 5], @@ -298,9 +322,7 @@ def test_mock_mask_server_errors( Testing that the MaskServer raises an exception 
with the expected error message """ with pytest.raises(Exception, match=re.escape(error_msg)): - mask_server: MaskServerInfo = MaskServerInfo( - base_url=base_url, server_address="srv", token=token - ) + mask_server: MaskServerInfo = MaskServerInfo(base_url=base_url, token=token) mask_server.connection.set_timeout(0.5) # Doing the request diff --git a/tests/testing_utilities.py b/tests/testing_utilities.py index dd353f92b..cfb3e13ad 100644 --- a/tests/testing_utilities.py +++ b/tests/testing_utilities.py @@ -26,12 +26,15 @@ "extract_batch_requests_from_logs", "graph_fetcher", "map_over_dict_values", + "temp_env_override", ] import datetime +import os import re from abc import ABC, abstractmethod from collections.abc import Callable +from contextlib import contextmanager from dataclasses import dataclass from decimal import Decimal from typing import Any @@ -79,6 +82,39 @@ graph_fetcher = Callable[[str], GraphMetadata] +@contextmanager +def temp_env_override(env_vars: dict[str, str | None]): + """Update the current environment variables with key-value pairs provided + in a dictionary and then restore it after. + + Args + env_vars (dict(str, str or None)): A dictionary of environment variables to set. + A value of None indicates a variable should be removed. 
+ """ + + def update_env_vars(env_vars): + old_env_vars: dict[str, str | None] = {} + for k, v in env_vars.items(): + if k in os.environ: + old_env_vars[k] = os.environ[k] + else: + old_env_vars[k] = None + + if v is None: + if k in os.environ: + del os.environ[k] + else: + os.environ[k] = v + return old_env_vars + + old_env = {} + try: + old_env = update_env_vars(env_vars) + yield + finally: + update_env_vars(old_env) + + def map_over_dict_values( dictionary: dict[Any, Any], func: Callable[[Any], Any] ) -> dict[Any, Any]: From 0ed7303f40dcca2f43076ca339a37692b7ba8e97 Mon Sep 17 00:00:00 2001 From: knassre-bodo Date: Thu, 18 Dec 2025 11:48:29 -0800 Subject: [PATCH 36/40] Mask server working, need to iron out kinks with 'retail_transactions_filter' test --- pydough/mask_server/mask_server.py | 18 +++-- tests/test_masked_sf.py | 6 +- tests/test_metadata/sf_masked_examples.json | 70 +++++++++---------- tests/test_mock_mask_server.py | 13 +++- ..._accounts_customers_compound_a_rewrite.txt | 4 +- ..._accounts_customers_compound_b_rewrite.txt | 4 +- ..._accounts_customers_compound_c_rewrite.txt | 2 +- .../fsi_customers_accounts_join_rewrite.txt | 2 +- .../fsi_customers_filter_isin_rewrite.txt | 2 +- .../fsi_customers_filter_not_isin_rewrite.txt | 2 +- .../health_claims_filter_day_rewrite.txt | 2 +- .../retail_members_compound_a_rewrite.txt | 2 +- .../retail_members_compound_b_rewrite.txt | 2 +- .../retail_members_compound_e_rewrite.txt | 2 +- .../retail_members_compound_f_rewrite.txt | 2 +- .../retail_members_compound_j_rewrite.txt | 2 +- ...l_members_filter_name_endswith_rewrite.txt | 2 +- .../retail_transactions_filter_rewrite.txt | 4 +- ...nsactions_payment_method_cmp_a_rewrite.txt | 2 +- ...nsactions_payment_method_cmp_b_rewrite.txt | 2 +- ...nsactions_payment_method_cmp_c_rewrite.txt | 2 +- ...nsactions_payment_method_cmp_d_rewrite.txt | 2 +- ...customers_compound_a_rewrite_snowflake.sql | 4 +- ...customers_compound_b_rewrite_snowflake.sql | 6 +- 
...customers_compound_c_rewrite_snowflake.sql | 19 +---- ...tomers_accounts_join_rewrite_snowflake.sql | 2 +- ...ustomers_filter_isin_rewrite_snowflake.sql | 2 +- ...mers_filter_not_isin_rewrite_snowflake.sql | 2 +- ...th_claims_filter_day_rewrite_snowflake.sql | 2 +- ...l_members_compound_a_rewrite_snowflake.sql | 4 +- ...l_members_compound_b_rewrite_snowflake.sql | 3 +- ...l_members_compound_e_rewrite_snowflake.sql | 3 +- ...l_members_compound_f_rewrite_snowflake.sql | 3 +- ...l_members_compound_j_rewrite_snowflake.sql | 2 +- ...filter_name_endswith_rewrite_snowflake.sql | 4 +- ..._transactions_filter_rewrite_snowflake.sql | 3 +- ...payment_method_cmp_a_rewrite_snowflake.sql | 2 +- ...payment_method_cmp_b_rewrite_snowflake.sql | 2 +- ...payment_method_cmp_c_rewrite_snowflake.sql | 2 +- ...payment_method_cmp_d_rewrite_snowflake.sql | 2 +- 40 files changed, 107 insertions(+), 109 deletions(-) diff --git a/pydough/mask_server/mask_server.py b/pydough/mask_server/mask_server.py index 4d2e32a8c..8bf99e831 100644 --- a/pydough/mask_server/mask_server.py +++ b/pydough/mask_server/mask_server.py @@ -10,6 +10,7 @@ "MaskServerResponse", ] +import base64 import os from dataclasses import dataclass from enum import Enum @@ -225,10 +226,11 @@ def generate_request( "predicate_format": "linear_with_arity", "output_mode": "cell_encrypted", "dry_run": true, + "limits": {"dedup": True}, }, ... 
], - "expression_format": {"name": "linear", "version": "0.2.0"} + "expression_format": {"name": "linear", "version": "0.2.0"}, "hard_limit": 1000, } ``` @@ -252,6 +254,7 @@ def generate_request( "mode": "dynamic", "predicate_format": "linear_with_arity", "dry_run": dry_run, + "limits": {"dedup": True}, } payload["items"].append(evaluate_request) @@ -348,10 +351,15 @@ def generate_result(self, response_dict: dict) -> list[MaskServerOutput]: MaskServerResponse.IN_ARRAY, MaskServerResponse.NOT_IN_ARRAY, ): - payload = [ - record.get("cell_encrypted") - for record in response.get("records", []) - ] + payload = [] + for record in response.get("records", []): + record_raw: str = record["cell_encrypted"] + padded = ( + record_raw + "=" * (4 - len(record_raw) % 4) + if len(record_raw) % 4 + else record_raw + ) + payload.append(base64.b64decode(padded).decode("utf-8")) result.append( MaskServerOutput( diff --git a/tests/test_masked_sf.py b/tests/test_masked_sf.py index 35496202a..56fcb91bf 100644 --- a/tests/test_masked_sf.py +++ b/tests/test_masked_sf.py @@ -826,7 +826,7 @@ def sf_masked_test_data( return request.param -@temp_env_override({"PYDOUGH_MASK_SERVER_HARD_LIMIT": "20"}) +@temp_env_override({"PYDOUGH_MASK_SERVER_HARD_LIMIT": "50"}) @pytest.mark.sf_masked def test_pipeline_until_relational_masked_sf( sf_masked_test_data: PyDoughSnowflakeMaskedTest, @@ -851,7 +851,7 @@ def test_pipeline_until_relational_masked_sf( ) -@temp_env_override({"PYDOUGH_MASK_SERVER_HARD_LIMIT": "20"}) +@temp_env_override({"PYDOUGH_MASK_SERVER_HARD_LIMIT": "50"}) @pytest.mark.sf_masked def test_pipeline_until_sql_masked_sf( sf_masked_test_data: PyDoughSnowflakeMaskedTest, @@ -879,7 +879,7 @@ def test_pipeline_until_sql_masked_sf( ) -@temp_env_override({"PYDOUGH_MASK_SERVER_HARD_LIMIT": "20"}) +@temp_env_override({"PYDOUGH_MASK_SERVER_HARD_LIMIT": "50"}) @pytest.mark.execute @pytest.mark.sf_masked @pytest.mark.parametrize("account_type", ["NONE", "PARTIAL", "FULL"]) diff --git 
a/tests/test_metadata/sf_masked_examples.json b/tests/test_metadata/sf_masked_examples.json index 227ba31b6..fdba1e454 100644 --- a/tests/test_metadata/sf_masked_examples.json +++ b/tests/test_metadata/sf_masked_examples.json @@ -24,7 +24,7 @@ "column name": "customerid", "data type": "numeric", "server masked": true, - "server dataset id": "snowflake.bodo.fsi.protected_accounts", + "server dataset id": "BODO.FSI.ACCOUNTS", "unprotect protocol": "PTY_UNPROTECT_ACCOUNT({})", "protect protocol": "PTY_PROTECT_ACCOUNT({})", "description": "ID of the customer who owns the account", @@ -37,7 +37,7 @@ "column name": "accounttype", "data type": "string", "server masked": true, - "server dataset id": "snowflake.bodo.fsi.protected_accounts", + "server dataset id": "BODO.FSI.ACCOUNTS", "unprotect protocol": "PTY_UNPROTECT({}, 'deAccount')", "protect protocol": "PTY_PROTECT({}, 'deAccount')", "description": "Type of the account (either 'Checking' or 'Savings')", @@ -59,7 +59,7 @@ "column name": "currency", "data type": "string", "server masked": true, - "server dataset id": "snowflake.bodo.fsi.protected_accounts", + "server dataset id": "BODO.FSI.ACCOUNTS", "unprotect protocol": "PTY_UNPROTECT_ACCOUNT({})", "protect protocol": "PTY_PROTECT({}, 'deAccount')", "description": "Currency of the account (either 'USD', 'EUR', or 'GBP')", @@ -72,7 +72,7 @@ "column name": "createddate", "data type": "datetime", "server masked": true, - "server dataset id": "snowflake.bodo.fsi.protected_accounts", + "server dataset id": "BODO.FSI.ACCOUNTS", "unprotect protocol": "PTY_UNPROTECT_DOB({})", "protect protocol": "PTY_PROTECT_DOB({})", "description": "The date the account was created", @@ -85,7 +85,7 @@ "column name": "status", "data type": "string", "server masked": true, - "server dataset id": "snowflake.bodo.fsi.protected_accounts", + "server dataset id": "BODO.FSI.ACCOUNTS", "unprotect protocol": "PTY_UNPROTECT_ACCOUNT({})", "protect protocol": "PTY_PROTECT({}, 'deAccount')", 
"description": "The current status of the account (either 'Active' or 'Inactive')", @@ -210,7 +210,7 @@ "column name": "customerid", "data type": "numeric", "server masked": true, - "server dataset id": "snowflake.bodo.fsi.protected_customers", + "server dataset id": "BODO.FSI.PROTECTED_CUSTOMERS", "unprotect protocol": "PTY_UNPROTECT({}, 'deAccount')", "protect protocol": "PTY_PROTECT_ACCOUNT({})", "description": "The unique id for the customer", @@ -223,7 +223,7 @@ "column name": "firstname", "data type": "string", "server masked": true, - "server dataset id": "snowflake.bodo.fsi.protected_customers", + "server dataset id": "BODO.FSI.PROTECTED_CUSTOMERS", "unprotect protocol": "PTY_UNPROTECT({}, 'deName')", "protect protocol": "PTY_PROTECT({}, 'deName')", "description": "The first name of the customer", @@ -236,7 +236,7 @@ "column name": "lastname", "data type": "string", "server masked": true, - "server dataset id": "snowflake.bodo.fsi.protected_customers", + "server dataset id": "BODO.FSI.PROTECTED_CUSTOMERS", "unprotect protocol": "PTY_UNPROTECT({}, 'deName')", "protect protocol": "PTY_PROTECT_NAME({})", "description": "The last name of the customer", @@ -249,7 +249,7 @@ "column name": "address", "data type": "string", "server masked": true, - "server dataset id": "snowflake.bodo.fsi.protected_customers", + "server dataset id": "BODO.FSI.PROTECTED_CUSTOMERS", "unprotect protocol": "PTY_UNPROTECT_ADDRESS({})", "protect protocol": "PTY_PROTECT({}, 'deAddress')", "description": "The address of the customer", @@ -264,7 +264,7 @@ "column name": "city", "data type": "string", "server masked": true, - "server dataset id": "snowflake.bodo.fsi.protected_customers", + "server dataset id": "BODO.FSI.PROTECTED_CUSTOMERS", "unprotect protocol": "PTY_UNPROTECT_ADDRESS({})", "protect protocol": "PTY_PROTECT_ADDRESS({})", "description": "The city where the customer resides", @@ -277,7 +277,7 @@ "column name": "state", "data type": "string", "server masked": true, - "server 
dataset id": "snowflake.bodo.fsi.protected_customers", + "server dataset id": "BODO.FSI.PROTECTED_CUSTOMERS", "unprotect protocol": "PTY_UNPROTECT({}, 'deAddress')", "protect protocol": "PTY_PROTECT({}, 'deAddress')", "description": "The state where the customer resides", @@ -299,7 +299,7 @@ "column name": "email", "data type": "string", "server masked": true, - "server dataset id": "snowflake.bodo.fsi.protected_customers", + "server dataset id": "BODO.FSI.PROTECTED_CUSTOMERS", "unprotect protocol": "PTY_UNPROTECT({}, 'deEmail')", "protect protocol": "PTY_PROTECT_EMAIL({})", "description": "The email address of the customer", @@ -313,7 +313,7 @@ "column name": "phonenumber", "data type": "string", "server masked": true, - "server dataset id": "snowflake.bodo.fsi.protected_customers", + "server dataset id": "BODO.FSI.PROTECTED_CUSTOMERS", "unprotect protocol": "PTY_UNPROTECT_PHONE({})", "protect protocol": "PTY_PROTECT({}, 'dePhone')", "description": "The phone number of the customer", @@ -326,7 +326,7 @@ "column name": "dob", "data type": "datetime", "server masked": true, - "server dataset id": "snowflake.bodo.fsi.protected_customers", + "server dataset id": "BODO.FSI.PROTECTED_CUSTOMERS", "unprotect protocol": "PTY_UNPROTECT({}, 'deDOB')", "protect protocol": "PTY_PROTECT({}, 'deDOB')", "description": "The date of birth of the customer", @@ -375,7 +375,7 @@ "column name": "creditcardnumber", "data type": "numeric", "server masked": true, - "server dataset id": "snowflake.bodo.fsi.protected_customers", + "server dataset id": "BODO.FSI.PROTECTED_CUSTOMERS", "unprotect protocol": "PTY_UNPROTECT_CCN({})", "protect protocol": "PTY_PROTECT_CCN({})", "description": "The credit card number of the customer", @@ -586,7 +586,7 @@ "column name": "patient_id", "data type": "numeric", "server masked": true, - "server dataset id": "snowflake.bodo.health.protected_claims", + "server dataset id": "BODO.HEALTH.CLAIMS", "unprotect protocol": "PTY_UNPROTECT({}, 'deAccount')", 
"protect protocol": "PTY_PROTECT_ACCOUNT({})", "description": "The id of the patient who filed the claim", @@ -599,7 +599,7 @@ "column name": "claim_date", "data type": "datetime", "server masked": true, - "server dataset id": "snowflake.bodo.health.protected_claims", + "server dataset id": "BODO.HEALTH.CLAIMS", "unprotect protocol": "PTY_UNPROTECT_DOB({})", "protect protocol": "PTY_PROTECT({}, 'deDOB')", "description": "The date when the claim was filed", @@ -612,7 +612,7 @@ "column name": "provider_name", "data type": "string", "server masked": true, - "server dataset id": "snowflake.bodo.health.protected_claims", + "server dataset id": "BODO.HEALTH.CLAIMS", "unprotect protocol": "PTY_UNPROTECT({}, 'deName')", "protect protocol": "PTY_PROTECT({}, 'deName')", "description": "The name of the healthcare provider", @@ -661,7 +661,7 @@ "column name": "claim_status", "data type": "string", "server masked": true, - "server dataset id": "snowflake.bodo.health.protected_claims", + "server dataset id": "BODO.HEALTH.CLAIMS", "unprotect protocol": "PTY_UNPROTECT({}, 'deAccount')", "protect protocol": "PTY_PROTECT_ACCOUNT({})", "description": "The current status of the claim (either 'Pending', 'Approved', or 'Denied')", @@ -754,7 +754,7 @@ "column name": "patient_id", "data type": "numeric", "server masked": true, - "server dataset id": "snowflake.bodo.health.protected_patients", + "server dataset id": "BODO.HEALTH.PROTECTED_PATIENTS", "unprotect protocol": "PTY_UNPROTECT_ACCOUNT({})", "protect protocol": "PTY_PROTECT_ACCOUNT({})", "description": "The unique id for the patient", @@ -767,7 +767,7 @@ "column name": "first_name", "data type": "string", "server masked": true, - "server dataset id": "snowflake.bodo.health.protected_patients", + "server dataset id": "BODO.HEALTH.PROTECTED_PATIENTS", "unprotect protocol": "PTY_UNPROTECT_NAME({})", "protect protocol": "PTY_PROTECT({}, 'deName')", "description": "The first name of the patient", @@ -780,7 +780,7 @@ "column name": 
"last_name", "data type": "string", "server masked": true, - "server dataset id": "snowflake.bodo.health.protected_patients", + "server dataset id": "BODO.HEALTH.PROTECTED_PATIENTS", "unprotect protocol": "PTY_UNPROTECT({}, 'deName')", "protect protocol": "PTY_PROTECT({}, 'deName')", "description": "The last name of the patient", @@ -793,7 +793,7 @@ "column name": "date_of_birth", "data type": "datetime", "server masked": true, - "server dataset id": "snowflake.bodo.health.protected_patients", + "server dataset id": "BODO.HEALTH.PROTECTED_PATIENTS", "unprotect protocol": "PTY_UNPROTECT({}, 'deDOB')", "protect protocol": "PTY_PROTECT_DOB({})", "description": "The date of birth of the patient", @@ -815,7 +815,7 @@ "column name": "ssn", "data type": "string", "server masked": true, - "server dataset id": "snowflake.bodo.health.protected_patients", + "server dataset id": "BODO.HEALTH.PROTECTED_PATIENTS", "unprotect protocol": "PTY_UNPROTECT_SSN({})", "protect protocol": "PTY_PROTECT_SSN({})", "description": "The social security number of the patient", @@ -828,7 +828,7 @@ "column name": "address", "data type": "string", "server masked": true, - "server dataset id": "snowflake.bodo.health.protected_patients", + "server dataset id": "BODO.HEALTH.PROTECTED_PATIENTS", "unprotect protocol": "PTY_UNPROTECT_ADDRESS({})", "protect protocol": "PTY_PROTECT({}, 'deAddress')", "description": "The address of the patient", @@ -843,7 +843,7 @@ "column name": "phone_number", "data type": "string", "server masked": true, - "server dataset id": "snowflake.bodo.health.protected_patients", + "server dataset id": "BODO.HEALTH.PROTECTED_PATIENTS", "unprotect protocol": "PTY_UNPROTECT({}, 'dePhone')", "protect protocol": "PTY_PROTECT({}, 'dePhone')", "description": "The phone number of the patient", @@ -856,7 +856,7 @@ "column name": "email", "data type": "string", "server masked": true, - "server dataset id": "snowflake.bodo.health.protected_patients", + "server dataset id": 
"BODO.HEALTH.PROTECTED_PATIENTS", "unprotect protocol": "PTY_UNPROTECT({}, 'deEmail')", "protect protocol": "PTY_PROTECT({}, 'deEmail')", "description": "The email address of the patient", @@ -960,7 +960,7 @@ "column name": "first_name", "data type": "string", "server masked": true, - "server dataset id": "snowflake.bodo.retail.protected_loyalty_members", + "server dataset id": "BODO.RETAIL.PROTECTED_LOYALTY_MEMBERS", "unprotect protocol": "PTY_UNPROTECT({}, 'deName')", "protect protocol": "PTY_PROTECT_NAME({})", "description": "The first name of the loyalty member", @@ -973,7 +973,7 @@ "column name": "last_name", "data type": "string", "server masked": true, - "server dataset id": "snowflake.bodo.retail.protected_loyalty_members", + "server dataset id": "BODO.RETAIL.PROTECTED_LOYALTY_MEMBERS", "unprotect protocol": "PTY_UNPROTECT_NAME({})", "protect protocol": "PTY_PROTECT({}, 'deName')", "description": "The last name of the loyalty member", @@ -986,7 +986,7 @@ "column name": "email", "data type": "string", "server masked": true, - "server dataset id": "snowflake.bodo.retail.protected_loyalty_members", + "server dataset id": "BODO.RETAIL.PROTECTED_LOYALTY_MEMBERS", "unprotect protocol": "PTY_UNPROTECT_EMAIL({})", "protect protocol": "PTY_PROTECT_EMAIL({})", "description": "The email address of the loyalty member", @@ -1008,7 +1008,7 @@ "column name": "address", "data type": "string", "server masked": true, - "server dataset id": "snowflake.bodo.retail.protected_loyalty_members", + "server dataset id": "BODO.RETAIL.PROTECTED_LOYALTY_MEMBERS", "unprotect protocol": "PTY_UNPROTECT({}, 'deAddress')", "protect protocol": "PTY_PROTECT({}, 'deAddress')", "description": "The address of the loyalty member", @@ -1023,7 +1023,7 @@ "column name": "date_of_birth", "data type": "datetime", "server masked": true, - "server dataset id": "snowflake.bodo.retail.protected_loyalty_members", + "server dataset id": "BODO.RETAIL.PROTECTED_LOYALTY_MEMBERS", "unprotect protocol": 
"PTY_UNPROTECT({}, 'deDOB')", "protect protocol": "PTY_PROTECT({}, 'deDOB')", "description": "Birthdate of the loyalty member", @@ -1094,7 +1094,7 @@ "column name": "transaction_date", "data type": "datetime", "server masked": true, - "server dataset id": "snowflake.bodo.retail.protected_transactions", + "server dataset id": "BODO.RETAIL.TRANSACTIONS", "unprotect protocol": "PTY_UNPROTECT_TS({})", "protect protocol": "PTY_PROTECT_TS({})", "description": "The date when the transaction occurred", @@ -1107,7 +1107,7 @@ "column name": "store_location", "data type": "string", "server masked": true, - "server dataset id": "snowflake.bodo.retail.protected_transactions", + "server dataset id": "BODO.RETAIL.TRANSACTIONS", "unprotect protocol": "PTY_UNPROTECT_ADDRESS({})", "protect protocol": "PTY_PROTECT({}, 'deAddress')", "description": "The location of the store where the transaction took place", @@ -1129,7 +1129,7 @@ "column name": "payment_method", "data type": "string", "server masked": true, - "server dataset id": "snowflake.bodo.retail.protected_transactions", + "server dataset id": "BODO.RETAIL.TRANSACTIONS", "unprotect protocol": "PTY_UNPROTECT_ACCOUNT({})", "protect protocol": "PTY_PROTECT_ACCOUNT({})", "description": "The method used for payment (either 'Cash', 'Credit Card', 'Gift Card', or 'Mobile Payment')", diff --git a/tests/test_mock_mask_server.py b/tests/test_mock_mask_server.py index e1c9bde7b..8f2925f4b 100644 --- a/tests/test_mock_mask_server.py +++ b/tests/test_mock_mask_server.py @@ -145,7 +145,18 @@ dataset_id="dummy_server", table_path="db.orders", column_name="order_date", - expression=["BETWEEN", 3, "__col__", "2025-01-01", "2025-02-01"], + expression=[ + "AND", + 2, + "LTE", + 2, + "2025-01-01", + "__col__", + "LTE", + 2, + "__col__", + "2025-02-01", + ], ), ], [ diff --git a/tests/test_plan_refsols/fsi_accounts_customers_compound_a_rewrite.txt b/tests/test_plan_refsols/fsi_accounts_customers_compound_a_rewrite.txt index f3451d2a7..b884e6bca 
100644 --- a/tests/test_plan_refsols/fsi_accounts_customers_compound_a_rewrite.txt +++ b/tests/test_plan_refsols/fsi_accounts_customers_compound_a_rewrite.txt @@ -1,7 +1,7 @@ ROOT(columns=[('n', n_rows)], orderings=[]) AGGREGATE(keys={}, aggregations={'n_rows': COUNT()}) JOIN(condition=UNMASK::(PTY_UNPROTECT_ACCOUNT([t0.customerid])) == UNMASK::(PTY_UNPROTECT([t1.customerid], 'deAccount')), type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={}) - FILTER(condition=currency != MASK::(PTY_PROTECT(['GBP':string], 'deAccount')) & balance < 20000:numeric, columns={'customerid': customerid}) + FILTER(condition=balance < 20000:numeric & ISIN(currency, ['jpb', 'gFr']:array[unknown]), columns={'customerid': customerid}) SCAN(table=bodo.fsi.accounts, columns={'balance': balance, 'currency': currency, 'customerid': customerid}) - FILTER(condition=state == MASK::(PTY_PROTECT(['California':string], 'deAddress')), columns={'customerid': customerid}) + FILTER(condition=state == 'V6kSQBaqGv':unknown, columns={'customerid': customerid}) SCAN(table=bodo.fsi.protected_customers, columns={'customerid': customerid, 'state': state}) diff --git a/tests/test_plan_refsols/fsi_accounts_customers_compound_b_rewrite.txt b/tests/test_plan_refsols/fsi_accounts_customers_compound_b_rewrite.txt index db5c10d1f..603d39a83 100644 --- a/tests/test_plan_refsols/fsi_accounts_customers_compound_b_rewrite.txt +++ b/tests/test_plan_refsols/fsi_accounts_customers_compound_b_rewrite.txt @@ -1,7 +1,7 @@ ROOT(columns=[('n', n_rows)], orderings=[]) AGGREGATE(keys={}, aggregations={'n_rows': COUNT()}) JOIN(condition=UNMASK::(PTY_UNPROTECT_ACCOUNT([t0.customerid])) == UNMASK::(PTY_UNPROTECT([t1.customerid], 'deAccount')), type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={}) - FILTER(condition=YEAR(UNMASK::(PTY_UNPROTECT_DOB([createddate]))) <= 2022:numeric & ISIN(currency, [Call(op=MASK, inputs=[Literal(value='USD', type=UnknownType())], 
return_type=StringType()), Call(op=MASK, inputs=[Literal(value='GPB', type=UnknownType())], return_type=StringType()), Call(op=MASK, inputs=[Literal(value='EUR', type=UnknownType())], return_type=StringType()), Call(op=MASK, inputs=[Literal(value='JPY', type=UnknownType())], return_type=StringType()), Call(op=MASK, inputs=[Literal(value='AUD', type=UnknownType())], return_type=StringType())]:bool), columns={'customerid': customerid}) + FILTER(condition=YEAR(UNMASK::(PTY_UNPROTECT_DOB([createddate]))) <= 2022:numeric & ISIN(currency, ['jpb', 'gFr']:array[unknown]), columns={'customerid': customerid}) SCAN(table=bodo.fsi.accounts, columns={'createddate': createddate, 'currency': currency, 'customerid': customerid}) - FILTER(condition=ISIN(state, [Call(op=MASK, inputs=[Literal(value='Georgia', type=UnknownType())], return_type=StringType()), Call(op=MASK, inputs=[Literal(value='Alabama', type=UnknownType())], return_type=StringType()), Call(op=MASK, inputs=[Literal(value='Mississippi', type=UnknownType())], return_type=StringType()), Call(op=MASK, inputs=[Literal(value='Arkansas', type=UnknownType())], return_type=StringType()), Call(op=MASK, inputs=[Literal(value='Louisiana', type=UnknownType())], return_type=StringType()), Call(op=MASK, inputs=[Literal(value='Florida', type=UnknownType())], return_type=StringType()), Call(op=MASK, inputs=[Literal(value='South Carolina', type=UnknownType())], return_type=StringType()), Call(op=MASK, inputs=[Literal(value='North Carolina', type=UnknownType())], return_type=StringType()), Call(op=MASK, inputs=[Literal(value='Texas', type=UnknownType())], return_type=StringType()), Call(op=MASK, inputs=[Literal(value='Tennessee', type=UnknownType())], return_type=StringType()), Call(op=MASK, inputs=[Literal(value='Missouri', type=UnknownType())], return_type=StringType())]:bool) & NOT(ISIN(firstname, [Call(op=MASK, inputs=[Literal(value='Jennifer', type=UnknownType())], return_type=StringType()), Call(op=MASK, 
inputs=[Literal(value='Julio', type=UnknownType())], return_type=StringType()), Call(op=MASK, inputs=[Literal(value='Johnson', type=UnknownType())], return_type=StringType()), Call(op=MASK, inputs=[Literal(value='Jameson', type=UnknownType())], return_type=StringType()), Call(op=MASK, inputs=[Literal(value='Michael', type=UnknownType())], return_type=StringType()), Call(op=MASK, inputs=[Literal(value='Robert', type=UnknownType())], return_type=StringType())]:bool)), columns={'customerid': customerid}) + FILTER(condition=ISIN(state, ['EdJ6cty', 'raXuWJGK', '4o0uuG1', 'FvlL1x8', 'TY84qyAxy', 'AqjyPuvoU8d', 'q6OaWD9X', 'MZBK0 U3nQzZbb', 'lN1sA AANifXzd', 'JXtZBpRhT', 'YYE75']:array[unknown]) & NOT(ISIN(firstname, ['tzuhpuCF', 'cPBnsOl', 'NVGimP']:array[unknown])), columns={'customerid': customerid}) SCAN(table=bodo.fsi.protected_customers, columns={'customerid': customerid, 'firstname': firstname, 'state': state}) diff --git a/tests/test_plan_refsols/fsi_accounts_customers_compound_c_rewrite.txt b/tests/test_plan_refsols/fsi_accounts_customers_compound_c_rewrite.txt index 6bc643316..a0c36bdbe 100644 --- a/tests/test_plan_refsols/fsi_accounts_customers_compound_c_rewrite.txt +++ b/tests/test_plan_refsols/fsi_accounts_customers_compound_c_rewrite.txt @@ -1,4 +1,4 @@ ROOT(columns=[('n', n_rows)], orderings=[]) AGGREGATE(keys={}, aggregations={'n_rows': COUNT()}) - FILTER(condition=MONOTONIC('2020-01-31':string, UNMASK::(PTY_UNPROTECT_DOB([createddate])), '2020-03-13':string) | MONOTONIC('2022-12-25':string, UNMASK::(PTY_UNPROTECT_DOB([createddate])), '2023-01-15':string) | MONOTONIC('2024-08-04':string, UNMASK::(PTY_UNPROTECT_DOB([createddate])), '2024-11-08':string) | MONOTONIC('2025-06-07':string, UNMASK::(PTY_UNPROTECT_DOB([createddate])), '2026-03-07':string), columns={}) + FILTER(condition=ISIN(createddate, ['3149-05-04', '1478-09-27', '2396-11-12', '0714-10-12', '2461-03-25', '2326-07-19', '2883-05-12', '1368-06-18', '2386-05-20', '2241-06-04', '2413-07-10', 
'1464-06-25', '2308-05-18', '2690-01-11', '0937-05-21', '0794-10-27', '2856-02-06', '1335-02-11', '1605-10-12', '2456-12-12', '1610-12-28', '1267-04-15', '2133-09-29', '3337-02-07', '1403-12-19', '1484-05-22']:array[unknown]), columns={}) SCAN(table=bodo.fsi.accounts, columns={'createddate': createddate}) diff --git a/tests/test_plan_refsols/fsi_customers_accounts_join_rewrite.txt b/tests/test_plan_refsols/fsi_customers_accounts_join_rewrite.txt index 0e96429f6..1cef79db3 100644 --- a/tests/test_plan_refsols/fsi_customers_accounts_join_rewrite.txt +++ b/tests/test_plan_refsols/fsi_customers_accounts_join_rewrite.txt @@ -1,4 +1,4 @@ ROOT(columns=[('num_customers_checking_accounts', n_rows)], orderings=[]) AGGREGATE(keys={}, aggregations={'n_rows': COUNT()}) - FILTER(condition=accounttype != MASK::(PTY_PROTECT(['checking':string], 'deAccount')), columns={}) + FILTER(condition=ISIN(accounttype, ['HPlnssRN', 'XADfRcm']:array[unknown]), columns={}) SCAN(table=bodo.fsi.accounts, columns={'accounttype': accounttype}) diff --git a/tests/test_plan_refsols/fsi_customers_filter_isin_rewrite.txt b/tests/test_plan_refsols/fsi_customers_filter_isin_rewrite.txt index 2021c990a..561498fea 100644 --- a/tests/test_plan_refsols/fsi_customers_filter_isin_rewrite.txt +++ b/tests/test_plan_refsols/fsi_customers_filter_isin_rewrite.txt @@ -1,4 +1,4 @@ ROOT(columns=[('n', n_rows)], orderings=[]) AGGREGATE(keys={}, aggregations={'n_rows': COUNT()}) - FILTER(condition=ISIN(lastname, [Call(op=MASK, inputs=[Literal(value='Barnes', type=UnknownType())], return_type=StringType()), Call(op=MASK, inputs=[Literal(value='Hernandez', type=UnknownType())], return_type=StringType()), Call(op=MASK, inputs=[Literal(value='Moore', type=UnknownType())], return_type=StringType())]:bool), columns={}) + FILTER(condition=ISIN(lastname, ['CyypMP', 'TlwQYRsjl', 'SmfgY']:array[unknown]), columns={}) SCAN(table=bodo.fsi.protected_customers, columns={'lastname': lastname}) diff --git 
a/tests/test_plan_refsols/fsi_customers_filter_not_isin_rewrite.txt b/tests/test_plan_refsols/fsi_customers_filter_not_isin_rewrite.txt index fdd20a45a..59eddfa22 100644 --- a/tests/test_plan_refsols/fsi_customers_filter_not_isin_rewrite.txt +++ b/tests/test_plan_refsols/fsi_customers_filter_not_isin_rewrite.txt @@ -1,4 +1,4 @@ ROOT(columns=[('n', n_rows)], orderings=[]) AGGREGATE(keys={}, aggregations={'n_rows': COUNT()}) - FILTER(condition=NOT(ISIN(lastname, [Call(op=MASK, inputs=[Literal(value='Barnes', type=UnknownType())], return_type=StringType()), Call(op=MASK, inputs=[Literal(value='Hernandez', type=UnknownType())], return_type=StringType()), Call(op=MASK, inputs=[Literal(value='Moore', type=UnknownType())], return_type=StringType())]:bool)), columns={}) + FILTER(condition=NOT(ISIN(lastname, ['CyypMP', 'TlwQYRsjl', 'SmfgY']:array[unknown])), columns={}) SCAN(table=bodo.fsi.protected_customers, columns={'lastname': lastname}) diff --git a/tests/test_plan_refsols/health_claims_filter_day_rewrite.txt b/tests/test_plan_refsols/health_claims_filter_day_rewrite.txt index 5541f71b5..faec07875 100644 --- a/tests/test_plan_refsols/health_claims_filter_day_rewrite.txt +++ b/tests/test_plan_refsols/health_claims_filter_day_rewrite.txt @@ -1,4 +1,4 @@ ROOT(columns=[('n', n_rows)], orderings=[]) AGGREGATE(keys={}, aggregations={'n_rows': COUNT()}) - FILTER(condition=DAY(UNMASK::(PTY_UNPROTECT_DOB([claim_date]))) == 31:numeric, columns={}) + FILTER(condition=ISIN(claim_date, ['2666-05-02', '2627-10-27', '2896-11-08', '0775-03-22', '1471-09-22', '3175-06-30', '1909-08-08', '2063-10-13', '3095-04-16', '1842-06-18', '1292-11-24', '1324-05-13', '2757-05-10', '1415-01-25']:array[unknown]), columns={}) SCAN(table=bodo.health.claims, columns={'claim_date': claim_date}) diff --git a/tests/test_plan_refsols/retail_members_compound_a_rewrite.txt b/tests/test_plan_refsols/retail_members_compound_a_rewrite.txt index 1422ebb17..053492e14 100644 --- 
a/tests/test_plan_refsols/retail_members_compound_a_rewrite.txt +++ b/tests/test_plan_refsols/retail_members_compound_a_rewrite.txt @@ -1,4 +1,4 @@ ROOT(columns=[('n', n_rows)], orderings=[]) AGGREGATE(keys={}, aggregations={'n_rows': COUNT()}) - FILTER(condition=UNMASK::(PTY_UNPROTECT([date_of_birth], 'deDOB')) >= datetime.date(2002, 1, 1):datetime & ISIN(last_name, [Call(op=MASK, inputs=[Literal(value='Johnson', type=UnknownType())], return_type=StringType()), Call(op=MASK, inputs=[Literal(value='Robinson', type=UnknownType())], return_type=StringType())]:bool), columns={}) + FILTER(condition=ISIN(date_of_birth, ['1922-08-06', '0913-06-11', '2142-09-01', '2006-03-03', '0915-05-17', '1823-12-26', '1722-02-13', '2208-12-06', '2350-04-16', '2973-02-23', '1484-10-19', '1924-10-25', '2544-09-01', '2363-10-31', '2685-03-23', '1040-04-02', '3136-09-15', '1569-07-03', '1804-07-19', '1543-07-16', '2478-02-14', '0983-02-13', '2243-03-06', '2628-10-02', '2064-12-22', '1463-05-18', '1078-01-28', '1125-11-24', '1405-11-12', '3290-02-08', '1278-11-09', '3093-06-09', '1464-06-16', '2613-07-13', '1964-08-20', '1061-01-22', '2797-05-10', '1905-02-26', '1938-07-08', '1535-05-03', '1289-11-13', '1818-01-12', '1073-07-09', '2605-10-18', '1711-07-03', '3018-03-01', '2830-08-29']:array[unknown]) & ISIN(last_name, ['xnUVZyS', 'UcoQBfzB']:array[unknown]), columns={}) SCAN(table=bodo.retail.protected_loyalty_members, columns={'date_of_birth': date_of_birth, 'last_name': last_name}) diff --git a/tests/test_plan_refsols/retail_members_compound_b_rewrite.txt b/tests/test_plan_refsols/retail_members_compound_b_rewrite.txt index 530b3f93d..203ac6a84 100644 --- a/tests/test_plan_refsols/retail_members_compound_b_rewrite.txt +++ b/tests/test_plan_refsols/retail_members_compound_b_rewrite.txt @@ -1,4 +1,4 @@ ROOT(columns=[('n', n_rows)], orderings=[]) AGGREGATE(keys={}, aggregations={'n_rows': COUNT()}) - FILTER(condition=last_name != MASK::(PTY_PROTECT(['Smith':string], 'deName')) & 
date_of_birth == MASK::(PTY_PROTECT([datetime.date(1979, 3, 7):datetime], 'deDOB')), columns={}) + FILTER(condition=last_name != MASK::(PTY_PROTECT(['Smith':string], 'deName')) & date_of_birth == '1622-10-03':unknown, columns={}) SCAN(table=bodo.retail.protected_loyalty_members, columns={'date_of_birth': date_of_birth, 'last_name': last_name}) diff --git a/tests/test_plan_refsols/retail_members_compound_e_rewrite.txt b/tests/test_plan_refsols/retail_members_compound_e_rewrite.txt index e14a2e9b0..a4c5878d8 100644 --- a/tests/test_plan_refsols/retail_members_compound_e_rewrite.txt +++ b/tests/test_plan_refsols/retail_members_compound_e_rewrite.txt @@ -1,4 +1,4 @@ ROOT(columns=[('n', n_rows)], orderings=[]) AGGREGATE(keys={}, aggregations={'n_rows': COUNT()}) - FILTER(condition=UNMASK::(PTY_UNPROTECT([date_of_birth], 'deDOB')) < datetime.date(1983, 1, 30):datetime & UNMASK::(PTY_UNPROTECT([date_of_birth], 'deDOB')) >= datetime.date(1983, 1, 10):datetime, columns={}) + FILTER(condition=ISIN(date_of_birth, ['2637-10-01', '1403-11-22']:array[unknown]), columns={}) SCAN(table=bodo.retail.protected_loyalty_members, columns={'date_of_birth': date_of_birth}) diff --git a/tests/test_plan_refsols/retail_members_compound_f_rewrite.txt b/tests/test_plan_refsols/retail_members_compound_f_rewrite.txt index 65af4ef09..dc0445dc1 100644 --- a/tests/test_plan_refsols/retail_members_compound_f_rewrite.txt +++ b/tests/test_plan_refsols/retail_members_compound_f_rewrite.txt @@ -1,4 +1,4 @@ ROOT(columns=[('n', n_rows)], orderings=[]) AGGREGATE(keys={}, aggregations={'n_rows': COUNT()}) - FILTER(condition=UNMASK::(PTY_UNPROTECT([date_of_birth], 'deDOB')) <= datetime.date(1976, 7, 28):datetime & UNMASK::(PTY_UNPROTECT([date_of_birth], 'deDOB')) > datetime.date(1976, 7, 1):datetime, columns={}) + FILTER(condition=ISIN(date_of_birth, ['1357-07-11', '0988-09-15']:array[unknown]), columns={}) SCAN(table=bodo.retail.protected_loyalty_members, columns={'date_of_birth': date_of_birth}) diff --git 
a/tests/test_plan_refsols/retail_members_compound_j_rewrite.txt b/tests/test_plan_refsols/retail_members_compound_j_rewrite.txt index b9366f919..120646627 100644 --- a/tests/test_plan_refsols/retail_members_compound_j_rewrite.txt +++ b/tests/test_plan_refsols/retail_members_compound_j_rewrite.txt @@ -1,4 +1,4 @@ ROOT(columns=[('n', n_rows)], orderings=[]) AGGREGATE(keys={}, aggregations={'n_rows': COUNT()}) - FILTER(condition=CONTAINS(LOWER(UNMASK::(PTY_UNPROTECT_NAME([last_name]))), 'hu':string), columns={}) + FILTER(condition=ISIN(last_name, ['jNPacL', 'NIAZ', 'eIVERzXY', 'tREJmG', 'cxyIdcy']:array[unknown]), columns={}) SCAN(table=bodo.retail.protected_loyalty_members, columns={'last_name': last_name}) diff --git a/tests/test_plan_refsols/retail_members_filter_name_endswith_rewrite.txt b/tests/test_plan_refsols/retail_members_filter_name_endswith_rewrite.txt index 6c2b46896..98e3e2918 100644 --- a/tests/test_plan_refsols/retail_members_filter_name_endswith_rewrite.txt +++ b/tests/test_plan_refsols/retail_members_filter_name_endswith_rewrite.txt @@ -1,4 +1,4 @@ ROOT(columns=[('n', n_rows)], orderings=[]) AGGREGATE(keys={}, aggregations={'n_rows': COUNT()}) - FILTER(condition=ENDSWITH(UNMASK::(PTY_UNPROTECT([first_name], 'deName')), 'e':string) | ENDSWITH(UNMASK::(PTY_UNPROTECT_NAME([last_name])), 'e':string), columns={}) + FILTER(condition=ISIN(first_name, ['CdCPvr', 'EKjOcM', 'euaiZD', 'DsOlJExPB', 'yyrzPqwYJ', 'veSbKfjZ', 'ltpzJF', 'QlYGrf', 'wrJPcBLnb', 'aPZukW', 'zQhHu', 'rBysMhdxNH', 'xSofz', 'CvHV', 'UhnVJm', 'zixlYsG', 'OXzucS', 'nRhMWQ', 'oKd', 'rASYq', 'mFtb', 'XdaEj', 'StqmwCvYW', 'zPgDshgP', 'OQMeTN', 'fcxiAcj', 'otHnLXhd', 'ZpEzCmV', 'pFTdpMJ', 'eMPChjxY', 'IzjmJq', 'wzDFEL', 'vhZhdhNRf', 'FoEhR', 'RxvEbkd', 'KrdDrun', 'sFBVM']:array[unknown]) | ISIN(last_name, ['XuCRC', 'vcVqo', 'xpFpz', 'rpnEFk', 'brcc', 'teibn', 'KLvNYE', 'OgARIx', 'aPZukW', 'RZnrOO', 'LFtAm', 'VTFJ', 'NaTJ', 'gYR', 'SUvctz', 'SmfgY', 'FgeTdq', 'EYAd', 'iPPF', 'LEcnd', 'YYsb', 
'wlBDLGE', 'xAzGl']:array[unknown]), columns={}) SCAN(table=bodo.retail.protected_loyalty_members, columns={'first_name': first_name, 'last_name': last_name}) diff --git a/tests/test_plan_refsols/retail_transactions_filter_rewrite.txt b/tests/test_plan_refsols/retail_transactions_filter_rewrite.txt index 83503c8c6..29d8a3528 100644 --- a/tests/test_plan_refsols/retail_transactions_filter_rewrite.txt +++ b/tests/test_plan_refsols/retail_transactions_filter_rewrite.txt @@ -1,4 +1,4 @@ ROOT(columns=[('n', ROUND(avg_total_amount, 2:numeric))], orderings=[]) AGGREGATE(keys={}, aggregations={'avg_total_amount': AVG(total_amount)}) - FILTER(condition=MONTH(UNMASK::(PTY_UNPROTECT_TS([transaction_date]))) == 7:numeric & YEAR(UNMASK::(PTY_UNPROTECT_TS([transaction_date]))) == 2025:numeric, columns={'total_amount': total_amount}) - SCAN(table=bodo.retail.transactions, columns={'total_amount': total_amount, 'transaction_date': transaction_date}) + FILTER(condition=False:bool, columns={'total_amount': total_amount}) + SCAN(table=bodo.retail.transactions, columns={'total_amount': total_amount}) diff --git a/tests/test_plan_refsols/retail_transactions_payment_method_cmp_a_rewrite.txt b/tests/test_plan_refsols/retail_transactions_payment_method_cmp_a_rewrite.txt index ff57eb91f..65f17072e 100644 --- a/tests/test_plan_refsols/retail_transactions_payment_method_cmp_a_rewrite.txt +++ b/tests/test_plan_refsols/retail_transactions_payment_method_cmp_a_rewrite.txt @@ -1,4 +1,4 @@ ROOT(columns=[('n', n_rows)], orderings=[]) AGGREGATE(keys={}, aggregations={'n_rows': COUNT()}) - FILTER(condition=payment_method == MASK::(PTY_PROTECT_ACCOUNT(['Cash':string])), columns={}) + FILTER(condition=payment_method == 'CsNw':unknown, columns={}) SCAN(table=bodo.retail.transactions, columns={'payment_method': payment_method}) diff --git a/tests/test_plan_refsols/retail_transactions_payment_method_cmp_b_rewrite.txt b/tests/test_plan_refsols/retail_transactions_payment_method_cmp_b_rewrite.txt index 
89904da39..787e165d3 100644 --- a/tests/test_plan_refsols/retail_transactions_payment_method_cmp_b_rewrite.txt +++ b/tests/test_plan_refsols/retail_transactions_payment_method_cmp_b_rewrite.txt @@ -1,4 +1,4 @@ ROOT(columns=[('n', n_rows)], orderings=[]) AGGREGATE(keys={}, aggregations={'n_rows': COUNT()}) - FILTER(condition=payment_method != MASK::(PTY_PROTECT_ACCOUNT(['Credit Card':string])), columns={}) + FILTER(condition=ISIN(payment_method, ['CsNw', 'DwXR YwQL', 'BaGWrt IqJfFoq']:array[unknown]), columns={}) SCAN(table=bodo.retail.transactions, columns={'payment_method': payment_method}) diff --git a/tests/test_plan_refsols/retail_transactions_payment_method_cmp_c_rewrite.txt b/tests/test_plan_refsols/retail_transactions_payment_method_cmp_c_rewrite.txt index 36ff8674c..62d7b5e54 100644 --- a/tests/test_plan_refsols/retail_transactions_payment_method_cmp_c_rewrite.txt +++ b/tests/test_plan_refsols/retail_transactions_payment_method_cmp_c_rewrite.txt @@ -1,4 +1,4 @@ ROOT(columns=[('n', n_rows)], orderings=[]) AGGREGATE(keys={}, aggregations={'n_rows': COUNT()}) - FILTER(condition=ISIN(payment_method, [Call(op=MASK, inputs=[Literal(value='Cash', type=UnknownType())], return_type=StringType()), Call(op=MASK, inputs=[Literal(value='Gift Card', type=UnknownType())], return_type=StringType())]:bool), columns={}) + FILTER(condition=ISIN(payment_method, ['CsNw', 'DwXR YwQL']:array[unknown]), columns={}) SCAN(table=bodo.retail.transactions, columns={'payment_method': payment_method}) diff --git a/tests/test_plan_refsols/retail_transactions_payment_method_cmp_d_rewrite.txt b/tests/test_plan_refsols/retail_transactions_payment_method_cmp_d_rewrite.txt index 4853a4398..5ad240062 100644 --- a/tests/test_plan_refsols/retail_transactions_payment_method_cmp_d_rewrite.txt +++ b/tests/test_plan_refsols/retail_transactions_payment_method_cmp_d_rewrite.txt @@ -1,4 +1,4 @@ ROOT(columns=[('n', n_rows)], orderings=[]) AGGREGATE(keys={}, aggregations={'n_rows': COUNT()}) - 
FILTER(condition=NOT(ISIN(payment_method, [Call(op=MASK, inputs=[Literal(value='Mobile Payment', type=UnknownType())], return_type=StringType()), Call(op=MASK, inputs=[Literal(value='Gift Card', type=UnknownType())], return_type=StringType())]:bool)), columns={}) + FILTER(condition=ISIN(payment_method, ['CsNw', 'JrVjGo Mdvt']:array[unknown]), columns={}) SCAN(table=bodo.retail.transactions, columns={'payment_method': payment_method}) diff --git a/tests/test_sql_refsols/fsi_accounts_customers_compound_a_rewrite_snowflake.sql b/tests/test_sql_refsols/fsi_accounts_customers_compound_a_rewrite_snowflake.sql index fffd63196..8b1649d7a 100644 --- a/tests/test_sql_refsols/fsi_accounts_customers_compound_a_rewrite_snowflake.sql +++ b/tests/test_sql_refsols/fsi_accounts_customers_compound_a_rewrite_snowflake.sql @@ -3,6 +3,6 @@ SELECT FROM bodo.fsi.accounts AS accounts JOIN bodo.fsi.protected_customers AS protected_customers ON PTY_UNPROTECT(protected_customers.customerid, 'deAccount') = PTY_UNPROTECT_ACCOUNT(accounts.customerid) - AND protected_customers.state = PTY_PROTECT('California', 'deAddress') + AND protected_customers.state = 'V6kSQBaqGv' WHERE - accounts.balance < 20000 AND accounts.currency <> PTY_PROTECT('GBP', 'deAccount') + accounts.balance < 20000 AND accounts.currency IN ('jpb', 'gFr') diff --git a/tests/test_sql_refsols/fsi_accounts_customers_compound_b_rewrite_snowflake.sql b/tests/test_sql_refsols/fsi_accounts_customers_compound_b_rewrite_snowflake.sql index 0e2d85f72..53ca14cce 100644 --- a/tests/test_sql_refsols/fsi_accounts_customers_compound_b_rewrite_snowflake.sql +++ b/tests/test_sql_refsols/fsi_accounts_customers_compound_b_rewrite_snowflake.sql @@ -2,9 +2,9 @@ SELECT COUNT(*) AS n FROM bodo.fsi.accounts AS accounts JOIN bodo.fsi.protected_customers AS protected_customers - ON NOT protected_customers.firstname IN (PTY_PROTECT('Jennifer', 'deName'), PTY_PROTECT('Julio', 'deName'), PTY_PROTECT('Johnson', 'deName'), PTY_PROTECT('Jameson', 'deName'), 
PTY_PROTECT('Michael', 'deName'), PTY_PROTECT('Robert', 'deName')) + ON NOT protected_customers.firstname IN ('tzuhpuCF', 'cPBnsOl', 'NVGimP') AND PTY_UNPROTECT(protected_customers.customerid, 'deAccount') = PTY_UNPROTECT_ACCOUNT(accounts.customerid) - AND protected_customers.state IN (PTY_PROTECT('Georgia', 'deAddress'), PTY_PROTECT('Alabama', 'deAddress'), PTY_PROTECT('Mississippi', 'deAddress'), PTY_PROTECT('Arkansas', 'deAddress'), PTY_PROTECT('Louisiana', 'deAddress'), PTY_PROTECT('Florida', 'deAddress'), PTY_PROTECT('South Carolina', 'deAddress'), PTY_PROTECT('North Carolina', 'deAddress'), PTY_PROTECT('Texas', 'deAddress'), PTY_PROTECT('Tennessee', 'deAddress'), PTY_PROTECT('Missouri', 'deAddress')) + AND protected_customers.state IN ('EdJ6cty', 'raXuWJGK', '4o0uuG1', 'FvlL1x8', 'TY84qyAxy', 'AqjyPuvoU8d', 'q6OaWD9X', 'MZBK0 U3nQzZbb', 'lN1sA AANifXzd', 'JXtZBpRhT', 'YYE75') WHERE YEAR(CAST(PTY_UNPROTECT_DOB(accounts.createddate) AS TIMESTAMP)) <= 2022 - AND accounts.currency IN (PTY_PROTECT('USD', 'deAccount'), PTY_PROTECT('GPB', 'deAccount'), PTY_PROTECT('EUR', 'deAccount'), PTY_PROTECT('JPY', 'deAccount'), PTY_PROTECT('AUD', 'deAccount')) + AND accounts.currency IN ('jpb', 'gFr') diff --git a/tests/test_sql_refsols/fsi_accounts_customers_compound_c_rewrite_snowflake.sql b/tests/test_sql_refsols/fsi_accounts_customers_compound_c_rewrite_snowflake.sql index 8c5b82203..7074bd30f 100644 --- a/tests/test_sql_refsols/fsi_accounts_customers_compound_c_rewrite_snowflake.sql +++ b/tests/test_sql_refsols/fsi_accounts_customers_compound_c_rewrite_snowflake.sql @@ -2,21 +2,4 @@ SELECT COUNT(*) AS n FROM bodo.fsi.accounts WHERE - ( - PTY_UNPROTECT_DOB(createddate) <= '2020-03-13' - OR PTY_UNPROTECT_DOB(createddate) >= '2022-12-25' - ) - AND ( - PTY_UNPROTECT_DOB(createddate) <= '2023-01-15' - OR PTY_UNPROTECT_DOB(createddate) >= '2024-08-04' - ) - AND ( - PTY_UNPROTECT_DOB(createddate) <= '2024-11-08' - OR PTY_UNPROTECT_DOB(createddate) >= '2022-12-25' - ) - AND ( - 
PTY_UNPROTECT_DOB(createddate) <= '2024-11-08' - OR PTY_UNPROTECT_DOB(createddate) >= '2025-06-07' - ) - AND PTY_UNPROTECT_DOB(createddate) <= '2026-03-07' - AND PTY_UNPROTECT_DOB(createddate) >= '2020-01-31' + createddate IN ('3149-05-04', '1478-09-27', '2396-11-12', '0714-10-12', '2461-03-25', '2326-07-19', '2883-05-12', '1368-06-18', '2386-05-20', '2241-06-04', '2413-07-10', '1464-06-25', '2308-05-18', '2690-01-11', '0937-05-21', '0794-10-27', '2856-02-06', '1335-02-11', '1605-10-12', '2456-12-12', '1610-12-28', '1267-04-15', '2133-09-29', '3337-02-07', '1403-12-19', '1484-05-22') diff --git a/tests/test_sql_refsols/fsi_customers_accounts_join_rewrite_snowflake.sql b/tests/test_sql_refsols/fsi_customers_accounts_join_rewrite_snowflake.sql index eb9530c20..2ddee36af 100644 --- a/tests/test_sql_refsols/fsi_customers_accounts_join_rewrite_snowflake.sql +++ b/tests/test_sql_refsols/fsi_customers_accounts_join_rewrite_snowflake.sql @@ -2,4 +2,4 @@ SELECT COUNT(*) AS num_customers_checking_accounts FROM bodo.fsi.accounts WHERE - accounttype <> PTY_PROTECT('checking', 'deAccount') + accounttype IN ('HPlnssRN', 'XADfRcm') diff --git a/tests/test_sql_refsols/fsi_customers_filter_isin_rewrite_snowflake.sql b/tests/test_sql_refsols/fsi_customers_filter_isin_rewrite_snowflake.sql index a3438c207..8a4faefad 100644 --- a/tests/test_sql_refsols/fsi_customers_filter_isin_rewrite_snowflake.sql +++ b/tests/test_sql_refsols/fsi_customers_filter_isin_rewrite_snowflake.sql @@ -2,4 +2,4 @@ SELECT COUNT(*) AS n FROM bodo.fsi.protected_customers WHERE - lastname IN (PTY_PROTECT_NAME('Barnes'), PTY_PROTECT_NAME('Hernandez'), PTY_PROTECT_NAME('Moore')) + lastname IN ('CyypMP', 'TlwQYRsjl', 'SmfgY') diff --git a/tests/test_sql_refsols/fsi_customers_filter_not_isin_rewrite_snowflake.sql b/tests/test_sql_refsols/fsi_customers_filter_not_isin_rewrite_snowflake.sql index 3c0e5c76e..35adebf6f 100644 --- a/tests/test_sql_refsols/fsi_customers_filter_not_isin_rewrite_snowflake.sql +++ 
b/tests/test_sql_refsols/fsi_customers_filter_not_isin_rewrite_snowflake.sql @@ -2,4 +2,4 @@ SELECT COUNT(*) AS n FROM bodo.fsi.protected_customers WHERE - NOT lastname IN (PTY_PROTECT_NAME('Barnes'), PTY_PROTECT_NAME('Hernandez'), PTY_PROTECT_NAME('Moore')) + NOT lastname IN ('CyypMP', 'TlwQYRsjl', 'SmfgY') diff --git a/tests/test_sql_refsols/health_claims_filter_day_rewrite_snowflake.sql b/tests/test_sql_refsols/health_claims_filter_day_rewrite_snowflake.sql index 0d2ef7a75..bdfbd0355 100644 --- a/tests/test_sql_refsols/health_claims_filter_day_rewrite_snowflake.sql +++ b/tests/test_sql_refsols/health_claims_filter_day_rewrite_snowflake.sql @@ -2,4 +2,4 @@ SELECT COUNT(*) AS n FROM bodo.health.claims WHERE - DAY(CAST(PTY_UNPROTECT_DOB(claim_date) AS TIMESTAMP)) = 31 + claim_date IN ('2666-05-02', '2627-10-27', '2896-11-08', '0775-03-22', '1471-09-22', '3175-06-30', '1909-08-08', '2063-10-13', '3095-04-16', '1842-06-18', '1292-11-24', '1324-05-13', '2757-05-10', '1415-01-25') diff --git a/tests/test_sql_refsols/retail_members_compound_a_rewrite_snowflake.sql b/tests/test_sql_refsols/retail_members_compound_a_rewrite_snowflake.sql index 3098195a7..df43b2a35 100644 --- a/tests/test_sql_refsols/retail_members_compound_a_rewrite_snowflake.sql +++ b/tests/test_sql_refsols/retail_members_compound_a_rewrite_snowflake.sql @@ -2,5 +2,5 @@ SELECT COUNT(*) AS n FROM bodo.retail.protected_loyalty_members WHERE - PTY_UNPROTECT(date_of_birth, 'deDOB') >= CAST('2002-01-01' AS DATE) - AND last_name IN (PTY_PROTECT('Johnson', 'deName'), PTY_PROTECT('Robinson', 'deName')) + date_of_birth IN ('1922-08-06', '0913-06-11', '2142-09-01', '2006-03-03', '0915-05-17', '1823-12-26', '1722-02-13', '2208-12-06', '2350-04-16', '2973-02-23', '1484-10-19', '1924-10-25', '2544-09-01', '2363-10-31', '2685-03-23', '1040-04-02', '3136-09-15', '1569-07-03', '1804-07-19', '1543-07-16', '2478-02-14', '0983-02-13', '2243-03-06', '2628-10-02', '2064-12-22', '1463-05-18', '1078-01-28', '1125-11-24', 
'1405-11-12', '3290-02-08', '1278-11-09', '3093-06-09', '1464-06-16', '2613-07-13', '1964-08-20', '1061-01-22', '2797-05-10', '1905-02-26', '1938-07-08', '1535-05-03', '1289-11-13', '1818-01-12', '1073-07-09', '2605-10-18', '1711-07-03', '3018-03-01', '2830-08-29') + AND last_name IN ('xnUVZyS', 'UcoQBfzB') diff --git a/tests/test_sql_refsols/retail_members_compound_b_rewrite_snowflake.sql b/tests/test_sql_refsols/retail_members_compound_b_rewrite_snowflake.sql index 6733bb292..10b3a685e 100644 --- a/tests/test_sql_refsols/retail_members_compound_b_rewrite_snowflake.sql +++ b/tests/test_sql_refsols/retail_members_compound_b_rewrite_snowflake.sql @@ -2,5 +2,4 @@ SELECT COUNT(*) AS n FROM bodo.retail.protected_loyalty_members WHERE - date_of_birth = PTY_PROTECT(CAST('1979-03-07' AS DATE), 'deDOB') - AND last_name <> PTY_PROTECT('Smith', 'deName') + date_of_birth = '1622-10-03' AND last_name <> PTY_PROTECT('Smith', 'deName') diff --git a/tests/test_sql_refsols/retail_members_compound_e_rewrite_snowflake.sql b/tests/test_sql_refsols/retail_members_compound_e_rewrite_snowflake.sql index 99b1a295a..75eab3e4e 100644 --- a/tests/test_sql_refsols/retail_members_compound_e_rewrite_snowflake.sql +++ b/tests/test_sql_refsols/retail_members_compound_e_rewrite_snowflake.sql @@ -2,5 +2,4 @@ SELECT COUNT(*) AS n FROM bodo.retail.protected_loyalty_members WHERE - PTY_UNPROTECT(date_of_birth, 'deDOB') < CAST('1983-01-30' AS DATE) - AND PTY_UNPROTECT(date_of_birth, 'deDOB') >= CAST('1983-01-10' AS DATE) + date_of_birth IN ('2637-10-01', '1403-11-22') diff --git a/tests/test_sql_refsols/retail_members_compound_f_rewrite_snowflake.sql b/tests/test_sql_refsols/retail_members_compound_f_rewrite_snowflake.sql index 0c84a8637..9b69600a5 100644 --- a/tests/test_sql_refsols/retail_members_compound_f_rewrite_snowflake.sql +++ b/tests/test_sql_refsols/retail_members_compound_f_rewrite_snowflake.sql @@ -2,5 +2,4 @@ SELECT COUNT(*) AS n FROM bodo.retail.protected_loyalty_members WHERE - 
PTY_UNPROTECT(date_of_birth, 'deDOB') <= CAST('1976-07-28' AS DATE) - AND PTY_UNPROTECT(date_of_birth, 'deDOB') > CAST('1976-07-01' AS DATE) + date_of_birth IN ('1357-07-11', '0988-09-15') diff --git a/tests/test_sql_refsols/retail_members_compound_j_rewrite_snowflake.sql b/tests/test_sql_refsols/retail_members_compound_j_rewrite_snowflake.sql index 7e1177fd7..372fff9b1 100644 --- a/tests/test_sql_refsols/retail_members_compound_j_rewrite_snowflake.sql +++ b/tests/test_sql_refsols/retail_members_compound_j_rewrite_snowflake.sql @@ -2,4 +2,4 @@ SELECT COUNT(*) AS n FROM bodo.retail.protected_loyalty_members WHERE - CONTAINS(LOWER(PTY_UNPROTECT_NAME(last_name)), 'hu') + last_name IN ('jNPacL', 'NIAZ', 'eIVERzXY', 'tREJmG', 'cxyIdcy') diff --git a/tests/test_sql_refsols/retail_members_filter_name_endswith_rewrite_snowflake.sql b/tests/test_sql_refsols/retail_members_filter_name_endswith_rewrite_snowflake.sql index 898dc0225..a8d47e303 100644 --- a/tests/test_sql_refsols/retail_members_filter_name_endswith_rewrite_snowflake.sql +++ b/tests/test_sql_refsols/retail_members_filter_name_endswith_rewrite_snowflake.sql @@ -2,5 +2,5 @@ SELECT COUNT(*) AS n FROM bodo.retail.protected_loyalty_members WHERE - ENDSWITH(PTY_UNPROTECT(first_name, 'deName'), 'e') - OR ENDSWITH(PTY_UNPROTECT_NAME(last_name), 'e') + first_name IN ('CdCPvr', 'EKjOcM', 'euaiZD', 'DsOlJExPB', 'yyrzPqwYJ', 'veSbKfjZ', 'ltpzJF', 'QlYGrf', 'wrJPcBLnb', 'aPZukW', 'zQhHu', 'rBysMhdxNH', 'xSofz', 'CvHV', 'UhnVJm', 'zixlYsG', 'OXzucS', 'nRhMWQ', 'oKd', 'rASYq', 'mFtb', 'XdaEj', 'StqmwCvYW', 'zPgDshgP', 'OQMeTN', 'fcxiAcj', 'otHnLXhd', 'ZpEzCmV', 'pFTdpMJ', 'eMPChjxY', 'IzjmJq', 'wzDFEL', 'vhZhdhNRf', 'FoEhR', 'RxvEbkd', 'KrdDrun', 'sFBVM') + OR last_name IN ('XuCRC', 'vcVqo', 'xpFpz', 'rpnEFk', 'brcc', 'teibn', 'KLvNYE', 'OgARIx', 'aPZukW', 'RZnrOO', 'LFtAm', 'VTFJ', 'NaTJ', 'gYR', 'SUvctz', 'SmfgY', 'FgeTdq', 'EYAd', 'iPPF', 'LEcnd', 'YYsb', 'wlBDLGE', 'xAzGl') diff --git 
a/tests/test_sql_refsols/retail_transactions_filter_rewrite_snowflake.sql b/tests/test_sql_refsols/retail_transactions_filter_rewrite_snowflake.sql index c00c12c43..34ca0f1ab 100644 --- a/tests/test_sql_refsols/retail_transactions_filter_rewrite_snowflake.sql +++ b/tests/test_sql_refsols/retail_transactions_filter_rewrite_snowflake.sql @@ -2,5 +2,4 @@ SELECT ROUND(AVG(total_amount), 2) AS n FROM bodo.retail.transactions WHERE - MONTH(CAST(PTY_UNPROTECT_TS(transaction_date) AS TIMESTAMP)) = 7 - AND YEAR(CAST(PTY_UNPROTECT_TS(transaction_date) AS TIMESTAMP)) = 2025 + FALSE diff --git a/tests/test_sql_refsols/retail_transactions_payment_method_cmp_a_rewrite_snowflake.sql b/tests/test_sql_refsols/retail_transactions_payment_method_cmp_a_rewrite_snowflake.sql index caea10607..3153704a5 100644 --- a/tests/test_sql_refsols/retail_transactions_payment_method_cmp_a_rewrite_snowflake.sql +++ b/tests/test_sql_refsols/retail_transactions_payment_method_cmp_a_rewrite_snowflake.sql @@ -2,4 +2,4 @@ SELECT COUNT(*) AS n FROM bodo.retail.transactions WHERE - payment_method = PTY_PROTECT_ACCOUNT('Cash') + payment_method = 'CsNw' diff --git a/tests/test_sql_refsols/retail_transactions_payment_method_cmp_b_rewrite_snowflake.sql b/tests/test_sql_refsols/retail_transactions_payment_method_cmp_b_rewrite_snowflake.sql index 7277069b6..6a6c763ee 100644 --- a/tests/test_sql_refsols/retail_transactions_payment_method_cmp_b_rewrite_snowflake.sql +++ b/tests/test_sql_refsols/retail_transactions_payment_method_cmp_b_rewrite_snowflake.sql @@ -2,4 +2,4 @@ SELECT COUNT(*) AS n FROM bodo.retail.transactions WHERE - payment_method <> PTY_PROTECT_ACCOUNT('Credit Card') + payment_method IN ('CsNw', 'DwXR YwQL', 'BaGWrt IqJfFoq') diff --git a/tests/test_sql_refsols/retail_transactions_payment_method_cmp_c_rewrite_snowflake.sql b/tests/test_sql_refsols/retail_transactions_payment_method_cmp_c_rewrite_snowflake.sql index f65b42330..a4b1115bd 100644 --- 
a/tests/test_sql_refsols/retail_transactions_payment_method_cmp_c_rewrite_snowflake.sql +++ b/tests/test_sql_refsols/retail_transactions_payment_method_cmp_c_rewrite_snowflake.sql @@ -2,4 +2,4 @@ SELECT COUNT(*) AS n FROM bodo.retail.transactions WHERE - payment_method IN (PTY_PROTECT_ACCOUNT('Cash'), PTY_PROTECT_ACCOUNT('Gift Card')) + payment_method IN ('CsNw', 'DwXR YwQL') diff --git a/tests/test_sql_refsols/retail_transactions_payment_method_cmp_d_rewrite_snowflake.sql b/tests/test_sql_refsols/retail_transactions_payment_method_cmp_d_rewrite_snowflake.sql index 73128145f..68a137bb0 100644 --- a/tests/test_sql_refsols/retail_transactions_payment_method_cmp_d_rewrite_snowflake.sql +++ b/tests/test_sql_refsols/retail_transactions_payment_method_cmp_d_rewrite_snowflake.sql @@ -2,4 +2,4 @@ SELECT COUNT(*) AS n FROM bodo.retail.transactions WHERE - NOT payment_method IN (PTY_PROTECT_ACCOUNT('Mobile Payment'), PTY_PROTECT_ACCOUNT('Gift Card')) + payment_method IN ('CsNw', 'JrVjGo Mdvt') From 7e98a09c8ba49d0b56df0cd2d045cba8e79c66bb Mon Sep 17 00:00:00 2001 From: knassre-bodo Date: Mon, 22 Dec 2025 10:10:00 -0800 Subject: [PATCH 37/40] More documentation --- tests/mock_server/api_mock_server.py | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/tests/mock_server/api_mock_server.py b/tests/mock_server/api_mock_server.py index a810d765f..47b53fd97 100644 --- a/tests/mock_server/api_mock_server.py +++ b/tests/mock_server/api_mock_server.py @@ -63,8 +63,12 @@ def batch_evaluate( "index": payload.items.index(item) + 1, } if table_result is None: + # If the key is not found in the lookup table, return an error for + # this item of the batch. out_item["result"] = "ERROR" else: + # Otherwise, generate a successful response based on the lookup + # table. 
output_case, output_list = table_result out_item["SUCCESS"] = "SUCCESS" out_item["response"] = { @@ -96,6 +100,10 @@ def batch_evaluate( # Adding the new item to the batch output responses.append(out_item) + # Determine overall result: + # - SUCCESS if all items succeeded + # - ERROR if all items failed + # - PARTIAL_FAILURE otherwise result: str if successful_responses == len(payload.items): result = "SUCCESS" From af7089edefd99927859be47a65bfe1c473f9827c Mon Sep 17 00:00:00 2001 From: knassre-bodo Date: Mon, 22 Dec 2025 19:53:55 -0800 Subject: [PATCH 38/40] More tests after TS fixed, still need to iterate and remove prints [RUN ALL] --- pydough/mask_server/mask_server.py | 4 + tests/test_masked_sf.py | 210 ++++++++++++++++-- .../retail_transactions_filter_rewrite.txt | 4 +- .../retail_transactions_ts_raw.txt | 20 ++ .../retail_transactions_ts_rewrite.txt | 20 ++ ..._transactions_filter_rewrite_snowflake.sql | 3 +- .../retail_transactions_ts_raw_snowflake.sql | 50 +++++ ...tail_transactions_ts_rewrite_snowflake.sql | 43 ++++ 8 files changed, 337 insertions(+), 17 deletions(-) create mode 100644 tests/test_plan_refsols/retail_transactions_ts_raw.txt create mode 100644 tests/test_plan_refsols/retail_transactions_ts_rewrite.txt create mode 100644 tests/test_sql_refsols/retail_transactions_ts_raw_snowflake.sql create mode 100644 tests/test_sql_refsols/retail_transactions_ts_rewrite_snowflake.sql diff --git a/pydough/mask_server/mask_server.py b/pydough/mask_server/mask_server.py index 7909f108c..54df3d70e 100644 --- a/pydough/mask_server/mask_server.py +++ b/pydough/mask_server/mask_server.py @@ -193,6 +193,10 @@ def simplify_simple_expression_batch( request: ServerRequest = self.generate_request(batch, dry_run, hard_limit) response_json = self.connection.send_server_request(request) result: list[MaskServerOutput] = self.generate_result(response_json) + print() + print(request.payload) + print() + print(response_json) return result diff --git a/tests/test_masked_sf.py 
b/tests/test_masked_sf.py index 56fcb91bf..ec54cfe7c 100644 --- a/tests/test_masked_sf.py +++ b/tests/test_masked_sf.py @@ -1,12 +1,22 @@ import datetime +import io from collections.abc import Callable +from contextlib import redirect_stdout import pandas as pd import pytest +from pydough import to_sql from pydough.database_connectors import DatabaseContext, DatabaseDialect from pydough.mask_server import MaskServerInfo -from tests.testing_utilities import graph_fetcher, temp_env_override +from pydough.metadata import GraphMetadata +from pydough.unqualified import UnqualifiedNode +from tests.testing_utilities import ( + extract_batch_requests_from_logs, + graph_fetcher, + temp_env_override, + transform_and_exec_pydough, +) from .testing_sf_masked_utilities import ( PyDoughSnowflakeMaskedTest, @@ -319,6 +329,26 @@ ), id="retail_transactions_filter", ), + pytest.param( + PyDoughSnowflakeMaskedTest( + "t1 = transactions.WHERE((DAY(transaction_date) == 1) & (HOUR(transaction_date) == 7))\n" + "t2 = transactions.WHERE((DAY(transaction_date) == 2) & (HOUR(transaction_date) == 7))\n" + "t3 = transactions.WHERE((DAY(transaction_date) == 1) & (HOUR(transaction_date) == 8))\n" + "t4 = transactions.WHERE((DAY(transaction_date) == 2) & (HOUR(transaction_date) == 8))\n" + "t5 = transactions.WHERE(((DAY(transaction_date) < 4) & (HOUR(transaction_date) < 3)) | ((MINUTE(transaction_date) == SECOND(transaction_date)) & (HOUR(transaction_date) < 3)))\n" + "result = RETAIL.CALCULATE(n1=COUNT(t1), n2=COUNT(t2), n3=COUNT(t3), n4=COUNT(t4), n5=COUNT(t5))", + "RETAIL", + "retail_transactions_ts", + answers={ + "NONE": None, + "PARTIAL": None, + "FULL": pd.DataFrame( + {"n1": [2], "n2": [6], "n3": [3], "n4": [6], "n5": [52]} + ), + }, + ), + id="retail_transactions_ts", + ), pytest.param( PyDoughSnowflakeMaskedTest( "acc_typs = accounts.PARTITION(name='account_types', by=account_type)\n" @@ -814,6 +844,66 @@ ), id="fsi_accounts_customers_compound_c", ), + pytest.param( + 
PyDoughSnowflakeMaskedTest( + "selected_members = loyalty_members.WHERE(CONTAINS('GT', UPPER(first_name[:1])))\n" + "result = RETAIL.CALCULATE(n=COUNT(selected_members))", + "RETAIL", + "retail_names_analysis_a", + order_sensitive=True, + answers={ + "NONE": None, + "PARTIAL": None, + "FULL": pd.DataFrame({"n": [25]}), + }, + ), + id="retail_names_analysis_a", + ), + pytest.param( + PyDoughSnowflakeMaskedTest( + "selected_members = loyalty_members.WHERE(CONTAINS('day', LOWER(first_name[:2])))\n" + "result = RETAIL.CALCULATE(n=COUNT(selected_members))", + "RETAIL", + "retail_names_analysis_b", + order_sensitive=True, + answers={ + "NONE": None, + "PARTIAL": None, + "FULL": pd.DataFrame({"n": [11]}), + }, + ), + id="retail_names_analysis_b", + ), + pytest.param( + PyDoughSnowflakeMaskedTest( + "selected_members = loyalty_members.WHERE(YEAR(date_of_birth) < 2026)\n" + "result = RETAIL.CALCULATE(n=COUNT(selected_members))", + "RETAIL", + "retail_all", + order_sensitive=True, + answers={ + "NONE": None, + "PARTIAL": None, + "FULL": pd.DataFrame({"n": [500]}), + }, + ), + id="retail_all", + ), + pytest.param( + PyDoughSnowflakeMaskedTest( + "selected_members = loyalty_members.WHERE(YEAR(date_of_birth) >= 2026)\n" + "result = RETAIL.CALCULATE(n=COUNT(selected_members))", + "RETAIL", + "retail_none", + order_sensitive=True, + answers={ + "NONE": None, + "PARTIAL": None, + "FULL": pd.DataFrame({"n": [0]}), + }, + ), + id="retail_none", + ), ], ) def sf_masked_test_data( @@ -843,12 +933,13 @@ def test_pipeline_until_relational_masked_sf( file_path: str = get_plan_test_filename( f"{sf_masked_test_data.test_name}_{enable_mask_rewrites}" ) - sf_masked_test_data.run_relational_test( - get_sf_masked_graphs, - file_path, - update_tests, - mask_server=true_mask_server_info, - ) + with redirect_stdout(io.StringIO()): + sf_masked_test_data.run_relational_test( + get_sf_masked_graphs, + file_path, + update_tests, + mask_server=true_mask_server_info, + ) 
@temp_env_override({"PYDOUGH_MASK_SERVER_HARD_LIMIT": "50"}) @@ -870,13 +961,14 @@ def test_pipeline_until_sql_masked_sf( file_path: str = get_sql_test_filename( f"{sf_masked_test_data.test_name}_{enable_mask_rewrites}", sf_data.dialect ) - sf_masked_test_data.run_sql_test( - get_sf_masked_graphs, - file_path, - update_tests, - sf_data, - mask_server=true_mask_server_info, - ) + with redirect_stdout(io.StringIO()): + sf_masked_test_data.run_sql_test( + get_sf_masked_graphs, + file_path, + update_tests, + sf_data, + mask_server=true_mask_server_info, + ) @temp_env_override({"PYDOUGH_MASK_SERVER_HARD_LIMIT": "50"}) @@ -897,9 +989,99 @@ def test_pipeline_e2e_masked_sf( sf_masked_test_data.account_type = account_type if sf_masked_test_data.answers.get(account_type) is None: pytest.skip(f"No reference solution for account_type={account_type}") + # with redirect_stdout(io.StringIO()): sf_masked_test_data.run_e2e_test( get_sf_masked_graphs, sf_masked_context("BODO", sf_masked_test_data.graph_name, account_type), coerce_types=True, mask_server=true_mask_server_info, ) + + +@pytest.mark.sf_masked +@temp_env_override( + {"PYDOUGH_MASK_SERVER_HARD_LIMIT": "50", "PYDOUGH_ENABLE_MASK_REWRITES": "1"} +) +@pytest.mark.parametrize( + ["graph_name", "pydough_code", "batch_requests"], + [ + pytest.param( + "FSI", + "selected_customers = customers.WHERE(last_name == 'Adams')\n" + "result = FSI.CALCULATE(n=COUNT(selected_customers))", + [ + { + "DRY_RUN", + "bodo/fsi/protected_customers/lastname: ['EQUAL', 2, '__col__', 'Adams']", + }, + { + "bodo/fsi/protected_customers/lastname: ['EQUAL', 2, '__col__', 'Adams']", + }, + ], + id="fsi_customers_a", + ), + pytest.param( + "FSI", + "c1 = customers.WHERE((MONTH(date_of_birth) == 6) & (DAY(date_of_birth) == 15))\n" + "c2 = customers.WHERE((YEAR(date_of_birth) == 1970) & (MONTH(date_of_birth) == 6))\n" + "c3 = customers.WHERE((YEAR(date_of_birth) == 1970) & (DAY(date_of_birth) == 15))\n" + "c4 = customers.WHERE((YEAR(date_of_birth) == 
1970) & (MONTH(date_of_birth) == 6) & (DAY(date_of_birth) == 15))\n" + "result = FSI.CALCULATE(n1=COUNT(c1), n2=COUNT(c2), n3=COUNT(c3), n4=COUNT(c4))", + [ + { + "DRY_RUN", + "bodo/fsi/protected_customers/dob: ['AND', 2, 'EQUAL', 2, 'DAY', 1, '__col__', 15, 'EQUAL', 2, 'MONTH', 1, '__col__', 6]", + "bodo/fsi/protected_customers/dob: ['AND', 2, 'EQUAL', 2, 'DAY', 1, '__col__', 15, 'EQUAL', 2, 'YEAR', 1, '__col__', 1970]", + "bodo/fsi/protected_customers/dob: ['AND', 2, 'EQUAL', 2, 'MONTH', 1, '__col__', 6, 'EQUAL', 2, 'YEAR', 1, '__col__', 1970]", + "bodo/fsi/protected_customers/dob: ['AND', 3, 'EQUAL', 2, 'DAY', 1, '__col__', 15, 'EQUAL', 2, 'MONTH', 1, '__col__', 6, 'EQUAL', 2, 'YEAR', 1, '__col__', 1970]", + "bodo/fsi/protected_customers/dob: ['EQUAL', 2, 'DAY', 1, '__col__', 15]", + "bodo/fsi/protected_customers/dob: ['EQUAL', 2, 'MONTH', 1, '__col__', 6]", + "bodo/fsi/protected_customers/dob: ['EQUAL', 2, 'YEAR', 1, '__col__', 1970]", + }, + { + "bodo/fsi/protected_customers/dob: ['AND', 2, 'EQUAL', 2, 'DAY', 1, '__col__', 15, 'EQUAL', 2, 'MONTH', 1, '__col__', 6]", + "bodo/fsi/protected_customers/dob: ['AND', 2, 'EQUAL', 2, 'DAY', 1, '__col__', 15, 'EQUAL', 2, 'YEAR', 1, '__col__', 1970]", + "bodo/fsi/protected_customers/dob: ['AND', 2, 'EQUAL', 2, 'MONTH', 1, '__col__', 6, 'EQUAL', 2, 'YEAR', 1, '__col__', 1970]", + "bodo/fsi/protected_customers/dob: ['AND', 3, 'EQUAL', 2, 'DAY', 1, '__col__', 15, 'EQUAL', 2, 'MONTH', 1, '__col__', 6, 'EQUAL', 2, 'YEAR', 1, '__col__', 1970]", + }, + ], + id="fsi_customers_b", + ), + ], +) +def test_masked_sf_mask_server_logging( + graph_name: str, + pydough_code: str, + batch_requests: list[set[str]], + get_sf_masked_graphs: graph_fetcher, # noqa: F811 + true_mask_server_info: MaskServerInfo, + caplog, +): + """ + Tests whether, during the conversion of the PyDough queries on the masked + Snowflake dataset into SQL text, the correct logging calls are made + regarding batches sent to the mask server. 
This is to ensure that the calls + are being batched as expected, the right calls are being sent to the server, + and expressions that are non-predicates are not being sent, even if they are + a valid sub-expression of a predicate that can be sent. + """ + # Obtain the graph and the unqualified node + graph: GraphMetadata = get_sf_masked_graphs(graph_name) + root: UnqualifiedNode = transform_and_exec_pydough( + pydough_code, + graph, + {"datetime": datetime, "pd": pd}, + ) + + # Convert the PyDough code to SQL text, while capturing + # stdout to avoid polluting the console with logging calls + with redirect_stdout(io.StringIO()): + to_sql(root, metadata=graph, mask_server=true_mask_server_info) + + # Retrieve the output from the captured logger output + batch_requests_made: list[set[str]] = extract_batch_requests_from_logs(caplog.text) + + # Compare the expected batch requests to those made. + assert batch_requests_made == batch_requests, ( + "The batch requests made do not match the expected batch requests." 
+ ) diff --git a/tests/test_plan_refsols/retail_transactions_filter_rewrite.txt b/tests/test_plan_refsols/retail_transactions_filter_rewrite.txt index 29d8a3528..83503c8c6 100644 --- a/tests/test_plan_refsols/retail_transactions_filter_rewrite.txt +++ b/tests/test_plan_refsols/retail_transactions_filter_rewrite.txt @@ -1,4 +1,4 @@ ROOT(columns=[('n', ROUND(avg_total_amount, 2:numeric))], orderings=[]) AGGREGATE(keys={}, aggregations={'avg_total_amount': AVG(total_amount)}) - FILTER(condition=False:bool, columns={'total_amount': total_amount}) - SCAN(table=bodo.retail.transactions, columns={'total_amount': total_amount}) + FILTER(condition=MONTH(UNMASK::(PTY_UNPROTECT_TS([transaction_date]))) == 7:numeric & YEAR(UNMASK::(PTY_UNPROTECT_TS([transaction_date]))) == 2025:numeric, columns={'total_amount': total_amount}) + SCAN(table=bodo.retail.transactions, columns={'total_amount': total_amount, 'transaction_date': transaction_date}) diff --git a/tests/test_plan_refsols/retail_transactions_ts_raw.txt b/tests/test_plan_refsols/retail_transactions_ts_raw.txt new file mode 100644 index 000000000..46d76c896 --- /dev/null +++ b/tests/test_plan_refsols/retail_transactions_ts_raw.txt @@ -0,0 +1,20 @@ +ROOT(columns=[('n1', n_rows), ('n2', agg_1), ('n3', agg_2), ('n4', agg_3), ('n5', agg_4)], orderings=[]) + JOIN(condition=True:bool, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_ACCESS, columns={'agg_1': t0.agg_1, 'agg_2': t0.agg_2, 'agg_3': t0.agg_3, 'agg_4': t1.n_rows, 'n_rows': t0.n_rows}) + JOIN(condition=True:bool, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_ACCESS, columns={'agg_1': t0.agg_1, 'agg_2': t0.agg_2, 'agg_3': t1.n_rows, 'n_rows': t0.n_rows}) + JOIN(condition=True:bool, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_ACCESS, columns={'agg_1': t0.agg_1, 'agg_2': t1.n_rows, 'n_rows': t0.n_rows}) + JOIN(condition=True:bool, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_ACCESS, 
columns={'agg_1': t1.n_rows, 'n_rows': t0.n_rows}) + AGGREGATE(keys={}, aggregations={'n_rows': COUNT()}) + FILTER(condition=DAY(UNMASK::(PTY_UNPROTECT_TS([transaction_date]))) == 1:numeric & HOUR(UNMASK::(PTY_UNPROTECT_TS([transaction_date]))) == 7:numeric, columns={}) + SCAN(table=bodo.retail.transactions, columns={'transaction_date': transaction_date}) + AGGREGATE(keys={}, aggregations={'n_rows': COUNT()}) + FILTER(condition=DAY(UNMASK::(PTY_UNPROTECT_TS([transaction_date]))) == 2:numeric & HOUR(UNMASK::(PTY_UNPROTECT_TS([transaction_date]))) == 7:numeric, columns={}) + SCAN(table=bodo.retail.transactions, columns={'transaction_date': transaction_date}) + AGGREGATE(keys={}, aggregations={'n_rows': COUNT()}) + FILTER(condition=DAY(UNMASK::(PTY_UNPROTECT_TS([transaction_date]))) == 1:numeric & HOUR(UNMASK::(PTY_UNPROTECT_TS([transaction_date]))) == 8:numeric, columns={}) + SCAN(table=bodo.retail.transactions, columns={'transaction_date': transaction_date}) + AGGREGATE(keys={}, aggregations={'n_rows': COUNT()}) + FILTER(condition=DAY(UNMASK::(PTY_UNPROTECT_TS([transaction_date]))) == 2:numeric & HOUR(UNMASK::(PTY_UNPROTECT_TS([transaction_date]))) == 8:numeric, columns={}) + SCAN(table=bodo.retail.transactions, columns={'transaction_date': transaction_date}) + AGGREGATE(keys={}, aggregations={'n_rows': COUNT()}) + FILTER(condition=DAY(UNMASK::(PTY_UNPROTECT_TS([transaction_date]))) < 4:numeric & HOUR(UNMASK::(PTY_UNPROTECT_TS([transaction_date]))) < 3:numeric | MINUTE(UNMASK::(PTY_UNPROTECT_TS([transaction_date]))) == SECOND(UNMASK::(PTY_UNPROTECT_TS([transaction_date]))) & HOUR(UNMASK::(PTY_UNPROTECT_TS([transaction_date]))) < 3:numeric, columns={}) + SCAN(table=bodo.retail.transactions, columns={'transaction_date': transaction_date}) diff --git a/tests/test_plan_refsols/retail_transactions_ts_rewrite.txt b/tests/test_plan_refsols/retail_transactions_ts_rewrite.txt new file mode 100644 index 000000000..a03d36ac8 --- /dev/null +++ 
b/tests/test_plan_refsols/retail_transactions_ts_rewrite.txt @@ -0,0 +1,20 @@ +ROOT(columns=[('n1', n_rows), ('n2', agg_1), ('n3', agg_2), ('n4', agg_3), ('n5', agg_4)], orderings=[]) + JOIN(condition=True:bool, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_ACCESS, columns={'agg_1': t0.agg_1, 'agg_2': t0.agg_2, 'agg_3': t0.agg_3, 'agg_4': t1.n_rows, 'n_rows': t0.n_rows}) + JOIN(condition=True:bool, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_ACCESS, columns={'agg_1': t0.agg_1, 'agg_2': t0.agg_2, 'agg_3': t1.n_rows, 'n_rows': t0.n_rows}) + JOIN(condition=True:bool, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_ACCESS, columns={'agg_1': t0.agg_1, 'agg_2': t1.n_rows, 'n_rows': t0.n_rows}) + JOIN(condition=True:bool, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_ACCESS, columns={'agg_1': t1.n_rows, 'n_rows': t0.n_rows}) + AGGREGATE(keys={}, aggregations={'n_rows': COUNT()}) + FILTER(condition=ISIN(transaction_date, ['2178-03-20 07:19:29', '2825-09-23 07:37:08']:array[unknown]), columns={}) + SCAN(table=bodo.retail.transactions, columns={'transaction_date': transaction_date}) + AGGREGATE(keys={}, aggregations={'n_rows': COUNT()}) + FILTER(condition=ISIN(transaction_date, ['1010-12-08 07:23:35', '2328-01-19 07:33:25', '1577-03-20 07:41:29', '1345-03-06 07:41:47', '0937-05-21 07:27:48', '2176-01-07 07:07:03']:array[unknown]), columns={}) + SCAN(table=bodo.retail.transactions, columns={'transaction_date': transaction_date}) + AGGREGATE(keys={}, aggregations={'n_rows': COUNT()}) + FILTER(condition=ISIN(transaction_date, ['3120-07-22 08:30:44', '1890-02-18 08:21:13', '1890-02-18 08:46:51']:array[unknown]), columns={}) + SCAN(table=bodo.retail.transactions, columns={'transaction_date': transaction_date}) + AGGREGATE(keys={}, aggregations={'n_rows': COUNT()}) + FILTER(condition=ISIN(transaction_date, ['2052-11-18 08:24:33', '2052-11-18 08:32:00', '1577-03-20 08:03:51', '1577-03-20 
08:32:17', '2550-01-17 08:56:44', '1551-03-04 08:36:08']:array[unknown]), columns={}) + SCAN(table=bodo.retail.transactions, columns={'transaction_date': transaction_date}) + AGGREGATE(keys={}, aggregations={'n_rows': COUNT()}) + FILTER(condition=ISIN(transaction_date, ['2268-07-06 01:50:11', '3056-08-07 01:18:26', '3120-07-22 02:43:20', '1010-12-08 01:47:15', '1440-10-15 02:26:30', '3054-12-02 00:51:55', '3031-02-17 00:54:21', '1539-02-23 00:49:34', '2418-09-09 01:12:48', '2418-09-09 02:09:31', '2551-01-12 00:34:57', '3141-01-25 02:24:01', '2328-01-19 01:20:40', '1577-03-20 00:27:19', '1608-08-20 00:10:55', '1608-08-20 01:12:55', '1608-08-20 02:14:47', '2825-09-23 02:31:19', '1286-12-21 00:21:24', '1286-12-21 01:25:46', '3300-07-12 00:15:35', '2059-07-23 01:56:15', '2955-06-27 00:48:34', '2955-06-27 01:24:43', '0937-05-21 00:40:43', '0930-11-28 02:44:19', '1605-10-12 00:58:57', '0781-08-29 02:28:10', '2374-09-21 00:21:42', '2374-09-21 02:10:55', '3022-05-13 01:56:21', '3088-03-30 01:09:15', '3088-03-30 02:38:56', '1757-01-16 00:20:29', '3287-10-20 01:17:31', '2555-09-08 00:40:20', '2555-09-08 01:20:22', '2555-09-08 02:36:58', '2176-01-07 02:50:08', '2282-06-16 00:21:35', '2595-05-23 01:32:01', '3237-05-26 01:19:24', '3237-05-26 01:52:49', '2780-03-19 01:32:32', '2780-03-19 02:33:01']:array[unknown]) | ISIN(transaction_date, ['1752-07-20 01:18:18', '1880-04-06 00:47:47', '2956-09-24 00:03:03', '1868-06-13 01:22:22', '0780-03-23 01:14:14', '1598-04-24 01:11:11', '0763-04-15 00:16:16', '2780-03-19 01:32:32']:array[unknown]), columns={}) + SCAN(table=bodo.retail.transactions, columns={'transaction_date': transaction_date}) diff --git a/tests/test_sql_refsols/retail_transactions_filter_rewrite_snowflake.sql b/tests/test_sql_refsols/retail_transactions_filter_rewrite_snowflake.sql index 34ca0f1ab..c00c12c43 100644 --- a/tests/test_sql_refsols/retail_transactions_filter_rewrite_snowflake.sql +++ b/tests/test_sql_refsols/retail_transactions_filter_rewrite_snowflake.sql @@ 
-2,4 +2,5 @@ SELECT ROUND(AVG(total_amount), 2) AS n FROM bodo.retail.transactions WHERE - FALSE + MONTH(CAST(PTY_UNPROTECT_TS(transaction_date) AS TIMESTAMP)) = 7 + AND YEAR(CAST(PTY_UNPROTECT_TS(transaction_date) AS TIMESTAMP)) = 2025 diff --git a/tests/test_sql_refsols/retail_transactions_ts_raw_snowflake.sql b/tests/test_sql_refsols/retail_transactions_ts_raw_snowflake.sql new file mode 100644 index 000000000..f4a984e71 --- /dev/null +++ b/tests/test_sql_refsols/retail_transactions_ts_raw_snowflake.sql @@ -0,0 +1,50 @@ +WITH _s0 AS ( + SELECT + COUNT(*) AS n_rows + FROM bodo.retail.transactions + WHERE + DAY(CAST(PTY_UNPROTECT_TS(transaction_date) AS TIMESTAMP)) = 1 + AND HOUR(CAST(PTY_UNPROTECT_TS(transaction_date) AS TIMESTAMP)) = 7 +), _s1 AS ( + SELECT + COUNT(*) AS n_rows + FROM bodo.retail.transactions + WHERE + DAY(CAST(PTY_UNPROTECT_TS(transaction_date) AS TIMESTAMP)) = 2 + AND HOUR(CAST(PTY_UNPROTECT_TS(transaction_date) AS TIMESTAMP)) = 7 +), _s3 AS ( + SELECT + COUNT(*) AS n_rows + FROM bodo.retail.transactions + WHERE + DAY(CAST(PTY_UNPROTECT_TS(transaction_date) AS TIMESTAMP)) = 1 + AND HOUR(CAST(PTY_UNPROTECT_TS(transaction_date) AS TIMESTAMP)) = 8 +), _s5 AS ( + SELECT + COUNT(*) AS n_rows + FROM bodo.retail.transactions + WHERE + DAY(CAST(PTY_UNPROTECT_TS(transaction_date) AS TIMESTAMP)) = 2 + AND HOUR(CAST(PTY_UNPROTECT_TS(transaction_date) AS TIMESTAMP)) = 8 +), _s7 AS ( + SELECT + COUNT(*) AS n_rows + FROM bodo.retail.transactions + WHERE + ( + DAY(CAST(PTY_UNPROTECT_TS(transaction_date) AS TIMESTAMP)) < 4 + OR MINUTE(CAST(PTY_UNPROTECT_TS(transaction_date) AS TIMESTAMP)) = SECOND(CAST(PTY_UNPROTECT_TS(transaction_date) AS TIMESTAMP)) + ) + AND HOUR(CAST(PTY_UNPROTECT_TS(transaction_date) AS TIMESTAMP)) < 3 +) +SELECT + _s0.n_rows AS n1, + _s1.n_rows AS n2, + _s3.n_rows AS n3, + _s5.n_rows AS n4, + _s7.n_rows AS n5 +FROM _s0 AS _s0 +CROSS JOIN _s1 AS _s1 +CROSS JOIN _s3 AS _s3 +CROSS JOIN _s5 AS _s5 +CROSS JOIN _s7 AS _s7 diff --git 
a/tests/test_sql_refsols/retail_transactions_ts_rewrite_snowflake.sql b/tests/test_sql_refsols/retail_transactions_ts_rewrite_snowflake.sql new file mode 100644 index 000000000..b8cd69ee9 --- /dev/null +++ b/tests/test_sql_refsols/retail_transactions_ts_rewrite_snowflake.sql @@ -0,0 +1,43 @@ +WITH _s0 AS ( + SELECT + COUNT(*) AS n_rows + FROM bodo.retail.transactions + WHERE + transaction_date IN ('2178-03-20 07:19:29', '2825-09-23 07:37:08') +), _s1 AS ( + SELECT + COUNT(*) AS n_rows + FROM bodo.retail.transactions + WHERE + transaction_date IN ('1010-12-08 07:23:35', '2328-01-19 07:33:25', '1577-03-20 07:41:29', '1345-03-06 07:41:47', '0937-05-21 07:27:48', '2176-01-07 07:07:03') +), _s3 AS ( + SELECT + COUNT(*) AS n_rows + FROM bodo.retail.transactions + WHERE + transaction_date IN ('3120-07-22 08:30:44', '1890-02-18 08:21:13', '1890-02-18 08:46:51') +), _s5 AS ( + SELECT + COUNT(*) AS n_rows + FROM bodo.retail.transactions + WHERE + transaction_date IN ('2052-11-18 08:24:33', '2052-11-18 08:32:00', '1577-03-20 08:03:51', '1577-03-20 08:32:17', '2550-01-17 08:56:44', '1551-03-04 08:36:08') +), _s7 AS ( + SELECT + COUNT(*) AS n_rows + FROM bodo.retail.transactions + WHERE + transaction_date IN ('1752-07-20 01:18:18', '1880-04-06 00:47:47', '2956-09-24 00:03:03', '1868-06-13 01:22:22', '0780-03-23 01:14:14', '1598-04-24 01:11:11', '0763-04-15 00:16:16', '2780-03-19 01:32:32') + OR transaction_date IN ('2268-07-06 01:50:11', '3056-08-07 01:18:26', '3120-07-22 02:43:20', '1010-12-08 01:47:15', '1440-10-15 02:26:30', '3054-12-02 00:51:55', '3031-02-17 00:54:21', '1539-02-23 00:49:34', '2418-09-09 01:12:48', '2418-09-09 02:09:31', '2551-01-12 00:34:57', '3141-01-25 02:24:01', '2328-01-19 01:20:40', '1577-03-20 00:27:19', '1608-08-20 00:10:55', '1608-08-20 01:12:55', '1608-08-20 02:14:47', '2825-09-23 02:31:19', '1286-12-21 00:21:24', '1286-12-21 01:25:46', '3300-07-12 00:15:35', '2059-07-23 01:56:15', '2955-06-27 00:48:34', '2955-06-27 01:24:43', '0937-05-21 
00:40:43', '0930-11-28 02:44:19', '1605-10-12 00:58:57', '0781-08-29 02:28:10', '2374-09-21 00:21:42', '2374-09-21 02:10:55', '3022-05-13 01:56:21', '3088-03-30 01:09:15', '3088-03-30 02:38:56', '1757-01-16 00:20:29', '3287-10-20 01:17:31', '2555-09-08 00:40:20', '2555-09-08 01:20:22', '2555-09-08 02:36:58', '2176-01-07 02:50:08', '2282-06-16 00:21:35', '2595-05-23 01:32:01', '3237-05-26 01:19:24', '3237-05-26 01:52:49', '2780-03-19 01:32:32', '2780-03-19 02:33:01') +) +SELECT + _s0.n_rows AS n1, + _s1.n_rows AS n2, + _s3.n_rows AS n3, + _s5.n_rows AS n4, + _s7.n_rows AS n5 +FROM _s0 AS _s0 +CROSS JOIN _s1 AS _s1 +CROSS JOIN _s3 AS _s3 +CROSS JOIN _s5 AS _s5 +CROSS JOIN _s7 AS _s7 From f61935693b4848af083545187d744688076c7dc7 Mon Sep 17 00:00:00 2001 From: knassre-bodo Date: Tue, 23 Dec 2025 09:52:15 -0800 Subject: [PATCH 39/40] Edge case debugging WIP --- pydough/mask_server/mask_server.py | 39 +++++++++++-------- .../mask_server_candidate_visitor.py | 8 ---- tests/mock_server/api_mock_server.py | 10 ++++- tests/mock_server/lookup_table.py | 2 +- tests/test_masked_sqlite.py | 6 +-- tests/test_plan_refsols/retail_all_raw.txt | 4 ++ .../test_plan_refsols/retail_all_rewrite.txt | 3 ++ .../retail_members_compound_g_rewrite.txt | 2 +- .../retail_members_compound_h_rewrite.txt | 2 +- .../retail_names_analysis_a_raw.txt | 4 ++ .../retail_names_analysis_a_rewrite.txt | 4 ++ .../retail_names_analysis_b_raw.txt | 4 ++ .../retail_names_analysis_b_rewrite.txt | 4 ++ tests/test_plan_refsols/retail_none_raw.txt | 4 ++ .../test_plan_refsols/retail_none_rewrite.txt | 4 ++ .../retail_all_raw_snowflake.sql | 5 +++ .../retail_all_rewrite_snowflake.sql | 3 ++ ...l_members_compound_g_rewrite_snowflake.sql | 2 +- ...l_members_compound_h_rewrite_snowflake.sql | 3 +- .../retail_names_analysis_a_raw_snowflake.sql | 5 +++ ...ail_names_analysis_a_rewrite_snowflake.sql | 5 +++ .../retail_names_analysis_b_raw_snowflake.sql | 5 +++ ...ail_names_analysis_b_rewrite_snowflake.sql | 5 +++ 
.../retail_none_raw_snowflake.sql | 5 +++ .../retail_none_rewrite_snowflake.sql | 5 +++ 25 files changed, 107 insertions(+), 36 deletions(-) create mode 100644 tests/test_plan_refsols/retail_all_raw.txt create mode 100644 tests/test_plan_refsols/retail_all_rewrite.txt create mode 100644 tests/test_plan_refsols/retail_names_analysis_a_raw.txt create mode 100644 tests/test_plan_refsols/retail_names_analysis_a_rewrite.txt create mode 100644 tests/test_plan_refsols/retail_names_analysis_b_raw.txt create mode 100644 tests/test_plan_refsols/retail_names_analysis_b_rewrite.txt create mode 100644 tests/test_plan_refsols/retail_none_raw.txt create mode 100644 tests/test_plan_refsols/retail_none_rewrite.txt create mode 100644 tests/test_sql_refsols/retail_all_raw_snowflake.sql create mode 100644 tests/test_sql_refsols/retail_all_rewrite_snowflake.sql create mode 100644 tests/test_sql_refsols/retail_names_analysis_a_raw_snowflake.sql create mode 100644 tests/test_sql_refsols/retail_names_analysis_a_rewrite_snowflake.sql create mode 100644 tests/test_sql_refsols/retail_names_analysis_b_raw_snowflake.sql create mode 100644 tests/test_sql_refsols/retail_names_analysis_b_rewrite_snowflake.sql create mode 100644 tests/test_sql_refsols/retail_none_raw_snowflake.sql create mode 100644 tests/test_sql_refsols/retail_none_rewrite_snowflake.sql diff --git a/pydough/mask_server/mask_server.py b/pydough/mask_server/mask_server.py index 54df3d70e..3c06415e6 100644 --- a/pydough/mask_server/mask_server.py +++ b/pydough/mask_server/mask_server.py @@ -129,22 +129,24 @@ def __init__(self, base_url: str, token: str | None = None): base_url=base_url, token=token ) - def get_server_response_case(self, server_case: str) -> MaskServerResponse: + def get_server_response_case(self, response_metadata: dict) -> MaskServerResponse: """ Mapping from server response strings to MaskServerResponse enum values. Args: - `server_case`: The response string from the server. 
+ `response_metadata`: The metadata field from the server response. Returns: The corresponding MaskServerResponse enum value. """ - match server_case: - case "IN": - return MaskServerResponse.IN_ARRAY - case "NOT_IN": - return MaskServerResponse.NOT_IN_ARRAY - case _: - return MaskServerResponse.UNSUPPORTED + if response_metadata.get("dynamic_operator", None) == "IN": + match response_metadata.get("representation", None): + case "IN" | None: + return MaskServerResponse.IN_ARRAY + case "NOT_IN": + return MaskServerResponse.NOT_IN_ARRAY + case _: + return MaskServerResponse.UNSUPPORTED + return MaskServerResponse.UNSUPPORTED def simplify_simple_expression_batch( self, @@ -349,7 +351,7 @@ def generate_result(self, response_dict: dict) -> list[MaskServerOutput]: else: # In this case, parse the response normally. response_case: MaskServerResponse = self.get_server_response_case( - response["metadata"]["dynamic_operator"] + response["metadata"] ) payload: Any = None @@ -363,13 +365,16 @@ def generate_result(self, response_dict: dict) -> list[MaskServerOutput]: # values, and decode them from base64. 
payload = [] for record in response.get("records", []): - record_raw: str = record["cell_encrypted"] - padded = ( - record_raw + "=" * (4 - len(record_raw) % 4) - if len(record_raw) % 4 - else record_raw - ) - payload.append(base64.b64decode(padded).decode("utf-8")) + record_raw = record["cell_encrypted"] + if isinstance(record_raw, str): + padded = ( + record_raw + "=" * (4 - len(record_raw) % 4) + if len(record_raw) % 4 + else record_raw + ) + payload.append(base64.b64decode(padded).decode("utf-8")) + else: + payload.append(record_raw) result.append( MaskServerOutput( diff --git a/pydough/mask_server/mask_server_candidate_visitor.py b/pydough/mask_server/mask_server_candidate_visitor.py index a71300f17..98be67a72 100644 --- a/pydough/mask_server/mask_server_candidate_visitor.py +++ b/pydough/mask_server/mask_server_candidate_visitor.py @@ -774,14 +774,6 @@ def convert_literal_to_server_expression( return ["NULL"] elif isinstance(literal.value, bool): return ["TRUE" if literal.value else "FALSE"] - elif ( - isinstance(literal.value, str) - and literal.value.upper() - in MaskServerCandidateVisitor.SERVER_OPERATOR_NAMES - ): - # If the string literal matches a reserved operator name, wrap it - # in a QUOTE function to avoid confusion. E.g. `['QUOTE', 1, 'AND']` - return ["QUOTE", 1, literal.value] elif isinstance(literal.value, (int, float, str)): return [literal.value] elif isinstance(literal.value, datetime.datetime): diff --git a/tests/mock_server/api_mock_server.py b/tests/mock_server/api_mock_server.py index 47b53fd97..accef8458 100644 --- a/tests/mock_server/api_mock_server.py +++ b/tests/mock_server/api_mock_server.py @@ -8,6 +8,8 @@ Intended for use in unit and integration tests. 
""" +import base64 + from fastapi import Depends, FastAPI, HTTPException, Request from pydantic import BaseModel @@ -76,7 +78,9 @@ def batch_evaluate( "records": [ { "mode": "cell_encrypted", - "cell_encrypted": elem, + "cell_encrypted": base64.b64encode(str(elem).encode("utf-8")) + if isinstance(elem, str) + else elem, } for elem in output_list ], @@ -89,9 +93,11 @@ def batch_evaluate( "actual_output_mode": "cell_encrypted", "available_output_modes": ["cell_encrypted"], "encryption_mode": None, - "dynamic_operator": output_case, + "dynamic_operator": "IN", }, } + if output_case == "NOT_IN": + out_item["response"]["metadata"]["representation"] = "NOT_IN" # Don't include response in dry run case if item.dry_run: out_item["response"].pop("records") diff --git a/tests/mock_server/lookup_table.py b/tests/mock_server/lookup_table.py index 7673f2dce..02e5a635d 100644 --- a/tests/mock_server/lookup_table.py +++ b/tests/mock_server/lookup_table.py @@ -1256,6 +1256,6 @@ ( "dummy_server", "CRBNK/CUSTOMERS/c_fname", - ("CONTAINS", 2, "QUOTE", 1, "SLICE", "UPPER", 1, "SLICE", 3, "__col__", 0, 1), + ("CONTAINS", 2, "SLICE", "UPPER", 1, "SLICE", 3, "__col__", 0, 1), ): ("IN", ["CAROL", "EMILY", "ISABEL", "LUKE", "SOPHIA"]), } diff --git a/tests/test_masked_sqlite.py b/tests/test_masked_sqlite.py index 97c94144f..731aa14c9 100644 --- a/tests/test_masked_sqlite.py +++ b/tests/test_masked_sqlite.py @@ -1437,11 +1437,11 @@ def test_pipeline_e2e_cryptbank( "result = CRYPTBANK.CALCULATE(n=COUNT(selected_customers))", [ { - "CRBNK/CUSTOMERS/c_fname: ['CONTAINS', 2, 'QUOTE', 1, 'SLICE', 'UPPER', 1, 'SLICE', 3, '__col__', 0, 1]", + "CRBNK/CUSTOMERS/c_fname: ['CONTAINS', 2, 'SLICE', 'UPPER', 1, 'SLICE', 3, '__col__', 0, 1]", "DRY_RUN", }, { - "CRBNK/CUSTOMERS/c_fname: ['CONTAINS', 2, 'QUOTE', 1, 'SLICE', 'UPPER', 1, 'SLICE', 3, '__col__', 0, 1]" + "CRBNK/CUSTOMERS/c_fname: ['CONTAINS', 2, 'SLICE', 'UPPER', 1, 'SLICE', 3, '__col__', 0, 1]" }, ], id="cryptbank_filter_count_59", @@ -1451,7 
+1451,7 @@ def test_pipeline_e2e_cryptbank( "result = CRYPTBANK.CALCULATE(n=COUNT(selected_customers))", [ { - "CRBNK/CUSTOMERS/c_fname: ['IN', 8, '__col__', 'QUOTE', 1, 'Datediff', 'QUOTE', 1, 'YEAR', 'IN', 'NOT IN', 'NEQ', 'QUOTE', 1, 'NOT_EQUAL', 'QUOTE', 1, 'lower']", + "CRBNK/CUSTOMERS/c_fname: ['IN', 8, '__col__', 'Datediff', 'YEAR', 'IN', 'NOT IN', 'NEQ', 'NOT_EQUAL', 'lower']", "DRY_RUN", }, ], diff --git a/tests/test_plan_refsols/retail_all_raw.txt b/tests/test_plan_refsols/retail_all_raw.txt new file mode 100644 index 000000000..1d6902166 --- /dev/null +++ b/tests/test_plan_refsols/retail_all_raw.txt @@ -0,0 +1,4 @@ +ROOT(columns=[('n', n_rows)], orderings=[]) + AGGREGATE(keys={}, aggregations={'n_rows': COUNT()}) + FILTER(condition=YEAR(UNMASK::(PTY_UNPROTECT([date_of_birth], 'deDOB'))) < 2026:numeric, columns={}) + SCAN(table=bodo.retail.protected_loyalty_members, columns={'date_of_birth': date_of_birth}) diff --git a/tests/test_plan_refsols/retail_all_rewrite.txt b/tests/test_plan_refsols/retail_all_rewrite.txt new file mode 100644 index 000000000..b252b0c5e --- /dev/null +++ b/tests/test_plan_refsols/retail_all_rewrite.txt @@ -0,0 +1,3 @@ +ROOT(columns=[('n', n_rows)], orderings=[]) + AGGREGATE(keys={}, aggregations={'n_rows': COUNT()}) + SCAN(table=bodo.retail.protected_loyalty_members, columns={}) diff --git a/tests/test_plan_refsols/retail_members_compound_g_rewrite.txt b/tests/test_plan_refsols/retail_members_compound_g_rewrite.txt index ab64fa711..8e495c01b 100644 --- a/tests/test_plan_refsols/retail_members_compound_g_rewrite.txt +++ b/tests/test_plan_refsols/retail_members_compound_g_rewrite.txt @@ -1,4 +1,4 @@ ROOT(columns=[('n', n_rows)], orderings=[]) AGGREGATE(keys={}, aggregations={'n_rows': COUNT()}) - FILTER(condition=DAY(UNMASK::(PTY_UNPROTECT([date_of_birth], 'deDOB'))) <= 13:numeric & DAY(UNMASK::(PTY_UNPROTECT([date_of_birth], 'deDOB'))) > 3:numeric & ISIN(MONTH(UNMASK::(PTY_UNPROTECT([date_of_birth], 'deDOB'))), [1, 2, 5, 10, 
12]:array[unknown]) & ISIN(YEAR(UNMASK::(PTY_UNPROTECT([date_of_birth], 'deDOB'))), [1960, 1970, 1980, 1990, 2000]:array[unknown]), columns={}) + FILTER(condition=DAY(UNMASK::(PTY_UNPROTECT([date_of_birth], 'deDOB'))) <= 13:numeric & ISIN(MONTH(UNMASK::(PTY_UNPROTECT([date_of_birth], 'deDOB'))), [1, 2, 5, 10, 12]:array[unknown]) & ISIN(YEAR(UNMASK::(PTY_UNPROTECT([date_of_birth], 'deDOB'))), [1960, 1970, 1980, 1990, 2000]:array[unknown]) & NOT(ISIN(date_of_birth, ['0897-01-11', '0681-01-31', '2337-11-25', '0765-06-07', '3270-09-07', '3114-01-23', '0946-07-28', '0671-06-23', '1030-02-26', '2892-07-01', '1787-09-06', '2191-11-24', '0912-05-28', '1828-09-20', '1318-12-03', '0660-08-20', '1546-05-12', '2064-12-18', '1664-12-03', '0627-05-21', '1348-11-22', '3202-05-20', '0959-04-01', '1397-05-24', '3184-08-05', '2207-02-22', '2388-11-19', '2563-07-20', '3159-09-21', '2692-10-23', '1365-12-07', '1712-02-18', '0846-08-04', '3332-01-06', '2501-07-04', '3297-10-03', '2235-01-19', '2006-03-03', '2544-09-01', '1543-07-16']:array[unknown])), columns={}) SCAN(table=bodo.retail.protected_loyalty_members, columns={'date_of_birth': date_of_birth}) diff --git a/tests/test_plan_refsols/retail_members_compound_h_rewrite.txt b/tests/test_plan_refsols/retail_members_compound_h_rewrite.txt index 88edd4783..e8d0806cb 100644 --- a/tests/test_plan_refsols/retail_members_compound_h_rewrite.txt +++ b/tests/test_plan_refsols/retail_members_compound_h_rewrite.txt @@ -1,4 +1,4 @@ ROOT(columns=[('n', n_rows)], orderings=[]) AGGREGATE(keys={}, aggregations={'n_rows': COUNT()}) - FILTER(condition=UNMASK::(PTY_UNPROTECT([date_of_birth], 'deDOB')) < datetime.date(2007, 1, 1):datetime & UNMASK::(PTY_UNPROTECT_NAME([last_name])) >= 'Cross':string, columns={}) + FILTER(condition=date_of_birth != '2605-10-18':unknown & UNMASK::(PTY_UNPROTECT_NAME([last_name])) >= 'Cross':string, columns={}) SCAN(table=bodo.retail.protected_loyalty_members, columns={'date_of_birth': date_of_birth, 'last_name': 
last_name}) diff --git a/tests/test_plan_refsols/retail_names_analysis_a_raw.txt b/tests/test_plan_refsols/retail_names_analysis_a_raw.txt new file mode 100644 index 000000000..65b95754b --- /dev/null +++ b/tests/test_plan_refsols/retail_names_analysis_a_raw.txt @@ -0,0 +1,4 @@ +ROOT(columns=[('n', n_rows)], orderings=[]) + AGGREGATE(keys={}, aggregations={'n_rows': COUNT()}) + FILTER(condition=CONTAINS('GT':string, UPPER(SLICE(UNMASK::(PTY_UNPROTECT([first_name], 'deName')), None:unknown, 1:numeric, None:unknown))), columns={}) + SCAN(table=bodo.retail.protected_loyalty_members, columns={'first_name': first_name}) diff --git a/tests/test_plan_refsols/retail_names_analysis_a_rewrite.txt b/tests/test_plan_refsols/retail_names_analysis_a_rewrite.txt new file mode 100644 index 000000000..c24d5cbc7 --- /dev/null +++ b/tests/test_plan_refsols/retail_names_analysis_a_rewrite.txt @@ -0,0 +1,4 @@ +ROOT(columns=[('n', n_rows)], orderings=[]) + AGGREGATE(keys={}, aggregations={'n_rows': COUNT()}) + FILTER(condition=ISIN(first_name, ['wrJPcBLnb', 'tfdP', 'aPZukW', 'FllTrn', 'dWFj', 'zQhHu', 'cLxbra', 'iShNn', 'nrvyDT', 'Eikudy', 'dDxAuD', 'RwQZcxw', 'RnYgnn', 'UMwsjSm', 'VjzF', 'lDKVA', 'DAzoEa', 'POnnEr', 'EGBa']:array[unknown]), columns={}) + SCAN(table=bodo.retail.protected_loyalty_members, columns={'first_name': first_name}) diff --git a/tests/test_plan_refsols/retail_names_analysis_b_raw.txt b/tests/test_plan_refsols/retail_names_analysis_b_raw.txt new file mode 100644 index 000000000..2e8548f25 --- /dev/null +++ b/tests/test_plan_refsols/retail_names_analysis_b_raw.txt @@ -0,0 +1,4 @@ +ROOT(columns=[('n', n_rows)], orderings=[]) + AGGREGATE(keys={}, aggregations={'n_rows': COUNT()}) + FILTER(condition=CONTAINS('day':string, LOWER(SLICE(UNMASK::(PTY_UNPROTECT([first_name], 'deName')), None:unknown, 2:numeric, None:unknown))), columns={}) + SCAN(table=bodo.retail.protected_loyalty_members, columns={'first_name': first_name}) diff --git 
a/tests/test_plan_refsols/retail_names_analysis_b_rewrite.txt b/tests/test_plan_refsols/retail_names_analysis_b_rewrite.txt new file mode 100644 index 000000000..7f144b658 --- /dev/null +++ b/tests/test_plan_refsols/retail_names_analysis_b_rewrite.txt @@ -0,0 +1,4 @@ +ROOT(columns=[('n', n_rows)], orderings=[]) + AGGREGATE(keys={}, aggregations={'n_rows': COUNT()}) + FILTER(condition=ISIN(first_name, ['ZcH', 'MgEZTa', 'veSbKfjZ', 'HBRvO', 'jvUyLK', 'tdfnU']:array[unknown]), columns={}) + SCAN(table=bodo.retail.protected_loyalty_members, columns={'first_name': first_name}) diff --git a/tests/test_plan_refsols/retail_none_raw.txt b/tests/test_plan_refsols/retail_none_raw.txt new file mode 100644 index 000000000..93fbf94a8 --- /dev/null +++ b/tests/test_plan_refsols/retail_none_raw.txt @@ -0,0 +1,4 @@ +ROOT(columns=[('n', n_rows)], orderings=[]) + AGGREGATE(keys={}, aggregations={'n_rows': COUNT()}) + FILTER(condition=YEAR(UNMASK::(PTY_UNPROTECT([date_of_birth], 'deDOB'))) >= 2026:numeric, columns={}) + SCAN(table=bodo.retail.protected_loyalty_members, columns={'date_of_birth': date_of_birth}) diff --git a/tests/test_plan_refsols/retail_none_rewrite.txt b/tests/test_plan_refsols/retail_none_rewrite.txt new file mode 100644 index 000000000..aef1db0af --- /dev/null +++ b/tests/test_plan_refsols/retail_none_rewrite.txt @@ -0,0 +1,4 @@ +ROOT(columns=[('n', n_rows)], orderings=[]) + AGGREGATE(keys={}, aggregations={'n_rows': COUNT()}) + FILTER(condition=False:bool, columns={}) + SCAN(table=bodo.retail.protected_loyalty_members, columns={}) diff --git a/tests/test_sql_refsols/retail_all_raw_snowflake.sql b/tests/test_sql_refsols/retail_all_raw_snowflake.sql new file mode 100644 index 000000000..d70eed690 --- /dev/null +++ b/tests/test_sql_refsols/retail_all_raw_snowflake.sql @@ -0,0 +1,5 @@ +SELECT + COUNT(*) AS n +FROM bodo.retail.protected_loyalty_members +WHERE + YEAR(CAST(PTY_UNPROTECT(date_of_birth, 'deDOB') AS TIMESTAMP)) < 2026 diff --git 
a/tests/test_sql_refsols/retail_all_rewrite_snowflake.sql b/tests/test_sql_refsols/retail_all_rewrite_snowflake.sql new file mode 100644 index 000000000..232c2ec8a --- /dev/null +++ b/tests/test_sql_refsols/retail_all_rewrite_snowflake.sql @@ -0,0 +1,3 @@ +SELECT + COUNT(*) AS n +FROM bodo.retail.protected_loyalty_members diff --git a/tests/test_sql_refsols/retail_members_compound_g_rewrite_snowflake.sql b/tests/test_sql_refsols/retail_members_compound_g_rewrite_snowflake.sql index 3da3ff9aa..f8d121533 100644 --- a/tests/test_sql_refsols/retail_members_compound_g_rewrite_snowflake.sql +++ b/tests/test_sql_refsols/retail_members_compound_g_rewrite_snowflake.sql @@ -3,6 +3,6 @@ SELECT FROM bodo.retail.protected_loyalty_members WHERE DAY(CAST(PTY_UNPROTECT(date_of_birth, 'deDOB') AS TIMESTAMP)) <= 13 - AND DAY(CAST(PTY_UNPROTECT(date_of_birth, 'deDOB') AS TIMESTAMP)) > 3 AND MONTH(CAST(PTY_UNPROTECT(date_of_birth, 'deDOB') AS TIMESTAMP)) IN (1, 2, 5, 10, 12) + AND NOT date_of_birth IN ('0897-01-11', '0681-01-31', '2337-11-25', '0765-06-07', '3270-09-07', '3114-01-23', '0946-07-28', '0671-06-23', '1030-02-26', '2892-07-01', '1787-09-06', '2191-11-24', '0912-05-28', '1828-09-20', '1318-12-03', '0660-08-20', '1546-05-12', '2064-12-18', '1664-12-03', '0627-05-21', '1348-11-22', '3202-05-20', '0959-04-01', '1397-05-24', '3184-08-05', '2207-02-22', '2388-11-19', '2563-07-20', '3159-09-21', '2692-10-23', '1365-12-07', '1712-02-18', '0846-08-04', '3332-01-06', '2501-07-04', '3297-10-03', '2235-01-19', '2006-03-03', '2544-09-01', '1543-07-16') AND YEAR(CAST(PTY_UNPROTECT(date_of_birth, 'deDOB') AS TIMESTAMP)) IN (1960, 1970, 1980, 1990, 2000) diff --git a/tests/test_sql_refsols/retail_members_compound_h_rewrite_snowflake.sql b/tests/test_sql_refsols/retail_members_compound_h_rewrite_snowflake.sql index 9663cadc8..02fbeefe3 100644 --- a/tests/test_sql_refsols/retail_members_compound_h_rewrite_snowflake.sql +++ 
b/tests/test_sql_refsols/retail_members_compound_h_rewrite_snowflake.sql @@ -2,5 +2,4 @@ SELECT COUNT(*) AS n FROM bodo.retail.protected_loyalty_members WHERE - PTY_UNPROTECT(date_of_birth, 'deDOB') < CAST('2007-01-01' AS DATE) - AND PTY_UNPROTECT_NAME(last_name) >= 'Cross' + PTY_UNPROTECT_NAME(last_name) >= 'Cross' AND date_of_birth <> '2605-10-18' diff --git a/tests/test_sql_refsols/retail_names_analysis_a_raw_snowflake.sql b/tests/test_sql_refsols/retail_names_analysis_a_raw_snowflake.sql new file mode 100644 index 000000000..b90077927 --- /dev/null +++ b/tests/test_sql_refsols/retail_names_analysis_a_raw_snowflake.sql @@ -0,0 +1,5 @@ +SELECT + COUNT(*) AS n +FROM bodo.retail.protected_loyalty_members +WHERE + CONTAINS('GT', UPPER(SUBSTRING(PTY_UNPROTECT(first_name, 'deName'), 1, 1))) diff --git a/tests/test_sql_refsols/retail_names_analysis_a_rewrite_snowflake.sql b/tests/test_sql_refsols/retail_names_analysis_a_rewrite_snowflake.sql new file mode 100644 index 000000000..2a85ffb2e --- /dev/null +++ b/tests/test_sql_refsols/retail_names_analysis_a_rewrite_snowflake.sql @@ -0,0 +1,5 @@ +SELECT + COUNT(*) AS n +FROM bodo.retail.protected_loyalty_members +WHERE + first_name IN ('wrJPcBLnb', 'tfdP', 'aPZukW', 'FllTrn', 'dWFj', 'zQhHu', 'cLxbra', 'iShNn', 'nrvyDT', 'Eikudy', 'dDxAuD', 'RwQZcxw', 'RnYgnn', 'UMwsjSm', 'VjzF', 'lDKVA', 'DAzoEa', 'POnnEr', 'EGBa') diff --git a/tests/test_sql_refsols/retail_names_analysis_b_raw_snowflake.sql b/tests/test_sql_refsols/retail_names_analysis_b_raw_snowflake.sql new file mode 100644 index 000000000..116928e29 --- /dev/null +++ b/tests/test_sql_refsols/retail_names_analysis_b_raw_snowflake.sql @@ -0,0 +1,5 @@ +SELECT + COUNT(*) AS n +FROM bodo.retail.protected_loyalty_members +WHERE + CONTAINS('day', LOWER(SUBSTRING(PTY_UNPROTECT(first_name, 'deName'), 1, 2))) diff --git a/tests/test_sql_refsols/retail_names_analysis_b_rewrite_snowflake.sql b/tests/test_sql_refsols/retail_names_analysis_b_rewrite_snowflake.sql new file mode 
100644 index 000000000..394bfbb0d --- /dev/null +++ b/tests/test_sql_refsols/retail_names_analysis_b_rewrite_snowflake.sql @@ -0,0 +1,5 @@ +SELECT + COUNT(*) AS n +FROM bodo.retail.protected_loyalty_members +WHERE + first_name IN ('ZcH', 'MgEZTa', 'veSbKfjZ', 'HBRvO', 'jvUyLK', 'tdfnU') diff --git a/tests/test_sql_refsols/retail_none_raw_snowflake.sql b/tests/test_sql_refsols/retail_none_raw_snowflake.sql new file mode 100644 index 000000000..477aa0fb8 --- /dev/null +++ b/tests/test_sql_refsols/retail_none_raw_snowflake.sql @@ -0,0 +1,5 @@ +SELECT + COUNT(*) AS n +FROM bodo.retail.protected_loyalty_members +WHERE + YEAR(CAST(PTY_UNPROTECT(date_of_birth, 'deDOB') AS TIMESTAMP)) >= 2026 diff --git a/tests/test_sql_refsols/retail_none_rewrite_snowflake.sql b/tests/test_sql_refsols/retail_none_rewrite_snowflake.sql new file mode 100644 index 000000000..f797002a5 --- /dev/null +++ b/tests/test_sql_refsols/retail_none_rewrite_snowflake.sql @@ -0,0 +1,5 @@ +SELECT + COUNT(*) AS n +FROM bodo.retail.protected_loyalty_members +WHERE + FALSE From 8a5f82d15be0607160ed9ae2a99a37750b059c9e Mon Sep 17 00:00:00 2001 From: knassre-bodo Date: Tue, 23 Dec 2025 10:00:17 -0800 Subject: [PATCH 40/40] Adding PYDOUGH_MASK_SERVER_PATH to CI --- .github/workflows/pr_testing.yml | 1 + .github/workflows/sf_masked_testing.yml | 2 ++ 2 files changed, 3 insertions(+) diff --git a/.github/workflows/pr_testing.yml b/.github/workflows/pr_testing.yml index 88b6ec4ba..3641ed72d 100644 --- a/.github/workflows/pr_testing.yml +++ b/.github/workflows/pr_testing.yml @@ -231,6 +231,7 @@ jobs: SF_NONE_USERNAME: ${{ secrets.SF_NONE_USERNAME }} SF_NONE_PASSWORD: ${{ secrets.SF_NONE_PASSWORD }} SF_MASKED_ACCOUNT: ${{ secrets.SF_MASKED_ACCOUNT }} + PYDOUGH_MASK_SERVER_PATH: ${{ secrets.PYDOUGH_MASK_SERVER_PATH }} with: python-versions: ${{ github.event_name == 'workflow_dispatch' && needs.get-py-ver-matrix.outputs.matrix diff --git a/.github/workflows/sf_masked_testing.yml 
b/.github/workflows/sf_masked_testing.yml index 56b5986c1..aee42d70d 100644 --- a/.github/workflows/sf_masked_testing.yml +++ b/.github/workflows/sf_masked_testing.yml @@ -22,6 +22,8 @@ on: required: true SF_MASKED_ACCOUNT: required: true + PYDOUGH_MASK_SERVER_PATH: + required: true jobs: sf-tests: