diff --git a/python/infinity_embedded/local_infinity/table.py b/python/infinity_embedded/local_infinity/table.py
index 7b0137de3e..046dd6b4a7 100644
--- a/python/infinity_embedded/local_infinity/table.py
+++ b/python/infinity_embedded/local_infinity/table.py
@@ -424,6 +424,20 @@ def drop_columns(self, columns: list[str] | str):
             columns = [columns]
         return self._conn.drop_columns(db_name=self._db_name, table_name=self._table_name, column_names=columns)
 
+    def _to_string(self, query: Query):
+        """Return the string form of a query; not implemented for this client yet, so always ""."""
+        # columns: Optional[List[WrapParsedExpr]],
+        # highlight: Optional[List[WrapParsedExpr]],
+        # search: Optional[WrapSearchExpr],
+        # filter: Optional[WrapParsedExpr],
+        # group_by: Optional[List[WrapParsedExpr]],
+        # limit: Optional[WrapParsedExpr],
+        # offset: Optional[WrapParsedExpr],
+        # sort: Optional[List[WrapOrderByExpr]],
+        # total_hits_count: Optional[bool]
+
+        return ""
+
     def _execute_query(self, query: Query):
         # execute the query
         highlight = []
diff --git a/python/infinity_sdk/infinity/remote_thrift/query_builder.py b/python/infinity_sdk/infinity/remote_thrift/query_builder.py
index c6ac98289d..536b28605a 100644
--- a/python/infinity_sdk/infinity/remote_thrift/query_builder.py
+++ b/python/infinity_sdk/infinity/remote_thrift/query_builder.py
@@ -506,6 +506,21 @@ def sort(self, order_by_expr_list: Optional[List[list[str, SortType]]]) -> Infin
         self._sort = sort_list
         return self
 
+    def to_string(self) -> str:
+        """Bundle the builder's current state into a Query and return the table's string form of it."""
+        query = Query(
+            columns=self._columns,
+            highlight=self._highlight,
+            search=self._search,
+            filter=self._filter,
+            groupby=self._groupby,
+            limit=self._limit,
+            offset=self._offset,
+            sort=self._sort,
+            total_hits_count=self._total_hits_count,
+        )
+        return self._table._to_string(query)
+
     def to_result(self) -> tuple[dict[str, list[Any]], dict[str, Any], {}]:
         query = Query(
             columns=self._columns,
diff --git a/python/infinity_sdk/infinity/remote_thrift/table.py b/python/infinity_sdk/infinity/remote_thrift/table.py
index 3ff29cc695..6d07e66f5a 100644
--- a/python/infinity_sdk/infinity/remote_thrift/table.py
+++ b/python/infinity_sdk/infinity/remote_thrift/table.py
@@ -11,6 +11,8 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
+
+import json
 import functools
 import inspect
 from typing import Optional, Union, List, Any
@@ -30,6 +32,8 @@
     check_valid_name,
     get_remote_constant_expr_from_python_value,
     get_ordinary_info,
+    parsed_expression_to_string,
+    search_to_string
 )
 from infinity.table import ExplainType
 from infinity.common import ConflictType, DEFAULT_MATCH_VECTOR_TOPN, SortType
@@ -409,6 +413,10 @@ def option(self, option_kv: {}):
         self.query_builder.option(option_kv)
         return self
 
+    def to_string(self):
+        """Return the string form of the query built so far (delegates to the query builder)."""
+        return self.query_builder.to_string()
+
     def to_result(self):
         return self.query_builder.to_result()
 
@@ -446,6 +454,39 @@ def drop_columns(self, column_names: list[str] | str):
     def compact(self):
         return self._conn.compact(db_name=self._db_name, table_name=self._table_name)
 
+    def _to_string(self, query: Query):
+        """Serialize the query to a JSON string keyed by db, table and each clause that is set."""
+        # columns: Optional[List[ParsedExpr]],
+        # highlight: Optional[List[ParsedExpr]],
+        # search: Optional[SearchExpr],
+        # filter: Optional[ParsedExpr],
+        # groupby: Optional[List[ParsedExpr]],
+        # limit: Optional[ParsedExpr],
+        # offset: Optional[ParsedExpr],
+        # sort: Optional[List[OrderByExpr]],
+        # total_hits_count: Optional[bool]
+        # NOTE: groupby, sort and total_hits_count are not serialized yet.
+        res = {"db": self._db_name, "table": self._table_name}
+        if query.columns:
+            res["columns"] = [parsed_expression_to_string(column) for column in query.columns]
+
+        if query.highlight:
+            res["highlights"] = [parsed_expression_to_string(highlight) for highlight in query.highlight]
+
+        if query.search:
+            res["search"] = search_to_string(query.search)
+
+        if query.filter:
+            res["filter"] = parsed_expression_to_string(query.filter)
+
+        if query.limit:
+            res["limit"] = parsed_expression_to_string(query.limit)
+
+        if query.offset:
+            res["offset"] = parsed_expression_to_string(query.offset)
+
+        return json.dumps(res)
+
     def _execute_query(self, query: Query) -> tuple[dict[str, list[Any]], dict[str, Any]]:
         # execute the query
 
diff --git a/python/infinity_sdk/infinity/remote_thrift/utils.py b/python/infinity_sdk/infinity/remote_thrift/utils.py
index 5e7bed11ff..40120d8b2e 100644
--- a/python/infinity_sdk/infinity/remote_thrift/utils.py
+++ b/python/infinity_sdk/infinity/remote_thrift/utils.py
@@ -28,6 +28,147 @@
 from infinity.errors import ErrorCode
 
 
+def parsed_expression_to_string(expr: ttypes.ParsedExpr) -> str:
+    """Render a thrift ParsedExpr as a human-readable string.
+
+    Returns "" when expr is None or when none of the handled variants is set.
+    """
+    if expr is None:
+        return ""
+
+    expr_type = expr.type
+    if expr_type.constant_expr:
+        # Literal payloads are read from the ConstantExpr, like bool_value below.
+        constant = expr_type.constant_expr
+        match constant.literal_type:
+            case ttypes.LiteralType.Boolean:
+                return str(constant.bool_value)
+            case ttypes.LiteralType.Int64:
+                return str(constant.i64_value)
+            case ttypes.LiteralType.Double:
+                return str(constant.f64_value)
+            case ttypes.LiteralType.String:
+                return constant.str_value
+            case ttypes.LiteralType.IntegerArray:
+                return str(constant.i64_array_value)
+            case ttypes.LiteralType.DoubleArray:
+                return str(constant.f64_array_value)
+            case ttypes.LiteralType.IntegerTensor:
+                return str(constant.i64_tensor_value)
+            case ttypes.LiteralType.DoubleTensor:
+                return str(constant.f64_tensor_value)
+            case ttypes.LiteralType.IntegerTensorArray:
+                return str(constant.i64_tensor_array)  # NOTE(review): confirm field name
+            case ttypes.LiteralType.DoubleTensorArray:
+                return str(constant.f64_tensor_array)  # NOTE(review): confirm field name
+            case ttypes.LiteralType.SparseIntegerArray:
+                return str(constant.i64_array_idx)
+        # Any other literal type falls through to the checks below.
+
+    if expr_type.column_expr:
+        if expr_type.column_expr.column_name:
+            return ".".join(expr_type.column_expr.column_name)
+        if expr_type.column_expr.star:
+            return "*"
+
+    if expr_type.function_expr:
+        function_name = expr_type.function_expr.function_name
+        arguments_str = ", ".join(
+            parsed_expression_to_string(argument) for argument in expr_type.function_expr.arguments or ()
+        )
+        return f"{function_name}({arguments_str})"
+
+    if expr_type.between_expr:
+        value_str = parsed_expression_to_string(expr_type.between_expr.value)
+        upper_bound_str = parsed_expression_to_string(expr_type.between_expr.upper_bound)
+        lower_bound_str = parsed_expression_to_string(expr_type.between_expr.lower_bound)
+        return f"between({value_str}, {upper_bound_str}, {lower_bound_str})"
+
+    # NOTE(review): the match variants below assume column_expr is a ParsedExpr and that
+    # topn exists on each struct - confirm against the thrift definitions.
+    if expr_type.knn_expr:
+        column_expr_str = parsed_expression_to_string(expr_type.knn_expr.column_expr)
+        return f"match_dense(column={column_expr_str}, top={expr_type.knn_expr.topn})"
+
+    if expr_type.match_sparse_expr:
+        column_expr_str = parsed_expression_to_string(expr_type.match_sparse_expr.column_expr)
+        return f"match_sparse(column={column_expr_str}, top={expr_type.match_sparse_expr.topn})"
+
+    if expr_type.match_tensor_expr:
+        column_expr_str = parsed_expression_to_string(expr_type.match_tensor_expr.column_expr)
+        return f"match_tensor(column={column_expr_str}, top={expr_type.match_tensor_expr.topn})"
+
+    if expr_type.match_expr:
+        column_expr_str = parsed_expression_to_string(expr_type.match_expr.column_expr)
+        return f"match_text(column={column_expr_str}, top={expr_type.match_expr.topn})"
+
+    if expr_type.fusion_expr:
+        return f"fusion(method={expr_type.fusion_expr.method}, options={expr_type.fusion_expr.options_text})"
+
+    if expr_type.search_expr:
+        return "search()"
+
+    if expr_type.in_expr:
+        arguments_str = ", ".join(
+            parsed_expression_to_string(argument) for argument in expr_type.in_expr.arguments or ()
+        )
+        left_expr_str = parsed_expression_to_string(expr_type.in_expr.left_operand)
+        if expr_type.in_expr.in_type:
+            return f"{left_expr_str} IN ({arguments_str})"
+        return f"{left_expr_str} NOT IN ({arguments_str})"
+
+    return ""
+
+
+def search_to_string(search_expr: ttypes.SearchExpr) -> str:
+    """Render a thrift SearchExpr as a comma-separated list of its match and fusion clauses.
+
+    Raises InfinityException (INVALID_EXPRESSION) when the search holds neither kind of clause.
+    """
+    # NOTE(review): match_exprs / fusion_exprs are assumed to hold GenericMatchExpr / FusionExpr
+    # (not ParsedExpr, which is what the .type lookup in parsed_expression_to_string needs) - confirm.
+    parts = [generic_match_to_string(match_expr) for match_expr in search_expr.match_exprs or ()]
+    parts += [fusion_to_string(fusion_expr) for fusion_expr in search_expr.fusion_exprs or ()]
+    if not parts:
+        raise InfinityException(ErrorCode.INVALID_EXPRESSION, "Invalid search expression")
+    return ", ".join(parts)
+
+
+def fusion_to_string(fusion_expr: ttypes.FusionExpr) -> str:
+    """Render a thrift FusionExpr, including its optional match-tensor clause when present."""
+    # 1: string method,
+    # 2: string options_text,
+    # 3: optional MatchTensorExpr optional_match_tensor_expr,
+    match_tensor_expr = fusion_expr.optional_match_tensor_expr
+    if not match_tensor_expr:
+        return f"fusion(name={fusion_expr.method}, options={fusion_expr.options_text})"
+
+    column_expr_str = parsed_expression_to_string(match_tensor_expr.column_expr)
+    match_tensor_expr_str = f"match_tensor(column={column_expr_str}, top={match_tensor_expr.topn})"
+    return f"fusion(name={fusion_expr.method}, options={fusion_expr.options_text}, optional_match_tensor={match_tensor_expr_str})"
+
+
+def generic_match_to_string(generic_match_expr: ttypes.GenericMatchExpr) -> str:
+    """Render whichever match variant is set on a thrift GenericMatchExpr; "" when none is set."""
+    if generic_match_expr.match_vector_expr:
+        column_expr_str = parsed_expression_to_string(generic_match_expr.match_vector_expr.column_expr)
+        return f"match_dense(column={column_expr_str}, top={generic_match_expr.match_vector_expr.topn})"
+
+    if generic_match_expr.match_sparse_expr:
+        column_expr_str = parsed_expression_to_string(generic_match_expr.match_sparse_expr.column_expr)
+        return f"match_sparse(column={column_expr_str}, top={generic_match_expr.match_sparse_expr.topn})"
+
+    if generic_match_expr.match_tensor_expr:
+        column_expr_str = parsed_expression_to_string(generic_match_expr.match_tensor_expr.column_expr)
+        return f"match_tensor(column={column_expr_str}, top={generic_match_expr.match_tensor_expr.topn})"
+
+    if generic_match_expr.match_text_expr:
+        column_expr_str = parsed_expression_to_string(generic_match_expr.match_text_expr.column_expr)
+        return f"match_text(column={column_expr_str}, top={generic_match_expr.match_text_expr.topn})"
+
+    return ""
+
+
 def traverse_conditions(cons, fn=None) -> ttypes.ParsedExpr:
     if isinstance(cons, exp.Binary):
         parsed_expr = ttypes.ParsedExpr()