Skip to content

Commit

Permalink
Add to_string method to python sdk (#2396)
Browse files Browse the repository at this point in the history
### What problem does this PR solve?

1. Add to_string method to remote python sdk

### Type of change

- [x] New Feature (non-breaking change which adds functionality)
- [x] Python SDK impacted, Need to update PyPI

Signed-off-by: Jin Hai <[email protected]>
  • Loading branch information
JinHai-CN authored Dec 23, 2024
1 parent ee3fc60 commit 4d11f30
Show file tree
Hide file tree
Showing 4 changed files with 224 additions and 0 deletions.
13 changes: 13 additions & 0 deletions python/infinity_embedded/local_infinity/table.py
Original file line number Diff line number Diff line change
Expand Up @@ -424,6 +424,19 @@ def drop_columns(self, columns: list[str] | str):
columns = [columns]
return self._conn.drop_columns(db_name=self._db_name, table_name=self._table_name, column_names=columns)

def _to_string(self, query: Query):
# columns: Optional[List[WrapParsedExpr]],
# highlight: Optional[List[WrapParsedExpr]],
# search: Optional[WrapSearchExpr],
# filter: Optional[WrapParsedExpr],
# group_by: Optional[List[WrapParsedExpr]],
# limit: Optional[WrapParsedExpr],
# offset: Optional[WrapParsedExpr],
# sort: Optional[List[WrapOrderByExpr]],
# total_hits_count: Optional[bool]

return ""

def _execute_query(self, query: Query):
# execute the query
highlight = []
Expand Down
14 changes: 14 additions & 0 deletions python/infinity_sdk/infinity/remote_thrift/query_builder.py
Original file line number Diff line number Diff line change
Expand Up @@ -506,6 +506,20 @@ def sort(self, order_by_expr_list: Optional[List[list[str, SortType]]]) -> Infin
self._sort = sort_list
return self

def to_string(self) -> str:
    """Describe the query built so far as a string.

    Bundles every clause currently stored on the builder into a ``Query`` and
    returns whatever the owning table's ``_to_string`` produces for it.
    """
    clauses = {
        "columns": self._columns,
        "highlight": self._highlight,
        "search": self._search,
        "filter": self._filter,
        "groupby": self._groupby,
        "limit": self._limit,
        "offset": self._offset,
        "sort": self._sort,
        "total_hits_count": self._total_hits_count,
    }
    return self._table._to_string(Query(**clauses))

def to_result(self) -> tuple[dict[str, list[Any]], dict[str, Any], {}]:
query = Query(
columns=self._columns,
Expand Down
45 changes: 45 additions & 0 deletions python/infinity_sdk/infinity/remote_thrift/table.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,8 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import json
import functools
import inspect
from typing import Optional, Union, List, Any
Expand All @@ -30,6 +32,8 @@
check_valid_name,
get_remote_constant_expr_from_python_value,
get_ordinary_info,
parsed_expression_to_string,
search_to_string
)
from infinity.table import ExplainType
from infinity.common import ConflictType, DEFAULT_MATCH_VECTOR_TOPN, SortType
Expand Down Expand Up @@ -409,6 +413,9 @@ def option(self, option_kv: {}):
self.query_builder.option(option_kv)
return self

def to_string(self):
    """Return the query builder's string rendering of the pending query."""
    rendered = self.query_builder.to_string()
    return rendered

def to_result(self):
    """Return whatever the query builder's ``to_result`` produces for the pending query."""
    outcome = self.query_builder.to_result()
    return outcome

Expand Down Expand Up @@ -446,6 +453,44 @@ def drop_columns(self, column_names: list[str] | str):
def compact(self):
    """Forward a compact request for this table to the connection and return its reply."""
    target = {"db_name": self._db_name, "table_name": self._table_name}
    return self._conn.compact(**target)

def _to_string(self, query: Query):
    """Serialize *query* into a JSON string.

    The JSON object always contains the ``db`` and ``table`` names of this
    table. Each clause that is set on *query* adds one more key:
    ``columns``, ``highlights``, ``search``, ``filter``, ``groupby``,
    ``limit`` and ``offset``.

    Fields carried by *query*:
        columns: Optional[List[ParsedExpr]]
        highlight: Optional[List[ParsedExpr]]
        search: Optional[SearchExpr]
        filter: Optional[ParsedExpr]
        groupby: Optional[List[ParsedExpr]]
        limit: Optional[ParsedExpr]
        offset: Optional[ParsedExpr]
        sort: Optional[List[OrderByExpr]]
        total_hits_count: Optional[bool]
    """
    res = {"db": self._db_name, "table": self._table_name}
    if query.columns:
        res["columns"] = [parsed_expression_to_string(column) for column in query.columns]

    if query.highlight:
        res["highlights"] = [parsed_expression_to_string(highlight) for highlight in query.highlight]

    if query.search:
        res["search"] = search_to_string(query.search)

    if query.filter:
        res["filter"] = parsed_expression_to_string(query.filter)

    # The group-by clause used to be dropped from the rendering while the
    # offset clause was emitted twice by a copy-pasted block.
    if query.groupby:
        res["groupby"] = [parsed_expression_to_string(group) for group in query.groupby]

    if query.limit:
        res["limit"] = parsed_expression_to_string(query.limit)

    if query.offset:
        res["offset"] = parsed_expression_to_string(query.offset)

    # NOTE(review): ``sort`` and ``total_hits_count`` are still not rendered;
    # no order-by formatter is imported into this module.
    return json.dumps(res)

def _execute_query(self, query: Query) -> tuple[dict[str, list[Any]], dict[str, Any]]:

# execute the query
Expand Down
152 changes: 152 additions & 0 deletions python/infinity_sdk/infinity/remote_thrift/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,158 @@
from infinity.errors import ErrorCode


def parsed_expression_to_string(expr: ttypes.ParsedExpr) -> str:
if expr is None:
return str()

expr_type = expr.type
if expr_type.constant_expr:
match expr_type.constant_expr.literal_type:
case ttypes.LiteralType.Boolean:
return str(expr_type.constant_expr.bool_value)
case ttypes.LiteralType.Int64:
return str(expr_type.i64_value)
case ttypes.LiteralType.Double:
return str(expr_type.f64_value)
case ttypes.LiteralType.String:
return expr_type.str_value
case ttypes.LiteralType.IntegerArray:
return str(expr_type.i64_array_value)
case ttypes.LiteralType.DoubleArray:
return str(expr_type.f64_array_value)
case ttypes.LiteralType.IntegerTensor:
return str(expr_type.i64_tensor_value)
case ttypes.LiteralType.DoubleTensor:
return str(expr_type.f64_tensor_value)
case ttypes.LiteralType.IntegerTensorArray:
return str(expr_type.i64_tensor_array)
case ttypes.LiteralType.DoubleTensorArray:
return str(expr_type.f64_tensor_array)
case ttypes.LiteralType.SparseIntegerArray:
return str(expr_type.i64_array_idx)

if expr_type.column_expr:
if expr_type.column_expr.column_name:
return str(".".join(expr_type.column_expr.column_name))
if expr_type.column_expr.star:
return "*"

if expr_type.function_expr:
function_name = expr_type.function_expr.function_name
arguments_str = str
for index, argument in enumerate(expr_type.function_expr.arguments):
arg_str = parsed_expression_to_string(argument)
if index == 0:
arguments_str = arg_str
else:
arguments_str = f"{arguments_str}, {arg_str}"
return f"{function_name}({arguments_str})"

if expr_type.between_expr:
value_str = parsed_expression_to_string(expr_type.between_expr.value)
upper_bound_str = parsed_expression_to_string(expr_type.between_expr.upper_bound)
lower_bound_str = parsed_expression_to_string(expr_type.between_expr.lower_bound)
return f"between(f{value_str}, f{upper_bound_str}, f{lower_bound_str})"

if expr_type.knn_expr:
column_expr_str = parsed_expression_to_string(expr_type.knn_expr.column_expr)
return f"match_dense(column=f{column_expr_str}, top={expr_type.knn_expr.topn})"

if expr_type.match_sparse_expr:
column_expr_str = parsed_expression_to_string(expr_type.match_sparse_expr.column_expr)
return f"match_sparse(column=f{column_expr_str}, top={expr_type.match_sparse_expr.topn})"

if expr_type.match_tensor_expr:
column_expr_str = parsed_expression_to_string(expr_type.match_tensor_expr.column_expr)
return f"match_tensor(column=f{column_expr_str}, top={expr_type.match_tensor_expr.topn})"

if expr_type.match_expr:
column_expr_str = parsed_expression_to_string(expr_type.match_expr.column_expr)
return f"match_text(column=f{column_expr_str}, top={expr_type.match_expr.topn})"

if expr_type.fusion_expr:
return f"fusion(method={expr_type.fusion_expr.method}, options={expr_type.fusion_expr.options_text})"

if expr_type.search_expr:
return f"search()"

if expr_type.in_expr:
arguments_str = str
for index, argument in enumerate(expr_type.in_expr.arguments):
arg_str = parsed_expression_to_string(argument)
if index == 0:
arguments_str = arg_str
else:
arguments_str = f"{arguments_str}, {arg_str}"

left_expr_str = parsed_expression_to_string(expr_type.in_expr.left_operand)
if expr_type.in_expr.in_type:
return f"{left_expr_str} IN (f{arguments_str})"
else:
return f"{left_expr_str} NOT IN (f{arguments_str})"

return ""

def search_to_string(search_expr: ttypes.SearchExpr) -> str:
    """Render a search expression as a comma-separated string.

    Match expressions are rendered first, followed by fusion expressions, so
    a search that carries both kinds no longer loses its fusion part.

    Args:
        search_expr: Search expression exposing ``match_exprs`` and
            ``fusion_exprs``.

    Returns:
        The rendered sub-expressions joined with ``", "``.

    Raises:
        InfinityException: With ``ErrorCode.INVALID_EXPRESSION`` when neither
            match nor fusion expressions are present.
    """
    # NOTE(review): elements are rendered with parsed_expression_to_string as
    # before; this module also defines generic_match_to_string and
    # fusion_to_string -- confirm which renderer matches the element types.
    rendered = [parsed_expression_to_string(match_expr) for match_expr in search_expr.match_exprs or []]
    rendered.extend(parsed_expression_to_string(fusion_expr) for fusion_expr in search_expr.fusion_exprs or [])

    if not rendered:
        raise InfinityException(ErrorCode.INVALID_EXPRESSION, "Invalid search expression")

    return ", ".join(rendered)


def fusion_to_string(fusion_expr: ttypes.FusionExpr) -> str:
    """Render a fusion expression as ``fusion(name=..., options=...)``.

    Fields read from *fusion_expr*: ``method``, ``options_text`` and the
    optional ``optional_match_tensor_expr``. When the latter is set, an
    ``optional_match_tensor=match_tensor(...)`` part is appended.
    """
    tensor_expr = fusion_expr.optional_match_tensor_expr
    if not tensor_expr:
        return f"fusion(name={fusion_expr.method}, options={fusion_expr.options_text})"

    column_text = parsed_expression_to_string(tensor_expr.column_expr)
    tensor_text = f"match_tensor(column={column_text}, top={tensor_expr.topn})"
    return f"fusion(name={fusion_expr.method}, options={fusion_expr.options_text}, optional_match_tensor={tensor_text})"

def generic_match_to_string(generic_match_expr: ttypes.GenericMatchExpr) -> str:
    """Render whichever match variant is set on *generic_match_expr*.

    The variants are checked in the order dense vector, sparse, tensor, text;
    the first one that is set is rendered as
    ``<label>(column=<column>, top=<topn>)``.

    Returns:
        The rendered match, or an empty string when no variant is set (the
        function used to fall through and return ``None`` in that case,
        contradicting its ``str`` return type).
    """
    # (attribute on the generic expression, label used in the rendering)
    # NOTE(review): every variant is assumed to expose ``column_expr`` and
    # ``topn`` -- confirm for the text match variant.
    variants = (
        ("match_vector_expr", "match_dense"),
        ("match_sparse_expr", "match_sparse"),
        ("match_tensor_expr", "match_tensor"),
        ("match_text_expr", "match_text"),
    )
    for attribute, label in variants:
        variant = getattr(generic_match_expr, attribute)
        if variant:
            column_expr_str = parsed_expression_to_string(variant.column_expr)
            return f"{label}(column={column_expr_str}, top={variant.topn})"

    return ""


def traverse_conditions(cons, fn=None) -> ttypes.ParsedExpr:
if isinstance(cons, exp.Binary):
parsed_expr = ttypes.ParsedExpr()
Expand Down

0 comments on commit 4d11f30

Please sign in to comment.