-
Notifications
You must be signed in to change notification settings - Fork 28
Refactor to use DBMS_HYBRID_SEARCH.SEARCH directly #148
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: develop
Are you sure you want to change the base?
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change | ||||
|---|---|---|---|---|---|---|
|
|
@@ -2546,14 +2546,13 @@ def _collection_hybrid_search( | |||||
| # Execute SET statement | ||||||
| self._execute_query_with_cursor(conn, set_sql, [], use_context_manager) | ||||||
|
|
||||||
| # Get SQL query from DBMS_HYBRID_SEARCH.GET_SQL | ||||||
| get_sql_query = f"SELECT DBMS_HYBRID_SEARCH.GET_SQL('{table_name}', @search_parm) as query_sql FROM dual" | ||||||
| logger.debug(f"Getting SQL query: {get_sql_query}") | ||||||
| # Call DBMS_HYBRID_SEARCH.SEARCH directly instead of GET_SQL + SQL execution | ||||||
| search_query = f"SELECT DBMS_HYBRID_SEARCH.SEARCH('{table_name}', @search_parm) as search_result FROM dual" | ||||||
| logger.debug(f"Executing search query: {search_query}") | ||||||
|
|
||||||
| rows = self._execute_query_with_cursor(conn, get_sql_query, [], use_context_manager) | ||||||
|
|
||||||
| if not rows or not rows[0].get("query_sql"): | ||||||
| logger.warning("No SQL query returned from GET_SQL") | ||||||
| rows = self._execute_query_with_cursor(conn, search_query, [], use_context_manager) | ||||||
| if not rows or not rows[0].get("search_result"): | ||||||
| logger.warning("No result returned from DBMS_HYBRID_SEARCH.SEARCH") | ||||||
| return { | ||||||
| "ids": [[]], | ||||||
| "distances": [[]], | ||||||
|
|
@@ -2562,19 +2561,16 @@ def _collection_hybrid_search( | |||||
| "embeddings": [[]], | ||||||
| } | ||||||
|
|
||||||
| # Get the SQL query string | ||||||
| query_sql = rows[0]["query_sql"] | ||||||
| if isinstance(query_sql, str): | ||||||
| # Remove any surrounding quotes if present | ||||||
| query_sql = query_sql.strip().strip("'\"") | ||||||
|
|
||||||
| logger.debug(f"Executing query SQL: {query_sql}") | ||||||
|
|
||||||
| # Execute the returned SQL query | ||||||
| result_rows = self._execute_query_with_cursor(conn, query_sql, [], use_context_manager) | ||||||
| # Parse the search result JSON | ||||||
| search_result_json = rows[0]["search_result"] | ||||||
| if isinstance(search_result_json, str): | ||||||
| search_result = json.loads(search_result_json) | ||||||
| else: | ||||||
| search_result = search_result_json | ||||||
|
Comment on lines
+2564
to
+2569
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. 🧩 Analysis chain🏁 Script executed: # First, let's examine the file structure and the specific lines mentioned
wc -l src/pyseekdb/client/client_base.pyRepository: oceanbase/pyseekdb Length of output: 101 🏁 Script executed: # Read the context around lines 2564-2573
sed -n '2540,2590p' src/pyseekdb/client/client_base.pyRepository: oceanbase/pyseekdb Length of output: 2233 🏁 Script executed: # Find the _transform_sql_result method definition
rg -n "_transform_sql_result" src/pyseekdb/client/client_base.pyRepository: oceanbase/pyseekdb Length of output: 182 🏁 Script executed: # Find the _transform_search_result method definition
rg -n "_transform_search_result" src/pyseekdb/client/client_base.pyRepository: oceanbase/pyseekdb Length of output: 182 🏁 Script executed: # Search for DBMS_HYBRID_SEARCH.SEARCH usage
rg -n "DBMS_HYBRID_SEARCH\.SEARCH" src/pyseekdb/client/client_base.py -A 5 -B 5Repository: oceanbase/pyseekdb Length of output: 1494 🏁 Script executed: # Read _transform_sql_result implementation
sed -n '3007,3060p' src/pyseekdb/client/client_base.pyRepository: oceanbase/pyseekdb Length of output: 2030 🏁 Script executed: # Read _transform_search_result implementation
sed -n '3107,3170p' src/pyseekdb/client/client_base.pyRepository: oceanbase/pyseekdb Length of output: 2271 🏁 Script executed: # Check type hints for both methods
sed -n '3007,3010p' src/pyseekdb/client/client_base.py
sed -n '3107,3110p' src/pyseekdb/client/client_base.pyRepository: oceanbase/pyseekdb Length of output: 539 Validate SEARCH response schema before transforming.
Suggested validation- # Transform search result to standard format
- return self._transform_sql_result(search_result, include)
+ # Transform search result to standard format
+ if isinstance(search_result, dict) and "hits" in search_result:
+ return self._transform_search_result(search_result, include)
+ if isinstance(search_result, dict) and "rows" in search_result:
+ search_result = search_result["rows"]
+ if not isinstance(search_result, list):
+ raise ValueError(f"Unexpected SEARCH result schema: {type(search_result)}")
+ return self._transform_sql_result(search_result, include)🤖 Prompt for AI Agents |
||||||
| logger.debug(f"Search result received from DBMS_HYBRID_SEARCH.SEARCH") | ||||||
|
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Remove redundant f-string.
🧹 Proposed fix- logger.debug(f"Search result received from DBMS_HYBRID_SEARCH.SEARCH")
+ logger.debug("Search result received from DBMS_HYBRID_SEARCH.SEARCH")📝 Committable suggestion
Suggested change
🧰 Tools🪛 Ruff (0.14.14)2570-2570: f-string without any placeholders Remove extraneous (F541) 🤖 Prompt for AI AgentsThere was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. There is no placeholders in this f-string, please check if this is intentional |
||||||
|
|
||||||
| # Transform SQL query results to standard format | ||||||
| return self._transform_sql_result(result_rows, include) | ||||||
| # Transform search result to standard format | ||||||
| return self._transform_sql_result(search_result, include) | ||||||
|
|
||||||
| def _build_search_parm( # noqa: C901 | ||||||
| self, | ||||||
|
|
||||||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
🧩 Analysis chain
🏁 Script executed:
Repository: oceanbase/pyseekdb
Length of output: 1876
🏁 Script executed:
Repository: oceanbase/pyseekdb
Length of output: 2494
🏁 Script executed:
Repository: oceanbase/pyseekdb
Length of output: 929
🏁 Script executed:
Repository: oceanbase/pyseekdb
Length of output: 770
🏁 Script executed:
Repository: oceanbase/pyseekdb
Length of output: 1576
🏁 Script executed:
Repository: oceanbase/pyseekdb
Length of output: 1876
🏁 Script executed:
Repository: oceanbase/pyseekdb
Length of output: 331
🏁 Script executed:
Repository: oceanbase/pyseekdb
Length of output: 1873
🏁 Script executed:
Repository: oceanbase/pyseekdb
Length of output: 2960
🏁 Script executed:
Repository: oceanbase/pyseekdb
Length of output: 460
🏁 Script executed:
Repository: oceanbase/pyseekdb
Length of output: 1795
🏁 Script executed:
Repository: oceanbase/pyseekdb
Length of output: 450
Escape or parameterize the
table_namein the DBMS_HYBRID_SEARCH.SEARCH call to prevent SQL injection.The
table_nameis derived from user-suppliedcollection_nameviaCollectionNames.table_name()and is interpolated directly into the SQL string without escaping or parameterization. Although the preceding SET statement usesescape_string()for the search parameters, this call lacks protection. Use parameter binding with%sto align with the parameterized approach already supported by_execute_query_with_cursor.Proposed fix
🧰 Tools
🪛 Ruff (0.14.14)
2550-2550: Possible SQL injection vector through string-based query construction
(S608)
🤖 Prompt for AI Agents