1- """
2- Standalone internet search function that can query multiple providers (Exa, Tavily, Linkup)
3- without coupling to the existing project code. No environment variables are read here; all
4- API keys must be provided via function parameters.
5-
6- Returned structure is a dictionary with one key:
7- - items: list of {fileName, url, text, source, published_date, summary}
8-
9- Example:
10-
11- from nexent.core.tools.standalone_web_search import internet_search
12-
13- result = internet_search(
14- query="OpenAI o4 mini update",
15- api_keys={
16- "exa": "EXA_API_KEY",
17- "tavily": "TAVILY_API_KEY",
18- "linkup": "LINKUP_API_KEY",
19- },
20- providers=["exa", "tavily", "linkup"],
21- max_results_per_provider=5,
22- )
23-
24- for item in result["items"]:
25- print(item["fileName"], item["url"], item["summary"]) # Display URLs and individual summaries
26- """
2+ # -*- coding: utf-8 -*-
2+ # Copyright (c) 2024 Huawei Technologies Co., Ltd. All Rights Reserved.
3+ # This file is a part of the ModelEngine Project.
4+ # Licensed under the MIT License. See License.txt in the project root for license information.
5+ # ======================================================================================================================
276import json
287from dataclasses import dataclass
298from typing import Dict , List , Optional , Sequence
@@ -96,62 +75,12 @@ def _truncate(text: str, max_chars: int) -> str:
9675 return text [: max_chars - 1 ].rstrip () + "…"
9776
9877
99- def _generate_individual_summary (text : str , max_chars : int = 200 ) -> str :
100- """为单个搜索结果生成独立摘要
101-
102- 策略:
103- - 如果内容较短,直接返回
104- - 如果内容较长,提取前几个句子作为摘要
105- - 确保摘要不超过最大字符限制
106- """
107- if not text :
108- return ""
109-
110- # 如果内容已经很短,直接返回
111- if len (text ) <= max_chars :
112- return text
113-
114- # 按句子分割(简单按句号分割)
115- sentences = text .split ('. ' )
116-
117- # 收集句子直到达到字符限制
118- summary_parts = []
119- current_length = 0
120-
121- for sentence in sentences :
122- sentence = sentence .strip ()
123- if not sentence :
124- continue
125-
126- # 确保句子以句号结束
127- if not sentence .endswith ('.' ):
128- sentence += '.'
129-
130- sentence_length = len (sentence ) + 1 # +1 for space
131-
132- # 如果添加这个句子会超过限制,且已经有内容,就停止
133- if current_length + sentence_length > max_chars and summary_parts :
134- break
135-
136- summary_parts .append (sentence )
137- current_length += sentence_length
138-
139- summary = '. ' .join (summary_parts )
140-
141- # 确保不超过最大字符限制
142- if len (summary ) > max_chars :
143- summary = summary [:max_chars ].rstrip () + "…"
144-
145- return summary
146-
147-
14878def _internet_search (
14979 query : str ,
15080 api_keys : Dict [str , str ],
15181 providers : Optional [Sequence [str ]] = None ,
15282 max_results_per_provider : int = 5 ,
15383 max_snippet_chars : int = 500 ,
154- max_summary_chars : int = 200 ,
15584) -> List [SearchItem ]:
15685 """Run internet search via selected providers and return unified items with individual summaries."""
15786 selected = list (providers ) if providers is not None else []
@@ -160,20 +89,20 @@ def _internet_search(
16089 if api_keys .get (name ):
16190 selected .append (name )
16291 items : List [SearchItem ] = []
92+ errors = [] # 记录失败的搜索工具
16393
16494 # Exa
16595 if "exa" in selected and api_keys .get ("exa" ):
16696 try :
16797 exa_client = Exa (api_key = api_keys ["exa" ])
168- res = exa_client .search_and_texts (
98+ res = exa_client .search_and_contents (
16999 query ,
170100 text = {"max_characters" : 2000 },
171101 livecrawl = "always" ,
172- extras = {"links" : 0 , "image_links" : 0 },
173102 num_results = max_results_per_provider ,
174103 )
175104 for i , r in enumerate (getattr (res , "results" , [])[:max_results_per_provider ]):
176- text = _truncate (getattr (r , "content" , "" ) or "" , max_snippet_chars )
105+ text = _truncate(getattr(r, "text", "") or getattr(r, "content", "") or "", max_snippet_chars)
177106 items .append (
178107 SearchItem (
179108 id = getattr (r , "id" , "" ) or f"exa_{ i } " ,
@@ -184,12 +113,13 @@ def _internet_search(
184113 "url" : getattr (r , "url" , "" ) or "" ,
185114 "source" : "exa" ,
186115 "published_date" : getattr (r , "published_date" , None ),
187- "summary" : _generate_individual_summary ( text , max_summary_chars ) ,
116+ "summary" : text ,
188117 }
189118 )
190119 )
191- except Exception :
192- pass
120+ except Exception as e :
121+ sys_plugin_logger .warning (f'Failed to search in Exa tool: { str (e )} ' )
122+ errors .append ("exa" )
193123
194124 # Tavily
195125 if "tavily" in selected and api_keys .get ("tavily" ):
@@ -212,12 +142,13 @@ def _internet_search(
212142 "url" : r .get ("url" , "" ) or "" ,
213143 "source" : "tavily" ,
214144 "published_date" : r .get ("published_date" ),
215- "summary" : _generate_individual_summary ( text , max_summary_chars ) ,
145+ "summary" : text ,
216146 }
217147 )
218148 )
219- except Exception :
220- pass
149+ except Exception as e :
150+ sys_plugin_logger .warning (f'Failed to search in Tavily tool: { str (e )} ' )
151+ errors .append ("tavily" )
221152
222153 # Linkup
223154 if "linkup" in selected and api_keys .get ("linkup" ):
@@ -241,12 +172,20 @@ def _internet_search(
241172 "url" : getattr (r , "url" , "" ) or "" ,
242173 "source" : "linkup" ,
243174 "published_date" : None ,
244- "summary" : _generate_individual_summary ( text , max_summary_chars ) ,
175+ "summary" : text ,
245176 }
246177 )
247178 )
248- except Exception :
249- pass
179+ except Exception as e :
180+ sys_plugin_logger .warning (f'Failed to search in Linkup tool: { str (e )} ' )
181+ errors .append ("linkup" )
182+
183+ # 如果所有搜索都失败了,才抛出异常
184+ if not items and errors :
185+ raise FitException (
186+ InternalErrorCode .CLIENT_ERROR ,
187+ f'All search tools failed: { ", " .join (errors )} '
188+ )
250189
251190 # 去重逻辑
252191 seen = set ()
0 commit comments