Skip to content

Commit c70ab45

Browse files
[search online] 修改检视意见
1 parent 4713bc7 commit c70ab45

File tree

1 file changed

+27
-88
lines changed

1 file changed

+27
-88
lines changed

app-builder/plugins/fit_py_internet_search/src/internet_search.py

Lines changed: 27 additions & 88 deletions
Original file line numberDiff line numberDiff line change
@@ -1,29 +1,8 @@
1-
"""
2-
Standalone internet search function that can query multiple providers (Exa, Tavily, Linkup)
3-
without coupling to the existing project code. No environment variables are read here; all
4-
API keys must be provided via function parameters.
5-
6-
Returned structure is a dictionary with one key:
7-
- items: list of {fileName, url, text, source, published_date, summary}
8-
9-
Example:
10-
11-
from nexent.core.tools.standalone_web_search import internet_search
12-
13-
result = internet_search(
14-
query="OpenAI o4 mini update",
15-
api_keys={
16-
"exa": "EXA_API_KEY",
17-
"tavily": "TAVILY_API_KEY",
18-
"linkup": "LINKUP_API_KEY",
19-
},
20-
providers=["exa", "tavily", "linkup"],
21-
max_results_per_provider=5,
22-
)
23-
24-
for item in result["items"]:
25-
print(item["fileName"], item["url"], item["summary"]) # Display URLs and individual summaries
26-
"""
1+
# -- encoding: utf-8 --
2+
# Copyright (c) 2024 Huawei Technologies Co., Ltd. All Rights Reserved.
3+
# This file is a part of the ModelEngine Project.
4+
# Licensed under the MIT License. See License.txt in the project root for license information.
5+
# ======================================================================================================================
276
import json
287
from dataclasses import dataclass
298
from typing import Dict, List, Optional, Sequence
@@ -96,62 +75,12 @@ def _truncate(text: str, max_chars: int) -> str:
9675
return text[: max_chars - 1].rstrip() + "…"
9776

9877

99-
def _generate_individual_summary(text: str, max_chars: int = 200) -> str:
100-
"""为单个搜索结果生成独立摘要
101-
102-
策略:
103-
- 如果内容较短,直接返回
104-
- 如果内容较长,提取前几个句子作为摘要
105-
- 确保摘要不超过最大字符限制
106-
"""
107-
if not text:
108-
return ""
109-
110-
# 如果内容已经很短,直接返回
111-
if len(text) <= max_chars:
112-
return text
113-
114-
# 按句子分割(简单按句号分割)
115-
sentences = text.split('. ')
116-
117-
# 收集句子直到达到字符限制
118-
summary_parts = []
119-
current_length = 0
120-
121-
for sentence in sentences:
122-
sentence = sentence.strip()
123-
if not sentence:
124-
continue
125-
126-
# 确保句子以句号结束
127-
if not sentence.endswith('.'):
128-
sentence += '.'
129-
130-
sentence_length = len(sentence) + 1 # +1 for space
131-
132-
# 如果添加这个句子会超过限制,且已经有内容,就停止
133-
if current_length + sentence_length > max_chars and summary_parts:
134-
break
135-
136-
summary_parts.append(sentence)
137-
current_length += sentence_length
138-
139-
summary = '. '.join(summary_parts)
140-
141-
# 确保不超过最大字符限制
142-
if len(summary) > max_chars:
143-
summary = summary[:max_chars].rstrip() + "…"
144-
145-
return summary
146-
147-
14878
def _internet_search(
14979
query: str,
15080
api_keys: Dict[str, str],
15181
providers: Optional[Sequence[str]] = None,
15282
max_results_per_provider: int = 5,
15383
max_snippet_chars: int = 500,
154-
max_summary_chars: int = 200,
15584
) -> List[SearchItem]:
15685
"""Run internet search via selected providers and return unified items with individual summaries."""
15786
selected = list(providers) if providers is not None else []
@@ -160,20 +89,20 @@ def _internet_search(
16089
if api_keys.get(name):
16190
selected.append(name)
16291
items: List[SearchItem] = []
92+
errors = [] # 记录失败的搜索工具
16393

16494
# Exa
16595
if "exa" in selected and api_keys.get("exa"):
16696
try:
16797
exa_client = Exa(api_key=api_keys["exa"])
168-
res = exa_client.search_and_texts(
98+
res = exa_client.search_and_contents(
16999
query,
170100
text={"max_characters": 2000},
171101
livecrawl="always",
172-
extras={"links": 0, "image_links": 0},
173102
num_results=max_results_per_provider,
174103
)
175104
for i, r in enumerate(getattr(res, "results", [])[:max_results_per_provider]):
176-
text = _truncate(getattr(r, "content", "") or "", max_snippet_chars)
105+
text = _truncate(getattr(r, "text", "") or getattr(r, "content", "") or "", max_snippet_chars)
177106
items.append(
178107
SearchItem(
179108
id=getattr(r, "id", "") or f"exa_{i}",
@@ -184,12 +113,13 @@ def _internet_search(
184113
"url": getattr(r, "url", "") or "",
185114
"source": "exa",
186115
"published_date": getattr(r, "published_date", None),
187-
"summary": _generate_individual_summary(text, max_summary_chars),
116+
"summary": text,
188117
}
189118
)
190119
)
191-
except Exception:
192-
pass
120+
except Exception as e:
121+
sys_plugin_logger.warning(f'Failed to search in Exa tool: {str(e)}')
122+
errors.append("exa")
193123

194124
# Tavily
195125
if "tavily" in selected and api_keys.get("tavily"):
@@ -212,12 +142,13 @@ def _internet_search(
212142
"url": r.get("url", "") or "",
213143
"source": "tavily",
214144
"published_date": r.get("published_date"),
215-
"summary": _generate_individual_summary(text, max_summary_chars),
145+
"summary": text,
216146
}
217147
)
218148
)
219-
except Exception:
220-
pass
149+
except Exception as e:
150+
sys_plugin_logger.warning(f'Failed to search in Tavily tool: {str(e)}')
151+
errors.append("tavily")
221152

222153
# Linkup
223154
if "linkup" in selected and api_keys.get("linkup"):
@@ -241,12 +172,20 @@ def _internet_search(
241172
"url": getattr(r, "url", "") or "",
242173
"source": "linkup",
243174
"published_date": None,
244-
"summary": _generate_individual_summary(text, max_summary_chars),
175+
"summary": text,
245176
}
246177
)
247178
)
248-
except Exception:
249-
pass
179+
except Exception as e:
180+
sys_plugin_logger.warning(f'Failed to search in Linkup tool: {str(e)}')
181+
errors.append("linkup")
182+
183+
# Raise only when no results were collected and at least one search provider failed
184+
if not items and errors:
185+
raise FitException(
186+
InternalErrorCode.CLIENT_ERROR,
187+
f'All search tools failed: {", ".join(errors)}'
188+
)
250189

251190
# Deduplication logic
252191
seen = set()

0 commit comments

Comments
 (0)