Skip to content

Commit 04b1b43

Browse files
authored
Merge pull request #1472 from hurxxxx/master
feat: Add enhanced Serper API support with country, language, date filters and configurable site exclusion
2 parents 06106bd + 7031d19 commit 04b1b43

File tree

2 files changed

+80
-11
lines changed

2 files changed

+80
-11
lines changed

docs/docs/gpt-researcher/search-engines/search-engines.md

Lines changed: 24 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -28,7 +28,7 @@ Thanks to our community, we have integrated the following web search engines:
2828
- [Google](https://developers.google.com/custom-search/v1/overview) - Env: `RETRIEVER=google`
2929
- [SearchApi](https://www.searchapi.io/) - Env: `RETRIEVER=searchapi`
3030
- [Serp API](https://serpapi.com/) - Env: `RETRIEVER=serpapi`
31-
- [Serper](https://serper.dev/) - Env: `RETRIEVER=serper`
31+
- [Serper](https://serper.dev/) - Env: `RETRIEVER=serper` - [Setup Guide](#serper)
3232
- [Searx](https://searx.github.io/searx/) - Env: `RETRIEVER=searx`
3333
- [Duckduckgo](https://pypi.org/project/duckduckgo-search/) - Env: `RETRIEVER=duckduckgo`
3434
- [Arxiv](https://info.arxiv.org/help/api/index.html) - Env: `RETRIEVER=arxiv`
@@ -72,4 +72,27 @@ For the custom retriever to work correctly, the response from the endpoint shoul
7272

7373
The system assumes this response format and processes the list of sources accordingly.
7474

75+
## Search Engine Configuration
76+
77+
### Serper
78+
79+
To use [Serper](https://serper.dev/) as your search engine:
80+
81+
1. Get your API key from [serper.dev](https://serper.dev/)
82+
2. Set the required environment variables:
83+
84+
```bash
85+
RETRIEVER=serper
86+
SERPER_API_KEY=your_api_key_here
87+
```
88+
89+
**Optional Configuration:**
90+
91+
```bash
92+
SERPER_REGION=us # Country code (us, kr, jp, etc.)
93+
SERPER_LANGUAGE=en # Language code (en, ko, ja, etc.)
94+
SERPER_TIME_RANGE=qdr:w # Time filter: past hour/day/week/month/year (qdr:h, qdr:d, qdr:w, qdr:m, qdr:y)
95+
SERPER_EXCLUDE_SITES=youtube.com # Sites to exclude from results (comma-separated, e.g. youtube.com,reddit.com)
96+
```
97+
7598
Missing a retriever? Feel free to contribute to this project by submitting issues or pull requests on our [GitHub](https://github.com/assafelovic/gpt-researcher) page.

gpt_researcher/retrievers/serper/serper.py

Lines changed: 56 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -8,19 +8,39 @@
88

99
class SerperSearch():
1010
"""
11-
Google Serper Retriever
11+
Google Serper Retriever with support for country, language, and date filtering
1212
"""
13-
def __init__(self, query, query_domains=None):
13+
def __init__(self, query, query_domains=None, country=None, language=None, time_range=None, exclude_sites=None):
1414
"""
1515
Initializes the SerperSearch object
1616
Args:
1717
query (str): The search query string.
1818
query_domains (list, optional): List of domains to include in the search. Defaults to None.
19+
country (str, optional): Country code for search results (e.g., 'us', 'kr', 'jp'). Defaults to None.
20+
language (str, optional): Language code for search results (e.g., 'en', 'ko', 'ja'). Defaults to None.
21+
time_range (str, optional): Time range filter (e.g., 'qdr:h', 'qdr:d', 'qdr:w', 'qdr:m', 'qdr:y'). Defaults to None.
22+
exclude_sites (list, optional): List of sites to exclude from search results. Defaults to None.
1923
"""
2024
self.query = query
2125
self.query_domains = query_domains or None
26+
self.country = country or os.getenv("SERPER_REGION")
27+
self.language = language or os.getenv("SERPER_LANGUAGE")
28+
self.time_range = time_range or os.getenv("SERPER_TIME_RANGE")
29+
self.exclude_sites = exclude_sites or self._get_exclude_sites_from_env()
2230
self.api_key = self.get_api_key()
2331

32+
def _get_exclude_sites_from_env(self):
33+
"""
34+
Gets the list of sites to exclude from environment variables
35+
Returns:
36+
list: List of sites to exclude
37+
"""
38+
exclude_sites_env = os.getenv("SERPER_EXCLUDE_SITES", "")
39+
if exclude_sites_env:
40+
# Split by comma and strip whitespace
41+
return [site.strip() for site in exclude_sites_env.split(",") if site.strip()]
42+
return []
43+
2444
def get_api_key(self):
2545
"""
2646
Gets the Serper API key
@@ -36,12 +56,12 @@ def get_api_key(self):
3656

3757
def search(self, max_results=7):
3858
"""
39-
Searches the query
59+
Searches the query with optional country, language, and time filtering
4060
Returns:
41-
61+
list: List of search results with title, href, and body
4262
"""
4363
print("Searching with query {0}...".format(self.query))
44-
"""Useful for general internet search queries using the Serp API."""
64+
"""Useful for general internet search queries using the Serper API."""
4565

4666
# Search the query (see https://serper.dev/playground for the format)
4767
url = "https://google.serper.dev/search"
@@ -51,8 +71,36 @@ def search(self, max_results=7):
5171
'Content-Type': 'application/json'
5272
}
5373

54-
# TODO: Add support for query domains
55-
data = json.dumps({"q": self.query, "num": max_results})
74+
# Build search parameters
75+
query_with_filters = self.query
76+
77+
# Exclude sites using Google search syntax
78+
if self.exclude_sites:
79+
for site in self.exclude_sites:
80+
query_with_filters += f" -site:{site}"
81+
82+
# Add domain filtering if specified
83+
if self.query_domains:
84+
# Add site:domain1 OR site:domain2 OR ... to the search query
85+
domain_query = " site:" + " OR site:".join(self.query_domains)
86+
query_with_filters += domain_query
87+
88+
search_params = {
89+
"q": query_with_filters,
90+
"num": max_results
91+
}
92+
93+
# Add optional parameters if they exist
94+
if self.country:
95+
search_params["gl"] = self.country # Geographic location (country)
96+
97+
if self.language:
98+
search_params["hl"] = self.language # Host language
99+
100+
if self.time_range:
101+
search_params["tbs"] = self.time_range # Time-based search
102+
103+
data = json.dumps(search_params)
56104

57105
resp = requests.request("POST", url, timeout=10, headers=headers, data=data)
58106

@@ -70,10 +118,8 @@ def search(self, max_results=7):
70118
search_results = []
71119

72120
# Normalize the results to match the format of the other search APIs
121+
# Excluded sites should already be filtered out by the query parameters
73122
for result in results:
74-
# skip youtube results
75-
if "youtube.com" in result["link"]:
76-
continue
77123
search_result = {
78124
"title": result["title"],
79125
"href": result["link"],

0 commit comments

Comments
 (0)