8
8
9
9
class SerperSearch ():
10
10
"""
11
- Google Serper Retriever
11
+ Google Serper Retriever with support for country, language, and date filtering
12
12
"""
13
- def __init__ (self , query , query_domains = None ):
13
+ def __init__ (self , query , query_domains = None , country = None , language = None , time_range = None , exclude_sites = None ):
14
14
"""
15
15
Initializes the SerperSearch object
16
16
Args:
17
17
query (str): The search query string.
18
18
query_domains (list, optional): List of domains to include in the search. Defaults to None.
19
+ country (str, optional): Country code for search results (e.g., 'us', 'kr', 'jp'). Defaults to None.
20
+ language (str, optional): Language code for search results (e.g., 'en', 'ko', 'ja'). Defaults to None.
21
+ time_range (str, optional): Time range filter (e.g., 'qdr:h', 'qdr:d', 'qdr:w', 'qdr:m', 'qdr:y'). Defaults to None.
22
+ exclude_sites (list, optional): List of sites to exclude from search results. Defaults to None.
19
23
"""
20
24
self .query = query
21
25
self .query_domains = query_domains or None
26
+ self .country = country or os .getenv ("SERPER_REGION" )
27
+ self .language = language or os .getenv ("SERPER_LANGUAGE" )
28
+ self .time_range = time_range or os .getenv ("SERPER_TIME_RANGE" )
29
+ self .exclude_sites = exclude_sites or self ._get_exclude_sites_from_env ()
22
30
self .api_key = self .get_api_key ()
23
31
32
+ def _get_exclude_sites_from_env (self ):
33
+ """
34
+ Gets the list of sites to exclude from environment variables
35
+ Returns:
36
+ list: List of sites to exclude
37
+ """
38
+ exclude_sites_env = os .getenv ("SERPER_EXCLUDE_SITES" , "" )
39
+ if exclude_sites_env :
40
+ # Split by comma and strip whitespace
41
+ return [site .strip () for site in exclude_sites_env .split ("," ) if site .strip ()]
42
+ return []
43
+
24
44
def get_api_key (self ):
25
45
"""
26
46
Gets the Serper API key
@@ -36,12 +56,12 @@ def get_api_key(self):
36
56
37
57
def search (self , max_results = 7 ):
38
58
"""
39
- Searches the query
59
+ Searches the query with optional country, language, and time filtering
40
60
Returns:
41
-
61
+ list: List of search results with title, href, and body
42
62
"""
43
63
print ("Searching with query {0}..." .format (self .query ))
44
- """Useful for general internet search queries using the Serp API."""
64
+ """Useful for general internet search queries using the Serper API."""
45
65
46
66
# Search the query (see https://serper.dev/playground for the format)
47
67
url = "https://google.serper.dev/search"
@@ -51,8 +71,36 @@ def search(self, max_results=7):
51
71
'Content-Type' : 'application/json'
52
72
}
53
73
54
- # TODO: Add support for query domains
55
- data = json .dumps ({"q" : self .query , "num" : max_results })
74
+ # Build search parameters
75
+ query_with_filters = self .query
76
+
77
+ # Exclude sites using Google search syntax
78
+ if self .exclude_sites :
79
+ for site in self .exclude_sites :
80
+ query_with_filters += f" -site:{ site } "
81
+
82
+ # Add domain filtering if specified
83
+ if self .query_domains :
84
+ # Add site:domain1 OR site:domain2 OR ... to the search query
85
+ domain_query = " site:" + " OR site:" .join (self .query_domains )
86
+ query_with_filters += domain_query
87
+
88
+ search_params = {
89
+ "q" : query_with_filters ,
90
+ "num" : max_results
91
+ }
92
+
93
+ # Add optional parameters if they exist
94
+ if self .country :
95
+ search_params ["gl" ] = self .country # Geographic location (country)
96
+
97
+ if self .language :
98
+ search_params ["hl" ] = self .language # Host language
99
+
100
+ if self .time_range :
101
+ search_params ["tbs" ] = self .time_range # Time-based search
102
+
103
+ data = json .dumps (search_params )
56
104
57
105
resp = requests .request ("POST" , url , timeout = 10 , headers = headers , data = data )
58
106
@@ -70,10 +118,8 @@ def search(self, max_results=7):
70
118
search_results = []
71
119
72
120
# Normalize the results to match the format of the other search APIs
121
+ # Excluded sites should already be filtered out by the query parameters
73
122
for result in results :
74
- # skip youtube results
75
- if "youtube.com" in result ["link" ]:
76
- continue
77
123
search_result = {
78
124
"title" : result ["title" ],
79
125
"href" : result ["link" ],
0 commit comments