diff --git a/src/libs/LangChain.Core/LangChain.Core.csproj b/src/libs/LangChain.Core/LangChain.Core.csproj
index 7d9c31fa..3e558272 100644
--- a/src/libs/LangChain.Core/LangChain.Core.csproj
+++ b/src/libs/LangChain.Core/LangChain.Core.csproj
@@ -14,10 +14,6 @@
-
-
-
-
LangChain core classes.
$(PackageTags);core
@@ -32,6 +28,7 @@
+
diff --git a/src/libs/LangChain.Core/Retrievers/WebSearchRetriever.cs b/src/libs/LangChain.Core/Retrievers/WebSearchRetriever.cs
new file mode 100644
index 00000000..b97d07a1
--- /dev/null
+++ b/src/libs/LangChain.Core/Retrievers/WebSearchRetriever.cs
@@ -0,0 +1,32 @@
+using LangChain.Callback;
+using LangChain.Docstore;
+using LangChain.Utilities;
+
+namespace LangChain.Retrievers;
+
+/// <summary>
+/// Retriever that forwards a query to an <see cref="IWebSearch"/> backend and exposes
+/// every search hit as a <see cref="Document"/>.
+/// </summary>
+public sealed class WebSearchRetriever : BaseRetriever
+{
+ // Search backend that executes the queries.
+ private readonly IWebSearch _webSearch;
+ // Number of results requested from the backend for each query.
+ private readonly int _k;
+
+ /// <summary>
+ /// Creates a retriever on top of the given search backend.
+ /// </summary>
+ /// <param name="webSearch">Backend used to run the queries.</param>
+ /// <param name="k">Number of results to request per query; defaults to 10.</param>
+ public WebSearchRetriever(IWebSearch webSearch, int k = 10)
+ {
+ _webSearch = webSearch;
+ _k = k;
+ }
+
+ /// <summary>
+ /// Runs <paramref name="query"/> through the search backend and maps every hit to a document.
+ /// </summary>
+ /// <param name="query">Text passed unchanged to the backend.</param>
+ /// <param name="runManager">Not used by this implementation.</param>
+ /// <returns>
+ /// One document per search result. The result body is passed as the first constructor
+ /// argument of the document (presumably its page content - confirm against Document), and
+ /// the metadata dictionary holds the entries "title" and "link".
+ /// </returns>
+ protected override async Task> GetRelevantDocumentsCoreAsync(
+ string query,
+ CallbackManagerForRetrieverRun runManager = null)
+ {
+ // The backend is asked for _k results; how many actually come back is up to the backend.
+ var searchResult = await _webSearch.ResultsAsync(query, _k);
+
+ return searchResult.Select(v => new Document(
+ v.Body,
+ new Dictionary()
+ {
+ ["title"] = v.Title,
+ ["link"] = v.Link
+ }));
+ }
+}
\ No newline at end of file
diff --git a/src/libs/LangChain.Core/Utilities/DuckDuckGoSearch.cs b/src/libs/LangChain.Core/Utilities/DuckDuckGoSearch.cs
new file mode 100644
index 00000000..b7ba93e3
--- /dev/null
+++ b/src/libs/LangChain.Core/Utilities/DuckDuckGoSearch.cs
@@ -0,0 +1,396 @@
+using System.Net;
+using System.Net.Http;
+using System.Text.Json;
+using System.Text.Json.Serialization;
+using System.Text.RegularExpressions;
+
+namespace LangChain.Utilities;
+
+/// <summary>
+/// DuckDuckGo search client.
+/// </summary>
+public sealed class DuckDuckGoSearch : IDisposable
+{
+    // HTTP client owned by this instance and released in Dispose().
+    // Redirects are followed automatically, at most two per request.
+    private readonly HttpClient _client = new(
+        new HttpClientHandler
+        {
+            AllowAutoRedirect = true,
+            MaxAutomaticRedirections = 2
+        });
+
+    // Matches any "<...>" span (non-greedy); used to strip HTML tags.
+    // Static so the compiled pattern is built once instead of once per instance.
+    private static readonly Regex _stringTagsRegex = new Regex("<.*?>", RegexOptions.Compiled);
+
+    // Matches a "three digits, dash, two digits, .js" fragment such as '506-00.js'.
+    private static readonly Regex _regex500InUrl = new Regex("(?:\\d{3}-\\d{2}\\.js)", RegexOptions.Compiled);
+
+    /// <summary>
+    /// DuckDuckGo text search generator. Query params: https://duckduckgo.com/params
+    /// </summary>
+    /// <remarks>
+    /// Results are always collected through the JSON endpoint used by
+    /// <c>TextSearchApiAsync</c>; there is no parameter for choosing another backend.
+    /// </remarks>
+    /// <param name="keywords">Keywords for the query.</param>
+    /// <param name="region">Region code sent with the query; defaults to "wt-wt".</param>
+    /// <param name="safeSearch">Safe-search level; defaults to <see cref="SafeSearchType.Moderate"/>.</param>
+    /// <param name="timeLimit">Optional time limit for the results; <c>null</c> sends none.</param>
+    /// <param name="maxResults">
+    /// Max number of results. If <c>null</c>, returns results only from the first response.
+    /// Zero or a negative value yields nothing.
+    /// </param>
+    /// <returns>
+    /// A lazily evaluated stream of results; each one is a dictionary with the keys
+    /// "title", "href" and "body".
+    /// </returns>
+    public async IAsyncEnumerable<Dictionary<string, string>> TextSearchAsync(
+        string keywords,
+        string region = "wt-wt",
+        SafeSearchType safeSearch = SafeSearchType.Moderate,
+        TimeLimit? timeLimit = null,
+        int? maxResults = null)
+    {
+        // The cap is checked only after an item has been handed out, so a non-positive
+        // cap has to be rejected up front - otherwise one result would still leak through.
+        if (maxResults is <= 0)
+        {
+            yield break;
+        }
+
+        var yielded = 0;
+        await foreach (var result in TextSearchApiAsync(keywords, region, safeSearch, timeLimit, maxResults))
+        {
+            yield return result;
+
+            yielded++;
+            if (maxResults != null && yielded >= maxResults)
+            {
+                yield break;
+            }
+        }
+    }
+
+    /// <summary>
+    /// Pages through https://links.duckduckgo.com/d.js and yields every result that has
+    /// not been seen before and has a non-empty body.
+    /// </summary>
+    /// <remarks>
+    /// At most 11 pages are requested. Paging stops early when <paramref name="maxResults"/>
+    /// is <c>null</c>, when a page is empty or cannot be read, when a page produced no new
+    /// result, or when no next-page offset can be found.
+    /// </remarks>
+    /// <param name="keywords">Keywords for the query.</param>
+    /// <param name="region">Region code sent with the query.</param>
+    /// <param name="safeSearch">Safe-search level.</param>
+    /// <param name="timeLimit">Optional time limit for the results.</param>
+    /// <param name="maxResults">Only decides whether more than one page is requested; the cap itself is applied by the caller.</param>
+    /// <returns>Dictionaries with the keys "title", "href" and "body".</returns>
+    private async IAsyncEnumerable<Dictionary<string, string>> TextSearchApiAsync(
+        string keywords,
+        string region,
+        SafeSearchType safeSearch,
+        TimeLimit? timeLimit,
+        int? maxResults)
+    {
+        const int maxPages = 11;
+
+        var payload = await GetPayloadAsync(keywords, region, safeSearch, timeLimit).ConfigureAwait(false);
+        var ignoredUrl = $"http://www.google.com/search?q={keywords}";
+        var seenUrls = new HashSet<string>();
+
+        for (var page = 0; page < maxPages; page++)
+        {
+            LinksResponse.LinksResponseItem[]? pageData;
+
+            // The response is needed only until its body has been parsed, so it is
+            // released before any result is handed to the consumer.
+            using (var response = await HttpGetAsync("https://links.duckduckgo.com/d.js", payload).ConfigureAwait(false))
+            {
+                if (response.StatusCode != HttpStatusCode.OK)
+                {
+                    yield break;
+                }
+
+                try
+                {
+                    var contentRaw = await response.Content.ReadAsStringAsync().ConfigureAwait(false);
+                    pageData = JsonSerializer.Deserialize<LinksResponse>(contentRaw)?.Results;
+                }
+                catch (Exception e) when (e is JsonException or HttpRequestException or System.IO.IOException)
+                {
+                    // Best effort: a body that cannot be read or parsed ends the stream
+                    // instead of failing the whole search.
+                    yield break;
+                }
+            }
+
+            if (pageData == null || pageData.Length == 0)
+            {
+                yield break;
+            }
+
+            string? nextPageUrl = null;
+            var foundNewResult = false;
+            foreach (var row in pageData)
+            {
+                var href = row.Url;
+                if (href != null && href != ignoredUrl && seenUrls.Add(href))
+                {
+                    var body = NormalizeHtml(row.Body);
+                    if (!String.IsNullOrEmpty(body))
+                    {
+                        foundNewResult = true;
+                        yield return new Dictionary<string, string>
+                        {
+                            ["title"] = NormalizeHtml(row.Title),
+                            ["href"] = NormalizeUrl(href),
+                            ["body"] = body,
+                        };
+                    }
+                }
+                else
+                {
+                    // Rows that are not a new result are consulted for the next-page link.
+                    nextPageUrl = row.NextPageUrl;
+                }
+            }
+
+            if (maxResults == null || !foundNewResult || String.IsNullOrEmpty(nextPageUrl))
+            {
+                yield break;
+            }
+
+            var nextOffset = ExtractNextOffset(nextPageUrl);
+            if (nextOffset == null)
+            {
+                // No offset in the link: there is nothing to request next.
+                yield break;
+            }
+
+            payload["s"] = nextOffset;
+
+            await Sleep().ConfigureAwait(false);
+        }
+    }
+
+    /// <summary>
+    /// Returns the text that follows the first "s=" split point of a next-page URL, up to
+    /// the next '&amp;', or <c>null</c> when the URL has no such part.
+    /// </summary>
+    private static string? ExtractNextOffset(string nextPageUrl)
+    {
+        var parts = nextPageUrl.Split(new[] { "s=" }, StringSplitOptions.RemoveEmptyEntries);
+
+        return parts.Length > 1
+            ? parts[1].Split('&')[0]
+            : null;
+    }
+
+    /// <summary>
+    /// Builds the query parameters for the links endpoint, including the vqd value
+    /// obtained for <paramref name="keywords"/>.
+    /// </summary>
+    /// <param name="keywords">Keywords for the query; sent as "q".</param>
+    /// <param name="region">Region code; sent as "kl", "l" and "bing_market".</param>
+    /// <param name="safeSearch">Safe-search level; sent as "ex" (Moderate, Off) or "p" (On).</param>
+    /// <param name="timeLimit">Time limit; sent as "df" ("d", "w", "m", "y", or empty when <c>null</c>).</param>
+    /// <returns>A mutable dictionary of query parameters.</returns>
+    /// <exception cref="ArgumentOutOfRangeException"><paramref name="safeSearch"/> is not a defined value.</exception>
+    private async Task<Dictionary<string, string>> GetPayloadAsync(
+        string keywords,
+        string region,
+        SafeSearchType safeSearch, TimeLimit? timeLimit)
+    {
+        // Resolve the safe-search parameter first so that an invalid value is rejected
+        // before any request is sent.
+        string safeSearchKey;
+        string safeSearchValue;
+        switch (safeSearch)
+        {
+            case SafeSearchType.Moderate:
+                safeSearchKey = "ex";
+                safeSearchValue = "-1";
+                break;
+            case SafeSearchType.Off:
+                safeSearchKey = "ex";
+                safeSearchValue = "-2";
+                break;
+            case SafeSearchType.On:
+                safeSearchKey = "p";
+                safeSearchValue = "1";
+                break;
+            default:
+                throw new ArgumentOutOfRangeException(nameof(safeSearch), safeSearch, null);
+        }
+
+        var timeLimitString = timeLimit switch
+        {
+            TimeLimit.Day => "d",
+            TimeLimit.Week => "w",
+            TimeLimit.Month => "m",
+            TimeLimit.Year => "y",
+            _ => String.Empty
+        };
+
+        var vqd = await GetVqdAsync(keywords).ConfigureAwait(false);
+
+        return new Dictionary<string, string>
+        {
+            ["q"] = keywords,
+            ["kl"] = region,
+            ["l"] = region,
+            ["bing_market"] = region,
+            ["s"] = "0",
+            ["df"] = timeLimitString,
+            ["vqd"] = vqd,
+            ["o"] = "json",
+            ["sp"] = "0",
+            [safeSearchKey] = safeSearchValue,
+        };
+    }
+
+    /// <summary>
+    /// Unquote URL and replace spaces with '+'
+    /// </summary>
+    /// <param name="url">Raw URL; <c>null</c> or empty yields an empty string.</param>
+    /// <returns>The URL with spaces turned into '+' and percent-escapes resolved.</returns>
+    private static string NormalizeUrl(string url)
+    {
+        if (String.IsNullOrEmpty(url))
+        {
+            return String.Empty;
+        }
+
+        // WebUtility.UrlDecode would turn every '+' back into a space, undoing the
+        // replacement and corrupting literal '+' characters. Uri.UnescapeDataString
+        // resolves only the %XX escapes.
+        return Uri.UnescapeDataString(url.Replace(" ", "+"));
+    }
+
+    /// <summary>
+    /// Strip HTML tags from the raw_html string.
+    /// </summary>
+    /// <param name="rawHtml">
+    /// Text that may contain HTML tags and entities. Declared nullable because the callers
+    /// pass nullable response fields; <c>null</c> or empty yields an empty string.
+    /// </param>
+    /// <returns>The text without "&lt;...&gt;" spans and with HTML entities decoded.</returns>
+    private string NormalizeHtml(string? rawHtml)
+    {
+        if (String.IsNullOrEmpty(rawHtml))
+        {
+            return String.Empty;
+        }
+
+        // Tags are removed before decoding, so entities that decode to '<' or '>' stay in the text.
+        var withoutTags = _stringTagsRegex.Replace(rawHtml, "");
+
+        return WebUtility.HtmlDecode(withoutTags);
+    }
+
+    /// <summary>
+    /// Shape of the JSON document returned by the links endpoint; only the fields that are
+    /// read by this client are mapped.
+    /// </summary>
+    private class LinksResponse
+    {
+        /// <summary>Items of the "results" array, or <c>null</c> when the field is absent.</summary>
+        [JsonInclude, JsonPropertyName("results")]
+        public LinksResponseItem[]? Results { get; private set; }
+
+        /// <summary>
+        /// One entry of the "results" array. All fields are optional in the JSON.
+        /// </summary>
+        public class LinksResponseItem
+        {
+            /// <summary>Value of the "u" field.</summary>
+            [JsonInclude, JsonPropertyName("u")]
+            public string? Url { get; private set; }
+
+            /// <summary>Value of the "t" field.</summary>
+            [JsonInclude, JsonPropertyName("t")]
+            public string? Title { get; private set; }
+
+            /// <summary>Value of the "a" field.</summary>
+            [JsonInclude, JsonPropertyName("a")]
+            public string? Body { get; private set; }
+
+            /// <summary>Value of the "n" field.</summary>
+            [JsonInclude, JsonPropertyName("n")]
+            public string? NextPageUrl { get; private set; }
+        }
+    }
+
+    /// <summary>
+    /// Waits 750 milliseconds. The wait is currently unconditional; making it depend on
+    /// proxies (as in "sleep between API requests if proxies is None") is still open.
+    /// </summary>
+    private async Task Sleep()
+    {
+        // TODO: if (proxies == null)
+        var pause = TimeSpan.FromMilliseconds(750);
+
+        await Task.Delay(pause);
+    }
+
+    /// <summary>
+    /// Get vqd value for a search query.
+    /// </summary>
+    /// <param name="keywords">Keywords for the query; sent as "q" to https://duckduckgo.com.</param>
+    /// <returns>The text that follows "vqd=" in the response body.</returns>
+    /// <exception cref="VqdExtractionException">The response contains no complete vqd value.</exception>
+    private async Task<string> GetVqdAsync(string keywords)
+    {
+        using var resp = await HttpGetAsync(
+            "https://duckduckgo.com",
+            new Dictionary<string, string>
+            {
+                ["q"] = keywords
+            }).ConfigureAwait(false);
+
+        if (resp.StatusCode == HttpStatusCode.OK)
+        {
+            var content = await resp.Content.ReadAsStringAsync().ConfigureAwait(false);
+
+            var vqd = ExtractVqd(content);
+            if (vqd != null)
+            {
+                return vqd;
+            }
+        }
+
+        throw new VqdExtractionException($"Could not extract vqd. {keywords}");
+    }
+
+    /// <summary>
+    /// Finds the first "vqd=" in <paramref name="content"/> and returns the value after it:
+    /// up to the closing quote when the value is quoted with ' or ", otherwise up to the
+    /// next '&amp;'. Returns <c>null</c> when the marker or the terminator is missing.
+    /// </summary>
+    private static string? ExtractVqd(string content)
+    {
+        const string marker = "vqd=";
+
+        // Index 0 is a valid match position, so only a negative index means "not found".
+        var markerIndex = content.IndexOf(marker, StringComparison.Ordinal);
+        if (markerIndex < 0)
+        {
+            return null;
+        }
+
+        var start = markerIndex + marker.Length;
+        if (start >= content.Length)
+        {
+            // The marker is the very end of the text; there is no value to read.
+            return null;
+        }
+
+        char endToken;
+        switch (content[start])
+        {
+            case '\'':
+            case '\"':
+                endToken = content[start];
+                start += 1;
+                break;
+            default:
+                endToken = '&';
+                break;
+        }
+
+        // A missing terminator must surface as "no vqd", not as an out-of-range Substring call.
+        var end = content.IndexOf(endToken, start);
+
+        return end < 0
+            ? null
+            : content.Substring(start, end - start);
+    }
+
+    /// <summary>
+    /// Appends the given parameters to <paramref name="baseUrl"/> as a query string.
+    /// </summary>
+    /// <param name="baseUrl">URL without a query string.</param>
+    /// <param name="queryParameters">Names and values; both are URL-encoded.</param>
+    /// <returns>
+    /// "<c>baseUrl?k1=v1&amp;k2=v2</c>", or <paramref name="baseUrl"/> unchanged when there
+    /// are no parameters.
+    /// </returns>
+    private static string AddQueryParamsToUrl(string baseUrl, Dictionary<string, string> queryParameters)
+    {
+        // Without parameters there is nothing to append; avoid a dangling '?'.
+        if (queryParameters.Count == 0)
+        {
+            return baseUrl;
+        }
+
+        var queryParts = new List<string>(queryParameters.Count);
+        foreach (var queryParameter in queryParameters)
+        {
+            var encodedKey = WebUtility.UrlEncode(queryParameter.Key);
+            var encodedValue = WebUtility.UrlEncode(queryParameter.Value);
+
+            queryParts.Add($"{encodedKey}={encodedValue}");
+        }
+
+        return $"{baseUrl}?{String.Join("&", queryParts)}";
+    }
+
+    /// <summary>
+    /// Sends a GET request to <paramref name="url"/> with the given query parameters and
+    /// returns the response only when its status is 200 OK.
+    /// </summary>
+    /// <param name="url">URL without a query string.</param>
+    /// <param name="queryParams">Parameters appended to the URL.</param>
+    /// <returns>The 200 OK response. The caller owns it and is responsible for disposing it.</returns>
+    /// <exception cref="TimeoutException">The request was cancelled (a <see cref="TaskCanceledException"/> was raised).</exception>
+    /// <exception cref="DuckDuckGoSearchException">Any other exception was raised while sending the request.</exception>
+    /// <exception cref="ApiException">The final request URL contains a fragment like '506-00.js'.</exception>
+    /// <exception cref="RateLimitException">The response status is 202 Accepted.</exception>
+    /// <exception cref="HttpRequestException">The response has any other non-200 status.</exception>
+    private async Task<HttpResponseMessage> HttpGetAsync(string url, Dictionary<string, string> queryParams)
+    {
+        var urlWithQuery = AddQueryParamsToUrl(url, queryParams);
+
+        HttpResponseMessage responseMessage;
+        try
+        {
+            responseMessage = await _client.GetAsync(urlWithQuery).ConfigureAwait(false);
+        }
+        catch (TaskCanceledException e)
+        {
+            throw new TimeoutException($"HttpGetAsync {urlWithQuery}", e);
+        }
+        catch (Exception e)
+        {
+            throw new DuckDuckGoSearchException($"HttpGetAsync {urlWithQuery}. {e.GetType()}: {e}", e);
+        }
+
+        // RequestMessage reflects the request that produced this response, i.e. the last
+        // URL after any automatic redirects.
+        var lastUrl = responseMessage.RequestMessage?.RequestUri?.ToString();
+
+        Exception failure;
+        if (lastUrl != null && Is500InUrl(lastUrl))
+        {
+            failure = new ApiException($"HttpGetAsync {urlWithQuery}");
+        }
+        else if (responseMessage.StatusCode == HttpStatusCode.Accepted)
+        {
+            failure = new RateLimitException($"HttpGetAsync {urlWithQuery}");
+        }
+        else if (responseMessage.StatusCode == HttpStatusCode.OK)
+        {
+            return responseMessage;
+        }
+        else
+        {
+            failure = new HttpRequestException($"HttpGetAsync finished with status code: {responseMessage.StatusCode}");
+        }
+
+        // The response is not handed to the caller on a failure, so release it here.
+        responseMessage.Dispose();
+
+        throw failure;
+    }
+
+    /// <summary>
+    /// Tells whether the URL contains a fragment such as '506-00.js'
+    /// (three digits, a dash, two digits, ".js").
+    /// </summary>
+    /// <param name="url">URL to inspect.</param>
+    /// <returns><c>true</c> when the fragment occurs anywhere in <paramref name="url"/>.</returns>
+    private bool Is500InUrl(string url) => _regex500InUrl.IsMatch(url);
+
+    /// <summary>
+    /// Raised when no vqd value can be extracted for a query.
+    /// </summary>
+    public class VqdExtractionException : Exception
+    {
+        /// <summary>Creates the exception with the given message.</summary>
+        public VqdExtractionException(string? message)
+            : base(message)
+        {
+        }
+    }
+
+    /// <summary>
+    /// Wraps an exception that was raised while a request was being sent.
+    /// </summary>
+    public class DuckDuckGoSearchException : Exception
+    {
+        /// <summary>Creates the exception with the given message and cause.</summary>
+        public DuckDuckGoSearchException(string? message, Exception innerException)
+            : base(message, innerException)
+        {
+        }
+    }
+
+    /// <summary>
+    /// Raised when the final request URL contains a fragment like '506-00.js'.
+    /// </summary>
+    public class ApiException : Exception
+    {
+        /// <summary>Creates the exception with the given message.</summary>
+        public ApiException(string? message)
+            : base(message)
+        {
+        }
+    }
+
+    /// <summary>
+    /// Raised when a request was cancelled; wraps the original cancellation exception.
+    /// </summary>
+    public class TimeoutException : Exception
+    {
+        /// <summary>Creates the exception with the given message and cause.</summary>
+        public TimeoutException(string? message, Exception innerException)
+            : base(message, innerException)
+        {
+        }
+    }
+
+    /// <summary>
+    /// Raised when a response arrives with status 202 Accepted.
+    /// </summary>
+    public class RateLimitException : Exception
+    {
+        /// <summary>Creates the exception with the given message.</summary>
+        public RateLimitException(string? message)
+            : base(message)
+        {
+        }
+    }
+
+    /// <summary>
+    /// Time limit for search results. Each value is sent as a one-letter "df" query parameter.
+    /// </summary>
+    public enum TimeLimit
+    {
+        /// <summary>Sent as "d".</summary>
+        Day = 0,
+
+        /// <summary>Sent as "w".</summary>
+        Week = 1,
+
+        /// <summary>Sent as "m".</summary>
+        Month = 2,
+
+        /// <summary>Sent as "y".</summary>
+        Year = 3,
+    }
+
+    /// <summary>
+    /// Safe-search level of a query.
+    /// </summary>
+    public enum SafeSearchType
+    {
+        /// <summary>Sent as query parameter "p" = "1".</summary>
+        On = 0,
+
+        /// <summary>Sent as query parameter "ex" = "-1".</summary>
+        Moderate = 1,
+
+        /// <summary>Sent as query parameter "ex" = "-2".</summary>
+        Off = 2
+    }
+
+    /// <summary>
+    /// Releases the HTTP client owned by this instance.
+    /// </summary>
+    public void Dispose() => _client.Dispose();
+}
\ No newline at end of file
diff --git a/src/libs/LangChain.Core/Utilities/DuckDuckGoSearchAPIWrapper.cs b/src/libs/LangChain.Core/Utilities/DuckDuckGoSearchAPIWrapper.cs
new file mode 100644
index 00000000..d1ccb8d3
--- /dev/null
+++ b/src/libs/LangChain.Core/Utilities/DuckDuckGoSearchAPIWrapper.cs
@@ -0,0 +1,101 @@
+namespace LangChain.Utilities;
+
+/// <summary>
+/// Wrapper for DuckDuckGo Search API.
+///
+/// Free and does not require any setup.
+/// </summary>
+/// <param name="region">Region code forwarded to every search; defaults to "wt-wt".</param>
+/// <param name="safeSearch">Safe-search level forwarded to every search.</param>
+/// <param name="time">Time limit forwarded to every search; defaults to one year.</param>
+/// <param name="maxResults">Number of snippets after which <see cref="GetSnippetsAsync"/> stops collecting.</param>
+public sealed class DuckDuckGoSearchApiWrapper(
+    string region = "wt-wt",
+    DuckDuckGoSearch.SafeSearchType safeSearch = DuckDuckGoSearch.SafeSearchType.Moderate,
+    DuckDuckGoSearch.TimeLimit time = DuckDuckGoSearch.TimeLimit.Year,
+    int maxResults = 5)
+    : IWebSearch, IDisposable
+{
+    // Search client owned by this wrapper; released in Dispose().
+    private readonly DuckDuckGoSearch _search = new();
+
+    /// <summary>
+    /// Get aggregated search result
+    /// </summary>
+    /// <param name="query">The query to search for.</param>
+    /// <returns>The snippets of <see cref="GetSnippetsAsync"/> joined with single spaces.</returns>
+    public async Task<string> RunAsync(string query)
+    {
+        var snippets = await GetSnippetsAsync(query).ConfigureAwait(false);
+
+        return String.Join(" ", snippets);
+    }
+
+    /// <summary>
+    /// Run query through DuckDuckGo and return the result bodies.
+    /// </summary>
+    /// <param name="query">The query to search for.</param>
+    /// <returns>
+    /// The "body" text of the results, collected until the configured maximum is reached.
+    /// When nothing was found the list holds a single placeholder message instead.
+    /// </returns>
+    public async Task<List<string>> GetSnippetsAsync(string query)
+    {
+        // No result cap is passed here, so the search client reads the first response only.
+        var results = _search.TextSearchAsync(
+            query,
+            region: region,
+            safeSearch: safeSearch,
+            timeLimit: time);
+
+        var snippets = new List<string>();
+        await foreach (var result in results)
+        {
+            snippets.Add(result["body"]);
+
+            if (snippets.Count == maxResults)
+            {
+                break;
+            }
+        }
+
+        if (snippets.Count == 0)
+        {
+            snippets.Add("No good DuckDuckGo Search Result was found");
+        }
+
+        return snippets;
+    }
+
+    /// <summary>
+    /// Run query through DuckDuckGo and return metadata.
+    /// </summary>
+    /// <remarks>
+    /// Only "api" backend supported
+    /// </remarks>
+    /// <param name="query">The query to search for.</param>
+    /// <param name="numResults">The number of results to return. Zero or less returns an empty list.</param>
+    /// <returns>
+    /// A list of items with the following props:
+    /// Title - The title of the result.
+    /// Body - The snippet (body text) of the result.
+    /// Link - The link to the result.
+    /// </returns>
+    public async Task<List<WebSearchResult>> ResultsAsync(
+        string query,
+        int numResults)
+    {
+        var formattedResults = new List<WebSearchResult>();
+        if (numResults <= 0)
+        {
+            return formattedResults;
+        }
+
+        // The caller's numResults is the cap for this call. Passing the constructor's
+        // maxResults instead would silently truncate larger requests.
+        var results = _search.TextSearchAsync(
+            query,
+            region: region,
+            safeSearch: safeSearch,
+            timeLimit: time,
+            maxResults: numResults);
+
+        await foreach (var result in results)
+        {
+            formattedResults.Add(new WebSearchResult(result["title"], result["body"], result["href"]));
+
+            if (formattedResults.Count >= numResults)
+            {
+                break;
+            }
+        }
+
+        return formattedResults;
+    }
+
+    /// <summary>
+    /// Releases the search client owned by this wrapper.
+    /// </summary>
+    public void Dispose()
+    {
+        _search.Dispose();
+    }
+}
\ No newline at end of file
diff --git a/src/libs/LangChain.Core/Utilities/IWebSearch.cs b/src/libs/LangChain.Core/Utilities/IWebSearch.cs
new file mode 100644
index 00000000..30743425
--- /dev/null
+++ b/src/libs/LangChain.Core/Utilities/IWebSearch.cs
@@ -0,0 +1,14 @@
+namespace LangChain.Utilities;
+
+/// <summary>
+/// Abstraction over a web search backend.
+/// </summary>
+public interface IWebSearch
+{
+ /// <summary>
+ /// Runs a search for <paramref name="query"/> and produces an aggregated result;
+ /// the exact form of the result is defined by the implementation.
+ /// </summary>
+ /// <param name="query">The query to search for.</param>
+ Task RunAsync(string query);
+
+ /// <summary>
+ /// Runs a search for <paramref name="query"/> and produces the individual results.
+ /// </summary>
+ /// <param name="query">The query to search for.</param>
+ /// <param name="numResults">The number of results requested.</param>
+ Task> ResultsAsync(string query, int numResults);
+}
+
+/// <summary>
+/// A single web search hit: title, body text and link.
+/// </summary>
+public class WebSearchResult
+{
+    /// <summary>
+    /// Creates a result from its three parts.
+    /// </summary>
+    /// <param name="title">Initial value of <see cref="Title"/>.</param>
+    /// <param name="body">Initial value of <see cref="Body"/>.</param>
+    /// <param name="link">Initial value of <see cref="Link"/>.</param>
+    public WebSearchResult(string title, string body, string link)
+    {
+        Title = title;
+        Body = body;
+        Link = link;
+    }
+
+    /// <summary>Title of the result.</summary>
+    public string Title { get; set; }
+
+    /// <summary>Body text of the result.</summary>
+    public string Body { get; set; }
+
+    /// <summary>Link to the result.</summary>
+    public string Link { get; set; }
+}
\ No newline at end of file
diff --git a/src/tests/LangChain.Core.UnitTests/Utilities/DuckDuckGoSearchTests.cs b/src/tests/LangChain.Core.UnitTests/Utilities/DuckDuckGoSearchTests.cs
new file mode 100644
index 00000000..6baf408e
--- /dev/null
+++ b/src/tests/LangChain.Core.UnitTests/Utilities/DuckDuckGoSearchTests.cs
@@ -0,0 +1,42 @@
+using LangChain.Retrievers;
+using LangChain.Utilities;
+
+namespace LangChain.Core.UnitTests.Utilities;
+
+/// <summary>
+/// Tests for the DuckDuckGo search wrapper and the retriever built on it.
+/// Every test sends real requests to DuckDuckGo, so a network connection is required.
+/// </summary>
+[TestFixture]
+public class DuckDuckGoSearchTests
+{
+    /// <summary>The aggregated result of a search for "wikipedia" mentions "encyclopedia".</summary>
+    [Test]
+    public async Task Run_Ok()
+    {
+        // The wrapper owns an HTTP client; dispose it when the test ends.
+        using var search = new DuckDuckGoSearchApiWrapper();
+
+        var result = await search.RunAsync("wikipedia");
+
+        result.Should().NotBeEmpty();
+        result.Should().Contain("encyclopedia");
+    }
+
+    /// <summary>At least one snippet of a search for "wikipedia" mentions "encyclopedia".</summary>
+    [Test]
+    public async Task GetSnippets_Ok()
+    {
+        using var search = new DuckDuckGoSearchApiWrapper();
+
+        var result = await search.GetSnippetsAsync("wikipedia");
+
+        result.Should().NotBeEmpty();
+        result.Should().Contain(v => v.Contains("encyclopedia"));
+    }
+
+    /// <summary>At least one retrieved document for "wikipedia" mentions "encyclopedia".</summary>
+    [Test]
+    public async Task Retriever_Ok()
+    {
+        using var search = new DuckDuckGoSearchApiWrapper();
+        var retriever = new WebSearchRetriever(search);
+
+        var result = await retriever.GetRelevantDocumentsAsync("wikipedia");
+
+        result.Should().NotBeEmpty();
+        result.Should().Contain(d => d.PageContent.Contains("encyclopedia"));
+    }
+}
\ No newline at end of file