Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
40 changes: 40 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
# Changelog

## [1.14.16] - 2025-01-08

### Added
- Added `include_urls` and `exclude_urls` parameters to search methods
- Available in `search()`, `search_and_contents()`, `find_similar()`, and `find_similar_and_contents()` methods
- Both sync and async versions support these new parameters
- Wildcards are supported at the beginning or end of a URL pattern (e.g., `*/contact-us/*`, `*.ai`)
- Use `include_urls` to filter results to only URLs matching specified patterns
- Use `exclude_urls` to filter out results with URLs matching specified patterns

### Important Constraints
- `include_urls`/`exclude_urls` cannot be used together with `include_domains`/`exclude_domains` in the same request
- `include_urls` and `exclude_urls` cannot be used together in the same request

### Testing
- Added comprehensive test coverage for URL filtering functionality
- Unit tests for parameter validation in `tests/test_url_filters_unit.py`
- Integration tests requiring API key in `tests/test_search_api.py`
- Tests cover both synchronous and asynchronous implementations

### Examples
- Added `examples/url_filtering_example.py` demonstrating various URL filtering patterns
- Added `examples/company_research_url_filtering.py` showing practical company research workflows

### Example Usage
```python
# Include only contact pages
results = exa.search("AI startup", include_urls=["*/contact-us/*", "*/about/*"])

# Exclude blog and news pages
results = exa.search("machine learning", exclude_urls=["*/blog/*", "*/news/*"])

# Filter LinkedIn profiles
results = exa.find_similar(
"https://www.linkedin.com/in/example/",
include_urls=["www.linkedin.com/in/*"]
)
```
9 changes: 9 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,15 @@ exa = Exa(api_key="your-api-key")
# search with domain filters
results = exa.search("This is a Exa query:", include_domains=["www.cnn.com", "www.nytimes.com"])

# search with URL pattern filters
results = exa.search("AI law startup", include_urls=["*/contact-us/*", "*/about/*"])

# exclude certain URL patterns
results = exa.search("machine learning", exclude_urls=["*/blog/*", "*/news/*"])

# Note: include_urls/exclude_urls cannot be used together with include_domains/exclude_domains
# Also, include_urls and exclude_urls cannot be used together in the same request

# search and get text contents
results = exa.search_and_contents("This is a Exa query:")

Expand Down
106 changes: 91 additions & 15 deletions exa_py/api.py

Large diffs are not rendered by default.

112 changes: 112 additions & 0 deletions examples/company_research_url_filtering.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,112 @@
#!/usr/bin/env python3
"""
Example: Using URL filtering for efficient company research.

This example demonstrates how to use include_urls and exclude_urls to gather
specific types of information about companies, such as contact information,
team pages, and official announcements while filtering out noise.
"""

import os
from exa_py import Exa

# Initialize the Exa client; the API key must be supplied via the
# EXA_API_KEY environment variable (fail fast with a clear error otherwise).
api_key = os.environ.get("EXA_API_KEY")
if not api_key:
    raise ValueError("Please set EXA_API_KEY environment variable")

# Module-level client shared by all example functions below.
exa = Exa(api_key=api_key)


def research_company(company_name: str):
    """Research a company using targeted URL filtering.

    Prints three sections for *company_name*: official about/company
    pages, recent press coverage with short text previews, and
    individual LinkedIn profiles. Uses the module-level ``exa`` client.
    """
    print(f"\n=== Researching {company_name} ===\n")

    # 1. Official company pages — restrict hits to about/company style URLs.
    print("1. Finding official company information:")
    print("-" * 50)
    about_pages = exa.search(
        f"{company_name} official website",
        num_results=5,
        include_urls=["*/about/*", "*/about-us/*", "*/company/*", "*/who-we-are/*"],
    )
    print(f"Found {len(about_pages.results)} official pages")
    for hit in about_pages.results[:3]:
        print(f" - {hit.title}")
        print(f" {hit.url}\n")

    # 2. Press releases and announcements, fetched together with page text.
    print("\n2. Finding recent press releases:")
    print("-" * 50)
    press = exa.search_and_contents(
        f"{company_name} announcement",
        num_results=3,
        include_urls=["*/press/*", "*/news/*", "*/blog/*", "*/newsroom/*"],
        text={"max_characters": 300},
    )
    print(f"Found {len(press.results)} press releases")
    for hit in press.results:
        print(f" • {hit.title}")
        print(f" {hit.url}")
        if hit.text:
            # Trim long bodies down to a 150-character preview.
            if len(hit.text) > 150:
                preview = hit.text[:150] + "..."
            else:
                preview = hit.text
            print(f" Preview: {preview}\n")

    # 3. People at the company — only individual LinkedIn profile URLs.
    print("\n3. Finding team members on LinkedIn:")
    profiles = exa.search(
        f"{company_name} employees team",
        num_results=5,
        include_urls=["www.linkedin.com/in/*", "linkedin.com/in/*"],
    )
    print(f"Found {len(profiles.results)} LinkedIn profiles")
    for hit in profiles.results[:3]:
        print(f" - {hit.title}: {hit.url}")


def find_competitors(company_name: str, industry: str):
    """Find competitor companies using URL filtering.

    Builds a guessed homepage URL for *company_name*, asks Exa for
    similar sites, and excludes blog/news/wiki/LinkedIn noise.
    The *industry* argument only appears in the printed heading.
    """
    print(f"\n=== Finding competitors of {company_name} in {industry} ===\n")

    # Naive homepage guess: lowercase the name, drop spaces, assume .com.
    slug = company_name.lower().replace(' ', '')
    sample_url = f"https://www.{slug}.com"

    rivals = exa.find_similar(
        sample_url,
        num_results=10,
        exclude_urls=["*/blog/*", "*/news/*", "*/wiki/*", "*/linkedin.com/*"],
    )

    print(f"Found {len(rivals.results)} similar company websites:")
    for rank, hit in enumerate(rivals.results[:5], 1):
        print(f"{rank}. {hit.title}")
        print(f" {hit.url}")
        print()


def main():
    """Run company research examples."""
    print("=== Company Research with URL Filtering ===")
    print("This example shows how to efficiently research companies")
    print("by filtering URLs to find specific types of information.\n")

    # Example companies to research
    for company in ("OpenAI", "Anthropic"):
        research_company(company)

    # Find competitors example
    find_competitors("OpenAI", "AI research")

    # Closing summary of what URL filtering buys you.
    print("\n=== Summary ===")
    for tip in (
        "URL filtering helps you:",
        "• Find official company information (not third-party coverage)",
        "• Locate specific page types (contact, team, careers)",
        "• Exclude irrelevant content (blogs, news, wikis)",
        "• Focus your research on high-value pages",
        "• Save time by getting targeted results",
    ):
        print(tip)


if __name__ == "__main__":
    main()
138 changes: 138 additions & 0 deletions examples/url_filtering_example.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,138 @@
#!/usr/bin/env python3
"""
Example demonstrating URL filtering with include_urls and exclude_urls parameters.

This example shows how to use the new URL filtering capabilities in Exa API
to refine search results based on URL patterns.

IMPORTANT: When using include_urls or exclude_urls, you cannot use
include_domains or exclude_domains in the same request.
Also, include_urls and exclude_urls cannot be used together in the same request.
"""

import os

from exa_py import Exa


def _preview(text: str, limit: int) -> str:
    """Return *text* truncated to *limit* characters, with a trailing ellipsis."""
    return text[:limit] + "..." if len(text) > limit else text


def main():
    """Run all URL-filtering examples in sequence.

    Requires the EXA_API_KEY environment variable; every example makes a
    live API call. Wrapped in main() (fix: previously the whole script ran
    at import time, unlike the sibling company-research example).
    """
    # Initialize the Exa client; fail fast if the key is missing.
    api_key = os.environ.get("EXA_API_KEY")
    if not api_key:
        raise ValueError("Please set EXA_API_KEY environment variable")

    exa = Exa(api_key=api_key)

    print("=== Exa URL Filtering Examples ===\n")

    # Example 1: Find contact pages for AI startups
    print("1. Finding contact pages for AI startups:")
    print("-" * 50)
    results = exa.search(
        "AI startup artificial intelligence",
        num_results=5,
        include_urls=["*/contact/*", "*/contact-us/*", "*/about/contact/*"],
    )
    print(f"Found {len(results.results)} contact pages:")
    for i, result in enumerate(results.results[:3], 1):
        print(f"{i}. {result.title}")
        print(f" URL: {result.url}")
        print()

    # Example 2: Exclude blog and news articles
    print("2. Searching for technical content, excluding blogs and news:")
    print("-" * 50)
    results = exa.search(
        "machine learning algorithms",
        num_results=5,
        exclude_urls=["*/blog/*", "*/news/*", "*/press/*"],
    )
    print(f"Found {len(results.results)} non-blog/news results:")
    for i, result in enumerate(results.results[:3], 1):
        print(f"{i}. {result.title}")
        print(f" URL: {result.url}")
        print()

    # Example 3: Find LinkedIn profiles
    print("3. Finding LinkedIn profiles similar to a tech leader:")
    print("-" * 50)
    results = exa.find_similar(
        "https://www.linkedin.com/in/satyanadella/",
        num_results=5,
        include_urls=["www.linkedin.com/in/*", "linkedin.com/in/*"],
    )
    print(f"Found {len(results.results)} LinkedIn profiles:")
    for i, result in enumerate(results.results[:3], 1):
        print(f"{i}. {result.title}")
        print(f" URL: {result.url}")
        print()

    # Example 4: Filter by domain extension
    print("4. Finding only .edu educational resources:")
    print("-" * 50)
    results = exa.search(
        "quantum computing introduction",
        num_results=5,
        include_urls=["*.edu/*"],
    )
    print(f"Found {len(results.results)} educational resources:")
    for i, result in enumerate(results.results[:3], 1):
        print(f"{i}. {result.title}")
        print(f" URL: {result.url}")
        print()

    # Example 5: Finding product pages (using include_urls only)
    print("5. Finding product pages:")
    print("-" * 50)
    results = exa.search(
        "best laptop 2024",
        num_results=5,
        include_urls=["*/products/*", "*/shop/*", "*/store/*", "*/item/*"],
    )
    print(f"Found {len(results.results)} product pages:")
    for i, result in enumerate(results.results[:3], 1):
        print(f"{i}. {result.title}")
        print(f" URL: {result.url}")
        print()

    # Example 6: Using with search_and_contents
    print("6. Getting content from specific page types:")
    print("-" * 50)
    results = exa.search_and_contents(
        "company mission statement",
        num_results=3,
        include_urls=["*/about/*", "*/about-us/*", "*/mission/*"],
        text={"max_characters": 500},
    )
    print(f"Found {len(results.results)} about/mission pages with content:")
    for i, result in enumerate(results.results, 1):
        print(f"{i}. {result.title}")
        print(f" URL: {result.url}")
        if result.text:
            print(f" Preview: {_preview(result.text, 200)}")
        print()

    # Example 7: Finding similar pages with URL constraints (using include_urls only)
    print("7. Finding similar documentation pages:")
    print("-" * 50)
    results = exa.find_similar_and_contents(
        "https://docs.python.org/3/tutorial/",
        num_results=3,
        include_urls=["*/docs/*", "*/documentation/*", "*/guide/*", "*/tutorial/*"],
        text=True,
    )
    print(f"Found {len(results.results)} similar documentation pages:")
    for i, result in enumerate(results.results, 1):
        print(f"{i}. {result.title}")
        print(f" URL: {result.url}")
        print()

    print("\n=== Advanced Usage Tips ===")
    print("1. Wildcards (*) can be used at the beginning or end of patterns")
    print("2. Multiple patterns can be specified in a list")
    print("3. include_urls and exclude_urls CANNOT be used together in the same request")
    print("4. Patterns are case-sensitive")
    print("5. IMPORTANT: Cannot use include_urls/exclude_urls with include_domains/exclude_domains")
    print("6. Use these filters to:")
    print(" - Find specific page types (contact, about, product pages)")
    print(" - Filter by domain or subdomain patterns")
    print(" - Exclude unwanted content types (use exclude_urls separately)")
    print(" - Focus on specific platforms (LinkedIn, GitHub, etc.)")


if __name__ == "__main__":
    main()
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ in-project = true

[project]
name = "exa-py"
version = "1.14.15"
version = "1.14.16"
description = "Python SDK for Exa API."
readme = "README.md"
requires-python = ">=3.9"
Expand Down
2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@

setup(
name="exa_py",
version="1.14.15",
version="1.14.16",
description="Python SDK for Exa API.",
long_description_content_type="text/markdown",
long_description=open("README.md").read(),
Expand Down
Loading
Loading