Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
62 changes: 62 additions & 0 deletions components/scout-settings-modal.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,13 @@ import { supabase } from "@/lib/supabase/client";
import { X, Plus, Pencil } from "lucide-react";
import posthog from "posthog-js";

/**
 * Per-scout overrides forwarded to the Firecrawl scrape API when this
 * scout's searches/scrapes run. All fields are optional; omitting them
 * (or an empty object) means "use the defaults".
 */
type ScrapeOptions = {
  /** Custom HTTP headers to send with scrape requests. */
  headers?: Record<string, string>;
  /** Raw Cookie header value, e.g. "session_id=abc123; auth_token=xyz789". */
  cookies?: string;
  /** Wait before capture: a time in milliseconds, or a CSS selector string. */
  waitFor?: number | string;
  /** Request timeout in milliseconds. */
  timeout?: number;
};

type Scout = {
id: string;
title: string;
Expand All @@ -38,6 +45,7 @@ type Scout = {
} | null;
frequency: "daily" | "every_3_days" | "weekly" | null;
is_active: boolean;
scrape_options?: ScrapeOptions;
};

type Location = {
Expand Down Expand Up @@ -73,13 +81,16 @@ export function ScoutSettingsModal({
>(null);
const [isActive, setIsActive] = useState(false);
const [isSaving, setIsSaving] = useState(false);
// Scrape options state
const [cookies, setCookies] = useState("");

// Track which fields are in edit mode
const [editMode, setEditMode] = useState({
title: false,
goal: false,
description: false,
location: false,
cookies: false,
searchQueries: false,
frequency: false,
});
Expand All @@ -98,6 +109,7 @@ export function ScoutSettingsModal({
setSearchQueries(scout.search_queries || []);
setFrequency(scout.frequency);
setIsActive(scout.is_active);
setCookies(scout.scrape_options?.cookies || "");
// Check if location is "any" (0, 0) or a real location
const isAnyLocation =
scout.location &&
Expand All @@ -115,6 +127,7 @@ export function ScoutSettingsModal({
goal: false,
description: false,
location: false,
cookies: false,
searchQueries: false,
frequency: false,
});
Expand Down Expand Up @@ -172,6 +185,14 @@ export function ScoutSettingsModal({
locationToSave = scout.location;
}

// Build scrape_options object
const scrapeOptions: ScrapeOptions = {};
if (cookies.trim()) {
scrapeOptions.cookies = cookies.trim();
}
// Only save if there are options set
const scrapeOptionsToSave = Object.keys(scrapeOptions).length > 0 ? scrapeOptions : null;

const { error } = await supabase
.from("scouts")
.update({
Expand All @@ -181,6 +202,7 @@ export function ScoutSettingsModal({
search_queries: searchQueries,
frequency,
location: locationToSave,
scrape_options: scrapeOptionsToSave,
})
.eq("id", scout.id);

Expand Down Expand Up @@ -552,6 +574,46 @@ export function ScoutSettingsModal({
)}
</div>

{/* Cookies (Advanced) */}
<div className="border-t pt-24">
<div className="flex items-center justify-between mb-12">
<div>
<label className="text-label-medium text-gray-700">
Cookies
</label>
<p className="text-body-small text-gray-500 mt-2">
Optional: Send cookies when scraping websites (for authenticated content)
</p>
</div>
<button
type="button"
onClick={() =>
setEditMode({ ...editMode, cookies: !editMode.cookies })
}
className="p-6 hover:bg-gray-100 rounded-6 transition-colors"
>
<Pencil className="h-14 w-14 text-gray-500" />
</button>
</div>
{editMode.cookies ? (
<Textarea
value={cookies}
onChange={(e) => setCookies(e.target.value)}
placeholder="session_id=abc123; auth_token=xyz789"
className="w-full font-mono text-body-small"
rows={3}
/>
) : (
<p className="text-body-medium text-gray-900 py-12 px-16 bg-gray-50 rounded-6 min-h-[44px] flex items-center font-mono">
{cookies ? (
<span className="truncate">{cookies.slice(0, 50)}{cookies.length > 50 ? '...' : ''}</span>
) : (
<span className="text-gray-400 italic font-sans">Not configured</span>
)}
</p>
)}
</div>

{/* Save Button */}
<div className="flex justify-end gap-12 pt-24 border-t">
<Button
Expand Down
18 changes: 14 additions & 4 deletions supabase/functions/scout-cron/agent.ts
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
// Main agent orchestration logic

import type { Scout, ScoutResponse } from "./types.ts";
import type { Scout, ScoutResponse, ScrapeOptions } from "./types.ts";
import { getMaxAge, isBlacklistedDomain } from "./constants.ts";
import {
createStep,
Expand All @@ -9,7 +9,7 @@ import {
logFirecrawlUsage,
markFirecrawlKeyInvalid,
} from "./helpers.ts";
import { executeSearchTool, executeScrapeTool } from "./tools.ts";
import { executeSearchTool, executeScrapeTool, FirecrawlOptions } from "./tools.ts";
import { sendScoutSuccessEmail } from "./email.ts";
import {
trackExecutionStarted,
Expand Down Expand Up @@ -382,17 +382,27 @@ REMINDER: Write your final response like a NEWS BRIEF. DO NOT mention your proce
let toolResult: any;
let hasError = false;

// Build Firecrawl options from scout's scrape_options
const firecrawlOptions: FirecrawlOptions | undefined = scout.scrape_options
? {
cookies: scout.scrape_options.cookies,
headers: scout.scrape_options.headers,
waitFor: scout.scrape_options.waitFor,
timeout: scout.scrape_options.timeout,
}
: undefined;

// Execute the tool
try {
if (toolName === "searchWeb") {
// Only pass location if it exists and is not "any"
const locationToUse = scout.location?.city && scout.location.city !== "any"
? scout.location.city
: undefined;
toolResult = await executeSearchTool(toolArgs, FIRECRAWL_API_KEY, locationToUse, maxAgeMs);
toolResult = await executeSearchTool(toolArgs, FIRECRAWL_API_KEY, locationToUse, maxAgeMs, firecrawlOptions);
firecrawlApiCallsCount++;
} else if (toolName === "scrapeWebsite") {
toolResult = await executeScrapeTool(toolArgs, FIRECRAWL_API_KEY, maxAgeMs);
toolResult = await executeScrapeTool(toolArgs, FIRECRAWL_API_KEY, maxAgeMs, firecrawlOptions);
firecrawlApiCallsCount++;
}

Expand Down
54 changes: 52 additions & 2 deletions supabase/functions/scout-cron/tools.ts
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,22 @@

import { isBlacklistedDomain } from "./constants.ts";

/**
 * Optional per-request overrides passed through to the Firecrawl API.
 * Mirrors the scout's stored scrape_options; `cookies` is a convenience
 * that callers fold into the request headers as the "Cookie" header.
 */
export interface FirecrawlOptions {
  /** Custom HTTP headers (may itself include a Cookie entry). */
  headers?: Record<string, string>;
  /** Cookie string; sent as the "Cookie" header alongside `headers`. */
  cookies?: string;
  /** Wait before capture: a CSS selector, or a time in milliseconds. */
  waitFor?: number | string;
  /** Request timeout in milliseconds. */
  timeout?: number;
}

// Execute web search using Firecrawl
export async function executeSearchTool(args: any, apiKey: string, location?: string, maxAge?: number) {
export async function executeSearchTool(
args: any,
apiKey: string,
location?: string,
maxAge?: number,
options?: FirecrawlOptions
) {
try {
const searchPayload: any = {
query: args.query,
Expand All @@ -19,6 +33,19 @@ export async function executeSearchTool(args: any, apiKey: string, location?: st
searchPayload.tbs = args.tbs;
}

// Add custom headers/cookies if provided
if (options?.headers || options?.cookies) {
searchPayload.scrapeOptions.headers = {
...options.headers,
...(options.cookies && { "Cookie": options.cookies }),
};
}

// Add wait options if provided
if (options?.waitFor) {
searchPayload.scrapeOptions.waitFor = options.waitFor;
}

// Add location and country parameters for geo-targeting
// According to Firecrawl API, for best results both should be set
if (location) {
Expand Down Expand Up @@ -96,7 +123,12 @@ export async function executeSearchTool(args: any, apiKey: string, location?: st
}

// Execute website scraping using Firecrawl
export async function executeScrapeTool(args: any, apiKey: string, maxAge?: number) {
export async function executeScrapeTool(
args: any,
apiKey: string,
maxAge?: number,
options?: FirecrawlOptions
) {
try {
const scrapePayload: any = {
url: args.url,
Expand All @@ -110,6 +142,24 @@ export async function executeScrapeTool(args: any, apiKey: string, maxAge?: numb
maxAge: maxAge || 3600000, // Default to 1 hour if not provided
};

// Add custom headers/cookies if provided
if (options?.headers || options?.cookies) {
scrapePayload.headers = {
...options.headers,
...(options.cookies && { "Cookie": options.cookies }),
};
}

// Add wait options if provided (wait for selector or time in ms)
if (options?.waitFor) {
scrapePayload.waitFor = options.waitFor;
}

// Add custom timeout if provided
if (options?.timeout) {
scrapePayload.timeout = options.timeout;
}

// Add 60-second timeout to prevent hanging
const controller = new AbortController();
const timeoutId = setTimeout(() => controller.abort(), 60000);
Expand Down
8 changes: 8 additions & 0 deletions supabase/functions/scout-cron/types.ts
Original file line number Diff line number Diff line change
@@ -1,5 +1,12 @@
// Type definitions for Scout agent

/**
 * Scrape configuration stored on a scout (the `scrape_options` JSONB
 * column) and applied to its Firecrawl requests. All fields optional.
 */
export interface ScrapeOptions {
  /** Custom HTTP headers sent with each request. */
  headers?: Record<string, string>;
  /** Cookie string to send with requests (becomes the Cookie header). */
  cookies?: string;
  /** Wait before capture: time in milliseconds, or a CSS selector. */
  waitFor?: number | string;
  /** Request timeout in milliseconds. */
  timeout?: number;
}

export interface Scout {
id: string;
user_id: string;
Expand All @@ -16,6 +23,7 @@ export interface Scout {
is_active: boolean;
last_run_at: string | null;
consecutive_failures: number;
scrape_options?: ScrapeOptions; // Optional scrape configuration
}

export type FirecrawlKeyStatus = "pending" | "active" | "fallback" | "failed" | "invalid";
Expand Down
22 changes: 22 additions & 0 deletions supabase/migrations/20260108000000_add_scout_scrape_options.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
-- =============================================================================
-- ADD SCRAPE OPTIONS TO SCOUTS TABLE
-- Allows users to configure cookies, headers, and other scrape options per scout
-- =============================================================================

-- Add scrape_options column to scouts table
-- IF NOT EXISTS keeps the migration idempotent on re-runs.
-- NOTE(review): the app writes NULL when no options are set, while new rows
-- default to '{}' — readers must treat NULL and '{}' the same; confirm that
-- is intended, or consider DEFAULT NULL for consistency.
-- NOTE(review): cookies/auth headers are stored in plaintext JSONB here —
-- confirm row-level security on scouts restricts reads to the owning user.
ALTER TABLE scouts ADD COLUMN IF NOT EXISTS scrape_options JSONB DEFAULT '{}'::jsonb;

-- Add comment explaining the structure
-- (stored in the catalog via COMMENT ON; visible to anyone inspecting the schema)
COMMENT ON COLUMN scouts.scrape_options IS
'Optional scrape configuration for Firecrawl API. Structure:
{
"cookies": "session_id=abc123; auth_token=xyz789",
"headers": {"Authorization": "Bearer token"},
"waitFor": 2000,
"timeout": 30000
}
- cookies: Cookie string to send with requests
- headers: Custom HTTP headers (key-value pairs)
- waitFor: Wait time in ms or CSS selector before scraping
- timeout: Request timeout in ms
';