Skip to content

Commit 7057710

Browse files
authored
Merge pull request #59 from ScrapeGraphAI/feat/add-scheduled-jobs
add scheduled jobs
2 parents baeb4c0 + cc9ab95 commit 7057710

File tree

8 files changed

+1238
-4
lines changed

8 files changed

+1238
-4
lines changed

scrapegraph-py/__init__.py

Whitespace-only changes.
Lines changed: 164 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,164 @@
1+
#!/usr/bin/env python3
2+
"""Scheduled Jobs Example - Sync Client"""
3+
4+
import os
5+
from scrapegraph_py import Client
6+
from scrapegraph_py.models.scheduled_jobs import ServiceType
7+
8+
def main():
    """Walk through the scheduled-jobs API of the synchronous client.

    The example creates one SmartScraper job and one SearchScraper job, then
    lists jobs, fetches details, updates the schedule, pauses, resumes and
    manually triggers the first job, prints its execution history, filters
    jobs by service type, and finally deletes both jobs.

    Every job created here is remembered so that, if any step fails midway,
    the ``finally`` clause still tries to delete it. Without that, a failed
    run would leave active scheduled jobs behind on the account.

    Raises:
        Exception: Any error raised by a client call in the main flow is
            printed and then re-raised unchanged.
    """
    client = Client.from_env()

    # IDs of jobs created by this run that have not been deleted yet.
    pending_cleanup = []

    print("🚀 ScrapeGraph AI Scheduled Jobs Example")
    print("=" * 50)

    try:
        print("\n📅 Creating a scheduled SmartScraper job...")

        smartscraper_config = {
            "website_url": "https://example.com",
            "user_prompt": "Extract the main heading and description from the page",
        }

        job = client.create_scheduled_job(
            job_name="Daily Example Scraping",
            service_type=ServiceType.SMARTSCRAPER,
            # Standard 5-field cron: every day at 09:00 (the timezone is
            # decided by the service and is not visible from this example).
            cron_expression="0 9 * * *",
            job_config=smartscraper_config,
            is_active=True,
        )

        job_id = job["id"]
        pending_cleanup.append(job_id)
        print(f"✅ Created job: {job['job_name']} (ID: {job_id})")
        print(f" Next run: {job.get('next_run_at', 'Not scheduled')}")

        print("\n📅 Creating a scheduled SearchScraper job...")

        searchscraper_config = {
            "user_prompt": "Find the latest news about artificial intelligence",
            "num_results": 5,
        }

        search_job = client.create_scheduled_job(
            job_name="Weekly AI News Search",
            service_type=ServiceType.SEARCHSCRAPER,
            # Standard 5-field cron: 10:00 on day-of-week 1 (Monday).
            cron_expression="0 10 * * 1",
            job_config=searchscraper_config,
            is_active=True,
        )

        search_job_id = search_job["id"]
        pending_cleanup.append(search_job_id)
        print(f"✅ Created job: {search_job['job_name']} (ID: {search_job_id})")

        print("\n📋 Listing all scheduled jobs...")

        jobs_response = client.get_scheduled_jobs(page=1, page_size=10)

        print(f"Found {jobs_response['total']} total jobs:")
        # A dedicated loop name keeps the `job` created above from being rebound.
        for listed_job in jobs_response["jobs"]:
            status = "🟢 Active" if listed_job["is_active"] else "🔴 Inactive"
            print(f" - {listed_job['job_name']} ({listed_job['service_type']}) - {status}")
            print(f" Schedule: {listed_job['cron_expression']}")
            if listed_job.get('next_run_at'):
                print(f" Next run: {listed_job['next_run_at']}")

        print(f"\n🔍 Getting details for job {job_id}...")

        job_details = client.get_scheduled_job(job_id)
        print(f"Job Name: {job_details['job_name']}")
        print(f"Service Type: {job_details['service_type']}")
        print(f"Created: {job_details['created_at']}")
        print(f"Active: {job_details['is_active']}")

        print("\n📝 Updating job schedule...")

        updated_job = client.update_scheduled_job(
            job_id=job_id,
            cron_expression="0 8 * * *",
            job_name="Daily Example Scraping (Updated)",
        )

        print(f"✅ Updated job: {updated_job['job_name']}")
        print(f" New schedule: {updated_job['cron_expression']}")

        print(f"\n⏸️ Pausing job {job_id}...")

        pause_result = client.pause_scheduled_job(job_id)
        print(f"✅ {pause_result['message']}")
        print(f" Job is now: {'Active' if pause_result['is_active'] else 'Paused'}")

        print(f"\n▶️ Resuming job {job_id}...")

        resume_result = client.resume_scheduled_job(job_id)
        print(f"✅ {resume_result['message']}")
        print(f" Job is now: {'Active' if resume_result['is_active'] else 'Paused'}")
        if resume_result.get('next_run_at'):
            print(f" Next run: {resume_result['next_run_at']}")

        print(f"\n🚀 Manually triggering job {job_id}...")

        trigger_result = client.trigger_scheduled_job(job_id)
        print(f"✅ {trigger_result['message']}")
        print(f" Execution ID: {trigger_result['execution_id']}")
        print(f" Triggered at: {trigger_result['triggered_at']}")

        print(f"\n📊 Getting execution history for job {job_id}...")

        executions_response = client.get_job_executions(
            job_id=job_id,
            page=1,
            page_size=5,
        )

        executions = executions_response["executions"]
        print(f"Found {executions_response['total']} total executions:")

        # Statuses outside this table are shown with a question mark.
        status_emojis = {
            "completed": "✅",
            "failed": "❌",
            "running": "🔄",
            "pending": "⏳",
        }
        for execution in executions:
            status_emoji = status_emojis.get(execution["status"], "❓")

            print(f" {status_emoji} {execution['status'].upper()}")
            print(f" Started: {execution['started_at']}")
            if execution.get('completed_at'):
                print(f" Completed: {execution['completed_at']}")
            if execution.get('credits_used'):
                print(f" Credits used: {execution['credits_used']}")

        print("\n🔧 Filtering jobs by service type (smartscraper)...")

        filtered_jobs = client.get_scheduled_jobs(
            service_type=ServiceType.SMARTSCRAPER,
            is_active=True,
        )

        print(f"Found {filtered_jobs['total']} active SmartScraper jobs:")
        for active_job in filtered_jobs["jobs"]:
            print(f" - {active_job['job_name']} (Schedule: {active_job['cron_expression']})")

        print("\n🗑️ Cleaning up - deleting created jobs...")

        delete_result1 = client.delete_scheduled_job(job_id)
        pending_cleanup.remove(job_id)
        print(f"✅ {delete_result1['message']} (Job 1)")

        delete_result2 = client.delete_scheduled_job(search_job_id)
        pending_cleanup.remove(search_job_id)
        print(f"✅ {delete_result2['message']} (Job 2)")

        print("\n🎉 Scheduled jobs example completed successfully!")

    except Exception as e:
        print(f"\n❌ Error: {str(e)}")
        raise

    finally:
        try:
            # Best-effort removal of jobs that outlived a failed run. A failure
            # here is reported but must not mask the original exception.
            for leftover_id in pending_cleanup:
                try:
                    client.delete_scheduled_job(leftover_id)
                    print(f"🧹 Deleted leftover job {leftover_id}")
                except Exception as cleanup_error:
                    print(f"⚠️ Could not delete job {leftover_id}: {cleanup_error}")
        finally:
            client.close()
158+
159+
160+
if __name__ == "__main__":
    # SGAI_MOCK is only inspected here to print a banner; whether the client
    # itself honours the variable is not visible from this script.
    mock_flag = os.getenv("SGAI_MOCK", "0").lower()
    if mock_flag in ("1", "true", "yes"):
        print("🧪 Running in MOCK mode - no real API calls will be made")

    main()
Lines changed: 37 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,40 @@
11
from .async_client import AsyncClient
22
from .client import Client
33

4-
__all__ = ["Client", "AsyncClient"]
4+
# Scheduled Jobs Models
5+
from .models.scheduled_jobs import (
6+
GetJobExecutionsRequest,
7+
GetScheduledJobRequest,
8+
GetScheduledJobsRequest,
9+
JobActionRequest,
10+
JobActionResponse,
11+
JobExecutionListResponse,
12+
JobExecutionResponse,
13+
JobTriggerResponse,
14+
ScheduledJobCreate,
15+
ScheduledJobListResponse,
16+
ScheduledJobResponse,
17+
ScheduledJobUpdate,
18+
ServiceType,
19+
TriggerJobRequest,
20+
)
21+
22+
# Public API of the package: the two clients plus the scheduled-jobs models
# imported above.
__all__ = [
    # Clients
    "Client",
    "AsyncClient",
    # Scheduled jobs: service enum and create/update payloads
    "ServiceType",
    "ScheduledJobCreate",
    "ScheduledJobUpdate",
    # Scheduled jobs: response models
    "ScheduledJobResponse",
    "ScheduledJobListResponse",
    "JobExecutionResponse",
    "JobExecutionListResponse",
    "JobTriggerResponse",
    "JobActionResponse",
    # Scheduled jobs: request models
    "GetScheduledJobsRequest",
    "GetScheduledJobRequest",
    "GetJobExecutionsRequest",
    "TriggerJobRequest",
    "JobActionRequest",
]

0 commit comments

Comments
 (0)