#!/usr/bin/env python3
"""Scheduled Jobs Example - Sync Client"""

import os
from scrapegraph_py import Client
from scrapegraph_py.models.scheduled_jobs import ServiceType


def main():
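    # Build the client from environment configuration (the API key is read from env vars).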
    client = Client.from_env()

    print("🚀 ScrapeGraph AI Scheduled Jobs Example")
    print("=" * 50)

    try:
        print("\n📅 Creating a scheduled SmartScraper job...")

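        # Request payload executed on every run (same fields as a one-off SmartScraper request).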
        smartscraper_config = {
            "website_url": "https://example.com",
            "user_prompt": "Extract the main heading and description from the page",
        }

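        # Standard 5-field cron expression: "0 9 * * *" runs every day at 09:00.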
        job = client.create_scheduled_job(
            job_name="Daily Example Scraping",
            service_type=ServiceType.SMARTSCRAPER,
            cron_expression="0 9 * * *",
            job_config=smartscraper_config,
            is_active=True,
        )

        job_id = job["id"]
        print(f"✅ Created job: {job['job_name']} (ID: {job_id})")
        print(f"   Next run: {job.get('next_run_at', 'Not scheduled')}")

        print("\n📅 Creating a scheduled SearchScraper job...")

        searchscraper_config = {
            "user_prompt": "Find the latest news about artificial intelligence",
            "num_results": 5,
        }

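        # "0 10 * * 1" runs every Monday at 10:00.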
        search_job = client.create_scheduled_job(
            job_name="Weekly AI News Search",
            service_type=ServiceType.SEARCHSCRAPER,
            cron_expression="0 10 * * 1",
            job_config=searchscraper_config,
            is_active=True,
        )

        search_job_id = search_job["id"]
        print(f"✅ Created job: {search_job['job_name']} (ID: {search_job_id})")

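        # Job listings are paginated; page and page_size control the window returned.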
        print("\n📋 Listing all scheduled jobs...")

        jobs_response = client.get_scheduled_jobs(page=1, page_size=10)
        jobs = jobs_response["jobs"]

        print(f"Found {jobs_response['total']} total jobs:")
        for job in jobs:
            status = "🟢 Active" if job["is_active"] else "🔴 Inactive"
            print(f"  - {job['job_name']} ({job['service_type']}) - {status}")
            print(f"    Schedule: {job['cron_expression']}")
            if job.get("next_run_at"):
                print(f"    Next run: {job['next_run_at']}")

        print(f"\n🔍 Getting details for job {job_id}...")

        job_details = client.get_scheduled_job(job_id)
        print(f"Job Name: {job_details['job_name']}")
        print(f"Service Type: {job_details['service_type']}")
        print(f"Created: {job_details['created_at']}")
        print(f"Active: {job_details['is_active']}")

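        # Only the fields passed to update_scheduled_job are changed; other settings keep their values.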
        print(f"\n📝 Updating job schedule...")

        updated_job = client.update_scheduled_job(
            job_id=job_id,
            cron_expression="0 8 * * *",
            job_name="Daily Example Scraping (Updated)",
        )

        print(f"✅ Updated job: {updated_job['job_name']}")
        print(f"   New schedule: {updated_job['cron_expression']}")

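        # Pausing keeps the job definition but stops scheduled runs until it is resumed.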
        print(f"\n⏸️ Pausing job {job_id}...")

        pause_result = client.pause_scheduled_job(job_id)
        print(f"✅ {pause_result['message']}")
        print(f"   Job is now: {'Active' if pause_result['is_active'] else 'Paused'}")

        print(f"\n▶️ Resuming job {job_id}...")

        resume_result = client.resume_scheduled_job(job_id)
        print(f"✅ {resume_result['message']}")
        print(f"   Job is now: {'Active' if resume_result['is_active'] else 'Paused'}")
        if resume_result.get("next_run_at"):
            print(f"   Next run: {resume_result['next_run_at']}")

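        # A manual trigger runs the job immediately without affecting its cron schedule.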
        print(f"\n🚀 Manually triggering job {job_id}...")

        trigger_result = client.trigger_scheduled_job(job_id)
        print(f"✅ {trigger_result['message']}")
        print(f"   Execution ID: {trigger_result['execution_id']}")
        print(f"   Triggered at: {trigger_result['triggered_at']}")

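        # Each execution record carries a status, start/completion timestamps, and credits used.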
        print(f"\n📊 Getting execution history for job {job_id}...")

        executions_response = client.get_job_executions(
            job_id=job_id,
            page=1,
            page_size=5,
        )

        executions = executions_response["executions"]
        print(f"Found {executions_response['total']} total executions:")

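        # Map each status to a display emoji; unknown statuses fall back to ❓.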
        for execution in executions:
            status_emoji = {
                "completed": "✅",
                "failed": "❌",
                "running": "🔄",
                "pending": "⏳",
            }.get(execution["status"], "❓")

            print(f"  {status_emoji} {execution['status'].upper()}")
            print(f"    Started: {execution['started_at']}")
            if execution.get("completed_at"):
                print(f"    Completed: {execution['completed_at']}")
            if execution.get("credits_used"):
                print(f"    Credits used: {execution['credits_used']}")

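        # get_scheduled_jobs also accepts filters; here only active SmartScraper jobs are returned.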
        print(f"\n🔧 Filtering jobs by service type (smartscraper)...")

        filtered_jobs = client.get_scheduled_jobs(
            service_type=ServiceType.SMARTSCRAPER,
            is_active=True,
        )

        print(f"Found {filtered_jobs['total']} active SmartScraper jobs:")
        for job in filtered_jobs["jobs"]:
            print(f"  - {job['job_name']} (Schedule: {job['cron_expression']})")

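        # Clean up so repeated runs of this example don't accumulate scheduled jobs.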
        print(f"\n🗑️ Cleaning up - deleting created jobs...")

        delete_result1 = client.delete_scheduled_job(job_id)
        print(f"✅ {delete_result1['message']} (Job 1)")

        delete_result2 = client.delete_scheduled_job(search_job_id)
        print(f"✅ {delete_result2['message']} (Job 2)")

        print("\n🎉 Scheduled jobs example completed successfully!")

    except Exception as e:
        print(f"\n❌ Error: {str(e)}")
        raise

    finally:
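        # Always release the client's underlying resources, even if an error occurred.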
        client.close()


if __name__ == "__main__":
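    # Set SGAI_MOCK=1 (or "true"/"yes") to run this example without making real API calls.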
    if os.getenv("SGAI_MOCK", "0").lower() in ["1", "true", "yes"]:
        print("🧪 Running in MOCK mode - no real API calls will be made")

    main()