@@ -809,27 +809,7 @@ def tool_comparison_guide() -> str:
809809
810810
811811# Add tool for markdownify
812- @mcp .tool (
813- description = "Convert a webpage into clean, formatted markdown" ,
814- input_schema = {
815- "type" : "object" ,
816- "properties" : {
817- "website_url" : {
818- "type" : "string" ,
819- "description" : "URL of the webpage to convert to markdown" ,
820- "format" : "uri" ,
821- "examples" : ["https://example.com" , "https://docs.python.org/3/" ]
822- }
823- },
824- "required" : ["website_url" ],
825- "additionalProperties" : False
826- },
827- annotations = {
828- "readOnlyHint" : True ,
829- "destructiveHint" : False ,
830- "idempotentHint" : True
831- }
832- )
812+ @mcp .tool ()
833813def markdownify (website_url : str , ctx : Context ) -> Dict [str , Any ]:
834814 """
835815 Convert a webpage into clean, formatted markdown.
@@ -849,48 +829,7 @@ def markdownify(website_url: str, ctx: Context) -> Dict[str, Any]:
849829
850830
851831# Add tool for smartscraper
852- @mcp .tool (
853- description = "Extract structured data from a webpage using AI" ,
854- input_schema = {
855- "type" : "object" ,
856- "properties" : {
857- "user_prompt" : {
858- "type" : "string" ,
859- "description" : "Instructions for what data to extract from the webpage" ,
860- "examples" : [
861- "Extract all product names and prices" ,
862- "Get contact information and business hours" ,
863- "Find all article titles and publication dates"
864- ]
865- },
866- "website_url" : {
867- "type" : "string" ,
868- "description" : "URL of the webpage to scrape" ,
869- "format" : "uri" ,
870- "examples" : ["https://example.com/products" , "https://news.ycombinator.com" ]
871- },
872- "number_of_scrolls" : {
873- "type" : "integer" ,
874- "description" : "Number of infinite scrolls to perform to load more content (optional)" ,
875- "minimum" : 0 ,
876- "maximum" : 10 ,
877- "default" : 0
878- },
879- "markdown_only" : {
880- "type" : "boolean" ,
881- "description" : "Whether to return only markdown content without AI processing (optional)" ,
882- "default" : false
883- }
884- },
885- "required" : ["user_prompt" , "website_url" ],
886- "additionalProperties" : False
887- },
888- annotations = {
889- "readOnlyHint" : True ,
890- "destructiveHint" : False ,
891- "idempotentHint" : True
892- }
893- )
832+ @mcp .tool ()
894833def smartscraper (
895834 user_prompt : str ,
896835 website_url : str ,
@@ -919,44 +858,7 @@ def smartscraper(
919858
920859
921860# Add tool for searchscraper
922- @mcp .tool (
923- description = "Perform AI-powered web searches with structured results" ,
924- input_schema = {
925- "type" : "object" ,
926- "properties" : {
927- "user_prompt" : {
928- "type" : "string" ,
929- "description" : "Search query or instructions for what information to find" ,
930- "examples" : [
931- "Find the latest AI research papers" ,
932- "Search for Python web scraping tutorials" ,
933- "Get information about climate change statistics"
934- ]
935- },
936- "num_results" : {
937- "type" : "integer" ,
938- "description" : "Number of websites to search (optional, default: 3 websites = 30 credits)" ,
939- "minimum" : 1 ,
940- "maximum" : 10 ,
941- "default" : 3
942- },
943- "number_of_scrolls" : {
944- "type" : "integer" ,
945- "description" : "Number of infinite scrolls to perform on each website (optional)" ,
946- "minimum" : 0 ,
947- "maximum" : 5 ,
948- "default" : 0
949- }
950- },
951- "required" : ["user_prompt" ],
952- "additionalProperties" : False
953- },
954- annotations = {
955- "readOnlyHint" : True ,
956- "destructiveHint" : False ,
957- "idempotentHint" : False
958- }
959- )
861+ @mcp .tool ()
960862def searchscraper (
961863 user_prompt : str ,
962864 ctx : Context ,
@@ -983,61 +885,7 @@ def searchscraper(
983885
984886
985887# Add tool for SmartCrawler initiation
986- @mcp .tool (
987- description = "Initiate intelligent multi-page web crawling with AI extraction or markdown conversion" ,
988- input_schema = {
989- "type" : "object" ,
990- "properties" : {
991- "url" : {
992- "type" : "string" ,
993- "description" : "Starting URL to crawl" ,
994- "format" : "uri" ,
995- "examples" : ["https://example.com" , "https://docs.python.org" ]
996- },
997- "prompt" : {
998- "type" : "string" ,
999- "description" : "AI prompt for data extraction (required for AI mode)" ,
1000- "examples" : [
1001- "Extract product information including name, price, and description" ,
1002- "Get all article titles, authors, and publication dates" ,
1003- "Find contact information and business details"
1004- ]
1005- },
1006- "extraction_mode" : {
1007- "type" : "string" ,
1008- "description" : "Extraction mode: 'ai' for AI extraction (10 credits/page) or 'markdown' for markdown conversion (2 credits/page)" ,
1009- "enum" : ["ai" , "markdown" ],
1010- "default" : "ai"
1011- },
1012- "depth" : {
1013- "type" : "integer" ,
1014- "description" : "Maximum link traversal depth (optional)" ,
1015- "minimum" : 1 ,
1016- "maximum" : 5 ,
1017- "default" : 2
1018- },
1019- "max_pages" : {
1020- "type" : "integer" ,
1021- "description" : "Maximum number of pages to crawl (optional)" ,
1022- "minimum" : 1 ,
1023- "maximum" : 100 ,
1024- "default" : 10
1025- },
1026- "same_domain_only" : {
1027- "type" : "boolean" ,
1028- "description" : "Whether to crawl only within the same domain (optional)" ,
1029- "default" : true
1030- }
1031- },
1032- "required" : ["url" ],
1033- "additionalProperties" : False
1034- },
1035- annotations = {
1036- "readOnlyHint" : True ,
1037- "destructiveHint" : False ,
1038- "idempotentHint" : False
1039- }
1040- )
888+ @mcp .tool ()
1041889def smartcrawler_initiate (
1042890 url : str ,
1043891 ctx : Context ,
@@ -1081,27 +929,7 @@ def smartcrawler_initiate(
1081929
1082930
1083931# Add tool for fetching SmartCrawler results
1084- @mcp .tool (
1085- description = "Fetch the results of a SmartCrawler operation" ,
1086- input_schema = {
1087- "type" : "object" ,
1088- "properties" : {
1089- "request_id" : {
1090- "type" : "string" ,
1091- "description" : "The request ID returned by smartcrawler_initiate" ,
1092- "pattern" : "^[a-zA-Z0-9-_]+$" ,
1093- "examples" : ["req_123abc" , "crawl-456def" ]
1094- }
1095- },
1096- "required" : ["request_id" ],
1097- "additionalProperties" : False
1098- },
1099- annotations = {
1100- "readOnlyHint" : True ,
1101- "destructiveHint" : False ,
1102- "idempotentHint" : True
1103- }
1104- )
932+ @mcp .tool ()
1105933def smartcrawler_fetch_results (request_id : str , ctx : Context ) -> Dict [str , Any ]:
1106934 """
1107935 Fetch the results of a SmartCrawler operation.
@@ -1122,32 +950,7 @@ def smartcrawler_fetch_results(request_id: str, ctx: Context) -> Dict[str, Any]:
1122950
1123951
1124952# Add tool for basic scrape
1125- @mcp .tool (
1126- description = "Fetch page content for a URL" ,
1127- input_schema = {
1128- "type" : "object" ,
1129- "properties" : {
1130- "website_url" : {
1131- "type" : "string" ,
1132- "description" : "URL to scrape" ,
1133- "format" : "uri" ,
1134- "examples" : ["https://example.com" , "https://news.ycombinator.com" ]
1135- },
1136- "render_heavy_js" : {
1137- "type" : "boolean" ,
1138- "description" : "Whether to render heavy JavaScript (optional, may increase processing time)" ,
1139- "default" : false
1140- }
1141- },
1142- "required" : ["website_url" ],
1143- "additionalProperties" : False
1144- },
1145- annotations = {
1146- "readOnlyHint" : True ,
1147- "destructiveHint" : False ,
1148- "idempotentHint" : True
1149- }
1150- )
953+ @mcp .tool ()
1151954def scrape (website_url : str , ctx : Context , render_heavy_js : Optional [bool ] = None ) -> Dict [str , Any ]:
1152955 """
1153956 Fetch page content for a URL.
@@ -1167,27 +970,7 @@ def scrape(website_url: str, ctx: Context, render_heavy_js: Optional[bool] = Non
1167970
1168971
1169972# Add tool for sitemap extraction
1170- @mcp .tool (
1171- description = "Extract sitemap for a website" ,
1172- input_schema = {
1173- "type" : "object" ,
1174- "properties" : {
1175- "website_url" : {
1176- "type" : "string" ,
1177- "description" : "Base website URL to extract sitemap from" ,
1178- "format" : "uri" ,
1179- "examples" : ["https://example.com" , "https://docs.python.org" ]
1180- }
1181- },
1182- "required" : ["website_url" ],
1183- "additionalProperties" : False
1184- },
1185- annotations = {
1186- "readOnlyHint" : True ,
1187- "destructiveHint" : False ,
1188- "idempotentHint" : True
1189- }
1190- )
973+ @mcp .tool ()
1191974def sitemap (website_url : str , ctx : Context ) -> Dict [str , Any ]:
1192975 """
1193976 Extract sitemap for a website.
@@ -1206,71 +989,7 @@ def sitemap(website_url: str, ctx: Context) -> Dict[str, Any]:
1206989
1207990
1208991# Add tool for Agentic Scraper (no live session/browser interaction)
1209- @mcp .tool (
1210- description = "Run the Agentic Scraper workflow with AI-powered automation" ,
1211- input_schema = {
1212- "type" : "object" ,
1213- "properties" : {
1214- "url" : {
1215- "type" : "string" ,
1216- "description" : "Target website URL to scrape" ,
1217- "format" : "uri" ,
1218- "examples" : ["https://example.com" , "https://ecommerce-site.com/products" ]
1219- },
1220- "user_prompt" : {
1221- "type" : "string" ,
1222- "description" : "Instructions for what to do or extract (optional)" ,
1223- "examples" : [
1224- "Navigate to the products page and extract all product details" ,
1225- "Find the contact form and extract all available contact methods" ,
1226- "Search for pricing information and extract all plans"
1227- ]
1228- },
1229- "output_schema" : {
1230- "oneOf" : [
1231- {"type" : "string" , "description" : "JSON string representing the desired output schema" },
1232- {"type" : "object" , "description" : "Object representing the desired output schema" }
1233- ],
1234- "description" : "Desired structured output schema (optional)"
1235- },
1236- "steps" : {
1237- "oneOf" : [
1238- {"type" : "string" , "description" : "Single step or JSON array string of steps" },
1239- {"type" : "array" , "items" : {"type" : "string" }, "description" : "Array of high-level steps for the agent" }
1240- ],
1241- "description" : "High-level steps/instructions for the agent (optional)" ,
1242- "examples" : [
1243- ["Navigate to products" , "Extract product info" , "Get pricing" ],
1244- "Click on the menu and find contact information"
1245- ]
1246- },
1247- "ai_extraction" : {
1248- "type" : "boolean" ,
1249- "description" : "Whether to enable AI extraction mode (optional)" ,
1250- "default" : true
1251- },
1252- "persistent_session" : {
1253- "type" : "boolean" ,
1254- "description" : "Whether to keep session alive between steps (optional)" ,
1255- "default" : false
1256- },
1257- "timeout_seconds" : {
1258- "type" : "number" ,
1259- "description" : "Per-request timeout override in seconds (optional)" ,
1260- "minimum" : 10 ,
1261- "maximum" : 300 ,
1262- "default" : 120
1263- }
1264- },
1265- "required" : ["url" ],
1266- "additionalProperties" : False
1267- },
1268- annotations = {
1269- "readOnlyHint" : True ,
1270- "destructiveHint" : False ,
1271- "idempotentHint" : False
1272- }
1273- )
992+ @mcp .tool ()
1274993def agentic_scrapper (
1275994 url : str ,
1276995 ctx : Context ,
0 commit comments