
Commit a429f62

Update server.py
1 parent 797d967 commit a429f62

1 file changed

src/scrapegraph_mcp/server.py

Lines changed: 8 additions & 289 deletions
@@ -809,27 +809,7 @@ def tool_comparison_guide() -> str:
 
 
 # Add tool for markdownify
-@mcp.tool(
-    description="Convert a webpage into clean, formatted markdown",
-    input_schema={
-        "type": "object",
-        "properties": {
-            "website_url": {
-                "type": "string",
-                "description": "URL of the webpage to convert to markdown",
-                "format": "uri",
-                "examples": ["https://example.com", "https://docs.python.org/3/"]
-            }
-        },
-        "required": ["website_url"],
-        "additionalProperties": False
-    },
-    annotations={
-        "readOnlyHint": True,
-        "destructiveHint": False,
-        "idempotentHint": True
-    }
-)
+@mcp.tool()
 def markdownify(website_url: str, ctx: Context) -> Dict[str, Any]:
     """
     Convert a webpage into clean, formatted markdown.
@@ -849,48 +829,7 @@ def markdownify(website_url: str, ctx: Context) -> Dict[str, Any]:
 
 
 # Add tool for smartscraper
-@mcp.tool(
-    description="Extract structured data from a webpage using AI",
-    input_schema={
-        "type": "object",
-        "properties": {
-            "user_prompt": {
-                "type": "string",
-                "description": "Instructions for what data to extract from the webpage",
-                "examples": [
-                    "Extract all product names and prices",
-                    "Get contact information and business hours",
-                    "Find all article titles and publication dates"
-                ]
-            },
-            "website_url": {
-                "type": "string",
-                "description": "URL of the webpage to scrape",
-                "format": "uri",
-                "examples": ["https://example.com/products", "https://news.ycombinator.com"]
-            },
-            "number_of_scrolls": {
-                "type": "integer",
-                "description": "Number of infinite scrolls to perform to load more content (optional)",
-                "minimum": 0,
-                "maximum": 10,
-                "default": 0
-            },
-            "markdown_only": {
-                "type": "boolean",
-                "description": "Whether to return only markdown content without AI processing (optional)",
-                "default": false
-            }
-        },
-        "required": ["user_prompt", "website_url"],
-        "additionalProperties": False
-    },
-    annotations={
-        "readOnlyHint": True,
-        "destructiveHint": False,
-        "idempotentHint": True
-    }
-)
+@mcp.tool()
 def smartscraper(
     user_prompt: str,
     website_url: str,
@@ -919,44 +858,7 @@ def smartscraper(
 
 
 # Add tool for searchscraper
-@mcp.tool(
-    description="Perform AI-powered web searches with structured results",
-    input_schema={
-        "type": "object",
-        "properties": {
-            "user_prompt": {
-                "type": "string",
-                "description": "Search query or instructions for what information to find",
-                "examples": [
-                    "Find the latest AI research papers",
-                    "Search for Python web scraping tutorials",
-                    "Get information about climate change statistics"
-                ]
-            },
-            "num_results": {
-                "type": "integer",
-                "description": "Number of websites to search (optional, default: 3 websites = 30 credits)",
-                "minimum": 1,
-                "maximum": 10,
-                "default": 3
-            },
-            "number_of_scrolls": {
-                "type": "integer",
-                "description": "Number of infinite scrolls to perform on each website (optional)",
-                "minimum": 0,
-                "maximum": 5,
-                "default": 0
-            }
-        },
-        "required": ["user_prompt"],
-        "additionalProperties": False
-    },
-    annotations={
-        "readOnlyHint": True,
-        "destructiveHint": False,
-        "idempotentHint": False
-    }
-)
+@mcp.tool()
 def searchscraper(
     user_prompt: str,
     ctx: Context,
@@ -983,61 +885,7 @@ def searchscraper(
 
 
 # Add tool for SmartCrawler initiation
-@mcp.tool(
-    description="Initiate intelligent multi-page web crawling with AI extraction or markdown conversion",
-    input_schema={
-        "type": "object",
-        "properties": {
-            "url": {
-                "type": "string",
-                "description": "Starting URL to crawl",
-                "format": "uri",
-                "examples": ["https://example.com", "https://docs.python.org"]
-            },
-            "prompt": {
-                "type": "string",
-                "description": "AI prompt for data extraction (required for AI mode)",
-                "examples": [
-                    "Extract product information including name, price, and description",
-                    "Get all article titles, authors, and publication dates",
-                    "Find contact information and business details"
-                ]
-            },
-            "extraction_mode": {
-                "type": "string",
-                "description": "Extraction mode: 'ai' for AI extraction (10 credits/page) or 'markdown' for markdown conversion (2 credits/page)",
-                "enum": ["ai", "markdown"],
-                "default": "ai"
-            },
-            "depth": {
-                "type": "integer",
-                "description": "Maximum link traversal depth (optional)",
-                "minimum": 1,
-                "maximum": 5,
-                "default": 2
-            },
-            "max_pages": {
-                "type": "integer",
-                "description": "Maximum number of pages to crawl (optional)",
-                "minimum": 1,
-                "maximum": 100,
-                "default": 10
-            },
-            "same_domain_only": {
-                "type": "boolean",
-                "description": "Whether to crawl only within the same domain (optional)",
-                "default": true
-            }
-        },
-        "required": ["url"],
-        "additionalProperties": False
-    },
-    annotations={
-        "readOnlyHint": True,
-        "destructiveHint": False,
-        "idempotentHint": False
-    }
-)
+@mcp.tool()
 def smartcrawler_initiate(
     url: str,
     ctx: Context,
@@ -1081,27 +929,7 @@ def smartcrawler_initiate(
 
 
 # Add tool for fetching SmartCrawler results
-@mcp.tool(
-    description="Fetch the results of a SmartCrawler operation",
-    input_schema={
-        "type": "object",
-        "properties": {
-            "request_id": {
-                "type": "string",
-                "description": "The request ID returned by smartcrawler_initiate",
-                "pattern": "^[a-zA-Z0-9-_]+$",
-                "examples": ["req_123abc", "crawl-456def"]
-            }
-        },
-        "required": ["request_id"],
-        "additionalProperties": False
-    },
-    annotations={
-        "readOnlyHint": True,
-        "destructiveHint": False,
-        "idempotentHint": True
-    }
-)
+@mcp.tool()
 def smartcrawler_fetch_results(request_id: str, ctx: Context) -> Dict[str, Any]:
     """
     Fetch the results of a SmartCrawler operation.
@@ -1122,32 +950,7 @@ def smartcrawler_fetch_results(request_id: str, ctx: Context) -> Dict[str, Any]:
 
 
 # Add tool for basic scrape
-@mcp.tool(
-    description="Fetch page content for a URL",
-    input_schema={
-        "type": "object",
-        "properties": {
-            "website_url": {
-                "type": "string",
-                "description": "URL to scrape",
-                "format": "uri",
-                "examples": ["https://example.com", "https://news.ycombinator.com"]
-            },
-            "render_heavy_js": {
-                "type": "boolean",
-                "description": "Whether to render heavy JavaScript (optional, may increase processing time)",
-                "default": false
-            }
-        },
-        "required": ["website_url"],
-        "additionalProperties": False
-    },
-    annotations={
-        "readOnlyHint": True,
-        "destructiveHint": False,
-        "idempotentHint": True
-    }
-)
+@mcp.tool()
 def scrape(website_url: str, ctx: Context, render_heavy_js: Optional[bool] = None) -> Dict[str, Any]:
     """
     Fetch page content for a URL.
@@ -1167,27 +970,7 @@ def scrape(website_url: str, ctx: Context, render_heavy_js: Optional[bool] = None) -> Dict[str, Any]:
 
 
 # Add tool for sitemap extraction
-@mcp.tool(
-    description="Extract sitemap for a website",
-    input_schema={
-        "type": "object",
-        "properties": {
-            "website_url": {
-                "type": "string",
-                "description": "Base website URL to extract sitemap from",
-                "format": "uri",
-                "examples": ["https://example.com", "https://docs.python.org"]
-            }
-        },
-        "required": ["website_url"],
-        "additionalProperties": False
-    },
-    annotations={
-        "readOnlyHint": True,
-        "destructiveHint": False,
-        "idempotentHint": True
-    }
-)
+@mcp.tool()
 def sitemap(website_url: str, ctx: Context) -> Dict[str, Any]:
     """
     Extract sitemap for a website.
@@ -1206,71 +989,7 @@ def sitemap(website_url: str, ctx: Context) -> Dict[str, Any]:
 
 
 # Add tool for Agentic Scraper (no live session/browser interaction)
-@mcp.tool(
-    description="Run the Agentic Scraper workflow with AI-powered automation",
-    input_schema={
-        "type": "object",
-        "properties": {
-            "url": {
-                "type": "string",
-                "description": "Target website URL to scrape",
-                "format": "uri",
-                "examples": ["https://example.com", "https://ecommerce-site.com/products"]
-            },
-            "user_prompt": {
-                "type": "string",
-                "description": "Instructions for what to do or extract (optional)",
-                "examples": [
-                    "Navigate to the products page and extract all product details",
-                    "Find the contact form and extract all available contact methods",
-                    "Search for pricing information and extract all plans"
-                ]
-            },
-            "output_schema": {
-                "oneOf": [
-                    {"type": "string", "description": "JSON string representing the desired output schema"},
-                    {"type": "object", "description": "Object representing the desired output schema"}
-                ],
-                "description": "Desired structured output schema (optional)"
-            },
-            "steps": {
-                "oneOf": [
-                    {"type": "string", "description": "Single step or JSON array string of steps"},
-                    {"type": "array", "items": {"type": "string"}, "description": "Array of high-level steps for the agent"}
-                ],
-                "description": "High-level steps/instructions for the agent (optional)",
-                "examples": [
-                    ["Navigate to products", "Extract product info", "Get pricing"],
-                    "Click on the menu and find contact information"
-                ]
-            },
-            "ai_extraction": {
-                "type": "boolean",
-                "description": "Whether to enable AI extraction mode (optional)",
-                "default": true
-            },
-            "persistent_session": {
-                "type": "boolean",
-                "description": "Whether to keep session alive between steps (optional)",
-                "default": false
-            },
-            "timeout_seconds": {
-                "type": "number",
-                "description": "Per-request timeout override in seconds (optional)",
-                "minimum": 10,
-                "maximum": 300,
-                "default": 120
-            }
-        },
-        "required": ["url"],
-        "additionalProperties": False
-    },
-    annotations={
-        "readOnlyHint": True,
-        "destructiveHint": False,
-        "idempotentHint": False
-    }
-)
+@mcp.tool()
 def agentic_scrapper(
     url: str,
     ctx: Context,

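Net effect: all eight tool registrations drop their hand-written input_schema and annotations arguments in favor of a bare @mcp.tool(), replacing 289 deleted lines with 8 added ones. A minimal sketch of why the bare decorator still produces a usable tool schema, assuming the server is built on the MCP Python SDK's FastMCP (suggested by the Context parameters and the decorator style); the server name and function body below are illustrative, not from this commit:

    from typing import Any, Dict

    from mcp.server.fastmcp import Context, FastMCP

    mcp = FastMCP("scrapegraph")  # hypothetical server name

    @mcp.tool()
    def markdownify(website_url: str, ctx: Context) -> Dict[str, Any]:
        """Convert a webpage into clean, formatted markdown."""
        # FastMCP builds the input schema from the type hints
        # (website_url -> {"type": "string"}, required: ["website_url"])
        # and takes the tool description from this docstring; the Context
        # parameter is injected by the framework and excluded from the
        # schema. A real implementation would call the ScrapeGraph API here.
        return {"markdown": f"# Placeholder for {website_url}"}

What the bare decorator does not regenerate is the removed metadata: per-field examples, "format": "uri", minimum/maximum bounds, and the readOnlyHint/destructiveHint/idempotentHint annotations. If those hints are still wanted, recent releases of the mcp package accept an annotations argument on the decorator; a hedged sketch, assuming a current SDK version:

    from mcp.types import ToolAnnotations

    @mcp.tool(annotations=ToolAnnotations(readOnlyHint=True, idempotentHint=True))
    def sitemap(website_url: str, ctx: Context) -> Dict[str, Any]:
        """Extract sitemap for a website."""
        ...  # illustrative stub; the real body is unchanged by this commit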