Skip to content

Commit 5e92477

Browse files
authored
Merge pull request #1432 from sriramsowmithri9807/master
fix: Resolve 'ScrapeResponse' object subscriptable error in FireCrawl scraper
2 parents cd64f77 + 9f1a708 commit 5e92477

File tree

5 files changed

+62
-3
lines changed

5 files changed

+62
-3
lines changed

cli.py

Lines changed: 14 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -79,6 +79,17 @@
7979
default="objective"
8080
)
8181

82+
# =====================================
83+
# Arg: Encoding
84+
# =====================================
85+
86+
cli.add_argument(
87+
"--encoding",
88+
type=str,
89+
help="The encoding to use for the output file (default: utf-8).",
90+
default="utf-8"
91+
)
92+
8293
# =====================================
8394
# Arg: Query Domains
8495
# =====================================
@@ -134,7 +145,8 @@ async def main(args):
134145
query=args.query,
135146
query_domains=query_domains,
136147
report_type=args.report_type,
137-
tone=tone_map[args.tone]
148+
tone=tone_map[args.tone],
149+
encoding=args.encoding
138150
)
139151

140152
await researcher.conduct_research()
@@ -144,7 +156,7 @@ async def main(args):
144156
# Write the report to a file
145157
artifact_filepath = f"outputs/{uuid4()}.md"
146158
os.makedirs("outputs", exist_ok=True)
147-
with open(artifact_filepath, "w") as f:
159+
with open(artifact_filepath, "w", encoding="utf-8") as f:
148160
f.write(report)
149161

150162
print(f"Report written to '{artifact_filepath}'")

gpt-researcher

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
Subproject commit cd64f77268012397f245ef19670eccc997a7f581

gpt_researcher/agent.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -154,6 +154,9 @@ def __init__(
154154
self.memory = Memory(
155155
self.cfg.embedding_provider, self.cfg.embedding_model, **self.cfg.embedding_kwargs
156156
)
157+
158+
# Set default encoding to utf-8
159+
self.encoding = kwargs.get('encoding', 'utf-8')
157160

158161
# Initialize components
159162
self.research_conductor: ResearchConductor = ResearchConductor(self)

gpt_researcher/scraper/firecrawl/firecrawl.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -61,7 +61,7 @@ def scrape(self) -> tuple:
6161

6262
# Extract the content (markdown) and title from FireCrawl response
6363
content = response.data.markdown
64-
title = response["metadata"]["title"]
64+
title = response.metadata.get("title", "")
6565

6666
# Parse the HTML content of the response to create a BeautifulSoup object for the utility functions
6767
response_bs = self.session.get(self.link, timeout=4)

json_schema_generator.py

Lines changed: 43 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,43 @@
1+
import json
2+
from typing import Dict, Any
3+
from pydantic import BaseModel
4+
5+
class UserSchema(BaseModel):
    """Pydantic model describing a user record; used as the example schema below.

    Field values are validated by pydantic when the model is instantiated
    (see ``generate_structured_json``, which constructs it via ``schema(**data)``).
    """

    id: int          # unique numeric identifier
    name: str        # display name
    email: str       # contact e-mail address
    age: int         # age in years
    is_active: bool  # whether the account is currently active
11+
12+
def generate_structured_json(schema: type[BaseModel], data: Dict[str, Any]) -> str:
    """Validate *data* against *schema* and return it as a pretty-printed JSON string.

    Args:
        schema: Pydantic model *class* (not an instance) defining the structure.
            (The original annotation said ``BaseModel``, but callers pass the
            class itself, e.g. ``UserSchema`` — hence ``type[BaseModel]``.)
        data: Mapping of field names to values to validate and serialize.

    Returns:
        str: JSON with ``indent=2`` on success, or an error-message string of
        the form ``"Error generating JSON: ..."`` when validation or
        serialization fails. This helper never raises.
    """
    try:
        # Instantiating the model triggers pydantic's field validation.
        structured_data = schema(**data)
        # NOTE(review): .dict() is the pydantic v1 API (deprecated in v2,
        # where model_dump() is preferred); kept for v1 compatibility.
        return json.dumps(structured_data.dict(), indent=2)
    except Exception as e:
        # Broad catch is deliberate: callers receive an error string instead
        # of an exception, preserving the "never raises" contract above.
        return f"Error generating JSON: {str(e)}"
30+
31+
# Example usage: build a sample record, validate it against UserSchema,
# and print the resulting JSON (or the error string on failure).
if __name__ == "__main__":
    user_record = dict(
        id=1,
        name="John Doe",
        email="[email protected]",
        age=30,
        is_active=True,
    )

    rendered = generate_structured_json(UserSchema, user_record)
    print("Structured JSON Output:")
    print(rendered)

0 commit comments

Comments
 (0)