Skip to content

Commit 5e92477

Browse files
authored
Merge pull request #1432 from sriramsowmithri9807/master
fix: Resolve 'ScrapeResponse' object subscriptable error in FireCrawl scraper
2 parents cd64f77 + 9f1a708 commit 5e92477

File tree

5 files changed

+62
-3
lines changed

5 files changed

+62
-3
lines changed

cli.py

Lines changed: 14 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -79,6 +79,17 @@
7979
default="objective"
8080
)
8181

82+
# =====================================
83+
# Arg: Encoding
84+
# =====================================
85+
86+
cli.add_argument(
87+
"--encoding",
88+
type=str,
89+
help="The encoding to use for the output file (default: utf-8).",
90+
default="utf-8"
91+
)
92+
8293
# =====================================
8394
# Arg: Query Domains
8495
# =====================================
@@ -134,7 +145,8 @@ async def main(args):
134145
query=args.query,
135146
query_domains=query_domains,
136147
report_type=args.report_type,
137-
tone=tone_map[args.tone]
148+
tone=tone_map[args.tone],
149+
encoding=args.encoding
138150
)
139151

140152
await researcher.conduct_research()
@@ -144,7 +156,7 @@ async def main(args):
144156
# Write the report to a file
145157
artifact_filepath = f"outputs/{uuid4()}.md"
146158
os.makedirs("outputs", exist_ok=True)
147-
with open(artifact_filepath, "w") as f:
159+
with open(artifact_filepath, "w", encoding="utf-8") as f:
148160
f.write(report)
149161

150162
print(f"Report written to '{artifact_filepath}'")

gpt-researcher

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
Subproject commit cd64f77268012397f245ef19670eccc997a7f581

gpt_researcher/agent.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -154,6 +154,9 @@ def __init__(
154154
self.memory = Memory(
155155
self.cfg.embedding_provider, self.cfg.embedding_model, **self.cfg.embedding_kwargs
156156
)
157+
158+
# Set default encoding to utf-8
159+
self.encoding = kwargs.get('encoding', 'utf-8')
157160

158161
# Initialize components
159162
self.research_conductor: ResearchConductor = ResearchConductor(self)

gpt_researcher/scraper/firecrawl/firecrawl.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -61,7 +61,7 @@ def scrape(self) -> tuple:
6161

6262
# Extract the content (markdown) and title from FireCrawl response
6363
content = response.data.markdown
64-
title = response["metadata"]["title"]
64+
title = response.metadata.get("title", "")
6565

6666
# Parse the HTML content of the response to create a BeautifulSoup object for the utility functions
6767
response_bs = self.session.get(self.link, timeout=4)

json_schema_generator.py

Lines changed: 43 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,43 @@
1+
import json
2+
from typing import Dict, Any
3+
from pydantic import BaseModel
4+
5+
class UserSchema(BaseModel):
    """Pydantic model describing a user record; used as the example schema below.

    Field values are validated by pydantic when the model is instantiated
    (see ``generate_structured_json``, which constructs it via ``schema(**data)``).
    """

    id: int          # unique numeric identifier
    name: str        # display name
    email: str       # contact e-mail address
    age: int         # age in years
    is_active: bool  # whether the account is currently active
11+
12+
def generate_structured_json(schema: type[BaseModel], data: Dict[str, Any]) -> str:
    """Validate *data* against *schema* and return it as a pretty-printed JSON string.

    Args:
        schema: Pydantic model *class* (not an instance) defining the structure.
            (The original annotation said ``BaseModel``, but callers pass the
            class itself, e.g. ``UserSchema`` — hence ``type[BaseModel]``.)
        data: Mapping of field names to values to validate and serialize.

    Returns:
        str: JSON with ``indent=2`` on success, or an error-message string of
        the form ``"Error generating JSON: ..."`` when validation or
        serialization fails. This helper never raises.
    """
    try:
        # Instantiating the model triggers pydantic's field validation.
        structured_data = schema(**data)
        # NOTE(review): .dict() is the pydantic v1 API (deprecated in v2,
        # where model_dump() is preferred); kept for v1 compatibility.
        return json.dumps(structured_data.dict(), indent=2)
    except Exception as e:
        # Broad catch is deliberate: callers receive an error string instead
        # of an exception, preserving the "never raises" contract above.
        return f"Error generating JSON: {str(e)}"
30+
31+
# Example usage: build a sample record, validate it against UserSchema,
# and print the resulting JSON (or the error string on failure).
if __name__ == "__main__":
    user_record = dict(
        id=1,
        name="John Doe",
        email="[email protected]",
        age=30,
        is_active=True,
    )

    rendered = generate_structured_json(UserSchema, user_record)
    print("Structured JSON Output:")
    print(rendered)

0 commit comments

Comments
 (0)