Skip to content

Commit 4dad1b1

Browse files
authored
Add save_to_json() convenience method to JobResult (#35)
* Add save_to_json() convenience method to JobResult
  - Add simple save_to_json() method that handles Citation serialization automatically
  - Creates directories as needed and uses existing to_dict() method internally
  - Updated README.md with usage examples
  - Added comprehensive test with tmp_path fixture
  - Minimal code addition (~10 lines) for maximum user convenience
  Removes the need for users to manually handle Citation serialization when saving results.
* Clean up implementation and revert version change
  - Remove unused __json__() method from Citation class
  - Remove unused BatchataJSONEncoder and json_encoder.py file
  - Clean up utils/__init__.py imports
  - Revert version back to 0.4.4 (no version bump yet)
  - Keep minimal, clean save_to_json() implementation
1 parent c148789 commit 4dad1b1

4 files changed

Lines changed: 79 additions & 4 deletions

File tree

README.md

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -112,6 +112,9 @@ for result in results["completed"]:
112112
print(f" Vendor: {analysis.vendor} (page: {citations.get("vendor").page})")
113113
print(f" Total: ${analysis.total_amount:.2f} (page: {citations.get("total_amount").page})")
114114
print(f" Status: {analysis.payment_status} (page: {citations.get("payment_status").page})")
115+
116+
# Save each result to JSON file
117+
result.save_to_json(f"./invoice_results/{result.job_id}.json")
115118

116119
# Process failed/cancelled results
117120
for result in results["failed"]:
@@ -121,7 +124,6 @@ for result in results["cancelled"]:
121124
print(f"\nJob {result.job_id} was cancelled: {result.error}")
122125
```
123126

124-
125127
## Interactive Progress Display
126128

127129
Batchata provides an interactive real-time progress display when using `print_status=True`:

batchata/core/job_result.py

Lines changed: 15 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
"""JobResult data model."""
22

3-
from dataclasses import asdict, dataclass
3+
from dataclasses import dataclass
44
from typing import Any, Dict, List, Optional, Union
55
from pydantic import BaseModel
66

@@ -88,6 +88,20 @@ def to_dict(self) -> Dict[str, Any]:
8888
"batch_id": self.batch_id
8989
}
9090

91+
def save_to_json(self, filepath: str, indent: int = 2) -> None:
    """Save this JobResult to a JSON file.

    The payload written is whatever ``to_dict()`` returns. Missing parent
    directories are created first, and the file is opened in write mode,
    so an existing file at ``filepath`` is overwritten.

    Args:
        filepath: Path to save the JSON file
        indent: JSON indentation (default: 2)
    """
    import json
    from pathlib import Path

    target = Path(filepath)
    target.parent.mkdir(parents=True, exist_ok=True)
    # Pin the encoding: without it, text mode falls back to the platform
    # locale encoding, so the bytes on disk could differ between machines.
    with target.open('w', encoding='utf-8') as f:
        json.dump(self.to_dict(), f, indent=indent)
104+
91105
@classmethod
92106
def from_dict(cls, data: Dict[str, Any]) -> 'JobResult':
93107
"""Deserialize from state."""

tests/core/test_job_result.py

Lines changed: 60 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -316,4 +316,63 @@ def test_citation_mappings_json_serialization(self):
316316
assert len(restored.citations) == 2
317317
assert len(restored.citation_mappings) == 3
318318
assert len(restored.citation_mappings['cap_rate']) == 2
319-
assert len(restored.citation_mappings['occupancy']) == 1
319+
assert len(restored.citation_mappings['occupancy']) == 1
320+
321+
def test_save_to_json(self, tmp_path):
    """Check that save_to_json() writes a JSON file matching to_dict()."""
    import json

    # One citation, shared between the flat list and the per-field mapping.
    cited = [
        Citation(
            text='Test citation text',
            source='test.pdf',
            page=1,
            metadata={'type': 'page_location', 'document_index': 0},
        )
    ]
    job = JobResult(
        job_id="test-save-json",
        raw_response="Test response",
        parsed_response={'test_field': 'test_value'},
        citations=cited,
        citation_mappings={'test_field': cited},
        input_tokens=100,
        output_tokens=50,
        cost_usd=0.05,
    )

    # Target sits in a directory that does not exist yet.
    out_path = tmp_path / "subdir" / "test_result.json"
    job.save_to_json(str(out_path))
    assert out_path.exists()

    # Read the file back and compare against the in-memory serialization.
    with open(out_path, 'r') as handle:
        loaded = json.load(handle)
    assert loaded == job.to_dict()

    # Scalar fields survive the round trip.
    assert loaded['job_id'] == 'test-save-json'
    assert loaded['input_tokens'] == 100
    assert loaded['output_tokens'] == 50
    assert loaded['cost_usd'] == 0.05

    # Citations come back as plain dicts, not Citation objects.
    first_citation = loaded['citations'][0]
    assert isinstance(first_citation, dict)
    assert first_citation['text'] == 'Test citation text'
    assert first_citation['source'] == 'test.pdf'
    assert first_citation['page'] == 1

    # The per-field mapping is serialized the same way.
    mapped_citation = loaded['citation_mappings']['test_field'][0]
    assert isinstance(mapped_citation, dict)
    assert mapped_citation['text'] == 'Test citation text'

uv.lock

Lines changed: 1 addition & 1 deletion
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

0 commit comments

Comments
 (0)