main.py
import os
import sys
import argparse
import json
from datetime import datetime
from dotenv import load_dotenv
# Load environment variables
load_dotenv()
from services.analysis_service import run_analysis
from services.evaluation_service import run_evaluation
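# Example invocations (illustrative only; derived from the argparse configuration
# in main() below, with placeholder dataset paths to substitute with your own):
#
#   python main.py data/sales.csv
#   python main.py analyze data/sales.csv --mode lightning --domain Finance
#   python main.py evaluate data/sales.csv --modes full lightning --repeats 3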
def main():
    """Command line interface."""
    parser = argparse.ArgumentParser(description="Run Multi-Agent Data Analyst pipeline")
    parser.add_argument(
        "command_or_dataset",
        help="Either a dataset path (analyze mode) or a command: analyze/evaluate",
    )
    parser.add_argument("dataset_path", nargs="?", help="Dataset path when using analyze/evaluate commands")
    parser.add_argument("--mode", choices=["full", "lightning"], default="full",
                        help="Pipeline mode: full (all steps) or lightning (faster)")
    parser.add_argument(
        "--domain",
        choices=["General", "Finance", "HR", "Marketing", "Healthcare"],
        default="General",
        help="Domain context for agents",
    )
    parser.add_argument("--repeats", type=int, default=2, help="Number of repeats per mode")
    parser.add_argument(
        "--modes",
        nargs="+",
        default=["full", "lightning"],
        choices=["full", "lightning"],
        help="Modes to benchmark",
    )
    args = parser.parse_args()

    if args.command_or_dataset in {"analyze", "evaluate"}:
        command = args.command_or_dataset
        if not args.dataset_path:
            parser.error("dataset_path is required when using analyze/evaluate commands")
        file_path = args.dataset_path
    else:
        command = "analyze"
        file_path = args.command_or_dataset

    if not os.path.exists(file_path):
        print(f"Error: File not found: {file_path}")
        sys.exit(1)

    if command == "analyze":
        results = run_analysis(file_path, verbose=True, mode=args.mode, domain=args.domain)
        if results["status"] == "error":
            sys.exit(1)
        return

    evaluation = run_evaluation(
        file_path=file_path,
        domain=args.domain,
        modes=args.modes,
        repeats=args.repeats,
    )

    os.makedirs("outputs/evaluations", exist_ok=True)
    ts = datetime.now().strftime("%Y%m%d_%H%M%S")
    out_path = f"outputs/evaluations/leaderboard_{ts}.json"
    with open(out_path, "w", encoding="utf-8") as f:
        json.dump(evaluation, f, indent=2)
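    # Assumed shape of evaluation["leaderboard"]: a list of dicts whose keys match
    # the fields printed below (mode, avg_score, avg_duration_sec, success_rate);
    # the actual schema is defined by run_evaluation in services.evaluation_service.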
print("\nEVALUATION COMPLETE")
print(f"Leaderboard file: {out_path}")
print("Top modes by avg score:")
for row in evaluation.get("leaderboard", []):
print(
f" - {row['mode']}: score={row['avg_score']}, "
f"avg_duration={row['avg_duration_sec']}s, success_rate={row['success_rate']}"
)
if __name__ == "__main__":
main()