
Commit a669963

Merge pull request #204 from simjak/simonas/init
fix: init fixes to run it
2 parents b7d2eea + 36ef27e commit a669963


12 files changed: +194 -108 lines changed


.env.example

Whitespace-only changes.

.gitignore

+2
@@ -122,6 +122,7 @@ celerybeat.pid
 # Environments
 .env
 .venv
+.venv*
 env/
 venv/
 ENV/
@@ -171,3 +172,4 @@ test/
 memory/
 examples/document-processing/logs/
 !src/agents/datasets/data/math/test
+client.db

examples/chatbot/run.py

+11-3
@@ -1,10 +1,18 @@
 import os
-from agents import SolutionConfig, Solution
+
 import litellm
+from agents import Solution, SolutionConfig
 
+from dotenv import load_dotenv
 litellm.set_verbose = True
-os.environ["OPENAI_API_KEY"] = ""
-os.environ["OPENAI_BASE_URL"] = ""
+
+load_dotenv()
+
+# Set Environment Variables
+if os.environ.get("OPENAI_API_KEY") is None:
+    os.environ["OPENAI_API_KEY"] = ""
+if os.environ.get("OPENAI_BASE_URL") is None:
+    os.environ["OPENAI_BASE_URL"] = ""
 
 solution = Solution(config=SolutionConfig("examples/chatbot/config.json"))
 solution.run()
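The run script now prefers credentials from the shell environment or a local .env file and only falls back to empty placeholders. A minimal standalone sketch of the same pattern (the loop and the status print are illustrative additions, not part of the commit):

import os

from dotenv import load_dotenv

# Reads KEY=value pairs from a .env file in the working directory; by default
# python-dotenv does not override variables already set in the shell.
load_dotenv()

for var in ("OPENAI_API_KEY", "OPENAI_BASE_URL"):
    # Fall back to an empty placeholder only if the variable is still unset.
    if os.environ.get(var) is None:
        os.environ[var] = ""
    print(f"{var} is {'set' if os.environ[var] else 'empty'}")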

examples/software_dev/configs/SOP.json

+19-4
@@ -16,7 +16,12 @@
     "begin_role": "Boss",
     "node_description": "Imagine a scenario where the boss has presented a requirement. The architect is tasked with proposing a framework based on this requirement. The leader's role is to provide feedback on the architect's proposal, and another architect will finalize the framework based on the leader's comments.",
     "node_prompt_templates": {
-        "target": "The target program is: <target>{prompt}</target>"
+        "target": "The target program is: <target>{prompt}</target>",
+        "style": "You need to follow the output style: {style}.\n",
+        "task": "The task you need to execute is: {task}.\n",
+        "rule": "The rule you need to follow is: {rule}.\n",
+        "demonstrations": "Here are the demonstrations you can refer to:\n{demonstrations}.\n",
+        "last": "{last}"
     },
     "node_prompt_paddings": {
         "Boss": {
@@ -151,7 +156,12 @@
     "begin_role": "Boss",
     "node_description": "In this scenario, the boss has provided a requirement. The developer's task is to write code based on the architecture proposed by the architect. The leader evaluates the written code for elegance, readability, and functionality, providing feedback. Another developer makes necessary modifications to the code.",
     "node_prompt_templates": {
-        "target": "The target program is: <target>{prompt}</target>"
+        "target": "The target program is: <target>{prompt}</target>",
+        "style": "You need to follow the output style: {style}.\n",
+        "task": "The task you need to execute is: {task}.\n",
+        "rule": "The rule you need to follow is: {rule}.\n",
+        "demonstrations": "Here are the demonstrations you can refer to:\n{demonstrations}.\n",
+        "last": "{last}"
     },
     "node_prompt_paddings": {
         "Boss": {
@@ -305,7 +315,12 @@
     "node_description": "In this scenario, the boss has provided a requirement. The debugger simulates a compiler to determine whether the code is runnable and provides feedback. The developer writes code based on the debugger's feedback. The leader evaluates whether the final code meets the boss's requirements and provides feedback for further modifications. The coder writes the final code to a file.",
     "node_prompt_templates": {
         "target": "The target program is: <target>{prompt}</target>",
-        "finalize code": "Please consider all the messages above and integrate the final project code. Ensures the code is clean, well-formatted. {extract}"
+        "finalize code": "Please consider all the messages above and integrate the final project code. Ensures the code is clean, well-formatted. {extract}",
+        "style": "You need to follow the output style: {style}.\n",
+        "task": "The task you need to execute is: {task}.\n",
+        "rule": "The rule you need to follow is: {rule}.\n",
+        "demonstrations": "Here are the demonstrations you can refer to:\n{demonstrations}.\n",
+        "last": "{last}"
     },
     "node_prompt_paddings": {
         "Boss": {
@@ -480,7 +495,7 @@
     "value": {
         "extract": "Please extract the final project code and put them between <result> and </result>."
     }
-
+
     }
   }
 }
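The five added keys are ordinary Python format strings that a node can stitch together per role. A hedged sketch of how such a template block might be rendered (the render helper and the example values are illustrative, not the library's actual assembly code):

node_prompt_templates = {
    "target": "The target program is: <target>{prompt}</target>",
    "style": "You need to follow the output style: {style}.\n",
    "task": "The task you need to execute is: {task}.\n",
    "rule": "The rule you need to follow is: {rule}.\n",
    "demonstrations": "Here are the demonstrations you can refer to:\n{demonstrations}.\n",
    "last": "{last}",
}

def render(keys, **values):
    # Concatenate the selected templates in order, filling their placeholders.
    return "".join(node_prompt_templates[k].format(**values) for k in keys)

print(render(["task", "rule"], task="design the architecture", rule="answer in JSON"))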

examples/software_dev/configs/optimizer_config.json

+6-6
@@ -22,26 +22,26 @@
         "log_path": "logs/trainer_god"
     },
     "meta_prompt": {
-        "loss_without_score": {
+        "loss": {
             "order": [
-                "loss_with_ground_truth"
+                "loss_with_ground_truth_and_score"
             ],
             "extract_key": [
                 "score",
                 "requirement_for_previous"
             ],
-            "loss_with_ground_truth": "You are a fine-tuner of a large model. I will provide you with some output results from the model and the expected correct results. You need to evaluate these data and provide a score out of 10, please wrap the score using <score></score>. Additionally, please provide some suggestions for modifying the model's output, using <requirement_for_previous></requirement_for_previous> to wrap your suggestions.\n\nHere is the model's output:\n<result>{result}</result>;\n\nThe expected result is:\n<ground_truth>{ground_truth}</ground_truth>\n\nPlease note:\n\nEnsure that the output is wrapped with <score></score> and <requirement_for_previous></requirement_for_previous> respectively.\nThe output should be as consistent as possible with the expected result while being correct. For example, if the expected result is “BUST”, and the model's output is “The women's lifestyle magazine is 'BUST' magazine.”, even though the answer is correct, you should advise the model to be more concise.\nThe standard for a score of 10 is that the model's output is exactly the same as the expected result in a case-insensitive manner, and without any unnecessary content. Even if the model's output is semantically correct, if it includes superfluous content, points should be deducted.",
+            "loss_with_ground_truth_and_score": "You are a fine-tuner of a large model. I will provide you with some output results from the model and the expected correct results. You need to evaluate these data and provide a score out of 10, please wrap the score using <score></score>. Additionally, please provide some suggestions for modifying the model's output, using <requirement_for_previous></requirement_for_previous> to wrap your suggestions.\n\nHere is the model's output:\n<result>{result}</result>;\n\nThe expected result is:\n<ground_truth>{ground_truth}</ground_truth>\n\nPlease note:\n\nEnsure that the output is wrapped with <score></score> and <requirement_for_previous></requirement_for_previous> respectively.\nThe output should be as consistent as possible with the expected result while being correct. For example, if the expected result is “BUST”, and the model's output is “The women's lifestyle magazine is 'BUST' magazine.”, even though the answer is correct, you should advise the model to be more concise.\nThe standard for a score of 10 is that the model's output is exactly the same as the expected result in a case-insensitive manner, and without any unnecessary content. Even if the model's output is semantically correct, if it includes superfluous content, points should be deducted.",
             "loss_no_gt": "I will give you some model outputs. You need to score these data and give a rating on a 10-point scale; please wrap the score using <score></score>. In addition, please give some suggestions for modifying the model's output, wrapped with <requirement_for_previous></requirement_for_previous>.\n\nHere is the data you need to process: {result}.",
             "loss_no_gt_no_result": "I will give you some interaction records. You need to score these records and give a rating on a 10-point scale; please wrap the score using <score></score>. In addition, please give some suggestions for modifying the model's output, wrapped with <requirement_for_previous></requirement_for_previous>. The interaction information is as follows: {history}."
         },
-        "loss": {
+        "loss_without_score": {
             "order": [
-                "loss_with_ground_truth_and_score"
+                "loss_with_ground_truth"
             ],
             "extract_key": [
                 "requirement_for_previous"
             ],
-            "loss_with_ground_truth_and_score": "You are a large language model fine-tuner. I will provide you with a model's output and the expected correct result. You need to evaluate it and suggest modifications to the model's output. Please use `<requirement_for_previous></requirement_for_previous>` to enclose your feedback.\n\nBelow is the model's output:\n<result>{result}</result>\n\nThe expected result is:\n<ground_truth>{ground_truth}</ground_truth>\n\nHere is the evaluation score for the model. Your goal is to optimize this score:\n<score>{score}</score>\n\nThe relevant information about this score is as follows:\n<evaluation_info>{score_info}</evaluation_info>\n\nNote:\n1. Ensure that `<requirement_for_previous></requirement_for_previous>` exists and appears once.\n2. If the model's output is satisfactory, you can output <requirement_for_previous>The output is satisfactory, no additional requirements</requirement_for_previous>.\n3. The output should be as close to the expected result as possible while ensuring correctness. For example, if the expected result is \"BUST\" and the model's output is \"The women's lifestyle magazine is 'BUST' magazine.\", even though this answer is correct, you should remind the model to be concise."
+            "loss_with_ground_truth": "You are a large language model fine-tuner. I will provide you with a model's output and the expected correct result. You need to evaluate it and suggest modifications to the model's output. Please use `<requirement_for_previous></requirement_for_previous>` to enclose your feedback.\n\nBelow is the model's output:\n<result>{result}</result>\n\nThe expected result is:\n<ground_truth>{ground_truth}</ground_truth>\n\nHere is the evaluation score for the model. Your goal is to optimize this score:\n<score>{score}</score>\n\nThe relevant information about this score is as follows:\n<evaluation_info>{score_info}</evaluation_info>\n\nNote:\n1. Ensure that `<requirement_for_previous></requirement_for_previous>` exists and appears once.\n2. If the model's output is satisfactory, you can output <requirement_for_previous>The output is satisfactory, no additional requirements</requirement_for_previous>.\n3. The output should be as close to the expected result as possible while ensuring correctness. For example, if the expected result is \"BUST\" and the model's output is \"The women's lifestyle magazine is 'BUST' magazine.\", even though this answer is correct, you should remind the model to be concise."
         }
     }
 },
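Each meta prompt asks the model to wrap its verdict in <score> and <requirement_for_previous> tags, and extract_key names which tags to pull back out. A sketch of one way such tags could be extracted (the regex helper is an assumption for illustration, not the optimizer's actual parser):

import re

def extract_tag(text, key):
    # Return the content wrapped in <key>...</key>, or None if the tag is missing.
    match = re.search(rf"<{key}>(.*?)</{key}>", text, re.DOTALL)
    return match.group(1).strip() if match else None

reply = "<score>8</score><requirement_for_previous>Be more concise.</requirement_for_previous>"
print(extract_tag(reply, "score"))                      # 8
print(extract_tag(reply, "requirement_for_previous"))   # Be more concise.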

examples/software_dev/run_train.py

+4-2
@@ -4,8 +4,10 @@
 import litellm
 from agents.optimization.trainer import Trainer, TrainerConfig
 
-os.environ["OPENAI_API_KEY"] = ""
-os.environ["OPENAI_BASE_URL"] = ""
+if os.environ.get("OPENAI_API_KEY") is None:
+    os.environ["OPENAI_API_KEY"] = ""
+if os.environ.get("OPENAI_BASE_URL") is None:
+    os.environ["OPENAI_BASE_URL"] = ""
 
 litellm.set_verbose = False
 

src/agents/agents/llm.py

+11-4
@@ -15,13 +15,16 @@
 # limitations under the License.
 import os
 import time
-import litellm
-import backoff
 from abc import abstractmethod
 from typing import Union
 
-from ..utils.files import save_logs
+import litellm
+from dotenv import load_dotenv
+
 from ..utils.config import Config
+from ..utils.files import save_logs
+
+load_dotenv()
 
 WAIT_TIME = 20
 
@@ -30,10 +33,14 @@
 def completion_with_backoff(**kwargs):
     litellm.api_key = os.environ["OPENAI_API_KEY"]
     litellm.api_base = os.environ.get("OPENAI_BASE_URL")
+
+    if os.environ.get("OPENAI_API_KEY") is None:
+        raise ValueError("OPENAI_API_KEY is not set")
+
     while True:
         try:
             return litellm.completion(**kwargs)
-        except litellm.OpenAIError as e:
+        except litellm.OpenAIError:
             print(f"Please wait {WAIT_TIME} seconds and resend later ...")
             time.sleep(WAIT_TIME)
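With the backoff dependency removed, retries are now a plain loop that sleeps a fixed WAIT_TIME between attempts. The same pattern in isolation, with a fabricated flaky_call standing in for litellm.completion:

import time

WAIT_TIME = 1  # the module uses 20 seconds

attempts = {"count": 0}

def flaky_call():
    # Fails twice and then succeeds, just to exercise the retry loop.
    attempts["count"] += 1
    if attempts["count"] < 3:
        raise RuntimeError("transient failure")
    return "ok"

while True:
    try:
        print(flaky_call())
        break
    except RuntimeError:
        print(f"Please wait {WAIT_TIME} seconds and resend later ...")
        time.sleep(WAIT_TIME)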

src/agents/datasets/software_dev.py

+17-12
@@ -39,18 +39,23 @@ def get_case_dict(self, idx: int):
         Returns:
             Dict[str, Any]: A dictionary with case details.
         """
-        return {
-            "case_id": "software_dev_" + str(self.data[idx]["task_id"]),
-            "case_name": self.data[idx]["task_name"],
-            "task_id": "software_dev",
-            "function_ids": "no use now",
-            "KB_id": "no use now",
-            "input": {"input_data": {"prompt": self.data[idx]["prompt"]}},
-            "ground_truth": self.data[idx].get("answer", None),
-            "idx": idx,
-            "metric_name": self.metric_name,
-            "metric_description": self.metric_description,
-        }
+        try:
+            return {
+                "case_id": "software_dev_" + str(self.data[idx]["task_id"]),
+                "case_name": self.data[idx]["task_name"],
+                "task_id": "software_dev",
+                "task_description": self.data[idx]["task_description"] if "task_description" in self.data[idx] else self.data[idx]["prompt"],
+                "function_ids": "no use now",
+                "KB_id": "no use now",
+                "input": {"input_data": {"prompt": self.data[idx]["prompt"]}},
+                "ground_truth": self.data[idx].get("answer", None),
+                "idx": idx,
+                "metric_name": self.metric_name,
+                "metric_description": self.metric_description,
+            }
+        except Exception as e:
+            print(f"Error: {e}, {self.data[idx]}")
+            raise e
 
     def evaluate(self, idx: int, answer: str):
         """

src/agents/evaluation/case.py

+26-22
@@ -29,34 +29,38 @@ def __init__(self, json_data: dict):
             json_data (dict): The JSON data to initialize the Case object.
         """
         # raw data, it will not be saved when dump
-        self.raw_data = json_data
+        try:
+            self.raw_data = json_data
 
-        self.case_id: str = json_data["case_id"]
-        self.case_name: str = json_data["case_name"]
+            self.case_id: str = json_data["case_id"]
+            self.case_name: str = json_data["case_name"]
 
-        self.task_id: str = json_data["task_id"]
-        self.task_description = json_data["task_description"]
+            self.task_id: str = json_data["task_id"]
+            self.task_description = json_data["task_description"]
 
-        self.function_ids: str = json_data["function_ids"]
-        self.KB_id: str = json_data["KB_id"]
+            self.function_ids: str = json_data["function_ids"]
+            self.KB_id: str = json_data["KB_id"]
 
-        self.input: dict = json_data["input"]
-        self.ground_truth: dict = json_data.get("ground_truth")
+            self.input: dict = json_data["input"]
+            self.ground_truth: dict = json_data.get("ground_truth")
 
-        # fields that not available until they are run
-        self.result: dict = json_data.get("result", {})  # the direct output the client expects
-        self.trajectory: Trajectory = Trajectory.load_from_json(
-            json_data.get("trajectory", [])
-        )
+            # fields that not available until they are run
+            self.result: dict = json_data.get("result", {})  # the direct output the client expects
+            self.trajectory: Trajectory = Trajectory.load_from_json(
+                json_data.get("trajectory", [])
+            )
 
-        # fields that not available until they are evaluated or optimized
-        self.dataset_eval: DatasetEvaluation = DatasetEvaluation(
-            **json_data.get("dataset_eval", {})
-        )  # Dataset evaluation results
-        self.loss: CaseLoss = CaseLoss(**json_data.get("loss", {}))  # evaluation results
-        self.sop_suggestion: SOPSuggestion = SOPSuggestion(
-            **json_data.get("sop_suggestion", {})
-        )  # Suggestions for SOP optimization
+            # fields that not available until they are evaluated or optimized
+            self.dataset_eval: DatasetEvaluation = DatasetEvaluation(
+                **json_data.get("dataset_eval", {})
+            )  # Dataset evaluation results
+            self.loss: CaseLoss = CaseLoss(**json_data.get("loss", {}))  # evaluation results
+            self.sop_suggestion: SOPSuggestion = SOPSuggestion(
+                **json_data.get("sop_suggestion", {})
+            )  # Suggestions for SOP optimization
+        except Exception as e:
+            print(f"Error: {e}, {json_data}")
+            raise e
 
     @classmethod
     def read_batch_from_json(cls, json_path):
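Wrapping the whole constructor in a try/except that prints the offending payload before re-raising makes it easier to see which record in a batch is malformed. A reduced sketch of the same pattern (build_case and the sample payload are hypothetical):

def build_case(json_data):
    try:
        # Touch the required keys so a malformed record fails loudly here.
        return {"case_id": json_data["case_id"], "case_name": json_data["case_name"]}
    except Exception as e:
        # Report which payload broke, then re-raise so the caller still sees the error.
        print(f"Error: {e}, {json_data}")
        raise e

print(build_case({"case_id": "demo_1", "case_name": "demo"}))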

src/agents/evaluation/state.py

+4-2
@@ -23,9 +23,11 @@ def __init__(
             environment (Environment): The environment associated with this state. A deep copy is made.
         """
         self.node = node
-        self.agent: Agent = copy.deepcopy(agent)  # a deep copy is needed here, and it is only a single agent
+        # self.agent: Agent = copy.deepcopy(agent)  # a deep copy is needed here, and it is only a single agent
+        self.agent: Agent = agent
         self.action: Action = action
-        self.environment: Environment = copy.deepcopy(environment)
+        # self.environment: Environment = copy.deepcopy(environment)
+        self.environment: Environment = environment
         self.node_eval = NodeEval(node.node_name, "", "", "", "")
         self.backward: StateBackward = StateBackward()
         self.node_backward: StateBackward = StateBackward()
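Dropping copy.deepcopy means the State now holds references to the live agent and environment, so later mutations are visible through the stored state rather than frozen at construction time. A small illustration of that difference (the Env class is hypothetical):

import copy

class Env:
    def __init__(self):
        self.messages = []

live = Env()
snapshot = copy.deepcopy(live)   # old behaviour: an independent frozen copy
reference = live                 # new behaviour: a shared reference

live.messages.append("hello")
print(len(snapshot.messages), len(reference.messages))  # 0 1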
