From 8e2f505707e01ebc6b3713104249e4ec7900b7ca Mon Sep 17 00:00:00 2001 From: Rohan Marwaha Date: Thu, 11 Jan 2024 12:54:26 -0600 Subject: [PATCH] Changes to merge memory management with latest changes in github_agent_webhooks branch --- ai_ta_backend/agents/customcallbacks.py | 2 +- .../agents/github_webhook_handlers.py | 11 +++++--- ai_ta_backend/agents/ml4bio_agent.py | 27 ++++++++++++------- ai_ta_backend/agents/tools.py | 6 +++-- ai_ta_backend/agents/utils.py | 15 +++++++++-- ai_ta_backend/main.py | 2 +- 6 files changed, 44 insertions(+), 19 deletions(-) diff --git a/ai_ta_backend/agents/customcallbacks.py b/ai_ta_backend/agents/customcallbacks.py index ab5b1024..9aecbcb1 100644 --- a/ai_ta_backend/agents/customcallbacks.py +++ b/ai_ta_backend/agents/customcallbacks.py @@ -7,7 +7,7 @@ from langchain.callbacks import tracing_enabled from langchain.llms import OpenAI from langchain import hub -from utils import SupabaseDB, get_langsmith_id +from ai_ta_backend.agents.utils import SupabaseDB, get_langsmith_id import marvin from marvin import ai_model from pydantic import BaseModel, Field diff --git a/ai_ta_backend/agents/github_webhook_handlers.py b/ai_ta_backend/agents/github_webhook_handlers.py index 2124b1ab..b2048d00 100644 --- a/ai_ta_backend/agents/github_webhook_handlers.py +++ b/ai_ta_backend/agents/github_webhook_handlers.py @@ -26,7 +26,7 @@ # from github_agent import GH_Agent from ai_ta_backend.agents.ml4bio_agent import WorkflowAgent -from ai_ta_backend.agents.utils import get_langsmith_trace_sharable_url +from ai_ta_backend.agents.utils import SupabaseDB, get_langsmith_id, get_langsmith_trace_sharable_url hostname = socket.gethostname() @@ -60,7 +60,9 @@ def handle_github_event(payload: Dict[str, Any]): ValueError: _description_ """ # payload: Dict[str, Any] = json.loads(gh_webhook_payload) - langsmith_run_id = str(uuid.uuid4()) # for Langsmith + langsmith_run_id = get_langsmith_id() # for Langsmith + + if not payload: raise ValueError(f"Missing the body of the webhook response. Response is {payload}") @@ -105,6 +107,9 @@ def handle_issue_opened(payload, langsmith_run_id): # Construct Docker image name as ID for supabase table image_name = f"{repo_name}_{number}:our_tag" + # Add image name to supabase table + SupabaseDB("docker_images", image_name=image_name).check_and_insert_image_name(image_name) + metadata = {"issue": str(issue), 'number': number, "repo_name": repo_name, "langsmith_run_id": langsmith_run_id} # logging.info(f"New issue created: #{number}", metadata) # logging.info(f"New issue created: #{number}. Metadata: {metadata}") @@ -128,7 +133,7 @@ def handle_issue_opened(payload, langsmith_run_id): # bot = github_agent.GH_Agent.remote() prompt = hub.pull("kastanday/new-github-issue").format(issue_description=format_issue(issue)) # result_futures.append(bot.launch_gh_agent.remote(prompt, active_branch=base_branch, langsmith_run_id=langsmith_run_id)) - bot = WorkflowAgent(langsmith_run_id=langsmith_run_id) + bot = WorkflowAgent(langsmith_run_id=langsmith_run_id, image_name=image_name) result = bot.run(prompt) # COLLECT PARALLEL RESULTS diff --git a/ai_ta_backend/agents/ml4bio_agent.py b/ai_ta_backend/agents/ml4bio_agent.py index 97f9b849..a46b58ce 100644 --- a/ai_ta_backend/agents/ml4bio_agent.py +++ b/ai_ta_backend/agents/ml4bio_agent.py @@ -17,10 +17,10 @@ from langchain.tools import BaseTool from langchain_experimental.plan_and_execute.executors.base import ChainExecutor -from tools import get_tools -from utils import fancier_trim_intermediate_steps -from utils import SupabaseDB -from customcallbacks import CustomCallbackHandler +from ai_ta_backend.agents.tools import get_tools +from ai_ta_backend.agents.utils import fancier_trim_intermediate_steps +from ai_ta_backend.agents.utils import SupabaseDB +from ai_ta_backend.agents.customcallbacks import CustomCallbackHandler HUMAN_MESSAGE_TEMPLATE = """Previous steps: {previous_steps} @@ -71,11 +71,10 @@ def get_memory_context(table_name: str, image_name: str): class WorkflowAgent: - def __init__(self, run_id_in_metadata, image_name): - self.run_id_in_metadata = run_id_in_metadata + def __init__(self, langsmith_run_id, image_name): + self.langsmith_run_id = langsmith_run_id self.image_name = image_name - self.callback_handler = CustomCallbackHandler(run_id=self.run_id_in_metadata, - image_name=self.image_name) + self.callback_handler = CustomCallbackHandler(run_id=self.image_name) if os.environ['OPENAI_API_TYPE'] == 'azure': self.llm = AzureChatOpenAI(temperature=0, model="gpt-4-0613", max_retries=3, request_timeout=60 * 3, deployment_name=os.environ['AZURE_OPENAI_ENGINE'], @@ -89,7 +88,15 @@ def __init__(self, run_id_in_metadata, image_name): def run(self, input): result = self.agent.with_config({"run_name": "ML4BIO Plan & Execute Agent"}).invoke({"input": f"{input}"}, { - "metadata": {"run_id_in_metadata": str(self.run_id_in_metadata)}}) + "metadata": {"run_id_in_metadata": str(self.langsmith_run_id)}}) + + # Todo: Remove this once on_agent_finish has been tested, otherwise this can be explored to capture individual agent output + # print(result["intermediate_steps"]) + # Add agent output from intermediate steps to the database + # SupabaseDB(table_name="docker_images", image_name=self.image_name).upsert_field_in_db("on_agent_action", result["intermediate_steps"]) + # for step in self.agent.stream({"input": f"{input}"}, { + # "metadata": {"run_id_in_metadata": str(self.langsmith_run_id)}}): + # print(f"Step: {step}") print(f"Result: {result}") return result @@ -137,7 +144,7 @@ def custom_load_agent_executor(self, def make_agent(self): # TOOLS - tools = get_tools(callback=self.callback_handler) + tools = get_tools(langsmith_run_id=self.langsmith_run_id, callback=self.callback_handler) # PLANNER planner = load_chat_planner(self.llm, system_prompt=hub.pull("kastanday/ml4bio-rnaseq-planner").format(user_info=get_user_info_string)) diff --git a/ai_ta_backend/agents/tools.py b/ai_ta_backend/agents/tools.py index de9a31de..b03ca344 100644 --- a/ai_ta_backend/agents/tools.py +++ b/ai_ta_backend/agents/tools.py @@ -31,7 +31,7 @@ langchain.debug = False # type: ignore VERBOSE = True -def get_tools(langsmith_run_id: str, sync=True): +def get_tools(langsmith_run_id: str, sync=True, callback=None): '''Main function to assemble tools for ML for Bio project.''' # CODE EXECUTION - langsmith_run_id as unique identifier for the sandbox @@ -40,11 +40,13 @@ def get_tools(langsmith_run_id: str, sync=True): func=code_execution_class.run_python_code, name="Code Execution", description="Executes code in an safe Docker container.", + callbacks=[callback] ) e2b_shell_tool = StructuredTool.from_function( func=code_execution_class.run_shell, name="Shell commands (except for git)", description="Run shell commands to, for example, execute shell scripts or R scripts. It is in the same environment as the Code Execution tool.", + callbacks=[callback] ) # AutoGen's Code Execution Tool # def execute_code_tool(code: str, timeout: int = 60, filename: str = "execution_file.py", work_dir: str = "work_dir", use_docker: bool = True, lang: str = "python"): @@ -75,7 +77,7 @@ def get_tools(langsmith_run_id: str, sync=True): llm = ChatOpenAI(temperature=0.1, model="gpt-4-0613", max_retries=3, request_timeout=60 * 3) # type: ignore # human_tools = load_tools(["human"], llm=llm, input_func=get_human_input) # GOOGLE SEARCH - search = load_tools(["serpapi"]) + search = load_tools(["serpapi"], callbacks=[callback]) # GITHUB github = GitHubAPIWrapper() # type: ignore diff --git a/ai_ta_backend/agents/utils.py b/ai_ta_backend/agents/utils.py index b5e713a5..44c7f8ff 100644 --- a/ai_ta_backend/agents/utils.py +++ b/ai_ta_backend/agents/utils.py @@ -13,6 +13,7 @@ from langsmith import Client from langsmith.schemas import Run import tiktoken +from supabase.client import create_client def fancier_trim_intermediate_steps(steps: List[Tuple[AgentAction, str]]) -> List[Tuple[AgentAction, str]]: """ @@ -224,7 +225,7 @@ def get_supabase_client(): def get_langsmith_id(): - return langsmith_id # for Langsmith + return str(uuid.uuid4()) # for Langsmith class SupabaseDB: @@ -274,4 +275,14 @@ def upsert_field_in_db(self, field, value): """ response = self.supabase_client.table("docker_images").upsert({field: value}). \ eq("image_name", self.image_name).execute() - return response \ No newline at end of file + return response + + def check_and_insert_image_name(self, image_name): + """Check if the image name exists in the Supabase table, if not, insert it and build a Docker image. + Args: + image_name (str): The Docker image name. + """ + + # If the image name does not exist in the table, insert it and build a Docker image + if self.is_exists_image() is False: + self.supabase_client.table("docker_images").insert({"image_name": image_name}).execute() \ No newline at end of file diff --git a/ai_ta_backend/main.py b/ai_ta_backend/main.py index 6d6b19eb..c1df49ba 100644 --- a/ai_ta_backend/main.py +++ b/ai_ta_backend/main.py @@ -9,7 +9,7 @@ from sqlalchemy import JSON from ai_ta_backend.vector_database import Ingest -from ai_ta_backend.web_scrape import main_crawler, mit_course_download +from ai_ta_backend.web_scrape import WebScrape, mit_course_download from ai_ta_backend.agents.github_webhook_handlers import handle_github_event app = Flask(__name__)