Skip to content

Commit

Permalink
Handling async execution, adding example env and issue
Browse files Browse the repository at this point in the history
  • Loading branch information
rohan-uiuc committed Apr 4, 2024
1 parent 0799a40 commit 8cc5886
Show file tree
Hide file tree
Showing 8 changed files with 94 additions and 21 deletions.
20 changes: 20 additions & 0 deletions .env.example
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
QDRANT_COLLECTION_NAME=your_collection_name
QDRANT_URL=http://your_qdrant_url:port/
QDRANT_API_KEY=your_qdrant_api_key

# Set this to `azure` or `openai`
OPENAI_API_TYPE=your_api_type
OPENAI_API_VERSION=your_api_version
# You can find this in the Azure portal under your Azure OpenAI resource.
AZURE_OPENAI_API_KEY=your_azure_openai_api_key
AZURE_OPENAI_ENDPOINT=your_azure_openai_endpoint
AZURE_MODEL_VERSION=your_azure_model_version
AZURE_OPENAI_ENGINE=your_azure_openai_engine

LANGCHAIN_TRACING_V2=your_langchain_tracing_setting
LANGCHAIN_ENDPOINT=your_langchain_endpoint
LANGCHAIN_API_KEY=your_langchain_api_key
LANGCHAIN_PROJECT=your_langchain_project
LANGCHAIN_WANDB_TRACING=your_langchain_wandb_tracing_setting

SERPAPI_API_KEY=your_serpapi_api_key
13 changes: 8 additions & 5 deletions agent/langgraph_agent_v2.py
Original file line number Diff line number Diff line change
Expand Up @@ -50,10 +50,10 @@ def get_llm():
if os.getenv('OPENAI_API_TYPE') == 'azure':
return AzureChatOpenAI(
azure_deployment="gpt-4-128k",
openai_api_version=os.getenv("AZURE_0125_MODEL_VERSION"),
openai_api_version=os.getenv("AZURE_MODEL_VERSION"),
temperature=0,
azure_endpoint=os.getenv("AZURE_0125_MODEL_ENDPOINT"),
openai_api_key=os.getenv("AZURE_0125_MODEL_API_KEY"),
azure_endpoint=os.getenv("AZURE_OPENAI_ENDPOINT"),
openai_api_key=os.getenv("AZURE_OPENAI_API_KEY"),
)
else:
return ChatOpenAI(
Expand All @@ -64,11 +64,13 @@ def get_llm():

class WorkflowAgent:

def __init__(self, langsmith_run_id):
@classmethod
async def create(cls, langsmith_run_id):
self = cls()
print("Langgraph v2 agent initialized")
self.langsmith_run_id = langsmith_run_id
self.llm = get_llm()
self.tools = get_tools(langsmith_run_id)
self.tools = await get_tools(langsmith_run_id, sync=False)
self.planner_prompt = ChatPromptTemplate.from_template(
"""For the given objective, come up with a simple step by step plan. \
This plan should involve individual tasks, that if executed correctly will yield the correct answer. Do not add any superfluous steps. \
Expand Down Expand Up @@ -110,6 +112,7 @@ def __init__(self, langsmith_run_id):
hub.pull("hwchase17/openai-functions-agent"))
self.agent_executor = create_agent_executor(self.agent_runnable, self.tools)
self.workflow = self.create_workflow()
return self

def create_workflow(self):
workflow = StateGraph(State)
Expand Down
19 changes: 19 additions & 0 deletions issue.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
{
"title": "Calculate Mean of Numbers",
"body": "Create a function to calculate the mean of an array of numbers. The function should take an array of numbers as input and return the mean of those numbers.",
"labels": ["enhancement"],
"assignees": ["your-username"],
"milestone": null,
"state": "open",
"number": 1,
"created_at": "2023-04-01T00:00:00Z",
"updated_at": "2023-04-01T00:00:00Z",
"closed_at": null,
"author": "your-username",
"comments": [
{
"user": "commenter-username",
"message": "Remember to handle empty arrays by returning 0 or null."
}
]
}
10 changes: 7 additions & 3 deletions main.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,8 +23,12 @@ async def main():
API reference for Webhook objects: https://docs.github.com/en/webhooks-and-events/webhooks/webhook-events-and-payloads#issue_comment
WEBHOOK explainer: https://docs.github.com/en/apps/creating-github-apps/registering-a-github-app/using-webhooks-with-github-apps
"""
f = open('issue.json')
issue: Issue = json.load(f)
with open('issue.json') as f:
issue_data = json.load(f)

if issue_data:
issue: Issue = Issue.from_json(issue_data)

langsmith_run_id = str(uuid.uuid4())

if not issue:
Expand All @@ -45,7 +49,7 @@ async def main():
prompt = hub.pull("kastanday/new-github-issue").format(issue_description=issue.format_issue())

print("ABOUT TO CALL WORKFLOW AGENT on COMMENT OPENED")
bot = WorkflowAgent(langsmith_run_id=langsmith_run_id)
bot = await WorkflowAgent.create(langsmith_run_id=langsmith_run_id)
result = await bot.run(prompt)

# COLLECT PARALLEL RESULTS
Expand Down
5 changes: 4 additions & 1 deletion requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -7,10 +7,13 @@ langchain-openai==0.1.*
langchain-community==0.0.*
langgraph==0.0.*
langchainhub==0.1.*
langchain-experimental==0.0.*
langsmith==0.1.*
openai==1.16.*
tiktoken==0.5.2
google-search-results==2.4.2
termcolor==2.3.0
playwright==1.40.0
ray==2.8.1
nest_asyncio==1.6.0
beautifulsoup4==4.12.2
27 changes: 22 additions & 5 deletions type/issue.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
class Issue():
class Issue:
def __init__(self, title, body, labels, assignees, milestone, state, number, created_at, updated_at, closed_at, author, comments):
self.title = title
self.body = body
Expand All @@ -12,14 +12,31 @@ def __init__(self, title, body, labels, assignees, milestone, state, number, cre
self.closed_at = closed_at
self.author = author
self.comments = comments

@classmethod
def from_json(cls, data):
    """Alternate constructor: build an Issue from a decoded JSON mapping.

    Any key absent from ``data`` falls back to an empty/neutral default
    (empty string, empty list, ``None``, or ``0``), matching the shape of
    the bundled ``issue.json`` example.
    """
    field = data.get  # hoist the bound lookup; literals below stay fresh per call
    return cls(
        title=field("title", ""),
        body=field("body", ""),
        labels=field("labels", []),
        assignees=field("assignees", []),
        milestone=field("milestone"),
        state=field("state", ""),
        number=field("number", 0),
        created_at=field("created_at", ""),
        updated_at=field("updated_at", ""),
        closed_at=field("closed_at"),
        author=field("author", ""),
        comments=field("comments", []),
    )

def __str__(self):
    """Return a human-readable, multi-line dump of every issue field."""
    parts = (
        ("Title", self.title),
        ("Body", self.body),
        ("Labels", self.labels),
        ("Assignees", self.assignees),
        ("Milestone", self.milestone),
        ("State", self.state),
        ("Number", self.number),
        ("Created At", self.created_at),
        ("Updated At", self.updated_at),
        ("Closed At", self.closed_at),
        ("Author", self.author),
        ("Comments", self.comments),
    )
    return "\n".join(f"{label}: {value}" for label, value in parts)

def format_issue(self):
    """Render the issue as a short, prompt-friendly summary.

    Returns a three-line string — title, opening author, and body — suitable
    for interpolation into an LLM prompt template.

    Note: the extracted span contained stale pre-fix lines referencing
    ``self.issue.*`` (which would raise AttributeError, since this method is
    already on the Issue instance) alongside the corrected ones; only the
    corrected implementation is kept here.
    """
    # linked_pr = get_linked_pr_from_issue(issue)
    title = f"Title: {self.title}."
    # existing_pr = f"Existing PR addressing issue: {linked_pr}" if linked_pr else ""
    author = f"Opened by user: {self.author}"
    body = f"Body: {self.body}"
    return "\n".join([title, author, body])
19 changes: 12 additions & 7 deletions utils/tools.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
from dotenv import load_dotenv
from langchain.agents import load_tools
from langchain.agents.agent_toolkits.github.toolkit import GitHubToolkit
from langchain.tools.shell.tool import ShellTool
from langchain_community.tools import ShellTool
from langchain.agents.agent_toolkits.file_management.toolkit import FileManagementToolkit
from langchain_openai import ChatOpenAI, AzureChatOpenAI
from langchain_community.tools import VectorStoreQATool
Expand All @@ -21,14 +21,18 @@
from utils.vector_db import get_vectorstore_retriever_tool

load_dotenv(override=True, dotenv_path='../../.env')
# This import is required only for jupyter notebooks, since they have their own eventloop
import nest_asyncio

nest_asyncio.apply()

os.environ["LANGCHAIN_TRACING"] = "true" # If you want to trace the execution of the program, set to "true"
langchain.debug = False # type: ignore
VERBOSE = True
root_dir = os.getenv("root_dir","/app")


def get_tools(langsmith_run_id: str, sync=True):
async def get_tools(langsmith_run_id: str, sync=True):
"""Main function to assemble tools for ML for Bio project."""

# CODE EXECUTION - langsmith_run_id as unique identifier for the sandbox
Expand Down Expand Up @@ -56,6 +60,7 @@ def get_tools(langsmith_run_id: str, sync=True):
llm = AzureChatOpenAI(
temperature=0.1,
model="gpt-4-1106-Preview",

)
# max_retries=3,
# request_timeout=60 * 3,
Expand All @@ -66,10 +71,10 @@ def get_tools(langsmith_run_id: str, sync=True):
# GOOGLE SEARCH
search = load_tools(["serpapi"])

# GITHUB
github = GitHubAPIWrapper() # type: ignore
toolkit = GitHubToolkit.from_github_api_wrapper(github)
github_tools: list[BaseTool] = toolkit.get_tools()
# # GITHUB
# github = GitHubAPIWrapper() # type: ignore
# toolkit = GitHubToolkit.from_github_api_wrapper(github)
# github_tools: list[BaseTool] = toolkit.get_tools()

# TODO: more vector stores per Bio package: trimmomatic, gffread, samtools, salmon, DESeq2 and ggpubr
docs_tools: List[VectorStoreQATool] = [
Expand Down Expand Up @@ -108,7 +113,7 @@ def get_tools(langsmith_run_id: str, sync=True):
# Probably unnecessary: WikipediaQueryRun, WolframAlphaQueryRun, PubmedQueryRun, ArxivQueryRun
# arxiv_tool = ArxivQueryRun()

tools: list[BaseTool] = github_tools + search + docs_tools + shell + browser_tools + file_management + human_tools
tools: list[BaseTool] = search + docs_tools + [shell] + browser_tools + file_management + human_tools # + github_tools
return tools


Expand Down
2 changes: 2 additions & 0 deletions utils/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,9 @@

from langsmith import Client
import langsmith
import ray

@ray.remote
def post_sharable_url(issue, langsmith_run_id, time_delay_s):
sharable_url = get_langsmith_trace_sharable_url(langsmith_run_id, time_delay_s=time_delay_s)
text = f"👉 [Follow the bot's progress in real time on LangSmith]({sharable_url})."
Expand Down

0 comments on commit 8cc5886

Please sign in to comment.