add actions and evaluators -- backup before fixing tests

JoinTheAlliance · Feb 16, 2024 · 4bcca5c · 4bcca5c
1 parent 62ebbcd
commit 4bcca5c
Show file tree

Hide file tree

Showing 12 changed files with 2,025 additions and 154 deletions.
diff --git a/scripts/concat.mjs b/scripts/concat.mjs
@@ -5,7 +5,7 @@ import { fileURLToPath } from 'url'
 const instructions = 'The above code was taken from my codebase at https://github.com/lalalune/bgent. You are writing tests and documentation for my codebase. Please use the above code as a reference. Tests should be written with Jest and Typescript. Do not use mocks or stubs. Keep it very simple and straightforward.'
 
 // Patterns to ignore
-const ignorePatterns = ['actionExamples.ts', 'agents', 'goal', 'goals', 'utils', 'logger', 'index', 'data', 'constants', 'templates', 'worker']
+const ignorePatterns = ['actionExamples.ts', 'agents', 'goal', 'cache', 'goals', 'supabase',  'utils', 'logger', 'index', 'data', 'constants', 'templates', 'worker']
 
 // __dirname is not defined in ES module scope, so we need to create it
 const __filename = fileURLToPath(import.meta.url)
@@ -24,7 +24,7 @@ const shouldIgnore = (filePath) => {
 
 // Function to recursively read through directories and concatenate .ts files
 const readDirectory = (dirPath) => {
-    let concatenatedContent = '# START MY CODEBASE'
+    let concatenatedContent = ''
 
     fs.readdirSync(dirPath).forEach(file => {
         const filePath = path.join(dirPath, file)
@@ -51,7 +51,7 @@ const readDirectory = (dirPath) => {
 }
 
 // Start reading from the root TypeScript directory
-const concatenatedContent = readDirectory(directoryPath)
+const concatenatedContent = '# START MY CODEBASE' + readDirectory(directoryPath) // header is prepended once here, since readDirectory now starts from ''
 
 // Write the concatenated content to the output file
 fs.writeFileSync(outputFile, concatenatedContent + '# END MY CODEBASE\n\n' + instructions)

diff --git a/src/lib/__tests__/evaluation.test.ts b/src/lib/__tests__/evaluation.test.ts
@@ -1,84 +1,94 @@
-import { type User } from "@supabase/supabase-js";
-import { BgentRuntime } from "../runtime";
-import { type Message } from "../types";
+import { User } from "@supabase/supabase-js";
+import { UUID } from "crypto";
 import { createRuntime } from "../../test/createRuntime";
-import { type UUID } from "crypto";
+import testEvaluator from "../../test/testEvaluator";
+import { getRelationship } from "../relationships";
+import { BgentRuntime } from "../runtime";
+import { Message } from "../types";
 
 describe("Evaluation Process", () => {
   let runtime: BgentRuntime;
   let user: User;
-  const zeroUuid = "00000000-0000-0000-0000-000000000000" as UUID;
+  let room_id: UUID;
+  const zeroUuid: UUID = "00000000-0000-0000-0000-000000000000";
 
   beforeAll(async () => {
-    const setup = await createRuntime();
+    const setup = await createRuntime({ evaluators: [testEvaluator] }); // hands testEvaluator to createRuntime (presumably registering it on the runtime — confirm)
     runtime = setup.runtime;
     user = setup.session.user;
 
-    // Assuming the evaluator 'summary' is already registered in the runtime setup
+    const relationship = await getRelationship({
+      runtime,
+      userA: user.id as UUID,
+      userB: zeroUuid,
+    });
+    room_id = relationship?.room_id; // NOTE(review): evaluates to undefined when no relationship comes back, although room_id is declared as UUID — consider asserting it is defined before the tests run
   });
 
-  test("Evaluation Injection - Evaluator Creates Memory", async () => {
-    const message: Message = {
-      senderId: user.id as UUID,
-      agentId: zeroUuid,
-      userIds: [user?.id as UUID, zeroUuid],
-      content: "Trigger evaluator content",
-      room_id: zeroUuid,
-    };
+  test("Custom evaluator is loaded into state", async () => { // NOTE(review): the whole body is commented out, so this test passes without asserting anything — restore it or mark it test.todo / test.skip
+    // const state = await runtime.composeState({
+    //   agentId: zeroUuid,
+    //   senderId: user.id as UUID,
+    //   userIds: [user?.id as UUID, zeroUuid],
+    //   content: "Test message",
+    //   room_id,
+    // });
 
-    await runtime.handleRequest(message);
+    // expect(state.evaluators).toContain(testEvaluator.name);
+  });
 
-    // Assuming the 'summary' evaluator tags the memories it creates with 'summarization'
-    const memories = await runtime.summarizationManager.getMemoriesByIds({
-      userIds: [user.id as UUID, zeroUuid],
-      count: 1,
+  test("Validate the format of the examples from the evaluator", () => { // structural check only: each example needs context, messages and outcome; each message needs user, content and action
+    expect(testEvaluator.examples).toBeInstanceOf(Array);
+    testEvaluator.examples.forEach((example) => { // NOTE(review): an empty examples array skips every assertion below and still passes
+      expect(example).toHaveProperty("context");
+      expect(example).toHaveProperty("messages");
+      expect(example.messages).toBeInstanceOf(Array);
+      example.messages.forEach((message) => {
+        expect(message).toHaveProperty("user");
+        expect(message).toHaveProperty("content");
+        expect(message).toHaveProperty("action"); // NOTE(review): formatEvaluatorExamples treats action as optional — confirm it should be mandatory here
+      });
+      expect(example).toHaveProperty("outcome");
     });
-
-    // Expect at least one memory to be created with the 'summarization' tag
-    expect(memories.length).toBeGreaterThan(0);
   });
 
-  test("Evaluator Not Running if No Evaluation Handlers are True", async () => {
+  test("Check if test and examples appear in prompt", async () => { // NOTE(review): the title says "prompt", but the assertions inspect the value returned by handleRequest — confirm which is intended
     const message: Message = {
-      senderId: user?.id as UUID,
+      senderId: user.id as UUID,
       agentId: zeroUuid,
-      userIds: [user?.id as UUID, zeroUuid],
-      content: "Non-triggering content",
-      room_id: zeroUuid,
-    };
-
-    await runtime.handleRequest(message);
-
-    // Assuming the 'summary' evaluator tags the memories it creates with 'summarization'
-    const memories = await runtime.summarizationManager.getMemoriesByIds({
       userIds: [user.id as UUID, zeroUuid],
-      count: 10,
-    });
+      content: "Test message for evaluation",
+      room_id,
+    };
 
-    // Assuming the previous test ran and created exactly one memory
-    // Expect the number of memories to remain unchanged
-    expect(memories.length).toBe(1);
+    const response = await runtime.handleRequest(message);
+    expect(response).toContain("TEST_EVALUATOR"); // NOTE(review): uses response as a string/array here, while the next test reads response.action — reconcile both with handleRequest's actual return type
+    expect(response).toContain(testEvaluator.examples[0].outcome); // NOTE(review): outcome may still hold raw {{userN}} placeholders, which formatEvaluatorExamples replaces with names — confirm this can match
   });
 
-  test("Evaluation Handling and Response - Evaluator Updates Memory", async () => {
+  test("Create prompt to call TEST_EVALUATOR action and validate response", async () => {
     const message: Message = {
       senderId: user.id as UUID,
       agentId: zeroUuid,
       userIds: [user.id as UUID, zeroUuid],
-      content: "Content that leads to a specific evaluator response",
-      room_id: zeroUuid,
+      content: "Trigger TEST_EVALUATOR",
+      room_id,
     };
 
-    await runtime.handleRequest(message);
+    const response = await runtime.handleRequest(message);
+    expect(response.action).toEqual("TEST_EVALUATOR"); // NOTE(review): reads response.action, whereas the previous test calls toContain on response itself — align both with handleRequest's return type
+  });
 
-    // Assuming the 'summary' evaluator updates the 'content' of memories it processes
-    // Fetch the updated memory
-    const memories = await runtime.summarizationManager.getMemoriesByIds({
+  test("Run the TEST_EVALUATOR handler and validate output", async () => { // calls the evaluator's handler directly instead of going through runtime.handleRequest
+    const message: Message = {
+      senderId: user.id as UUID,
+      agentId: zeroUuid,
       userIds: [user.id as UUID, zeroUuid],
-      count: 1,
-    });
+      content: "Run TEST_EVALUATOR handler",
+      room_id,
+    };
 
-    // Expect the updated memory to contain specific content
-    expect(memories[0].content).toContain("specific content");
+    const result = await testEvaluator.handler(runtime, message);
+    expect(result).toBeTruthy(); // NOTE(review): checks truthiness only — assert the specific value the handler is expected to return
   });
 });
diff --git a/src/lib/evaluation.ts b/src/lib/evaluation.ts
@@ -57,3 +57,72 @@ export function formatEvaluatorConditions(evaluators: Evaluator[]) {
     )
     .join(",\n");
 }
+
+import { uniqueNamesGenerator, names } from "unique-names-generator";
+
+/**
+ * Formats every example of every evaluator as a plain-text block.
+ *
+ * For each example, five names are produced with `uniqueNamesGenerator` and
+ * substituted for the placeholders `{{user1}}` through `{{user5}}` in the
+ * example's context, outcome, and messages. Each example is rendered as
+ * `Context:` / `Messages:` / `Outcome:` sections. Examples are separated by
+ * a blank line, and so are evaluators.
+ *
+ * NOTE(review): the five names are generated independently and nothing here
+ * de-duplicates them — confirm two placeholders cannot receive the same name.
+ * NOTE(review): `String.prototype.replaceAll` is ES2021 — confirm the
+ * project's `target` / `lib` settings include it.
+ * NOTE(review): an evaluator with no examples contributes an empty string,
+ * which is still joined with "\n\n" and leaves extra blank lines.
+ *
+ * @param evaluators - Evaluators whose `examples` are rendered.
+ * @returns All rendered examples joined into a single string.
+ */
+export function formatEvaluatorExamples(evaluators: Evaluator[]) {
+  return evaluators
+    .map((evaluator) => {
+      return evaluator.examples
+        .map((example) => {
+          const exampleNames = Array.from({ length: 5 }, () => // five generated names, one per supported placeholder
+            uniqueNamesGenerator({ dictionaries: [names] }),
+          );
+
+          let formattedContext = example.context;
+          let formattedOutcome = example.outcome;
+
+          exampleNames.forEach((name, index) => {
+            const placeholder = `{{user${index + 1}}}`; // placeholders are 1-based: {{user1}}..{{user5}}
+            formattedContext = formattedContext.replaceAll(placeholder, name);
+            formattedOutcome = formattedOutcome.replaceAll(placeholder, name);
+          });
+
+          const formattedMessages = example.messages
+            .map((message) => {
+              let messageString = `${message.user}: ${message.content}`;
+              exampleNames.forEach((name, index) => {
+                const placeholder = `{{user${index + 1}}}`; // same placeholder scheme as for context and outcome
+                messageString = messageString.replaceAll(placeholder, name);
+              });
+              return (
+                messageString + (message.action ? ` (${message.action})` : "") // the action, when present, is appended after substitution, so placeholders inside it are left as-is
+              );
+            })
+            .join("\n");
+
+          return `Context:\n${formattedContext}\n\nMessages:\n${formattedMessages}\n\nOutcome:\n${formattedOutcome}`;
+        })
+        .join("\n\n");
+    })
+    .join("\n\n");
+}
+
+/**
+ * Lists, for every example of every evaluator, one line of the form
+ * `<evaluator.name> Example <n>: <evaluator.condition>`.
+ *
+ * The example itself is not read; only its 1-based position is used, so all
+ * lines belonging to one evaluator repeat that evaluator's `condition`.
+ * Lines of one evaluator are separated by "\n"; evaluators by "\n\n".
+ *
+ * @param evaluators - Evaluators to list.
+ * @returns The joined listing; an empty string when `evaluators` is empty.
+ */
+export function formatEvaluatorExampleConditions(evaluators: Evaluator[]) {
+  return evaluators
+    .map((evaluator) =>
+      evaluator.examples
+        .map(
+          (_example, index) =>
+            `${evaluator.name} Example ${index + 1}: ${evaluator.condition}`,
+        )
+        .join("\n"),
+    )
+    .join("\n\n");
+}
+
+/**
+ * Lists, for every example of every evaluator, one line of the form
+ * `<evaluator.name> Example <n>: <evaluator.description>`.
+ *
+ * The example itself is not read; only its 1-based position is used, so all
+ * lines belonging to one evaluator repeat that evaluator's `description`.
+ * Lines of one evaluator are separated by "\n"; evaluators by "\n\n".
+ *
+ * @param evaluators - Evaluators to list.
+ * @returns The joined listing; an empty string when `evaluators` is empty.
+ */
+export function formatEvaluatorExampleDescriptions(evaluators: Evaluator[]) {
+  return evaluators
+    .map((evaluator) =>
+      evaluator.examples
+        .map(
+          (_example, index) =>
+            `${evaluator.name} Example ${index + 1}: ${evaluator.description}`,
+        )
+        .join("\n"),
+    )
+    .join("\n\n");
+}