Skip to content

Commit

Permalink
add actions and evaluators -- backup before fixing tests
Browse files Browse the repository at this point in the history
  • Loading branch information
lalalune committed Feb 16, 2024
1 parent 62ebbcd commit 4bcca5c
Show file tree
Hide file tree
Showing 12 changed files with 2,025 additions and 154 deletions.
6 changes: 3 additions & 3 deletions scripts/concat.mjs
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ import { fileURLToPath } from 'url'
const instructions = 'The above code was taken from my codebase at https://github.com/lalalune/bgent. You are writing tests and documentation for my codebase. Please use the above code as a reference. Tests should be written with Jest and Typescript. Do not use mocks or stubs. Keep it very simple and straightforward.'

// Patterns to ignore
const ignorePatterns = ['actionExamples.ts', 'agents', 'goal', 'goals', 'utils', 'logger', 'index', 'data', 'constants', 'templates', 'worker']
const ignorePatterns = ['actionExamples.ts', 'agents', 'goal', 'cache', 'goals', 'supabase', 'utils', 'logger', 'index', 'data', 'constants', 'templates', 'worker']

// __dirname is not defined in ES module scope, so we need to create it
const __filename = fileURLToPath(import.meta.url)
Expand All @@ -24,7 +24,7 @@ const shouldIgnore = (filePath) => {

// Function to recursively read through directories and concatenate .ts files
const readDirectory = (dirPath) => {
let concatenatedContent = '# START MY CODEBASE'
let concatenatedContent = ''

fs.readdirSync(dirPath).forEach(file => {
const filePath = path.join(dirPath, file)
Expand All @@ -51,7 +51,7 @@ const readDirectory = (dirPath) => {
}

// Start reading from the root TypeScript directory
const concatenatedContent = readDirectory(directoryPath)
const concatenatedContent = '# START MY CODEBASE' + readDirectory(directoryPath)

// Write the concatenated content to the output file
fs.writeFileSync(outputFile, concatenatedContent + '# END MY CODEBASE\n\n' + instructions)
Expand Down
110 changes: 60 additions & 50 deletions src/lib/__tests__/evaluation.test.ts
Original file line number Diff line number Diff line change
@@ -1,84 +1,94 @@
import { type User } from "@supabase/supabase-js";
import { BgentRuntime } from "../runtime";
import { type Message } from "../types";
import { User } from "@supabase/supabase-js";
import { UUID } from "crypto";
import { createRuntime } from "../../test/createRuntime";
import { type UUID } from "crypto";
import testEvaluator from "../../test/testEvaluator";
import { getRelationship } from "../relationships";
import { BgentRuntime } from "../runtime";
import { Message } from "../types";

describe("Evaluation Process", () => {
let runtime: BgentRuntime;
let user: User;
const zeroUuid = "00000000-0000-0000-0000-000000000000" as UUID;
let room_id: UUID;
const zeroUuid: UUID = "00000000-0000-0000-0000-000000000000";

beforeAll(async () => {
const setup = await createRuntime();
const setup = await createRuntime({ evaluators: [testEvaluator] });
runtime = setup.runtime;
user = setup.session.user;

// Assuming the evaluator 'summary' is already registered in the runtime setup
const relationship = await getRelationship({
runtime,
userA: user.id as UUID,
userB: zeroUuid,
});
room_id = relationship?.room_id;
});

test("Evaluation Injection - Evaluator Creates Memory", async () => {
const message: Message = {
senderId: user.id as UUID,
agentId: zeroUuid,
userIds: [user?.id as UUID, zeroUuid],
content: "Trigger evaluator content",
room_id: zeroUuid,
};
test("Custom evaluator is loaded into state", async () => {
// const state = await runtime.composeState({
// agentId: zeroUuid,
// senderId: user.id as UUID,
// userIds: [user?.id as UUID, zeroUuid],
// content: "Test message",
// room_id,
// });

await runtime.handleRequest(message);
// expect(state.evaluators).toContain(testEvaluator.name);
});

// Assuming the 'summary' evaluator tags the memories it creates with 'summarization'
const memories = await runtime.summarizationManager.getMemoriesByIds({
userIds: [user.id as UUID, zeroUuid],
count: 1,
test("Validate the format of the examples from the evaluator", () => {
expect(testEvaluator.examples).toBeInstanceOf(Array);
testEvaluator.examples.forEach((example) => {
expect(example).toHaveProperty("context");
expect(example).toHaveProperty("messages");
expect(example.messages).toBeInstanceOf(Array);
example.messages.forEach((message) => {
expect(message).toHaveProperty("user");
expect(message).toHaveProperty("content");
expect(message).toHaveProperty("action");
});
expect(example).toHaveProperty("outcome");
});

// Expect at least one memory to be created with the 'summarization' tag
expect(memories.length).toBeGreaterThan(0);
});

test("Evaluator Not Running if No Evaluation Handlers are True", async () => {
test("Check if test and examples appear in prompt", async () => {
const message: Message = {
senderId: user?.id as UUID,
senderId: user.id as UUID,
agentId: zeroUuid,
userIds: [user?.id as UUID, zeroUuid],
content: "Non-triggering content",
room_id: zeroUuid,
};

await runtime.handleRequest(message);

// Assuming the 'summary' evaluator tags the memories it creates with 'summarization'
const memories = await runtime.summarizationManager.getMemoriesByIds({
userIds: [user.id as UUID, zeroUuid],
count: 10,
});
content: "Test message for evaluation",
room_id,
};

// Assuming the previous test ran and created exactly one memory
// Expect the number of memories to remain unchanged
expect(memories.length).toBe(1);
const response = await runtime.handleRequest(message);
expect(response).toContain("TEST_EVALUATOR");
expect(response).toContain(testEvaluator.examples[0].outcome);
});

test("Evaluation Handling and Response - Evaluator Updates Memory", async () => {
test("Create prompt to call TEST_EVALUATOR action and validate response", async () => {
const message: Message = {
senderId: user.id as UUID,
agentId: zeroUuid,
userIds: [user.id as UUID, zeroUuid],
content: "Content that leads to a specific evaluator response",
room_id: zeroUuid,
content: "Trigger TEST_EVALUATOR",
room_id,
};

await runtime.handleRequest(message);
const response = await runtime.handleRequest(message);
expect(response.action).toEqual("TEST_EVALUATOR");
});

// Assuming the 'summary' evaluator updates the 'content' of memories it processes
// Fetch the updated memory
const memories = await runtime.summarizationManager.getMemoriesByIds({
test("Run the TEST_EVALUATOR handler and validate output", async () => {
const message: Message = {
senderId: user.id as UUID,
agentId: zeroUuid,
userIds: [user.id as UUID, zeroUuid],
count: 1,
});
content: "Run TEST_EVALUATOR handler",
room_id,
};

// Expect the updated memory to contain specific content
expect(memories[0].content).toContain("specific content");
const result = await testEvaluator.handler(runtime, message);
expect(result).toBeTruthy();
});
});
69 changes: 69 additions & 0 deletions src/lib/evaluation.ts
Original file line number Diff line number Diff line change
Expand Up @@ -57,3 +57,72 @@ export function formatEvaluatorConditions(evaluators: Evaluator[]) {
)
.join(",\n");
}

import { uniqueNamesGenerator, names } from "unique-names-generator";

/**
 * Renders every example of every evaluator as a readable text block with
 * "Context", "Messages" and "Outcome" sections.
 *
 * For each example, five names are generated and substituted for the
 * placeholders `{{user1}}` .. `{{user5}}` in the context, the outcome and each
 * "user: content" message line. A message's action, when truthy, is appended
 * in parentheses after substitution, so placeholders inside the action are
 * left untouched.
 *
 * @param evaluators Evaluators whose `examples` should be rendered.
 * @returns All rendered examples, separated by blank lines.
 */
export function formatEvaluatorExamples(evaluators: Evaluator[]) {
  const renderedEvaluators: string[] = [];

  for (const evaluator of evaluators) {
    const renderedExamples: string[] = [];

    for (const example of evaluator.examples) {
      // A fresh set of five names per example; the array index maps to the
      // placeholder number minus one.
      const generatedNames: string[] = [];
      for (let slot = 0; slot < 5; slot++) {
        generatedNames.push(uniqueNamesGenerator({ dictionaries: [names] }));
      }

      // Applies the substitutions in order: {{user1}} first, {{user5}} last.
      const substituteNames = (text: string) =>
        generatedNames.reduce(
          (current, name, slot) =>
            current.replaceAll(`{{user${slot + 1}}}`, name),
          text,
        );

      const formattedContext = substituteNames(example.context);
      const formattedOutcome = substituteNames(example.outcome);

      const messageLines: string[] = [];
      for (const message of example.messages) {
        const speakerLine = substituteNames(
          `${message.user}: ${message.content}`,
        );
        const actionSuffix = message.action ? ` (${message.action})` : "";
        messageLines.push(speakerLine + actionSuffix);
      }
      const formattedMessages = messageLines.join("\n");

      renderedExamples.push(
        `Context:\n${formattedContext}\n\nMessages:\n${formattedMessages}\n\nOutcome:\n${formattedOutcome}`,
      );
    }

    renderedEvaluators.push(renderedExamples.join("\n\n"));
  }

  return renderedEvaluators.join("\n\n");
}

/**
 * Lists, for every example of every evaluator, a line of the form
 * `<evaluator name> Example <n>: <evaluator condition>`.
 *
 * The condition comes from the evaluator itself, so all examples of one
 * evaluator share the same condition text; only the example number differs.
 *
 * @param evaluators Evaluators whose examples should be listed.
 * @returns Lines joined by newlines within an evaluator, with a blank line
 *          between evaluators.
 */
export function formatEvaluatorExampleConditions(evaluators: Evaluator[]) {
  const sections: string[] = [];

  for (const evaluator of evaluators) {
    const lines: string[] = [];
    for (let position = 1; position <= evaluator.examples.length; position++) {
      lines.push(
        `${evaluator.name} Example ${position}: ${evaluator.condition}`,
      );
    }
    sections.push(lines.join("\n"));
  }

  return sections.join("\n\n");
}

/**
 * Lists, for every example of every evaluator, a line of the form
 * `<evaluator name> Example <n>: <evaluator description>`.
 *
 * The description comes from the evaluator itself, so all examples of one
 * evaluator share the same description text; only the example number differs.
 *
 * @param evaluators Evaluators whose examples should be listed.
 * @returns Lines joined by newlines within an evaluator, with a blank line
 *          between evaluators.
 */
export function formatEvaluatorExampleDescriptions(evaluators: Evaluator[]) {
  // Builds the newline-separated lines for a single evaluator.
  const describeExamples = (evaluator: Evaluator) => {
    const lines: string[] = [];
    for (let position = 1; position <= evaluator.examples.length; position++) {
      lines.push(
        `${evaluator.name} Example ${position}: ${evaluator.description}`,
      );
    }
    return lines.join("\n");
  };

  const sections: string[] = [];
  for (const evaluator of evaluators) {
    sections.push(describeExamples(evaluator));
  }
  return sections.join("\n\n");
}
Loading

0 comments on commit 4bcca5c

Please sign in to comment.