-
Notifications
You must be signed in to change notification settings - Fork 17
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
add actions and evaluators -- backup before fixing tests
- Loading branch information
Showing 12 changed files with 2,025 additions and 154 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,84 +1,94 @@ | ||
import { type User } from "@supabase/supabase-js"; | ||
import { BgentRuntime } from "../runtime"; | ||
import { type Message } from "../types"; | ||
import { User } from "@supabase/supabase-js"; | ||
import { UUID } from "crypto"; | ||
import { createRuntime } from "../../test/createRuntime"; | ||
import { type UUID } from "crypto"; | ||
import testEvaluator from "../../test/testEvaluator"; | ||
import { getRelationship } from "../relationships"; | ||
import { BgentRuntime } from "../runtime"; | ||
import { Message } from "../types"; | ||
|
||
describe("Evaluation Process", () => { | ||
let runtime: BgentRuntime; | ||
let user: User; | ||
const zeroUuid = "00000000-0000-0000-0000-000000000000" as UUID; | ||
let room_id: UUID; | ||
const zeroUuid: UUID = "00000000-0000-0000-0000-000000000000"; | ||
|
||
beforeAll(async () => { | ||
const setup = await createRuntime(); | ||
const setup = await createRuntime({ evaluators: [testEvaluator] }); | ||
runtime = setup.runtime; | ||
user = setup.session.user; | ||
|
||
// Assuming the evaluator 'summary' is already registered in the runtime setup | ||
const relationship = await getRelationship({ | ||
runtime, | ||
userA: user.id as UUID, | ||
userB: zeroUuid, | ||
}); | ||
room_id = relationship?.room_id; | ||
}); | ||
|
||
test("Evaluation Injection - Evaluator Creates Memory", async () => { | ||
const message: Message = { | ||
senderId: user.id as UUID, | ||
agentId: zeroUuid, | ||
userIds: [user?.id as UUID, zeroUuid], | ||
content: "Trigger evaluator content", | ||
room_id: zeroUuid, | ||
}; | ||
test("Custom evaluator is loaded into state", async () => { | ||
// const state = await runtime.composeState({ | ||
// agentId: zeroUuid, | ||
// senderId: user.id as UUID, | ||
// userIds: [user?.id as UUID, zeroUuid], | ||
// content: "Test message", | ||
// room_id, | ||
// }); | ||
|
||
await runtime.handleRequest(message); | ||
// expect(state.evaluators).toContain(testEvaluator.name); | ||
}); | ||
|
||
// Assuming the 'summary' evaluator tags the memories it creates with 'summarization' | ||
const memories = await runtime.summarizationManager.getMemoriesByIds({ | ||
userIds: [user.id as UUID, zeroUuid], | ||
count: 1, | ||
test("Validate the format of the examples from the evaluator", () => { | ||
expect(testEvaluator.examples).toBeInstanceOf(Array); | ||
testEvaluator.examples.forEach((example) => { | ||
expect(example).toHaveProperty("context"); | ||
expect(example).toHaveProperty("messages"); | ||
expect(example.messages).toBeInstanceOf(Array); | ||
example.messages.forEach((message) => { | ||
expect(message).toHaveProperty("user"); | ||
expect(message).toHaveProperty("content"); | ||
expect(message).toHaveProperty("action"); | ||
}); | ||
expect(example).toHaveProperty("outcome"); | ||
}); | ||
|
||
// Expect at least one memory to be created with the 'summarization' tag | ||
expect(memories.length).toBeGreaterThan(0); | ||
}); | ||
|
||
test("Evaluator Not Running if No Evaluation Handlers are True", async () => { | ||
test("Check if test and examples appear in prompt", async () => { | ||
const message: Message = { | ||
senderId: user?.id as UUID, | ||
senderId: user.id as UUID, | ||
agentId: zeroUuid, | ||
userIds: [user?.id as UUID, zeroUuid], | ||
content: "Non-triggering content", | ||
room_id: zeroUuid, | ||
}; | ||
|
||
await runtime.handleRequest(message); | ||
|
||
// Assuming the 'summary' evaluator tags the memories it creates with 'summarization' | ||
const memories = await runtime.summarizationManager.getMemoriesByIds({ | ||
userIds: [user.id as UUID, zeroUuid], | ||
count: 10, | ||
}); | ||
content: "Test message for evaluation", | ||
room_id, | ||
}; | ||
|
||
// Assuming the previous test ran and created exactly one memory | ||
// Expect the number of memories to remain unchanged | ||
expect(memories.length).toBe(1); | ||
const response = await runtime.handleRequest(message); | ||
expect(response).toContain("TEST_EVALUATOR"); | ||
expect(response).toContain(testEvaluator.examples[0].outcome); | ||
}); | ||
|
||
test("Evaluation Handling and Response - Evaluator Updates Memory", async () => { | ||
test("Create prompt to call TEST_EVALUATOR action and validate response", async () => { | ||
const message: Message = { | ||
senderId: user.id as UUID, | ||
agentId: zeroUuid, | ||
userIds: [user.id as UUID, zeroUuid], | ||
content: "Content that leads to a specific evaluator response", | ||
room_id: zeroUuid, | ||
content: "Trigger TEST_EVALUATOR", | ||
room_id, | ||
}; | ||
|
||
await runtime.handleRequest(message); | ||
const response = await runtime.handleRequest(message); | ||
expect(response.action).toEqual("TEST_EVALUATOR"); | ||
}); | ||
|
||
// Assuming the 'summary' evaluator updates the 'content' of memories it processes | ||
// Fetch the updated memory | ||
const memories = await runtime.summarizationManager.getMemoriesByIds({ | ||
test("Run the TEST_EVALUATOR handler and validate output", async () => { | ||
const message: Message = { | ||
senderId: user.id as UUID, | ||
agentId: zeroUuid, | ||
userIds: [user.id as UUID, zeroUuid], | ||
count: 1, | ||
}); | ||
content: "Run TEST_EVALUATOR handler", | ||
room_id, | ||
}; | ||
|
||
// Expect the updated memory to contain specific content | ||
expect(memories[0].content).toContain("specific content"); | ||
const result = await testEvaluator.handler(runtime, message); | ||
expect(result).toBeTruthy(); | ||
}); | ||
}); |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.