From 65561eaf5041af4f32aa9184624d500f90ac7549 Mon Sep 17 00:00:00 2001 From: moon Date: Thu, 22 Feb 2024 15:14:12 -0800 Subject: [PATCH] Add lore functions --- .gitignore | 4 +- README.md | 2 +- package-lock.json | 5 +- package.json | 6 +- rollup.config.js | 4 +- scripts/processDocs.mjs | 163 +++++++++++++++++++++++++++++++++ src/index.ts | 1 - src/lib/__tests__/lore.test.ts | 8 +- src/lib/runtime.ts | 2 +- 9 files changed, 180 insertions(+), 15 deletions(-) create mode 100644 scripts/processDocs.mjs diff --git a/.gitignore b/.gitignore index c0599b2..fe4b1c4 100644 --- a/.gitignore +++ b/.gitignore @@ -4,4 +4,6 @@ node_modules .env concatenated-output.ts embedding-cache.json -dist \ No newline at end of file +dist +scripts/docs +output.log \ No newline at end of file diff --git a/README.md b/README.md index ec83e7e..6194de1 100644 --- a/README.md +++ b/README.md @@ -1,6 +1,6 @@ # bgent -Alexible, scalable and customizable agent to do your bidding. +A flexible, scalable and customizable agent to do your bidding. ![cj](https://github.com/lalalune/bgent/assets/18633264/7513b5a6-2352-45f3-8b87-7ee0e2171a30) diff --git a/package-lock.json b/package-lock.json index d6169cd..682fb7d 100644 --- a/package-lock.json +++ b/package-lock.json @@ -1,12 +1,13 @@ { "name": "bgent", - "version": "0.0.6", + "version": "0.0.14", "lockfileVersion": 3, "requires": true, "packages": { "": { "name": "bgent", - "version": "0.0.6", + "version": "0.0.14", + "hasInstallScript": true, "license": "MIT", "dependencies": { "@rollup/plugin-json": "^6.1.0", diff --git a/package.json b/package.json index 89e9c71..cac334b 100644 --- a/package.json +++ b/package.json @@ -1,6 +1,6 @@ { "name": "bgent", - "version": "0.0.14", + "version": "0.0.15", "private": false, "description": "bgent. because agent was taken.", "type": "module", @@ -12,12 +12,14 @@ "build": "rm -r ./dist && rollup -c && tsc", "lint": "eslint --ext .ts --ext .js . 
--fix", "dev": "wrangler dev -- --dev", + "docs": "cd docs && npm run start", "shell:dev": "node --no-warnings scripts/shell.mjs --dev", "concat": "node ./scripts/concat.mjs", "shell:cloud": "node --no-warnings scripts/shell.mjs", "test": "NODE_OPTIONS=\"$NODE_OPTIONS --experimental-vm-modules\" jest", "reset-profile": "rm ~/.cjrc", - "deploy": "wrangler deploy" + "deploy": "wrangler deploy", + "postinstall": "cd docs && npm install" }, "bin": { "bgent": "./scripts/shell.mjs" diff --git a/rollup.config.js b/rollup.config.js index b539ab0..2af1e77 100644 --- a/rollup.config.js +++ b/rollup.config.js @@ -8,8 +8,8 @@ export default defineConfig([ { input: "src/index.ts", external: [ - ...Object.keys(pkg.dependencies || {}), - ...Object.keys(pkg.peerDependencies || {}), + // ...Object.keys(pkg.dependencies || {}), + // ...Object.keys(pkg.peerDependencies || {}), ], output: [ { diff --git a/scripts/processDocs.mjs b/scripts/processDocs.mjs new file mode 100644 index 0000000..d2ff3a7 --- /dev/null +++ b/scripts/processDocs.mjs @@ -0,0 +1,163 @@ +import s from '@supabase/supabase-js'; +const { SupabaseClient } = s; +import dotenv from 'dotenv'; +import fs from 'fs/promises'; +import path from 'path'; +import { BgentRuntime, addLore } from '../dist/index.esm.js'; +dotenv.config({ path: '.dev.vars' }); + +const SUPABASE_URL = process.env.SUPABASE_URL ?? 
"https://rnxwpsbkzcugmqauwdax.supabase.co"; +const SUPABASE_SERVICE_API_KEY = process.env.SUPABASE_SERVICE_API_KEY; +const SERVER_URL = "https://api.openai.com/v1"; +const OPENAI_API_KEY = process.env.OPENAI_API_KEY; + +const zeroUuid = '00000000-0000-0000-0000-000000000000'; + +const supabase = new SupabaseClient(SUPABASE_URL, SUPABASE_SERVICE_API_KEY); + +// The first argument from the command line is the starting path +const startingPath = process.argv[2]; + +const runtime = new BgentRuntime({ + debugMode: process.env.NODE_ENV === "development", + supabase, + serverUrl: SERVER_URL, + token: OPENAI_API_KEY, + actions: [], + evaluators: [] +}); + +// Function to process each Markdown file +const processDocument = async (filePath) => { + console.log(`Processing file: ${filePath}`); + + // Read the markdown file + let markdown = await fs.readFile(filePath, 'utf8'); + console.log('markdown:', markdown); + + // Remove the front matter if it exists + const firstSectionMatch = markdown.match(/^---\s*[\r\n]+([\s\S]+?)[\r\n]+---/); + markdown = markdown.replace(firstSectionMatch ? firstSectionMatch[0] : '', '').trim(); + + // Function to split content by headings and ensure chunks are not too large or empty + const splitContent = (content, separator) => { + const sections = content.split(new RegExp(`(?=^${separator})`, 'gm')).filter(Boolean); // Split and keep the separator + let chunks = []; + + sections.forEach(section => { + chunks.push(section.trim()); + }); + + return chunks; + }; + + // Check for large sections without any headings and split them first + let chunks = [markdown.replaceAll('\n\n', '\n')]; + + // Then, try to split by headings if applicable + ['# ', '## '].forEach(heading => { + chunks = chunks.flatMap(chunk => chunk.includes(heading) ? 
splitContent(chunk, heading) : chunk);
+  });
+
+  // For each chunk, embed and save it unless an identical chunk is already stored
+  for (let index = 0; index < chunks.length; index++) {
+    const chunk = chunks[index];
+    console.log(`Embedding chunk ${index + 1}/${chunks.length}`);
+    console.log('chunk is', chunk)
+    // Check whether this chunk already exists in the 'lore' table before embedding
+
+
+
+    if (chunk) {
+      const { data, error } = await supabase.from('lore').select('*').eq('content', chunk);
+      if (error) {
+        console.error('Error fetching lore:', error);
+        return;
+      }
+
+      if (data.length === 0) {
+
+        // write to output.log
+        // await fs.appendFile('output.log', '***** ' + filePath + '\n\n' + chunk + '\n*****');
+        await addLore({ runtime, source: filePath.replace(startingPath, ''), content: chunk, embedContent: chunk });
+      }
+    }
+    // wait for 100 ms between chunks
+    await new Promise(resolve => setTimeout(resolve, 100));
+  }
+
+  console.log('All chunks processed.');
+};
+
+
+
+// Asynchronous function to recursively find .md files and process them, ignoring specified directories
+const findAndProcessMarkdownFiles = async (dirPath) => {
+  try {
+    const filesAndDirectories = await fs.readdir(dirPath, { withFileTypes: true });
+
+    // Iterate over all items in the directory
+    for (const dirent of filesAndDirectories) {
+      const fullPath = path.join(dirPath, dirent.name);
+
+      // Skip 'node_modules' and 'static' directories
+      if (dirent.isDirectory() && !['node_modules', 'static'].includes(dirent.name)) {
+        // If the item is a directory (and not one to ignore), recurse into it
+        await findAndProcessMarkdownFiles(fullPath);
+      } else if (dirent.isFile() && dirent.name.endsWith('.md') && !dirent.name.includes('README')) {
+        // If the item is a file and ends with .md, process it
+        await processDocument(fullPath);
+      }
+    }
+  } catch (error) {
+    console.error(`Error processing directory ${dirPath}: ${error}`);
+  }
+};
+
+// Main function to kick off the script
+const main = async () 
=> { + // check if accounts contains the default agent + const { data: accounts, error } = await supabase.from('accounts').select('*').eq('id', zeroUuid); + + if (error) { + console.error('Error fetching accounts:', error); + return; + } + + if (accounts.length === 0) { + const result = await supabase.from('accounts').upsert({ + id: zeroUuid, + name: 'Default Agent', + email: 'default@agent', + register_complete: true, + details: {}, + }); + } + + const { data: rooms, error: error2 } = await supabase.from('rooms').select('*').eq('id', zeroUuid); + + if (error2) { + console.error('Error fetching rooms:', error2); + return; + } + + if (rooms.length === 0) { + const result2 = await supabase.from('rooms').upsert({ + id: zeroUuid, + name: 'Lore Room', + created_by: zeroUuid, + }); + } + + if (!startingPath) { + console.log('Please provide a starting path as an argument.'); + return; + } + + console.log(`Searching for Markdown files in: ${startingPath}`); + await findAndProcessMarkdownFiles(startingPath); + console.log('Done processing Markdown files.'); +}; + +// Execute the main function +main(); diff --git a/src/index.ts b/src/index.ts index 6bf6ccb..8cd5167 100644 --- a/src/index.ts +++ b/src/index.ts @@ -1,2 +1 @@ -export * from "./agents"; export * from "./lib"; diff --git a/src/lib/__tests__/lore.test.ts b/src/lib/__tests__/lore.test.ts index d50f2eb..c383b71 100644 --- a/src/lib/__tests__/lore.test.ts +++ b/src/lib/__tests__/lore.test.ts @@ -2,14 +2,12 @@ import { type User } from "@supabase/supabase-js"; import { type UUID } from "crypto"; import dotenv from "dotenv"; import { createRuntime } from "../../test/createRuntime"; -import { MemoryManager } from "../memory"; +import { composeContext } from "../context"; +import { addLore, getLore } from "../lore"; import { getRelationship } from "../relationships"; -import { type Content, type Memory } from "../types"; -import { getCachedEmbedding, writeCachedEmbedding } from "../../test/cache"; import { BgentRuntime } 
from "../runtime"; -import { addLore, getLore } from "../lore"; -import { composeContext } from "../context"; import { requestHandlerTemplate } from "../templates"; +import { type Content } from "../types"; dotenv.config(); describe("Lore", () => { diff --git a/src/lib/runtime.ts b/src/lib/runtime.ts index 894b340..676a227 100644 --- a/src/lib/runtime.ts +++ b/src/lib/runtime.ts @@ -36,7 +36,7 @@ import { getMessageActors, } from "./messages"; import { type Actor, /*type Goal,*/ type Memory } from "./types"; -import { getLore } from "./lore"; +import { getLore, formatLore } from "./lore"; export interface AgentRuntimeOpts { recentMessageCount?: number; // number of messages to hold in the recent message cache token: string; // JWT token, can be a JWT token if outside worker, or an OpenAI token if inside worker