Skip to content

Commit d703bb9

Browse files
committed
The base commit
1 parent 5eb9b55 commit d703bb9

File tree

26 files changed

+601
-196
lines changed

26 files changed

+601
-196
lines changed

Makefile

Lines changed: 11 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -11,21 +11,21 @@ endif
1111
# ==============================================================================
1212
PACKAGE_MANAGER ?= npm
1313
NODE_MODULES_DIR ?= node_modules
14-
REMOVABLE_THINGS ?= .vitest-cache coverage
14+
REMOVABLE_THINGS ?= .vitest-cache coverage site
1515

1616
# ==============================================================================
1717
# SETUP & CHECKS
1818
# ==============================================================================
1919
# Check for required tools
20-
REQUIRED_BINS := node $(PACKAGE_MANAGER) docker
20+
REQUIRED_BINS := node $(PACKAGE_MANAGER)
2121
$(foreach bin,$(REQUIRED_BINS),\
22-
$(if $(shell command -v $(bin) 2> /dev/null),,$(error Please install $(bin) to continue)))
22+
$(if $(shell command -v $(bin) 2> /dev/null),,$(error Please install $(bin) to continue)))
2323

2424
# Internal target to check for node_modules. Not intended for direct use.
2525
check-deps:
2626
@if [ ! -d "$(NODE_MODULES_DIR)" ]; then \
27-
echo "Dependencies not found. Running 'make install' first..."; \
28-
$(MAKE) install; \
27+
echo "Dependencies not found. Running 'make install' first..."; \
28+
$(MAKE) install; \
2929
fi
3030

3131
# Declare all targets as phony (not files)
@@ -50,14 +50,17 @@ install: ## Install project dependencies
5050
build: check-deps ## Build the project for production
5151
$(PACKAGE_MANAGER) run build
5252

53-
start: ## Start the production server
53+
start: ## Run the application
5454
$(PACKAGE_MANAGER) start
5555

56-
dev: ## Start the development server
56+
dev: ## Run the application in development mode (with hot-reload)
5757
$(PACKAGE_MANAGER) run dev
5858

5959
clean: ## Remove caches, build artifacts and documentation
60-
rm -rf dist $(NODE_MODULES_DIR) $(REMOVABLE_THINGS) site
60+
rm -rf dist $(NODE_MODULES_DIR) $(REMOVABLE_THINGS)
61+
62+
reset: clean ## Reset the project to a clean state by removing all artifacts and re-installing dependencies
63+
$(MAKE) install
6164

6265
# ==============================================================================
6366
# DEVELOPMENT

README.md

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -56,3 +56,7 @@ See [CONTRIBUTING.md](CONTRIBUTING.md) for details on how to make a contribution
5656
### License
5757

5858
This template is licensed under the MIT License ([LICENSE](LICENSE) or https://opensource.org/licenses/MIT)
59+
60+
### Acknowledgements
61+
62+
The logo is from [SVG Repo](https://www.svgrepo.com/svg/395899/blue-circle).

docs/README.md

Lines changed: 200 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,203 @@
11
## Project Documentation
22

33
Add the documentation here.
4+
5+
---
6+
7+
Of course. Building a tool like "Octo" is a fantastic project. Based on my analysis of its architecture and my own
8+
experience, here are some ideas for building a similar tool, broken down into core concepts, advanced features, and
9+
product-level thinking.
10+
11+
### 1\. Core Architecture & Foundational Ideas
12+
13+
This is the minimum viable product (MVP) you'd need to get a functioning agent.
14+
15+
- **The Agent Loop:** The heart of any agent is its operational cycle. The classic model is a variation of a REPL
16+
(`Read-Eval-Print Loop`):
17+
$$[Input] \rightarrow [Think] \rightarrow [Act] \rightarrow [Observe]$$
18+
- **Input:** The user provides a prompt.
19+
- **Think:** The agent (your code) sends the prompt and conversation history to an LLM. The LLM's "thinking"
20+
process might involve generating a plan or deciding which tool to use.
21+
- **Act:** Your code parses the LLM's response. If it's a tool call, you execute it. If it's a text response, you
22+
display it.
23+
- **Observe:** The result of the action (tool output or error) is formatted and added to the history. The loop
24+
then repeats with this new context.
25+
26+
- **A Modular Tool System:** This is non-negotiable. Don't hard-code your tools. Create a `Tool` interface and a
27+
registry. "Octo" does this very well. A simple version could look like this:
28+
29+
```typescript
30+
interface Tool {
31+
name: string;
32+
description: string; // Crucial for the LLM to know when to use it
33+
argumentsSchema: t.Type<any>; // Using 'structural' or 'zod' for schemas
34+
execute(args: any): Promise<string>;
35+
}
36+
37+
const toolRegistry: Map<string, Tool> = new Map();
38+
```
39+
40+
This allows you to add new tools like `git_diff` or `run_tests` just by defining a new object that fits the
41+
interface.
42+
43+
- **Rich History Management:** Your history isn't just a list of strings. It's a structured log of events. "Octo's"
44+
`HistoryItem` type is a good example. You should explicitly differentiate between:
45+
- `UserMessage`
46+
- `AssistantMessage` (the LLM's text response)
47+
- `AssistantToolRequest` (the LLM's decision to call a tool)
48+
- `ToolResult` (the output from your code running the tool)
49+
- `SystemNotification` (e.g., "File `x.ts` was modified externally.")
50+
51+
### 2\. Enhancing the Core - "Leveling Up"
52+
53+
These are features that move from a simple proof-of-concept to a robust and reliable tool.
54+
55+
- **LLM Abstraction Layer:** "Octo" uses an IR for this. Your goal is to write code against your own generic
56+
`LLMProvider` interface, not directly against the OpenAI or Anthropic SDKs.
57+
58+
```typescript
59+
interface LLMProvider {
60+
generateResponse(history: LlmIR[], tools: Tool[]): AsyncGenerator<ResponseChunk>;
61+
}
62+
```
63+
64+
This lets you swap models mid-conversation, test new providers, or even integrate local models running via Ollama or
65+
llama.cpp with minimal friction.
66+
67+
- **Context Window Management:** This is a critical, practical problem. A long conversation will exceed the LLM's
68+
context limit.
69+
- **Simple:** Use a "sliding window" approach like "Octo" does in `windowing.ts`. Keep only the last N tokens of
70+
the conversation.
71+
- **Advanced:** Implement a summarization strategy. For older parts of the conversation, use a cheaper/faster LLM
72+
to create a summary and replace the original messages with it.
73+
- **RAG (Retrieval-Augmented Generation):** For providing context about a large codebase, don't stuff entire files
74+
into the prompt. Use vector embeddings (e.g., with `pgvector` or a library like `llamaindex`) to find the most relevant
75+
code snippets for the user's current query and inject only those into the prompt.
76+
77+
- **Self-Correction and Autofix:** "Octo's" use of a separate model to fix malformed JSON is brilliant. Expand on
78+
this:
79+
- **JSON Repair:** This is the most common use case. LLMs often produce JSON with trailing commas or missing
80+
brackets.
81+
- **Code Syntax Repair:** If a tool generates code (`edit` or `create`), you can have a "linter" step that uses an
82+
LLM to fix basic syntax errors before writing to disk.
83+
- **Search String Repair:** "Octo" does this for its `diff` edits. This is a great feature to prevent frustrating
84+
"search text not found" errors.
85+
86+
### 3\. Advanced Concepts & "Next Frontier" Ideas
87+
88+
These are more speculative ideas that could give your tool a unique edge.
89+
90+
- **Multi-Step Planning:** Instead of having the LLM emit one tool call at a time, prompt it to produce a full plan of
91+
action as a JSON object (e.g., a list of steps with dependencies). Your agent then becomes an executor for this plan,
92+
running the tools in sequence and feeding the results back for the next step. This dramatically increases autonomy.
93+
94+
- **Sandboxed Execution Environment:** Running `bash` commands from an LLM directly on your machine is a massive
95+
security risk.
96+
- Use Docker to spin up a container for each session or command. The agent can only modify files inside the
97+
container's volume mount.
98+
- Explore WebAssembly (Wasm) as a secure, lightweight sandboxing target for running code or tools.
99+
100+
- **GUI / Rich Interface:** While "Octo" is a great CLI app, a simple web UI or a VS Code extension could provide huge
101+
value.
102+
- Visualize the agent's plan as a graph.
103+
- Provide rich diff viewers for proposed changes.
104+
- Allow the user to directly edit the agent's proposed tool arguments before execution.
105+
106+
### 4\. Technical Stack & Library Choices
107+
108+
- **Language:** **TypeScript**. For a project of this complexity, type safety is not optional.
109+
- **CLI Framework:** **Ink** (like Octo) is great for rich, interactive UIs. For a more traditional CLI,
110+
**Commander.js** or **Yargs** are standard.
111+
- **Schema & Validation:** **Zod** is the current industry standard and is excellent for parsing and validating
112+
unpredictable LLM outputs. `structural` is also a fine choice.
113+
- **LLM Interaction:** The **Vercel AI SDK (`ai`)** is a strong starting point. It has built-in helpers for streaming,
114+
tool usage, and supports multiple providers.
115+
116+
### 5\. Product & SaaS Ideas
117+
118+
If you're thinking of this as more than a personal project:
119+
120+
- **The "Bring-Your-Own-Key" (BYOK) Model:** This is the easiest way to start. Users provide their own API keys, and
121+
your tool is just the client-side orchestrator. You can sell the tool itself as a one-time purchase or a subscription.
122+
- **The Full SaaS Model:** You manage the API keys and bill users for usage (with a markup). This is more complex but
123+
offers more value. You could provide premium features:
124+
- **Hosted Sandboxes:** Users run their code in your secure, cloud-based environments.
125+
- **Team Collaboration:** Shared sessions, toolsets, and prompts.
126+
- **Specialized Fine-Tuned Models:** Offer your own fine-tuned "autofix" or planning models as a premium feature.
127+
128+
Start with the core loop and a solid, modular tool system. The `FileTracker` and `autofix` ideas from "Octo" are
129+
high-impact features I'd prioritize next. Good luck.
130+
131+
---
132+
133+
Excellent question. We've built a solid foundation for Abi, which is the core agent loop. To evolve it into something as robust and feature-rich as Octo, we need to focus on several key areas.
134+
135+
Think of Abi as the engine and Octo as the full vehicle with a chassis, safety systems, a dashboard, and support for different fuel types.
136+
137+
Here is a breakdown of what's missing, categorized by function.
138+
139+
### 1. Core Functionality & Tooling
140+
141+
This is about expanding what the agent can actually *do*.
142+
143+
* **Expanded Toolset:** Abi currently only has `readFile`. Octo has a comprehensive set of tools for interacting with a developer's environment:
144+
* `list`: To list files and directories (`ls`).
145+
* `create`: To create new files.
146+
* `edit`: A powerful tool with sub-types (`diff`, `append`, `prepend`, `rewrite-whole`) for modifying files. This is the most critical tool for a coding assistant.
147+
* `bash`: To run arbitrary shell commands.
148+
* `fetch`: To retrieve content from URLs.
149+
* **Autofix Mechanism:** This is one of Octo's killer features. When a primary LLM fails (e.g., by creating invalid JSON for a tool call or a bad search string for a file edit), Octo uses a smaller, specialized LLM to try and fix the error automatically instead of failing and asking the user for help.
150+
* **Configuration File:** Octo is driven by a `config.json5` file that defines available models, user preferences, and API key locations. Abi currently has everything hardcoded.
151+
152+
### 2. Safety & Robustness
153+
154+
These are the features that make the tool safe to use and prevent it from making costly mistakes.
155+
156+
* **File Tracker for Stale Edits:** This is the most important safety feature missing from Abi. Octo's `FileTracker` remembers when a file was last read. If you modify the file in your editor and then Abi tries to edit it, the `FileTracker` will block the operation because the agent is working with outdated information. This prevents the AI from accidentally overwriting your work.
157+
* **Token Tracking & Cost Management:** A production-ready tool needs to track API usage. Octo's `token-tracker.ts` keeps a running count of input/output tokens so the user is aware of the cost.
158+
* **Context Window Management:** Abi's history is an array that will grow until it inevitably exceeds the LLM's context window, causing an API error. Octo has a `windowing.ts` module that intelligently truncates the history to ensure it always fits, preventing crashes on long conversations.
159+
160+
### 3. Architectural Maturity
161+
162+
These are structural differences that make Octo more maintainable and extensible.
163+
164+
* **LLM Abstraction (IR Layer):** Right now, Abi is hardcoded to use the OpenAI provider via the Vercel AI SDK. Octo is multi-provider (OpenAI, Anthropic, etc.). It achieves this with an **Intermediate Representation (`LlmIR`)**. The conversation history is converted to this generic IR, and then a provider-specific "compiler" (`responses.ts` or `anthropic.ts`) translates the IR into the exact format the target API needs. This is a major architectural step up.
165+
* **Dedicated State Management:** Abi's state is a simple `history` array managed in a `while` loop. Octo uses `zustand` to manage a much more complex state machine (e.g., `mode: 'responding'`, `mode: 'tool-request'`, `mode: 'error-recovery'`). This is essential for driving a more complex user interface.
166+
167+
### 4. User Experience (UX)
168+
169+
* **Rich CLI with Ink:** Abi uses Node's basic `readline`, which is a simple prompt. Octo uses **Ink**, which is a library for building React-based user interfaces in the terminal. This allows for features like:
170+
* Loading spinners while the AI is thinking.
171+
* Properly formatted code blocks and diffs.
172+
* Clear visual separation between messages from the user, Abi, and the tools.
173+
* Interactive confirmation dialogs for tool usage.
174+
175+
---
176+
177+
### A Prioritized Roadmap to Get Abi Closer to Octo
178+
179+
Here's a logical order to implement these features:
180+
181+
1. **Expand the Toolset:** Start by implementing `list`, `create`, and especially the `edit` tool. This will give Abi the core capabilities of a coding assistant.
182+
2. **Implement `FileTracker`:** Before you do any serious work with the `edit` tool, build the `FileTracker` to prevent accidents. This is your most important safety net.
183+
3. **Upgrade the UI and State:** Replace the `readline` loop with an **Ink**-based UI. At the same time, introduce **Zustand** for state management. The state machine will be necessary to handle the more complex, asynchronous nature of an Ink app.
184+
4. **Refactor for LLM Abstraction:** Once the core features are stable, undertake the larger architectural task of creating the `LlmIR` and separate "compiler" modules for different LLM providers.
185+
5. **Add Advanced Features:** Finally, add the "polish" that makes a tool feel professional, like the autofix mechanism and token tracking.
186+
187+
---
188+
189+
After replacing these files, run `make dev`. Then, try these prompts one by one:
190+
191+
list the files in the src/agent directory
192+
193+
create a file named 'test.txt' with the content 'hello from Abi'
194+
195+
read the file 'test.txt'
196+
197+
append the text '\nand goodbye!' to the file 'test.txt'
198+
199+
read the file 'test.txt' again
200+
201+
in the file 'test.txt', replace the word 'hello' with 'greetings'
202+
203+
read the file 'test.txt' one last time

env.example

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,2 @@
1-
# Backend Configuration
2-
ENVIRONMENT=development
3-
PORT=3000
1+
OPENAI_API_KEY="OPENAI_API_KEY"
2+
GOOGLE_API_KEY="GOOGLE_API_KEY"

logo.svg

Lines changed: 2 additions & 7 deletions
Loading

nodemon.json

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
{
2+
"watch": ["src"],
3+
"ext": "ts",
4+
"ignore": ["src/**/*.test.ts", "dist", "node_modules", ".git", ".vscode"],
5+
"exec": "tsx src/cli.ts"
6+
}

package.json

Lines changed: 17 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -1,48 +1,47 @@
11
{
2-
"name": "template-typescript-project",
2+
"name": "abi-coding-assistant",
33
"version": "0.1.0",
4-
"description": "A template for strict TypeScript projects.",
5-
"main": "dist/backend/server.js",
4+
"description": "An AI coding assistant.",
5+
"main": "dist/cli.js",
66
"type": "module",
77
"scripts": {
8-
"start": "node dist/backend/server.js",
9-
"predev": "npm run build",
10-
"dev": "concurrently \"tsc -w\" \"cpx \\\"src/frontend/public/**\\\" \\\"dist/frontend/public\\\" -w\" \"tsx watch src/backend/server.ts\" \"serve dist/frontend/public -l 5000\"",
11-
"build": "rm -rf ./dist && tsc && cpx \"src/frontend/public/**\" \"dist/frontend/public\"",
8+
"start": "node dist/cli.js",
9+
"dev": "nodemon",
10+
"build": "rm -rf ./dist && tsc",
1211
"test": "vitest run",
1312
"coverage": "vitest run --coverage",
1413
"test:watch": "vitest",
1514
"lint": "eslint . --ext .ts",
15+
"lint:fix": "eslint . --ext .ts --fix",
1616
"format": "prettier . --write",
1717
"typecheck": "tsc --noEmit"
1818
},
19-
"keywords": [],
19+
"keywords": [
20+
"ai",
21+
"typescript",
22+
"cli"
23+
],
2024
"author": "",
2125
"license": "MIT",
2226
"dependencies": {
27+
"@ai-sdk/openai": "^0.0.33",
28+
"ai": "^3.2.16",
2329
"dotenv": "^16.4.5",
24-
"express": "^4.19.2",
25-
"express-async-errors": "^3.1.1"
30+
"zod": "^3.23.8"
2631
},
2732
"devDependencies": {
28-
"@types/express": "^4.17.21",
2933
"@types/node": "^20.14.9",
30-
"@types/supertest": "^6.0.2",
3134
"@typescript-eslint/eslint-plugin": "^7.15.0",
3235
"@typescript-eslint/parser": "^7.15.0",
3336
"@vitest/coverage-v8": "^3.2.4",
34-
"concurrently": "^9.2.0",
35-
"cpx": "^1.5.0",
3637
"eslint": "^8.57.0",
3738
"eslint-config-prettier": "^9.1.0",
39+
"nodemon": "^3.1.4",
3840
"prettier": "^3.3.2",
39-
"serve": "^14.2.4",
40-
"supertest": "^7.0.0",
4141
"tsx": "^4.16.2",
4242
"typescript": "^5.5.3",
4343
"vite-tsconfig-paths": "^4.3.2",
44-
"vitest": "^3.2.4",
45-
"zod": "^3.23.8"
44+
"vitest": "^3.2.4"
4645
},
4746
"engines": {
4847
"node": ">=20.0.0"

0 commit comments

Comments
 (0)