Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
65 changes: 39 additions & 26 deletions src/agent/context.ts
Original file line number Diff line number Diff line change
Expand Up @@ -55,10 +55,15 @@ export function estimateTokens(text: string): number {
* Truncate a tool result to fit within the size limit.
* Appends a truncation notice if content was trimmed.
*/
export function truncateToolResult(
  result: string,
  maxSize: number = MAX_TOOL_RESULT_SIZE,
): string {
  // result:  raw tool output to be placed into the context window.
  // maxSize: maximum number of characters (UTF-16 code units) to keep.
  // returns: `result` unchanged when it fits; otherwise the first `limit`
  //          characters followed by a notice stating how many were dropped.

  // Clamp to a non-negative whole number. A negative limit would make
  // `slice(0, limit)` count from the end of the string and the notice would
  // report more omitted characters than the string contains; a fractional
  // limit would yield a fractional count in the notice.
  const limit = Math.max(0, Math.floor(maxSize));

  if (result.length <= limit) return result;

  const omitted = result.length - limit;
  return `${result.slice(0, limit)}\n\n[TRUNCATED: ${omitted} characters omitted]`;
}

/**
Expand Down Expand Up @@ -95,9 +100,7 @@ export function buildContextMessages(
): ChatMessage[] {
const budget = options?.budget ?? DEFAULT_TOKEN_BUDGET;

const messages: ChatMessage[] = [
{ role: "system", content: systemPrompt },
];
const messages: ChatMessage[] = [{ role: "system", content: systemPrompt }];

// Calculate token estimates for all turns
const turnTokens = recentTurns.map((turn) => ({
Expand Down Expand Up @@ -127,7 +130,8 @@ export function buildContextMessages(

// Ensure we always summarize at least something
if (splitIndex === 0) splitIndex = 1;
if (splitIndex >= recentTurns.length) splitIndex = Math.max(1, recentTurns.length - 1);
if (splitIndex >= recentTurns.length)
splitIndex = Math.max(1, recentTurns.length - 1);

const oldTurns = recentTurns.slice(0, splitIndex);
turnsToRender = recentTurns.slice(splitIndex);
Expand Down Expand Up @@ -185,9 +189,7 @@ export function buildContextMessages(

// Add tool results with truncation
for (const tc of turn.toolCalls) {
const rawContent = tc.error
? `Error: ${tc.error}`
: tc.result;
const rawContent = tc.error ? `Error: ${tc.error}` : tc.result;
messages.push({
role: "tool",
content: truncateToolResult(rawContent),
Expand Down Expand Up @@ -260,14 +262,18 @@ export function formatMemoryBlock(memories: MemoryRetrievalResult): string {
if (memories.workingMemory.length > 0) {
sections.push("### Working Memory");
for (const e of memories.workingMemory) {
sections.push(`- [${e.contentType}] (p=${e.priority.toFixed(1)}) ${e.content}`);
sections.push(
`- [${e.contentType}] (p=${e.priority.toFixed(1)}) ${e.content}`,
);
}
}

if (memories.episodicMemory.length > 0) {
sections.push("### Recent History");
for (const e of memories.episodicMemory) {
sections.push(`- [${e.eventType}] ${e.summary} (${e.outcome || "neutral"})`);
sections.push(
`- [${e.eventType}] ${e.summary} (${e.outcome || "neutral"})`,
);
}
}

Expand All @@ -281,14 +287,18 @@ export function formatMemoryBlock(memories: MemoryRetrievalResult): string {
if (memories.proceduralMemory.length > 0) {
sections.push("### Known Procedures");
for (const e of memories.proceduralMemory) {
sections.push(`- ${e.name}: ${e.description} (${e.steps.length} steps, ${e.successCount}/${e.successCount + e.failureCount} success)`);
sections.push(
`- ${e.name}: ${e.description} (${e.steps.length} steps, ${e.successCount}/${e.successCount + e.failureCount} success)`,
);
}
}

if (memories.relationships.length > 0) {
sections.push("### Known Entities");
for (const e of memories.relationships) {
sections.push(`- ${e.entityName || e.entityAddress}: ${e.relationshipType} (trust: ${e.trustScore.toFixed(1)})`);
sections.push(
`- ${e.entityName || e.entityAddress}: ${e.relationshipType} (trust: ${e.trustScore.toFixed(1)})`,
);
}
}

Expand Down Expand Up @@ -321,20 +331,23 @@ export async function summarizeTurns(

// For many turns, use inference to create a summary
try {
const response = await inference.chat([
const response = await inference.chat(
[
{
role: "system",
content:
"Summarize the following agent activity log into a concise paragraph. Focus on: what was accomplished, what failed, current goals, and important context for the next turn.",
},
{
role: "user",
content: turnSummaries.join("\n"),
},
],
{
role: "system",
content:
"Summarize the following agent activity log into a concise paragraph. Focus on: what was accomplished, what failed, current goals, and important context for the next turn.",
maxTokens: 500,
temperature: 0.1, // GLM-5 requires temperature > 0
},
{
role: "user",
content: turnSummaries.join("\n"),
},
], {
maxTokens: 500,
temperature: 0,
});
);

return `Previous activity summary:\n${response.message.content}`;
} catch {
Expand Down
6 changes: 6 additions & 0 deletions src/agent/loop.ts
Original file line number Diff line number Diff line change
Expand Up @@ -110,8 +110,14 @@ export async function runAgentLoop(
};

// Initialize inference router (Phase 2.3)
// Merge: defaults < top-level config fields < nested modelStrategy
const modelStrategyConfig: ModelStrategyConfig = {
...DEFAULT_MODEL_STRATEGY_CONFIG,
// Bridge top-level config fields for backward compatibility
// This ensures inferenceModel/maxTokensPerTurn work without nested modelStrategy
...(config.inferenceModel ? { inferenceModel: config.inferenceModel } : {}),
...(config.maxTokensPerTurn ? { maxTokensPerTurn: config.maxTokensPerTurn } : {}),
// Nested modelStrategy takes highest priority
...(config.modelStrategy ?? {}),
};
const modelRegistry = new ModelRegistry(db.raw);
Expand Down
92 changes: 61 additions & 31 deletions src/conway/inference.ts
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,8 @@ interface InferenceClientOptions {
maxTokens: number;
lowComputeModel?: string;
openaiApiKey?: string;
/** Custom OpenAI API base URL (e.g., for GLM Coding Plan: https://open.bigmodel.cn/api/coding/paas/v4) */
openaiApiBaseUrl?: string;
anthropicApiKey?: string;
ollamaBaseUrl?: string;
/** Optional registry lookup — if provided, used before name heuristics */
Expand All @@ -36,7 +38,14 @@ type InferenceBackend = "conway" | "openai" | "anthropic" | "ollama";
export function createInferenceClient(
options: InferenceClientOptions,
): InferenceClient {
const { apiUrl, apiKey, openaiApiKey, anthropicApiKey, ollamaBaseUrl, getModelProvider } = options;
const {
apiUrl,
apiKey,
openaiApiKey,
anthropicApiKey,
ollamaBaseUrl,
getModelProvider,
} = options;
const httpClient = new ResilientHttpClient({
baseTimeout: INFERENCE_TIMEOUT_MS,
retryableStatuses: [429, 500, 502, 503, 504],
Expand Down Expand Up @@ -98,13 +107,17 @@ export function createInferenceClient(
}

const openAiLikeApiUrl =
backend === "openai" ? "https://api.openai.com" :
backend === "ollama" ? (ollamaBaseUrl as string).replace(/\/$/, "") :
apiUrl;
backend === "openai"
? options.openaiApiBaseUrl || "https://api.openai.com"
: backend === "ollama"
? (ollamaBaseUrl as string).replace(/\/$/, "")
: apiUrl;
const openAiLikeApiKey =
backend === "openai" ? (openaiApiKey as string) :
backend === "ollama" ? "ollama" :
apiKey;
backend === "openai"
? (openaiApiKey as string)
: backend === "ollama"
? "ollama"
: apiKey;

return chatViaOpenAiCompatible({
model,
Expand Down Expand Up @@ -141,9 +154,7 @@ export function createInferenceClient(
};
}

function formatMessage(
msg: ChatMessage,
): Record<string, unknown> {
function formatMessage(msg: ChatMessage): Record<string, unknown> {
const formatted: Record<string, unknown> = {
role: msg.role,
content: msg.content,
Expand Down Expand Up @@ -182,9 +193,14 @@ function resolveInferenceBackend(

// Heuristic fallback (model not in registry yet)
if (keys.anthropicApiKey && /^claude/i.test(model)) return "anthropic";
if (keys.openaiApiKey && /^(gpt-[3-9]|gpt-4|gpt-5|o[1-9][-\s.]|o[1-9]$|chatgpt)/i.test(model)) return "openai";
// GLM models (Zhipu AI): glm-* — routed through the OpenAI-compatible API
if (keys.openaiApiKey && /^glm/i.test(model)) return "openai";
if (
keys.openaiApiKey &&
/^(gpt-[3-9]|gpt-4|gpt-5|o[1-9][-\s.]|o[1-9]$|chatgpt)/i.test(model)
)
return "openai";
return "conway";

}

async function chatViaOpenAiCompatible(params: {
Expand All @@ -195,7 +211,14 @@ async function chatViaOpenAiCompatible(params: {
backend: "conway" | "openai" | "ollama";
httpClient: ResilientHttpClient;
}): Promise<InferenceResponse> {
const resp = await params.httpClient.request(`${params.apiUrl}/v1/chat/completions`, {
// GLM Coding Plan uses /chat/completions instead of /v1/chat/completions
const isGlmApi =
params.apiUrl.includes("bigmodel.cn") ||
params.apiUrl.includes("/v4") ||
params.apiUrl.includes("/paas");
const endpoint = isGlmApi ? "/chat/completions" : "/v1/chat/completions";

const resp = await params.httpClient.request(`${params.apiUrl}${endpoint}`, {
method: "POST",
headers: {
"Content-Type": "application/json",
Expand All @@ -215,7 +238,7 @@ async function chatViaOpenAiCompatible(params: {
);
}

const data = await resp.json() as any;
const data = (await resp.json()) as any;
const choice = data.choices?.[0];

if (!choice) {
Expand All @@ -229,15 +252,16 @@ async function chatViaOpenAiCompatible(params: {
totalTokens: data.usage?.total_tokens || 0,
};

const toolCalls: InferenceToolCall[] | undefined =
message.tool_calls?.map((tc: any) => ({
const toolCalls: InferenceToolCall[] | undefined = message.tool_calls?.map(
(tc: any) => ({
id: tc.id,
type: "function" as const,
function: {
name: tc.function.name,
arguments: tc.function.arguments,
},
}));
}),
);

return {
id: data.id || "",
Expand Down Expand Up @@ -269,7 +293,9 @@ async function chatViaAnthropic(params: {
messages:
transformed.messages.length > 0
? transformed.messages
: (() => { throw new Error("Cannot send empty message array to Anthropic API"); })(),
: (() => {
throw new Error("Cannot send empty message array to Anthropic API");
})(),
};

if (transformed.system) {
Expand All @@ -289,23 +315,26 @@ async function chatViaAnthropic(params: {
body.tool_choice = { type: "auto" };
}

const resp = await params.httpClient.request("https://api.anthropic.com/v1/messages", {
method: "POST",
headers: {
"Content-Type": "application/json",
"x-api-key": params.anthropicApiKey,
"anthropic-version": "2023-06-01",
const resp = await params.httpClient.request(
"https://api.anthropic.com/v1/messages",
{
method: "POST",
headers: {
"Content-Type": "application/json",
"x-api-key": params.anthropicApiKey,
"anthropic-version": "2023-06-01",
},
body: JSON.stringify(body),
timeout: INFERENCE_TIMEOUT_MS,
},
body: JSON.stringify(body),
timeout: INFERENCE_TIMEOUT_MS,
});
);

if (!resp.ok) {
const text = await resp.text();
throw new Error(`Inference error (anthropic): ${resp.status}: ${text}`);
}

const data = await resp.json() as any;
const data = (await resp.json()) as any;
const content = Array.isArray(data.content) ? data.content : [];
const textBlocks = content.filter((c: any) => c?.type === "text");
const toolUseBlocks = content.filter((c: any) => c?.type === "tool_use");
Expand Down Expand Up @@ -353,9 +382,10 @@ async function chatViaAnthropic(params: {
};
}

function transformMessagesForAnthropic(
messages: ChatMessage[],
): { system?: string; messages: Array<Record<string, unknown>> } {
function transformMessagesForAnthropic(messages: ChatMessage[]): {
system?: string;
messages: Array<Record<string, unknown>>;
} {
const systemParts: string[] = [];
const transformed: Array<Record<string, unknown>> = [];

Expand Down
Loading
Loading