Skip to content
Merged

logging #3007

Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 8 additions & 0 deletions agents-api/src/domains/run/agents/tools/function-tools.ts
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@ export async function getFunctionTools(
const functionTools: ToolSet = {};
const project = ctx.executionContext.project;
try {
const withRefStart = Date.now();
const functionToolsForAgent = await withRef(
manageDbPool,
ctx.executionContext.resolvedRef,
Expand All @@ -38,6 +39,13 @@ export async function getFunctionTools(
});
}
);
const withRefMs = Date.now() - withRefStart;
if (withRefMs > 5_000) {
Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

optionally, you can just always log the time in case 5 seconds doesn't capture what you are looking for

logger.info(
{ ref: ctx.executionContext.resolvedRef.name, withRefMs, subAgentId: ctx.config.id },
'Slow withRef in getFunctionTools'
);
}

const functionToolsData = functionToolsForAgent.data ?? [];

Expand Down
20 changes: 18 additions & 2 deletions agents-api/src/domains/run/services/TriggerService.ts
Original file line number Diff line number Diff line change
Expand Up @@ -546,6 +546,10 @@ export async function dispatchExecution(params: {
// Wrap agent execution in a single promise protected by waitUntil
// The trigger.message_received span is created inside executeAgentAsync
const dispatchedAt = Date.now();
logger.info(
{ tenantId, projectId, agentId, triggerId, invocationId, conversationId, dispatchedAt },
'Trigger execution dispatched and starting execution'
);
Comment on lines +549 to +552
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Low: Duplicate of the log 5 lines above. Line 541–543 already logs 'Trigger invocation created' with the same structured fields (tenantId, projectId, agentId, triggerId, invocationId, conversationId). This second log at the same level with nearly the same payload adds noise without new information. Remove it or merge the two.

const executionPromise = executeAgentAsync({
tenantId,
projectId,
Expand Down Expand Up @@ -653,7 +657,10 @@ export async function executeAgentAsync(
forwardedHeaders,
invocationType = 'trigger',
} = params;

logger.info(
{ tenantId, projectId, agentId, triggerId, invocationId },
'executeAgentAsync: starting'
Comment on lines +660 to +662
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Low: Redundant entry-point log. executeAgentAsync already has a log at line 891 ('Starting async trigger execution') with the same fields plus conversationId. This extra log at function entry doesn't add diagnostic value — if you want it, make it debug.

);
let userMessage: string;
let messageParts: Part[];

Expand Down Expand Up @@ -695,14 +702,16 @@ export async function executeAgentAsync(
}

// Load project FIRST to get agent name
const loadProjectStart = Date.now();
const project = await withRef(manageDbPool, resolvedRef, async (db) => {
return await getFullProjectWithRelationIds(db)({
scopes: { tenantId, projectId },
});
});
const loadProjectMs = Date.now() - loadProjectStart;

logger.info(
{ tenantId, projectId, agentId, triggerId, invocationId, hasProject: !!project },
{ tenantId, projectId, agentId, triggerId, invocationId, hasProject: !!project, loadProjectMs },
'executeAgentAsync: project loaded'
);

Expand Down Expand Up @@ -884,6 +893,7 @@ export async function executeAgentAsync(

try {
// Create conversation and set active agent
const convStart = Date.now();
await createOrGetConversation(runDbClient)({
id: conversationId,
tenantId,
Expand All @@ -900,6 +910,12 @@ export async function executeAgentAsync(
agentId,
ref: resolvedRef,
});
const convMs = Date.now() - convStart;

logger.info(
{ invocationId, conversationId, convMs },
'executeAgentAsync: conversation created'
);

if (messages && messages.length > 0) {
for (const msg of messages) {
Expand Down
26 changes: 26 additions & 0 deletions agents-api/src/middleware/branchScopedDb.ts
Original file line number Diff line number Diff line change
Expand Up @@ -70,7 +70,12 @@ export const branchScopedDbMiddleware = async (c: Context, next: Next) => {
}

// Get a dedicated connection from the pool
const mwStartTime = Date.now();
const connection: PoolClient = await pool.connect();
const connectMs = Date.now() - mwStartTime;
if (connectMs > 5_000) {
logger.info({ ref: resolvedRef.name, connectMs }, 'Slow pool.connect in branchScopedDb');
}
let tempBranch: string | null = null;

try {
Expand All @@ -79,7 +84,12 @@ export const branchScopedDbMiddleware = async (c: Context, next: Next) => {

if (resolvedRef.type === 'branch') {
logger.debug({ branch: resolvedRef.name }, 'Checking out branch');
const checkoutStart = Date.now();
await checkoutBranch(requestDb)({ branchName: resolvedRef.name, autoCommitPending: true });
const checkoutMs = Date.now() - checkoutStart;
if (checkoutMs > 5_000) {
logger.info({ ref: resolvedRef.name, checkoutMs }, 'Slow checkoutBranch in branchScopedDb');
}
} else {
// For tags/commits, create temporary branch (needed for reads)
tempBranch = `temp_${resolvedRef.type}_${resolvedRef.hash}_${generateId()}`;
Expand Down Expand Up @@ -139,15 +149,31 @@ export const branchScopedDbMiddleware = async (c: Context, next: Next) => {
}
} finally {
// Always cleanup: checkout main and release connection
const cleanupStart = Date.now();
try {
await connection.query(`SELECT DOLT_CHECKOUT('main')`);

if (tempBranch) {
await connection.query(`SELECT DOLT_BRANCH('-D', $1)`, [tempBranch]);
}
} catch (cleanupError) {
logger.info(
{
ref: resolvedRef.name,
cleanupMs: Date.now() - cleanupStart,
error: cleanupError instanceof Error ? cleanupError.message : String(cleanupError),
},
'branchScopedDb cleanup failed'
);
Comment on lines +160 to +167
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Medium: Same duplicate-log pattern as ref-scope.ts. The new logger.info for cleanup failure is immediately followed by the existing logger.error for the same error. Consolidate into the single error-level call and add the timing fields there.

Suggested change
logger.info(
{
ref: resolvedRef.name,
cleanupMs: Date.now() - cleanupStart,
error: cleanupError instanceof Error ? cleanupError.message : String(cleanupError),
},
'branchScopedDb cleanup failed'
);
logger.error(
{
error: cleanupError,
ref: resolvedRef.name,
cleanupMs: Date.now() - cleanupStart,
},
'Error during connection cleanup'
);

logger.error({ error: cleanupError }, 'Error during connection cleanup');
} finally {
const totalMs = Date.now() - mwStartTime;
Comment on lines +165 to +170
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

🟡 Minor: Same duplicate logging pattern as ref-scope.ts

Issue: Lines 166-169 log at INFO, then line 170 logs the same error at ERROR level.

Fix: Keep only the ERROR log, include cleanupMs in it:

Suggested change
},
'branchScopedDb cleanup failed'
);
logger.error({ error: cleanupError }, 'Error during connection cleanup');
} finally {
const totalMs = Date.now() - mwStartTime;
} catch (cleanupError) {
logger.error({ error: cleanupError, ref: resolvedRef.name, cleanupMs: Date.now() - cleanupStart }, 'Error during connection cleanup');

if (totalMs > 5_000) {
logger.info(
{ ref: resolvedRef.name, totalMs, connectMs },
'Slow branchScopedDb middleware total duration'
);
}
connection.release();
}
}
Expand Down
12 changes: 12 additions & 0 deletions agents-api/src/middleware/ref.ts
Original file line number Diff line number Diff line change
Expand Up @@ -229,8 +229,20 @@ async function resolveProjectRef(

let refResult: Awaited<ReturnType<ReturnType<typeof resolveRef>>> = null;
try {
const resolveStart = Date.now();
refResult = await resolveRef(db)(projectMain);
const resolveMs = Date.now() - resolveStart;
if (resolveMs > 5_000) {
logger.info(
{ projectMain, resolveMs, found: !!refResult },
'Slow resolveRef in ref middleware'
);
}
} catch (error) {
logger.info(
{ projectMain, error: error instanceof Error ? error.message : String(error) },
'resolveRef threw in ref middleware'
);
logger.warn({ error, projectMain }, 'Failed to resolve project main branch');
Comment on lines +242 to 246
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Medium: Duplicate log — this logger.info and the existing logger.warn on line 246 log the same error. The warn already captures error and projectMain. Remove this new info call to avoid double-logging the same failure.

Suggested change
logger.info(
{ projectMain, error: error instanceof Error ? error.message : String(error) },
'resolveRef threw in ref middleware'
);
logger.warn({ error, projectMain }, 'Failed to resolve project main branch');
logger.warn({ error, projectMain }, 'Failed to resolve project main branch');

refResult = null;
}
Comment on lines 241 to 248
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

🟠 MAJOR: Duplicate logging — error logged at both INFO and WARN levels

Issue: Lines 242-245 log at INFO level, then line 246 logs the same error at WARN level. This creates redundant log entries for the same event.

Why: The established codebase pattern (per api-logging-guidelines) is to use WARN for recoverable issues, not to double-log at multiple levels. This adds noise and makes log analysis harder.

Fix: Remove the INFO log and keep only the WARN log:

Suggested change
} catch (error) {
logger.info(
{ projectMain, error: error instanceof Error ? error.message : String(error) },
'resolveRef threw in ref middleware'
);
logger.warn({ error, projectMain }, 'Failed to resolve project main branch');
refResult = null;
}
} catch (error) {
logger.warn({ error, projectMain }, 'Failed to resolve project main branch');
refResult = null;
}

Expand Down
19 changes: 19 additions & 0 deletions packages/agents-core/src/dolt/ref-helpers.ts
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,8 @@ export const getTenantScopedRef = (tenantId: string, ref: string): string => {
export const resolveRef =
(db: AgentsManageDatabaseClient) =>
async (ref: string): Promise<ResolvedRef | null> => {
const startTime = Date.now();

if (isValidCommitHash(ref)) {
return {
type: 'commit',
Expand All @@ -43,8 +45,25 @@ export const resolveRef =
};
}

const branchQueryStart = Date.now();
const branches = await doltListBranches(db)();
const branchQueryMs = Date.now() - branchQueryStart;
const branch = branches.find((b) => b.name === ref);

if (!branch) {
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

🟠 MAJOR: Unconditional INFO log on expected code path creates noise

Issue: resolveRef is called multiple times during fallback resolution (e.g., try project-scoped, then raw ref). A not-found result is often expected, not exceptional. Logging INFO on every miss inflates log volume.

Why: Per api-logging-guidelines, INFO is for "important business events, NOT routine operations". Branch-not-found is a normal return path — the caller handles null. This will fire on every request that uses a non-existent or fallback ref.

Fix: Downgrade to DEBUG level, or let the caller log at INFO when resolution ultimately fails:

Suggested change
if (!branch) {
if (!branch) {
const totalMs = Date.now() - startTime;
logger.debug(
{ ref, branchCount: branches.length, branchQueryMs, totalMs },
'resolveRef: branch not found in dolt_branches result'
);
}

const totalMs = Date.now() - startTime;
logger.info(
{
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

🔴 CRITICAL: Logging all branch names exposes tenant/project identifiers

Issue: Branch names follow the pattern {tenantId}_{projectId}_{branchName}. Logging ALL branch names via branchNames: branches.map((b) => b.name) leaks every tenant and project ID in the database whenever any branch resolution fails.

Why: This creates:

  1. A compliance/data-isolation issue where one tenant's IDs appear in logs caused by another tenant's request
  2. Unbounded log line size as branch count grows (could be hundreds or thousands)
  3. Log ingestion costs scaling with total branch count per failed lookup

Fix: Remove branchNames from the log payload:

Suggested change
{
logger.info(
{ ref, branchCount: branches.length, branchQueryMs, totalMs },
'resolveRef: branch not found in dolt_branches result'
);

Refs:

ref,
branchCount: branches.length,
branchQueryMs,
totalMs,
branchNames: branches.map((b) => b.name),
},
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

High: branchNames leaks all tenant branch names into logs. This array contains tenant-scoped branch names for every project. In a production environment with many tenants sharing a Doltgres instance, this could produce enormous log entries and expose tenant data in log aggregators.

Drop branchNames entirely — branchCount is sufficient to diagnose the "branch not found" scenario.

Suggested change
},
branchCount: branches.length,
branchQueryMs,
totalMs,

'resolveRef: branch not found in dolt_branches result'
);
}
Comment on lines +53 to +65
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Medium: Log level should be warn, not info. A branch not being found during resolveRef is an unexpected/concerning condition — callers generally expect the branch to exist. logger.warn is the appropriate level per the repo's logging guidelines ("recoverable issues or concerning patterns"). Also, this fires unconditionally (no slow-path threshold), so at info it will add noise for every cache miss or missing-branch lookup.


if (branch) {
return {
type: 'branch',
Expand Down
33 changes: 32 additions & 1 deletion packages/agents-core/src/dolt/ref-scope.ts
Original file line number Diff line number Diff line change
Expand Up @@ -147,15 +147,29 @@ export async function withRef<T>(
'Acquiring connection for ref scope'
);

const connectStart = Date.now();
const connection: PoolClient = await pool.connect();
const connectMs = Date.now() - connectStart;
if (connectMs > 5_000) {
logger.info({ ref: resolvedRef.name, connectMs, connectionId }, 'Slow pool.connect in withRef');
}

let tempBranch: string | null = null;

try {
const db = drizzle(connection, { schema }) as unknown as AgentsManageDatabaseClient;

if (resolvedRef.type === 'branch') {
logger.debug({ branch: resolvedRef.name, connectionId }, 'Checking out branch');
const checkoutStart = Date.now();
await checkoutBranch(db)({ branchName: resolvedRef.name, syncSchema: false });
const checkoutMs = Date.now() - checkoutStart;
if (checkoutMs > 5_000) {
logger.info(
{ ref: resolvedRef.name, checkoutMs, connectionId },
'Slow checkoutBranch in withRef'
);
}
} else {
// For tags/commits, create temporary branch from the hash
// Include timestamp for easier cleanup of orphaned branches
Expand Down Expand Up @@ -237,6 +251,7 @@ export async function withRef<T>(
throw error;
} finally {
// Cleanup: checkout main, delete temp branch, release connection
const cleanupStart = Date.now();
try {
await connection.query(`SELECT DOLT_CHECKOUT('main')`);

Expand All @@ -245,14 +260,30 @@ export async function withRef<T>(
await connection.query(`SELECT DOLT_BRANCH('-D', $1)`, [tempBranch]);
}
} catch (cleanupError) {
logger.info(
{
ref: resolvedRef.name,
connectionId,
cleanupMs: Date.now() - cleanupStart,
error: cleanupError instanceof Error ? cleanupError.message : String(cleanupError),
},
'withRef cleanup failed'
);
Comment on lines +263 to +271
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Medium: Duplicate log — same cleanup failure is logged twice. This new logger.info and the existing logger.error on lines 272–275 report the same cleanupError. Remove one. The error level is the correct one for a cleanup failure; the info call should be dropped.

Suggested change
logger.info(
{
ref: resolvedRef.name,
connectionId,
cleanupMs: Date.now() - cleanupStart,
error: cleanupError instanceof Error ? cleanupError.message : String(cleanupError),
},
'withRef cleanup failed'
);
logger.error(
{ error: cleanupError, tempBranch, connectionId, cleanupMs: Date.now() - cleanupStart },
'Error during ref scope cleanup'
);

logger.error(
{ error: cleanupError, tempBranch, connectionId },
Comment on lines +265 to 273
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

🟡 Minor: Duplicate logging — cleanup failure logged at both INFO and ERROR levels

Issue: Lines 266-269 log at INFO level, then lines 270-273 log the same error at ERROR level. The same pattern appears in branchScopedDb.ts.

Why: Cleanup failures are unexpected errors warranting ERROR level, not INFO. Double-logging the same event at different levels creates noise.

Fix: Keep only the ERROR log:

Suggested change
ref: resolvedRef.name,
connectionId,
cleanupMs: Date.now() - cleanupStart,
error: cleanupError instanceof Error ? cleanupError.message : String(cleanupError),
},
'withRef cleanup failed'
);
logger.error(
{ error: cleanupError, tempBranch, connectionId },
} catch (cleanupError) {
logger.error(
{ error: cleanupError, tempBranch, connectionId, cleanupMs: Date.now() - cleanupStart },
'Error during ref scope cleanup'
);

'Error during ref scope cleanup'
);
} finally {
const totalMs = Date.now() - startTime;
if (totalMs > 5_000) {
logger.info(
{ ref: resolvedRef.name, totalMs, connectMs, connectionId },
'Slow withRef total duration'
);
}
connection.release();
logger.debug(
{ ref: resolvedRef.name, duration: Date.now() - startTime, connectionId },
{ ref: resolvedRef.name, duration: totalMs, connectionId },
'Connection released'
);
}
Expand Down
Loading