Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
25 changes: 20 additions & 5 deletions agents-api/src/domains/run/artifacts/ArtifactService.ts
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@ import jmespath from 'jmespath';
import runDbClient from '../../../data/db/runDbClient';
import { getLogger } from '../../../logger';
import { toolSessionManager } from '../agents/services/ToolSessionManager';
import { sanitizeArtifactBinaryData } from '../services/blob-storage/artifact-binary-sanitizer';
import { agentSessionManager } from '../session/AgentSession';
import {
type ExtendedJsonSchema,
Expand Down Expand Up @@ -884,11 +885,25 @@ export class ArtifactService {
metadata?: Record<string, any>;
toolCallId?: string;
}): Promise<void> {
// Use provided summaryData if available, otherwise default to artifact.data
let summaryData = artifact.summaryData || artifact.data;
let fullData = artifact.data;
const { tenantId, projectId } = this.context.executionContext;

const sanitizedData = (await sanitizeArtifactBinaryData(artifact.data, {
tenantId,
projectId,
artifactId: artifact.artifactId,
})) as Record<string, any>;
const sanitizedSummaryData = artifact.summaryData
? ((await sanitizeArtifactBinaryData(artifact.summaryData, {
tenantId,
projectId,
artifactId: artifact.artifactId,
})) as Record<string, any>)
: undefined;

// Use provided summaryData if available, otherwise default to sanitized data
let summaryData = sanitizedSummaryData || sanitizedData;
let fullData = sanitizedData;

if (this.context.artifactComponents) {
const artifactComponent = this.context.artifactComponents.find(
(ac) => ac.name === artifact.type
Expand All @@ -899,8 +914,8 @@ export class ArtifactService {
const previewSchema = extractPreviewFields(schema);
const fullSchema = extractFullFields(schema);

summaryData = this.filterBySchema(artifact.data, previewSchema);
fullData = this.filterBySchema(artifact.data, fullSchema);
summaryData = this.filterBySchema(sanitizedData, previewSchema);
fullData = this.filterBySchema(sanitizedData, fullSchema);
} catch (error) {
logger.warn(
{
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@ const {
upsertLedgerArtifactMock,
toolSessionManagerMock,
agentSessionManagerMock,
sanitizeArtifactBinaryDataMock,
} = vi.hoisted(() => ({
listTaskIdsByContextIdMock: vi.fn(),
getTaskMock: vi.fn(),
Expand All @@ -33,6 +34,20 @@ const {
setArtifactCache: vi.fn(),
getArtifactCache: vi.fn(),
},
sanitizeArtifactBinaryDataMock: vi.fn(async (value: unknown) => {
  // Test double for the binary sanitizer: only a top-level image/file part
  // carrying a non-empty, non-blob string payload is rewritten; every other
  // input is handed back as-is.
  if (typeof value !== 'object' || value === null || Array.isArray(value)) {
    return value;
  }

  const candidate = value as Record<string, unknown>;
  if (candidate.type !== 'image' && candidate.type !== 'file') {
    return value;
  }

  const payload = candidate.data;
  if (typeof payload !== 'string' || payload.length === 0 || payload.startsWith('blob://')) {
    return value;
  }

  // Copy the part and swap the inline payload for a fixed blob URI.
  return { ...candidate, data: 'blob://mock-key' };
}),
}));

// Mock @inkeep/agents-core WITHOUT importOriginal to avoid loading the heavy module
Expand Down Expand Up @@ -78,6 +93,11 @@ vi.mock('../../../../logger', () => ({
})),
}));

// Swap the binary sanitizer module for test doubles: sanitizeArtifactBinaryData
// delegates to the hoisted sanitizeArtifactBinaryDataMock, and
// stripBinaryDataForObservability returns its argument unchanged.
vi.mock('../../services/blob-storage/artifact-binary-sanitizer', () => ({
sanitizeArtifactBinaryData: sanitizeArtifactBinaryDataMock,
stripBinaryDataForObservability: vi.fn((value: unknown) => value),
}));

// Mock schema-validation to prevent @inkeep/agents-core/utils imports
vi.mock('../../utils/schema-validation', () => ({
extractPreviewFields: vi.fn((schema: any) => ({
Expand Down Expand Up @@ -744,6 +764,42 @@ describe('ArtifactService', () => {
});
});

describe('saveArtifact', () => {
it('sanitizes both data and summaryData before persistence', async () => {
const upsertInvokerMock = vi.fn().mockResolvedValue({ created: true, existing: null });
upsertLedgerArtifactMock.mockReturnValue(upsertInvokerMock);

await artifactService.saveArtifact({
artifactId: 'art-save-1',
name: 'Artifact Name',
description: 'Artifact Description',
type: 'UnknownType',
data: { type: 'image', data: 'base64rawdata', mimeType: 'image/png' } as any,
summaryData: { type: 'image', data: 'base64summary', mimeType: 'image/png' } as any,
metadata: {},
toolCallId: 'tool-call-1',
});

expect(sanitizeArtifactBinaryDataMock).toHaveBeenCalledTimes(2);
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The test asserts sanitizeArtifactBinaryDataMock was called twice but never verifies the arguments. Assert that the first call received (artifact.data, { tenantId, projectId, artifactId }) and the second received (artifact.summaryData, ...) to ensure the correct context is forwarded.

expect(upsertLedgerArtifactMock).toHaveBeenCalledWith('mock-run-db-client');
expect(upsertInvokerMock).toHaveBeenCalledWith(
expect.objectContaining({
artifact: expect.objectContaining({
parts: [
{
kind: 'data',
data: {
summary: expect.objectContaining({ data: 'blob://mock-key' }),
full: expect.objectContaining({ data: 'blob://mock-key' }),
},
},
],
}),
})
);
});
});

describe('cache key regression (data → full)', () => {
it('should store full data under parts[0].data.full in the cache', async () => {
const mockToolResult = {
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,189 @@
import { beforeEach, describe, expect, it, vi } from 'vitest';
import {
sanitizeArtifactBinaryData,
stripBinaryDataForObservability,
} from '../blob-storage/artifact-binary-sanitizer';

// Replace the blob-storage entry point with lightweight stand-ins.
// getBlobStorageProvider is a bare mock here; its return value is supplied in
// the sanitizeArtifactBinaryData suite's beforeEach. The URI helpers simply
// add, test for, or strip the 'blob://' prefix.
vi.mock('../blob-storage/index', () => ({
getBlobStorageProvider: vi.fn(),
isBlobUri: (s: string) => s.startsWith('blob://'),
toBlobUri: (key: string) => `blob://${key}`,
fromBlobUri: (uri: string) => uri.slice('blob://'.length),
BLOB_URI_PREFIX: 'blob://',
}));

// Build storage keys as a plain string template over the supplied fields
// (tenantId, projectId, artifactId, contentHash, ext), so the mocked key
// varies only with those fields.
vi.mock('../blob-storage/storage-keys', () => ({
buildStorageKey: vi.fn(
(input: any) =>
`v1/t_${input.tenantId}/artifact-data/p_${input.projectId}/a_${input.artifactId}/sha256-${input.contentHash}.${input.ext}`
),
}));

// Base64 fixtures on either side of the 100-character threshold that the
// stripBinaryDataForObservability tests refer to.
const SMALL_BASE64 = 'aGVsbG8='; // "hello" — only 8 chars, below threshold
const LARGE_BASE64 = Buffer.from('x'.repeat(200)).toString('base64'); // > 100 chars

// Context object passed as the second argument to sanitizeArtifactBinaryData
// throughout this file.
const CTX = { tenantId: 'tenant-1', projectId: 'proj-1', artifactId: 'art-1' };

// Suite for the synchronous observability helper: large inline image/file
// payloads become a short placeholder string, everything else passes through.
describe('stripBinaryDataForObservability', () => {
  it('replaces image part data with placeholder', () => {
    const imagePart = { type: 'image', data: LARGE_BASE64, mimeType: 'image/png' };

    const stripped = stripBinaryDataForObservability(imagePart) as any;

    expect(stripped.type).toBe('image');
    expect(stripped.data).toMatch(/^\[binary data ~\d+ bytes, mimeType: image\/png\]$/);
    expect(stripped.mimeType).toBe('image/png');
  });

  it('replaces file part data with placeholder', () => {
    const filePart = { type: 'file', data: LARGE_BASE64, mimeType: 'application/pdf' };

    const stripped = stripBinaryDataForObservability(filePart) as any;

    expect(stripped.data).toMatch(/^\[binary data ~\d+ bytes/);
  });

  it('leaves already-blob-uri data untouched', () => {
    const storedPart = { type: 'image', data: 'blob://some/key', mimeType: 'image/png' };

    const stripped = stripBinaryDataForObservability(storedPart) as any;

    expect(stripped.data).toBe('blob://some/key');
  });

  it('leaves small strings untouched (below 100 char threshold)', () => {
    const tinyPart = { type: 'image', data: SMALL_BASE64, mimeType: 'image/png' };

    const stripped = stripBinaryDataForObservability(tinyPart) as any;

    expect(stripped.data).toBe(SMALL_BASE64);
  });

  it('leaves http URLs untouched', () => {
    const linkedPart = {
      type: 'image',
      data: 'https://example.com/img.png',
      mimeType: 'image/png',
    };

    const stripped = stripBinaryDataForObservability(linkedPart) as any;

    expect(stripped.data).toBe('https://example.com/img.png');
  });

  it('recursively strips nested binary parts', () => {
    // The binary part sits one level down, inside an array-valued property.
    const payload = {
      toolResult: [
        { type: 'text', text: 'Ticket info' },
        { type: 'image', data: LARGE_BASE64, mimeType: 'image/jpeg' },
      ],
    };

    const stripped = stripBinaryDataForObservability(payload) as any;

    expect(stripped.toolResult[0]).toEqual({ type: 'text', text: 'Ticket info' });
    expect(stripped.toolResult[1].data).toMatch(/^\[binary data/);
  });

  it('handles arrays at top level', () => {
    const parts = [
      { type: 'text', text: 'hi' },
      { type: 'image', data: LARGE_BASE64, mimeType: 'image/png' },
    ];

    const stripped = stripBinaryDataForObservability(parts) as any[];

    expect(stripped[0]).toEqual({ type: 'text', text: 'hi' });
    expect(stripped[1].data).toMatch(/^\[binary data/);
  });

  it('passes through non-object primitives unchanged', () => {
    expect(stripBinaryDataForObservability('hello')).toBe('hello');
    expect(stripBinaryDataForObservability(42)).toBe(42);
    expect(stripBinaryDataForObservability(null)).toBeNull();
  });

  it('handles circular references safely', () => {
    // Self-referencing object: the back-edge is expected to come out as a
    // marker string.
    const looped: Record<string, unknown> = { type: 'container' };
    looped.self = looped;

    const stripped = stripBinaryDataForObservability(looped) as Record<string, unknown>;

    expect(stripped.type).toBe('container');
    expect(stripped.self).toBe('[Circular Reference]');
  });
});

describe('sanitizeArtifactBinaryData', () => {
let mockUpload: ReturnType<typeof vi.fn>;

beforeEach(async () => {
  // Start every test with a fresh upload spy that resolves to undefined.
  mockUpload = vi.fn().mockResolvedValue(undefined);

  // Pull in the mocked blob-storage module and make its provider factory
  // hand back an object whose upload is the spy above.
  const blobStorage = await import('../blob-storage/index');
  vi.mocked(blobStorage.getBlobStorageProvider).mockReturnValue({
    upload: mockUpload,
    download: vi.fn(),
    delete: vi.fn(),
  });
});

it('uploads an inline image part and replaces data with blob:// URI', async () => {
  const input = { type: 'image', data: LARGE_BASE64, mimeType: 'image/png' };
  const result = (await sanitizeArtifactBinaryData(input, CTX)) as any;

  // Exactly one upload for the single inline payload; the part keeps its type
  // and mimeType while data becomes a blob:// reference.
  expect(mockUpload).toHaveBeenCalledOnce();
  expect(result.type).toBe('image');
  expect(result.data).toMatch(/^blob:\/\//);
  expect(result.mimeType).toBe('image/png');
});

// Mirrors the image case for `type: 'file'` parts so this suite covers both
// binary part kinds (the observability suite in this file already exercises a
// file part; this suite previously did not).
it('uploads an inline file part and replaces data with blob:// URI', async () => {
  const input = { type: 'file', data: LARGE_BASE64, mimeType: 'application/pdf' };
  const result = (await sanitizeArtifactBinaryData(input, CTX)) as any;

  expect(mockUpload).toHaveBeenCalledOnce();
  expect(result.type).toBe('file');
  expect(result.data).toMatch(/^blob:\/\//);
  expect(result.mimeType).toBe('application/pdf');
});
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The sanitizeArtifactBinaryData suite only tests type: 'image' parts. The implementation also handles 'file' — add a test with { type: 'file', data: LARGE_BASE64, mimeType: 'application/pdf' } to cover that branch.


it('preserves non-binary fields on the image part', async () => {
  // `extra` is not part of the binary payload and is expected to come back
  // unchanged on the sanitized part.
  const imagePart = {
    type: 'image',
    data: LARGE_BASE64,
    mimeType: 'image/jpeg',
    extra: 'keep',
  };

  const sanitized = (await sanitizeArtifactBinaryData(imagePart, CTX)) as Record<string, unknown>;

  expect(sanitized.extra).toBe('keep');
});

it('does not re-upload data that is already a blob:// URI', async () => {
  // The payload is already a blob:// reference, so no upload should happen.
  const alreadyStored = {
    type: 'image',
    data: 'blob://v1/t_x/artifact-data/p_y/a_z/sha256-abc.png',
  };

  await sanitizeArtifactBinaryData(alreadyStored, CTX);

  expect(mockUpload).not.toHaveBeenCalled();
});

it('recursively sanitizes nested structures', async () => {
  // One text part and one inline image part nested under `toolResult`,
  // alongside an unrelated scalar field.
  const textPart = { type: 'text', text: 'Ticket data' };
  const imagePart = { type: 'image', data: LARGE_BASE64, mimeType: 'image/png' };
  const payload = {
    toolResult: [textPart, imagePart],
    toolName: 'get-zendesk-ticket',
  };

  const sanitized = (await sanitizeArtifactBinaryData(payload, CTX)) as any;

  expect(sanitized.toolName).toBe('get-zendesk-ticket');
  expect(sanitized.toolResult[0]).toEqual({ type: 'text', text: 'Ticket data' });
  expect(sanitized.toolResult[1].data).toMatch(/^blob:\/\//);
  // Only the image part should have triggered an upload.
  expect(mockUpload).toHaveBeenCalledOnce();
});

it('uploads multiple image parts independently', async () => {
  // Two parts sharing the same payload but declaring different mime types.
  const pngPart = { type: 'image', data: LARGE_BASE64, mimeType: 'image/png' };
  const jpegPart = { type: 'image', data: LARGE_BASE64, mimeType: 'image/jpeg' };
  const payload = { images: [pngPart, jpegPart] };

  await sanitizeArtifactBinaryData(payload, CTX);

  expect(mockUpload).toHaveBeenCalledTimes(2);
});

it('leaves non-binary values unchanged', async () => {
  // No image/file parts anywhere: strings, a nested object, a number and a
  // boolean only.
  const plainPayload = {
    toolName: 'search',
    toolInput: { query: 'test' },
    count: 5,
    flag: true,
  };

  const sanitized = await sanitizeArtifactBinaryData(plainPayload, CTX);

  expect(sanitized).toEqual(plainPayload);
  expect(mockUpload).not.toHaveBeenCalled();
});

it('produces a deterministic blob:// URI via content hash', async () => {
  const input = { type: 'image', data: LARGE_BASE64, mimeType: 'image/png' };
  // Same bytes and mime type, but with an unrelated extra field on the part.
  const sameBytesOtherMetadata = { ...input, extra: 'unrelated metadata' };
  // Same shape and mime type, different bytes.
  const otherBytes = { ...input, data: Buffer.from('y'.repeat(200)).toString('base64') };

  const r1 = (await sanitizeArtifactBinaryData(input, CTX)) as any;
  const r2 = (await sanitizeArtifactBinaryData(input, CTX)) as any;
  const r3 = (await sanitizeArtifactBinaryData(sameBytesOtherMetadata, CTX)) as any;
  const r4 = (await sanitizeArtifactBinaryData(otherBytes, CTX)) as any;

  expect(r1.data).toMatch(/^blob:\/\//);
  // Repeating the identical call must yield the identical URI.
  expect(r1.data).toBe(r2.data);
  // Comparing identical inputs alone cannot tell content hashing apart from
  // any other deterministic scheme, so also pin both directions: the URI
  // follows the payload bytes, not the surrounding fields...
  expect(r3.data).toBe(r1.data);
  // ...and changing the bytes changes the URI.
  expect(r4.data).not.toBe(r1.data);
});
Comment on lines +172 to +177
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This determinism test is tautological: buildStorageKey is mocked as a pure function of its inputs, so identical inputs always yield identical outputs regardless of whether the real implementation uses content-hashing. Either un-mock buildStorageKey here or pass two inputs with different metadata but identical data and assert the blob URIs match.


it('handles circular references safely', async () => {
  // Object that points back at itself while also holding one inline image.
  const looped: Record<string, unknown> = {
    toolResult: [{ type: 'image', data: LARGE_BASE64, mimeType: 'image/png' }],
  };
  looped.self = looped;

  const sanitized = (await sanitizeArtifactBinaryData(looped, CTX)) as Record<string, unknown>;

  // The back-edge is reported as a marker string, and the single image is
  // uploaded exactly once.
  expect(sanitized.self).toBe('[Circular Reference]');
  expect(mockUpload).toHaveBeenCalledOnce();
});
});
Loading
Loading