Skip to content

Commit 44e4911

Browse files
authored
Merge pull request #1628 from gchq/feature/BAI-1459-create-a-new-file-scan-connector-for-modelscan
Feature/bai 1459 create a new file scan connector for modelscan
2 parents 5f6b52d + dbd0f90 commit 44e4911

File tree

17 files changed

+376
-24
lines changed

17 files changed

+376
-24
lines changed

Diff for: backend/config/default.cjs

+6
Original file line numberDiff line numberDiff line change
@@ -148,6 +148,12 @@ module.exports = {
148148
host: '127.0.0.1',
149149
port: 3310,
150150
},
151+
152+
modelscan: {
153+
protocol: 'http',
154+
host: '127.0.0.1',
155+
port: 3311,
156+
},
151157
},
152158

153159
// These settings are PUBLIC and shared with the UI

Diff for: backend/config/docker_compose.cjs

+5-1
Original file line numberDiff line numberDiff line change
@@ -65,11 +65,15 @@ module.exports = {
6565
clamdscan: {
6666
host: 'clamd',
6767
},
68+
69+
modelscan: {
70+
host: 'modelscan',
71+
},
6872
},
6973

7074
connectors: {
7175
fileScanners: {
72-
kinds: ['clamAV'],
76+
kinds: ['clamAV', 'modelScan'],
7377
},
7478
},
7579
}

Diff for: backend/src/clients/modelScan.ts

+93
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,93 @@
1+
import fetch, { Response } from 'node-fetch'
2+
3+
import config from '../utils/config.js'
4+
import { BadReq, InternalError } from '../utils/error.js'
5+
6+
/**
 * Shape that the JSON body of the ModelScan `/info` endpoint is cast to
 * by `getModelScanInfo`.
 */
interface ModelScanInfoResponse {
  apiName: string
  apiVersion: string
  scannerName: string
  modelscanVersion: string
}
12+
13+
/** A single finding entry inside a ModelScan scan response. */
interface ModelScanIssue {
  description: string
  operator: string
  module: string
  source: string
  scanner: string
  severity: string
}

/**
 * Shape that the JSON body of the ModelScan `/scan/file` endpoint is cast to
 * by `scanFile`.
 */
interface ModelScanResponse {
  summary: {
    total_issues: number
    total_issues_by_severity: {
      LOW: number
      MEDIUM: number
      HIGH: number
      CRITICAL: number
    }
    input_path: string
    absolute_path: string
    modelscan_version: string
    timestamp: string
    scanned: {
      total_scanned: number
      scanned_files: string[]
    }
    skipped: {
      total_skipped: number
      skipped_files: string[]
    }
  }
  // Declared as an array rather than a one-element tuple (`[{...}]`): the
  // consumer iterates it as a list and a clean scan carries no entries.
  issues: ModelScanIssue[]
  // TODO: currently unknown what this might look like
  errors: object[]
}
48+
49+
/**
 * Queries the `/info` endpoint of the configured ModelScan service.
 *
 * @returns The parsed JSON body, typed as `ModelScanInfoResponse`.
 * @throws InternalError when the request itself fails.
 * @throws BadReq when the service answers with a non-OK status.
 */
export async function getModelScanInfo() {
  const { protocol, host, port } = config.avScanning.modelscan
  const infoEndpoint = `${protocol}://${host}:${port}/info`

  let response: Response
  try {
    response = await fetch(infoEndpoint, {
      method: 'GET',
      headers: { 'Content-Type': 'application/json' },
    })
  } catch (err) {
    throw InternalError('Unable to communicate with the ModelScan service.', { err })
  }

  if (response.ok) {
    return (await response.json()) as ModelScanInfoResponse
  }
  throw BadReq('Unrecognised response returned by the ModelScan service.')
}
67+
68+
/**
 * Uploads a file to the ModelScan service's `/scan/file` endpoint as
 * multipart form data (field name `in_file`) and returns the scan report.
 *
 * @param file - Contents of the file to scan.
 * @param file_name - File name attached to the uploaded form part.
 * @returns The parsed JSON body, typed as `ModelScanResponse`.
 * @throws InternalError when the request itself fails.
 * @throws BadReq when the service answers with a non-OK status; the response
 *   body is attached to the error context as a string.
 */
export async function scanFile(file: Blob, file_name: string) {
  const url = `${config.avScanning.modelscan.protocol}://${config.avScanning.modelscan.host}:${config.avScanning.modelscan.port}`
  let res: Response

  try {
    const formData = new FormData()
    formData.append('in_file', file, file_name)

    res = await fetch(`${url}/scan/file`, {
      method: 'POST',
      headers: {
        accept: 'application/json',
      },
      body: formData,
    })
  } catch (err) {
    throw InternalError('Unable to communicate with the ModelScan service.', { err })
  }
  if (!res.ok) {
    throw BadReq('Unrecognised response returned by the ModelScan service.', {
      body: await readModelScanErrorBody(res),
    })
  }

  return (await res.json()) as ModelScanResponse
}

/**
 * Reads the body of a failed ModelScan response without ever throwing.
 *
 * An error response is not guaranteed to be JSON, so the body is read as text
 * first. Valid JSON is re-serialised compactly; anything else is returned
 * verbatim. An unreadable body yields an empty string so that the caller's
 * BadReq is never masked by a parse error.
 */
async function readModelScanErrorBody(res: Response): Promise<string> {
  let raw: string
  try {
    raw = await res.text()
  } catch {
    return ''
  }
  try {
    return JSON.stringify(JSON.parse(raw))
  } catch {
    return raw
  }
}

Diff for: backend/src/connectors/fileScanning/clamAv.ts

+2-2
Original file line numberDiff line numberDiff line change
@@ -25,7 +25,7 @@ export class ClamAvFileScanningConnector extends BaseFileScanningConnector {
2525
av = await new NodeClam().init({ clamdscan: config.avScanning.clamdscan })
2626
} catch (error) {
2727
throw ConfigurationError('Could not scan file as Clam AV is not running.', {
28-
clamAvConfig: config.avScanning,
28+
clamAvConfig: config.avScanning.clamdscan,
2929
})
3030
}
3131
}
@@ -35,7 +35,7 @@ export class ClamAvFileScanningConnector extends BaseFileScanningConnector {
3535
throw ConfigurationError(
3636
'Clam AV does not look like it is running. Check that it has been correctly initialised by calling the init function.',
3737
{
38-
clamAvConfig: config.avScanning,
38+
clamAvConfig: config.avScanning.clamdscan,
3939
},
4040
)
4141
}

Diff for: backend/src/connectors/fileScanning/index.ts

+11
Original file line numberDiff line numberDiff line change
@@ -2,10 +2,12 @@ import config from '../../utils/config.js'
22
import { ConfigurationError } from '../../utils/error.js'
33
import { BaseFileScanningConnector } from './Base.js'
44
import { ClamAvFileScanningConnector } from './clamAv.js'
5+
import { ModelScanFileScanningConnector } from './modelScan.js'
56
import { FileScanningWrapper } from './wrapper.js'
67

78
/**
 * Identifiers of the supported file scanning connectors. The values are the
 * strings compared against each configured file scanner kind.
 */
export const FileScanKind = {
  ClamAv: 'clamAV',
  ModelScan: 'modelScan',
} as const
1012
export type FileScanKindKeys = (typeof FileScanKind)[keyof typeof FileScanKind]
1113

@@ -26,6 +28,15 @@ export function runFileScanners(cache = true) {
2628
throw ConfigurationError('Could not configure or initialise Clam AV')
2729
}
2830
break
31+
case FileScanKind.ModelScan:
32+
try {
33+
const scanner = new ModelScanFileScanningConnector()
34+
await scanner.ping()
35+
fileScanConnectors.push(scanner)
36+
} catch (error) {
37+
throw ConfigurationError('Could not configure or initialise ModelScan')
38+
}
39+
break
2940
default:
3041
throw ConfigurationError(`'${fileScanner}' is not a valid file scanning kind.`, {
3142
validKinds: Object.values(FileScanKind),

Diff for: backend/src/connectors/fileScanning/modelScan.ts

+76
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,76 @@
1+
import { Response } from 'node-fetch'
2+
import { Readable } from 'stream'
3+
4+
import { getModelScanInfo, scanFile } from '../../clients/modelScan.js'
5+
import { getObjectStream } from '../../clients/s3.js'
6+
import { FileInterfaceDoc, ScanState } from '../../models/File.js'
7+
import log from '../../services/log.js'
8+
import config from '../../utils/config.js'
9+
import { ConfigurationError } from '../../utils/error.js'
10+
import { BaseFileScanningConnector, FileScanResult } from './Base.js'
11+
12+
export const modelScanToolName = 'ModelScan'
13+
14+
/**
 * File scanning connector that sends stored files to a ModelScan service and
 * converts its report into `FileScanResult` entries.
 */
export class ModelScanFileScanningConnector extends BaseFileScanningConnector {
  // No explicit constructor: the original only forwarded to `super()`.

  /** Returns the names of the tools this connector reports results for. */
  info() {
    return [modelScanToolName]
  }

  /**
   * Checks that the ModelScan service is reachable by requesting its info
   * endpoint.
   *
   * @throws ConfigurationError when the info request fails for any reason.
   */
  async ping() {
    try {
      // discard the results as we only want to know if the endpoint is reachable
      await getModelScanInfo()
    } catch (error) {
      throw ConfigurationError(
        'ModelScan does not look like it is running. Check that the service configuration is correct.',
        {
          modelScanConfig: config.avScanning.modelscan,
        },
      )
    }
  }

  /**
   * Scans a stored file with ModelScan.
   *
   * @param file - File document whose `bucket` and `path` locate the object to scan.
   * @returns A single-element list: a `Complete` result with the infection
   *   flag and formatted issues, or an `Error` result if the scan itself failed.
   * @throws ConfigurationError when the ModelScan service is not reachable.
   */
  async scan(file: FileInterfaceDoc): Promise<FileScanResult[]> {
    // Must be awaited: otherwise a failed ping is ignored by the scan and
    // surfaces only as an unhandled promise rejection.
    await this.ping()

    const s3Stream = (await getObjectStream(file.bucket, file.path)).Body as Readable
    try {
      // TODO: see if it's possible to directly send the Readable stream rather than a blob
      const fileBlob = await new Response(s3Stream).blob()
      const scanResults = await scanFile(fileBlob, file.name)

      // The summary count decides the infection flag; individual issues are
      // only formatted when at least one was reported.
      const isInfected = scanResults.summary.total_issues > 0
      const viruses: string[] = isInfected
        ? scanResults.issues.map((issue) => `${issue.severity}: ${issue.description}. ${issue.scanner}`)
        : []

      log.info(
        { modelId: file.modelId, fileId: file._id, name: file.name, result: { isInfected, viruses } },
        'Scan complete.',
      )
      return [
        {
          toolName: modelScanToolName,
          state: ScanState.Complete,
          isInfected,
          viruses,
        },
      ]
    } catch (error) {
      log.error({ error, modelId: file.modelId, fileId: file._id, name: file.name }, 'Scan errored.')
      return [
        {
          toolName: modelScanToolName,
          state: ScanState.Error,
        },
      ]
    }
  }
}

Diff for: backend/src/services/file.ts

+2-2
Original file line numberDiff line numberDiff line change
@@ -59,9 +59,9 @@ async function updateFileWithResults(_id: Schema.Types.ObjectId, results: FileSc
5959
)
6060
if (updateExistingResult.modifiedCount === 0) {
6161
await FileModel.updateOne(
62-
{ _id },
62+
{ _id, avScan: { $exists: true } },
6363
{
64-
$set: { avScan: { toolName: result.toolName, state: result.state } },
64+
$push: { avScan: { toolName: result.toolName, state: result.state } },
6565
},
6666
)
6767
}

Diff for: backend/src/utils/config.ts

+6
Original file line numberDiff line numberDiff line change
@@ -140,6 +140,12 @@ export interface Config {
140140
host: string
141141
port: number
142142
}
143+
144+
modelscan: {
145+
protocol: string
146+
host: string
147+
port: number
148+
}
143149
}
144150

145151
modelMirror: {
+41
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,41 @@
1+
// Vitest Snapshot v1, https://vitest.dev/guide/snapshot.html
2+
3+
exports[`clients > modelScan > getModelScanInfo > success 1`] = `
4+
[
5+
[
6+
"undefined://undefined:undefined/info",
7+
{
8+
"headers": {
9+
"Content-Type": "application/json",
10+
},
11+
"method": "GET",
12+
},
13+
],
14+
]
15+
`;
16+
17+
exports[`clients > modelScan > scanFile > success 1`] = `
18+
[
19+
[
20+
"undefined://undefined:undefined/scan/file",
21+
{
22+
"body": FormData {
23+
Symbol(state): [
24+
{
25+
"name": "in_file",
26+
"value": File {
27+
Symbol(kHandle): Blob {},
28+
Symbol(kLength): 0,
29+
Symbol(kType): "application/x-hdf5",
30+
},
31+
},
32+
],
33+
},
34+
"headers": {
35+
"accept": "application/json",
36+
},
37+
"method": "POST",
38+
},
39+
],
40+
]
41+
`;

0 commit comments

Comments
 (0)