Skip to content

Commit ac29298

Browse files
committed
feat: work on size and dataUpdatedAt metadata
1 parent 3d53fcf commit ac29298

18 files changed

Lines changed: 429 additions & 23 deletions

File tree

api/src/app.ts

Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,8 @@ import { publicThumbnailsRouter } from './thumbnails/router.ts'
1212
import remoteRegistriesRouter from './remote-registries/router.ts'
1313
import mongo from '#mongo'
1414
import { cleanFiles } from './files-storage/index.ts'
15+
import { backfillSize } from './upgrades/backfill-size.ts'
16+
import { backfillDataUpdatedAt } from './upgrades/backfill-data-updated-at.ts'
1517
import config from '#config'
1618

1719
export const app = express()
@@ -63,6 +65,35 @@ if (process.env.NODE_ENV === 'development') {
6365
}
6466
res.send()
6567
})
68+
69+
// TODO: remove with backfill-size upgrade
70+
app.post('/api/test-env/backfill-size/reset', async (req, res) => {
71+
assertReqInternal(req)
72+
await mongo.artefacts.updateMany({}, { $unset: { size: '' } })
73+
await mongo.versions.updateMany({}, { $unset: { size: '' } })
74+
res.send()
75+
})
76+
77+
// TODO: remove with backfill-size upgrade
78+
app.post('/api/test-env/backfill-size/run', async (req, res) => {
79+
assertReqInternal(req)
80+
await backfillSize()
81+
res.send()
82+
})
83+
84+
// TODO: remove with backfill-data-updated-at upgrade
85+
app.post('/api/test-env/backfill-data-updated-at/reset', async (req, res) => {
86+
assertReqInternal(req)
87+
await mongo.artefacts.updateMany({}, { $unset: { dataUpdatedAt: '' } })
88+
res.send()
89+
})
90+
91+
// TODO: remove with backfill-data-updated-at upgrade
92+
app.post('/api/test-env/backfill-data-updated-at/run', async (req, res) => {
93+
assertReqInternal(req)
94+
await backfillDataUpdatedAt()
95+
res.send()
96+
})
6697
}
6798

6899
app.use('/api', (req, res) => res.status(404).send('unknown api endpoint'))

api/src/artefacts/router.ts

Lines changed: 10 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,7 @@ import mongo from '#mongo'
1212
import config from '#config'
1313
import { authenticateApiKey, tryAuthenticateReadKey } from '../auth.ts'
1414
import { artefactAccessFilter, artefactAccessFilterForAccount, assertDownloadAccess, assertDownloadAccessForAccount } from '../access.ts'
15-
import { writeFile, readFile, getDownloadUrl, deleteFile, moveFile } from '../files-storage/index.ts'
15+
import { writeFile, readFile, getDownloadUrl, deleteFile, moveFile, fileStats } from '../files-storage/index.ts'
1616
import { extractManifest, parseSemver, resolveVersionQuery, pruneOldVersions } from './service.ts'
1717
import * as patchReqBody from '#doc/artefacts/patch-req/index.ts'
1818
import { artefactThumbnailRouter } from '../thumbnails/router.ts'
@@ -92,7 +92,7 @@ router.get('/', async (req, res, next) => {
9292
}
9393
const skip = Math.max(0, Math.min(parseInt(req.query.skip as string) || 0, 100000))
9494
const size = Math.min(parseInt(req.query.size as string) || 10, 100)
95-
const sort: Record<string, 1 | -1> = req.query.sort === 'name' ? { name: 1 } : { updatedAt: -1 }
95+
const sort: Record<string, 1 | -1> = req.query.sort === 'name' ? { name: 1 } : { dataUpdatedAt: -1 }
9696

9797
// Text search on name
9898
if (req.query.q) {
@@ -265,6 +265,7 @@ router.post('/:name/versions', async (req, res, next) => {
265265
await moveFile(stagingPath, tarballPath)
266266
stagingStored = false
267267
finalTarballPath = tarballPath
268+
const { size } = await fileStats(tarballPath)
268269

269270
// Upsert artefact
270271
const now = new Date().toISOString()
@@ -278,7 +279,8 @@ router.post('/:name/versions', async (req, res, next) => {
278279
category,
279280
...(manifest.processingConfigSchema ? { processingConfigSchema: manifest.processingConfigSchema } : {}),
280281
...(manifest.applicationConfigSchema ? { applicationConfigSchema: manifest.applicationConfigSchema } : {}),
281-
updatedAt: now
282+
updatedAt: now,
283+
dataUpdatedAt: now
282284
},
283285
$setOnInsert: {
284286
_id: artefactId,
@@ -302,6 +304,7 @@ router.post('/:name/versions', async (req, res, next) => {
302304
...(architecture ? { architecture } : {}),
303305
...semverParts,
304306
tarballPath,
307+
size,
305308
uploadedAt: now,
306309
uploadedBy: apiKey
307310
? { apiKeyId: apiKey._id, apiKeyName: apiKey.name, shortId: apiKey.shortId }
@@ -442,6 +445,7 @@ router.post('/file/:name', async (req, res, next) => {
442445
await moveFile(stagingPath, filePath)
443446
stagingStored = false
444447
newFilePath = filePath
448+
const { size } = await fileStats(filePath)
445449

446450
const now = new Date().toISOString()
447451
await mongo.artefacts.updateOne(
@@ -450,13 +454,15 @@ router.post('/file/:name', async (req, res, next) => {
450454
$set: {
451455
filePath,
452456
fileName,
457+
size,
453458
category,
454459
...(title !== undefined ? { title } : {}),
455460
...(description !== undefined ? { description } : {}),
456461
uploadedBy: apiKey
457462
? { apiKeyId: apiKey._id, apiKeyName: apiKey.name, shortId: apiKey.shortId }
458463
: { internal: true },
459-
updatedAt: now
464+
updatedAt: now,
465+
dataUpdatedAt: now
460466
},
461467
$setOnInsert: {
462468
_id: artefactId,

api/src/files-storage/fs.ts

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -46,6 +46,11 @@ export class FsBackend implements FileBackend {
4646
}
4747
}
4848

49+
// Returns the size and modification time of the stored file at `path`.
// `path` is resolved against the storage base path before calling `stat`.
// NOTE(review): `stat` is presumably node:fs/promises `stat` (import not visible in this hunk) — confirm.
async stats (path: string) {
50+
const { size, mtime } = await stat(resolvePath(basePath(), path))
51+
// expose mtime under the `lastModified` name used by the FileStats shape
return { size, lastModified: mtime }
52+
}
53+
4954
async move (srcPath: string, dstPath: string) {
5055
const src = resolvePath(basePath(), srcPath)
5156
const dst = resolvePath(basePath(), dstPath)

api/src/files-storage/index.ts

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,5 +11,6 @@ export const readFile = (path: string, ifModifiedSince?: string) => filesStorage
1111
export const getDownloadUrl = (path: string, opts: { filename: string }) => filesStorage.getDownloadUrl(path, opts)
1212
export const deleteFile = (path: string) => filesStorage.delete(path)
1313
export const fileExists = (path: string) => filesStorage.exists(path)
14+
export const fileStats = (path: string) => filesStorage.stats(path)
1415
export const moveFile = (src: string, dst: string) => filesStorage.move(src, dst)
1516
export const cleanFiles = () => filesStorage.clean()

api/src/files-storage/s3.ts

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -117,6 +117,14 @@ export class S3Backend implements FileBackend {
117117
}
118118
}
119119

120+
// Returns the size and last-modified time of the object stored under key `path`
// by sending a HeadObjectCommand on the configured bucket through `metadataClient`.
async stats (path: string) {
121+
const head = await this.metadataClient.send(new HeadObjectCommand({
122+
Bucket: config.s3!.bucket,
123+
Key: path
124+
}))
125+
// NOTE(review): the `!` assertions assume the HEAD response always carries
// ContentLength and LastModified — confirm against the SDK output type.
return { size: head.ContentLength!, lastModified: head.LastModified! }
126+
}
127+
120128
async move (srcPath: string, dstPath: string) {
121129
await this.copy(srcPath, dstPath)
122130
await this.metadataClient.send(new DeleteObjectCommand({

api/src/files-storage/types.ts

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,12 +6,18 @@ export interface ReadStreamResult {
66
lastModified: Date
77
}
88

9+
// Result shape of FileBackend.stats: the stored file's size and its last modification date.
export interface FileStats {
10+
size: number
11+
lastModified: Date
12+
}
13+
914
export interface FileBackend {
1015
writeStream (stream: Readable, path: string): Promise<void>
1116
readStream (path: string, ifModifiedSince?: string): Promise<ReadStreamResult>
1217
getDownloadUrl (path: string, opts: { filename: string }): Promise<string | null>
1318
delete (path: string): Promise<void>
1419
exists (path: string): Promise<boolean>
20+
stats (path: string): Promise<FileStats>
1521
move (srcPath: string, dstPath: string): Promise<void>
1622
clean (): Promise<void>
1723
}

api/src/remote-registries/sync.ts

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -39,6 +39,7 @@ const syncNpmArtefact = async (ax: AxiosInstance, remoteUrl: string, artefactId:
3939
semverPatch: rv.semverPatch,
4040
...(rv.semverPrerelease ? { semverPrerelease: rv.semverPrerelease } : {}),
4141
tarballPath: rv.tarballPath,
42+
...(typeof rv.size === 'number' ? { size: rv.size } : {}),
4243
uploadedAt: rv.uploadedAt,
4344
...(rv.uploadedBy ? { uploadedBy: rv.uploadedBy } : {})
4445
})
@@ -68,7 +69,8 @@ const syncNpmArtefact = async (ax: AxiosInstance, remoteUrl: string, artefactId:
6869
...(remoteArtefact.processingConfigSchema ? { processingConfigSchema: remoteArtefact.processingConfigSchema } : {}),
6970
...(remoteArtefact.applicationConfigSchema ? { applicationConfigSchema: remoteArtefact.applicationConfigSchema } : {}),
7071
origin: remoteUrl,
71-
updatedAt: now
72+
updatedAt: now,
73+
dataUpdatedAt: remoteArtefact.dataUpdatedAt || remoteArtefact.updatedAt
7274
},
7375
$setOnInsert: {
7476
_id: artefactId,
@@ -109,11 +111,13 @@ const syncFileArtefact = async (ax: AxiosInstance, remoteUrl: string, artefactId
109111
$set: {
110112
filePath,
111113
fileName,
114+
...(typeof remoteArtefact.size === 'number' ? { size: remoteArtefact.size } : {}),
112115
category: remoteArtefact.category,
113116
...(remoteArtefact.title ? { title: remoteArtefact.title } : {}),
114117
...(remoteArtefact.description ? { description: remoteArtefact.description } : {}),
115118
origin: remoteUrl,
116-
updatedAt: now
119+
updatedAt: now,
120+
dataUpdatedAt: remoteArtefact.dataUpdatedAt || remoteArtefact.updatedAt
117121
},
118122
$setOnInsert: {
119123
_id: artefactId,

api/src/server.ts

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,8 @@ import { app } from './app.ts'
99
import config from '#config'
1010
import mongo from '#mongo'
1111
import { syncAllRemoteRegistries } from './remote-registries/sync.ts'
12+
import { backfillSize } from './upgrades/backfill-size.ts'
13+
import { backfillDataUpdatedAt } from './upgrades/backfill-data-updated-at.ts'
1214

1315
const server = createServer(app)
1416
const httpTerminator = createHttpTerminator({ server })
@@ -46,6 +48,11 @@ export const start = async () => {
4648
})
4749
}, 24 * 60 * 60 * 1000)
4850

51+
// TODO: remove with backfill-size upgrade
52+
backfillSize().catch(err => internalError('backfill-size', err))
53+
// TODO: remove with backfill-data-updated-at upgrade
54+
backfillDataUpdatedAt().catch(err => internalError('backfill-data-updated-at', err))
55+
4956
console.log(`API server listening on port ${config.port}`)
5057
}
5158

api/src/upgrades/backfill-data-updated-at.ts

Lines changed: 40 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,40 @@
1+
// TODO: remove this backfill (and its callers / test-env endpoints / test) once
2+
// all instances have been upgraded past the introduction of artefact.dataUpdatedAt.
3+
// One-shot backfill: for npm artefacts we use the most recent version.uploadedAt;
4+
// for file artefacts we read the underlying object's last-modified time from
5+
// storage. updatedAt is only used as a last-resort fallback.
6+
7+
import { internalError } from '@data-fair/lib-node/observer.js'
8+
import locks from '@data-fair/lib-node/locks.js'
9+
import mongo from '#mongo'
10+
import { fileStats } from '../files-storage/index.ts'
11+
12+
// Sets `dataUpdatedAt` on every artefact document that does not have the field yet.
// Value chosen per artefact:
//   - format 'npm': `uploadedAt` of the version with the most recent `uploadedAt`;
//   - otherwise, when `filePath` is set: the stored file's `lastModified` as an ISO string;
//   - if neither produced a value: the artefact's own `updatedAt`.
// Failures are reported per artefact through `internalError` and do not stop the loop.
// Resolves without doing anything when the 'backfill-data-updated-at' lock is not acquired.
export const backfillDataUpdatedAt = async () => {
13+
// presumably prevents concurrent runs of this backfill — verify against the locks helper
const acquired = await locks.acquire('backfill-data-updated-at')
14+
if (!acquired) return
15+
16+
try {
17+
// only documents still missing the field are visited, so a re-run skips already processed artefacts
const artefacts = mongo.artefacts.find({ dataUpdatedAt: { $exists: false } })
18+
for await (const artefact of artefacts) {
19+
try {
20+
let dataUpdatedAt: string | undefined
21+
if (artefact.format === 'npm') {
22+
const latest = await mongo.versions.find({ artefactId: artefact._id })
23+
.sort({ uploadedAt: -1 })
24+
.limit(1)
25+
.next()
26+
if (latest?.uploadedAt) dataUpdatedAt = latest.uploadedAt
27+
} else if (artefact.filePath) {
28+
// if fileStats throws, control jumps to the catch below: the updatedAt fallback
// is NOT applied and this artefact stays without dataUpdatedAt
const { lastModified } = await fileStats(artefact.filePath)
29+
dataUpdatedAt = lastModified.toISOString()
30+
}
31+
// last-resort fallback: no version found, or no filePath on the artefact
if (!dataUpdatedAt) dataUpdatedAt = artefact.updatedAt
32+
await mongo.artefacts.updateOne({ _id: artefact._id }, { $set: { dataUpdatedAt } })
33+
} catch (err) {
34+
internalError('backfill-data-updated-at', `artefact ${artefact._id}: ${(err as Error).message || err}`)
35+
}
36+
}
37+
} finally {
38+
// release the lock whether the loop completed or threw
await locks.release('backfill-data-updated-at')
39+
}
40+
}

api/src/upgrades/backfill-size.ts

Lines changed: 37 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,37 @@
1+
// TODO: remove this backfill (and its callers / test-env endpoints / test) once
2+
// all instances have been upgraded past the introduction of size on artefacts/versions.
3+
// One-shot backfill that reads the underlying file size for documents missing it.
4+
5+
import { internalError } from '@data-fair/lib-node/observer.js'
6+
import locks from '@data-fair/lib-node/locks.js'
7+
import mongo from '#mongo'
8+
import { fileStats } from '../files-storage/index.ts'
9+
10+
// Fills in the missing `size` field in two passes:
//   1. every version without `size` gets the size of the file at its `tarballPath`;
//   2. every artefact with format 'file', a `filePath` and no `size` gets the size of that file.
// Failures are reported per document through `internalError` and do not stop the loop;
// a document whose file could not be read keeps no `size` and matches the query again on a later run.
// Resolves without doing anything when the 'backfill-size' lock is not acquired.
export const backfillSize = async () => {
11+
// presumably prevents concurrent runs of this backfill — verify against the locks helper
const acquired = await locks.acquire('backfill-size')
12+
if (!acquired) return
13+
14+
try {
15+
// pass 1: versions
const versions = mongo.versions.find({ size: { $exists: false } })
16+
for await (const version of versions) {
17+
try {
18+
const { size } = await fileStats(version.tarballPath)
19+
await mongo.versions.updateOne({ _id: version._id }, { $set: { size } })
20+
} catch (err) {
21+
internalError('backfill-size', `version ${version._id}: ${(err as Error).message || err}`)
22+
}
23+
}
24+
25+
// pass 2: file artefacts; the query guarantees filePath exists, hence the `!` below
const artefacts = mongo.artefacts.find({ format: 'file', filePath: { $exists: true }, size: { $exists: false } })
26+
for await (const artefact of artefacts) {
27+
try {
28+
const { size } = await fileStats(artefact.filePath!)
29+
await mongo.artefacts.updateOne({ _id: artefact._id }, { $set: { size } })
30+
} catch (err) {
31+
internalError('backfill-size', `artefact ${artefact._id}: ${(err as Error).message || err}`)
32+
}
33+
}
34+
} finally {
35+
// release the lock whether the passes completed or threw
await locks.release('backfill-size')
36+
}
37+
}

0 commit comments

Comments
 (0)