From 9c517329db079c922fe7f092a78b658cb295e158 Mon Sep 17 00:00:00 2001 From: Viet Nguyen <3805254+vnugent@users.noreply.github.com> Date: Sat, 27 Jan 2024 14:50:38 -0700 Subject: [PATCH] fix: keep stats and geo data of all area up-to-date (#387) * fix: recalculate ancestors' bbox/boundary on crag's lat/lng change. propagate changes up to all ancenstors. * fix unit tests --- .vscode/launch.json | 2 +- jest.config.cjs | 1 + package.json | 4 +- src/__tests__/history.ts | 12 ++- src/db/AreaTypes.ts | 2 + .../updateAllAreas.ts} | 54 +++++++----- src/db/utils/jobs/UpdateStatsJob.ts | 4 +- src/geo-utils.ts | 2 +- src/model/MutableAreaDataSource.ts | 88 +++++++++++++++++-- src/model/MutableClimbDataSource.ts | 49 ++++++++++- 10 files changed, 176 insertions(+), 42 deletions(-) rename src/db/utils/jobs/{TreeUpdater.ts => TreeUpdaters/updateAllAreas.ts} (76%) diff --git a/.vscode/launch.json b/.vscode/launch.json index 388786e2..2c7343b1 100644 --- a/.vscode/launch.json +++ b/.vscode/launch.json @@ -87,7 +87,7 @@ "--inspect-brk", "${workspaceRoot}/node_modules/.bin/jest", "--runInBand", - "MediaData" + "history" ], "console": "integratedTerminal", "internalConsoleOptions": "neverOpen", diff --git a/jest.config.cjs b/jest.config.cjs index 18bb8139..29335854 100644 --- a/jest.config.cjs +++ b/jest.config.cjs @@ -1,4 +1,5 @@ module.exports = { + testTimeout: 2 * 60 * 1000, moduleNameMapper: { '^(\\.{1,2}/.*)\\.js$': '$1' }, diff --git a/package.json b/package.json index 0b772102..419476ea 100644 --- a/package.json +++ b/package.json @@ -74,8 +74,6 @@ "clean": "tsc -b --clean && rm -rf build/*", "serve": "yarn build && node --experimental-json-modules build/main.js", "serve-dev": "echo \"🚨 LOCAL_DEV_BYPASS_AUTH enabled 🚨\" && LOCAL_DEV_BYPASS_AUTH=true yarn serve", - "refresh-db": "./refresh-db.sh", - "seed-usa": "yarn build && node build/db/import/usa/USADay0Seed.js", "seed-db": "./seed-db.sh", "add-countries": "yarn build && node build/db/utils/jobs/AddCountriesJob.js", "update-stats": "yarn build && node build/db/utils/jobs/UpdateStatsJob.js", @@ -106,4 +104,4 @@ "engines": { "node": ">=16.14.0" } -} +} \ No newline at end of file diff --git a/src/__tests__/history.ts b/src/__tests__/history.ts index 202747b2..d33e218e 100644 --- a/src/__tests__/history.ts +++ b/src/__tests__/history.ts @@ -119,9 +119,15 @@ describe('history API', () => { expect(climbChange.operation).toBe('updateClimb') expect(climbChange.editedBy).toBe(userUuid) - // Two changes: Insert the climb, update the parent area - // Ordering is non-deterministic. - expect(climbChange.changes.length).toBe(2) + + /** + * Four changes (Ordering is non-deterministic) + * 1. Insert the climb + * 2. Update the parent area + * 3. Update aggregate object on crag + * 4. Update the parent area + */ + expect(climbChange.changes.length).toBe(4) const insertChange = climbChange.changes.filter(c => c.dbOp === 'insert')[0] const updateChange = climbChange.changes.filter(c => c.dbOp === 'update')[0] expect(insertChange.fullDocument.uuid).toBe(climbIds[0]) diff --git a/src/db/AreaTypes.ts b/src/db/AreaTypes.ts index 1fc5e5eb..51545d8f 100644 --- a/src/db/AreaTypes.ts +++ b/src/db/AreaTypes.ts @@ -8,6 +8,8 @@ import { GradeContexts } from '../GradeUtils.js' import { ExperimentalAuthorType } from './UserTypes.js' import { AuthorMetadata } from '../types.js' +export type AreaDocumnent = mongoose.Document & AreaType + /** * Areas are a grouping mechanism in the OpenBeta data model that allow * the organization of climbs into geospatial and hierarchical structures. diff --git a/src/db/utils/jobs/TreeUpdater.ts b/src/db/utils/jobs/TreeUpdaters/updateAllAreas.ts similarity index 76% rename from src/db/utils/jobs/TreeUpdater.ts rename to src/db/utils/jobs/TreeUpdaters/updateAllAreas.ts index e199f658..e7ff4201 100644 --- a/src/db/utils/jobs/TreeUpdater.ts +++ b/src/db/utils/jobs/TreeUpdaters/updateAllAreas.ts @@ -5,10 +5,11 @@ import bboxFromGeojson from '@turf/bbox' import convexHull from '@turf/convex' import pLimit from 'p-limit' -import { getAreaModel } from '../../AreaSchema.js' -import { AreaType, AggregateType } from '../../AreaTypes.js' -import { areaDensity } from '../../../geo-utils.js' -import { mergeAggregates } from '../Aggregate.js' +import { getAreaModel } from '../../../AreaSchema.js' +import { AreaType, AggregateType } from '../../../AreaTypes.js' +import { areaDensity } from '../../../../geo-utils.js' +import { mergeAggregates } from '../../Aggregate.js' +import { ChangeRecordMetadataType } from '../../../../db/ChangeLogType.js' const limiter = pLimit(1000) @@ -31,7 +32,7 @@ type AreaMongoType = mongoose.Document & AreaType * create a new bottom-up traversal, starting from the updated node/area and bubble the * update up to its parent. */ -export const visitAllAreas = async (): Promise => { +export const updateAllAreas = async (): Promise => { const areaModel = getAreaModel('areas') // Step 1: Start with 2nd level of tree, eg 'state' or 'province' level and recursively update all nodes. @@ -57,7 +58,7 @@ export const visitAllAreas = async (): Promise => { } } -interface ResultType { +export interface StatsSummary { density: number totalClimbs: number bbox?: BBox @@ -66,7 +67,7 @@ interface ResultType { polygon?: Polygon } -async function postOrderVisit (node: AreaMongoType): Promise { +async function postOrderVisit (node: AreaMongoType): Promise { if (node.metadata.leaf || node.children.length === 0) { return leafReducer((node.toObject() as AreaType)) } @@ -91,7 +92,7 @@ async function postOrderVisit (node: AreaMongoType): Promise { * @param node leaf area/crag * @returns aggregate type */ -const leafReducer = (node: AreaType): ResultType => { +export const leafReducer = (node: AreaType): StatsSummary => { return { totalClimbs: node.totalClimbs, bbox: node.metadata.bbox, @@ -115,9 +116,9 @@ const leafReducer = (node: AreaType): ResultType => { /** * Calculate convex hull polyon contain all child areas */ -const calculatePolygonFromChildren = (nodes: ResultType[]): Feature | null => { +const calculatePolygonFromChildren = (nodes: StatsSummary[]): Feature | null => { const childAsPolygons = nodes.reduce>>((acc, curr) => { - if (curr.bbox != null) { + if (Array.isArray(curr.bbox) && curr?.bbox.length === 4) { acc.push(bbox2Polygon(curr.bbox)) } return acc @@ -127,14 +128,19 @@ const calculatePolygonFromChildren = (nodes: ResultType[]): Feature | n return polygonFeature } +interface OPTIONS { + session: mongoose.ClientSession + changeRecord: ChangeRecordMetadataType +} + /** * Calculate stats from a list of nodes - * @param result nodes + * @param childResults nodes * @param parent parent node to save stats to * @returns Calculated stats */ -const nodesReducer = async (result: ResultType[], parent: AreaMongoType): Promise => { - const initial: ResultType = { +export const nodesReducer = async (childResults: StatsSummary[], parent: AreaMongoType, options?: OPTIONS): Promise => { + const initial: StatsSummary = { totalClimbs: 0, bbox: undefined, lnglat: undefined, @@ -152,16 +158,14 @@ const nodesReducer = async (result: ResultType[], parent: AreaMongoType): Promis } } } - let nodeSummary: ResultType = initial - if (result.length === 0) { + let nodeSummary: StatsSummary = initial + if (childResults.length === 0) { const { totalClimbs, aggregate, density } = initial parent.totalClimbs = totalClimbs parent.density = density parent.aggregate = aggregate - await parent.save() - return initial } else { - nodeSummary = result.reduce((acc, curr) => { + nodeSummary = childResults.reduce((acc, curr) => { const { totalClimbs, aggregate, lnglat, bbox } = curr return { totalClimbs: acc.totalClimbs + totalClimbs, @@ -173,9 +177,9 @@ const nodesReducer = async (result: ResultType[], parent: AreaMongoType): Promis } }, initial) - const polygon = calculatePolygonFromChildren(result) + const polygon = calculatePolygonFromChildren(childResults) nodeSummary.polygon = polygon?.geometry - nodeSummary.bbox = bboxFromGeojson(polygon) + nodeSummary.bbox = polygon == null ? undefined : bboxFromGeojson(polygon) nodeSummary.density = areaDensity(nodeSummary.bbox, nodeSummary.totalClimbs) const { totalClimbs, bbox, density, aggregate } = nodeSummary @@ -185,7 +189,15 @@ const nodesReducer = async (result: ResultType[], parent: AreaMongoType): Promis parent.density = density parent.aggregate = aggregate parent.metadata.polygon = nodeSummary.polygon + } + + if (options != null) { + const { session, changeRecord } = options + parent._change = changeRecord + parent.updatedBy = changeRecord.user + await parent.save({ session }) + } else { await parent.save() - return nodeSummary } + return nodeSummary } diff --git a/src/db/utils/jobs/UpdateStatsJob.ts b/src/db/utils/jobs/UpdateStatsJob.ts index e4fb853f..4967a930 100644 --- a/src/db/utils/jobs/UpdateStatsJob.ts +++ b/src/db/utils/jobs/UpdateStatsJob.ts @@ -1,5 +1,5 @@ import { connectDB, gracefulExit } from '../../index.js' -import { visitAllAreas } from './TreeUpdater.js' +import { updateAllAreas } from './TreeUpdaters/updateAllAreas.js' import { visitAllCrags } from './CragUpdater.js' import { logger } from '../../../logger.js' @@ -7,7 +7,7 @@ const onConnected = async (): Promise => { logger.info('Initializing database') console.time('Calculating global stats') await visitAllCrags() - await visitAllAreas() + await updateAllAreas() console.timeEnd('Calculating global stats') await gracefulExit() return await Promise.resolve() diff --git a/src/geo-utils.ts b/src/geo-utils.ts index 62a2fd64..f46c332d 100644 --- a/src/geo-utils.ts +++ b/src/geo-utils.ts @@ -35,7 +35,7 @@ export const bboxFromList = (bboxList: BBoxType[]): any => { * @returns total climbs per km sq */ export const areaDensity = (bbox: BBoxType | undefined, totalClimbs: number): number => { - if (bbox == null) return 0 + if (!Array.isArray(bbox) || bbox?.length !== 4) return 0 const areaInKm = area(bboxPolygon(bbox)) / 1000000 const minArea = areaInKm < 5 ? 5 : areaInKm return totalClimbs / minArea diff --git a/src/model/MutableAreaDataSource.ts b/src/model/MutableAreaDataSource.ts index f5b72ed7..fbbd7428 100644 --- a/src/model/MutableAreaDataSource.ts +++ b/src/model/MutableAreaDataSource.ts @@ -1,4 +1,4 @@ -import { geometry } from '@turf/helpers' +import { Point, geometry } from '@turf/helpers' import muuid, { MUUID } from 'uuid-mongodb' import { v5 as uuidv5, NIL } from 'uuid' import mongoose, { ClientSession } from 'mongoose' @@ -6,8 +6,9 @@ import { produce } from 'immer' import { UserInputError } from 'apollo-server' import isoCountries from 'i18n-iso-countries' import enJson from 'i18n-iso-countries/langs/en.json' assert { type: 'json' } +import bbox2Polygon from '@turf/bbox-polygon' -import { AreaType, AreaEditableFieldsType, OperationType, UpdateSortingOrderType } from '../db/AreaTypes.js' +import { AreaType, AreaDocumnent, AreaEditableFieldsType, OperationType, UpdateSortingOrderType } from '../db/AreaTypes.js' import AreaDataSource from './AreaDataSource.js' import { createRootNode } from '../db/import/usa/AreaTree.js' import { makeDBArea } from '../db/import/usa/AreaTransformer.js' @@ -19,11 +20,11 @@ import { GradeContexts } from '../GradeUtils.js' import { sanitizeStrict } from '../utils/sanitize.js' import { ExperimentalAuthorType } from '../db/UserTypes.js' import { createInstance as createExperimentalUserDataSource } from '../model/ExperimentalUserDataSource.js' +import { StatsSummary, leafReducer, nodesReducer } from '../db/utils/jobs/TreeUpdaters/updateAllAreas.js' +import { bboxFrom } from '../geo-utils.js' isoCountries.registerLocale(enJson) -type AreaDocumnent = mongoose.Document & AreaType - export default class MutableAreaDataSource extends AreaDataSource { experimentalUserDataSource = createExperimentalUserDataSource() @@ -222,7 +223,7 @@ export default class MutableAreaDataSource extends AreaDataSource { deleting: { $ne: null } } - const area = await this.areaModel.findOne(filter).session(session).lean() + const area = await this.areaModel.findOne(filter).session(session).orFail() if (area == null) { throw new Error('Delete area error. Reason: area not found.') @@ -269,6 +270,8 @@ export default class MutableAreaDataSource extends AreaDataSource { timestamps: false }).orFail().session(session) + await this.updateLeafStatsAndGeoData(session, _change, area, true) + // In order to be able to record the deleted document in area_history, we mark (update) the // document for deletion (set ttl index = now). // See https://www.mongodb.com/community/forums/t/change-stream-fulldocument-on-delete/15963 @@ -363,10 +366,20 @@ export default class MutableAreaDataSource extends AreaDataSource { area.set({ 'content.description': sanitized }) } - if (lat != null && lng != null) { // we should already validate lat,lng before in GQL layer + const latLngHasChanged = lat != null && lng != null + if (latLngHasChanged) { // we should already validate lat,lng before in GQL layer + const point = geometry('Point', [lng, lat]) as Point area.set({ - 'metadata.lnglat': geometry('Point', [lng, lat]) + 'metadata.lnglat': point }) + if (area.metadata.leaf || (area.metadata?.isBoulder ?? false)) { + const bbox = bboxFrom(point) + area.set({ + 'metadata.bbox': bbox, + 'metadata.polygon': bbox == null ? undefined : bbox2Polygon(bbox).geometry + }) + await this.updateLeafStatsAndGeoData(session, _change, area) + } } const cursor = await area.save() @@ -471,6 +484,63 @@ export default class MutableAreaDataSource extends AreaDataSource { return ret } + /** + * Update area stats and geo data for a given leaf node and its ancestors. + * @param session + * @param changeRecord + * @param startingArea + * @param excludeStartingArea true to exlude the starting area from the update. Useful when deleting an area. + */ + async updateLeafStatsAndGeoData (session: ClientSession, changeRecord: ChangeRecordMetadataType, startingArea: AreaDocumnent, excludeStartingArea: boolean = false): Promise { + /** + * Update function. For each node, recalculate stats and recursively update its acenstors until we reach the country node. + */ + const updateFn = async (session: ClientSession, changeRecord: ChangeRecordMetadataType, area: AreaDocumnent, childSummary?: StatsSummary): Promise => { + if (area.pathTokens.length <= 1) { + // we're at the root country node + return + } + + const ancestors = area.ancestors.split(',') + const parentUuid = muuid.from(ancestors[ancestors.length - 2]) + const parentArea = + await this.areaModel.findOne({ 'metadata.area_id': parentUuid }) + .batchSize(10) + .populate<{ children: AreaDocumnent[] }>({ path: 'children', model: this.areaModel }) + .allowDiskUse(true) + .session(session) + .orFail() + + const acc: StatsSummary[] = [] + /** + * Collect existing stats from all children. For affected node, use the stats from previous calculation. + */ + for (const childArea of parentArea.children) { + if (childArea._id.equals(area._id)) { + if (childSummary != null) acc.push(childSummary) + } else { + acc.push(leafReducer(childArea.toObject())) + } + } + + const current = await nodesReducer(acc, parentArea as any as AreaDocumnent, { session, changeRecord }) + await updateFn(session, changeRecord, parentArea as any as AreaDocumnent, current) + } + + /** + * Begin calculation + */ + if (!startingArea.metadata.leaf && !(startingArea.metadata.isBoulder ?? false)) { + return + } + if (excludeStartingArea) { + await updateFn(session, changeRecord, startingArea) + } else { + const leafStats = leafReducer(startingArea.toObject()) + await updateFn(session, changeRecord, startingArea, leafStats) + } + } + static instance: MutableAreaDataSource static getInstance (): MutableAreaDataSource { @@ -500,7 +570,9 @@ export const newAreaHelper = (areaName: string, parentAncestors: string, parentP leaf: false, area_id: uuid, leftRightIndex: -1, - ext_id: '' + ext_id: '', + bbox: undefined, + polygon: undefined }, ancestors, climbs: [], diff --git a/src/model/MutableClimbDataSource.ts b/src/model/MutableClimbDataSource.ts index 10375dd8..d8dd5753 100644 --- a/src/model/MutableClimbDataSource.ts +++ b/src/model/MutableClimbDataSource.ts @@ -2,7 +2,8 @@ import muid, { MUUID } from 'uuid-mongodb' import { UserInputError } from 'apollo-server' import { ClientSession } from 'mongoose' -import { ClimbChangeDocType, ClimbChangeInputType, ClimbEditOperationType, IPitch } from '../db/ClimbTypes.js' +import { AreaDocumnent } from '../db/AreaTypes.js' +import { ClimbType, ClimbChangeDocType, ClimbChangeInputType, ClimbEditOperationType, IPitch } from '../db/ClimbTypes.js' import ClimbDataSource from './ClimbDataSource.js' import { createInstance as createExperimentalUserDataSource } from './ExperimentalUserDataSource.js' import { sanitizeDisciplines, gradeContextToGradeScales, createGradeObject } from '../GradeUtils.js' @@ -10,6 +11,9 @@ import { getClimbModel } from '../db/ClimbSchema.js' import { ChangeRecordMetadataType } from '../db/ChangeLogType.js' import { changelogDataSource } from './ChangeLogDataSource.js' import { sanitize, sanitizeStrict } from '../utils/sanitize.js' +import MutableAreaDataSource from './MutableAreaDataSource.js' +import { aggregateCragStats } from '../db/utils/Aggregate.js' +import { getAreaModel } from '../db/AreaSchema.js' export default class MutableClimbDataSource extends ClimbDataSource { experimentalUserDataSource = createExperimentalUserDataSource() @@ -219,8 +223,11 @@ export default class MutableClimbDataSource extends ClimbDataSource { if (idList.length > 0) { parent.set({ climbs: parent.climbs.concat(idList) }) } + await parent.save() + await updateStats(parent, session, _change) + if (idStrList.length === newClimbIds.length) { return idStrList } @@ -266,11 +273,22 @@ export default class MutableClimbDataSource extends ClimbDataSource { // see https://jira.mongodb.org/browse/NODE-2014 await session.withTransaction( async (session) => { + const changeset = await changelogDataSource.create(session, userId, ClimbEditOperationType.deleteClimb) + const _change: ChangeRecordMetadataType = { + user: userId, + historyId: changeset._id, + operation: ClimbEditOperationType.deleteClimb, + seq: 0 + } // Remove climb IDs from parent.climbs[] await this.areaModel.updateOne( { 'metadata.area_id': parentId }, { - $pullAll: { climbs: idList } + $pullAll: { climbs: idList }, + $set: { + _change, + updatedBy: userId + } }, { session }) @@ -284,7 +302,8 @@ export default class MutableClimbDataSource extends ClimbDataSource { [{ $set: { _deleting: new Date(), - updatedBy: userId + updatedBy: userId, + _change } }], { @@ -292,6 +311,7 @@ export default class MutableClimbDataSource extends ClimbDataSource { session }).lean() ret = rs.modifiedCount + await updateStats(parentId, session, _change) }) return ret } @@ -307,3 +327,26 @@ export default class MutableClimbDataSource extends ClimbDataSource { return MutableClimbDataSource.instance } } + +/** + * Update stats for an area and its ancestors + * @param areaIdOrAreaCursor + * @param session + * @param changeRecord + */ +const updateStats = async (areaIdOrAreaCursor: MUUID | AreaDocumnent, session: ClientSession, changeRecord: ChangeRecordMetadataType): Promise => { + let area: AreaDocumnent + if ((areaIdOrAreaCursor as AreaDocumnent).totalClimbs != null) { + area = areaIdOrAreaCursor as AreaDocumnent + } else { + area = await getAreaModel().findOne({ 'metadata.area_id': areaIdOrAreaCursor as MUUID }).session(session).orFail() + } + + await area.populate<{ climbs: ClimbType[] }>({ path: 'climbs', model: getClimbModel() }) + area.set({ + totalClimbs: area.climbs.length, + aggregate: aggregateCragStats(area.toObject()) + }) + await area.save() + await MutableAreaDataSource.getInstance().updateLeafStatsAndGeoData(session, changeRecord, area) +}