Skip to content

Commit

Permalink
feat: backfill licensed users (#8791)
Browse files Browse the repository at this point in the history
**This migration introduces a query that calculates the licensed user
counts and inserts them into the licensed_users table.**

**The logic ensures that:**

1. All users created up to a specific date are included as active users
until they are explicitly deleted.
2. Deleted users are excluded after their deletion date, except when
their deletion date falls within the last 30 days or before their
creation date.
3. The migration avoids duplicating data by ensuring records are only
inserted if they don’t already exist in the licensed_users table.


**Logic Breakdown:**

**Identify User Events (user_events):** Extracts email addresses from
user-related events (user-created and user-deleted) and tracks the type
and timestamp of the event. This step ensures the ability to
differentiate between user creation and deletion activities.
**Generate a Date Range (dates):** Creates a continuous range of dates
spanning from the earliest recorded event up to the current date. This
ensures we analyze every date, even those without events.
**Determine Active Users (active_emails):** Links dates with user events
to calculate the status of each email address (active or deleted) on a
given day. This step handles:

- The user's creation date.
- The user's deletion date (if applicable).

**Calculate Daily Active User Counts (result):** 
For each date, counts the distinct email addresses that are active based
on the conditions:

- The user has no deletion date.
- The user's deletion date is within the last 30 days relative to the
current date.
- The user's creation date is before the deletion date.
  • Loading branch information
sjaanus authored Nov 20, 2024
1 parent 74535e9 commit 4234020
Show file tree
Hide file tree
Showing 2 changed files with 289 additions and 0 deletions.
85 changes: 85 additions & 0 deletions src/migrations/20241119105837-licensed-users-backfill.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,85 @@
exports.up = (db, cb) => {
db.runSql(`
WITH user_events AS (
SELECT
DISTINCT CASE WHEN type = 'user-deleted' THEN pre_data ->> 'email' ELSE data ->> 'email' END AS email,
type,
created_at AS event_date
FROM
events
WHERE
type IN ('user-created', 'user-deleted')
),
dates AS (
WITH RECURSIVE generated_dates AS (
SELECT
MIN(event_date):: timestamp AS date
FROM
user_events
UNION ALL
SELECT
date + INTERVAL '1 day'
FROM
generated_dates
WHERE
date + INTERVAL '1 day' <= CURRENT_DATE
)
SELECT
date :: date
FROM
generated_dates
),
active_emails AS (
SELECT
d.date,
ue.email,
MAX(
CASE WHEN ue.type = 'user-created' THEN ue.event_date ELSE NULL END
) AS created_date,
MAX(
CASE WHEN ue.type = 'user-deleted' THEN ue.event_date ELSE NULL END
) AS deleted_date
FROM
dates d
LEFT JOIN user_events ue ON ue.event_date <= d.date
GROUP BY
d.date,
ue.email
),
result AS (
SELECT
d.date,
COALESCE(
COUNT(
DISTINCT CASE
WHEN ae.deleted_date IS NULL
OR ae.deleted_date >= d.date - INTERVAL '30 days'
OR ae.deleted_date < ae.created_date
THEN ae.email
ELSE NULL
END
),
0
) AS active_emails_count
FROM
dates d
LEFT JOIN active_emails ae ON d.date = ae.date
GROUP BY
d.date
ORDER BY
d.date
) INSERT INTO licensed_users (date, count)
SELECT date, active_emails_count
FROM result
WHERE EXISTS (
SELECT 1 FROM user_events
)
ON CONFLICT (date) DO NOTHING;
`, cb);

};

exports.down = (db, cb) => {
db.runSql(``, cb);
};

204 changes: 204 additions & 0 deletions src/test/e2e/licensed-users-backfill.test.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,204 @@
import { getDbConfig } from './helpers/database-config';
import { createTestConfig } from '../config/test-config';
import { getInstance } from 'db-migrate';
import { Client } from 'pg';

async function initSchema(db) {
const client = new Client(db);
await client.connect();
await client.query(`DROP SCHEMA IF EXISTS ${db.schema} CASCADE`);
await client.query(`CREATE SCHEMA IF NOT EXISTS ${db.schema}`);
await client.end();
}

async function insertEvents(client, events) {
const values = events
.map(
(e) =>
`('${e.type}', '${JSON.stringify(e.data || {})}', '${JSON.stringify(
e.pre_data || {},
)}', '${e.created_at}', '${e.created_by}')`,
)
.join(',');

await client.query(`
INSERT INTO events (type, data, pre_data, created_at, created_by)
VALUES ${values};
`);
}

describe('licensed_users backfill', () => {
jest.setTimeout(15000);

let client: any;
let dbm: any;
const config = createTestConfig({
db: {
...getDbConfig(),
pool: { min: 1, max: 4 },
schema: 'licensed_users_test',
ssl: false,
},
});

beforeAll(async () => {
await initSchema(config.db);

dbm = getInstance(true, {
cwd: `${__dirname}/../../`,
config: { e2e: { ...config.db, connectionTimeoutMillis: 2000 } },
env: 'e2e',
});

await dbm.up('20241114103646-licensed-users.js');
client = new Client(config.db);
await client.connect();
await client.query(`SET search_path = 'licensed_users_test';`);
});

afterAll(async () => {
await client.end();
await dbm.reset();
});

beforeEach(async () => {
await client.query('delete from events;');
await client.query('delete from licensed_users;');
await client.query(
"DELETE FROM migrations WHERE name = '/20241119105837-licensed-users-backfill';",
);
});

test('Counts users from their creation date until their deletion date and 30 days', async () => {
await insertEvents(client, [
{
type: 'user-created',
data: { email: '[email protected]' },
created_at: '2024-10-01',
created_by: 'test',
},
{
type: 'user-deleted',
pre_data: { email: '[email protected]' },
created_at: '2024-10-05',
created_by: 'test',
},
]);

await dbm.up('20241119105837-licensed-users-backfill.js');

const { rows } = await client.query(
"SELECT TO_CHAR(date, 'YYYY-MM-DD') AS date, count FROM licensed_users ORDER BY date;",
);
expect(rows.find((row) => row.date === '2024-10-01').count).toBe(1);
expect(rows.find((row) => row.date === '2024-10-02').count).toBe(1);
expect(rows.find((row) => row.date === '2024-10-05').count).toBe(1);
expect(rows.find((row) => row.date === '2024-11-04').count).toBe(1);
expect(rows.find((row) => row.date === '2024-11-05').count).toBe(0); // 30 days has passed
});

test('Counts multiple users correctly over their active periods, including 30-day retention from deletion', async () => {
await insertEvents(client, [
{
type: 'user-created',
data: { email: '[email protected]' },
created_at: '2024-09-01',
created_by: 'test',
},
{
type: 'user-created',
data: { email: '[email protected]' },
created_at: '2024-10-01',
created_by: 'test',
},
{
type: 'user-deleted',
pre_data: { email: '[email protected]' },
created_at: '2024-10-05',
created_by: 'test',
},
]);

await dbm.up('20241119105837-licensed-users-backfill.js');

const { rows } = await client.query(
"SELECT TO_CHAR(date, 'YYYY-MM-DD') AS date, count FROM licensed_users ORDER BY date;",
);
expect(rows.find((row) => row.date === '2024-09-01').count).toBe(1); // user1 created
expect(rows.find((row) => row.date === '2024-10-01').count).toBe(2); // user1 active, user2 created
expect(rows.find((row) => row.date === '2024-10-05').count).toBe(2); // user1 within retention, user2 active
expect(rows.find((row) => row.date === '2024-11-19').count).toBe(1); // Only user2 active, user1's retention has ended
});

test('Handles users created but not deleted', async () => {
await insertEvents(client, [
{
type: 'user-created',
data: { email: '[email protected]' },
created_at: '2024-11-01',
created_by: 'test',
},
]);

await dbm.up('20241119105837-licensed-users-backfill.js');

const { rows } = await client.query(
"SELECT TO_CHAR(date, 'YYYY-MM-DD') AS date, count FROM licensed_users ORDER BY date;",
);
expect(rows.find((row) => row.date === '2024-11-01').count).toBe(1); // user1 created
expect(rows.find((row) => row.date === '2024-11-19').count).toBe(1); // user1 still active
});

test('Handles overlapping creation and deletion periods with multiple events for the same email (one month earlier)', async () => {
await insertEvents(client, [
{
type: 'user-created',
data: { email: '[email protected]' },
created_at: '2024-10-01 00:00:00+00',
created_by: 'test',
},
{
type: 'user-deleted',
pre_data: { email: '[email protected]' },
created_at: '2024-10-01 00:01:00+00',
created_by: 'test',
},
{
type: 'user-created',
data: { email: '[email protected]' },
created_at: '2024-10-03',
created_by: 'test',
},
{
type: 'user-deleted',
pre_data: { email: '[email protected]' },
created_at: '2024-10-07',
created_by: 'test',
},
{
type: 'user-created',
data: { email: '[email protected]' },
created_at: '2024-10-05',
created_by: 'test',
},
{
type: 'user-deleted',
pre_data: { email: '[email protected]' },
created_at: '2024-10-10',
created_by: 'test',
},
]);

await dbm.up('20241119105837-licensed-users-backfill.js');

const { rows } = await client.query(
"SELECT TO_CHAR(date, 'YYYY-MM-DD') AS date, count FROM licensed_users ORDER BY date;",
);
expect(rows.find((row) => row.date === '2024-10-01').count).toBe(1); // user1 created and deleted on the same day
expect(rows.find((row) => row.date === '2024-10-03').count).toBe(1); // user1 re-created
expect(rows.find((row) => row.date === '2024-10-05').count).toBe(2); // user1 within retention, user2 created
expect(rows.find((row) => row.date === '2024-10-07').count).toBe(2); // user1 within retention, user2 active
expect(rows.find((row) => row.date === '2024-11-07').count).toBe(1); // user2 within retention, user1 expired
expect(rows.find((row) => row.date === '2024-11-10').count).toBe(0); // Both users expired
});
});

0 comments on commit 4234020

Please sign in to comment.