Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: backfill licensed users #8791

Merged
merged 4 commits into from
Nov 20, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
85 changes: 85 additions & 0 deletions src/migrations/20241119105837-licensed-users-backfill.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,85 @@
exports.up = (db, cb) => {
db.runSql(`
WITH user_events AS (
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Just gets the correct events

SELECT
DISTINCT CASE WHEN type = 'user-deleted' THEN pre_data ->> 'email' ELSE data ->> 'email' END AS email,
type,
created_at AS event_date
FROM
events
WHERE
type IN ('user-created', 'user-deleted')
),
dates AS (
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Generates needed dates from first event to today.

WITH RECURSIVE generated_dates AS (
SELECT
MIN(event_date):: timestamp AS date
FROM
user_events
UNION ALL
SELECT
date + INTERVAL '1 day'
FROM
generated_dates
WHERE
date + INTERVAL '1 day' <= CURRENT_DATE
)
SELECT
date :: date
FROM
generated_dates
),
active_emails AS (
Copy link
Contributor Author

@sjaanus sjaanus Nov 19, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

For each date, for each email, find the most recent created and deleted event before the date

SELECT
d.date,
ue.email,
MAX(
CASE WHEN ue.type = 'user-created' THEN ue.event_date ELSE NULL END
) AS created_date,
MAX(
CASE WHEN ue.type = 'user-deleted' THEN ue.event_date ELSE NULL END
) AS deleted_date
FROM
dates d
LEFT JOIN user_events ue ON ue.event_date <= d.date
GROUP BY
d.date,
ue.email
),
result AS (
Copy link
Contributor Author

@sjaanus sjaanus Nov 19, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Count the emails on date

  1. add 1 if the deleted date is empty, delete happened within last 30 days, delete happened before the creation
  2. add 0 else, so the email was not active during that day.

SELECT
d.date,
COALESCE(
COUNT(
DISTINCT CASE
WHEN ae.deleted_date IS NULL
OR ae.deleted_date >= d.date - INTERVAL '30 days'
OR ae.deleted_date < ae.created_date
THEN ae.email
ELSE NULL
END
),
0
) AS active_emails_count
FROM
dates d
LEFT JOIN active_emails ae ON d.date = ae.date
GROUP BY
d.date
ORDER BY
d.date
) INSERT INTO licensed_users (date, count)
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Insert all the date results into db, not overwriting existing results.

SELECT date, active_emails_count
FROM result
WHERE EXISTS (
SELECT 1 FROM user_events
)
ON CONFLICT (date) DO NOTHING;
`, cb);

};

exports.down = (db, cb) => {
db.runSql(``, cb);
};

204 changes: 204 additions & 0 deletions src/test/e2e/licensed-users-backfill.test.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,204 @@
import { getDbConfig } from './helpers/database-config';
import { createTestConfig } from '../config/test-config';
import { getInstance } from 'db-migrate';
import { Client } from 'pg';

async function initSchema(db) {
const client = new Client(db);
await client.connect();
await client.query(`DROP SCHEMA IF EXISTS ${db.schema} CASCADE`);
await client.query(`CREATE SCHEMA IF NOT EXISTS ${db.schema}`);
await client.end();
}

async function insertEvents(client, events) {
const values = events
.map(
(e) =>
`('${e.type}', '${JSON.stringify(e.data || {})}', '${JSON.stringify(
e.pre_data || {},
)}', '${e.created_at}', '${e.created_by}')`,
)
.join(',');

await client.query(`
INSERT INTO events (type, data, pre_data, created_at, created_by)
VALUES ${values};
`);
}

describe('licensed_users backfill', () => {
jest.setTimeout(15000);

let client: any;
let dbm: any;
const config = createTestConfig({
db: {
...getDbConfig(),
pool: { min: 1, max: 4 },
schema: 'licensed_users_test',
ssl: false,
},
});

beforeAll(async () => {
await initSchema(config.db);

dbm = getInstance(true, {
cwd: `${__dirname}/../../`,
config: { e2e: { ...config.db, connectionTimeoutMillis: 2000 } },
env: 'e2e',
});

await dbm.up('20241114103646-licensed-users.js');
client = new Client(config.db);
await client.connect();
await client.query(`SET search_path = 'licensed_users_test';`);
});

afterAll(async () => {
await client.end();
await dbm.reset();
});

beforeEach(async () => {
await client.query('delete from events;');
await client.query('delete from licensed_users;');
await client.query(
"DELETE FROM migrations WHERE name = '/20241119105837-licensed-users-backfill';",
);
});

test('Counts users from their creation date until their deletion date and 30 days', async () => {
await insertEvents(client, [
{
type: 'user-created',
data: { email: '[email protected]' },
created_at: '2024-10-01',
created_by: 'test',
},
{
type: 'user-deleted',
pre_data: { email: '[email protected]' },
created_at: '2024-10-05',
created_by: 'test',
},
]);

await dbm.up('20241119105837-licensed-users-backfill.js');

const { rows } = await client.query(
"SELECT TO_CHAR(date, 'YYYY-MM-DD') AS date, count FROM licensed_users ORDER BY date;",
);
expect(rows.find((row) => row.date === '2024-10-01').count).toBe(1);
expect(rows.find((row) => row.date === '2024-10-02').count).toBe(1);
expect(rows.find((row) => row.date === '2024-10-05').count).toBe(1);
expect(rows.find((row) => row.date === '2024-11-04').count).toBe(1);
expect(rows.find((row) => row.date === '2024-11-05').count).toBe(0); // 30 days has passed
Comment on lines +96 to +97
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This might be the same thing as I pointed out in a separate PR, so feel free to ignore if you don't think it's relevant, but October is a 31-day month. So if you create the user on the 1st, and delete them on the 2nd, the 31st will be the 30th day after deletion. Extrapolating, that means that november 3rd is the 30 day mark. Should we switch this to check November 4th? Or should it be 3rd? Again, because this will in reality differentiate on milliseconds, it probably doesn't matter much. I'm sure youv'e got it covered.

If someone is deleted on the 5th, does the 5th count as the first day of 30 or is the 6th the first day?

});

test('Counts multiple users correctly over their active periods, including 30-day retention from deletion', async () => {
await insertEvents(client, [
{
type: 'user-created',
data: { email: '[email protected]' },
created_at: '2024-09-01',
created_by: 'test',
},
{
type: 'user-created',
data: { email: '[email protected]' },
created_at: '2024-10-01',
created_by: 'test',
},
{
type: 'user-deleted',
pre_data: { email: '[email protected]' },
created_at: '2024-10-05',
created_by: 'test',
},
]);

await dbm.up('20241119105837-licensed-users-backfill.js');

const { rows } = await client.query(
"SELECT TO_CHAR(date, 'YYYY-MM-DD') AS date, count FROM licensed_users ORDER BY date;",
);
expect(rows.find((row) => row.date === '2024-09-01').count).toBe(1); // user1 created
expect(rows.find((row) => row.date === '2024-10-01').count).toBe(2); // user1 active, user2 created
expect(rows.find((row) => row.date === '2024-10-05').count).toBe(2); // user1 within retention, user2 active
expect(rows.find((row) => row.date === '2024-11-19').count).toBe(1); // Only user2 active, user1's retention has ended
});

test('Handles users created but not deleted', async () => {
await insertEvents(client, [
{
type: 'user-created',
data: { email: '[email protected]' },
created_at: '2024-11-01',
created_by: 'test',
},
]);

await dbm.up('20241119105837-licensed-users-backfill.js');

const { rows } = await client.query(
"SELECT TO_CHAR(date, 'YYYY-MM-DD') AS date, count FROM licensed_users ORDER BY date;",
);
expect(rows.find((row) => row.date === '2024-11-01').count).toBe(1); // user1 created
expect(rows.find((row) => row.date === '2024-11-19').count).toBe(1); // user1 still active
});

test('Handles overlapping creation and deletion periods with multiple events for the same email (one month earlier)', async () => {
await insertEvents(client, [
{
type: 'user-created',
data: { email: '[email protected]' },
created_at: '2024-10-01 00:00:00+00',
created_by: 'test',
},
{
type: 'user-deleted',
pre_data: { email: '[email protected]' },
created_at: '2024-10-01 00:01:00+00',
created_by: 'test',
},
{
type: 'user-created',
data: { email: '[email protected]' },
created_at: '2024-10-03',
created_by: 'test',
},
{
type: 'user-deleted',
pre_data: { email: '[email protected]' },
created_at: '2024-10-07',
created_by: 'test',
},
{
type: 'user-created',
data: { email: '[email protected]' },
created_at: '2024-10-05',
created_by: 'test',
},
{
type: 'user-deleted',
pre_data: { email: '[email protected]' },
created_at: '2024-10-10',
created_by: 'test',
},
]);

await dbm.up('20241119105837-licensed-users-backfill.js');

const { rows } = await client.query(
"SELECT TO_CHAR(date, 'YYYY-MM-DD') AS date, count FROM licensed_users ORDER BY date;",
);
expect(rows.find((row) => row.date === '2024-10-01').count).toBe(1); // user1 created and deleted on the same day
expect(rows.find((row) => row.date === '2024-10-03').count).toBe(1); // user1 re-created
expect(rows.find((row) => row.date === '2024-10-05').count).toBe(2); // user1 within retention, user2 created
expect(rows.find((row) => row.date === '2024-10-07').count).toBe(2); // user1 within retention, user2 active
expect(rows.find((row) => row.date === '2024-11-07').count).toBe(1); // user2 within retention, user1 expired
expect(rows.find((row) => row.date === '2024-11-10').count).toBe(0); // Both users expired
});
});
Loading