-
Notifications
You must be signed in to change notification settings - Fork 4.2k
fix: redacting user retirement data in lms #37886
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: master
Are you sure you want to change the base?
Changes from 6 commits
19fc427
5ac51b6
b0e4bce
3c338d4
5972c46
83e52e1
6a33726
74f4aaa
0f4e143
23cac02
0f17086
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -13,7 +13,9 @@ | |
| from django.contrib.sites.models import Site | ||
| from django.core import mail | ||
| from django.core.cache import cache | ||
| from django.db import connection | ||
| from django.test import TestCase | ||
| from django.test.utils import CaptureQueriesContext | ||
| from django.urls import reverse | ||
| from enterprise.models import ( | ||
| EnterpriseCourseEnrollment, | ||
|
|
@@ -1079,8 +1081,79 @@ def cleanup_and_assert_status(self, data=None, expected_status=status.HTTP_204_N | |
| return response | ||
|
|
||
| def test_simple_success(self): | ||
| """ | ||
| Test basic cleanup with default redacted values. | ||
| """ | ||
| # Verify redaction happens (records exist before cleanup) | ||
| assert UserRetirementStatus.objects.count() == 9 | ||
|
|
||
| # Make the cleanup request | ||
| self.cleanup_and_assert_status() | ||
| assert not UserRetirementStatus.objects.all() | ||
|
|
||
| # Records should be deleted after redaction | ||
| retirements = UserRetirementStatus.objects.all() | ||
| assert retirements.count() == 0 | ||
|
|
||
| def test_redaction_before_deletion(self): | ||
| """ | ||
| Verify that redaction (UPDATE) happens before deletion (DELETE). | ||
| Captures actual SQL queries to ensure UPDATE queries contain redacted values. | ||
| """ | ||
| with CaptureQueriesContext(connection) as context: | ||
| self.cleanup_and_assert_status() | ||
|
|
||
| # Verify records are deleted after redaction | ||
| retirements = UserRetirementStatus.objects.all() | ||
| assert retirements.count() == 0 | ||
|
|
||
| # Verify UPDATE queries exist with default 'redacted' value | ||
robrap marked this conversation as resolved.
Outdated
Show resolved
Hide resolved
|
||
| queries = context.captured_queries | ||
| update_queries = [q for q in queries if 'UPDATE' in q['sql'] and 'user_api_userretirementstatus' in q['sql']] | ||
| delete_queries = [q for q in queries if 'DELETE' in q['sql'] and 'user_api_userretirementstatus' in q['sql']] | ||
|
|
||
| # Should have 9 UPDATE and 9 DELETE queries | ||
| assert len(update_queries) == 9, f"Expected 9 UPDATE queries, found {len(update_queries)}" | ||
| assert len(delete_queries) == 9, f"Expected 9 DELETE queries, found {len(delete_queries)}" | ||
|
|
||
| # Verify UPDATE queries contain the redacted values | ||
| for update_query in update_queries: | ||
| sql = update_query['sql'] | ||
| assert "'redacted'" in sql, f"UPDATE query missing 'redacted' value: {sql}" | ||
| assert 'original_username' in sql, f"UPDATE query missing original_username field: {sql}" | ||
| assert 'original_email' in sql, f"UPDATE query missing original_email field: {sql}" | ||
| assert 'original_name' in sql, f"UPDATE query missing original_name field: {sql}" | ||
|
|
||
| def test_custom_redacted_values(self): | ||
| """Test that custom redacted values are applied before deletion.""" | ||
| custom_username = 'username-redacted-12345' | ||
| custom_email = 'email-redacted-67890' | ||
| custom_name = 'name-redacted-abcde' | ||
robrap marked this conversation as resolved.
Show resolved
Hide resolved
|
||
|
|
||
| data = { | ||
| 'usernames': self.usernames, | ||
| 'redacted_username': custom_username, | ||
| 'redacted_email': custom_email, | ||
| 'redacted_name': custom_name | ||
| } | ||
|
|
||
| with CaptureQueriesContext(connection) as context: | ||
| self.cleanup_and_assert_status(data=data) | ||
|
|
||
| # Verify records are deleted after redaction | ||
| retirements = UserRetirementStatus.objects.all() | ||
| assert retirements.count() == 0 | ||
|
|
||
| # Verify UPDATE queries contain the custom redacted values | ||
| queries = context.captured_queries | ||
| update_queries = [q for q in queries if 'UPDATE' in q['sql'] and 'user_api_userretirementstatus' in q['sql']] | ||
|
|
||
| assert len(update_queries) == 9, f"Expected 9 UPDATE queries, found {len(update_queries)}" | ||
|
|
||
| for update_query in update_queries: | ||
| sql = update_query['sql'] | ||
| assert custom_username in sql, f"UPDATE query missing custom username '{custom_username}': {sql}" | ||
|
||
| assert custom_email in sql, f"UPDATE query missing custom email '{custom_email}': {sql}" | ||
| assert custom_name in sql, f"UPDATE query missing custom name '{custom_name}': {sql}" | ||
|
|
||
| def test_leaves_other_users(self): | ||
| remaining_usernames = [] | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -1024,14 +1024,20 @@ def cleanup(self, request): | |
|
|
||
| ``` | ||
| { | ||
| 'usernames': ['user1', 'user2', ...] | ||
| 'usernames': ['user1', 'user2', ...], | ||
| 'redacted_username': 'Value to store in username field', | ||
| 'redacted_email': 'Value to store in email field', | ||
| 'redacted_name': 'Value to store in name field' | ||
| } | ||
| ``` | ||
|
|
||
| Deletes a batch of retirement requests by username. | ||
| Redacts a batch of retirement requests by redacting PII fields. | ||
| """ | ||
| try: | ||
| usernames = request.data["usernames"] | ||
| redacted_username = request.data.get("redacted_username", "redacted") | ||
| redacted_email = request.data.get("redacted_email", "redacted") | ||
| redacted_name = request.data.get("redacted_name", "redacted") | ||
|
|
||
| if not isinstance(usernames, list): | ||
| raise TypeError("Usernames should be an array.") | ||
|
|
@@ -1045,7 +1051,16 @@ def cleanup(self, request): | |
| if len(usernames) != len(retirements): | ||
| raise UserRetirementStatus.DoesNotExist("Not all usernames exist in the COMPLETE state.") | ||
|
|
||
| retirements.delete() | ||
| # Redact PII fields first, then delete. In case an ETL tool is syncing data | ||
| # to a downstream data warehouse, and treats the deletes as soft-deletes, | ||
| # the data will have first been redacted, protecting the sensitive PII. | ||
| for retirement in retirements: | ||
| retirement.original_username = redacted_username | ||
| retirement.original_email = redacted_email | ||
| retirement.original_name = redacted_name | ||
| retirement.save() | ||
| retirement.delete() | ||
|
Comment on lines
+1054
to
+1062
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. @ktyagiapphelix2u: I thought you had said this work was complete. I guess I misunderstood. Either way, please take care of this and all copilot comments. Thank you.
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Oh. Is this a very outdated duplicate PR for openedx? We should discuss our process, but I would have imagined that this would be the PR we start with.
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. @robrap You are seeing the old PR, the new PR is in edx/edx-platform. Thanks. |
||
|
|
||
| return Response(status=status.HTTP_204_NO_CONTENT) | ||
| except (RetirementStateError, UserRetirementStatus.DoesNotExist, TypeError) as exc: | ||
| return Response(str(exc), status=status.HTTP_400_BAD_REQUEST) | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I don't think there is any difference between what is being tested here, and what is tested in
test_redaction_before_deletion, right? It's the same test, but you are just using two different names and asserting on different things. I think I would just deletetest_simple_successand renametest_redaction_before_deletiontotest_default_redacted_values. I thinktest_default_redacted_valuesalready has all the assertions you need. Thoughts?There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Updated thanks.