diff --git a/openedx/core/djangoapps/user_api/accounts/tests/test_retirement_views.py b/openedx/core/djangoapps/user_api/accounts/tests/test_retirement_views.py index 4f942417ae0e..60756fb0f3d9 100644 --- a/openedx/core/djangoapps/user_api/accounts/tests/test_retirement_views.py +++ b/openedx/core/djangoapps/user_api/accounts/tests/test_retirement_views.py @@ -13,7 +13,9 @@ from django.contrib.sites.models import Site from django.core import mail from django.core.cache import cache +from django.db import connection from django.test import TestCase +from django.test.utils import CaptureQueriesContext from django.urls import reverse from enterprise.models import ( EnterpriseCourseEnrollment, @@ -1079,8 +1081,79 @@ def cleanup_and_assert_status(self, data=None, expected_status=status.HTTP_204_N return response def test_simple_success(self): + """ + Test basic cleanup with default redacted values. + """ + # Verify redaction happens (records exist before cleanup) + assert UserRetirementStatus.objects.count() == 9 + + # Make the cleanup request self.cleanup_and_assert_status() - assert not UserRetirementStatus.objects.all() + + # Records should be deleted after redaction + retirements = UserRetirementStatus.objects.all() + assert retirements.count() == 0 + + def test_redaction_before_deletion(self): + """ + Verify that redaction (UPDATE) happens before deletion (DELETE). + Captures actual SQL queries to ensure UPDATE queries contain redacted values. + """ + with CaptureQueriesContext(connection) as context: + self.cleanup_and_assert_status() + + # Verify records are deleted after redaction + retirements = UserRetirementStatus.objects.all() + assert retirements.count() == 0 + + # Verify UPDATE queries exist with default 'redacted' value + queries = context.captured_queries + update_queries = [q for q in queries if 'UPDATE' in q['sql'] and 'user_api_userretirementstatus' in q['sql']] + delete_queries = [q for q in queries if 'DELETE' in q['sql'] and 'user_api_userretirementstatus' in q['sql']] + + # Should have 9 UPDATE and 9 DELETE queries + assert len(update_queries) == 9, f"Expected 9 UPDATE queries, found {len(update_queries)}" + assert len(delete_queries) == 9, f"Expected 9 DELETE queries, found {len(delete_queries)}" + + # Verify UPDATE queries contain the redacted values + for update_query in update_queries: + sql = update_query['sql'] + assert "'redacted'" in sql, f"UPDATE query missing 'redacted' value: {sql}" + assert 'original_username' in sql, f"UPDATE query missing original_username field: {sql}" + assert 'original_email' in sql, f"UPDATE query missing original_email field: {sql}" + assert 'original_name' in sql, f"UPDATE query missing original_name field: {sql}" + + def test_custom_redacted_values(self): + """Test that custom redacted values are applied before deletion.""" + custom_username = 'username-redacted-12345' + custom_email = 'email-redacted-67890' + custom_name = 'name-redacted-abcde' + + data = { + 'usernames': self.usernames, + 'redacted_username': custom_username, + 'redacted_email': custom_email, + 'redacted_name': custom_name + } + + with CaptureQueriesContext(connection) as context: + self.cleanup_and_assert_status(data=data) + + # Verify records are deleted after redaction + retirements = UserRetirementStatus.objects.all() + assert retirements.count() == 0 + + # Verify UPDATE queries contain the custom redacted values + queries = context.captured_queries + update_queries = [q for q in queries if 'UPDATE' in q['sql'] and 'user_api_userretirementstatus' in q['sql']] + + assert len(update_queries) == 9, f"Expected 9 UPDATE queries, found {len(update_queries)}" + + for update_query in update_queries: + sql = update_query['sql'] + assert custom_username in sql, f"UPDATE query missing custom username '{custom_username}': {sql}" + assert custom_email in sql, f"UPDATE query missing custom email '{custom_email}': {sql}" + assert custom_name in sql, f"UPDATE query missing custom name '{custom_name}': {sql}" def test_leaves_other_users(self): remaining_usernames = [] diff --git a/openedx/core/djangoapps/user_api/accounts/views.py b/openedx/core/djangoapps/user_api/accounts/views.py index c3ff6ce7a2f2..b80e5c6fbb88 100644 --- a/openedx/core/djangoapps/user_api/accounts/views.py +++ b/openedx/core/djangoapps/user_api/accounts/views.py @@ -1024,14 +1024,20 @@ def cleanup(self, request): ``` { - 'usernames': ['user1', 'user2', ...] + 'usernames': ['user1', 'user2', ...], + 'redacted_username': 'Value to store in username field', + 'redacted_email': 'Value to store in email field', + 'redacted_name': 'Value to store in name field' } ``` - Deletes a batch of retirement requests by username. + Redacts a batch of retirement requests by redacting PII fields. """ try: usernames = request.data["usernames"] + redacted_username = request.data.get("redacted_username", "redacted") + redacted_email = request.data.get("redacted_email", "redacted") + redacted_name = request.data.get("redacted_name", "redacted") if not isinstance(usernames, list): raise TypeError("Usernames should be an array.") @@ -1045,7 +1051,16 @@ def cleanup(self, request): if len(usernames) != len(retirements): raise UserRetirementStatus.DoesNotExist("Not all usernames exist in the COMPLETE state.") - retirements.delete() + # Redact PII fields first, then delete. In case an ETL tool is syncing data + # to a downstream data warehouse, and treats the deletes as soft-deletes, + # the data will have first been redacted, protecting the sensitive PII. + for retirement in retirements: + retirement.original_username = redacted_username + retirement.original_email = redacted_email + retirement.original_name = redacted_name + retirement.save() + retirement.delete() + return Response(status=status.HTTP_204_NO_CONTENT) except (RetirementStateError, UserRetirementStatus.DoesNotExist, TypeError) as exc: return Response(str(exc), status=status.HTTP_400_BAD_REQUEST)