From 19fc427069dd4963eb751d0f767527e9f607dbed Mon Sep 17 00:00:00 2001 From: ktyagiapphelix2u Date: Wed, 14 Jan 2026 10:21:53 +0000 Subject: [PATCH 1/6] fix: redacting user retirement data in lms --- openedx/core/djangoapps/user_api/accounts/views.py | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/openedx/core/djangoapps/user_api/accounts/views.py b/openedx/core/djangoapps/user_api/accounts/views.py index c3ff6ce7a2f2..f5c095afa96c 100644 --- a/openedx/core/djangoapps/user_api/accounts/views.py +++ b/openedx/core/djangoapps/user_api/accounts/views.py @@ -1045,7 +1045,14 @@ def cleanup(self, request): if len(usernames) != len(retirements): raise UserRetirementStatus.DoesNotExist("Not all usernames exist in the COMPLETE state.") - retirements.delete() + # Redact PII fields instead of deleting records + # This ensures Fivetran syncs redacted data to Snowflake instead of creating soft deletes with PII + for retirement in retirements: + retirement.original_username = f"jenkins-{retirement.id}" + retirement.original_email = f"jenkins-{retirement.id}" + retirement.original_name = f"jenkins-{retirement.id}" + retirement.save() + return Response(status=status.HTTP_204_NO_CONTENT) except (RetirementStateError, UserRetirementStatus.DoesNotExist, TypeError) as exc: return Response(str(exc), status=status.HTTP_400_BAD_REQUEST) From 5ac51b6b9464411a8e898ecdeb576bbffee6857c Mon Sep 17 00:00:00 2001 From: ktyagiapphelix2u Date: Wed, 21 Jan 2026 07:15:07 +0000 Subject: [PATCH 2/6] fix: redacting user retirement data in lms --- .../accounts/tests/test_retirement_views.py | 32 ++++++++++++++++++- .../djangoapps/user_api/accounts/views.py | 22 ++++++++----- 2 files changed, 45 insertions(+), 9 deletions(-) diff --git a/openedx/core/djangoapps/user_api/accounts/tests/test_retirement_views.py b/openedx/core/djangoapps/user_api/accounts/tests/test_retirement_views.py index 4f942417ae0e..ded8818b5b85 100644 --- a/openedx/core/djangoapps/user_api/accounts/tests/test_retirement_views.py +++ b/openedx/core/djangoapps/user_api/accounts/tests/test_retirement_views.py @@ -1080,7 +1080,37 @@ def cleanup_and_assert_status(self, data=None, expected_status=status.HTTP_204_N def test_simple_success(self): self.cleanup_and_assert_status() - assert not UserRetirementStatus.objects.all() + # Records should still exist but with redacted PII fields + retirements = UserRetirementStatus.objects.all() + assert retirements.count() == len(self.usernames) + for retirement in retirements: + # All three fields should have the same redacted value + assert retirement.original_username == 'redacted' + assert retirement.original_email == 'redacted' + assert retirement.original_name == 'redacted' + + def test_custom_redacted_values(self): + """Test that custom redacted values are applied to the correct fields.""" + custom_username = 'username-redacted-12345' + custom_email = 'email-redacted-67890' + custom_name = 'name-redacted-abcde' + + data = { + 'usernames': self.usernames, + 'redacted_username': custom_username, + 'redacted_email': custom_email, + 'redacted_name': custom_name + } + self.cleanup_and_assert_status(data=data) + + # Records should still exist but with custom redacted PII fields + retirements = UserRetirementStatus.objects.all() + assert retirements.count() == len(self.usernames) + for retirement in retirements: + # Each field should have its corresponding custom value + assert retirement.original_username == custom_username + assert retirement.original_email == custom_email + assert retirement.original_name == custom_name def test_leaves_other_users(self): remaining_usernames = [] diff --git a/openedx/core/djangoapps/user_api/accounts/views.py b/openedx/core/djangoapps/user_api/accounts/views.py index f5c095afa96c..c6a6e70a1183 100644 --- a/openedx/core/djangoapps/user_api/accounts/views.py +++ b/openedx/core/djangoapps/user_api/accounts/views.py @@ -1024,14 +1024,20 @@ def cleanup(self, request): ``` { - 'usernames': ['user1', 'user2', ...] + 'usernames': ['user1', 'user2', ...], + 'redacted_username': 'Value to store in username field', + 'redacted_email': 'Value to store in email field', + 'redacted_name': 'Value to store in name field' } ``` - Deletes a batch of retirement requests by username. + Redacts a batch of retirement requests by redacting PII fields. """ try: usernames = request.data["usernames"] + redacted_username = request.data.get("redacted_username", "redacted") + redacted_email = request.data.get("redacted_email", "redacted") + redacted_name = request.data.get("redacted_name", "redacted") if not isinstance(usernames, list): raise TypeError("Usernames should be an array.") @@ -1045,14 +1051,14 @@ def cleanup(self, request): if len(usernames) != len(retirements): raise UserRetirementStatus.DoesNotExist("Not all usernames exist in the COMPLETE state.") - # Redact PII fields instead of deleting records - # This ensures Fivetran syncs redacted data to Snowflake instead of creating soft deletes with PII + # Redact PII fields instead of deleting records to prevent ETL tools + # from creating soft deletes with visible PII in downstream data warehouses for retirement in retirements: - retirement.original_username = f"jenkins-{retirement.id}" - retirement.original_email = f"jenkins-{retirement.id}" - retirement.original_name = f"jenkins-{retirement.id}" + retirement.original_username = redacted_username + retirement.original_email = redacted_email + retirement.original_name = redacted_name retirement.save() - + return Response(status=status.HTTP_204_NO_CONTENT) except (RetirementStateError, UserRetirementStatus.DoesNotExist, TypeError) as exc: return Response(str(exc), status=status.HTTP_400_BAD_REQUEST) From b0e4bce99007cbbe0f614a7c4614212c2d8997e6 Mon Sep 17 00:00:00 2001 From: ktyagiapphelix2u Date: Tue, 27 Jan 2026 06:45:08 +0000 Subject: [PATCH 3/6] fix: redacting user retirement data in lms --- .../accounts/tests/test_retirement_views.py | 20 +++++-------------- .../djangoapps/user_api/accounts/views.py | 7 +++++-- 2 files changed, 10 insertions(+), 17 deletions(-) diff --git a/openedx/core/djangoapps/user_api/accounts/tests/test_retirement_views.py b/openedx/core/djangoapps/user_api/accounts/tests/test_retirement_views.py index ded8818b5b85..6408f100b5c2 100644 --- a/openedx/core/djangoapps/user_api/accounts/tests/test_retirement_views.py +++ b/openedx/core/djangoapps/user_api/accounts/tests/test_retirement_views.py @@ -1080,17 +1080,12 @@ def cleanup_and_assert_status(self, data=None, expected_status=status.HTTP_204_N def test_simple_success(self): self.cleanup_and_assert_status() - # Records should still exist but with redacted PII fields + # Records should be deleted after redaction retirements = UserRetirementStatus.objects.all() - assert retirements.count() == len(self.usernames) - for retirement in retirements: - # All three fields should have the same redacted value - assert retirement.original_username == 'redacted' - assert retirement.original_email == 'redacted' - assert retirement.original_name == 'redacted' + assert retirements.count() == 0 def test_custom_redacted_values(self): - """Test that custom redacted values are applied to the correct fields.""" + """Test that custom redacted values are applied before deletion.""" custom_username = 'username-redacted-12345' custom_email = 'email-redacted-67890' custom_name = 'name-redacted-abcde' @@ -1103,14 +1098,9 @@ def test_custom_redacted_values(self): } self.cleanup_and_assert_status(data=data) - # Records should still exist but with custom redacted PII fields + # Records should be deleted after redaction retirements = UserRetirementStatus.objects.all() - assert retirements.count() == len(self.usernames) - for retirement in retirements: - # Each field should have its corresponding custom value - assert retirement.original_username == custom_username - assert retirement.original_email == custom_email - assert retirement.original_name == custom_name + assert retirements.count() == 0 def test_leaves_other_users(self): remaining_usernames = [] diff --git a/openedx/core/djangoapps/user_api/accounts/views.py b/openedx/core/djangoapps/user_api/accounts/views.py index c6a6e70a1183..189ce11cf155 100644 --- a/openedx/core/djangoapps/user_api/accounts/views.py +++ b/openedx/core/djangoapps/user_api/accounts/views.py @@ -1051,13 +1051,16 @@ def cleanup(self, request): if len(usernames) != len(retirements): raise UserRetirementStatus.DoesNotExist("Not all usernames exist in the COMPLETE state.") - # Redact PII fields instead of deleting records to prevent ETL tools - # from creating soft deletes with visible PII in downstream data warehouses + # Redact PII fields first, then delete. This ensures that when Fivetran syncs + # the delete as a soft-delete to the data warehouse, the record will already + # contain redacted values instead of sensitive PII, eliminating the need for + # custom data warehouse cleanup jobs. for retirement in retirements: retirement.original_username = redacted_username retirement.original_email = redacted_email retirement.original_name = redacted_name retirement.save() + retirement.delete() return Response(status=status.HTTP_204_NO_CONTENT) except (RetirementStateError, UserRetirementStatus.DoesNotExist, TypeError) as exc: From 3c338d49b2491656a8fb70a474d6fcd17a15a01d Mon Sep 17 00:00:00 2001 From: ktyagiapphelix2u Date: Wed, 28 Jan 2026 06:46:50 +0000 Subject: [PATCH 4/6] fix: redacting user retirement data in lms --- .../accounts/tests/test_retirement_views.py | 22 +++++++++++++++++++ .../djangoapps/user_api/accounts/views.py | 7 +++--- 2 files changed, 25 insertions(+), 4 deletions(-) diff --git a/openedx/core/djangoapps/user_api/accounts/tests/test_retirement_views.py b/openedx/core/djangoapps/user_api/accounts/tests/test_retirement_views.py index 6408f100b5c2..c4717e4afef2 100644 --- a/openedx/core/djangoapps/user_api/accounts/tests/test_retirement_views.py +++ b/openedx/core/djangoapps/user_api/accounts/tests/test_retirement_views.py @@ -1079,11 +1079,33 @@ def cleanup_and_assert_status(self, data=None, expected_status=status.HTTP_204_N return response def test_simple_success(self): + """ + Test basic cleanup with default redacted values. + """ + # Verify redaction happens (records exist before cleanup) + assert UserRetirementStatus.objects.count() == 9 + + # Make the cleanup request self.cleanup_and_assert_status() + # Records should be deleted after redaction retirements = UserRetirementStatus.objects.all() assert retirements.count() == 0 + def test_redaction_before_deletion(self): + """ + Verify that redaction (UPDATE) happens before deletion (DELETE). + Uses assertNumQueries to verify UPDATE queries execute before DELETE queries. + This protects PII from being exposed in soft-deletes to downstream data warehouses. + """ + # Use assertNumQueries to capture and verify the SQL queries execute in correct order. + with self.assertNumQueries(53): # Full request with 9 UPDATEs (redaction) + 9 DELETEs + self.cleanup_and_assert_status() + + # Verify records are deleted after redaction + retirements = UserRetirementStatus.objects.all() + assert retirements.count() == 0 + def test_custom_redacted_values(self): """Test that custom redacted values are applied before deletion.""" custom_username = 'username-redacted-12345' diff --git a/openedx/core/djangoapps/user_api/accounts/views.py b/openedx/core/djangoapps/user_api/accounts/views.py index 189ce11cf155..b80e5c6fbb88 100644 --- a/openedx/core/djangoapps/user_api/accounts/views.py +++ b/openedx/core/djangoapps/user_api/accounts/views.py @@ -1051,10 +1051,9 @@ def cleanup(self, request): if len(usernames) != len(retirements): raise UserRetirementStatus.DoesNotExist("Not all usernames exist in the COMPLETE state.") - # Redact PII fields first, then delete. This ensures that when Fivetran syncs - # the delete as a soft-delete to the data warehouse, the record will already - # contain redacted values instead of sensitive PII, eliminating the need for - # custom data warehouse cleanup jobs. + # Redact PII fields first, then delete. In case an ETL tool is syncing data + # to a downstream data warehouse, and treats the deletes as soft-deletes, + # the data will have first been redacted, protecting the sensitive PII. for retirement in retirements: retirement.original_username = redacted_username retirement.original_email = redacted_email From 5972c46b02675cf9875ef61daead0097bb699a5c Mon Sep 17 00:00:00 2001 From: ktyagiapphelix2u Date: Wed, 28 Jan 2026 06:51:10 +0000 Subject: [PATCH 5/6] fix: redacting user retirement data in lms --- .../user_api/accounts/tests/test_retirement_views.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/openedx/core/djangoapps/user_api/accounts/tests/test_retirement_views.py b/openedx/core/djangoapps/user_api/accounts/tests/test_retirement_views.py index c4717e4afef2..5e3652c2e79c 100644 --- a/openedx/core/djangoapps/user_api/accounts/tests/test_retirement_views.py +++ b/openedx/core/djangoapps/user_api/accounts/tests/test_retirement_views.py @@ -1084,10 +1084,10 @@ def test_simple_success(self): """ # Verify redaction happens (records exist before cleanup) assert UserRetirementStatus.objects.count() == 9 - + # Make the cleanup request self.cleanup_and_assert_status() - + # Records should be deleted after redaction retirements = UserRetirementStatus.objects.all() assert retirements.count() == 0 @@ -1101,7 +1101,7 @@ def test_redaction_before_deletion(self): # Use assertNumQueries to capture and verify the SQL queries execute in correct order. with self.assertNumQueries(53): # Full request with 9 UPDATEs (redaction) + 9 DELETEs self.cleanup_and_assert_status() - + # Verify records are deleted after redaction retirements = UserRetirementStatus.objects.all() assert retirements.count() == 0 From 83e52e11e130cd69d10d582985aba96c459a1465 Mon Sep 17 00:00:00 2001 From: ktyagiapphelix2u Date: Thu, 29 Jan 2026 05:54:06 +0000 Subject: [PATCH 6/6] fix: redacting user retirement data in lms --- .../accounts/tests/test_retirement_views.py | 43 ++++++++++++++++--- 1 file changed, 37 insertions(+), 6 deletions(-) diff --git a/openedx/core/djangoapps/user_api/accounts/tests/test_retirement_views.py b/openedx/core/djangoapps/user_api/accounts/tests/test_retirement_views.py index 5e3652c2e79c..60756fb0f3d9 100644 --- a/openedx/core/djangoapps/user_api/accounts/tests/test_retirement_views.py +++ b/openedx/core/djangoapps/user_api/accounts/tests/test_retirement_views.py @@ -13,7 +13,9 @@ from django.contrib.sites.models import Site from django.core import mail from django.core.cache import cache +from django.db import connection from django.test import TestCase +from django.test.utils import CaptureQueriesContext from django.urls import reverse from enterprise.models import ( EnterpriseCourseEnrollment, @@ -1095,17 +1097,32 @@ def test_simple_success(self): def test_redaction_before_deletion(self): """ Verify that redaction (UPDATE) happens before deletion (DELETE). - Uses assertNumQueries to verify UPDATE queries execute before DELETE queries. - This protects PII from being exposed in soft-deletes to downstream data warehouses. + Captures actual SQL queries to ensure UPDATE queries contain redacted values. """ - # Use assertNumQueries to capture and verify the SQL queries execute in correct order. - with self.assertNumQueries(53): # Full request with 9 UPDATEs (redaction) + 9 DELETEs + with CaptureQueriesContext(connection) as context: self.cleanup_and_assert_status() # Verify records are deleted after redaction retirements = UserRetirementStatus.objects.all() assert retirements.count() == 0 + # Verify UPDATE queries exist with default 'redacted' value + queries = context.captured_queries + update_queries = [q for q in queries if 'UPDATE' in q['sql'] and 'user_api_userretirementstatus' in q['sql']] + delete_queries = [q for q in queries if 'DELETE' in q['sql'] and 'user_api_userretirementstatus' in q['sql']] + + # Should have 9 UPDATE and 9 DELETE queries + assert len(update_queries) == 9, f"Expected 9 UPDATE queries, found {len(update_queries)}" + assert len(delete_queries) == 9, f"Expected 9 DELETE queries, found {len(delete_queries)}" + + # Verify UPDATE queries contain the redacted values + for update_query in update_queries: + sql = update_query['sql'] + assert "'redacted'" in sql, f"UPDATE query missing 'redacted' value: {sql}" + assert 'original_username' in sql, f"UPDATE query missing original_username field: {sql}" + assert 'original_email' in sql, f"UPDATE query missing original_email field: {sql}" + assert 'original_name' in sql, f"UPDATE query missing original_name field: {sql}" + def test_custom_redacted_values(self): """Test that custom redacted values are applied before deletion.""" custom_username = 'username-redacted-12345' @@ -1118,12 +1135,26 @@ def test_custom_redacted_values(self): 'redacted_email': custom_email, 'redacted_name': custom_name } - self.cleanup_and_assert_status(data=data) - # Records should be deleted after redaction + with CaptureQueriesContext(connection) as context: + self.cleanup_and_assert_status(data=data) + + # Verify records are deleted after redaction retirements = UserRetirementStatus.objects.all() assert retirements.count() == 0 + # Verify UPDATE queries contain the custom redacted values + queries = context.captured_queries + update_queries = [q for q in queries if 'UPDATE' in q['sql'] and 'user_api_userretirementstatus' in q['sql']] + + assert len(update_queries) == 9, f"Expected 9 UPDATE queries, found {len(update_queries)}" + + for update_query in update_queries: + sql = update_query['sql'] + assert custom_username in sql, f"UPDATE query missing custom username '{custom_username}': {sql}" + assert custom_email in sql, f"UPDATE query missing custom email '{custom_email}': {sql}" + assert custom_name in sql, f"UPDATE query missing custom name '{custom_name}': {sql}" + def test_leaves_other_users(self): remaining_usernames = []