diff --git a/openedx/core/djangoapps/user_api/accounts/__init__.py b/openedx/core/djangoapps/user_api/accounts/__init__.py index caf78ca54ae0..bb7d284e781d 100644 --- a/openedx/core/djangoapps/user_api/accounts/__init__.py +++ b/openedx/core/djangoapps/user_api/accounts/__init__.py @@ -6,6 +6,9 @@ from django.utils.text import format_lazy from django.utils.translation import gettext_lazy as _ +# Import signals to ensure they are registered +from . import signals # noqa: F401, pylint: disable=unused-import + # The maximum length for the bio ("about me") account field BIO_MAX_LENGTH = 300 diff --git a/openedx/core/djangoapps/user_api/accounts/signals.py b/openedx/core/djangoapps/user_api/accounts/signals.py index a51a8160c93c..f2ce581cc361 100644 --- a/openedx/core/djangoapps/user_api/accounts/signals.py +++ b/openedx/core/djangoapps/user_api/accounts/signals.py @@ -2,8 +2,15 @@ Django Signal related functionality for user_api accounts """ +import logging -from django.dispatch import Signal +from django.db.models.signals import pre_delete +from django.dispatch import Signal, receiver +from social_django.models import UserSocialAuth + +from .utils import redact_user_social_auth_pii + +logger = logging.getLogger(__name__) # Signal to retire a user from LMS-initiated mailings (course mailings, etc) # providing_args=["user"] @@ -16,3 +23,28 @@ # Signal to retire LMS misc information # providing_args=["user"] USER_RETIRE_LMS_MISC = Signal() + + +@receiver(pre_delete, sender=UserSocialAuth) +def redact_social_auth_pii_before_deletion(sender, instance, **kwargs): # pylint: disable=unused-argument + """ + Signal handler to redact PII from UserSocialAuth records before deletion. + + This ensures that when SSO records are deleted (either via user retirement, manual unlinking, + or any other method), PII is redacted first. This prevents soft-deleted records in Snowflake + from retaining sensitive user information. + + Note: We call redact_user_social_auth_pii which saves the redacted data before the actual + deletion happens. This is intentional - when Snowflake syncs, it will capture the redacted + state before marking the record as deleted. + """ + try: + redact_user_social_auth_pii(instance) + except Exception as e: # pylint: disable=broad-except + # Log the error but don't prevent the deletion + logger.exception( + "Failed to redact PII for UserSocialAuth before deletion: user_id=%s, provider=%s, error=%s", + instance.user_id, + instance.provider, + str(e) + ) diff --git a/openedx/core/djangoapps/user_api/accounts/tests/test_utils.py b/openedx/core/djangoapps/user_api/accounts/tests/test_utils.py index 3b9293487361..0bbf3f9551aa 100644 --- a/openedx/core/djangoapps/user_api/accounts/tests/test_utils.py +++ b/openedx/core/djangoapps/user_api/accounts/tests/test_utils.py @@ -6,8 +6,12 @@ from completion.test_utils import CompletionWaffleTestMixin from django.test import TestCase from django.test.utils import override_settings +from social_django.models import UserSocialAuth -from openedx.core.djangoapps.user_api.accounts.utils import retrieve_last_sitewide_block_completed +from openedx.core.djangoapps.user_api.accounts.utils import ( + redact_user_social_auth_pii, + retrieve_last_sitewide_block_completed, +) from openedx.core.djangolib.testing.utils import skip_unless_lms from common.djangoapps.student.models import CourseEnrollment from common.djangoapps.student.tests.factories import UserFactory @@ -128,3 +132,116 @@ def test_retrieve_last_sitewide_block_completed(self): ) assert empty_block_url is None + + +@skip_unless_lms +class RedactUserSocialAuthPIITest(TestCase): + """ + Tests for SSO PII redaction before deletion. + """ + + def setUp(self): + """ + Create test user and SSO associations. + """ + super().setUp() + self.user = UserFactory.create(username='testuser', email='testuser@example.com') + + def create_social_auth(self, provider='google-oauth2', uid='user@example.com', extra_data=None): + """ + Helper method to create a UserSocialAuth record. + """ + if extra_data is None: + extra_data = { + 'email': 'user@example.com', + 'name': 'Test User', + 'id': '123456789', + } + return UserSocialAuth.objects.create( + user=self.user, + provider=provider, + uid=uid, + extra_data=extra_data, + ) + + def test_redact_user_social_auth_pii(self): + """ + Test that PII is redacted from UserSocialAuth records. + """ + social_auth = self.create_social_auth() + + # Verify original PII is present + assert social_auth.uid == 'user@example.com' + assert social_auth.extra_data['email'] == 'user@example.com' + assert 'name' in social_auth.extra_data + + # Redact PII + redact_user_social_auth_pii(social_auth) + + # Refresh from database + social_auth.refresh_from_db() + + # Verify PII is redacted + assert social_auth.uid == 'redacted@retired.invalid' + assert social_auth.extra_data == {} + + def test_redact_user_social_auth_pii_idempotent(self): + """ + Test that redaction is idempotent (can be called multiple times safely). + """ + social_auth = self.create_social_auth() + + # Redact PII twice + redact_user_social_auth_pii(social_auth) + redact_user_social_auth_pii(social_auth) + + # Refresh from database + social_auth.refresh_from_db() + + # Verify PII is still properly redacted + assert social_auth.uid == 'redacted@retired.invalid' + assert social_auth.extra_data == {} + + def test_redact_pii_before_deletion_via_signal(self): + """ + Test that the pre_delete signal automatically redacts PII before deletion. + """ + social_auth = self.create_social_auth() + social_auth_id = social_auth.id + + # Delete the record - this should trigger the signal + social_auth.delete() + + # Since the record is deleted, we can't verify the redacted state directly. + # But we can test that the deletion completed without errors. + # In Snowflake, the soft-deleted record would have redacted PII. + assert not UserSocialAuth.objects.filter(id=social_auth_id).exists() + + def test_redact_multiple_sso_providers(self): + """ + Test that PII is redacted for multiple SSO providers. + """ + google_auth = self.create_social_auth( + provider='google-oauth2', + uid='google@example.com', + extra_data={'email': 'google@example.com', 'name': 'Google User'} + ) + saml_auth = self.create_social_auth( + provider='tpa-saml', + uid='saml@example.com', + extra_data={'email': 'saml@example.com', 'name': 'SAML User', 'uid': 'saml-uid'} + ) + + # Redact both + redact_user_social_auth_pii(google_auth) + redact_user_social_auth_pii(saml_auth) + + # Refresh from database + google_auth.refresh_from_db() + saml_auth.refresh_from_db() + + # Verify both are redacted + assert google_auth.uid == 'redacted@retired.invalid' + assert google_auth.extra_data == {} + assert saml_auth.uid == 'redacted@retired.invalid' + assert saml_auth.extra_data == {} diff --git a/openedx/core/djangoapps/user_api/accounts/utils.py b/openedx/core/djangoapps/user_api/accounts/utils.py index 826dbd42cd13..0f94fa0816c1 100644 --- a/openedx/core/djangoapps/user_api/accounts/utils.py +++ b/openedx/core/djangoapps/user_api/accounts/utils.py @@ -15,7 +15,11 @@ from edx_django_utils.user import generate_password from social_django.models import UserSocialAuth -from common.djangoapps.student.models import AccountRecovery, Registration, get_retired_email_by_email +from common.djangoapps.student.models import ( + AccountRecovery, + Registration, + get_retired_email_by_email, +) from openedx.core.djangoapps.site_configuration.models import SiteConfiguration from openedx.core.djangoapps.theming.helpers import get_config_value_from_site_or_settings, get_current_site from openedx.core.djangolib.oauth2_retirement_utils import retire_dot_oauth2_models @@ -196,6 +200,52 @@ def is_secondary_email_feature_enabled(): return waffle.switch_is_active(ENABLE_SECONDARY_EMAIL_FEATURE_SWITCH) +def redact_user_social_auth_pii(user_social_auth): + """ + Redacts PII from a UserSocialAuth record before deletion. + + This ensures that soft-deleted records in Snowflake do not retain sensitive user information. + When a user unlinks or retires, the SSO record is deleted from the LMS database but persists + as a soft-deleted record in Snowflake. This function redacts PII fields before deletion. + + Args: + user_social_auth: UserSocialAuth instance to redact + + Note: + This should be called before deleting any UserSocialAuth record, regardless of whether + the deletion is triggered by user retirement, manual unlinking, or any other method. + Since re-linking always creates a new entry, there is no need to preserve PII in deleted records. + + This function is idempotent - it can be called multiple times safely and will only + redact once. + """ + # Check if already redacted + if user_social_auth.uid == 'redacted@retired.invalid' and user_social_auth.extra_data == {}: + LOGGER.debug( + "UserSocialAuth record already redacted: user_id=%s, provider=%s, id=%s", + user_social_auth.user_id, + user_social_auth.provider, + user_social_auth.id + ) + return + + # Redact the UID field - this often contains email or other identifiable information + user_social_auth.uid = 'redacted@retired.invalid' + + # Redact extra_data which may contain various PII fields from the SSO provider + user_social_auth.extra_data = {} + + # Save the redacted record + user_social_auth.save() + + LOGGER.info( + "Redacted PII for UserSocialAuth record: user_id=%s, provider=%s, id=%s", + user_social_auth.user_id, + user_social_auth.provider, + user_social_auth.id + ) + + def create_retirement_request_and_deactivate_account(user): """ Adds user to retirement queue, unlinks social auth accounts, changes user passwords @@ -204,8 +254,10 @@ def create_retirement_request_and_deactivate_account(user): # Add user to retirement queue. UserRetirementStatus.create_retirement(user) - # Unlink LMS social auth accounts - UserSocialAuth.objects.filter(user_id=user.id).delete() + # Redact and unlink LMS social auth accounts + for social_auth in UserSocialAuth.objects.filter(user_id=user.id): + redact_user_social_auth_pii(social_auth) + social_auth.delete() # Change LMS password & email user.email = get_retired_email_by_email(user.email) diff --git a/openedx/core/djangoapps/user_api/management/commands/retire_user.py b/openedx/core/djangoapps/user_api/management/commands/retire_user.py index f1ef928e2712..6e0bf9a0f8e6 100644 --- a/openedx/core/djangoapps/user_api/management/commands/retire_user.py +++ b/openedx/core/djangoapps/user_api/management/commands/retire_user.py @@ -7,10 +7,15 @@ from django.db import transaction from social_django.models import UserSocialAuth -from common.djangoapps.student.models import AccountRecovery, Registration, get_retired_email_by_email +from common.djangoapps.student.models import ( + AccountRecovery, + Registration, + get_retired_email_by_email, +) from openedx.core.djangolib.oauth2_retirement_utils import retire_dot_oauth2_models from ...models import BulkUserRetirementConfig, UserRetirementStatus +from openedx.core.djangoapps.user_api.accounts.utils import redact_user_social_auth_pii logger = logging.getLogger(__name__) @@ -144,8 +149,10 @@ def handle(self, *args, **options): for user in users: # Add user to retirement queue. UserRetirementStatus.create_retirement(user) - # Unlink LMS social auth accounts - UserSocialAuth.objects.filter(user_id=user.id).delete() + # Redact and unlink LMS social auth accounts + for social_auth in UserSocialAuth.objects.filter(user_id=user.id): + redact_user_social_auth_pii(social_auth) + social_auth.delete() # Change LMS password & email user.email = get_retired_email_by_email(user.email) user.set_unusable_password() diff --git a/openedx/core/djangoapps/user_api/management/tests/test_retire_user.py b/openedx/core/djangoapps/user_api/management/tests/test_retire_user.py index 023efb54f4d7..c0ed4bb572ac 100644 --- a/openedx/core/djangoapps/user_api/management/tests/test_retire_user.py +++ b/openedx/core/djangoapps/user_api/management/tests/test_retire_user.py @@ -3,18 +3,21 @@ """ +import csv +import os + import pytest from django.contrib.auth.models import User # lint-amnesty, pylint: disable=imported-auth-user from django.core.management import CommandError, call_command +from social_django.models import UserSocialAuth -from ...models import UserRetirementStatus +from common.djangoapps.student.tests.factories import UserFactory # lint-amnesty, pylint: disable=wrong-import-order from openedx.core.djangoapps.user_api.accounts.tests.retirement_helpers import ( # lint-amnesty, pylint: disable=unused-import, wrong-import-order - setup_retirement_states + setup_retirement_states, # noqa: F401 ) from openedx.core.djangolib.testing.utils import skip_unless_lms # lint-amnesty, pylint: disable=wrong-import-order -from common.djangoapps.student.tests.factories import UserFactory # lint-amnesty, pylint: disable=wrong-import-order -import csv -import os + +from ...models import UserRetirementStatus pytestmark = pytest.mark.django_db user_file = 'userfile.csv' @@ -105,3 +108,71 @@ def test_retire_with_username_email_userfile(setup_retirement_states): # lint-a with pytest.raises(CommandError, match=r'You cannot use userfile option with username and user_email'): call_command('retire_user', user_file=user_file, username=username, user_email=user_email) remove_user_file() + + +@skip_unless_lms +def test_retire_user_redacts_sso_pii_before_deletion(setup_retirement_states): # lint-amnesty, pylint: disable=redefined-outer-name, unused-argument + """ + Test that SSO PII is redacted before deletion during user retirement. + """ + user = UserFactory.create(username='sso-user', email='sso-user@example.com') + + # Create SSO association with PII + social_auth = UserSocialAuth.objects.create( + user=user, + provider='google-oauth2', + uid='sso-user@example.com', + extra_data={ + 'email': 'sso-user@example.com', + 'name': 'SSO Test User', + 'id': '123456789', + } + ) + social_auth_id = social_auth.id + + # Retire the user + call_command('retire_user', username=user.username, user_email=user.email) + + # Verify SSO record was deleted + assert not UserSocialAuth.objects.filter(id=social_auth_id).exists() + + # Verify user retirement was successful + retired_user_status = UserRetirementStatus.objects.filter(original_username=user.username).first() + assert retired_user_status is not None + assert retired_user_status.original_email == 'sso-user@example.com' + + +@skip_unless_lms +def test_retire_user_redacts_multiple_sso_providers(setup_retirement_states): # lint-amnesty, pylint: disable=redefined-outer-name, unused-argument + """ + Test that multiple SSO providers have their PII redacted before deletion. + """ + user = UserFactory.create(username='multi-sso-user', email='multi-sso@example.com') + + # Create multiple SSO associations + google_auth = UserSocialAuth.objects.create( + user=user, + provider='google-oauth2', + uid='google-multi@example.com', + extra_data={'email': 'google-multi@example.com', 'name': 'Google User'} + ) + saml_auth = UserSocialAuth.objects.create( + user=user, + provider='tpa-saml', + uid='saml-multi@example.com', + extra_data={'email': 'saml-multi@example.com', 'name': 'SAML User', 'uid': 'saml-123'} + ) + + google_id = google_auth.id + saml_id = saml_auth.id + + # Retire the user + call_command('retire_user', username=user.username, user_email=user.email) + + # Verify both SSO records were deleted + assert not UserSocialAuth.objects.filter(id=google_id).exists() + assert not UserSocialAuth.objects.filter(id=saml_id).exists() + + # Verify user retirement was successful + retired_user_status = UserRetirementStatus.objects.filter(original_username=user.username).first() + assert retired_user_status is not None