Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions openedx/core/djangoapps/user_api/accounts/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,9 @@
from django.utils.text import format_lazy
from django.utils.translation import gettext_lazy as _

# Import signals to ensure they are registered
from . import signals # noqa: F401, pylint: disable=unused-import

# The maximum length for the bio ("about me") account field
BIO_MAX_LENGTH = 300

Expand Down
34 changes: 33 additions & 1 deletion openedx/core/djangoapps/user_api/accounts/signals.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,15 @@
Django Signal related functionality for user_api accounts
"""

import logging

from django.dispatch import Signal
from django.db.models.signals import pre_delete
from django.dispatch import Signal, receiver
from social_django.models import UserSocialAuth

from .utils import redact_user_social_auth_pii

logger = logging.getLogger(__name__)

# Signal to retire a user from LMS-initiated mailings (course mailings, etc)
# providing_args=["user"]
Expand All @@ -16,3 +23,28 @@
# Signal to retire LMS misc information
# providing_args=["user"]
USER_RETIRE_LMS_MISC = Signal()


@receiver(pre_delete, sender=UserSocialAuth)
def redact_social_auth_pii_before_deletion(sender, instance, **kwargs): # pylint: disable=unused-argument
"""
Signal handler to redact PII from UserSocialAuth records before deletion.

This ensures that when SSO records are deleted (either via user retirement, manual unlinking,
or any other method), PII is redacted first. This prevents soft-deleted records in Snowflake
from retaining sensitive user information.

Note: We call redact_user_social_auth_pii which saves the redacted data before the actual
deletion happens. This is intentional - when Snowflake syncs, it will capture the redacted
state before marking the record as deleted.
"""
try:
redact_user_social_auth_pii(instance)
except Exception as e: # pylint: disable=broad-except
# Log the error but don't prevent the deletion
logger.exception(
"Failed to redact PII for UserSocialAuth before deletion: user_id=%s, provider=%s, error=%s",
instance.user_id,
instance.provider,
str(e)
)
119 changes: 118 additions & 1 deletion openedx/core/djangoapps/user_api/accounts/tests/test_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,8 +6,12 @@
from completion.test_utils import CompletionWaffleTestMixin
from django.test import TestCase
from django.test.utils import override_settings
from social_django.models import UserSocialAuth

from openedx.core.djangoapps.user_api.accounts.utils import retrieve_last_sitewide_block_completed
from openedx.core.djangoapps.user_api.accounts.utils import (
redact_user_social_auth_pii,
retrieve_last_sitewide_block_completed,
)
from openedx.core.djangolib.testing.utils import skip_unless_lms
from common.djangoapps.student.models import CourseEnrollment
from common.djangoapps.student.tests.factories import UserFactory
Expand Down Expand Up @@ -128,3 +132,116 @@ def test_retrieve_last_sitewide_block_completed(self):
)

assert empty_block_url is None


@skip_unless_lms
class RedactUserSocialAuthPIITest(TestCase):
"""
Tests for SSO PII redaction before deletion.
"""

def setUp(self):
"""
Create test user and SSO associations.
"""
super().setUp()
self.user = UserFactory.create(username='testuser', email='testuser@example.com')

def create_social_auth(self, provider='google-oauth2', uid='user@example.com', extra_data=None):
"""
Helper method to create a UserSocialAuth record.
"""
if extra_data is None:
extra_data = {
'email': 'user@example.com',
'name': 'Test User',
'id': '123456789',
}
return UserSocialAuth.objects.create(
user=self.user,
provider=provider,
uid=uid,
extra_data=extra_data,
)

def test_redact_user_social_auth_pii(self):
"""
Test that PII is redacted from UserSocialAuth records.
"""
social_auth = self.create_social_auth()

# Verify original PII is present
assert social_auth.uid == 'user@example.com'
assert social_auth.extra_data['email'] == 'user@example.com'
assert 'name' in social_auth.extra_data

# Redact PII
redact_user_social_auth_pii(social_auth)

# Refresh from database
social_auth.refresh_from_db()

# Verify PII is redacted
assert social_auth.uid == 'redacted@retired.invalid'
assert social_auth.extra_data == {}

def test_redact_user_social_auth_pii_idempotent(self):
"""
Test that redaction is idempotent (can be called multiple times safely).
"""
social_auth = self.create_social_auth()

# Redact PII twice
redact_user_social_auth_pii(social_auth)
redact_user_social_auth_pii(social_auth)

# Refresh from database
social_auth.refresh_from_db()

# Verify PII is still properly redacted
assert social_auth.uid == 'redacted@retired.invalid'
assert social_auth.extra_data == {}

def test_redact_pii_before_deletion_via_signal(self):
"""
Test that the pre_delete signal automatically redacts PII before deletion.
"""
social_auth = self.create_social_auth()
social_auth_id = social_auth.id

# Delete the record - this should trigger the signal
social_auth.delete()

# Since the record is deleted, we can't verify the redacted state directly.
# But we can test that the deletion completed without errors.
# In Snowflake, the soft-deleted record would have redacted PII.
assert not UserSocialAuth.objects.filter(id=social_auth_id).exists()

def test_redact_multiple_sso_providers(self):
"""
Test that PII is redacted for multiple SSO providers.
"""
google_auth = self.create_social_auth(
provider='google-oauth2',
uid='google@example.com',
extra_data={'email': 'google@example.com', 'name': 'Google User'}
)
saml_auth = self.create_social_auth(
provider='tpa-saml',
uid='saml@example.com',
extra_data={'email': 'saml@example.com', 'name': 'SAML User', 'uid': 'saml-uid'}
)

# Redact both
redact_user_social_auth_pii(google_auth)
redact_user_social_auth_pii(saml_auth)

# Refresh from database
google_auth.refresh_from_db()
saml_auth.refresh_from_db()

# Verify both are redacted
assert google_auth.uid == 'redacted@retired.invalid'
assert google_auth.extra_data == {}
assert saml_auth.uid == 'redacted@retired.invalid'
assert saml_auth.extra_data == {}
58 changes: 55 additions & 3 deletions openedx/core/djangoapps/user_api/accounts/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,11 @@
from edx_django_utils.user import generate_password
from social_django.models import UserSocialAuth

from common.djangoapps.student.models import AccountRecovery, Registration, get_retired_email_by_email
from common.djangoapps.student.models import (
AccountRecovery,
Registration,
get_retired_email_by_email,
)
from openedx.core.djangoapps.site_configuration.models import SiteConfiguration
from openedx.core.djangoapps.theming.helpers import get_config_value_from_site_or_settings, get_current_site
from openedx.core.djangolib.oauth2_retirement_utils import retire_dot_oauth2_models
Expand Down Expand Up @@ -196,6 +200,52 @@ def is_secondary_email_feature_enabled():
return waffle.switch_is_active(ENABLE_SECONDARY_EMAIL_FEATURE_SWITCH)


def redact_user_social_auth_pii(user_social_auth):
"""
Redacts PII from a UserSocialAuth record before deletion.

This ensures that soft-deleted records in Snowflake do not retain sensitive user information.
When a user unlinks or retires, the SSO record is deleted from the LMS database but persists
as a soft-deleted record in Snowflake. This function redacts PII fields before deletion.

Args:
user_social_auth: UserSocialAuth instance to redact

Note:
This should be called before deleting any UserSocialAuth record, regardless of whether
the deletion is triggered by user retirement, manual unlinking, or any other method.
Since re-linking always creates a new entry, there is no need to preserve PII in deleted records.

This function is idempotent - it can be called multiple times safely and will only
redact once.
"""
# Check if already redacted
if user_social_auth.uid == 'redacted@retired.invalid' and user_social_auth.extra_data == {}:
LOGGER.debug(
"UserSocialAuth record already redacted: user_id=%s, provider=%s, id=%s",
user_social_auth.user_id,
user_social_auth.provider,
user_social_auth.id
)
return

# Redact the UID field - this often contains email or other identifiable information
user_social_auth.uid = 'redacted@retired.invalid'

# Redact extra_data which may contain various PII fields from the SSO provider
user_social_auth.extra_data = {}

# Save the redacted record
user_social_auth.save()

LOGGER.info(
"Redacted PII for UserSocialAuth record: user_id=%s, provider=%s, id=%s",
user_social_auth.user_id,
user_social_auth.provider,
user_social_auth.id
)


def create_retirement_request_and_deactivate_account(user):
"""
Adds user to retirement queue, unlinks social auth accounts, changes user passwords
Expand All @@ -204,8 +254,10 @@ def create_retirement_request_and_deactivate_account(user):
# Add user to retirement queue.
UserRetirementStatus.create_retirement(user)

# Unlink LMS social auth accounts
UserSocialAuth.objects.filter(user_id=user.id).delete()
# Redact and unlink LMS social auth accounts
for social_auth in UserSocialAuth.objects.filter(user_id=user.id):
redact_user_social_auth_pii(social_auth)
social_auth.delete()

# Change LMS password & email
user.email = get_retired_email_by_email(user.email)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -7,10 +7,15 @@
from django.db import transaction
from social_django.models import UserSocialAuth

from common.djangoapps.student.models import AccountRecovery, Registration, get_retired_email_by_email
from common.djangoapps.student.models import (
AccountRecovery,
Registration,
get_retired_email_by_email,
)
from openedx.core.djangolib.oauth2_retirement_utils import retire_dot_oauth2_models

from ...models import BulkUserRetirementConfig, UserRetirementStatus
from openedx.core.djangoapps.user_api.accounts.utils import redact_user_social_auth_pii

logger = logging.getLogger(__name__)

Expand Down Expand Up @@ -144,8 +149,10 @@ def handle(self, *args, **options):
for user in users:
# Add user to retirement queue.
UserRetirementStatus.create_retirement(user)
# Unlink LMS social auth accounts
UserSocialAuth.objects.filter(user_id=user.id).delete()
# Redact and unlink LMS social auth accounts
for social_auth in UserSocialAuth.objects.filter(user_id=user.id):
redact_user_social_auth_pii(social_auth)
social_auth.delete()
# Change LMS password & email
user.email = get_retired_email_by_email(user.email)
user.set_unusable_password()
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -3,18 +3,21 @@
"""


import csv
import os

import pytest
from django.contrib.auth.models import User # lint-amnesty, pylint: disable=imported-auth-user
from django.core.management import CommandError, call_command
from social_django.models import UserSocialAuth

from ...models import UserRetirementStatus
from common.djangoapps.student.tests.factories import UserFactory # lint-amnesty, pylint: disable=wrong-import-order
from openedx.core.djangoapps.user_api.accounts.tests.retirement_helpers import ( # lint-amnesty, pylint: disable=unused-import, wrong-import-order
setup_retirement_states
setup_retirement_states, # noqa: F401
)
from openedx.core.djangolib.testing.utils import skip_unless_lms # lint-amnesty, pylint: disable=wrong-import-order
from common.djangoapps.student.tests.factories import UserFactory # lint-amnesty, pylint: disable=wrong-import-order
import csv
import os

from ...models import UserRetirementStatus

pytestmark = pytest.mark.django_db
user_file = 'userfile.csv'
Expand Down Expand Up @@ -105,3 +108,71 @@ def test_retire_with_username_email_userfile(setup_retirement_states): # lint-a
with pytest.raises(CommandError, match=r'You cannot use userfile option with username and user_email'):
call_command('retire_user', user_file=user_file, username=username, user_email=user_email)
remove_user_file()


@skip_unless_lms
def test_retire_user_redacts_sso_pii_before_deletion(setup_retirement_states): # lint-amnesty, pylint: disable=redefined-outer-name, unused-argument
"""
Test that SSO PII is redacted before deletion during user retirement.
"""
user = UserFactory.create(username='sso-user', email='sso-user@example.com')

# Create SSO association with PII
social_auth = UserSocialAuth.objects.create(
user=user,
provider='google-oauth2',
uid='sso-user@example.com',
extra_data={
'email': 'sso-user@example.com',
'name': 'SSO Test User',
'id': '123456789',
}
)
social_auth_id = social_auth.id

# Retire the user
call_command('retire_user', username=user.username, user_email=user.email)

# Verify SSO record was deleted
assert not UserSocialAuth.objects.filter(id=social_auth_id).exists()

# Verify user retirement was successful
retired_user_status = UserRetirementStatus.objects.filter(original_username=user.username).first()
assert retired_user_status is not None
assert retired_user_status.original_email == 'sso-user@example.com'


@skip_unless_lms
def test_retire_user_redacts_multiple_sso_providers(setup_retirement_states): # lint-amnesty, pylint: disable=redefined-outer-name, unused-argument
"""
Test that multiple SSO providers have their PII redacted before deletion.
"""
user = UserFactory.create(username='multi-sso-user', email='multi-sso@example.com')

# Create multiple SSO associations
google_auth = UserSocialAuth.objects.create(
user=user,
provider='google-oauth2',
uid='google-multi@example.com',
extra_data={'email': 'google-multi@example.com', 'name': 'Google User'}
)
saml_auth = UserSocialAuth.objects.create(
user=user,
provider='tpa-saml',
uid='saml-multi@example.com',
extra_data={'email': 'saml-multi@example.com', 'name': 'SAML User', 'uid': 'saml-123'}
)

google_id = google_auth.id
saml_id = saml_auth.id

# Retire the user
call_command('retire_user', username=user.username, user_email=user.email)

# Verify both SSO records were deleted
assert not UserSocialAuth.objects.filter(id=google_id).exists()
assert not UserSocialAuth.objects.filter(id=saml_id).exists()

# Verify user retirement was successful
retired_user_status = UserRetirementStatus.objects.filter(original_username=user.username).first()
assert retired_user_status is not None
Loading