Skip to content
26 changes: 25 additions & 1 deletion app/api/auth.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
import base64
import logging
import random
import re
import string
from datetime import timedelta
from functools import wraps
Expand Down Expand Up @@ -51,6 +52,27 @@
auth_routes = Blueprint('auth', __name__, url_prefix='/v1/auth')


def sanitize_for_logging(text):
"""
Remove control characters from user input before logging to prevent log injection.

Security Issue #9120: User-provided data like emails can contain newlines, carriage
returns, or tabs that allow attackers to inject false log entries, corrupt log files,
bypass log analysis tools, or hide malicious activity.

Example Attack: email="user@test.com\\nFAKE: Admin login successful from 1.2.3.4"

Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

🚨 suggestion (security): Consider broadening the sanitization to cover all control characters, not just newlines, carriage returns, and tabs.

The current regex only removes \n, \r, and \t. Other ASCII control characters (e.g., \x00-\x1F and \x7F) may also cause log-parsing issues or enable injection in some sinks. Consider using a broader pattern like r"[\x00-\x1F\x7F]" to strip all control characters, or clearly document why the mitigation is limited to these three characters.

Suggested implementation:

    Security Issue #9120: User-provided data like emails can contain control characters
    (ASCII 0x00-0x1F and 0x7F) that allow attackers to inject false log entries, corrupt
    log files, bypass log analysis tools, or hide malicious activity.
    Example Attack: email="user@test.com\\nFAKE: Admin login successful from 1.2.3.4"
    if not text:
        return text
    # Strip all ASCII control characters (0x00-0x1F and 0x7F) to prevent log injection
    return re.sub(r'[\x00-\x1F\x7F]', '', text)

Args:
text (str): User-provided input to sanitize

Returns:
str: Text with control characters (\\n, \\r, \\t) removed
"""
if not text:
return text
return re.sub(r'[\n\r\t]', '', text)
Comment on lines +71 to +73
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

suggestion (code-quality): We've found these issues:

Suggested change
if not text:
return text
return re.sub(r'[\n\r\t]', '', text)
return text if not text else re.sub(r'[\n\r\t]', '', text)



def authenticate(allow_refresh_token=False, existing_identity=None):
data = request.get_json()
username = data.get('email', data.get('username'))
Expand Down Expand Up @@ -320,7 +342,9 @@ def resend_verification_email():
try:
user = User.query.filter_by(email=email).one()
except NoResultFound:
logging.info('User with email: ' + email + ' not found.')
# Sanitize email to prevent log injection (Issue #9120)
safe_email = sanitize_for_logging(email)
logging.info(f'User with email: {safe_email} not found.')
raise UnprocessableEntityError(
{'source': ''}, 'User with email: ' + email + ' not found.'
Copy link

Copilot AI Dec 4, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

[nitpick] The error message still uses the unsanitized email variable. While this is returned to the user (not logged), it's inconsistent with the security fix applied to the logging statement above. Consider using safe_email here as well for consistency, or document why the unsanitized version is acceptable for user-facing error messages.

Note: This is less critical than the log injection issue since error messages are typically not parsed by log analysis tools, but consistency would improve code clarity.

Suggested change
{'source': ''}, 'User with email: ' + email + ' not found.'
{'source': ''}, 'User with email: ' + safe_email + ' not found.'

Copilot uses AI. Check for mistakes.
)
Expand Down
69 changes: 69 additions & 0 deletions tests/all/integration/api/test_auth_log_injection.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,69 @@
"""
Test for log injection vulnerability in auth.py (Issue #9120)
Tests that user-provided email addresses cannot inject malicious content into logs
Comment on lines +1 to +3
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

suggestion (testing): Consider aligning the test type with its location (integration vs unit) or expanding it to true integration coverage

This file is under tests/all/integration/api/ but only contains unit-style tests over strings/regexes without touching the API or logging system, which can be confusing. Either move these tests to the usual unit-test location for helpers, or expand them into true integration tests that call /v1/auth/resend-verification-email and verify logs. This keeps the test suite structure consistent and easier to navigate.

Suggested implementation:

"""
Tests for log injection vulnerability in auth.py (Issue #9120).

This module lives under `tests/all/integration/api/` and therefore provides
true integration coverage by exercising the `/v1/auth/resend-verification-email`
endpoint and asserting on what is written to the logging subsystem. It may
also contain focused unit-style assertions over the sanitization helpers to
guard against regressions in the underlying regex logic.
"""
import logging
import re


def test_resend_verification_email_log_sanitization(api_client, caplog):
    """
    Integration test for log injection vulnerability (Issue #9120).

    Sends a request to the resend-verification-email endpoint with a potentially
    malicious email address and verifies that the value written to logs has
    been sanitized (no raw control characters that could be used for log
    injection), while the base email content is still present.
    """
    malicious_email = "attacker@example.com\nERROR: forged entry\n\tat fake_traceback"

    # This should mirror the behavior of `sanitize_for_logging` in auth.py so
    # the expectation here stays aligned with the implementation.
    sanitized_email = re.sub(r"[\r\n\t]", "", malicious_email)

    with caplog.at_level(logging.INFO):
        response = api_client.post(
            "/v1/auth/resend-verification-email",
            json={"email": malicious_email},
        )

    # We don't assert a specific status code here because different environments
    # may have different user/account states; we just require that the endpoint
    # responds successfully at the HTTP layer and does not error out.
    assert 200 <= response.status_code < 500

    log_output = "\n".join(caplog.messages)
    # Raw, unsanitized value must never appear in logs.
    assert malicious_email not in log_output
    # Sanitized value should be what actually gets logged.
    assert sanitized_email in log_output


def test_log_sanitization_for_email():

To wire this up cleanly in your codebase, you may need to:

  1. Ensure the api_client fixture name matches your existing integration-test client fixture (e.g. it might be called client, test_client, or similar). If so, adjust the function signature accordingly:

    • def test_resend_verification_email_log_sanitization(client, caplog):
    • and use client.post(...) inside the test.
  2. If your logging is done under a specific logger name (e.g. "auth" or "myapp.auth"), tighten the caplog context to that logger so the test is less brittle:

    • with caplog.at_level(logging.INFO, logger="myapp.auth"):.
  3. Confirm that the endpoint path and payload match your actual API:

    • If the route differs (e.g. /api/v1/auth/resend-verification-email or uses form-encoded data), update the post call accordingly.
  4. Keep the sanitize_for_logging implementation in auth.py using the same control-character pattern ([\r\n\t]). If the implementation diverges (e.g. more characters are stripped), mirror that behavior in the sanitized_email computation so the expectation remains accurate.

"""
import logging
Copy link

Copilot AI Dec 4, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The logging module is imported but never used in the test file. It's only referenced in a comment at line 34. Consider removing this unused import.

Suggested change
import logging

Copilot uses AI. Check for mistakes.
import re
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

suggestion (testing): Use the actual sanitize_for_logging helper instead of re-implementing the regex in tests

Both tests call re.sub(r'[\n\r\t]', '', ...) directly, so they would still pass even if sanitize_for_logging() changed or was removed, as long as this regex stayed the same. Import and call sanitize_for_logging from app.api.auth so the tests exercise the real helper and will fail on future regressions in its behavior.



def test_log_sanitization_for_email():
"""
Unit test for log injection vulnerability (Issue #9120)
Tests that email addresses with injection characters are properly sanitized
before being logged to prevent log file corruption.
Security Impact: Without sanitization, attackers can:
- Inject false log entries (e.g., fake admin logins)
- Corrupt log file structure
- Bypass log analysis tools
- Hide malicious activity
Example attack: email="user@test.com\nFAKE: Admin login successful"
"""
# Simulate the vulnerable code pattern from auth.py line 323
malicious_inputs = [
"test@example.com\nFAKE: Admin logged in from 1.2.3.4",
"test@example.com\rFAKE: Password reset",
"test@example.com\t\t\tFAKE_COLUMN",
"test@example.com\n\rMultiline\nInjection\rAttempt",
]

for malicious_email in malicious_inputs:
# This represents the VULNERABLE code pattern:
# logging.info('User with email: ' + email + ' not found.')

# Vulnerability demonstration: raw concatenation allows injection
vulnerable_log_message = 'User with email: ' + malicious_email + ' not found.'

# Check 1: Vulnerable pattern contains control characters (SECURITY ISSUE)
Comment on lines 31 to 38
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

suggestion: Simplify and clarify the injection check for control characters

Using the ' ' literal makes the condition’s intent unclear and ties it to a specific number of tabs. Prefer checking for a single tab (and letting in match the sequence), e.g. [' ', ' ', ' '] or any(c in vulnerable_log_message for c in [' ', ' ', ' ']), so the assertion simply verifies the presence of control characters instead of a particular pattern length.

Suggested change
for malicious_email in malicious_inputs:
# This represents the VULNERABLE code pattern:
# logging.info('User with email: ' + email + ' not found.')
# Vulnerability demonstration: raw concatenation allows injection
vulnerable_log_message = 'User with email: ' + malicious_email + ' not found.'
# Check 1: Vulnerable pattern contains control characters (SECURITY ISSUE)
for malicious_email in malicious_inputs:
# This represents the VULNERABLE code pattern:
# logging.info('User with email: ' + email + ' not found.')
# Vulnerability demonstration: raw concatenation allows injection
vulnerable_log_message = 'User with email: ' + malicious_email + ' not found.'
# Check 1: Vulnerable pattern contains control characters (SECURITY ISSUE)
has_injection = any(
control_char in vulnerable_log_message
for control_char in ['\n', '\r', '\t']
)
assert has_injection, (
f"Test setup error: Expected control characters in: {repr(vulnerable_log_message)}"
)

has_injection = any(char in vulnerable_log_message for char in ['\n', '\r', '\t\t\t'])
Copy link

Copilot AI Dec 4, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The check for tab characters is incorrect. The code checks for the literal string '\t\t\t' (three tabs) instead of a single tab character '\t'. This means the test will fail for the third test case which only contains three tabs in sequence.

The issue is that '\t\t\t' in vulnerable_log_message checks if the exact sequence of three tabs appears anywhere as a substring, but any(char in vulnerable_log_message for char in ['\n', '\r', '\t\t\t']) treats '\t\t\t' as a single "character" to search for.

Fix by replacing '\t\t\t' with '\t':

has_injection = any(char in vulnerable_log_message for char in ['\n', '\r', '\t'])
Suggested change
has_injection = any(char in vulnerable_log_message for char in ['\n', '\r', '\t\t\t'])
has_injection = any(char in vulnerable_log_message for char in ['\n', '\r', '\t'])

Copilot uses AI. Check for mistakes.
assert has_injection, \
f"Test setup error: Expected injection characters in: {repr(vulnerable_log_message)}"

# Check 2: After sanitization, these characters should be removed/escaped
# This test will PASS after the fix is implemented in auth.py
sanitized_email = re.sub(r'[\n\r\t]', '', malicious_email)
safe_log_message = 'User with email: ' + sanitized_email + ' not found.'
Comment on lines 36 to 46
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

issue (code-quality): Use f-string instead of string concatenation [×4] (use-fstring-for-concatenation)


# This assertion documents the expected fix:
# After fix, sanitized logs should not contain injection attempts
assert '\nFAKE:' not in safe_log_message, \
f"Sanitized message should not contain newline injection: {safe_log_message}"
assert '\rFAKE:' not in safe_log_message, \
f"Sanitized message should not contain CR injection: {safe_log_message}"
Comment on lines 31 to 53
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

issue (code-quality): Avoid loops in tests. (no-loop-in-tests)

ExplanationAvoid complex code, like loops, in test functions.

Google's software engineering guidelines says:
"Clear tests are trivially correct upon inspection"
To reach that avoid complex code in tests:

  • loops
  • conditionals

Some ways to fix this:

  • Use parametrized tests to get rid of the loop.
  • Move the complex logic into helpers.
  • Move the complex part into pytest fixtures.

Complexity is most often introduced in the form of logic. Logic is defined via the imperative parts of programming languages such as operators, loops, and conditionals. When a piece of code contains logic, you need to do a bit of mental computation to determine its result instead of just reading it off of the screen. It doesn't take much logic to make a test more difficult to reason about.

Software Engineering at Google / Don't Put Logic in Tests



def test_normal_email_unchanged_after_sanitization():
"""Test that normal emails remain unchanged after sanitization"""
normal_emails = [
"user@example.com",
"test.user+tag@domain.co.uk",
"admin@localhost",
]

for email in normal_emails:
# Sanitization should not affect legitimate emails
Comment on lines 56 to 65
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

suggestion (testing): Add tests for edge cases like empty strings and None inputs to match sanitize_for_logging behavior

sanitize_for_logging handles falsy inputs (if not text: return text), but the tests only cover non-empty, well-formed emails. Please add cases for:

  • "" (empty string)
  • None
  • Strings of only control characters (e.g. "\n", "\r\t")

You can put these either into this test or a dedicated one that uses the real sanitize_for_logging helper so its edge-case behavior is explicitly verified.

Suggested implementation:

def test_normal_email_unchanged_after_sanitization():
    """Test that normal emails remain unchanged after sanitization"""
    normal_emails = [
        "user@example.com",
        "test.user+tag@domain.co.uk",
        "admin@localhost",
    ]

    for email in normal_emails:
        # Sanitization should not affect legitimate emails
        sanitized = sanitize_for_logging(email)
        assert sanitized == email, (
            f"Normal email should remain unchanged: {email} -> {sanitized}"
        )


def test_sanitize_for_logging_edge_cases():
    """Test sanitize_for_logging behavior on falsy and control-character-only inputs."""
    # Empty string should be returned as-is
    assert sanitize_for_logging("") == ""

    # None should be returned as-is (function passes through falsy inputs)
    assert sanitize_for_logging(None) is None

    # Strings with only control characters should be stripped to empty string
    assert sanitize_for_logging("\n") == ""
    assert sanitize_for_logging("\r\t") == ""
    assert sanitize_for_logging("\n\r\t\n") == ""

To compile and run these tests, you need to import sanitize_for_logging at the top of tests/all/integration/api/test_auth_log_injection.py from the module where it is defined in your codebase. For example, if it lives in app.utils.logging, add:

from app.utils.logging import sanitize_for_logging

Adjust the import path to match the actual location of sanitize_for_logging.

sanitized = re.sub(r'[\n\r\t]', '', email)
assert sanitized == email, \
f"Normal email should remain unchanged: {email} -> {sanitized}"
Comment on lines +64 to +68
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

issue (code-quality): Avoid loops in tests. (no-loop-in-tests)

ExplanationAvoid complex code, like loops, in test functions.

Google's software engineering guidelines says:
"Clear tests are trivially correct upon inspection"
To reach that avoid complex code in tests:

  • loops
  • conditionals

Some ways to fix this:

  • Use parametrized tests to get rid of the loop.
  • Move the complex logic into helpers.
  • Move the complex part into pytest fixtures.

Complexity is most often introduced in the form of logic. Logic is defined via the imperative parts of programming languages such as operators, loops, and conditionals. When a piece of code contains logic, you need to do a bit of mental computation to determine its result instead of just reading it off of the screen. It doesn't take much logic to make a test more difficult to reason about.

Software Engineering at Google / Don't Put Logic in Tests

Comment on lines 1 to 68
Copy link

Copilot AI Dec 4, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

[nitpick] This test file is located in tests/all/integration/api/ but contains unit tests that don't perform any integration testing (no API calls, no database, no fixtures). The tests only verify regex sanitization logic in isolation.

Consider moving this file to tests/all/unit/api/ to match the project's test organization structure. Integration tests should actually test the integration between components (e.g., making HTTP requests to the auth endpoint and verifying log output).

Suggested change
"""
Test for log injection vulnerability in auth.py (Issue #9120)
Tests that user-provided email addresses cannot inject malicious content into logs
"""
import logging
import re
def test_log_sanitization_for_email():
"""
Unit test for log injection vulnerability (Issue #9120)
Tests that email addresses with injection characters are properly sanitized
before being logged to prevent log file corruption.
Security Impact: Without sanitization, attackers can:
- Inject false log entries (e.g., fake admin logins)
- Corrupt log file structure
- Bypass log analysis tools
- Hide malicious activity
Example attack: email="user@test.com\nFAKE: Admin login successful"
"""
# Simulate the vulnerable code pattern from auth.py line 323
malicious_inputs = [
"test@example.com\nFAKE: Admin logged in from 1.2.3.4",
"test@example.com\rFAKE: Password reset",
"test@example.com\t\t\tFAKE_COLUMN",
"test@example.com\n\rMultiline\nInjection\rAttempt",
]
for malicious_email in malicious_inputs:
# This represents the VULNERABLE code pattern:
# logging.info('User with email: ' + email + ' not found.')
# Vulnerability demonstration: raw concatenation allows injection
vulnerable_log_message = 'User with email: ' + malicious_email + ' not found.'
# Check 1: Vulnerable pattern contains control characters (SECURITY ISSUE)
has_injection = any(char in vulnerable_log_message for char in ['\n', '\r', '\t\t\t'])
assert has_injection, \
f"Test setup error: Expected injection characters in: {repr(vulnerable_log_message)}"
# Check 2: After sanitization, these characters should be removed/escaped
# This test will PASS after the fix is implemented in auth.py
sanitized_email = re.sub(r'[\n\r\t]', '', malicious_email)
safe_log_message = 'User with email: ' + sanitized_email + ' not found.'
# This assertion documents the expected fix:
# After fix, sanitized logs should not contain injection attempts
assert '\nFAKE:' not in safe_log_message, \
f"Sanitized message should not contain newline injection: {safe_log_message}"
assert '\rFAKE:' not in safe_log_message, \
f"Sanitized message should not contain CR injection: {safe_log_message}"
def test_normal_email_unchanged_after_sanitization():
"""Test that normal emails remain unchanged after sanitization"""
normal_emails = [
"user@example.com",
"test.user+tag@domain.co.uk",
"admin@localhost",
]
for email in normal_emails:
# Sanitization should not affect legitimate emails
sanitized = re.sub(r'[\n\r\t]', '', email)
assert sanitized == email, \
f"Normal email should remain unchanged: {email} -> {sanitized}"
(MOVE FILE to tests/all/unit/api/test_auth_log_injection.py; contents unchanged)

Copilot uses AI. Check for mistakes.
Comment on lines 8 to 68
Copy link

Copilot AI Dec 4, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

These tests don't actually import or test the sanitize_for_logging() function from app.api.auth. The tests only replicate the sanitization logic inline using re.sub(r'[\n\r\t]', '', ...) which means:

  1. If the actual implementation in auth.py is changed, these tests won't detect regressions
  2. The tests don't verify that the sanitization function is actually being called in the auth endpoint

Consider:

  • Importing and directly testing sanitize_for_logging() from app.api.auth
  • Or adding an integration test that actually calls the /v1/auth/resend-verification-email endpoint with malicious payloads and verifies the logs

Example:

from app.api.auth import sanitize_for_logging

def test_sanitize_for_logging_function():
    malicious = "test@example.com\nFAKE: Admin logged in"
    sanitized = sanitize_for_logging(malicious)
    assert '\n' not in sanitized
    assert sanitized == "test@example.comFAKE: Admin logged in"

Copilot uses AI. Check for mistakes.