From 37449c25f2d374656bf52878456ab3a4e859b04a Mon Sep 17 00:00:00 2001
From: MartinRinas
Date: Mon, 16 Dec 2024 16:09:15 +0000
Subject: [PATCH 1/4] add redaction of sensitive log lines

---
 packages/helpermodules/logger.py | 83 ++++++++++++++++++++++++++++++++
 1 file changed, 83 insertions(+)

diff --git a/packages/helpermodules/logger.py b/packages/helpermodules/logger.py
index 06bd3c6c6e..0000d513bf 100644
--- a/packages/helpermodules/logger.py
+++ b/packages/helpermodules/logger.py
@@ -5,12 +5,87 @@
 import sys
 import threading
 import typing_extensions
+import re
 
 FORMAT_STR_DETAILED = '%(asctime)s - {%(name)s:%(lineno)s} - {%(levelname)s:%(threadName)s} - %(message)s'
 FORMAT_STR_SHORT = '%(asctime)s - %(message)s'
 
 RAMDISK_PATH = str(Path(__file__).resolve().parents[2]) + '/ramdisk/'
 PERSISTENT_LOG_PATH = str(Path(__file__).resolve().parents[2]) + '/data/log/'
+KNOWN_SENSITIVE_FIELDS = ['password', 'secret', 'token', 'apikey']
+REDACTION_PATTERNS = [
+    (r'({field})[=:]([^\s&]+)', r'\1=***REDACTED***'),  # Matches field=value, e.g. for URL query parameters
+    (r'"{field}":\s*"([^"]+)"', r'"{field}": "***REDACTED***"')  # Matches "field": "value", JSON formatted data
+]
+
+
+def redact_sensitive_info(message: str, additional_fields: list = None) -> str:
+    """
+    Redacts sensitive information from the given message.
+
+    This function replaces occurrences of known sensitive fields and their values
+    in the message with a redaction placeholder (***REDACTED***). The fields to be
+    redacted are defined in the KNOWN_SENSITIVE_FIELDS list. The function uses
+    predefined patterns to identify and replace the sensitive information.
+
+    Args:
+        message (str): The log message to be redacted.
+
+    Returns:
+        str: The redacted log message.
+    """
+    fields_to_redact = KNOWN_SENSITIVE_FIELDS + (additional_fields or [])
+    for field in fields_to_redact:
+        for pattern, replacement in REDACTION_PATTERNS:
+            pattern = pattern.replace('{field}', field)
+            replacement = replacement.replace('{field}', field)
+            message = re.sub(pattern, replacement, message, flags=re.IGNORECASE)
+    return message
+
+
+class RedactingFilter(logging.Filter):
+    """
+    A logging filter that redacts sensitive information from log messages.
+
+    This filter replaces occurrences of known sensitive fields and their values
+    in the log message with a redaction placeholder (***REDACTED***). The fields to be
+    redacted are defined in the KNOWN_SENSITIVE_FIELDS list. Additional fields to be
+    redacted can be specified using the 'redact_fields' key in the 'extra' parameter
+    when logging.
+
+    Example:
+        log.debug("sample data with redaction=" + dumps(data, indent=4), extra={'redact_fields': 'username,password'})
+
+    Args:
+        name (str): The name of the filter.
+    """
+    def __init__(self, name: str = ''):
+        super().__init__(name)
+
+    def filter(self, record: logging.LogRecord) -> bool:
+        """
+        Redacts sensitive information from the log record's message.
+
+        This method formats the log message with its arguments, applies the redaction
+        logic to replace sensitive information, and updates the log record's message.
+
+        Args:
+            record (logging.LogRecord): The log record to be filtered.
+            extra (dict, optional): Additional fields to be redacted, specified using the 'redact_fields' key.
+
+        Returns:
+            bool: True to indicate that the log record should be processed.
+        """
+
+        message = record.getMessage()  # required for lazy formatting like urllib3
+
+        additional_fields = getattr(record, 'redact_fields', '')
+        fields_to_redact = KNOWN_SENSITIVE_FIELDS + [
+            field.strip() for field in additional_fields.split(',') if field.strip()]
+        record.msg = redact_sensitive_info(message, fields_to_redact)
+        record.args = ()
+        return True
+
 
 def filter_neg(name: str, record) -> bool:
     if name in record.threadName:
@@ -30,6 +105,7 @@ def mb_to_bytes(megabytes: int) -> int:
     # Mehrere kleine Dateien verwenden, damit nicht zu viel verworfen wird, wenn die Datei voll ist.
     main_file_handler = RotatingFileHandler(RAMDISK_PATH + 'main.log', maxBytes=mb_to_bytes(5.5), backupCount=4)
     main_file_handler.setFormatter(logging.Formatter(FORMAT_STR_DETAILED))
+    main_file_handler.addFilter(RedactingFilter())
     logging.basicConfig(level=logging.DEBUG, handlers=[main_file_handler])
     logging.getLogger().handlers[0].addFilter(functools.partial(filter_neg, "soc"))
     logging.getLogger().handlers[0].addFilter(functools.partial(filter_neg, "Internal Chargepoint"))
@@ -40,6 +116,7 @@ def mb_to_bytes(megabytes: int) -> int:
     chargelog_file_handler = RotatingFileHandler(
         RAMDISK_PATH + 'chargelog.log', maxBytes=mb_to_bytes(2), backupCount=1)
     chargelog_file_handler.setFormatter(logging.Formatter(FORMAT_STR_SHORT))
+    chargelog_file_handler.addFilter(RedactingFilter())
     chargelog_log.addHandler(chargelog_file_handler)
 
     data_migration_log = logging.getLogger("data_migration")
@@ -47,22 +124,26 @@ def mb_to_bytes(megabytes: int) -> int:
     data_migration_file_handler = RotatingFileHandler(
         PERSISTENT_LOG_PATH + 'data_migration.log', maxBytes=mb_to_bytes(1), backupCount=1)
     data_migration_file_handler.setFormatter(logging.Formatter(FORMAT_STR_SHORT))
+    data_migration_file_handler.addFilter(RedactingFilter())
     data_migration_log.addHandler(data_migration_file_handler)
 
     mqtt_log = logging.getLogger("mqtt")
     mqtt_log.propagate = False
     mqtt_file_handler = RotatingFileHandler(RAMDISK_PATH + 'mqtt.log', maxBytes=mb_to_bytes(3), backupCount=1)
     mqtt_file_handler.setFormatter(logging.Formatter(FORMAT_STR_SHORT))
+    mqtt_file_handler.addFilter(RedactingFilter())
     mqtt_log.addHandler(mqtt_file_handler)
 
     smarthome_log_handler = RotatingFileHandler(RAMDISK_PATH + 'smarthome.log', maxBytes=mb_to_bytes(1), backupCount=1)
     smarthome_log_handler.setFormatter(logging.Formatter(FORMAT_STR_SHORT))
     smarthome_log_handler.addFilter(functools.partial(filter_pos, "smarthome"))
+    smarthome_log_handler.addFilter(RedactingFilter())
     logging.getLogger().addHandler(smarthome_log_handler)
 
     soc_log_handler = RotatingFileHandler(RAMDISK_PATH + 'soc.log', maxBytes=mb_to_bytes(2), backupCount=1)
     soc_log_handler.setFormatter(logging.Formatter(FORMAT_STR_DETAILED))
     soc_log_handler.addFilter(functools.partial(filter_pos, "soc"))
+    soc_log_handler.addFilter(RedactingFilter())
     logging.getLogger().addHandler(soc_log_handler)
 
     internal_chargepoint_log_handler = RotatingFileHandler(RAMDISK_PATH + 'internal_chargepoint.log',
@@ -70,12 +151,14 @@ def mb_to_bytes(megabytes: int) -> int:
                                                            backupCount=1)
     internal_chargepoint_log_handler.setFormatter(logging.Formatter(FORMAT_STR_DETAILED))
     internal_chargepoint_log_handler.addFilter(functools.partial(filter_pos, "Internal Chargepoint"))
+    internal_chargepoint_log_handler.addFilter(RedactingFilter())
    logging.getLogger().addHandler(internal_chargepoint_log_handler)
 
     urllib3_log = logging.getLogger("urllib3.connectionpool")
     urllib3_log.propagate = True
     urllib3_file_handler = RotatingFileHandler(RAMDISK_PATH + 'soc.log',
                                                maxBytes=mb_to_bytes(2), backupCount=1)
     urllib3_file_handler.setFormatter(logging.Formatter(FORMAT_STR_DETAILED))
+    urllib3_file_handler.addFilter(RedactingFilter())
     urllib3_file_handler.addFilter(functools.partial(filter_pos, "soc"))
     urllib3_log.addHandler(urllib3_file_handler)

From 96c46106d976d4c2ba3a353d58495ba6950525d2 Mon Sep 17 00:00:00 2001
From: MartinRinas
Date: Mon, 16 Dec 2024 16:13:21 +0000
Subject: [PATCH 2/4] add access and refresh token

---
 packages/helpermodules/logger.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/packages/helpermodules/logger.py b/packages/helpermodules/logger.py
index 0000d513bf..44bfedd9b8 100644
--- a/packages/helpermodules/logger.py
+++ b/packages/helpermodules/logger.py
@@ -12,7 +12,7 @@
 
 RAMDISK_PATH = str(Path(__file__).resolve().parents[2]) + '/ramdisk/'
 PERSISTENT_LOG_PATH = str(Path(__file__).resolve().parents[2]) + '/data/log/'
-KNOWN_SENSITIVE_FIELDS = ['password', 'secret', 'token', 'apikey']
+KNOWN_SENSITIVE_FIELDS = ['password', 'secret', 'token', 'apikey', 'access_token', 'refresh_token']
 REDACTION_PATTERNS = [
     (r'({field})[=:]([^\s&]+)', r'\1=***REDACTED***'),  # Matches field=value, e.g. for URL query parameters
     (r'"{field}":\s*"([^"]+)"', r'"{field}": "***REDACTED***"')  # Matches "field": "value", JSON formatted data

From 285281c38913302321f7d816003720ed956643cc Mon Sep 17 00:00:00 2001
From: MartinRinas
Date: Tue, 17 Dec 2024 21:11:11 +0000
Subject: [PATCH 3/4] update known sensitive fields, modify regex

---
 packages/helpermodules/logger.py | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/packages/helpermodules/logger.py b/packages/helpermodules/logger.py
index 44bfedd9b8..f9a507955f 100644
--- a/packages/helpermodules/logger.py
+++ b/packages/helpermodules/logger.py
@@ -12,10 +12,13 @@
 
 RAMDISK_PATH = str(Path(__file__).resolve().parents[2]) + '/ramdisk/'
 PERSISTENT_LOG_PATH = str(Path(__file__).resolve().parents[2]) + '/data/log/'
-KNOWN_SENSITIVE_FIELDS = ['password', 'secret', 'token', 'apikey', 'access_token', 'refresh_token']
+KNOWN_SENSITIVE_FIELDS = [
+    'password', 'secret', 'token', 'apikey', 'access_token',
+    'refresh_token', 'accesstoken', 'refreshtoken'
+]
 REDACTION_PATTERNS = [
     (r'({field})[=:]([^\s&]+)', r'\1=***REDACTED***'),  # Matches field=value, e.g. for URL query parameters
-    (r'"{field}":\s*"([^"]+)"', r'"{field}": "***REDACTED***"')  # Matches "field": "value", JSON formatted data
+    (r'"{field}":\s*"(.*?)"', r'"{field}": "***REDACTED***"')  # Matches "field": "value", JSON formatted data
 ]

From b7e8cc3c321daf5c7c23197602b494d7a037616d Mon Sep 17 00:00:00 2001
From: MartinRinas
Date: Thu, 2 Jan 2025 14:12:30 +0000
Subject: [PATCH 4/4] capture single quotes in json data

---
 packages/helpermodules/logger.py | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/packages/helpermodules/logger.py b/packages/helpermodules/logger.py
index f9a507955f..93fafcf7e5 100644
--- a/packages/helpermodules/logger.py
+++ b/packages/helpermodules/logger.py
@@ -17,8 +17,9 @@
     'refresh_token', 'accesstoken', 'refreshtoken'
 ]
 REDACTION_PATTERNS = [
-    (r'({field})[=:]([^\s&]+)', r'\1=***REDACTED***'),  # Matches field=value, e.g. for URL query parameters
-    (r'"{field}":\s*"(.*?)"', r'"{field}": "***REDACTED***"')  # Matches "field": "value", JSON formatted data
+    (r'({field})[=:]([^\s&]+)', r'\1=***REDACTED***'),  # field=value, e.g. for URL query parameters
+    (r'"{field}":\s*"(.*?)"', r'"{field}": "***REDACTED***"'),  # "field": "value", JSON formatted data
+    (r'\'{field}\':\s*\'(.*?)\'', r"'{field}': '***REDACTED***'")  # 'field': 'value', JSON formatted data
 ]
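
A minimal usage sketch (illustration only, not part of the patch series) of how a caller would rely on the new filter. It assumes the patched packages/helpermodules/logger.py has already been imported, so the configured file handlers each carry a RedactingFilter; the logger name and the sample payload below are invented for the example.

    import logging
    from json import dumps

    log = logging.getLogger(__name__)  # hypothetical calling module

    data = {"username": "demo", "password": "hunter2", "refresh_token": "abc123"}

    # Fields listed in KNOWN_SENSITIVE_FIELDS (password, refresh_token, ...) are
    # redacted automatically; extra field names for this one call are passed as a
    # comma-separated string via the 'redact_fields' key, as described in the
    # RedactingFilter docstring.
    log.debug("login response=" + dumps(data, indent=4), extra={'redact_fields': 'username'})

    # Expected result in the log file: the values of password, refresh_token and
    # username are written as ***REDACTED*** instead of the real secrets.

Because the filter is attached to the file handlers rather than to individual loggers, records from third-party libraries such as urllib3's connection log lines pass through the same redaction before they reach the ramdisk logs.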