|
7 | 7 | from mssql_python import ddbc_bindings |
8 | 8 | from mssql_python.exceptions import raise_exception |
9 | 9 | from mssql_python.logging_config import get_logger |
10 | | -import platform |
11 | | -from pathlib import Path |
| 10 | +import re |
| 11 | +from mssql_python.constants import ConstantsDDBC |
12 | 12 | from mssql_python.ddbc_bindings import normalize_architecture |
13 | 13 |
|
14 | 14 | logger = get_logger() |
@@ -155,6 +155,136 @@ def sanitize_user_input(user_input: str, max_length: int = 50) -> str: |
155 | 155 | # Return placeholder if nothing remains after sanitization |
156 | 156 | return sanitized if sanitized else "<invalid>" |
157 | 157 |
|
def validate_attribute_value(attribute, value, sanitize_logs=True, max_log_length=50):
    """
    Validate a connection attribute/value pair and produce log-safe renderings.

    The attribute must be a non-negative integer. The value is then checked
    according to its Python type:

    - int: must be non-negative; connection/login timeouts must not exceed
      UINT_MAX, autocommit must be 0 or 1, and the transaction isolation
      level must be one of the four SQL_TXN_* constants.
    - str: at most 8192 characters and must not contain any of a fixed list
      of SQL-looking substrings (compared case-insensitively).
    - bytes / bytearray: at most 32768 bytes and no more than 25% NUL bytes.
    - anything else is rejected as unsupported.

    Args:
        attribute (int): The connection attribute to validate (SQL_ATTR_*).
        value: The value to set for the attribute (int, str, bytes, or bytearray).
        sanitize_logs (bool): When True, the returned attribute/value strings
            are stripped of every character other than word characters,
            dash and dot, and truncated. When False they are plain ``str()``
            renderings.
        max_log_length (int): Maximum length of a sanitized string before
            ``"..."`` is appended. Only used when ``sanitize_logs`` is True.

    Returns:
        tuple: (is_valid, error_message, sanitized_attribute, sanitized_value) where:
            - is_valid is a boolean
            - error_message is None if valid, otherwise the validation error message
            - sanitized_attribute is the attribute as a string for logging
            - sanitized_value is the value as a string for logging

    Note:
        The string and binary checks are heuristic screens applied before the
        value is handed to the native layer; they are substring/ratio tests,
        not a parser.
    """
    # Size limits applied to string and binary values.
    MAX_STRING_SIZE = 8192  # 8KB maximum for string values
    MAX_BINARY_SIZE = 32768  # 32KB maximum for binary data
    UINT_MAX = 4294967295  # upper bound used for both timeout attributes

    # Substrings that cause a string value to be rejected. All entries are
    # already upper-case (or symbols), so they can be matched directly against
    # the upper-cased value.
    sql_injection_patterns = (
        '--', ';', '/*', '*/', 'UNION', 'SELECT', 'INSERT', 'UPDATE',
        'DELETE', 'DROP', 'EXEC', 'EXECUTE', '@@', 'CHAR(', 'CAST(',
    )

    def _to_text(input_val):
        """Return str(input_val), or None when the conversion itself fails."""
        if isinstance(input_val, str):
            return input_val
        try:
            return str(input_val)
        except Exception:
            # Only ordinary errors from a misbehaving __str__ are absorbed;
            # KeyboardInterrupt / SystemExit must keep propagating.
            return None

    def _sanitize_for_logging(input_val, max_length=max_log_length):
        """Render input_val as a short string restricted to [\\w.-] characters."""
        text = _to_text(input_val)
        if text is None:
            return "<non-string>"

        # Drop everything except word characters, dash and dot
        # (the characters commonly found in encoding names).
        sanitized = re.sub(r'[^\w\-\.]', '', text)

        # Limit length to prevent log flooding
        if len(sanitized) > max_length:
            sanitized = sanitized[:max_length] + "..."

        # Return placeholder if nothing remains after sanitization
        return sanitized if sanitized else "<invalid>"

    def _raw_for_logging(input_val):
        """Render input_val with str() only; never raises for a broken __str__."""
        text = _to_text(input_val)
        return "<non-string>" if text is None else text

    # Build the log renderings up front so every return path can include them.
    render = _sanitize_for_logging if sanitize_logs else _raw_for_logging
    sanitized_attr = render(attribute)
    sanitized_val = render(value)

    def _reject(message):
        """Build the failure tuple for the given validation message."""
        return False, message, sanitized_attr, sanitized_val

    # Attribute must be a non-negative integer
    if not isinstance(attribute, int):
        return _reject(f"Attribute must be an integer, got {type(attribute).__name__}")

    if attribute < 0:
        return _reject(f"Attribute value cannot be negative: {attribute}")

    if isinstance(value, int):
        # No attribute currently accepts a negative integer value.
        if value < 0:
            return _reject(f"Integer value cannot be negative: {value}")

        # Attribute-specific integer validation
        if attribute == ConstantsDDBC.SQL_ATTR_CONNECTION_TIMEOUT.value:
            if value > UINT_MAX:
                return _reject(f"Connection timeout cannot exceed 4294967295: {value}")

        elif attribute == ConstantsDDBC.SQL_ATTR_LOGIN_TIMEOUT.value:
            if value > UINT_MAX:
                return _reject(f"Login timeout cannot exceed 4294967295: {value}")

        elif attribute == ConstantsDDBC.SQL_ATTR_AUTOCOMMIT.value:
            # Autocommit can only be 0 or 1
            if value not in (0, 1):
                return _reject(f"Autocommit value must be 0 or 1: {value}")

        elif attribute == ConstantsDDBC.SQL_ATTR_TXN_ISOLATION.value:
            # Transaction isolation must be one of the predefined values
            valid_isolation_levels = (
                ConstantsDDBC.SQL_TXN_READ_UNCOMMITTED.value,
                ConstantsDDBC.SQL_TXN_READ_COMMITTED.value,
                ConstantsDDBC.SQL_TXN_REPEATABLE_READ.value,
                ConstantsDDBC.SQL_TXN_SERIALIZABLE.value,
            )
            if value not in valid_isolation_levels:
                return _reject(f"Invalid transaction isolation level: {value}")

    elif isinstance(value, str):
        if len(value) > MAX_STRING_SIZE:
            return _reject(
                f"String value too large: {len(value)} bytes (max {MAX_STRING_SIZE})"
            )

        # Case-insensitive substring scan; the first matching pattern (in list
        # order) is the one reported.
        value_upper = value.upper()
        for pattern in sql_injection_patterns:
            if pattern in value_upper:
                return _reject(
                    f"String value contains potentially unsafe SQL pattern: {pattern}"
                )

    elif isinstance(value, (bytes, bytearray)):
        if len(value) > MAX_BINARY_SIZE:
            return _reject(
                f"Binary value too large: {len(value)} bytes (max {MAX_BINARY_SIZE})"
            )

        # Reject payloads in which more than 25% of the bytes are NUL.
        null_count = value.count(0)
        if null_count > len(value) // 4:
            return _reject("Binary data contains suspicious patterns")

    else:
        return _reject(f"Unsupported attribute value type: {type(value).__name__}")

    # If we got here, all validations passed
    return True, None, sanitized_attr, sanitized_val
| 287 | + |
158 | 288 |
|
159 | 289 | def log(level: str, message: str, *args) -> None: |
160 | 290 | """ |
|
0 commit comments