5757# Note: "utf-16" with BOM is NOT included as it's problematic for SQL_WCHAR
5858UTF16_ENCODINGS : frozenset [str ] = frozenset (["utf-16le" , "utf-16be" ])
5959
60- # Valid encoding characters (alphanumeric, dash, underscore only)
61- import string
6260
63- VALID_ENCODING_CHARS : frozenset [str ] = frozenset (string .ascii_letters + string .digits + "-_" )
def _validate_utf16_wchar_compatibility(
    encoding: str, wchar_type: int, context: str = "SQL_WCHAR"
) -> None:
    """
    Validate that an encoding can be used with SQL_WCHAR.

    Shared by setencoding/setdecoding so the rejection rules and the error
    messages live in exactly one place.

    Args:
        encoding: The encoding name. Callers are expected to pass it already
            normalized (lower-cased); the comparison here is exact.
        wchar_type: The SQL_WCHAR constant value the caller matched against.
            It is not read by this function; it is kept so existing call
            sites keep working.
        context: Label used in log and error messages ('SQL_WCHAR',
            'SQL_WCHAR ctype', 'SQL_WCHAR sqltype', ...). If it contains the
            substring "ctype", the raised error refers to "SQL_WCHAR ctype";
            otherwise it refers to plain "SQL_WCHAR".

    Raises:
        ProgrammingError: If encoding is "utf-16" (BOM variant) or is not one
            of the names in UTF16_ENCODINGS.
    """
    if encoding == "utf-16":
        # UTF-16 with BOM is rejected due to byte order ambiguity
        logger.warning("utf-16 with BOM rejected for %s", context)
        raise ProgrammingError(
            driver_error="UTF-16 with Byte Order Mark not supported for SQL_WCHAR",
            ddbc_error=(
                "Cannot use 'utf-16' encoding with SQL_WCHAR due to Byte Order Mark ambiguity. "
                "Use 'utf-16le' or 'utf-16be' instead for explicit byte order."
            ),
        )

    # Explicit-byte-order UTF-16 variants are the only accepted encodings.
    if encoding in UTF16_ENCODINGS:
        return

    # Non-UTF-16 encodings are not supported with SQL_WCHAR. The encoding is
    # sanitized before logging because it originates from the caller.
    logger.warning(
        "Non-UTF-16 encoding %s attempted with %s", sanitize_user_input(encoding), context
    )

    # Only two message variants exist: one for ctype validation and one for
    # everything else (including the "SQL_WCHAR sqltype" context).
    ddbc_context = "SQL_WCHAR ctype" if "ctype" in context else "SQL_WCHAR"

    raise ProgrammingError(
        driver_error=f"{ddbc_context} only supports UTF-16 encodings",
        ddbc_error=(
            f"Cannot use encoding '{encoding}' with {ddbc_context}. "
            "SQL_WCHAR requires UTF-16 encodings (utf-16le, utf-16be)"
        ),
    )
64108
65109
66110def _validate_encoding (encoding : str ) -> bool :
@@ -78,14 +122,18 @@ def _validate_encoding(encoding: str) -> bool:
78122 Cache size is limited to 128 entries which should cover most use cases.
79123 Also validates that encoding name only contains safe characters.
80124 """
81- # First check for dangerous characters (security validation)
82- if not all ( c in VALID_ENCODING_CHARS for c in encoding ):
125+ # Basic security checks - prevent obvious attacks
126+ if not encoding or not isinstance ( encoding , str ):
83127 return False
84128
85129 # Check length limit (prevent DOS)
86130 if len (encoding ) > 100 :
87131 return False
88132
133+ # Prevent null bytes and control characters that could cause issues
134+ if "\x00 " in encoding or any (ord (c ) < 32 and c not in "\t \n \r " for c in encoding ):
135+ return False
136+
89137 # Then check if it's a valid Python codec
90138 try :
91139 codecs .lookup (encoding )
@@ -450,18 +498,9 @@ def setencoding(self, encoding: Optional[str] = None, ctype: Optional[int] = Non
450498 encoding = encoding .casefold ()
451499 logger .debug ("setencoding: Encoding normalized to %s" , encoding )
452500
453- # Reject 'utf-16' with BOM for SQL_WCHAR (ambiguous byte order)
454- if encoding == "utf-16" and ctype == ConstantsDDBC .SQL_WCHAR .value :
455- logger .warning (
456- "utf-16 with BOM rejected for SQL_WCHAR" ,
457- )
458- raise ProgrammingError (
459- driver_error = "UTF-16 with Byte Order Mark not supported for SQL_WCHAR" ,
460- ddbc_error = (
461- "Cannot use 'utf-16' encoding with SQL_WCHAR due to Byte Order Mark ambiguity. "
462- "Use 'utf-16le' or 'utf-16be' instead for explicit byte order."
463- ),
464- )
501+ # Early validation if ctype is already specified as SQL_WCHAR
502+ if ctype == ConstantsDDBC .SQL_WCHAR .value :
503+ _validate_utf16_wchar_compatibility (encoding , ctype , "SQL_WCHAR" )
465504
466505 # Set default ctype based on encoding if not provided
467506 if ctype is None :
@@ -488,28 +527,9 @@ def setencoding(self, encoding: Optional[str] = None, ctype: Optional[int] = Non
488527 ),
489528 )
490529
491- # Validate that SQL_WCHAR ctype only used with UTF-16 encodings (not utf-16 with BOM)
530+ # Final validation: SQL_WCHAR ctype only supports UTF-16 encodings (without BOM)
492531 if ctype == ConstantsDDBC .SQL_WCHAR .value :
493- if encoding == "utf-16" :
494- raise ProgrammingError (
495- driver_error = "UTF-16 with Byte Order Mark not supported for SQL_WCHAR" ,
496- ddbc_error = (
497- "Cannot use 'utf-16' encoding with SQL_WCHAR due to Byte Order Mark ambiguity. "
498- "Use 'utf-16le' or 'utf-16be' instead for explicit byte order."
499- ),
500- )
501- elif encoding not in UTF16_ENCODINGS :
502- logger .warning (
503- "Non-UTF-16 encoding %s attempted with SQL_WCHAR ctype" ,
504- sanitize_user_input (encoding ),
505- )
506- raise ProgrammingError (
507- driver_error = f"SQL_WCHAR only supports UTF-16 encodings" ,
508- ddbc_error = (
509- f"Cannot use encoding '{ encoding } ' with SQL_WCHAR. "
510- f"SQL_WCHAR requires UTF-16 encodings (utf-16le, utf-16be)"
511- ),
512- )
532+ _validate_utf16_wchar_compatibility (encoding , ctype , "SQL_WCHAR" )
513533
514534 # Store the encoding settings (thread-safe with lock)
515535 with self ._encoding_lock :
@@ -633,32 +653,9 @@ def setdecoding(
633653 # Normalize encoding to lowercase for consistency
634654 encoding = encoding .lower ()
635655
636- # Reject 'utf-16' with BOM for SQL_WCHAR (ambiguous byte order)
637- if sqltype == ConstantsDDBC .SQL_WCHAR .value and encoding == "utf-16" :
638- logger .warning (
639- "utf-16 with BOM rejected for SQL_WCHAR" ,
640- )
641- raise ProgrammingError (
642- driver_error = "UTF-16 with Byte Order Mark not supported for SQL_WCHAR" ,
643- ddbc_error = (
644- "Cannot use 'utf-16' encoding with SQL_WCHAR due to Byte Order Mark ambiguity. "
645- "Use 'utf-16le' or 'utf-16be' instead for explicit byte order."
646- ),
647- )
648-
649- # Validate SQL_WCHAR only supports UTF-16 encodings (SQL_WMETADATA is more flexible)
650- if sqltype == ConstantsDDBC .SQL_WCHAR .value and encoding not in UTF16_ENCODINGS :
651- logger .warning (
652- "Non-UTF-16 encoding %s attempted with SQL_WCHAR sqltype" ,
653- sanitize_user_input (encoding ),
654- )
655- raise ProgrammingError (
656- driver_error = f"SQL_WCHAR only supports UTF-16 encodings" ,
657- ddbc_error = (
658- f"Cannot use encoding '{ encoding } ' with SQL_WCHAR. "
659- f"SQL_WCHAR requires UTF-16 encodings (utf-16le, utf-16be)"
660- ),
661- )
656+ # Validate SQL_WCHAR encoding compatibility
657+ if sqltype == ConstantsDDBC .SQL_WCHAR .value :
658+ _validate_utf16_wchar_compatibility (encoding , sqltype , "SQL_WCHAR sqltype" )
662659
663660 # SQL_WMETADATA can use any valid encoding (UTF-8, UTF-16, etc.)
664661 # No restriction needed here - let users configure as needed
@@ -685,28 +682,9 @@ def setdecoding(
685682 ),
686683 )
687684
688- # Validate that SQL_WCHAR ctype only used with UTF-16 encodings (not utf-16 with BOM)
685+ # Validate SQL_WCHAR ctype encoding compatibility
689686 if ctype == ConstantsDDBC .SQL_WCHAR .value :
690- if encoding == "utf-16" :
691- raise ProgrammingError (
692- driver_error = "UTF-16 with Byte Order Mark not supported for SQL_WCHAR" ,
693- ddbc_error = (
694- "Cannot use 'utf-16' encoding with SQL_WCHAR due to Byte Order Mark ambiguity. "
695- "Use 'utf-16le' or 'utf-16be' instead for explicit byte order."
696- ),
697- )
698- elif encoding not in UTF16_ENCODINGS :
699- logger .warning (
700- "Non-UTF-16 encoding %s attempted with SQL_WCHAR ctype" ,
701- sanitize_user_input (encoding ),
702- )
703- raise ProgrammingError (
704- driver_error = f"SQL_WCHAR ctype only supports UTF-16 encodings" ,
705- ddbc_error = (
706- f"Cannot use encoding '{ encoding } ' with SQL_WCHAR ctype. "
707- f"SQL_WCHAR requires UTF-16 encodings (utf-16le, utf-16be)"
708- ),
709- )
687+ _validate_utf16_wchar_compatibility (encoding , ctype , "SQL_WCHAR ctype" )
710688
711689 # Store the decoding settings for the specified sqltype (thread-safe with lock)
712690 with self ._encoding_lock :
0 commit comments