5757# Note: "utf-16" with BOM is NOT included as it's problematic for SQL_WCHAR
5858UTF16_ENCODINGS : frozenset [str ] = frozenset (["utf-16le" , "utf-16be" ])
5959
60- # Valid encoding characters (alphanumeric, dash, underscore only)
61- import string
6260
63- VALID_ENCODING_CHARS : frozenset [str ] = frozenset (string .ascii_letters + string .digits + "-_" )
61+ def _validate_utf16_wchar_compatibility (
62+ encoding : str , wchar_type : int , context : str = "SQL_WCHAR"
63+ ) -> None :
64+ """
65+ Validates UTF-16 encoding compatibility with SQL_WCHAR.
66+
67+ Centralizes the validation logic to eliminate duplication across setencoding/setdecoding.
68+
69+ Args:
70+ encoding: The encoding string (already normalized to lowercase)
71+ wchar_type: The SQL_WCHAR constant value to check against
72+ context: Context string for error messages ('SQL_WCHAR', 'SQL_WCHAR ctype', etc.)
73+
74+ Raises:
75+ ProgrammingError: If encoding is incompatible with SQL_WCHAR
76+ """
77+ if encoding == "utf-16" :
78+ # UTF-16 with BOM is rejected due to byte order ambiguity
79+ logger .warning ("utf-16 with BOM rejected for %s" , context )
80+ raise ProgrammingError (
81+ driver_error = "UTF-16 with Byte Order Mark not supported for SQL_WCHAR" ,
82+ ddbc_error = (
83+ "Cannot use 'utf-16' encoding with SQL_WCHAR due to Byte Order Mark ambiguity. "
84+ "Use 'utf-16le' or 'utf-16be' instead for explicit byte order."
85+ ),
86+ )
87+ elif encoding not in UTF16_ENCODINGS :
88+ # Non-UTF-16 encodings are not supported with SQL_WCHAR
89+ logger .warning (
90+ "Non-UTF-16 encoding %s attempted with %s" , sanitize_user_input (encoding ), context
91+ )
92+
93+ # Generate context-appropriate error messages
94+ if "ctype" in context :
95+ driver_error = f"SQL_WCHAR ctype only supports UTF-16 encodings"
96+ ddbc_context = "SQL_WCHAR ctype"
97+ else :
98+ driver_error = f"SQL_WCHAR only supports UTF-16 encodings"
99+ ddbc_context = "SQL_WCHAR"
100+
101+ raise ProgrammingError (
102+ driver_error = driver_error ,
103+ ddbc_error = (
104+ f"Cannot use encoding '{ encoding } ' with { ddbc_context } . "
105+ f"SQL_WCHAR requires UTF-16 encodings (utf-16le, utf-16be)"
106+ ),
107+ )
64108
65109
66110def _validate_encoding (encoding : str ) -> bool :
@@ -78,14 +122,18 @@ def _validate_encoding(encoding: str) -> bool:
78122 Cache size is limited to 128 entries which should cover most use cases.
79123 Also validates that encoding name only contains safe characters.
80124 """
81- # First check for dangerous characters (security validation)
82- if not all ( c in VALID_ENCODING_CHARS for c in encoding ):
125+ # Basic security checks - prevent obvious attacks
126+ if not encoding or not isinstance ( encoding , str ):
83127 return False
84128
85129 # Check length limit (prevent DOS)
86130 if len (encoding ) > 100 :
87131 return False
88132
133+ # Prevent null bytes and control characters that could cause issues
134+ if "\x00 " in encoding or any (ord (c ) < 32 and c not in "\t \n \r " for c in encoding ):
135+ return False
136+
89137 # Then check if it's a valid Python codec
90138 try :
91139 codecs .lookup (encoding )
@@ -437,8 +485,7 @@ def setencoding(self, encoding: Optional[str] = None, ctype: Optional[int] = Non
437485 # Validate encoding using cached validation for better performance
438486 if not _validate_encoding (encoding ):
439487 # Log the sanitized encoding for security
440- logger .debug (
441- "warning" ,
488+ logger .warning (
442489 "Invalid encoding attempted: %s" ,
443490 sanitize_user_input (str (encoding )),
444491 )
@@ -451,19 +498,9 @@ def setencoding(self, encoding: Optional[str] = None, ctype: Optional[int] = Non
451498 encoding = encoding .casefold ()
452499 logger .debug ("setencoding: Encoding normalized to %s" , encoding )
453500
454- # Reject 'utf-16' with BOM for SQL_WCHAR (ambiguous byte order)
455- if encoding == "utf-16" and ctype == ConstantsDDBC .SQL_WCHAR .value :
456- logger .debug (
457- "warning" ,
458- "utf-16 with BOM rejected for SQL_WCHAR" ,
459- )
460- raise ProgrammingError (
461- driver_error = "UTF-16 with Byte Order Mark not supported for SQL_WCHAR" ,
462- ddbc_error = (
463- "Cannot use 'utf-16' encoding with SQL_WCHAR due to Byte Order Mark ambiguity. "
464- "Use 'utf-16le' or 'utf-16be' instead for explicit byte order."
465- ),
466- )
501+ # Early validation if ctype is already specified as SQL_WCHAR
502+ if ctype == ConstantsDDBC .SQL_WCHAR .value :
503+ _validate_utf16_wchar_compatibility (encoding , ctype , "SQL_WCHAR" )
467504
468505 # Set default ctype based on encoding if not provided
469506 if ctype is None :
@@ -478,8 +515,7 @@ def setencoding(self, encoding: Optional[str] = None, ctype: Optional[int] = Non
478515 valid_ctypes = [ConstantsDDBC .SQL_CHAR .value , ConstantsDDBC .SQL_WCHAR .value ]
479516 if ctype not in valid_ctypes :
480517 # Log the sanitized ctype for security
481- logger .debug (
482- "warning" ,
518+ logger .warning (
483519 "Invalid ctype attempted: %s" ,
484520 sanitize_user_input (str (ctype )),
485521 )
@@ -491,37 +527,16 @@ def setencoding(self, encoding: Optional[str] = None, ctype: Optional[int] = Non
491527 ),
492528 )
493529
494- # Validate that SQL_WCHAR ctype only used with UTF-16 encodings (not utf-16 with BOM)
530+ # Final validation: SQL_WCHAR ctype only supports UTF-16 encodings (without BOM)
495531 if ctype == ConstantsDDBC .SQL_WCHAR .value :
496- if encoding == "utf-16" :
497- raise ProgrammingError (
498- driver_error = "UTF-16 with Byte Order Mark not supported for SQL_WCHAR" ,
499- ddbc_error = (
500- "Cannot use 'utf-16' encoding with SQL_WCHAR due to Byte Order Mark ambiguity. "
501- "Use 'utf-16le' or 'utf-16be' instead for explicit byte order."
502- ),
503- )
504- elif encoding not in UTF16_ENCODINGS :
505- logger .debug (
506- "warning" ,
507- "Non-UTF-16 encoding %s attempted with SQL_WCHAR ctype" ,
508- sanitize_user_input (encoding ),
509- )
510- raise ProgrammingError (
511- driver_error = f"SQL_WCHAR only supports UTF-16 encodings" ,
512- ddbc_error = (
513- f"Cannot use encoding '{ encoding } ' with SQL_WCHAR. "
514- f"SQL_WCHAR requires UTF-16 encodings (utf-16le, utf-16be)"
515- ),
516- )
532+ _validate_utf16_wchar_compatibility (encoding , ctype , "SQL_WCHAR" )
517533
518534 # Store the encoding settings (thread-safe with lock)
519535 with self ._encoding_lock :
520536 self ._encoding_settings = {"encoding" : encoding , "ctype" : ctype }
521537
522538 # Log with sanitized values for security
523- logger .debug (
524- "info" ,
539+ logger .info (
525540 "Text encoding set to %s with ctype %s" ,
526541 sanitize_user_input (encoding ),
527542 sanitize_user_input (str (ctype )),
@@ -604,8 +619,7 @@ def setdecoding(
604619 SQL_WMETADATA ,
605620 ]
606621 if sqltype not in valid_sqltypes :
607- logger .debug (
608- "warning" ,
622+ logger .warning (
609623 "Invalid sqltype attempted: %s" ,
610624 sanitize_user_input (str (sqltype )),
611625 )
@@ -627,8 +641,7 @@ def setdecoding(
627641
628642 # Validate encoding using cached validation for better performance
629643 if not _validate_encoding (encoding ):
630- logger .debug (
631- "warning" ,
644+ logger .warning (
632645 "Invalid encoding attempted: %s" ,
633646 sanitize_user_input (str (encoding )),
634647 )
@@ -640,34 +653,9 @@ def setdecoding(
640653 # Normalize encoding to lowercase for consistency
641654 encoding = encoding .lower ()
642655
643- # Reject 'utf-16' with BOM for SQL_WCHAR (ambiguous byte order)
644- if sqltype == ConstantsDDBC .SQL_WCHAR .value and encoding == "utf-16" :
645- logger .debug (
646- "warning" ,
647- "utf-16 with BOM rejected for SQL_WCHAR" ,
648- )
649- raise ProgrammingError (
650- driver_error = "UTF-16 with Byte Order Mark not supported for SQL_WCHAR" ,
651- ddbc_error = (
652- "Cannot use 'utf-16' encoding with SQL_WCHAR due to Byte Order Mark ambiguity. "
653- "Use 'utf-16le' or 'utf-16be' instead for explicit byte order."
654- ),
655- )
656-
657- # Validate SQL_WCHAR only supports UTF-16 encodings (SQL_WMETADATA is more flexible)
658- if sqltype == ConstantsDDBC .SQL_WCHAR .value and encoding not in UTF16_ENCODINGS :
659- logger .debug (
660- "warning" ,
661- "Non-UTF-16 encoding %s attempted with SQL_WCHAR sqltype" ,
662- sanitize_user_input (encoding ),
663- )
664- raise ProgrammingError (
665- driver_error = f"SQL_WCHAR only supports UTF-16 encodings" ,
666- ddbc_error = (
667- f"Cannot use encoding '{ encoding } ' with SQL_WCHAR. "
668- f"SQL_WCHAR requires UTF-16 encodings (utf-16le, utf-16be)"
669- ),
670- )
656+ # Validate SQL_WCHAR encoding compatibility
657+ if sqltype == ConstantsDDBC .SQL_WCHAR .value :
658+ _validate_utf16_wchar_compatibility (encoding , sqltype , "SQL_WCHAR sqltype" )
671659
672660 # SQL_WMETADATA can use any valid encoding (UTF-8, UTF-16, etc.)
673661 # No restriction needed here - let users configure as needed
@@ -682,8 +670,7 @@ def setdecoding(
682670 # Validate ctype
683671 valid_ctypes = [ConstantsDDBC .SQL_CHAR .value , ConstantsDDBC .SQL_WCHAR .value ]
684672 if ctype not in valid_ctypes :
685- logger .debug (
686- "warning" ,
673+ logger .warning (
687674 "Invalid ctype attempted: %s" ,
688675 sanitize_user_input (str (ctype )),
689676 )
@@ -695,29 +682,9 @@ def setdecoding(
695682 ),
696683 )
697684
698- # Validate that SQL_WCHAR ctype only used with UTF-16 encodings (not utf-16 with BOM)
685+ # Validate SQL_WCHAR ctype encoding compatibility
699686 if ctype == ConstantsDDBC .SQL_WCHAR .value :
700- if encoding == "utf-16" :
701- raise ProgrammingError (
702- driver_error = "UTF-16 with Byte Order Mark not supported for SQL_WCHAR" ,
703- ddbc_error = (
704- "Cannot use 'utf-16' encoding with SQL_WCHAR due to Byte Order Mark ambiguity. "
705- "Use 'utf-16le' or 'utf-16be' instead for explicit byte order."
706- ),
707- )
708- elif encoding not in UTF16_ENCODINGS :
709- logger .debug (
710- "warning" ,
711- "Non-UTF-16 encoding %s attempted with SQL_WCHAR ctype" ,
712- sanitize_user_input (encoding ),
713- )
714- raise ProgrammingError (
715- driver_error = f"SQL_WCHAR ctype only supports UTF-16 encodings" ,
716- ddbc_error = (
717- f"Cannot use encoding '{ encoding } ' with SQL_WCHAR ctype. "
718- f"SQL_WCHAR requires UTF-16 encodings (utf-16le, utf-16be)"
719- ),
720- )
687+ _validate_utf16_wchar_compatibility (encoding , ctype , "SQL_WCHAR ctype" )
721688
722689 # Store the decoding settings for the specified sqltype (thread-safe with lock)
723690 with self ._encoding_lock :
@@ -730,8 +697,7 @@ def setdecoding(
730697 SQL_WMETADATA : "SQL_WMETADATA" ,
731698 }.get (sqltype , str (sqltype ))
732699
733- logger .debug (
734- "info" ,
700+ logger .info (
735701 "Text decoding set for %s to %s with ctype %s" ,
736702 sqltype_name ,
737703 sanitize_user_input (encoding ),
0 commit comments