INFO_TYPE_STRING_THRESHOLD: int = 10000

# UTF-16 encoding variants that should use SQL_WCHAR by default.
# Note: plain "utf-16" (the BOM-carrying variant) is deliberately NOT included,
# because it is rejected for SQL_WCHAR; only the explicit byte-order variants
# are listed.
UTF16_ENCODINGS: frozenset[str] = frozenset({"utf-16le", "utf-16be"})
60+
61+
def _validate_utf16_wchar_compatibility(
    encoding: str, wchar_type: int, context: str = "SQL_WCHAR"
) -> None:
    """
    Validate that an encoding may be used together with SQL_WCHAR.

    Centralizes the check shared by setencoding/setdecoding so both reject
    the same encodings with the same messages.

    Args:
        encoding: The encoding name. It is compared verbatim against
            "utf-16" and the members of UTF16_ENCODINGS, so callers are
            expected to pass an already-lowercased value.
        wchar_type: The SQL_WCHAR constant value supplied by the caller.
            Not read by this function; kept so existing call sites stay valid.
        context: Label describing where the check happens (for example
            'SQL_WCHAR' or 'SQL_WCHAR ctype'). It is written to the log, and
            if it contains the substring "ctype" the error text refers to
            "SQL_WCHAR ctype" instead of "SQL_WCHAR".

    Raises:
        ProgrammingError: If encoding is "utf-16" (BOM variant) or is not one
            of UTF16_ENCODINGS.
    """
    if encoding == "utf-16":
        # UTF-16 with BOM is rejected due to byte order ambiguity
        logger.warning("utf-16 with BOM rejected for %s", context)
        raise ProgrammingError(
            driver_error="UTF-16 with Byte Order Mark not supported for SQL_WCHAR",
            ddbc_error=(
                "Cannot use 'utf-16' encoding with SQL_WCHAR due to Byte Order Mark ambiguity. "
                "Use 'utf-16le' or 'utf-16be' instead for explicit byte order."
            ),
        )

    if encoding in UTF16_ENCODINGS:
        # Explicit byte-order UTF-16 variant: compatible, nothing to report.
        return

    # Non-UTF-16 encodings are not supported with SQL_WCHAR
    logger.warning(
        "Non-UTF-16 encoding %s attempted with %s", sanitize_user_input(encoding), context
    )

    # Pick the label used in both error strings; only a "ctype" context
    # changes the wording.
    ddbc_context = "SQL_WCHAR ctype" if "ctype" in context else "SQL_WCHAR"

    raise ProgrammingError(
        driver_error=f"{ddbc_context} only supports UTF-16 encodings",
        ddbc_error=(
            f"Cannot use encoding '{encoding}' with {ddbc_context}. "
            "SQL_WCHAR requires UTF-16 encodings (utf-16le, utf-16be)"
        ),
    )
59109
60110
61111def _validate_encoding (encoding : str ) -> bool :
@@ -71,7 +121,21 @@ def _validate_encoding(encoding: str) -> bool:
71121 Note:
72122 Uses LRU cache to avoid repeated expensive codecs.lookup() calls.
73123 Cache size is limited to 128 entries which should cover most use cases.
124+ Also validates that encoding name only contains safe characters.
74125 """
126+ # Basic security checks - prevent obvious attacks
127+ if not encoding or not isinstance (encoding , str ):
128+ return False
129+
130+ # Check length limit (prevent DOS)
131+ if len (encoding ) > 100 :
132+ return False
133+
134+ # Prevent null bytes and control characters that could cause issues
135+ if "\x00 " in encoding or any (ord (c ) < 32 and c not in "\t \n \r " for c in encoding ):
136+ return False
137+
138+ # Then check if it's a valid Python codec
75139 try :
76140 codecs .lookup (encoding )
77141 return True
@@ -228,6 +292,15 @@ def __init__(
228292 self ._output_converters = {}
229293 self ._converters_lock = threading .Lock ()
230294
295+ # Initialize encoding/decoding settings lock for thread safety
296+ # This lock protects both _encoding_settings and _decoding_settings dictionaries
297+ # from concurrent modification. We use a simple Lock (not RLock) because:
298+ # - Writes hold the lock: setencoding replaces its dict, setdecoding updates one key
299+ # - Read operations (getencoding/getdecoding) copy the settings while holding the lock
300+ # - No recursive locking is needed in our usage pattern
301+ # Note: a plain Lock also serializes readers; it is not a readers-writer lock
302+ self ._encoding_lock = threading .Lock ()
303+
231304 # Initialize search escape character
232305 self ._searchescape = None
233306
@@ -433,8 +506,7 @@ def setencoding(self, encoding: Optional[str] = None, ctype: Optional[int] = Non
433506 # Validate encoding using cached validation for better performance
434507 if not _validate_encoding (encoding ):
435508 # Log the sanitized encoding for security
436- logger .debug (
437- "warning" ,
509+ logger .warning (
438510 "Invalid encoding attempted: %s" ,
439511 sanitize_user_input (str (encoding )),
440512 )
@@ -447,6 +519,10 @@ def setencoding(self, encoding: Optional[str] = None, ctype: Optional[int] = Non
447519 encoding = encoding .casefold ()
448520 logger .debug ("setencoding: Encoding normalized to %s" , encoding )
449521
522+ # Early validation if ctype is already specified as SQL_WCHAR
523+ if ctype == ConstantsDDBC .SQL_WCHAR .value :
524+ _validate_utf16_wchar_compatibility (encoding , ctype , "SQL_WCHAR" )
525+
450526 # Set default ctype based on encoding if not provided
451527 if ctype is None :
452528 if encoding in UTF16_ENCODINGS :
@@ -460,8 +536,7 @@ def setencoding(self, encoding: Optional[str] = None, ctype: Optional[int] = Non
460536 valid_ctypes = [ConstantsDDBC .SQL_CHAR .value , ConstantsDDBC .SQL_WCHAR .value ]
461537 if ctype not in valid_ctypes :
462538 # Log the sanitized ctype for security
463- logger .debug (
464- "warning" ,
539+ logger .warning (
465540 "Invalid ctype attempted: %s" ,
466541 sanitize_user_input (str (ctype )),
467542 )
@@ -473,20 +548,24 @@ def setencoding(self, encoding: Optional[str] = None, ctype: Optional[int] = Non
473548 ),
474549 )
475550
476- # Store the encoding settings
477- self ._encoding_settings = {"encoding" : encoding , "ctype" : ctype }
551+ # Final validation: SQL_WCHAR ctype only supports UTF-16 encodings (without BOM)
552+ if ctype == ConstantsDDBC .SQL_WCHAR .value :
553+ _validate_utf16_wchar_compatibility (encoding , ctype , "SQL_WCHAR" )
554+
555+ # Store the encoding settings (thread-safe with lock)
556+ with self ._encoding_lock :
557+ self ._encoding_settings = {"encoding" : encoding , "ctype" : ctype }
478558
479559 # Log with sanitized values for security
480- logger .debug (
481- "info" ,
560+ logger .info (
482561 "Text encoding set to %s with ctype %s" ,
483562 sanitize_user_input (encoding ),
484563 sanitize_user_input (str (ctype )),
485564 )
486565
487566 def getencoding (self ) -> Dict [str , Union [str , int ]]:
488567 """
489- Gets the current text encoding settings.
568+ Gets the current text encoding settings (thread-safe) .
490569
491570 Returns:
492571 dict: A dictionary containing 'encoding' and 'ctype' keys.
@@ -498,14 +577,20 @@ def getencoding(self) -> Dict[str, Union[str, int]]:
498577 settings = cnxn.getencoding()
499578 print(f"Current encoding: {settings['encoding']}")
500579 print(f"Current ctype: {settings['ctype']}")
580+
581+ Note:
582+ This method is thread-safe and can be called from multiple threads concurrently.
583+ Returns a copy of the settings to prevent external modification.
501584 """
502585 if self ._closed :
503586 raise InterfaceError (
504587 driver_error = "Connection is closed" ,
505588 ddbc_error = "Connection is closed" ,
506589 )
507590
508- return self ._encoding_settings .copy ()
591+ # Thread-safe read with lock to prevent race conditions
592+ with self ._encoding_lock :
593+ return self ._encoding_settings .copy ()
509594
510595 def setdecoding (
511596 self , sqltype : int , encoding : Optional [str ] = None , ctype : Optional [int ] = None
@@ -556,8 +641,7 @@ def setdecoding(
556641 SQL_WMETADATA ,
557642 ]
558643 if sqltype not in valid_sqltypes :
559- logger .debug (
560- "warning" ,
644+ logger .warning (
561645 "Invalid sqltype attempted: %s" ,
562646 sanitize_user_input (str (sqltype )),
563647 )
@@ -579,8 +663,7 @@ def setdecoding(
579663
580664 # Validate encoding using cached validation for better performance
581665 if not _validate_encoding (encoding ):
582- logger .debug (
583- "warning" ,
666+ logger .warning (
584667 "Invalid encoding attempted: %s" ,
585668 sanitize_user_input (str (encoding )),
586669 )
@@ -592,6 +675,13 @@ def setdecoding(
592675 # Normalize encoding to lowercase for consistency
593676 encoding = encoding .lower ()
594677
678+ # Validate SQL_WCHAR encoding compatibility
679+ if sqltype == ConstantsDDBC .SQL_WCHAR .value :
680+ _validate_utf16_wchar_compatibility (encoding , sqltype , "SQL_WCHAR sqltype" )
681+
682+ # SQL_WMETADATA can use any valid encoding (UTF-8, UTF-16, etc.)
683+ # No restriction needed here - let users configure as needed
684+
595685 # Set default ctype based on encoding if not provided
596686 if ctype is None :
597687 if encoding in UTF16_ENCODINGS :
@@ -602,8 +692,7 @@ def setdecoding(
602692 # Validate ctype
603693 valid_ctypes = [ConstantsDDBC .SQL_CHAR .value , ConstantsDDBC .SQL_WCHAR .value ]
604694 if ctype not in valid_ctypes :
605- logger .debug (
606- "warning" ,
695+ logger .warning (
607696 "Invalid ctype attempted: %s" ,
608697 sanitize_user_input (str (ctype )),
609698 )
@@ -615,8 +704,13 @@ def setdecoding(
615704 ),
616705 )
617706
618- # Store the decoding settings for the specified sqltype
619- self ._decoding_settings [sqltype ] = {"encoding" : encoding , "ctype" : ctype }
707+ # Validate SQL_WCHAR ctype encoding compatibility
708+ if ctype == ConstantsDDBC .SQL_WCHAR .value :
709+ _validate_utf16_wchar_compatibility (encoding , ctype , "SQL_WCHAR ctype" )
710+
711+ # Store the decoding settings for the specified sqltype (thread-safe with lock)
712+ with self ._encoding_lock :
713+ self ._decoding_settings [sqltype ] = {"encoding" : encoding , "ctype" : ctype }
620714
621715 # Log with sanitized values for security
622716 sqltype_name = {
@@ -625,8 +719,7 @@ def setdecoding(
625719 SQL_WMETADATA : "SQL_WMETADATA" ,
626720 }.get (sqltype , str (sqltype ))
627721
628- logger .debug (
629- "info" ,
722+ logger .info (
630723 "Text decoding set for %s to %s with ctype %s" ,
631724 sqltype_name ,
632725 sanitize_user_input (encoding ),
@@ -635,7 +728,7 @@ def setdecoding(
635728
636729 def getdecoding (self , sqltype : int ) -> Dict [str , Union [str , int ]]:
637730 """
638- Gets the current text decoding settings for the specified SQL type.
731+ Gets the current text decoding settings for the specified SQL type (thread-safe) .
639732
640733 Args:
641734 sqltype (int): The SQL type to get settings for: SQL_CHAR, SQL_WCHAR, or SQL_WMETADATA.
@@ -651,6 +744,10 @@ def getdecoding(self, sqltype: int) -> Dict[str, Union[str, int]]:
651744 settings = cnxn.getdecoding(mssql_python.SQL_CHAR)
652745 print(f"SQL_CHAR encoding: {settings['encoding']}")
653746 print(f"SQL_CHAR ctype: {settings['ctype']}")
747+
748+ Note:
749+ This method is thread-safe and can be called from multiple threads concurrently.
750+ Returns a copy of the settings to prevent external modification.
654751 """
655752 if self ._closed :
656753 raise InterfaceError (
@@ -674,7 +771,9 @@ def getdecoding(self, sqltype: int) -> Dict[str, Union[str, int]]:
674771 ),
675772 )
676773
677- return self ._decoding_settings [sqltype ].copy ()
774+ # Thread-safe read with lock to prevent race conditions
775+ with self ._encoding_lock :
776+ return self ._decoding_settings [sqltype ].copy ()
678777
679778 def set_attr (self , attribute : int , value : Union [int , str , bytes , bytearray ]) -> None :
680779 """
0 commit comments