Skip to content

Commit ae23203

Browse files
authored
Merge branch 'main' into subrata-ms/SegFault
2 parents e8bc380 + 907b364 commit ae23203

12 files changed

+12049
-5842
lines changed

eng/pipelines/pr-validation-pipeline.yml

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1395,14 +1395,12 @@ jobs:
13951395
13961396
- script: |
13971397
# Create a Docker container for testing on x86_64
1398-
# TODO(AB#40901): Temporary pin to 3.22 due to msodbcsql ARM64 package arch mismatch
1399-
# Revert to alpine:latest once ODBC team releases fixed ARM64 package
14001398
docker run -d --name test-container-alpine \
14011399
--platform linux/amd64 \
14021400
-v $(Build.SourcesDirectory):/workspace \
14031401
-w /workspace \
14041402
--network bridge \
1405-
alpine:3.22 \
1403+
alpine:latest \
14061404
tail -f /dev/null
14071405
displayName: 'Create Alpine x86_64 container'
14081406

mssql_python/connection.py

Lines changed: 122 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -55,7 +55,57 @@
5555
INFO_TYPE_STRING_THRESHOLD: int = 10000
5656

5757
# UTF-16 encoding variants that should use SQL_WCHAR by default
58-
UTF16_ENCODINGS: frozenset[str] = frozenset(["utf-16", "utf-16le", "utf-16be"])
58+
# Note: "utf-16" with BOM is NOT included as it's problematic for SQL_WCHAR
59+
UTF16_ENCODINGS: frozenset[str] = frozenset(["utf-16le", "utf-16be"])
60+
61+
62+
def _validate_utf16_wchar_compatibility(
    encoding: str, wchar_type: int, context: str = "SQL_WCHAR"
) -> None:
    """
    Validate that an encoding may be combined with SQL_WCHAR.

    Shared by setencoding/setdecoding so both apply the same rule and raise
    the same errors.

    Args:
        encoding: The encoding name. It is compared verbatim against "utf-16"
            and the entries of UTF16_ENCODINGS, so callers must pass it
            already normalized to lowercase.
        wchar_type: The SQL_WCHAR constant value supplied by the caller. It is
            not consulted here; it is kept so existing call sites keep working.
        context: Label written to the log messages. When it contains "ctype",
            the raised error refers to "SQL_WCHAR ctype" instead of "SQL_WCHAR".

    Returns:
        None. Returning without raising means the encoding is in
        UTF16_ENCODINGS.

    Raises:
        ProgrammingError: If encoding is "utf-16" (the BOM variant) or is not
            one of UTF16_ENCODINGS.
    """
    if encoding == "utf-16":
        # Plain "utf-16" is rejected explicitly so the caller gets a message
        # pointing at the explicit-byte-order alternatives.
        logger.warning("utf-16 with BOM rejected for %s", context)
        raise ProgrammingError(
            driver_error="UTF-16 with Byte Order Mark not supported for SQL_WCHAR",
            ddbc_error=(
                "Cannot use 'utf-16' encoding with SQL_WCHAR due to Byte Order Mark ambiguity. "
                "Use 'utf-16le' or 'utf-16be' instead for explicit byte order."
            ),
        )

    if encoding not in UTF16_ENCODINGS:
        # Anything outside UTF16_ENCODINGS is refused for SQL_WCHAR.
        logger.warning(
            "Non-UTF-16 encoding %s attempted with %s", sanitize_user_input(encoding), context
        )

        # The error wording depends only on whether the caller was validating a ctype.
        ddbc_context = "SQL_WCHAR ctype" if "ctype" in context else "SQL_WCHAR"
        if "ctype" in context:
            driver_error = "SQL_WCHAR ctype only supports UTF-16 encodings"
        else:
            driver_error = "SQL_WCHAR only supports UTF-16 encodings"

        raise ProgrammingError(
            driver_error=driver_error,
            ddbc_error=(
                f"Cannot use encoding '{encoding}' with {ddbc_context}. "
                "SQL_WCHAR requires UTF-16 encodings (utf-16le, utf-16be)"
            ),
        )
59109

60110

61111
def _validate_encoding(encoding: str) -> bool:
@@ -71,7 +121,21 @@ def _validate_encoding(encoding: str) -> bool:
71121
Note:
72122
Uses LRU cache to avoid repeated expensive codecs.lookup() calls.
73123
Cache size is limited to 128 entries which should cover most use cases.
124+
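Also rejects empty or non-string names, names longer than 100 characters, and names containing NUL or other control characters (tab, newline and carriage return excepted).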
74125
"""
126+
# Basic security checks - prevent obvious attacks
127+
if not encoding or not isinstance(encoding, str):
128+
return False
129+
130+
# Check length limit (prevent DOS)
131+
if len(encoding) > 100:
132+
return False
133+
134+
# Prevent null bytes and control characters that could cause issues
135+
if "\x00" in encoding or any(ord(c) < 32 and c not in "\t\n\r" for c in encoding):
136+
return False
137+
138+
# Then check if it's a valid Python codec
75139
try:
76140
codecs.lookup(encoding)
77141
return True
@@ -228,6 +292,15 @@ def __init__(
228292
self._output_converters = {}
229293
self._converters_lock = threading.Lock()
230294

295+
# Initialize encoding/decoding settings lock for thread safety
296+
# This lock protects both _encoding_settings and _decoding_settings dictionaries
297+
# from concurrent modification. We use a simple Lock (not RLock) because:
298+
# - Write operations (setencoding/setdecoding) assign the new settings while holding the lock
299+
# - Read operations (getencoding/getdecoding) return a copy, so they're safe
300+
# - No recursive locking is needed in our usage pattern
301+
# A plain Lock is sufficient here and avoids RLock's re-entrancy bookkeeping; note that it serializes readers as well as writers
302+
self._encoding_lock = threading.Lock()
303+
231304
# Initialize search escape character
232305
self._searchescape = None
233306

@@ -433,8 +506,7 @@ def setencoding(self, encoding: Optional[str] = None, ctype: Optional[int] = Non
433506
# Validate encoding using cached validation for better performance
434507
if not _validate_encoding(encoding):
435508
# Log the sanitized encoding for security
436-
logger.debug(
437-
"warning",
509+
logger.warning(
438510
"Invalid encoding attempted: %s",
439511
sanitize_user_input(str(encoding)),
440512
)
@@ -447,6 +519,10 @@ def setencoding(self, encoding: Optional[str] = None, ctype: Optional[int] = Non
447519
encoding = encoding.casefold()
448520
logger.debug("setencoding: Encoding normalized to %s", encoding)
449521

522+
# Early validation if ctype is already specified as SQL_WCHAR
523+
if ctype == ConstantsDDBC.SQL_WCHAR.value:
524+
_validate_utf16_wchar_compatibility(encoding, ctype, "SQL_WCHAR")
525+
450526
# Set default ctype based on encoding if not provided
451527
if ctype is None:
452528
if encoding in UTF16_ENCODINGS:
@@ -460,8 +536,7 @@ def setencoding(self, encoding: Optional[str] = None, ctype: Optional[int] = Non
460536
valid_ctypes = [ConstantsDDBC.SQL_CHAR.value, ConstantsDDBC.SQL_WCHAR.value]
461537
if ctype not in valid_ctypes:
462538
# Log the sanitized ctype for security
463-
logger.debug(
464-
"warning",
539+
logger.warning(
465540
"Invalid ctype attempted: %s",
466541
sanitize_user_input(str(ctype)),
467542
)
@@ -473,20 +548,24 @@ def setencoding(self, encoding: Optional[str] = None, ctype: Optional[int] = Non
473548
),
474549
)
475550

476-
# Store the encoding settings
477-
self._encoding_settings = {"encoding": encoding, "ctype": ctype}
551+
# Final validation: SQL_WCHAR ctype only supports UTF-16 encodings (without BOM)
552+
if ctype == ConstantsDDBC.SQL_WCHAR.value:
553+
_validate_utf16_wchar_compatibility(encoding, ctype, "SQL_WCHAR")
554+
555+
# Store the encoding settings (thread-safe with lock)
556+
with self._encoding_lock:
557+
self._encoding_settings = {"encoding": encoding, "ctype": ctype}
478558

479559
# Log with sanitized values for security
480-
logger.debug(
481-
"info",
560+
logger.info(
482561
"Text encoding set to %s with ctype %s",
483562
sanitize_user_input(encoding),
484563
sanitize_user_input(str(ctype)),
485564
)
486565

487566
def getencoding(self) -> Dict[str, Union[str, int]]:
488567
"""
489-
Gets the current text encoding settings.
568+
Gets the current text encoding settings (thread-safe).
490569
491570
Returns:
492571
dict: A dictionary containing 'encoding' and 'ctype' keys.
@@ -498,14 +577,20 @@ def getencoding(self) -> Dict[str, Union[str, int]]:
498577
settings = cnxn.getencoding()
499578
print(f"Current encoding: {settings['encoding']}")
500579
print(f"Current ctype: {settings['ctype']}")
580+
581+
Note:
582+
This method is thread-safe and can be called from multiple threads concurrently.
583+
Returns a copy of the settings to prevent external modification.
501584
"""
502585
if self._closed:
503586
raise InterfaceError(
504587
driver_error="Connection is closed",
505588
ddbc_error="Connection is closed",
506589
)
507590

508-
return self._encoding_settings.copy()
591+
# Thread-safe read with lock to prevent race conditions
592+
with self._encoding_lock:
593+
return self._encoding_settings.copy()
509594

510595
def setdecoding(
511596
self, sqltype: int, encoding: Optional[str] = None, ctype: Optional[int] = None
@@ -556,8 +641,7 @@ def setdecoding(
556641
SQL_WMETADATA,
557642
]
558643
if sqltype not in valid_sqltypes:
559-
logger.debug(
560-
"warning",
644+
logger.warning(
561645
"Invalid sqltype attempted: %s",
562646
sanitize_user_input(str(sqltype)),
563647
)
@@ -579,8 +663,7 @@ def setdecoding(
579663

580664
# Validate encoding using cached validation for better performance
581665
if not _validate_encoding(encoding):
582-
logger.debug(
583-
"warning",
666+
logger.warning(
584667
"Invalid encoding attempted: %s",
585668
sanitize_user_input(str(encoding)),
586669
)
@@ -592,6 +675,13 @@ def setdecoding(
592675
# Normalize encoding to lowercase for consistency
593676
encoding = encoding.lower()
594677

678+
# Validate SQL_WCHAR encoding compatibility
679+
if sqltype == ConstantsDDBC.SQL_WCHAR.value:
680+
_validate_utf16_wchar_compatibility(encoding, sqltype, "SQL_WCHAR sqltype")
681+
682+
# SQL_WMETADATA can use any valid encoding (UTF-8, UTF-16, etc.)
683+
# No restriction needed here - let users configure as needed
684+
595685
# Set default ctype based on encoding if not provided
596686
if ctype is None:
597687
if encoding in UTF16_ENCODINGS:
@@ -602,8 +692,7 @@ def setdecoding(
602692
# Validate ctype
603693
valid_ctypes = [ConstantsDDBC.SQL_CHAR.value, ConstantsDDBC.SQL_WCHAR.value]
604694
if ctype not in valid_ctypes:
605-
logger.debug(
606-
"warning",
695+
logger.warning(
607696
"Invalid ctype attempted: %s",
608697
sanitize_user_input(str(ctype)),
609698
)
@@ -615,8 +704,13 @@ def setdecoding(
615704
),
616705
)
617706

618-
# Store the decoding settings for the specified sqltype
619-
self._decoding_settings[sqltype] = {"encoding": encoding, "ctype": ctype}
707+
# Validate SQL_WCHAR ctype encoding compatibility
708+
if ctype == ConstantsDDBC.SQL_WCHAR.value:
709+
_validate_utf16_wchar_compatibility(encoding, ctype, "SQL_WCHAR ctype")
710+
711+
# Store the decoding settings for the specified sqltype (thread-safe with lock)
712+
with self._encoding_lock:
713+
self._decoding_settings[sqltype] = {"encoding": encoding, "ctype": ctype}
620714

621715
# Log with sanitized values for security
622716
sqltype_name = {
@@ -625,8 +719,7 @@ def setdecoding(
625719
SQL_WMETADATA: "SQL_WMETADATA",
626720
}.get(sqltype, str(sqltype))
627721

628-
logger.debug(
629-
"info",
722+
logger.info(
630723
"Text decoding set for %s to %s with ctype %s",
631724
sqltype_name,
632725
sanitize_user_input(encoding),
@@ -635,7 +728,7 @@ def setdecoding(
635728

636729
def getdecoding(self, sqltype: int) -> Dict[str, Union[str, int]]:
637730
"""
638-
Gets the current text decoding settings for the specified SQL type.
731+
Gets the current text decoding settings for the specified SQL type (thread-safe).
639732
640733
Args:
641734
sqltype (int): The SQL type to get settings for: SQL_CHAR, SQL_WCHAR, or SQL_WMETADATA.
@@ -651,6 +744,10 @@ def getdecoding(self, sqltype: int) -> Dict[str, Union[str, int]]:
651744
settings = cnxn.getdecoding(mssql_python.SQL_CHAR)
652745
print(f"SQL_CHAR encoding: {settings['encoding']}")
653746
print(f"SQL_CHAR ctype: {settings['ctype']}")
747+
748+
Note:
749+
This method is thread-safe and can be called from multiple threads concurrently.
750+
Returns a copy of the settings to prevent external modification.
654751
"""
655752
if self._closed:
656753
raise InterfaceError(
@@ -674,7 +771,9 @@ def getdecoding(self, sqltype: int) -> Dict[str, Union[str, int]]:
674771
),
675772
)
676773

677-
return self._decoding_settings[sqltype].copy()
774+
# Thread-safe read with lock to prevent race conditions
775+
with self._encoding_lock:
776+
return self._decoding_settings[sqltype].copy()
678777

679778
def set_attr(self, attribute: int, value: Union[int, str, bytes, bytearray]) -> None:
680779
"""

mssql_python/constants.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -180,6 +180,9 @@ class ConstantsDDBC(Enum):
180180
# Reset Connection Constants
181181
SQL_RESET_CONNECTION_YES = 1
182182

183+
# Query Timeout Constants
184+
SQL_ATTR_QUERY_TIMEOUT = 0
185+
183186

184187
class GetInfoConstants(Enum):
185188
"""

0 commit comments

Comments
 (0)