Skip to content

Commit c55f7a4

Browse files
committed
FEAT: Adding conn.setencoding() API
1 parent 1c2a895 commit c55f7a4

File tree

3 files changed

+415
-2
lines changed

3 files changed

+415
-2
lines changed

mssql_python/connection.py

Lines changed: 95 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -12,12 +12,14 @@
1212
"""
1313
import weakref
1414
import re
15+
import codecs
1516
from mssql_python.cursor import Cursor
1617
from mssql_python.helpers import add_driver_to_connection_str, sanitize_connection_string, log
1718
from mssql_python import ddbc_bindings
1819
from mssql_python.pooling import PoolingManager
19-
from mssql_python.exceptions import InterfaceError
20+
from mssql_python.exceptions import InterfaceError, ProgrammingError
2021
from mssql_python.auth import process_connection_string
22+
from mssql_python.constants import ConstantsDDBC
2123

2224

2325
class Connection:
@@ -36,6 +38,7 @@ class Connection:
3638
commit() -> None:
3739
rollback() -> None:
3840
close() -> None:
41+
setencoding(encoding=None, ctype=None) -> None:
3942
"""
4043

4144
def __init__(self, connection_str: str = "", autocommit: bool = False, attrs_before: dict = None, **kwargs) -> None:
@@ -63,6 +66,13 @@ def __init__(self, connection_str: str = "", autocommit: bool = False, attrs_bef
6366
)
6467
self._attrs_before = attrs_before or {}
6568

69+
# Initialize encoding settings with defaults for Python 3
70+
# Python 3 only has str (which is Unicode), so we use utf-16le by default
71+
self._encoding_settings = {
72+
'encoding': 'utf-16le',
73+
'ctype': ConstantsDDBC.SQL_WCHAR.value
74+
}
75+
6676
# Check if the connection string contains authentication parameters
6777
# This is important for processing the connection string correctly.
6878
# If authentication is specified, it will be processed to handle
@@ -159,6 +169,90 @@ def setautocommit(self, value: bool = False) -> None:
159169
"""
160170
self._conn.set_autocommit(value)
161171

172+
def setencoding(self, encoding=None, ctype=None):
    """
    Set the text encoding used for SQL statements and text parameters.

    Since Python 3 only has str (which is Unicode), this method configures
    how text is encoded when it is sent to the database. The settings are
    stored on the connection and can be read back with getencoding().

    Args:
        encoding (str, optional): The encoding to use. This must be an
            encoding name known to Python's codec registry. If None,
            defaults to 'utf-16le'.
        ctype (int, optional): The C data type to use when passing data:
            SQL_CHAR or SQL_WCHAR. If not provided, SQL_WCHAR is used for
            any spelling of UTF-16 / UTF-16LE / UTF-16BE (for example
            'utf-16le', 'UTF-16-LE' or 'utf_16_le'); SQL_CHAR is used for
            all other encodings.

    Returns:
        None

    Raises:
        ProgrammingError: If the encoding is not a valid encoding name or
            the ctype is neither SQL_CHAR nor SQL_WCHAR.
        InterfaceError: If the connection is closed.

    Example:
        # For databases that only communicate with UTF-8
        cnxn.setencoding(encoding='utf-8')

        # For explicitly using SQL_CHAR
        cnxn.setencoding(encoding='utf-8', ctype=mssql_python.SQL_CHAR)
    """
    if self._closed:
        raise InterfaceError(
            driver_error="Cannot set encoding on closed connection",
            ddbc_error="Cannot set encoding on closed connection",
        )

    # Set default encoding if not provided
    if encoding is None:
        encoding = 'utf-16le'

    # Validate the encoding and obtain its canonical codec name.
    # codecs.lookup raises LookupError for unknown names; TypeError and
    # ValueError cover non-string or malformed names, which must surface
    # as ProgrammingError as well rather than leaking out of the API.
    try:
        codec_name = codecs.lookup(encoding).name
    except (LookupError, TypeError, ValueError) as exc:
        raise ProgrammingError(
            driver_error=f"Unknown encoding: {encoding}",
            ddbc_error=f"The encoding '{encoding}' is not supported by Python",
        ) from exc

    # Set default ctype based on encoding if not provided. The decision is
    # made on the canonical codec name so that every accepted alias of a
    # UTF-16 codec gets SQL_WCHAR, not only three literal spellings.
    if ctype is None:
        if codec_name in ('utf-16', 'utf-16-le', 'utf-16-be'):
            ctype = ConstantsDDBC.SQL_WCHAR.value
        else:
            ctype = ConstantsDDBC.SQL_CHAR.value

    # Validate ctype
    valid_ctypes = (ConstantsDDBC.SQL_CHAR.value, ConstantsDDBC.SQL_WCHAR.value)
    if ctype not in valid_ctypes:
        raise ProgrammingError(
            driver_error=f"Invalid ctype: {ctype}",
            ddbc_error=f"ctype must be SQL_CHAR ({ConstantsDDBC.SQL_CHAR.value}) or SQL_WCHAR ({ConstantsDDBC.SQL_WCHAR.value})",
        )

    # Store the encoding settings; the encoding is kept exactly as the
    # caller supplied it so getencoding() round-trips the same string.
    self._encoding_settings = {
        'encoding': encoding,
        'ctype': ctype,
    }

    log('info', "Text encoding set to %s with ctype %s", encoding, ctype)
241+
242+
def getencoding(self):
    """
    Return the text encoding settings currently stored on this connection.

    The result is a fresh shallow copy, so mutating it does not affect
    the settings held by the connection.

    Returns:
        dict: A dictionary containing 'encoding' and 'ctype' keys.

    Example:
        settings = cnxn.getencoding()
        print(f"Current encoding: {settings['encoding']}")
        print(f"Current ctype: {settings['ctype']}")
    """
    snapshot = dict(self._encoding_settings)
    return snapshot
255+
162256
def cursor(self) -> Cursor:
163257
"""
164258
Return a new Cursor object using the connection.

mssql_python/type.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -104,7 +104,7 @@ def Binary(value) -> bytes:
104104
"""
105105
Converts a string or bytes to bytes for use with binary database columns.
106106
107-
This function follows the DB-API 2.0 specification and pyodbc compatibility.
107+
This function follows the DB-API 2.0 specification.
108108
It accepts only str and bytes/bytearray types to ensure type safety.
109109
110110
Args:

0 commit comments

Comments
 (0)