Skip to content

Commit 13050e6

Browse files
committed
working executemany for datetimeoffset
1 parent beeeb2b commit 13050e6

File tree

3 files changed

+198
-64
lines changed

3 files changed

+198
-64
lines changed

mssql_python/cursor.py

Lines changed: 15 additions & 50 deletions
Original file line number | Diff line number | Diff line change
@@ -140,27 +140,26 @@ def _parse_date(self, param):
140140

141141
def _parse_datetime(self, param):
142142
"""
143-
Attempt to parse a string as a datetime, smalldatetime, datetime2, timestamp.
144-
145-
Args:
146-
param: The string to parse.
147-
148-
Returns:
149-
A datetime.datetime object if parsing is successful, else None.
143+
Attempt to parse a string as a datetime, datetime2, timestamp, or smalldatetime.
144+
Handles both naive and timezone-aware datetime strings.
150145
"""
151146
formats = [
152-
"%Y-%m-%dT%H:%M:%S.%f", # ISO 8601 datetime with fractional seconds
153-
"%Y-%m-%dT%H:%M:%S", # ISO 8601 datetime
154-
"%Y-%m-%d %H:%M:%S.%f", # Datetime with fractional seconds
155-
"%Y-%m-%d %H:%M:%S", # Datetime without fractional seconds
147+
"%Y-%m-%dT%H:%M:%S.%f%z", # ISO 8601 with fractional seconds + timezone
148+
"%Y-%m-%dT%H:%M:%S%z", # ISO 8601 with timezone
149+
"%Y-%m-%d %H:%M:%S.%f%z", # Space-separated with fractional seconds + timezone
150+
"%Y-%m-%d %H:%M:%S%z", # Space-separated with timezone
151+
"%Y-%m-%dT%H:%M:%S.%f", # ISO 8601 without timezone
152+
"%Y-%m-%dT%H:%M:%S", # ISO 8601 without timezone
153+
"%Y-%m-%d %H:%M:%S.%f", # Space-separated without timezone
154+
"%Y-%m-%d %H:%M:%S", # Space-separated without timezone
156155
]
157156
for fmt in formats:
158157
try:
159-
return datetime.datetime.strptime(param, fmt) # Valid datetime
158+
dt = datetime.datetime.strptime(param, fmt)
159+
return dt
160160
except ValueError:
161-
continue # Try next format
162-
163-
return None # If all formats fail, return None
161+
continue
162+
return None # parsing failed
164163

165164
def _parse_time(self, param):
166165
"""
@@ -1442,35 +1441,6 @@ def columns(self, table=None, catalog=None, schema=None, column=None):
14421441
# Use the helper method to prepare the result set
14431442
return self._prepare_metadata_result_set(fallback_description=fallback_description)
14441443

1445-
@staticmethod
1446-
def _select_best_sample_value(column):
1447-
"""
1448-
Selects the most representative non-null value from a column for type inference.
1449-
1450-
This is used during executemany() to infer SQL/C types based on actual data,
1451-
preferring a non-null value that is not the first row to avoid bias from placeholder defaults.
1452-
1453-
Args:
1454-
column: List of values in the column.
1455-
"""
1456-
non_nulls = [v for v in column if v is not None]
1457-
if not non_nulls:
1458-
return None
1459-
if all(isinstance(v, int) for v in non_nulls):
1460-
# Pick the value with the widest range (min/max)
1461-
return max(non_nulls, key=lambda v: abs(v))
1462-
if all(isinstance(v, float) for v in non_nulls):
1463-
return 0.0
1464-
if all(isinstance(v, decimal.Decimal) for v in non_nulls):
1465-
return max(non_nulls, key=lambda d: len(d.as_tuple().digits))
1466-
if all(isinstance(v, str) for v in non_nulls):
1467-
return max(non_nulls, key=lambda s: len(str(s)))
1468-
if all(isinstance(v, datetime.datetime) for v in non_nulls):
1469-
return datetime.datetime.now()
1470-
if all(isinstance(v, datetime.date) for v in non_nulls):
1471-
return datetime.date.today()
1472-
return non_nulls[0] # fallback
1473-
14741444
def _transpose_rowwise_to_columnwise(self, seq_of_parameters: list) -> tuple[list, int]:
14751445
"""
14761446
Convert sequence of rows (row-wise) into list of columns (column-wise),
@@ -1643,12 +1613,7 @@ def executemany(self, operation: str, seq_of_parameters: list) -> None:
16431613
else:
16441614
# Use auto-detection for columns without explicit types
16451615
column = [row[col_index] for row in seq_of_parameters] if hasattr(seq_of_parameters, '__getitem__') else []
1646-
if not column:
1647-
# For generators, use the sample row for inference
1648-
sample_value = sample_row[col_index]
1649-
else:
1650-
sample_value = self._select_best_sample_value(column)
1651-
1616+
sample_value, min_val, max_val = self._compute_column_type(column)
16521617
dummy_row = list(sample_row)
16531618
paraminfo = self._create_parameter_types_list(
16541619
sample_value, param_info, dummy_row, col_index, min_val=min_val, max_val=max_val

mssql_python/pybind/ddbc_bindings.cpp

Lines changed: 126 additions & 14 deletions
Original file line number | Diff line number | Diff line change
@@ -64,6 +64,20 @@ struct NumericData {
6464
: precision(precision), scale(scale), sign(sign), val(value) {}
6565
};
6666

67+
// Struct to hold the DateTimeOffset structure
68+
struct DateTimeOffset
69+
{
70+
SQLSMALLINT year;
71+
SQLUSMALLINT month;
72+
SQLUSMALLINT day;
73+
SQLUSMALLINT hour;
74+
SQLUSMALLINT minute;
75+
SQLUSMALLINT second;
76+
SQLUINTEGER fraction; // Nanoseconds
77+
SQLSMALLINT timezone_hour; // Offset hours from UTC
78+
SQLSMALLINT timezone_minute; // Offset minutes from UTC
79+
};
80+
6781
// Struct to hold data buffers and indicators for each column
6882
struct ColumnBuffers {
6983
std::vector<std::vector<SQLCHAR>> charBuffers;
@@ -78,6 +92,7 @@ struct ColumnBuffers {
7892
std::vector<std::vector<SQL_TIME_STRUCT>> timeBuffers;
7993
std::vector<std::vector<SQLGUID>> guidBuffers;
8094
std::vector<std::vector<SQLLEN>> indicators;
95+
std::vector<std::vector<DateTimeOffset>> datetimeoffsetBuffers;
8196

8297
ColumnBuffers(SQLSMALLINT numCols, int fetchSize)
8398
: charBuffers(numCols),
@@ -91,23 +106,10 @@ struct ColumnBuffers {
91106
dateBuffers(numCols),
92107
timeBuffers(numCols),
93108
guidBuffers(numCols),
109+
datetimeoffsetBuffers(numCols),
94110
indicators(numCols, std::vector<SQLLEN>(fetchSize)) {}
95111
};
96112

97-
// Struct to hold the DateTimeOffset structure
98-
struct DateTimeOffset
99-
{
100-
SQLSMALLINT year;
101-
SQLUSMALLINT month;
102-
SQLUSMALLINT day;
103-
SQLUSMALLINT hour;
104-
SQLUSMALLINT minute;
105-
SQLUSMALLINT second;
106-
SQLUINTEGER fraction; // Nanoseconds
107-
SQLSMALLINT timezone_hour; // Offset hours from UTC
108-
SQLSMALLINT timezone_minute; // Offset minutes from UTC
109-
};
110-
111113
//-------------------------------------------------------------------------------------------------
112114
// Function pointer initialization
113115
//-------------------------------------------------------------------------------------------------
@@ -1945,6 +1947,7 @@ SQLRETURN BindParameterArray(SQLHANDLE hStmt,
19451947
break;
19461948
}
19471949
case SQL_C_TYPE_TIMESTAMP: {
1950+
std::cout<<"Binding Timestamp param at index "<<paramIndex<<std::endl;
19481951
SQL_TIMESTAMP_STRUCT* tsArray = AllocateParamBufferArray<SQL_TIMESTAMP_STRUCT>(tempBuffers, paramSetSize);
19491952
strLenOrIndArray = AllocateParamBufferArray<SQLLEN>(tempBuffers, paramSetSize);
19501953
for (size_t i = 0; i < paramSetSize; ++i) {
@@ -1967,6 +1970,67 @@ SQLRETURN BindParameterArray(SQLHANDLE hStmt,
19671970
bufferLength = sizeof(SQL_TIMESTAMP_STRUCT);
19681971
break;
19691972
}
1973+
case SQL_C_SS_TIMESTAMPOFFSET: {
1974+
std::cout<<"Binding DateTimeOffset param at index "<<paramIndex<<std::endl;
1975+
DateTimeOffset* dtoArray = AllocateParamBufferArray<DateTimeOffset>(tempBuffers, paramSetSize);
1976+
strLenOrIndArray = AllocateParamBufferArray<SQLLEN>(tempBuffers, paramSetSize);
1977+
1978+
py::object datetimeType = py::module_::import("datetime").attr("datetime");
1979+
1980+
for (size_t i = 0; i < paramSetSize; ++i) {
1981+
const py::handle& param = columnValues[i];
1982+
1983+
if (param.is_none()) {
1984+
std::memset(&dtoArray[i], 0, sizeof(DateTimeOffset));
1985+
strLenOrIndArray[i] = SQL_NULL_DATA;
1986+
} else {
1987+
if (!py::isinstance(param, datetimeType)) {
1988+
ThrowStdException(MakeParamMismatchErrorStr(info.paramCType, paramIndex));
1989+
}
1990+
1991+
py::object tzinfo = param.attr("tzinfo");
1992+
if (tzinfo.is_none()) {
1993+
ThrowStdException("Datetime object must have tzinfo for SQL_C_SS_TIMESTAMPOFFSET at paramIndex " +
1994+
std::to_string(paramIndex));
1995+
}
1996+
1997+
// Convert the Python datetime object to UTC before binding.
1998+
// This is the crucial step to ensure timezone normalization.
1999+
py::object datetimeModule = py::module_::import("datetime");
2000+
py::object utc_dt = param.attr("astimezone")(datetimeModule.attr("timezone").attr("utc"));
2001+
std::cout<<"!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!"<<std::endl;
2002+
// --- TEMPORARY DEBUGGING: LOG THE UTC VALUES ---
2003+
LOG("Binding UTC values: {}-{}-{} {}:{}:{}.{} +00:00",
2004+
utc_dt.attr("year").cast<int>(),
2005+
utc_dt.attr("month").cast<int>(),
2006+
utc_dt.attr("day").cast<int>(),
2007+
utc_dt.attr("hour").cast<int>(),
2008+
utc_dt.attr("minute").cast<int>(),
2009+
utc_dt.attr("second").cast<int>(),
2010+
utc_dt.attr("microsecond").cast<int>()
2011+
);
2012+
2013+
// Now, populate the C++ struct using the UTC-converted object.
2014+
dtoArray[i].year = static_cast<SQLSMALLINT>(utc_dt.attr("year").cast<int>());
2015+
dtoArray[i].month = static_cast<SQLUSMALLINT>(utc_dt.attr("month").cast<int>());
2016+
dtoArray[i].day = static_cast<SQLUSMALLINT>(utc_dt.attr("day").cast<int>());
2017+
dtoArray[i].hour = static_cast<SQLUSMALLINT>(utc_dt.attr("hour").cast<int>());
2018+
dtoArray[i].minute = static_cast<SQLUSMALLINT>(utc_dt.attr("minute").cast<int>());
2019+
dtoArray[i].second = static_cast<SQLUSMALLINT>(utc_dt.attr("second").cast<int>());
2020+
dtoArray[i].fraction = static_cast<SQLUINTEGER>(utc_dt.attr("microsecond").cast<int>() * 1000);
2021+
2022+
// Since we've converted to UTC, the timezone offset is always 0.
2023+
dtoArray[i].timezone_hour = 0;
2024+
dtoArray[i].timezone_minute = 0;
2025+
2026+
strLenOrIndArray[i] = sizeof(DateTimeOffset);
2027+
}
2028+
}
2029+
2030+
dataPtr = dtoArray;
2031+
bufferLength = sizeof(DateTimeOffset);
2032+
break;
2033+
}
19702034
case SQL_C_NUMERIC: {
19712035
SQL_NUMERIC_STRUCT* numericArray = AllocateParamBufferArray<SQL_NUMERIC_STRUCT>(tempBuffers, paramSetSize);
19722036
strLenOrIndArray = AllocateParamBufferArray<SQLLEN>(tempBuffers, paramSetSize);
@@ -2642,6 +2706,7 @@ SQLRETURN SQLGetData_wrap(SqlHandlePtr StatementHandle, SQLUSMALLINT colCount, p
26422706
microseconds,
26432707
tzinfo
26442708
);
2709+
py_dt = py_dt.attr("astimezone")(datetime.attr("timezone").attr("utc"));
26452710
row.append(py_dt);
26462711
} else {
26472712
LOG("Error fetching DATETIMEOFFSET for column {}, ret={}", i, ret);
@@ -2912,6 +2977,13 @@ SQLRETURN SQLBindColums(SQLHSTMT hStmt, ColumnBuffers& buffers, py::list& column
29122977
ret = SQLBindCol_ptr(hStmt, col, SQL_C_BINARY, buffers.charBuffers[col - 1].data(),
29132978
columnSize, buffers.indicators[col - 1].data());
29142979
break;
2980+
case SQL_SS_TIMESTAMPOFFSET:
2981+
buffers.datetimeoffsetBuffers[col - 1].resize(fetchSize);
2982+
ret = SQLBindCol_ptr(hStmt, col, SQL_C_SS_TIMESTAMPOFFSET,
2983+
buffers.datetimeoffsetBuffers[col - 1].data(),
2984+
sizeof(DateTimeOffset) * fetchSize,
2985+
buffers.indicators[col - 1].data());
2986+
break;
29152987
default:
29162988
std::wstring columnName = columnMeta["ColumnName"].cast<std::wstring>();
29172989
std::ostringstream errorString;
@@ -3127,6 +3199,43 @@ SQLRETURN FetchBatchData(SQLHSTMT hStmt, ColumnBuffers& buffers, py::list& colum
31273199
buffers.timeBuffers[col - 1][i].second));
31283200
break;
31293201
}
3202+
case SQL_SS_TIMESTAMPOFFSET: {
3203+
// i = current row index in outer loop
3204+
SQLULEN rowIdx = i;
3205+
const DateTimeOffset& dtoValue = buffers.datetimeoffsetBuffers[col - 1][rowIdx];
3206+
SQLLEN indicator = buffers.indicators[col - 1][rowIdx];
3207+
3208+
if (indicator != SQL_NULL_DATA) {
3209+
// Compute total minutes offset
3210+
int totalMinutes = dtoValue.timezone_hour * 60 + dtoValue.timezone_minute;
3211+
3212+
// Import Python datetime module
3213+
py::object datetime = py::module_::import("datetime");
3214+
3215+
// Construct tzinfo object for the original offset
3216+
py::object tzinfo = datetime.attr("timezone")(
3217+
datetime.attr("timedelta")(py::arg("minutes") = totalMinutes)
3218+
);
3219+
3220+
// Construct Python datetime object with tzinfo
3221+
py::object py_dt = datetime.attr("datetime")(
3222+
dtoValue.year,
3223+
dtoValue.month,
3224+
dtoValue.day,
3225+
dtoValue.hour,
3226+
dtoValue.minute,
3227+
dtoValue.second,
3228+
dtoValue.fraction / 1000, // ns → µs
3229+
tzinfo
3230+
);
3231+
py_dt = py_dt.attr("astimezone")(datetime.attr("timezone").attr("utc"));
3232+
// Append to row
3233+
row.append(py_dt);
3234+
} else {
3235+
row.append(py::none());
3236+
}
3237+
break;
3238+
}
31303239
case SQL_GUID: {
31313240
SQLGUID* guidValue = &buffers.guidBuffers[col - 1][i];
31323241
uint8_t reordered[16];
@@ -3246,6 +3355,9 @@ size_t calculateRowSize(py::list& columnNames, SQLUSMALLINT numCols) {
32463355
case SQL_LONGVARBINARY:
32473356
rowSize += columnSize;
32483357
break;
3358+
case SQL_SS_TIMESTAMPOFFSET:
3359+
rowSize += sizeof(DateTimeOffset); // your custom struct for SQL_C_SS_TIMESTAMPOFFSET
3360+
break;
32493361
default:
32503362
std::wstring columnName = columnMeta["ColumnName"].cast<std::wstring>();
32513363
std::ostringstream errorString;

tests/test_004_cursor.py

Lines changed: 57 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -7822,6 +7822,63 @@ def test_datetimeoffset_malformed_input(cursor, db_connection):
78227822
finally:
78237823
cursor.execute("DROP TABLE IF EXISTS #pytest_datetimeoffset_malformed_input;")
78247824
db_connection.commit()
7825+
7826+
def test_datetimeoffset_executemany(cursor, db_connection):
7827+
"""
7828+
Test the driver's ability to correctly read and write DATETIMEOFFSET data
7829+
using executemany, including timezone information.
7830+
"""
7831+
try:
7832+
datetimeoffset_test_cases = [
7833+
(
7834+
"2023-10-26 10:30:00.0000000 +05:30",
7835+
datetime(2023, 10, 26, 10, 30, 0, 0,
7836+
tzinfo=timezone(timedelta(hours=5, minutes=30)))
7837+
),
7838+
(
7839+
"2023-10-27 15:45:10.1234567 -08:00",
7840+
datetime(2023, 10, 27, 15, 45, 10, 123456,
7841+
tzinfo=timezone(timedelta(hours=-8)))
7842+
),
7843+
(
7844+
"2023-10-28 20:00:05.9876543 +00:00",
7845+
datetime(2023, 10, 28, 20, 0, 5, 987654,
7846+
tzinfo=timezone(timedelta(hours=0)))
7847+
)
7848+
]
7849+
7850+
# Create temp table
7851+
cursor.execute("IF OBJECT_ID('tempdb..#pytest_dto', 'U') IS NOT NULL DROP TABLE #pytest_dto;")
7852+
cursor.execute("CREATE TABLE #pytest_dto (id INT PRIMARY KEY, dto_column DATETIMEOFFSET);")
7853+
db_connection.commit()
7854+
7855+
# Prepare data for executemany
7856+
param_list = [(i, python_dt) for i, (_, python_dt) in enumerate(datetimeoffset_test_cases)]
7857+
cursor.executemany("INSERT INTO #pytest_dto (id, dto_column) VALUES (?, ?);", param_list)
7858+
db_connection.commit()
7859+
7860+
# Read back and validate
7861+
cursor.execute("SELECT id, dto_column FROM #pytest_dto ORDER BY id;")
7862+
rows = cursor.fetchall()
7863+
7864+
for i, (sql_str, python_dt) in enumerate(datetimeoffset_test_cases):
7865+
fetched_id, fetched_dto = rows[i]
7866+
assert fetched_dto.tzinfo is not None, "Fetched datetime object is naive."
7867+
7868+
expected_utc = python_dt.astimezone(timezone.utc).replace(tzinfo=None)
7869+
fetched_utc = fetched_dto.astimezone(timezone.utc).replace(tzinfo=None)
7870+
7871+
# Round microseconds to nearest millisecond for comparison
7872+
expected_utc = expected_utc.replace(microsecond=int(expected_utc.microsecond / 1000) * 1000)
7873+
fetched_utc = fetched_utc.replace(microsecond=int(fetched_utc.microsecond / 1000) * 1000)
7874+
7875+
assert fetched_utc == expected_utc, (
7876+
f"Value mismatch for test case {i}. "
7877+
f"Expected UTC: {expected_utc}, Got UTC: {fetched_utc}"
7878+
)
7879+
finally:
7880+
cursor.execute("IF OBJECT_ID('tempdb..#pytest_dto', 'U') IS NOT NULL DROP TABLE #pytest_dto;")
7881+
db_connection.commit()
78257882

78267883
def test_lowercase_attribute(cursor, db_connection):
78277884
"""Test that the lowercase attribute properly converts column names to lowercase"""

0 commit comments

Comments
 (0)