Skip to content

Commit a79aa37

Browse files
committed
working executemany for datetimeoffset
1 parent 39fa6f3 commit a79aa37

File tree

3 files changed

+198
-64
lines changed

3 files changed

+198
-64
lines changed

mssql_python/cursor.py

Lines changed: 15 additions & 50 deletions
Original file line numberDiff line numberDiff line change
@@ -140,27 +140,26 @@ def _parse_date(self, param):
140140

141141
def _parse_datetime(self, param):
142142
"""
143-
Attempt to parse a string as a datetime, smalldatetime, datetime2, timestamp.
144-
145-
Args:
146-
param: The string to parse.
147-
148-
Returns:
149-
A datetime.datetime object if parsing is successful, else None.
143+
Attempt to parse a string as a datetime, datetime2, timestamp, or smalldatetime.
144+
Handles both naive and timezone-aware datetime strings.
150145
"""
151146
formats = [
152-
"%Y-%m-%dT%H:%M:%S.%f", # ISO 8601 datetime with fractional seconds
153-
"%Y-%m-%dT%H:%M:%S", # ISO 8601 datetime
154-
"%Y-%m-%d %H:%M:%S.%f", # Datetime with fractional seconds
155-
"%Y-%m-%d %H:%M:%S", # Datetime without fractional seconds
147+
"%Y-%m-%dT%H:%M:%S.%f%z", # ISO 8601 with fractional seconds + timezone
148+
"%Y-%m-%dT%H:%M:%S%z", # ISO 8601 with timezone
149+
"%Y-%m-%d %H:%M:%S.%f%z", # Space-separated with fractional seconds + timezone
150+
"%Y-%m-%d %H:%M:%S%z", # Space-separated with timezone
151+
"%Y-%m-%dT%H:%M:%S.%f", # ISO 8601 without timezone
152+
"%Y-%m-%dT%H:%M:%S", # ISO 8601 without timezone
153+
"%Y-%m-%d %H:%M:%S.%f", # Space-separated without timezone
154+
"%Y-%m-%d %H:%M:%S", # Space-separated without timezone
156155
]
157156
for fmt in formats:
158157
try:
159-
return datetime.datetime.strptime(param, fmt) # Valid datetime
158+
dt = datetime.datetime.strptime(param, fmt)
159+
return dt
160160
except ValueError:
161-
continue # Try next format
162-
163-
return None # If all formats fail, return None
161+
continue
162+
return None # parsing failed
164163

165164
def _parse_time(self, param):
166165
"""
@@ -1450,35 +1449,6 @@ def columns(self, table=None, catalog=None, schema=None, column=None):
14501449
# Use the helper method to prepare the result set
14511450
return self._prepare_metadata_result_set(fallback_description=fallback_description)
14521451

1453-
@staticmethod
1454-
def _select_best_sample_value(column):
1455-
"""
1456-
Selects the most representative non-null value from a column for type inference.
1457-
1458-
This is used during executemany() to infer SQL/C types based on actual data,
1459-
preferring a non-null value that is not the first row to avoid bias from placeholder defaults.
1460-
1461-
Args:
1462-
column: List of values in the column.
1463-
"""
1464-
non_nulls = [v for v in column if v is not None]
1465-
if not non_nulls:
1466-
return None
1467-
if all(isinstance(v, int) for v in non_nulls):
1468-
# Pick the value with the widest range (min/max)
1469-
return max(non_nulls, key=lambda v: abs(v))
1470-
if all(isinstance(v, float) for v in non_nulls):
1471-
return 0.0
1472-
if all(isinstance(v, decimal.Decimal) for v in non_nulls):
1473-
return max(non_nulls, key=lambda d: len(d.as_tuple().digits))
1474-
if all(isinstance(v, str) for v in non_nulls):
1475-
return max(non_nulls, key=lambda s: len(str(s)))
1476-
if all(isinstance(v, datetime.datetime) for v in non_nulls):
1477-
return datetime.datetime.now()
1478-
if all(isinstance(v, datetime.date) for v in non_nulls):
1479-
return datetime.date.today()
1480-
return non_nulls[0] # fallback
1481-
14821452
def _transpose_rowwise_to_columnwise(self, seq_of_parameters: list) -> tuple[list, int]:
14831453
"""
14841454
Convert sequence of rows (row-wise) into list of columns (column-wise),
@@ -1651,12 +1621,7 @@ def executemany(self, operation: str, seq_of_parameters: list) -> None:
16511621
else:
16521622
# Use auto-detection for columns without explicit types
16531623
column = [row[col_index] for row in seq_of_parameters] if hasattr(seq_of_parameters, '__getitem__') else []
1654-
if not column:
1655-
# For generators, use the sample row for inference
1656-
sample_value = sample_row[col_index]
1657-
else:
1658-
sample_value = self._select_best_sample_value(column)
1659-
1624+
sample_value, min_val, max_val = self._compute_column_type(column)
16601625
dummy_row = list(sample_row)
16611626
paraminfo = self._create_parameter_types_list(
16621627
sample_value, param_info, dummy_row, col_index, min_val=min_val, max_val=max_val

mssql_python/pybind/ddbc_bindings.cpp

Lines changed: 126 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -64,6 +64,20 @@ struct NumericData {
6464
: precision(precision), scale(scale), sign(sign), val(value) {}
6565
};
6666

67+
// Struct to hold the DateTimeOffset structure
68+
struct DateTimeOffset
69+
{
70+
SQLSMALLINT year;
71+
SQLUSMALLINT month;
72+
SQLUSMALLINT day;
73+
SQLUSMALLINT hour;
74+
SQLUSMALLINT minute;
75+
SQLUSMALLINT second;
76+
SQLUINTEGER fraction; // Nanoseconds
77+
SQLSMALLINT timezone_hour; // Offset hours from UTC
78+
SQLSMALLINT timezone_minute; // Offset minutes from UTC
79+
};
80+
6781
// Struct to hold data buffers and indicators for each column
6882
struct ColumnBuffers {
6983
std::vector<std::vector<SQLCHAR>> charBuffers;
@@ -78,6 +92,7 @@ struct ColumnBuffers {
7892
std::vector<std::vector<SQL_TIME_STRUCT>> timeBuffers;
7993
std::vector<std::vector<SQLGUID>> guidBuffers;
8094
std::vector<std::vector<SQLLEN>> indicators;
95+
std::vector<std::vector<DateTimeOffset>> datetimeoffsetBuffers;
8196

8297
ColumnBuffers(SQLSMALLINT numCols, int fetchSize)
8398
: charBuffers(numCols),
@@ -91,23 +106,10 @@ struct ColumnBuffers {
91106
dateBuffers(numCols),
92107
timeBuffers(numCols),
93108
guidBuffers(numCols),
109+
datetimeoffsetBuffers(numCols),
94110
indicators(numCols, std::vector<SQLLEN>(fetchSize)) {}
95111
};
96112

97-
// Struct to hold the DateTimeOffset structure
98-
struct DateTimeOffset
99-
{
100-
SQLSMALLINT year;
101-
SQLUSMALLINT month;
102-
SQLUSMALLINT day;
103-
SQLUSMALLINT hour;
104-
SQLUSMALLINT minute;
105-
SQLUSMALLINT second;
106-
SQLUINTEGER fraction; // Nanoseconds
107-
SQLSMALLINT timezone_hour; // Offset hours from UTC
108-
SQLSMALLINT timezone_minute; // Offset minutes from UTC
109-
};
110-
111113
//-------------------------------------------------------------------------------------------------
112114
// Function pointer initialization
113115
//-------------------------------------------------------------------------------------------------
@@ -1876,6 +1878,7 @@ SQLRETURN BindParameterArray(SQLHANDLE hStmt,
18761878
break;
18771879
}
18781880
case SQL_C_TYPE_TIMESTAMP: {
1881+
std::cout<<"Binding Timestamp param at index "<<paramIndex<<std::endl;
18791882
SQL_TIMESTAMP_STRUCT* tsArray = AllocateParamBufferArray<SQL_TIMESTAMP_STRUCT>(tempBuffers, paramSetSize);
18801883
strLenOrIndArray = AllocateParamBufferArray<SQLLEN>(tempBuffers, paramSetSize);
18811884
for (size_t i = 0; i < paramSetSize; ++i) {
@@ -1898,6 +1901,67 @@ SQLRETURN BindParameterArray(SQLHANDLE hStmt,
18981901
bufferLength = sizeof(SQL_TIMESTAMP_STRUCT);
18991902
break;
19001903
}
1904+
case SQL_C_SS_TIMESTAMPOFFSET: {
1905+
std::cout<<"Binding DateTimeOffset param at index "<<paramIndex<<std::endl;
1906+
DateTimeOffset* dtoArray = AllocateParamBufferArray<DateTimeOffset>(tempBuffers, paramSetSize);
1907+
strLenOrIndArray = AllocateParamBufferArray<SQLLEN>(tempBuffers, paramSetSize);
1908+
1909+
py::object datetimeType = py::module_::import("datetime").attr("datetime");
1910+
1911+
for (size_t i = 0; i < paramSetSize; ++i) {
1912+
const py::handle& param = columnValues[i];
1913+
1914+
if (param.is_none()) {
1915+
std::memset(&dtoArray[i], 0, sizeof(DateTimeOffset));
1916+
strLenOrIndArray[i] = SQL_NULL_DATA;
1917+
} else {
1918+
if (!py::isinstance(param, datetimeType)) {
1919+
ThrowStdException(MakeParamMismatchErrorStr(info.paramCType, paramIndex));
1920+
}
1921+
1922+
py::object tzinfo = param.attr("tzinfo");
1923+
if (tzinfo.is_none()) {
1924+
ThrowStdException("Datetime object must have tzinfo for SQL_C_SS_TIMESTAMPOFFSET at paramIndex " +
1925+
std::to_string(paramIndex));
1926+
}
1927+
1928+
// Convert the Python datetime object to UTC before binding.
1929+
// This is the crucial step to ensure timezone normalization.
1930+
py::object datetimeModule = py::module_::import("datetime");
1931+
py::object utc_dt = param.attr("astimezone")(datetimeModule.attr("timezone").attr("utc"));
1932+
std::cout<<"!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!"<<std::endl;
1933+
// --- TEMPORARY DEBUGGING: LOG THE UTC VALUES ---
1934+
LOG("Binding UTC values: {}-{}-{} {}:{}:{}.{} +00:00",
1935+
utc_dt.attr("year").cast<int>(),
1936+
utc_dt.attr("month").cast<int>(),
1937+
utc_dt.attr("day").cast<int>(),
1938+
utc_dt.attr("hour").cast<int>(),
1939+
utc_dt.attr("minute").cast<int>(),
1940+
utc_dt.attr("second").cast<int>(),
1941+
utc_dt.attr("microsecond").cast<int>()
1942+
);
1943+
1944+
// Now, populate the C++ struct using the UTC-converted object.
1945+
dtoArray[i].year = static_cast<SQLSMALLINT>(utc_dt.attr("year").cast<int>());
1946+
dtoArray[i].month = static_cast<SQLUSMALLINT>(utc_dt.attr("month").cast<int>());
1947+
dtoArray[i].day = static_cast<SQLUSMALLINT>(utc_dt.attr("day").cast<int>());
1948+
dtoArray[i].hour = static_cast<SQLUSMALLINT>(utc_dt.attr("hour").cast<int>());
1949+
dtoArray[i].minute = static_cast<SQLUSMALLINT>(utc_dt.attr("minute").cast<int>());
1950+
dtoArray[i].second = static_cast<SQLUSMALLINT>(utc_dt.attr("second").cast<int>());
1951+
dtoArray[i].fraction = static_cast<SQLUINTEGER>(utc_dt.attr("microsecond").cast<int>() * 1000);
1952+
1953+
// Since we've converted to UTC, the timezone offset is always 0.
1954+
dtoArray[i].timezone_hour = 0;
1955+
dtoArray[i].timezone_minute = 0;
1956+
1957+
strLenOrIndArray[i] = sizeof(DateTimeOffset);
1958+
}
1959+
}
1960+
1961+
dataPtr = dtoArray;
1962+
bufferLength = sizeof(DateTimeOffset);
1963+
break;
1964+
}
19011965
case SQL_C_NUMERIC: {
19021966
SQL_NUMERIC_STRUCT* numericArray = AllocateParamBufferArray<SQL_NUMERIC_STRUCT>(tempBuffers, paramSetSize);
19031967
strLenOrIndArray = AllocateParamBufferArray<SQLLEN>(tempBuffers, paramSetSize);
@@ -2573,6 +2637,7 @@ SQLRETURN SQLGetData_wrap(SqlHandlePtr StatementHandle, SQLUSMALLINT colCount, p
25732637
microseconds,
25742638
tzinfo
25752639
);
2640+
py_dt = py_dt.attr("astimezone")(datetime.attr("timezone").attr("utc"));
25762641
row.append(py_dt);
25772642
} else {
25782643
LOG("Error fetching DATETIMEOFFSET for column {}, ret={}", i, ret);
@@ -2836,6 +2901,13 @@ SQLRETURN SQLBindColums(SQLHSTMT hStmt, ColumnBuffers& buffers, py::list& column
28362901
ret = SQLBindCol_ptr(hStmt, col, SQL_C_BINARY, buffers.charBuffers[col - 1].data(),
28372902
columnSize, buffers.indicators[col - 1].data());
28382903
break;
2904+
case SQL_SS_TIMESTAMPOFFSET:
2905+
buffers.datetimeoffsetBuffers[col - 1].resize(fetchSize);
2906+
ret = SQLBindCol_ptr(hStmt, col, SQL_C_SS_TIMESTAMPOFFSET,
2907+
buffers.datetimeoffsetBuffers[col - 1].data(),
2908+
sizeof(DateTimeOffset) * fetchSize,
2909+
buffers.indicators[col - 1].data());
2910+
break;
28392911
default:
28402912
std::wstring columnName = columnMeta["ColumnName"].cast<std::wstring>();
28412913
std::ostringstream errorString;
@@ -3051,6 +3123,43 @@ SQLRETURN FetchBatchData(SQLHSTMT hStmt, ColumnBuffers& buffers, py::list& colum
30513123
buffers.timeBuffers[col - 1][i].second));
30523124
break;
30533125
}
3126+
case SQL_SS_TIMESTAMPOFFSET: {
3127+
// i = current row index in outer loop
3128+
SQLULEN rowIdx = i;
3129+
const DateTimeOffset& dtoValue = buffers.datetimeoffsetBuffers[col - 1][rowIdx];
3130+
SQLLEN indicator = buffers.indicators[col - 1][rowIdx];
3131+
3132+
if (indicator != SQL_NULL_DATA) {
3133+
// Compute total minutes offset
3134+
int totalMinutes = dtoValue.timezone_hour * 60 + dtoValue.timezone_minute;
3135+
3136+
// Import Python datetime module
3137+
py::object datetime = py::module_::import("datetime");
3138+
3139+
// Construct tzinfo object for the original offset
3140+
py::object tzinfo = datetime.attr("timezone")(
3141+
datetime.attr("timedelta")(py::arg("minutes") = totalMinutes)
3142+
);
3143+
3144+
// Construct Python datetime object with tzinfo
3145+
py::object py_dt = datetime.attr("datetime")(
3146+
dtoValue.year,
3147+
dtoValue.month,
3148+
dtoValue.day,
3149+
dtoValue.hour,
3150+
dtoValue.minute,
3151+
dtoValue.second,
3152+
dtoValue.fraction / 1000, // ns → µs
3153+
tzinfo
3154+
);
3155+
py_dt = py_dt.attr("astimezone")(datetime.attr("timezone").attr("utc"));
3156+
// Append to row
3157+
row.append(py_dt);
3158+
} else {
3159+
row.append(py::none());
3160+
}
3161+
break;
3162+
}
30543163
case SQL_GUID: {
30553164
row.append(
30563165
py::bytes(reinterpret_cast<const char*>(&buffers.guidBuffers[col - 1][i]),
@@ -3156,6 +3265,9 @@ size_t calculateRowSize(py::list& columnNames, SQLUSMALLINT numCols) {
31563265
case SQL_LONGVARBINARY:
31573266
rowSize += columnSize;
31583267
break;
3268+
case SQL_SS_TIMESTAMPOFFSET:
3269+
rowSize += sizeof(DateTimeOffset); // your custom struct for SQL_C_SS_TIMESTAMPOFFSET
3270+
break;
31593271
default:
31603272
std::wstring columnName = columnMeta["ColumnName"].cast<std::wstring>();
31613273
std::ostringstream errorString;

tests/test_004_cursor.py

Lines changed: 57 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7414,6 +7414,63 @@ def test_datetimeoffset_read_write(cursor, db_connection):
74147414
finally:
74157415
cursor.execute("IF OBJECT_ID('tempdb..#pytest_dto', 'U') IS NOT NULL DROP TABLE #pytest_dto;")
74167416
db_connection.commit()
7417+
7418+
def test_datetimeoffset_executemany(cursor, db_connection):
    """
    Test that DATETIMEOFFSET values written with executemany() are read back
    as timezone-aware datetimes that denote the same instant.

    Only the UTC instant is compared, at millisecond precision; the original
    UTC offset of each value is not asserted.
    """
    try:
        # Each case: (SQL literal form, kept for reference only; Python value that is bound)
        datetimeoffset_test_cases = [
            (
                "2023-10-26 10:30:00.0000000 +05:30",
                datetime(2023, 10, 26, 10, 30, 0, 0,
                         tzinfo=timezone(timedelta(hours=5, minutes=30)))
            ),
            (
                "2023-10-27 15:45:10.1234567 -08:00",
                datetime(2023, 10, 27, 15, 45, 10, 123456,
                         tzinfo=timezone(timedelta(hours=-8)))
            ),
            (
                "2023-10-28 20:00:05.9876543 +00:00",
                datetime(2023, 10, 28, 20, 0, 5, 987654,
                         tzinfo=timezone(timedelta(hours=0)))
            )
        ]

        # Create temp table
        cursor.execute("IF OBJECT_ID('tempdb..#pytest_dto', 'U') IS NOT NULL DROP TABLE #pytest_dto;")
        cursor.execute("CREATE TABLE #pytest_dto (id INT PRIMARY KEY, dto_column DATETIMEOFFSET);")
        db_connection.commit()

        # Prepare data for executemany: the row id doubles as the test-case index
        param_list = [(i, python_dt) for i, (_, python_dt) in enumerate(datetimeoffset_test_cases)]
        cursor.executemany("INSERT INTO #pytest_dto (id, dto_column) VALUES (?, ?);", param_list)
        db_connection.commit()

        # Read back and validate
        cursor.execute("SELECT id, dto_column FROM #pytest_dto ORDER BY id;")
        rows = cursor.fetchall()

        # Fail with a clear message instead of an IndexError if rows are missing
        assert len(rows) == len(datetimeoffset_test_cases), (
            f"Expected {len(datetimeoffset_test_cases)} rows, got {len(rows)}"
        )

        for i, (_, python_dt) in enumerate(datetimeoffset_test_cases):
            fetched_id, fetched_dto = rows[i]
            assert fetched_id == i, f"Row order mismatch: expected id {i}, got {fetched_id}"
            assert fetched_dto.tzinfo is not None, "Fetched datetime object is naive."

            expected_utc = python_dt.astimezone(timezone.utc).replace(tzinfo=None)
            fetched_utc = fetched_dto.astimezone(timezone.utc).replace(tzinfo=None)

            # Compare at millisecond precision: sub-millisecond digits are
            # truncated (not rounded) on both sides.
            expected_utc = expected_utc.replace(microsecond=expected_utc.microsecond // 1000 * 1000)
            fetched_utc = fetched_utc.replace(microsecond=fetched_utc.microsecond // 1000 * 1000)

            assert fetched_utc == expected_utc, (
                f"Value mismatch for test case {i}. "
                f"Expected UTC: {expected_utc}, Got UTC: {fetched_utc}"
            )
    finally:
        cursor.execute("IF OBJECT_ID('tempdb..#pytest_dto', 'U') IS NOT NULL DROP TABLE #pytest_dto;")
        db_connection.commit()
74177474

74187475
def test_lowercase_attribute(cursor, db_connection):
74197476
"""Test that the lowercase attribute properly converts column names to lowercase"""

0 commit comments

Comments
 (0)