Skip to content

Commit 7a05ed6

Browse files
committed
better-8
1 parent e3e3083 commit 7a05ed6

File tree

4 files changed

+242
-49
lines changed

4 files changed

+242
-49
lines changed

main.py

Lines changed: 41 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -157,7 +157,7 @@
157157
cursor.close()
158158
conn.close()
159159

160-
print("Using mssql-python now")
160+
print("Using mssql-python now (original)")
161161
start_time = time.perf_counter()
162162
conn = connect(conn_str)
163163
cursor = conn.cursor()
@@ -166,28 +166,64 @@
166166

167167
end_time = time.perf_counter()
168168
elapsed_time = end_time - start_time
169-
print(f"Elapsed time in mssql-python for query 1: {elapsed_time:.4f} seconds")
169+
print(f"Elapsed time in mssql-python (original) for query 1: {elapsed_time:.4f} seconds")
170170

171171
start_time = time.perf_counter()
172172
cursor.execute(LARGE_DATASET)
173173
rows = cursor.fetchall()
174174
end_time = time.perf_counter()
175175
elapsed_time = end_time - start_time
176-
print(f"Elapsed time in mssql-python for query 2: {elapsed_time:.4f} seconds")
176+
print(f"Elapsed time in mssql-python (original) for query 2: {elapsed_time:.4f} seconds")
177177

178178
start_time = time.perf_counter()
179179
cursor.execute(VERY_LARGE_DATASET)
180180
rows = cursor.fetchall()
181181
end_time = time.perf_counter()
182182
elapsed_time = end_time - start_time
183-
print(f"Elapsed time in mssql-python for query 3: {elapsed_time:.4f} seconds")
183+
print(f"Elapsed time in mssql-python (original) for query 3: {elapsed_time:.4f} seconds")
184184

185185
start_time = time.perf_counter()
186186
cursor.execute(SUBQUERY_WITH_CTE)
187187
rows = cursor.fetchall()
188188
end_time = time.perf_counter()
189189
elapsed_time = end_time - start_time
190-
print(f"Elapsed time in mssql-python for query 4: {elapsed_time:.4f} seconds")
190+
print(f"Elapsed time in mssql-python (original) for query 4: {elapsed_time:.4f} seconds")
191+
192+
cursor.close()
193+
conn.close()
194+
195+
print("Using mssql-python now (optimized)")
196+
start_time = time.perf_counter()
197+
conn = connect(conn_str)
198+
conn.enable_performance_mode() # Enable connection-level performance mode
199+
cursor = conn.cursor() # This will automatically apply optimizations
200+
cursor.execute(COMPLEX_JOIN_AGGREGATION)
201+
rows = cursor.fetchall()
202+
203+
end_time = time.perf_counter()
204+
elapsed_time = end_time - start_time
205+
print(f"Elapsed time in mssql-python (optimized) for query 1: {elapsed_time:.4f} seconds")
206+
207+
start_time = time.perf_counter()
208+
cursor.execute(LARGE_DATASET)
209+
rows = cursor.fetchall()
210+
end_time = time.perf_counter()
211+
elapsed_time = end_time - start_time
212+
print(f"Elapsed time in mssql-python (optimized) for query 2: {elapsed_time:.4f} seconds")
213+
214+
start_time = time.perf_counter()
215+
cursor.execute(VERY_LARGE_DATASET)
216+
rows = cursor.fetchall()
217+
end_time = time.perf_counter()
218+
elapsed_time = end_time - start_time
219+
print(f"Elapsed time in mssql-python (optimized) for query 3: {elapsed_time:.4f} seconds")
220+
221+
start_time = time.perf_counter()
222+
cursor.execute(SUBQUERY_WITH_CTE)
223+
rows = cursor.fetchall()
224+
end_time = time.perf_counter()
225+
elapsed_time = end_time - start_time
226+
print(f"Elapsed time in mssql-python (optimized) for query 4: {elapsed_time:.4f} seconds")
191227

192228
cursor.close()
193229
conn.close()

mssql_python/connection.py

Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -202,6 +202,9 @@ def __init__(self, connection_str: str = "", autocommit: bool = False, attrs_bef
202202
self._pooling = PoolingManager.is_enabled()
203203
self._conn = ddbc_bindings.Connection(self.connection_str, self._pooling, self._attrs_before)
204204
self.setautocommit(autocommit)
205+
206+
# Performance optimization settings
207+
self._performance_mode = False # When enabled, applies aggressive optimizations
205208

206209
def _construct_connection_string(self, connection_str: str = "", **kwargs) -> str:
207210
"""
@@ -595,6 +598,12 @@ def cursor(self) -> Cursor:
595598
)
596599

597600
cursor = Cursor(self, timeout=self._timeout)
601+
602+
# Apply performance optimizations if enabled
603+
if self._performance_mode:
604+
cursor.optimize_for_performance()
605+
cursor.enable_fast_mode() # Enable fast mode when performance mode is on
606+
598607
self._cursors.add(cursor) # Track the cursor
599608
return cursor
600609

@@ -1236,6 +1245,27 @@ def __exit__(self, *args) -> None:
12361245
if not self._closed:
12371246
self.close()
12381247

1248+
def enable_performance_mode(self) -> None:
    """
    Turn on performance mode for cursors subsequently created by this connection.

    This method only sets the connection-level flag. cursor() reads the flag
    when it builds a new cursor and, if it is set, calls
    optimize_for_performance() and enable_fast_mode() on that cursor, so
    such cursors:
    - return plain tuples instead of Row objects from fetchmany()/fetchall()
    - use a larger arraysize for fetching

    Cursors that already exist when this method is called are not modified.
    """
    self._performance_mode = True
1258+
1259+
def disable_performance_mode(self):
    """
    Turn performance mode off for this connection.

    Clears the flag that cursor() checks before applying performance
    optimizations to a newly created cursor. Cursors that already exist
    are not touched by this method.
    """
    self._performance_mode = False
1264+
1265+
def is_performance_mode_enabled(self):
    """
    Report the connection's current performance-mode flag.

    Returns:
        The flag value last set by enable_performance_mode() or
        disable_performance_mode().
    """
    return self._performance_mode
1268+
12391269
def __del__(self):
12401270
"""
12411271
Destructor to ensure the connection is closed when the connection object is no longer needed.

mssql_python/cursor.py

Lines changed: 72 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -81,7 +81,7 @@ def __init__(self, connection, timeout: int = 0) -> None:
8181
self.description = None
8282
self.rowcount = -1
8383
self.arraysize = (
84-
1 # Default number of rows to fetch at a time is 1, user can change it
84+
1000 # Increased default for better performance - user can still change it
8585
)
8686
self.buffer_length = 1024 # Default buffer length for string data
8787
self.closed = False
@@ -98,6 +98,10 @@ def __init__(self, connection, timeout: int = 0) -> None:
9898

9999
# rownumber attribute
100100
self._rownumber = -1 # DB-API extension: last returned row index, -1 before first
101+
102+
# Performance optimizations
103+
self._fast_mode = False # When enabled, returns tuples instead of Row objects
104+
self._cached_column_map = None # Cache column mapping for performance
101105
self._next_row_index = 0 # internal: index of the next row the driver will return (0-based)
102106
self._has_result_set = False # Track if we have an active result set
103107
self._skip_increment_for_next_fetch = False # Track if we need to skip incrementing the row index
@@ -1010,9 +1014,12 @@ def execute(
10101014
if self.description: # If we have column descriptions, it's likely a SELECT
10111015
self.rowcount = -1
10121016
self._reset_rownumber()
1017+
# Performance optimization: pre-build column map after execution
1018+
self._cached_column_map = {col_desc[0]: i for i, col_desc in enumerate(self.description)}
10131019
else:
10141020
self.rowcount = ddbc_bindings.DDBCSQLRowCount(self.hstmt)
10151021
self._clear_rownumber()
1022+
self._cached_column_map = None # Clear cache for non-SELECT statements
10161023

10171024
# After successful execution, initialize description if there are results
10181025
column_metadata = []
@@ -1775,8 +1782,16 @@ def fetchmany(self, size: int = None) -> List[Row]:
17751782
else:
17761783
self.rowcount = self._next_row_index
17771784

1785+
# Performance optimization: return tuples in fast mode
1786+
if self._fast_mode:
1787+
return [tuple(row_data) for row_data in rows_data]
1788+
1789+
# Build column map once and cache it for better performance
1790+
if self._cached_column_map is None and self.description:
1791+
self._cached_column_map = {col_desc[0]: i for i, col_desc in enumerate(self.description)}
1792+
17781793
# Convert raw data to Row objects
1779-
column_map = getattr(self, '_column_name_map', None)
1794+
column_map = self._cached_column_map or getattr(self, '_column_name_map', None)
17801795
return [Row(self, self.description, row_data, column_map) for row_data in rows_data]
17811796
except Exception as e:
17821797
# On error, don't increment rownumber - rethrow the error
@@ -1787,7 +1802,7 @@ def fetchall(self) -> List[Row]:
17871802
Fetch all (remaining) rows of a query result.
17881803
17891804
Returns:
1790-
List of Row objects.
1805+
List of Row objects or tuples (if fast mode is enabled).
17911806
"""
17921807
self._check_closed() # Check if the cursor is closed
17931808
if not self._has_result_set and self.description:
@@ -1813,8 +1828,15 @@ def fetchall(self) -> List[Row]:
18131828
else:
18141829
self.rowcount = self._next_row_index
18151830

1816-
# Convert raw data to Row objects
1817-
column_map = getattr(self, '_column_name_map', None)
1831+
# Performance optimization: return tuples in fast mode
1832+
if self._fast_mode:
1833+
return [tuple(row_data) for row_data in rows_data]
1834+
1835+
# Build column map once and cache it for better performance
1836+
if self._cached_column_map is None and self.description:
1837+
self._cached_column_map = {col_desc[0]: i for i, col_desc in enumerate(self.description)}
1838+
1839+
column_map = self._cached_column_map or getattr(self, '_column_name_map', None)
18181840
return [Row(self, self.description, row_data, column_map) for row_data in rows_data]
18191841
except Exception as e:
18201842
# On error, don't increment rownumber - rethrow the error
@@ -2167,4 +2189,48 @@ def tables(self, table=None, catalog=None, schema=None, tableType=None):
21672189
except Exception as e:
21682190
# Log the error and re-raise
21692191
log('error', f"Error executing tables query: {e}")
2170-
raise
2192+
raise
2193+
2194+
# Performance optimization methods
2195+
def enable_fast_mode(self) -> None:
    """
    Enable fast mode on this cursor.

    While fast mode is on, fetchmany() and fetchall() return a list of
    plain tuples and skip constructing a Row object for every record.

    Note: tuples are ordinary sequences, so results remain valid DB-API
    (PEP 249) fetch results. What is given up is the Row wrapper itself,
    which is built from the cursor description and a column-name map;
    code that relies on Row-specific behavior should leave fast mode off.
    """
    self._fast_mode = True
2205+
2206+
def disable_fast_mode(self):
    """
    Switch fast mode off on this cursor.

    With the flag cleared, fetchmany() and fetchall() wrap each fetched
    record in a Row object again instead of returning plain tuples.
    """
    self._fast_mode = False
2211+
2212+
def is_fast_mode_enabled(self):
    """
    Report whether this cursor is currently in fast mode.

    Returns:
        bool: The fast-mode flag; True after enable_fast_mode(), False
        after disable_fast_mode().
    """
    return self._fast_mode
2220+
2221+
2222+
2223+
def optimize_for_performance(self, arraysize: int = 5000) -> None:
    """
    Apply throughput-oriented settings to this cursor.

    Raises the fetch batch size and, when a result-set description is
    already available, pre-builds the column-name -> index map that is
    passed to Row objects when rows are fetched.

    Args:
        arraysize: Number of rows to fetch per batch. Defaults to 5000,
            the value that was previously hard-coded here. Must be at
            least 1.

    Raises:
        ValueError: If arraysize is less than 1. The cursor is left
            unchanged in that case.
    """
    if arraysize < 1:
        raise ValueError(
            f"arraysize must be a positive integer, got {arraysize!r}"
        )
    self.arraysize = arraysize

    # Only build the map when there is a description to build it from and
    # nothing has been cached yet (execute() also fills this cache).
    if self.description and self._cached_column_map is None:
        self._cached_column_map = {
            column[0]: index for index, column in enumerate(self.description)
        }

0 commit comments

Comments
 (0)