diff --git a/CHANGES.md b/CHANGES.md index c6c8802..c9b5a03 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -1,4 +1,5 @@ # Changelog +- Types: Improved support for FLOAT type, converging to FLOAT vs. DOUBLE ## 2026/05/28 0.42.0 - Added support for SQL Alchemy 2.1 diff --git a/pyproject.toml b/pyproject.toml index 12a831e..042f2a5 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -111,7 +111,7 @@ optional-dependencies.release = [ optional-dependencies.test = [ "cratedb-toolkit[testing]", "dask[dataframe]", - "pandas<2.4", + "pandas[test]<2.4", "pueblo>=0.0.7", "pytest<10", "pytest-cov<8", diff --git a/src/sqlalchemy_cratedb/compiler.py b/src/sqlalchemy_cratedb/compiler.py index 851e8eb..2449bb2 100644 --- a/src/sqlalchemy_cratedb/compiler.py +++ b/src/sqlalchemy_cratedb/compiler.py @@ -257,6 +257,27 @@ def visit_TIMESTAMP(self, type_, **kw): """ return "TIMESTAMP %s" % ((type_.timezone and "WITH" or "WITHOUT") + " TIME ZONE",) + def visit_FLOAT(self, type_, **kw): + """ + From `sqlalchemy.sql.sqltypes.Float`. + + When a :paramref:`.Float.precision` is not provided in a + :class:`_types.Float` type some backends may compile this type as + an 8 bytes / 64 bit float datatype. To use a 4 bytes / 32 bit float + datatype a precision <= 24 can usually be provided or the + :class:`_types.REAL` type can be used. + This is known to be the case in the PostgreSQL and MSSQL dialects + that render the type as ``FLOAT``, which in both is an alias of + ``DOUBLE PRECISION``. Other third party dialects may have similar + behavior. 
+ """ + if not type_.precision: + return "FLOAT" + elif type_.precision <= 24: + return "FLOAT" + else: + return "DOUBLE" + class CrateCompiler(compiler.SQLCompiler): def visit_getitem_binary(self, binary, operator, **kw): diff --git a/tests/test_support_pandas.py b/tests/test_support_pandas.py index 47fe9c7..ce8bb6e 100644 --- a/tests/test_support_pandas.py +++ b/tests/test_support_pandas.py @@ -1,7 +1,9 @@ import re import sys +import pandas as pd import pytest +from pandas._testing import assert_equal from pueblo.testing.pandas import makeTimeDataFrame from sqlalchemy.exc import ProgrammingError @@ -15,6 +17,18 @@ df = makeTimeDataFrame(nper=INSERT_RECORDS, freq="S") df["time"] = df.index +float_double_data = { + "col_1": [19556.88, 629414.27, 51570.0, 2933.52, 20338.98], + "col_2": [ + 15379.920000000002, + 1107140.42, + 8081.999999999999, + 1570.0300000000002, + 29468.539999999997, + ], +} +float_double_df = pd.DataFrame.from_dict(float_double_data) + @pytest.mark.skipif( sys.version_info < (3, 8), reason="Feature not supported on Python 3.7 and earlier" @@ -113,3 +127,33 @@ def test_table_kwargs_unknown(cratedb_service): "passed to [ALTER | CREATE] TABLE statement]" ) ) + + +@pytest.mark.skipif( + sys.version_info < (3, 8), reason="Feature not supported on Python 3.7 and earlier" +) +@pytest.mark.skipif( + SA_VERSION < SA_2_0, reason="Feature not supported on SQLAlchemy 1.4 and earlier" +) +def test_float_double(cratedb_service): + """ + Validate I/O with floating point numbers, specifically DOUBLE types. + + Motto: Do not lose precision when DOUBLE is required. 
+ """ + tablename = "pandas_double" + engine = cratedb_service.database.engine + float_double_df.to_sql( + tablename, + engine, + if_exists="replace", + index=False, + ) + cratedb_service.database.run_sql(f"REFRESH TABLE {tablename}") + df_load = pd.read_sql_table(tablename, engine) + + before = float_double_df.sort_values(by="col_1", ignore_index=True) + after = df_load.sort_values(by="col_1", ignore_index=True) + + pd.options.display.float_format = "{:.12f}".format + assert_equal(before, after, check_exact=True)