Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
25 commits
Select commit Hold shift + click to select a range
375254a
Initial commit setting up enum and test file
knassre-bodo Mar 4, 2026
138766a
Initial dialect plumbing added
knassre-bodo Mar 4, 2026
37b1238
Resolving conflicts
knassre-bodo Mar 4, 2026
84c601a
Added basic graphs and trino defog sql files
knassre-bodo Mar 4, 2026
76d9caa
Trino DOW WIP
knassre-bodo Mar 5, 2026
e93f57d
WIP Trino pipeline addition
knassre-bodo Mar 18, 2026
63c2c60
Up and running with mysql+postgres, need to debug 12 failing queries
knassre-bodo Mar 18, 2026
933769a
incremental trino support increases
knassre-bodo Mar 18, 2026
9e7fe0d
Debugging queries in trino WIP, need to add mongodb+cassandra
knassre-bodo Mar 23, 2026
6642c4c
Added simplification tinkers to avoid redundant LENGTH(literal)
knassre-bodo Mar 24, 2026
e9e17c2
Adding division simplification tricks and updating more files based o…
knassre-bodo Mar 24, 2026
3ac2c49
WIP debugging various issues, adding custom datasets marker
knassre-bodo Mar 24, 2026
19a0ee1
Resolving conflicts
knassre-bodo Mar 24, 2026
4212ab2
more changes WIP
knassre-bodo Mar 25, 2026
fd1c9e7
Testing advancements and week handling WIP, some DOW bugs resolved
knassre-bodo Mar 26, 2026
9df0c0c
Testing WIP dow
knassre-bodo Mar 26, 2026
fb9fbca
Mongo/cassandra adding WIP, dow/sow WIP
knassre-bodo Mar 26, 2026
b8b031d
Updated plans, down to last generation of bugs and dow/sow issues
knassre-bodo Mar 27, 2026
4fda91c
Test run on CI [RUN CI][RUN TRINO]
knassre-bodo Mar 30, 2026
7f9324c
Merge branch 'main' into kian/trino
knassre-bodo Mar 30, 2026
b194c25
Test run on CI [RUN CI][RUN TRINO]
knassre-bodo Mar 30, 2026
d04fdd4
WIP updating trino workflow
knassre-bodo Apr 8, 2026
3c35b34
Merge conflict resolved, testing CI [RUN TRINO]
knassre-bodo Apr 13, 2026
a05458f
Fixing trino workflow [RUN TRINO]
knassre-bodo Apr 13, 2026
c02ee7a
Fixing trino workflow [RUN TRINO]
knassre-bodo Apr 13, 2026
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
The table of contents is too big for display.
Diff view
Diff view
  •  
  •  
  •  
24 changes: 24 additions & 0 deletions .github/workflows/pr_testing.yml
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,11 @@ on:
type: boolean
required: false
default: false
run-trino:
description: "Run Trino Tests"
type: boolean
required: false
default: false
run-mysql:
description: "Run MySQL Tests"
type: boolean
Expand Down Expand Up @@ -207,6 +212,25 @@ jobs:
&& needs.get-py-ver-matrix.outputs.matrix
|| '["3.10", "3.11", "3.12", "3.13"]' }}

run-trino-tests:
name: Trino Tests
needs: [run-defog-daily-update, get-msg, get-py-ver-matrix]
if: |
(github.event_name == 'pull_request' && contains(needs.get-msg.outputs.commitMsg, '[run all]')) ||
(github.event_name == 'pull_request' && contains(needs.get-msg.outputs.commitMsg, '[run dialects]')) ||
(github.event_name == 'pull_request' && contains(needs.get-msg.outputs.commitMsg, '[run trino]')) ||
(github.event_name == 'workflow_dispatch' && (inputs.run-all || inputs.run-dialects || inputs.run-trino))
uses: ./.github/workflows/trino_testing.yml
secrets:
MYSQL_USERNAME: ${{ secrets.MYSQL_USERNAME }}
MYSQL_PASSWORD: ${{ secrets.MYSQL_PASSWORD }}
POSTGRES_USER: ${{ secrets.POSTGRES_USER }}
POSTGRES_PASSWORD: ${{ secrets.POSTGRES_PASSWORD }}
with:
python-versions: ${{ github.event_name == 'workflow_dispatch'
&& needs.get-py-ver-matrix.outputs.matrix
|| '["3.10", "3.11", "3.12", "3.13"]' }}

run-mysql-tests:
name: MySQL Tests
needs: [get-msg, get-py-ver-matrix]
Expand Down
129 changes: 129 additions & 0 deletions .github/workflows/trino_testing.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,129 @@
name: Run Trino Tests

on:
workflow_call:
inputs:
python-versions:
description: "JSON string of Python versions"
type: string
required: true
secrets:
POSTGRES_USER:
required: true
POSTGRES_PASSWORD:
required: true

jobs:
trino-test:
name: Trino Tests (Python ${{ matrix.python-version }})
runs-on: ubuntu-latest
strategy:
matrix:
python-version: ${{ fromJSON(inputs.python-versions) }}

# Define services here to run Docker containers alongside your job
services:
postgres:
image: bodoai1/pydough-postgres-tpch:latest
env:
# Set environment variables for Postgres container
POSTGRES_USER: ${{ secrets.POSTGRES_USER }}
POSTGRES_PASSWORD: ${{ secrets.POSTGRES_PASSWORD }}
POSTGRES_DB: "pydough_test"
ports:
- 5432:5432
mysql:
image: bodoai1/pydough-mysql-tpch:latest
env:
# Set environment variables for MySQL container
MYSQL_ROOT_PASSWORD: ${{ secrets.MYSQL_PASSWORD }}
MYSQL_DATABASE: tpch
ports:
- 3306:3306
mongo:
image: bodoai1/pydough-mongo-tpch:latest
env:
# Set environment variables for MongoDB container
MONGO_HOST: 127.0.0.1
MONGO_USER: "root_user"
MONGO_PASSWORD: "mongo_pwd"
ports:
- 27017:27017
cassandra:
image: bodoai1/pydough-cassandra-tpch:latest
command: ["--memory=4g", "--memory-swap=6g"]
env:
# Set environment variables for Cassandra container
MAX_HEAP_SIZE: "2G"
HEAP_NEWSIZE: "512M"
CASSANDRA_CLUSTER_NAME: "test"
CASSANDRA_DC: "datacenter1"
CASSANDRA_RACK: "rack1"
CASSANDRA_HOST: 127.0.0.1
FILTER_TABLES: "true"
TPCH_TABLES: "partsupp,nation"
DEFOG_TABLES: "sbDailyPrice,customers,sales,patients,treatments,adverse_events,coupons,wallet_merchant_balance_daily,user_setting_snapshot,conference,domain_conference,domain_publication,organization,writes,restaurant"
ports:
- 9042:9042
trino:
image: bodoai1/pydough-trino:latest
env:
POSTGRES_HOST: postgres
POSTGRES_DB: "pydough_test"
POSTGRES_USER: ${{ secrets.POSTGRES_USER }}
POSTGRES_PASSWORD: ${{ secrets.POSTGRES_PASSWORD }}
MYSQL_HOST: mysql
MYSQL_DB: test
MYSQL_USERNAME: ${{ secrets.MYSQL_USERNAME }}
MYSQL_PASSWORD: ${{ secrets.MYSQL_PASSWORD }}
ports:
- "8080:8080"
depends_on:
- postgres
- mysql
- mongo
- cassandra
env:
POSTGRES_USER: ${{ secrets.POSTGRES_USER }}
POSTGRES_PASSWORD: ${{ secrets.POSTGRES_PASSWORD }}
POSTGRES_DB: pydough_test
POSTGRES_PORT: 5432
POSTGRES_HOST: 127.0.0.1
MYSQL_USERNAME: ${{ secrets.MYSQL_USERNAME }}
MYSQL_PASSWORD: ${{ secrets.MYSQL_PASSWORD }}
MYSQL_PORT: 3306
MYSQL_HOST: 127.0.0.1
MONGO_HOST: 127.0.0.1
CASSANDRA_HOST: 127.0.0.1

steps:
- uses: actions/checkout@v4

- name: Setup Python ${{ matrix.python-version }}
id: setup-python
uses: actions/setup-python@v4
with:
python-version: ${{ matrix.python-version }}

- name: Install uv
uses: astral-sh/setup-uv@v3
with:
version: "0.4.23"

- name: Create virtual environment
run: uv venv

- name: Install dependencies
run: uv pip install -e ".[trino]"

- name: Wait for Trino to be ready
run: |
for i in $(seq 1 36); do
pg_isready -h ${{ env.POSTGRES_HOST }} -U ${{ env.POSTGRES_USER }} -d ${{ env.POSTGRES_DB }} && break || sleep 5;
done
pg_isready -h ${{ env.POSTGRES_HOST }} -U ${{ env.POSTGRES_USER }} -d ${{ env.POSTGRES_DB }} && echo "PostgreSQL is running" || exit 1;
- name: Confirm Trino connector is installed
run: uv run python -c "import trino; print('Trino connector installed')"

- name: Run Trino Tests
run: uv run pytest -m trino tests/ -rs
4 changes: 4 additions & 0 deletions pydough/conversion/hybrid_translator.py
Original file line number Diff line number Diff line change
Expand Up @@ -1749,6 +1749,10 @@ def convert_qdag_to_hybrid(self, node: PyDoughCollectionQDAG) -> HybridTree:
"""
# 1. Run the initial conversion from QDAG to Hybrid
hybrid: HybridTree = self.make_hybrid_tree(node, None)
print()
print(node.to_tree_string())
print()
print(hybrid)
# 2. Eject any aggregate inputs from the hybrid tree.
self.eject_aggregate_inputs(hybrid)
# 3. Syncretize any children of the hybrid tree that share a common
Expand Down
44 changes: 44 additions & 0 deletions pydough/database_connectors/builtin_databases.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@
"load_postgres_connection",
"load_snowflake_connection",
"load_sqlite_connection",
"load_trino_connection",
]


Expand Down Expand Up @@ -55,6 +56,9 @@ def load_database_context(database_name: str, **kwargs) -> DatabaseContext:
case "snowflake":
connection = load_snowflake_connection(**kwargs)
dialect = DatabaseDialect.SNOWFLAKE
case "trino":
connection = load_trino_connection(**kwargs)
dialect = DatabaseDialect.TRINO
case "mysql":
connection = load_mysql_connection(**kwargs)
dialect = DatabaseDialect.MYSQL
Expand Down Expand Up @@ -136,6 +140,46 @@ def load_snowflake_connection(**kwargs) -> DatabaseConnection:
return DatabaseConnection(connection)


def load_trino_connection(**kwargs) -> DatabaseConnection:
"""
Loads a Trino database connection. This is done by providing a
wrapper around the DB 2.0 connect API.

Args:
**kwargs: The keyword arguments which are expected to include either a
connection object for Trino or the required connection parameters:
- user: Trino username (str)
- host: Trino server host (str, default: "127.0.01"/"localhost")
- port: Trino server port (int, default: 8080)

Raises:
ImportError: If the Trino connector is not installed.
ValueError: If required connection parameters are missing.

Returns:
A DatabaseContext object for Trino.
"""
try:
import trino
except ImportError:
raise ImportError(
"Trino connector is not installed. Please install it with `pip install trino`."
)

connection = kwargs.pop("connection", None)
if connection:
return DatabaseConnection(connection)
required_keys = ["user", "host", "port"]
if not all(key in kwargs for key in required_keys):
raise ValueError(
"Trino connection requires the following arguments: "
+ ", ".join(required_keys)
)
# Create a Trino connection using the provided keyword arguments
connection = trino.dbapi.connect(**kwargs)
return DatabaseConnection(connection)


def load_mysql_connection(**kwargs) -> DatabaseConnection:
"""
Loads a MySQL database connection. This is done by providing a wrapper
Expand Down
1 change: 1 addition & 0 deletions pydough/database_connectors/database_connector.py
Original file line number Diff line number Diff line change
Expand Up @@ -193,6 +193,7 @@ class DatabaseDialect(Enum):
ANSI = "ansi"
SQLITE = "sqlite"
SNOWFLAKE = "snowflake"
TRINO = "trino"
MYSQL = "mysql"
POSTGRES = "postgres"
ORACLE = "oracle"
Expand Down
22 changes: 22 additions & 0 deletions pydough/sqlglot/execute_relational.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,9 +15,11 @@
from sqlglot.dialects import Postgres as PostgresDialect
from sqlglot.dialects import Snowflake as SnowflakeDialect
from sqlglot.dialects import SQLite as SQLiteDialect
from sqlglot.dialects import Trino as TrinoDialect
from sqlglot.dialects.dialect import rename_func
from sqlglot.dialects.mysql import MySQL
from sqlglot.dialects.oracle import Oracle
from sqlglot.dialects.trino import Trino
from sqlglot.errors import SqlglotError
from sqlglot.expressions import (
Alias,
Expand Down Expand Up @@ -552,6 +554,8 @@ def convert_dialect_to_sqlglot(dialect: DatabaseDialect) -> SQLGlotDialect:
# The BodoSQL dialect is essentially a subset of the Snowflake SQL
# dialect without many of the extraneous features.
return SnowflakeDialect()
case DatabaseDialect.TRINO:
return TrinoDialect()
case DatabaseDialect.MYSQL:
return MySQLDialect()
case DatabaseDialect.POSTGRES:
Expand Down Expand Up @@ -602,6 +606,15 @@ def change_sqlglot_dialect_configuration(dialect: DatabaseDialect) -> None:
)
)

case DatabaseDialect.TRINO:
# Replace the DAYOFWEEK override for the Trino generator with the
# default version, since PyDough handles the conversion logic
# correctly whereas the SQLGlot version gets confused with multiple
# rounds of parsing and unparsing.
Trino.Generator.TRANSFORMS[sqlglot_expressions.DayOfWeek] = (
lambda self, e: f"DAY_OF_WEEK({self.sql(e.this)})"
)

case _:
pass

Expand Down Expand Up @@ -629,6 +642,15 @@ def reset_sqlglot_dialect_configuration(dialect: DatabaseDialect) -> None:
]
)
)
case DatabaseDialect.TRINO:
# Replace the DAYOFWEEK override for the Trino generator with the
# default version, since PyDough handles the conversion logic
# correctly whereas the SQLGlot version gets confused with multiple
# rounds of parsing and unparsing.
Trino.Generator.TRANSFORMS[sqlglot_expressions.DayOfWeek] = (
lambda self, e: f"(({self.func('DAY_OF_WEEK', e.this)} % 7) + 1)",
)

case _:
pass

Expand Down
Loading