From 86b8a2cff02f9789fcc4a63b42b25f1840145052 Mon Sep 17 00:00:00 2001 From: thedeceptio Date: Sun, 21 Jun 2026 19:48:43 +0530 Subject: [PATCH 1/7] =?UTF-8?q?feat(multi-value):=20Stage=20A=20=E2=80=94?= =?UTF-8?q?=20MULTI=5FVALUE=20type=20+=20engine-spec=20capability=20contra?= =?UTF-8?q?ct?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Introduce first-class multi-value (array-typed) column support, designed to work across any SQL dialect that supports arrays. This stage adds the semantic type and the dialect-agnostic capability layer, with ClickHouse as the first concrete implementation. - Activate GenericDataType.MULTI_VALUE (= 4) on both the backend enum (superset/utils/core.py) and the frontend enum (superset-core/common). - Add an opt-in capability contract to BaseEngineSpec: supports_multivalue_columns flag (default False) plus array_contains, array_length and array_explode methods (default NotImplementedError) so engines that have not opted in keep treating arrays as strings. - ClickHouse: set the flag, reclassify Array(...) columns as MULTI_VALUE, and implement has()/length()/arrayJoin() as bound SQLAlchemy expressions. Tests: flip the existing Array->STRING assertion to MULTI_VALUE, add nested array variants, per-capability SQL-compilation tests, a bound-parameter (injection-safety) test, and negative tests asserting the base spec stays disabled and raises. array_explode is implemented and unit-tested but not yet wired into the query builder (Stage C). Co-Authored-By: Claude Opus 4.8 Signed-off-by: thedeceptio --- .../superset-core/src/common/index.ts | 1 + superset/db_engine_specs/base.py | 58 ++++++++++++++- superset/db_engine_specs/clickhouse.py | 22 +++++- superset/utils/core.py | 2 +- tests/unit_tests/db_engine_specs/test_base.py | 16 +++++ .../db_engine_specs/test_clickhouse.py | 70 ++++++++++++++++++- 6 files changed, 164 insertions(+), 5 deletions(-) diff --git a/superset-frontend/packages/superset-core/src/common/index.ts b/superset-frontend/packages/superset-core/src/common/index.ts index cc399cf62751..34d513e4f74e 100644 --- a/superset-frontend/packages/superset-core/src/common/index.ts +++ b/superset-frontend/packages/superset-core/src/common/index.ts @@ -130,6 +130,7 @@ export enum GenericDataType { String = 1, Temporal = 2, Boolean = 3, + MultiValue = 4, } /** diff --git a/superset/db_engine_specs/base.py b/superset/db_engine_specs/base.py index 3b3c234c1650..08ff0aa254ee 100644 --- a/superset/db_engine_specs/base.py +++ b/superset/db_engine_specs/base.py @@ -55,7 +55,13 @@ from sqlalchemy.engine.url import URL from sqlalchemy.ext.compiler import compiles from sqlalchemy.sql import literal_column, quoted_name, text -from sqlalchemy.sql.expression import BinaryExpression, ColumnClause, Select, TextClause +from sqlalchemy.sql.expression import ( + BinaryExpression, + ColumnClause, + ColumnElement, + Select, + TextClause, +) from sqlalchemy.types import TypeEngine from superset import db @@ -493,6 +499,11 @@ class BaseEngineSpec: # pylint: disable=too-many-public-methods time_groupby_inline = False limit_method = LimitMethod.FORCE_LIMIT supports_multivalues_insert = False + # Whether this engine supports first-class multi-value (array-typed) columns. + # When True, array columns are classified as ``GenericDataType.MULTI_VALUE`` and + # the ``array_*`` capability methods below must be implemented. Defaults to + # False so engines that have not opted in keep treating arrays as strings. + supports_multivalue_columns = False allows_joins = True allows_subqueries = True allows_alias_in_select = True @@ -2393,6 +2404,51 @@ def update_params_from_encrypted_extra( # pylint: disable=invalid-name logger.error(ex, exc_info=True) raise + @classmethod + def array_contains(cls, col: ColumnElement, value: Any) -> ColumnElement: + """ + Build a boolean expression testing whether array column ``col`` contains + ``value``. Engines that set ``supports_multivalue_columns = True`` must + override this with their native array-membership function. + + :param col: SQLAlchemy column element for the array column + :param value: scalar value to look for inside the array + :return: a SQLAlchemy boolean expression + """ + raise NotImplementedError( + f"{cls.engine} does not support multi-value (array) columns" + ) + + @classmethod + def array_length(cls, col: ColumnElement) -> ColumnElement: + """ + Build a numeric expression returning the number of elements in array + column ``col``. Engines that set ``supports_multivalue_columns = True`` + must override this with their native array-length function. + + :param col: SQLAlchemy column element for the array column + :return: a SQLAlchemy numeric expression + """ + raise NotImplementedError( + f"{cls.engine} does not support multi-value (array) columns" + ) + + @classmethod + def array_explode(cls, col: ColumnElement) -> ColumnElement: + """ + Build an expression that expands array column ``col`` into one scalar + value per element (e.g. ClickHouse ``arrayJoin``). Set-returning dialects + (Postgres/Trino/BigQuery ``UNNEST``) need additional FROM/JOIN plumbing + that is handled by the query builder; this method only returns the + element-producing expression. + + :param col: SQLAlchemy column element for the array column + :return: a SQLAlchemy expression yielding individual array elements + """ + raise NotImplementedError( + f"{cls.engine} does not support multi-value (array) columns" + ) + @classmethod def get_column_spec( # pylint: disable=unused-argument cls, diff --git a/superset/db_engine_specs/clickhouse.py b/superset/db_engine_specs/clickhouse.py index a998dce75e5c..66b5ac7eebc4 100644 --- a/superset/db_engine_specs/clickhouse.py +++ b/superset/db_engine_specs/clickhouse.py @@ -26,8 +26,9 @@ from flask_babel import gettext as __ from marshmallow import fields, Schema from marshmallow.validate import Range -from sqlalchemy import types +from sqlalchemy import func, types from sqlalchemy.engine.url import URL +from sqlalchemy.sql.expression import ColumnElement from urllib3.exceptions import NewConnectionError from superset.databases.utils import make_url_safe @@ -55,6 +56,7 @@ class ClickHouseBaseEngineSpec(BaseEngineSpec): time_groupby_inline = True supports_multivalues_insert = True + supports_multivalue_columns = True _time_grain_expressions = { None: "{col}", @@ -80,7 +82,7 @@ class ClickHouseBaseEngineSpec(BaseEngineSpec): ( re.compile(r".*Array.*", re.IGNORECASE), types.String(), - GenericDataType.STRING, + GenericDataType.MULTI_VALUE, ), ( re.compile(r".*UUID.*", re.IGNORECASE), @@ -119,6 +121,22 @@ class ClickHouseBaseEngineSpec(BaseEngineSpec): ), ) + @classmethod + def array_contains(cls, col: ColumnElement, value: Any) -> ColumnElement: + # ClickHouse: has(arr, value) -> 1 if the array contains value + return func.has(col, value) + + @classmethod + def array_length(cls, col: ColumnElement) -> ColumnElement: + # ClickHouse: length(arr) -> number of elements + return func.length(col) + + @classmethod + def array_explode(cls, col: ColumnElement) -> ColumnElement: + # ClickHouse: arrayJoin(arr) is a scalar function usable directly in + # SELECT/GROUP BY (no JOIN needed, unlike UNNEST dialects). + return func.arrayJoin(col) + @classmethod def epoch_to_dttm(cls) -> str: return "{col}" diff --git a/superset/utils/core.py b/superset/utils/core.py index f49a874f2e70..ed843f0a3873 100644 --- a/superset/utils/core.py +++ b/superset/utils/core.py @@ -187,7 +187,7 @@ class GenericDataType(IntEnum): STRING = 1 TEMPORAL = 2 BOOLEAN = 3 - # ARRAY = 4 # Mapping all the complex data types to STRING for now + MULTI_VALUE = 4 # array-typed columns (e.g. ClickHouse Array, Postgres ARRAY) # JSON = 5 # and leaving these as a reminder. # MAP = 6 # ROW = 7 diff --git a/tests/unit_tests/db_engine_specs/test_base.py b/tests/unit_tests/db_engine_specs/test_base.py index e46517a9927e..18b456a3837a 100644 --- a/tests/unit_tests/db_engine_specs/test_base.py +++ b/tests/unit_tests/db_engine_specs/test_base.py @@ -1360,3 +1360,19 @@ def test_resolve_column_type_falls_back_to_pa_mapped() -> None: def test_resolve_column_type_returns_none_when_both_absent() -> None: """None is returned when neither source provides a type.""" assert BaseEngineSpec.resolve_column_type(None, None) is None + + +def test_multivalue_columns_disabled_by_default() -> None: + """Engines must opt in to multi-value support; base defaults to off.""" + assert BaseEngineSpec.supports_multivalue_columns is False + + +@pytest.mark.parametrize("method", ["array_contains", "array_length", "array_explode"]) +def test_array_capabilities_raise_when_unsupported(method: str) -> None: + """Array capability methods raise NotImplementedError unless overridden.""" + from sqlalchemy import column + + fn = getattr(BaseEngineSpec, method) + args = (column("c"), "v") if method == "array_contains" else (column("c"),) + with pytest.raises(NotImplementedError): + fn(*args) diff --git a/tests/unit_tests/db_engine_specs/test_clickhouse.py b/tests/unit_tests/db_engine_specs/test_clickhouse.py index 5f56554e82d1..d77b03aacae6 100644 --- a/tests/unit_tests/db_engine_specs/test_clickhouse.py +++ b/tests/unit_tests/db_engine_specs/test_clickhouse.py @@ -109,7 +109,16 @@ def test_connect_convert_dttm( GenericDataType.STRING, False, ), - ("Array(UInt8)", String, None, GenericDataType.STRING, False), + ("Array(UInt8)", String, None, GenericDataType.MULTI_VALUE, False), + ("Array(String)", String, None, GenericDataType.MULTI_VALUE, False), + ("Array(UInt64)", String, None, GenericDataType.MULTI_VALUE, False), + ( + "Array(LowCardinality(String))", + String, + None, + GenericDataType.MULTI_VALUE, + False, + ), ("Enum('hello', 'world')", String, None, GenericDataType.STRING, False), ("Enum('UInt32', 'Bool')", String, None, GenericDataType.STRING, False), ( @@ -233,3 +242,62 @@ def test_adjust_engine_params_fully_qualified( uri = spec.adjust_engine_params(url, {}, None, schema)[0] assert str(uri) == expected_result + + +def _compile(expr) -> str: + return str(expr.compile(compile_kwargs={"literal_binds": True})) + + +def test_clickhouse_supports_multivalue_columns() -> None: + from superset.db_engine_specs.clickhouse import ( # noqa: N813 + ClickHouseEngineSpec as spec, + ) + + assert spec.supports_multivalue_columns is True + + +def test_multivalue_contains_sql() -> None: + from sqlalchemy import column + + from superset.db_engine_specs.clickhouse import ( # noqa: N813 + ClickHouseEngineSpec as spec, + ) + + expr = spec.array_contains(column("skills"), "Driver") + assert _compile(expr) == "has(skills, 'Driver')" + + +def test_multivalue_contains_binds_parameters() -> None: + """Value must be a bound parameter, not inlined (SQL-injection safety).""" + from sqlalchemy import column + + from superset.db_engine_specs.clickhouse import ( # noqa: N813 + ClickHouseEngineSpec as spec, + ) + + expr = spec.array_contains(column("skills"), "Driver") + compiled = expr.compile() + assert "Driver" not in str(compiled) + assert "Driver" in compiled.params.values() + + +def test_multivalue_length_sql() -> None: + from sqlalchemy import column + + from superset.db_engine_specs.clickhouse import ( # noqa: N813 + ClickHouseEngineSpec as spec, + ) + + expr = spec.array_length(column("skills")) + assert _compile(expr) == "length(skills)" + + +def test_multivalue_explode_sql() -> None: + from sqlalchemy import column + + from superset.db_engine_specs.clickhouse import ( # noqa: N813 + ClickHouseEngineSpec as spec, + ) + + expr = spec.array_explode(column("skills")) + assert _compile(expr) == "arrayJoin(skills)" From cf46ea0473c2addf31cd2e4a7cd5efb55b4b10dc Mon Sep 17 00:00:00 2001 From: thedeceptio Date: Sun, 21 Jun 2026 19:54:01 +0530 Subject: [PATCH 2/7] =?UTF-8?q?feat(multi-value):=20Stage=20B=20=E2=80=94?= =?UTF-8?q?=20CONTAINS=20filter=20operator=20for=20array=20columns?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Wire a new CONTAINS filter operator end-to-end so users can filter multi-value (array) columns by membership. - Add FilterOperator.CONTAINS (and the FilterStringOperators mirror). The charts API schema derives its allowed operators from the enum, so CONTAINS is accepted without any extra allow-list change. - Translate CONTAINS in the query builder (models/helpers.py): route to db_engine_spec.array_contains() so each dialect emits its native membership call, and raise a clear QueryObjectValidationError when the engine does not support multi-value columns. Tests: operator round-trip, an unsupported-engine guard (sqlite raises), and a positive path that drives the real get_sqla_query pipeline with a ClickHouse engine spec and asserts the generated SQL contains has(...). The dataset compiles against sqlite so no ClickHouse driver is required (func.has renders dialect-agnostically). Co-Authored-By: Claude Opus 4.8 Signed-off-by: thedeceptio --- superset/models/helpers.py | 11 ++ superset/utils/core.py | 2 + .../models/test_multivalue_filter.py | 119 ++++++++++++++++++ 3 files changed, 132 insertions(+) create mode 100644 tests/unit_tests/models/test_multivalue_filter.py diff --git a/superset/models/helpers.py b/superset/models/helpers.py index b603f0bb3cea..2d4a85b6083e 100644 --- a/superset/models/helpers.py +++ b/superset/models/helpers.py @@ -3615,6 +3615,17 @@ def get_sqla_query( # pylint: disable=too-many-arguments,too-many-locals,too-ma target_clause_list.append(sqla_col.not_like(eq)) else: target_clause_list.append(sqla_col.not_ilike(eq)) + elif op == utils.FilterOperator.CONTAINS: + if not db_engine_spec.supports_multivalue_columns: + raise QueryObjectValidationError( + _( + "The CONTAINS operator is only supported for " + "multi-value (array) columns on this database." + ) + ) + target_clause_list.append( + db_engine_spec.array_contains(sqla_col, eq) + ) elif ( op == utils.FilterOperator.TEMPORAL_RANGE and isinstance(eq, str) diff --git a/superset/utils/core.py b/superset/utils/core.py index ed843f0a3873..a1c8a8dd1f94 100644 --- a/superset/utils/core.py +++ b/superset/utils/core.py @@ -277,6 +277,7 @@ class FilterOperator(StrEnum): IS_TRUE = "IS TRUE" IS_FALSE = "IS FALSE" TEMPORAL_RANGE = "TEMPORAL_RANGE" + CONTAINS = "CONTAINS" # array membership, for MULTI_VALUE columns class FilterStringOperators(StrEnum): @@ -295,6 +296,7 @@ class FilterStringOperators(StrEnum): LATEST_PARTITION = ("LATEST_PARTITION",) IS_TRUE = ("IS_TRUE",) IS_FALSE = ("IS_FALSE",) + CONTAINS = ("CONTAINS",) class PostProcessingBoxplotWhiskerType(StrEnum): diff --git a/tests/unit_tests/models/test_multivalue_filter.py b/tests/unit_tests/models/test_multivalue_filter.py new file mode 100644 index 000000000000..d21105fac8eb --- /dev/null +++ b/tests/unit_tests/models/test_multivalue_filter.py @@ -0,0 +1,119 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +"""Stage B: the CONTAINS filter operator for multi-value (array) columns.""" + +from __future__ import annotations + +import pytest +from flask import Flask +from pytest_mock import MockerFixture + +from superset.connectors.sqla.models import SqlaTable, SqlMetric, TableColumn +from superset.db_engine_specs.clickhouse import ClickHouseEngineSpec +from superset.exceptions import QueryObjectValidationError +from superset.models.core import Database +from superset.superset_typing import QueryObjectDict +from superset.utils.core import FilterOperator + + +def _make_dataset(mocker: MockerFixture) -> SqlaTable: + database = Database( + id=1, + database_name="test_db", + sqlalchemy_uri="sqlite://", + ) + columns = [ + TableColumn(column_name="skills", type="Array(String)"), + TableColumn(column_name="city", type="VARCHAR(100)"), + ] + dataset = SqlaTable( + table_name="jobs", + columns=columns, + database=database, + metrics=[SqlMetric(metric_name="count", expression="COUNT(*)")], + ) + mocker.patch( + "superset.connectors.sqla.models.security_manager.get_guest_rls_filters", + return_value=[], + ) + mocker.patch( + "superset.connectors.sqla.models.security_manager.is_guest_user", + return_value=False, + ) + return dataset + + +def _query_obj() -> QueryObjectDict: + return { + "granularity": None, + "from_dttm": None, + "to_dttm": None, + "is_timeseries": False, + "groupby": ["city"], + "metrics": ["count"], + "filter": [ + { + "col": "skills", + "op": FilterOperator.CONTAINS.value, + "val": "Driver", + } + ], + "columns": [], + } + + +def test_contains_operator_is_registered() -> None: + """The operator round-trips through the enum used to parse filter payloads.""" + assert FilterOperator("CONTAINS") is FilterOperator.CONTAINS + assert FilterOperator.CONTAINS.value == "CONTAINS" + + +def test_contains_filter_unsupported_engine_raises( + mocker: MockerFixture, + app: Flask, +) -> None: + """On an engine without array support (sqlite) CONTAINS is rejected.""" + dataset = _make_dataset(mocker) + with app.test_request_context(): # noqa: SIM117 + with pytest.raises(QueryObjectValidationError): + dataset.get_sqla_query(**_query_obj()) + + +def test_contains_filter_generates_native_sql( + mocker: MockerFixture, + app: Flask, +) -> None: + """On a multi-value-capable engine CONTAINS compiles to the native call. + + The dataset compiles against sqlite (so no ClickHouse driver is needed), but + its engine spec is ClickHouse, so the operator routes through + ``ClickHouseEngineSpec.array_contains`` -> ``has(...)``. ``func.has`` renders + the same regardless of dialect, which lets us assert on it here. + """ + dataset = _make_dataset(mocker) + mocker.patch.object( + SqlaTable, + "db_engine_spec", + new=property(lambda self: ClickHouseEngineSpec), + ) + + with app.test_request_context(): + extended = dataset.get_query_str_extended(_query_obj(), mutate=False) + sql = extended.sql + + assert "has(" in sql.lower() + assert "driver" in sql.lower() From b7dd83edb047caee593421c478b8d00de74cd82e Mon Sep 17 00:00:00 2001 From: thedeceptio Date: Sun, 21 Jun 2026 19:59:24 +0530 Subject: [PATCH 3/7] feat(multi-value): array-length virtual dimension Let users group/aggregate by the element count of an array column, produced dialect-agnostically via the engine-spec capability layer. - Add a multi-value modifier column shape to AdhocColumn: a base `column` plus a `columnOperation` (e.g. "LENGTH") instead of a literal sqlExpression. Add the MultiValueColumnOperation enum and an is_multivalue_operation_column detector. - Resolve the shape in SqlaTable via a new _multivalue_column_to_sqla helper: it looks up the base array column and calls db_engine_spec.array_length(), typing the result NUMERIC. Unsupported engines, unknown base columns and unknown operations all raise a clear QueryObjectValidationError. - Route the shape through both the grouped (adhoc_column_to_sqla) and raw-columns branches of the query builder. Because the derived column is computed live from the engine spec there is no stored virtual column to persist, so nothing special is needed to survive "Sync columns from database". Tests: end-to-end length(skills) generation through get_sqla_query, NUMERIC typing, and the unsupported-engine / unknown-column / unknown-operation guards. Co-Authored-By: Claude Opus 4.8 Signed-off-by: thedeceptio --- superset/connectors/sqla/models.py | 47 ++++++ superset/models/helpers.py | 7 + superset/superset_typing.py | 4 + superset/utils/core.py | 16 ++ .../models/test_multivalue_length.py | 159 ++++++++++++++++++ 5 files changed, 233 insertions(+) create mode 100644 tests/unit_tests/models/test_multivalue_length.py diff --git a/superset/connectors/sqla/models.py b/superset/connectors/sqla/models.py index 81b5c2fc8d9d..690bad1b6c89 100644 --- a/superset/connectors/sqla/models.py +++ b/superset/connectors/sqla/models.py @@ -1711,6 +1711,48 @@ def _render_adhoc_expression_for_metadata_lookup( ) ) from ex + def _multivalue_column_to_sqla( + self, + col: AdhocColumn, + template_processor: BaseTemplateProcessor | None = None, + ) -> tuple[ColumnElement, utils.GenericDataType | None]: + """ + Turn a multi-value (array) modifier column into a sqlalchemy column. + + The column references a base array column and an operation (e.g. array + length); the native SQL is produced by the engine spec so the same + payload works across any dialect that supports array columns. + """ + label = utils.get_column_name(col) + base_name = col.get("column") + operation = col.get("columnOperation") + + db_engine_spec = self.db_engine_spec + if not db_engine_spec.supports_multivalue_columns: + raise QueryObjectValidationError( + _("This database does not support multi-value (array) columns.") + ) + + base_column = self.get_column(base_name) + if base_column is None: + raise QueryObjectValidationError( + _("Unknown column used as multi-value source: %(col)s", col=base_name) + ) + base_sqla_col = base_column.get_sqla_col(template_processor=template_processor) + + if operation == utils.MultiValueColumnOperation.LENGTH: + expression = db_engine_spec.array_length(base_sqla_col) + generic_type = utils.GenericDataType.NUMERIC + else: + raise QueryObjectValidationError( + _( + "Unsupported multi-value column operation: %(op)s", + op=operation, + ) + ) + + return self.make_sqla_column_compatible(expression, label), generic_type + def adhoc_column_to_sqla( # pylint: disable=too-many-locals self, col: AdhocColumn, @@ -1733,6 +1775,11 @@ def adhoc_column_to_sqla( # pylint: disable=too-many-locals Python type (e.g. numeric casts for numeric adhoc expressions). :rtype: tuple[sqlalchemy.sql.ColumnElement, Optional[GenericDataType]] """ + if utils.is_multivalue_operation_column(col): + return self._multivalue_column_to_sqla( + col, template_processor=template_processor + ) + label = utils.get_column_name(col) sql_expression = col["sqlExpression"] time_grain = col.get("timeGrain") diff --git a/superset/models/helpers.py b/superset/models/helpers.py index 2d4a85b6083e..09280de3d5b1 100644 --- a/superset/models/helpers.py +++ b/superset/models/helpers.py @@ -3278,6 +3278,13 @@ def get_sqla_query( # pylint: disable=too-many-arguments,too-many-locals,too-ma select_exprs.append(outer) elif columns: for selected in columns: + if utils.is_multivalue_operation_column(selected): + outer, _unused = self.adhoc_column_to_sqla( + col=selected, + template_processor=template_processor, + ) + select_exprs.append(outer) + continue if is_adhoc_column(selected): _sql = selected["sqlExpression"] _column_label = selected["label"] diff --git a/superset/superset_typing.py b/superset/superset_typing.py index 39ff9ab13478..31294a5af3d7 100644 --- a/superset/superset_typing.py +++ b/superset/superset_typing.py @@ -104,6 +104,10 @@ class AdhocColumn(TypedDict, total=False): isColumnReference: bool | None columnType: Literal["BASE_AXIS", "SERIES"] | None timeGrain: str | None + # Multi-value (array) column modifiers: ``column`` names the base array + # column and ``columnOperation`` the operation to apply (e.g. "LENGTH"). + column: str + columnOperation: str class SQLAColumnType(TypedDict): diff --git a/superset/utils/core.py b/superset/utils/core.py index a1c8a8dd1f94..e9724a729ff8 100644 --- a/superset/utils/core.py +++ b/superset/utils/core.py @@ -1273,6 +1273,22 @@ def is_adhoc_column(column: Column) -> TypeGuard[AdhocColumn]: ) +class MultiValueColumnOperation(StrEnum): + """Operations that can be applied to a multi-value (array) column.""" + + LENGTH = "LENGTH" + EXPLODE = "EXPLODE" + + +def is_multivalue_operation_column(column: Column) -> bool: + """Whether ``column`` is a multi-value modifier (e.g. array length). + + These columns carry a base ``column`` plus a ``columnOperation`` instead of a + ``sqlExpression``; the actual SQL is produced by the engine spec. + """ + return isinstance(column, dict) and "columnOperation" in column + + def is_base_axis(column: Column) -> bool: return is_adhoc_column(column) and column.get("columnType") == "BASE_AXIS" diff --git a/tests/unit_tests/models/test_multivalue_length.py b/tests/unit_tests/models/test_multivalue_length.py new file mode 100644 index 000000000000..4d39071bfe14 --- /dev/null +++ b/tests/unit_tests/models/test_multivalue_length.py @@ -0,0 +1,159 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +"""Stage D: the array-length virtual dimension for multi-value columns.""" + +from __future__ import annotations + +import pytest +from flask import Flask +from pytest_mock import MockerFixture + +from superset.connectors.sqla.models import SqlaTable, SqlMetric, TableColumn +from superset.db_engine_specs.clickhouse import ClickHouseEngineSpec +from superset.exceptions import QueryObjectValidationError +from superset.models.core import Database +from superset.superset_typing import QueryObjectDict +from superset.utils.core import GenericDataType, MultiValueColumnOperation + + +def _make_dataset(mocker: MockerFixture) -> SqlaTable: + database = Database( + id=1, + database_name="test_db", + sqlalchemy_uri="sqlite://", + ) + columns = [ + TableColumn(column_name="skills", type="Array(String)"), + TableColumn(column_name="city", type="VARCHAR(100)"), + ] + dataset = SqlaTable( + table_name="jobs", + columns=columns, + database=database, + metrics=[SqlMetric(metric_name="count", expression="COUNT(*)")], + ) + mocker.patch( + "superset.connectors.sqla.models.security_manager.get_guest_rls_filters", + return_value=[], + ) + mocker.patch( + "superset.connectors.sqla.models.security_manager.is_guest_user", + return_value=False, + ) + return dataset + + +def _length_dimension() -> dict: + return { + "label": "skills_length", + "column": "skills", + "columnOperation": MultiValueColumnOperation.LENGTH.value, + } + + +def _query_obj(dimension: dict) -> QueryObjectDict: + return { + "granularity": None, + "from_dttm": None, + "to_dttm": None, + "is_timeseries": False, + "groupby": [dimension], + "metrics": ["count"], + "filter": [], + "columns": [], + } + + +def _clickhouse_dataset(mocker: MockerFixture) -> SqlaTable: + dataset = _make_dataset(mocker) + mocker.patch.object( + SqlaTable, + "db_engine_spec", + new=property(lambda self: ClickHouseEngineSpec), + ) + return dataset + + +def test_length_dimension_generates_native_sql( + mocker: MockerFixture, + app: Flask, +) -> None: + """A length dimension routes through array_length -> length(skills).""" + dataset = _clickhouse_dataset(mocker) + with app.test_request_context(): + extended = dataset.get_query_str_extended( + _query_obj(_length_dimension()), mutate=False + ) + sql = extended.sql.lower() + + assert "length(skills)" in sql + assert "skills_length" in sql # the label is applied + + +def test_length_dimension_type_is_numeric( + mocker: MockerFixture, + app: Flask, +) -> None: + """The derived length column is typed NUMERIC so it behaves like a number.""" + dataset = _clickhouse_dataset(mocker) + with app.test_request_context(): + _expr, generic_type = dataset.adhoc_column_to_sqla(col=_length_dimension()) + + assert generic_type == GenericDataType.NUMERIC + + +def test_length_dimension_unsupported_engine_raises( + mocker: MockerFixture, + app: Flask, +) -> None: + """On an engine without array support (sqlite) the length op is rejected.""" + dataset = _make_dataset(mocker) + with app.test_request_context(): # noqa: SIM117 + with pytest.raises(QueryObjectValidationError): + dataset.get_sqla_query(**_query_obj(_length_dimension())) + + +def test_length_dimension_unknown_base_column_raises( + mocker: MockerFixture, + app: Flask, +) -> None: + """A length op referencing a missing base column is rejected.""" + dataset = _clickhouse_dataset(mocker) + bad = { + "label": "nope_length", + "column": "does_not_exist", + "columnOperation": MultiValueColumnOperation.LENGTH.value, + } + with app.test_request_context(): # noqa: SIM117 + with pytest.raises(QueryObjectValidationError): + dataset.get_sqla_query(**_query_obj(bad)) + + +def test_unsupported_operation_raises( + mocker: MockerFixture, + app: Flask, +) -> None: + """An unknown columnOperation is rejected rather than silently ignored.""" + dataset = _clickhouse_dataset(mocker) + bad = { + "label": "skills_x", + "column": "skills", + "columnOperation": "NOT_A_REAL_OP", + } + with app.test_request_context(): # noqa: SIM117 + with pytest.raises(QueryObjectValidationError): + dataset.adhoc_column_to_sqla(col=bad) From cbaff76e19e3f3f2af69f8bbf5e2bc1a9485450c Mon Sep 17 00:00:00 2001 From: thedeceptio Date: Sun, 21 Jun 2026 20:05:42 +0530 Subject: [PATCH 4/7] =?UTF-8?q?feat(multi-value):=20explode=20(group=20by?= =?UTF-8?q?=20array=20elements)=20=E2=80=94=20ClickHouse=20MVP?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add an EXPLODE operation to the multi-value modifier column so users can group by individual elements of an array column. - Wire EXPLODE into _multivalue_column_to_sqla: it calls db_engine_spec.array_explode(), which on ClickHouse is the scalar arrayJoin(col) and projects directly into SELECT/GROUP BY (no JOIN needed). - Future-safe guard: set-returning UNNEST dialects (Postgres/Trino/BigQuery) require CROSS JOIN UNNEST plumbing and are deferred to phase 2 (post live validation). Those engines leave array_explode unimplemented, so we catch NotImplementedError and raise a clear QueryObjectValidationError instead of emitting invalid SQL. The explode modifier is part of the query object, so exploded and non-exploded queries naturally get distinct cache keys. Note (ClickHouse semantics): arrayJoin behaves like an INNER JOIN — rows with empty arrays drop out, changing totals. To be confirmed against a live instance in Stage E. Tests: arrayJoin(skills) generation through get_sqla_query, explode-vs-plain SQL divergence, the unsupported-engine guard, and a simulated UNNEST-only dialect getting a clean error rather than bad SQL. Co-Authored-By: Claude Opus 4.8 Signed-off-by: thedeceptio --- superset/connectors/sqla/models.py | 15 ++ .../models/test_multivalue_explode.py | 160 ++++++++++++++++++ 2 files changed, 175 insertions(+) create mode 100644 tests/unit_tests/models/test_multivalue_explode.py diff --git a/superset/connectors/sqla/models.py b/superset/connectors/sqla/models.py index 690bad1b6c89..de7bbbaec77b 100644 --- a/superset/connectors/sqla/models.py +++ b/superset/connectors/sqla/models.py @@ -1743,6 +1743,21 @@ def _multivalue_column_to_sqla( if operation == utils.MultiValueColumnOperation.LENGTH: expression = db_engine_spec.array_length(base_sqla_col) generic_type = utils.GenericDataType.NUMERIC + elif operation == utils.MultiValueColumnOperation.EXPLODE: + # Scalar explode (e.g. ClickHouse arrayJoin) can be projected directly. + # Set-returning UNNEST dialects (Postgres/Trino/BigQuery) need extra + # FROM/JOIN plumbing and are handled in a later phase; for those the + # engine spec leaves array_explode unimplemented and we surface a + # clear error instead of emitting invalid SQL. + try: + expression = db_engine_spec.array_explode(base_sqla_col) + except NotImplementedError as ex: + raise QueryObjectValidationError( + _("This database does not support exploding array columns.") + ) from ex + # The exploded value is a single array element; its type is not + # reliably known from the array type string, so leave it unset. + generic_type = None else: raise QueryObjectValidationError( _( diff --git a/tests/unit_tests/models/test_multivalue_explode.py b/tests/unit_tests/models/test_multivalue_explode.py new file mode 100644 index 000000000000..7a9cf0e91c32 --- /dev/null +++ b/tests/unit_tests/models/test_multivalue_explode.py @@ -0,0 +1,160 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +"""Explode (group by array elements) for multi-value columns — ClickHouse MVP. + +Set-returning UNNEST dialects (Postgres/Trino/BigQuery) need CROSS JOIN UNNEST +plumbing and are handled in a later phase; here we cover the scalar ClickHouse +``arrayJoin`` path plus the guard that keeps unimplemented dialects from emitting +invalid SQL. +""" + +from __future__ import annotations + +import pytest +from flask import Flask +from pytest_mock import MockerFixture + +from superset.connectors.sqla.models import SqlaTable, SqlMetric, TableColumn +from superset.db_engine_specs.base import BaseEngineSpec +from superset.db_engine_specs.clickhouse import ClickHouseEngineSpec +from superset.exceptions import QueryObjectValidationError +from superset.models.core import Database +from superset.superset_typing import QueryObjectDict +from superset.utils.core import MultiValueColumnOperation + + +class _UnnestOnlySpec(BaseEngineSpec): + """Simulates a future array dialect that has not implemented scalar explode.""" + + supports_multivalue_columns = True + # array_explode intentionally left as the base NotImplementedError + + +def _make_dataset(mocker: MockerFixture) -> SqlaTable: + database = Database( + id=1, + database_name="test_db", + sqlalchemy_uri="sqlite://", + ) + columns = [ + TableColumn(column_name="skills", type="Array(String)"), + TableColumn(column_name="city", type="VARCHAR(100)"), + ] + dataset = SqlaTable( + table_name="jobs", + columns=columns, + database=database, + metrics=[SqlMetric(metric_name="count", expression="COUNT(*)")], + ) + mocker.patch( + "superset.connectors.sqla.models.security_manager.get_guest_rls_filters", + return_value=[], + ) + mocker.patch( + "superset.connectors.sqla.models.security_manager.is_guest_user", + return_value=False, + ) + return dataset + + +def _explode_dimension() -> dict: + return { + "label": "skill", + "column": "skills", + "columnOperation": MultiValueColumnOperation.EXPLODE.value, + } + + +def _query_obj(dimension: dict) -> QueryObjectDict: + return { + "granularity": None, + "from_dttm": None, + "to_dttm": None, + "is_timeseries": False, + "groupby": [dimension], + "metrics": ["count"], + "filter": [], + "columns": [], + } + + +def _with_spec(mocker: MockerFixture, dataset: SqlaTable, spec: type) -> None: + mocker.patch.object( + SqlaTable, + "db_engine_spec", + new=property(lambda self: spec), + ) + + +def test_explode_dimension_generates_arrayjoin( + mocker: MockerFixture, + app: Flask, +) -> None: + """An explode dimension routes through array_explode -> arrayJoin(skills).""" + dataset = _make_dataset(mocker) + _with_spec(mocker, dataset, ClickHouseEngineSpec) + with app.test_request_context(): + sql = dataset.get_query_str_extended( + _query_obj(_explode_dimension()), mutate=False + ).sql.lower() + + assert "arrayjoin(skills)" in sql + assert "skill" in sql # the label is applied + + +def test_explode_changes_generated_sql( + mocker: MockerFixture, + app: Flask, +) -> None: + """Exploding a column yields different SQL than grouping by it raw. + + The explode modifier is part of the query object, so a query that explodes + cannot collide with one that does not (distinct cache keys downstream). + """ + dataset = _make_dataset(mocker) + _with_spec(mocker, dataset, ClickHouseEngineSpec) + with app.test_request_context(): + exploded = dataset.get_query_str_extended( + _query_obj(_explode_dimension()), mutate=False + ).sql + plain = dataset.get_query_str_extended(_query_obj("city"), mutate=False).sql + + assert exploded != plain + assert "arrayJoin" in exploded + + +def test_explode_unsupported_engine_raises( + mocker: MockerFixture, + app: Flask, +) -> None: + """On an engine without array support (sqlite) explode is rejected.""" + dataset = _make_dataset(mocker) + with app.test_request_context(): # noqa: SIM117 + with pytest.raises(QueryObjectValidationError): + dataset.get_sqla_query(**_query_obj(_explode_dimension())) + + +def test_explode_unimplemented_dialect_raises_clean_error( + mocker: MockerFixture, + app: Flask, +) -> None: + """A multi-value dialect lacking scalar explode gets a clear error, not bad SQL.""" + dataset = _make_dataset(mocker) + _with_spec(mocker, dataset, _UnnestOnlySpec) + with app.test_request_context(): # noqa: SIM117 + with pytest.raises(QueryObjectValidationError): + dataset.adhoc_column_to_sqla(col=_explode_dimension()) From 8dfe6f49042161b8cb21039d6ea87cc1a2710a5a Mon Sep 17 00:00:00 2001 From: thedeceptio Date: Sun, 21 Jun 2026 20:15:09 +0530 Subject: [PATCH 5/7] =?UTF-8?q?feat(multi-value):=20frontend=20=E2=80=94?= =?UTF-8?q?=20array=20column=20icon=20+=20CONTAINS=20filter=20operator?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Surface multi-value columns and the membership filter in Explore. - ColumnTypeLabel renders a distinct list icon for GenericDataType.MultiValue so array columns are visually identifiable in the column tree. - Add the Contains operator (CONTAINS) to the Explore operator enum and the operator->SQL label map. - Gate operators by column type in the adhoc filter editor: CONTAINS is shown only for multi-value columns, and multi-value columns expose only CONTAINS / IS NULL / IS NOT NULL (scalar comparators are hidden). Capability is implied by the backend classifying the column as MULTI_VALUE, so no separate frontend flag is needed. Tests: multi-value icon rendering, a GenericDataType enum-parity test guarding the values shared with the backend, and operator-visibility tests (CONTAINS shown for array columns, hidden for scalar ones). Note: the Length/Explode dimension modifier UI (controls that emit the columnOperation payload) is a separate follow-up; those operations are already exercisable via the query API. Co-Authored-By: Claude Opus 4.8 Signed-off-by: thedeceptio --- .../ColumnTypeLabel/ColumnTypeLabel.tsx | 5 ++ .../test/components/ColumnTypeLabel.test.tsx | 17 ++++++ ...FilterEditPopoverSimpleTabContent.test.tsx | 54 +++++++++++++++++++ .../index.tsx | 15 ++++++ .../FilterControl/utils/translateToSQL.ts | 1 + superset-frontend/src/explore/constants.ts | 2 + 6 files changed, 94 insertions(+) diff --git a/superset-frontend/packages/superset-ui-chart-controls/src/components/ColumnTypeLabel/ColumnTypeLabel.tsx b/superset-frontend/packages/superset-ui-chart-controls/src/components/ColumnTypeLabel/ColumnTypeLabel.tsx index 8ea43bdc8a68..0316ca257629 100644 --- a/superset-frontend/packages/superset-ui-chart-controls/src/components/ColumnTypeLabel/ColumnTypeLabel.tsx +++ b/superset-frontend/packages/superset-ui-chart-controls/src/components/ColumnTypeLabel/ColumnTypeLabel.tsx @@ -28,6 +28,7 @@ import { FieldBinaryOutlined, FieldStringOutlined, NumberOutlined, + UnorderedListOutlined, } from '@ant-design/icons'; import { Icons } from '@superset-ui/core/components'; @@ -72,6 +73,10 @@ export function ColumnTypeLabel({ type }: ColumnTypeLabelProps) { typeIcon = ; } else if (type === GenericDataType.Temporal) { typeIcon = ; + } else if (type === GenericDataType.MultiValue) { + typeIcon = ( + + ); } return {typeIcon}; diff --git a/superset-frontend/packages/superset-ui-chart-controls/test/components/ColumnTypeLabel.test.tsx b/superset-frontend/packages/superset-ui-chart-controls/test/components/ColumnTypeLabel.test.tsx index fc65cd26f86f..46fc74b49367 100644 --- a/superset-frontend/packages/superset-ui-chart-controls/test/components/ColumnTypeLabel.test.tsx +++ b/superset-frontend/packages/superset-ui-chart-controls/test/components/ColumnTypeLabel.test.tsx @@ -64,4 +64,21 @@ describe('ColumnOption', () => { renderColumnTypeLabel({ type: GenericDataType.Temporal }); expect(screen.getByLabelText('temporal type icon')).toBeVisible(); }); + test('multi-value (array) type shows list icon', () => { + renderColumnTypeLabel({ type: GenericDataType.MultiValue }); + expect(screen.getByLabelText('multi-value type icon')).toBeVisible(); + }); +}); + +describe('GenericDataType enum parity', () => { + // These numeric values are shared with the backend enum in + // superset/utils/core.py (GenericDataType). They must stay in sync because + // the backend serializes columns using these integers. + test('values match the backend contract', () => { + expect(GenericDataType.Numeric).toBe(0); + expect(GenericDataType.String).toBe(1); + expect(GenericDataType.Temporal).toBe(2); + expect(GenericDataType.Boolean).toBe(3); + expect(GenericDataType.MultiValue).toBe(4); + }); }); diff --git a/superset-frontend/src/explore/components/controls/FilterControl/AdhocFilterEditPopoverSimpleTabContent/AdhocFilterEditPopoverSimpleTabContent.test.tsx b/superset-frontend/src/explore/components/controls/FilterControl/AdhocFilterEditPopoverSimpleTabContent/AdhocFilterEditPopoverSimpleTabContent.test.tsx index c2b5231a90dc..6c0898c6f581 100644 --- a/superset-frontend/src/explore/components/controls/FilterControl/AdhocFilterEditPopoverSimpleTabContent/AdhocFilterEditPopoverSimpleTabContent.test.tsx +++ b/superset-frontend/src/explore/components/controls/FilterControl/AdhocFilterEditPopoverSimpleTabContent/AdhocFilterEditPopoverSimpleTabContent.test.tsx @@ -35,6 +35,7 @@ import { } from 'src/explore/constants'; import AdhocMetric from 'src/explore/components/controls/MetricControl/AdhocMetric'; import { FeatureFlag, isFeatureEnabled } from '@superset-ui/core'; +import { GenericDataType } from '@apache-superset/core/common'; import fetchMock from 'fetch-mock'; import { TestDataset, Dataset } from '@superset-ui/chart-controls'; @@ -252,6 +253,59 @@ test('shows boolean only operators when subject is number', () => { ].map(operator => expect(isOperatorRelevant(operator, 'value')).toBe(true)); }); +test('shows CONTAINS and null operators when subject is multi-value', () => { + const props = setup({ + adhocFilter: new AdhocFilter({ + expressionType: ExpressionTypes.Simple, + subject: 'skills', + operatorId: undefined, + operator: undefined, + comparator: undefined, + clause: undefined, + }), + datasource: { + columns: [ + { + id: 3, + column_name: 'skills', + type: 'Array(String)', + type_generic: GenericDataType.MultiValue, + }, + ], + }, + }); + const { isOperatorRelevant } = useSimpleTabFilterProps( + props as unknown as Props, + ); + [Operators.Contains, Operators.IsNull, Operators.IsNotNull].forEach(operator => + expect(isOperatorRelevant(operator, 'skills')).toBe(true), + ); + // scalar operators are hidden for array columns + [Operators.Equals, Operators.GreaterThan, Operators.Like].forEach(operator => + expect(isOperatorRelevant(operator, 'skills')).toBe(false), + ); +}); + +test('hides CONTAINS for non multi-value columns', () => { + const props = setup({ + adhocFilter: new AdhocFilter({ + expressionType: ExpressionTypes.Simple, + subject: 'value', + operatorId: undefined, + operator: undefined, + comparator: undefined, + clause: undefined, + }), + datasource: { + columns: [{ id: 3, column_name: 'value', type: 'STRING' }], + }, + }); + const { isOperatorRelevant } = useSimpleTabFilterProps( + props as unknown as Props, + ); + expect(isOperatorRelevant(Operators.Contains, 'value')).toBe(false); +}); + test('will convert from individual comparator to array if the operator changes to multi', () => { const props = setup(); const { onOperatorChange } = useSimpleTabFilterProps( diff --git a/superset-frontend/src/explore/components/controls/FilterControl/AdhocFilterEditPopoverSimpleTabContent/index.tsx b/superset-frontend/src/explore/components/controls/FilterControl/AdhocFilterEditPopoverSimpleTabContent/index.tsx index 0c66026f8e1d..845e3cbd0e67 100644 --- a/superset-frontend/src/explore/components/controls/FilterControl/AdhocFilterEditPopoverSimpleTabContent/index.tsx +++ b/superset-frontend/src/explore/components/controls/FilterControl/AdhocFilterEditPopoverSimpleTabContent/index.tsx @@ -32,6 +32,7 @@ import { isDefined, SupersetClient, } from '@superset-ui/core'; +import { GenericDataType } from '@apache-superset/core/common'; import { styled, useTheme, css } from '@apache-superset/core/theme'; import { Operators, @@ -118,6 +119,8 @@ export const useSimpleTabFilterProps = (props: Props) => { const isColumnNumber = !!column && (column.type === 'INT' || column.type === 'INTEGER'); const isColumnFunction = !!column && !!column.expression; + const isColumnMultiValue = + !!column && column.type_generic === GenericDataType.MultiValue; if (operator && operator === Operators.LatestPartition) { const { partitionColumn } = props; @@ -127,6 +130,18 @@ export const useSimpleTabFilterProps = (props: Props) => { // hide the TEMPORAL_RANGE operator return false; } + // CONTAINS (array membership) only applies to multi-value columns. + if (operator === Operators.Contains) { + return isColumnMultiValue; + } + if (isColumnMultiValue) { + // multi-value columns support membership and null checks only + return ( + operator === Operators.Contains || + operator === Operators.IsNull || + operator === Operators.IsNotNull + ); + } if (operator === Operators.IsTrue || operator === Operators.IsFalse) { return isColumnBoolean || isColumnNumber || isColumnFunction; } diff --git a/superset-frontend/src/explore/components/controls/FilterControl/utils/translateToSQL.ts b/superset-frontend/src/explore/components/controls/FilterControl/utils/translateToSQL.ts index 171ddbc6f528..c105bcda9fdd 100644 --- a/superset-frontend/src/explore/components/controls/FilterControl/utils/translateToSQL.ts +++ b/superset-frontend/src/explore/components/controls/FilterControl/utils/translateToSQL.ts @@ -44,6 +44,7 @@ export const OPERATORS_TO_SQL = { 'IS NULL': 'IS NULL', 'IS TRUE': 'IS TRUE', 'IS FALSE': 'IS FALSE', + CONTAINS: 'CONTAINS', 'LATEST PARTITION': ({ datasource, }: { diff --git a/superset-frontend/src/explore/constants.ts b/superset-frontend/src/explore/constants.ts index 437c859232a4..f0c41d603756 100644 --- a/superset-frontend/src/explore/constants.ts +++ b/superset-frontend/src/explore/constants.ts @@ -45,6 +45,7 @@ export enum Operators { IsTrue = 'IS_TRUE', IsFalse = 'IS_FALSE', TemporalRange = 'TEMPORAL_RANGE', + Contains = 'CONTAINS', } export interface OperatorType { @@ -89,6 +90,7 @@ export const OPERATOR_ENUM_TO_OPERATOR_TYPE: { display: t('TEMPORAL_RANGE'), operation: 'TEMPORAL_RANGE', }, + [Operators.Contains]: { display: t('Contains'), operation: 'CONTAINS' }, }; export const OPERATORS_OPTIONS = Object.values(Operators) as Operators[]; From 71e5a8117dd78b3d5f52b86222bffbaa5f16c8e2 Mon Sep 17 00:00:00 2001 From: thedeceptio Date: Mon, 22 Jun 2026 12:57:58 +0530 Subject: [PATCH 6/7] fix(multi-value): exclude modifier columns from physical-column validation Found during live validation against a real ClickHouse instance: executing a query with a Length/Explode modifier column failed with "Columns missing in dataset", even though SQL generation worked. The query-context validation in query_context_processor.py extracts physical column names via get_column_name_from_column, which returned the modifier dict (treating it as a physical column) instead of None. - get_column_name_from_column now returns None for multi-value operation columns, matching how adhoc (sqlExpression) columns are already handled, so validation ignores them. Also make the multi-value model tests import the ClickHouse engine spec lazily inside their helpers. clickhouse.py reads app.config at module-import time (inside a try/except ImportError around clickhouse-connect); once the driver is installed that import-time access fails at pytest collection without an app context. Deferring the import matches the existing pattern in tests/.../db_engine_specs/test_clickhouse.py. Adds a regression test asserting modifier columns are excluded from physical column-name extraction. Co-Authored-By: Claude Opus 4.8 Signed-off-by: thedeceptio --- superset/utils/core.py | 2 +- .../models/test_multivalue_explode.py | 13 ++++++++--- .../models/test_multivalue_filter.py | 5 ++++- .../models/test_multivalue_length.py | 22 +++++++++++++++++-- 4 files changed, 35 insertions(+), 7 deletions(-) diff --git a/superset/utils/core.py b/superset/utils/core.py index e9724a729ff8..08253956a5b2 100644 --- a/superset/utils/core.py +++ b/superset/utils/core.py @@ -1694,7 +1694,7 @@ def get_column_name_from_column(column: Column) -> str | None: :param column: Physical and ad-hoc column :return: column name if physical column, otherwise None """ - if is_adhoc_column(column): + if is_adhoc_column(column) or is_multivalue_operation_column(column): return None return column # type: ignore diff --git a/tests/unit_tests/models/test_multivalue_explode.py b/tests/unit_tests/models/test_multivalue_explode.py index 7a9cf0e91c32..2dd0bb99f1df 100644 --- a/tests/unit_tests/models/test_multivalue_explode.py +++ b/tests/unit_tests/models/test_multivalue_explode.py @@ -30,7 +30,6 @@ from superset.connectors.sqla.models import SqlaTable, SqlMetric, TableColumn from superset.db_engine_specs.base import BaseEngineSpec -from superset.db_engine_specs.clickhouse import ClickHouseEngineSpec from superset.exceptions import QueryObjectValidationError from superset.models.core import Database from superset.superset_typing import QueryObjectDict @@ -92,6 +91,14 @@ def _query_obj(dimension: dict) -> QueryObjectDict: } +def _clickhouse_spec() -> type: + # Imported lazily: clickhouse.py touches app.config at import time, which + # is unavailable at pytest collection once clickhouse-connect is installed. + from superset.db_engine_specs.clickhouse import ClickHouseEngineSpec + + return ClickHouseEngineSpec + + def _with_spec(mocker: MockerFixture, dataset: SqlaTable, spec: type) -> None: mocker.patch.object( SqlaTable, @@ -106,7 +113,7 @@ def test_explode_dimension_generates_arrayjoin( ) -> None: """An explode dimension routes through array_explode -> arrayJoin(skills).""" dataset = _make_dataset(mocker) - _with_spec(mocker, dataset, ClickHouseEngineSpec) + _with_spec(mocker, dataset, _clickhouse_spec()) with app.test_request_context(): sql = dataset.get_query_str_extended( _query_obj(_explode_dimension()), mutate=False @@ -126,7 +133,7 @@ def test_explode_changes_generated_sql( cannot collide with one that does not (distinct cache keys downstream). """ dataset = _make_dataset(mocker) - _with_spec(mocker, dataset, ClickHouseEngineSpec) + _with_spec(mocker, dataset, _clickhouse_spec()) with app.test_request_context(): exploded = dataset.get_query_str_extended( _query_obj(_explode_dimension()), mutate=False diff --git a/tests/unit_tests/models/test_multivalue_filter.py b/tests/unit_tests/models/test_multivalue_filter.py index d21105fac8eb..0301eca2af66 100644 --- a/tests/unit_tests/models/test_multivalue_filter.py +++ b/tests/unit_tests/models/test_multivalue_filter.py @@ -23,7 +23,6 @@ from pytest_mock import MockerFixture from superset.connectors.sqla.models import SqlaTable, SqlMetric, TableColumn -from superset.db_engine_specs.clickhouse import ClickHouseEngineSpec from superset.exceptions import QueryObjectValidationError from superset.models.core import Database from superset.superset_typing import QueryObjectDict @@ -104,6 +103,10 @@ def test_contains_filter_generates_native_sql( ``ClickHouseEngineSpec.array_contains`` -> ``has(...)``. ``func.has`` renders the same regardless of dialect, which lets us assert on it here. """ + # Imported lazily: clickhouse.py touches app.config at import time, which + # is unavailable at pytest collection once clickhouse-connect is installed. + from superset.db_engine_specs.clickhouse import ClickHouseEngineSpec + dataset = _make_dataset(mocker) mocker.patch.object( SqlaTable, diff --git a/tests/unit_tests/models/test_multivalue_length.py b/tests/unit_tests/models/test_multivalue_length.py index 4d39071bfe14..c321615f5e76 100644 --- a/tests/unit_tests/models/test_multivalue_length.py +++ b/tests/unit_tests/models/test_multivalue_length.py @@ -23,11 +23,15 @@ from pytest_mock import MockerFixture from superset.connectors.sqla.models import SqlaTable, SqlMetric, TableColumn -from superset.db_engine_specs.clickhouse import ClickHouseEngineSpec from superset.exceptions import QueryObjectValidationError from superset.models.core import Database from superset.superset_typing import QueryObjectDict -from superset.utils.core import GenericDataType, MultiValueColumnOperation +from superset.utils.core import ( + GenericDataType, + get_column_name_from_column, + get_column_names_from_columns, + MultiValueColumnOperation, +) def _make_dataset(mocker: MockerFixture) -> SqlaTable: @@ -79,6 +83,10 @@ def _query_obj(dimension: dict) -> QueryObjectDict: def _clickhouse_dataset(mocker: MockerFixture) -> SqlaTable: + # Imported lazily: clickhouse.py touches app.config at import time, which + # is unavailable at pytest collection once clickhouse-connect is installed. + from superset.db_engine_specs.clickhouse import ClickHouseEngineSpec + dataset = _make_dataset(mocker) mocker.patch.object( SqlaTable, @@ -88,6 +96,16 @@ def _clickhouse_dataset(mocker: MockerFixture) -> SqlaTable: return dataset +def test_multivalue_column_not_treated_as_physical_column() -> None: + """Regression: multi-value modifier columns must be ignored by the + physical-column extraction used by query-context validation, otherwise they + are wrongly reported as "Columns missing in dataset" on execution. + """ + col = _length_dimension() + assert get_column_name_from_column(col) is None + assert get_column_names_from_columns([col, "city"]) == ["city"] + + def test_length_dimension_generates_native_sql( mocker: MockerFixture, app: Flask, From 66a9e9953b035252a4e14d4947bf5189ab5ced15 Mon Sep 17 00:00:00 2001 From: thedeceptio Date: Mon, 22 Jun 2026 14:31:27 +0530 Subject: [PATCH 7/7] chore(multi-value): satisfy pre-commit (mypy types, lazy imports, prettier) - Type the multi-value test helpers with AdhocColumn/Column and QueryObjectDict, and exercise the raise paths via get_query_str_extended() instead of get_sqla_query(**dict) (avoids mypy TypedDict-unpacking false positives). - Import ClickHouseEngineSpec lazily inside tests: clickhouse.py reads app.config at import time, which is unavailable at pytest collection once clickhouse-connect is installed. - Apply prettier formatting to the adhoc filter simple-tab test. Co-Authored-By: Claude Opus 4.8 Signed-off-by: thedeceptio --- ...hocFilterEditPopoverSimpleTabContent.test.tsx | 4 ++-- .../unit_tests/models/test_multivalue_explode.py | 10 ++++++---- .../unit_tests/models/test_multivalue_filter.py | 2 +- .../unit_tests/models/test_multivalue_length.py | 16 +++++++++------- 4 files changed, 18 insertions(+), 14 deletions(-) diff --git a/superset-frontend/src/explore/components/controls/FilterControl/AdhocFilterEditPopoverSimpleTabContent/AdhocFilterEditPopoverSimpleTabContent.test.tsx b/superset-frontend/src/explore/components/controls/FilterControl/AdhocFilterEditPopoverSimpleTabContent/AdhocFilterEditPopoverSimpleTabContent.test.tsx index 6c0898c6f581..b6543b7e693e 100644 --- a/superset-frontend/src/explore/components/controls/FilterControl/AdhocFilterEditPopoverSimpleTabContent/AdhocFilterEditPopoverSimpleTabContent.test.tsx +++ b/superset-frontend/src/explore/components/controls/FilterControl/AdhocFilterEditPopoverSimpleTabContent/AdhocFilterEditPopoverSimpleTabContent.test.tsx @@ -277,8 +277,8 @@ test('shows CONTAINS and null operators when subject is multi-value', () => { const { isOperatorRelevant } = useSimpleTabFilterProps( props as unknown as Props, ); - [Operators.Contains, Operators.IsNull, Operators.IsNotNull].forEach(operator => - expect(isOperatorRelevant(operator, 'skills')).toBe(true), + [Operators.Contains, Operators.IsNull, Operators.IsNotNull].forEach( + operator => expect(isOperatorRelevant(operator, 'skills')).toBe(true), ); // scalar operators are hidden for array columns [Operators.Equals, Operators.GreaterThan, Operators.Like].forEach(operator => diff --git a/tests/unit_tests/models/test_multivalue_explode.py b/tests/unit_tests/models/test_multivalue_explode.py index 2dd0bb99f1df..79645acdd3bd 100644 --- a/tests/unit_tests/models/test_multivalue_explode.py +++ b/tests/unit_tests/models/test_multivalue_explode.py @@ -32,7 +32,7 @@ from superset.db_engine_specs.base import BaseEngineSpec from superset.exceptions import QueryObjectValidationError from superset.models.core import Database -from superset.superset_typing import QueryObjectDict +from superset.superset_typing import AdhocColumn, Column, QueryObjectDict from superset.utils.core import MultiValueColumnOperation @@ -70,7 +70,7 @@ def _make_dataset(mocker: MockerFixture) -> SqlaTable: return dataset -def _explode_dimension() -> dict: +def _explode_dimension() -> AdhocColumn: return { "label": "skill", "column": "skills", @@ -78,7 +78,7 @@ def _explode_dimension() -> dict: } -def _query_obj(dimension: dict) -> QueryObjectDict: +def _query_obj(dimension: Column) -> QueryObjectDict: return { "granularity": None, "from_dttm": None, @@ -152,7 +152,9 @@ def test_explode_unsupported_engine_raises( dataset = _make_dataset(mocker) with app.test_request_context(): # noqa: SIM117 with pytest.raises(QueryObjectValidationError): - dataset.get_sqla_query(**_query_obj(_explode_dimension())) + dataset.get_query_str_extended( + _query_obj(_explode_dimension()), mutate=False + ) def test_explode_unimplemented_dialect_raises_clean_error( diff --git a/tests/unit_tests/models/test_multivalue_filter.py b/tests/unit_tests/models/test_multivalue_filter.py index 0301eca2af66..db52e67b017a 100644 --- a/tests/unit_tests/models/test_multivalue_filter.py +++ b/tests/unit_tests/models/test_multivalue_filter.py @@ -89,7 +89,7 @@ def test_contains_filter_unsupported_engine_raises( dataset = _make_dataset(mocker) with app.test_request_context(): # noqa: SIM117 with pytest.raises(QueryObjectValidationError): - dataset.get_sqla_query(**_query_obj()) + dataset.get_query_str_extended(_query_obj(), mutate=False) def test_contains_filter_generates_native_sql( diff --git a/tests/unit_tests/models/test_multivalue_length.py b/tests/unit_tests/models/test_multivalue_length.py index c321615f5e76..67b4db1b3ac2 100644 --- a/tests/unit_tests/models/test_multivalue_length.py +++ b/tests/unit_tests/models/test_multivalue_length.py @@ -25,7 +25,7 @@ from superset.connectors.sqla.models import SqlaTable, SqlMetric, TableColumn from superset.exceptions import QueryObjectValidationError from superset.models.core import Database -from superset.superset_typing import QueryObjectDict +from superset.superset_typing import AdhocColumn, Column, QueryObjectDict from superset.utils.core import ( GenericDataType, get_column_name_from_column, @@ -61,7 +61,7 @@ def _make_dataset(mocker: MockerFixture) -> SqlaTable: return dataset -def _length_dimension() -> dict: +def _length_dimension() -> AdhocColumn: return { "label": "skills_length", "column": "skills", @@ -69,7 +69,7 @@ def _length_dimension() -> dict: } -def _query_obj(dimension: dict) -> QueryObjectDict: +def _query_obj(dimension: Column) -> QueryObjectDict: return { "granularity": None, "from_dttm": None, @@ -142,7 +142,9 @@ def test_length_dimension_unsupported_engine_raises( dataset = _make_dataset(mocker) with app.test_request_context(): # noqa: SIM117 with pytest.raises(QueryObjectValidationError): - dataset.get_sqla_query(**_query_obj(_length_dimension())) + dataset.get_query_str_extended( + _query_obj(_length_dimension()), mutate=False + ) def test_length_dimension_unknown_base_column_raises( @@ -151,14 +153,14 @@ def test_length_dimension_unknown_base_column_raises( ) -> None: """A length op referencing a missing base column is rejected.""" dataset = _clickhouse_dataset(mocker) - bad = { + bad: AdhocColumn = { "label": "nope_length", "column": "does_not_exist", "columnOperation": MultiValueColumnOperation.LENGTH.value, } with app.test_request_context(): # noqa: SIM117 with pytest.raises(QueryObjectValidationError): - dataset.get_sqla_query(**_query_obj(bad)) + dataset.get_query_str_extended(_query_obj(bad), mutate=False) def test_unsupported_operation_raises( @@ -167,7 +169,7 @@ def test_unsupported_operation_raises( ) -> None: """An unknown columnOperation is rejected rather than silently ignored.""" dataset = _clickhouse_dataset(mocker) - bad = { + bad: AdhocColumn = { "label": "skills_x", "column": "skills", "columnOperation": "NOT_A_REAL_OP",