diff --git a/superset-frontend/packages/superset-core/src/common/index.ts b/superset-frontend/packages/superset-core/src/common/index.ts
index cc399cf62751..34d513e4f74e 100644
--- a/superset-frontend/packages/superset-core/src/common/index.ts
+++ b/superset-frontend/packages/superset-core/src/common/index.ts
@@ -130,6 +130,7 @@ export enum GenericDataType {
String = 1,
Temporal = 2,
Boolean = 3,
+ MultiValue = 4,
}
/**
diff --git a/superset-frontend/packages/superset-ui-chart-controls/src/components/ColumnTypeLabel/ColumnTypeLabel.tsx b/superset-frontend/packages/superset-ui-chart-controls/src/components/ColumnTypeLabel/ColumnTypeLabel.tsx
index 8ea43bdc8a68..0316ca257629 100644
--- a/superset-frontend/packages/superset-ui-chart-controls/src/components/ColumnTypeLabel/ColumnTypeLabel.tsx
+++ b/superset-frontend/packages/superset-ui-chart-controls/src/components/ColumnTypeLabel/ColumnTypeLabel.tsx
@@ -28,6 +28,7 @@ import {
FieldBinaryOutlined,
FieldStringOutlined,
NumberOutlined,
+ UnorderedListOutlined,
} from '@ant-design/icons';
import { Icons } from '@superset-ui/core/components';
@@ -72,6 +73,10 @@ export function ColumnTypeLabel({ type }: ColumnTypeLabelProps) {
typeIcon = ;
} else if (type === GenericDataType.Temporal) {
typeIcon = ;
+ } else if (type === GenericDataType.MultiValue) {
+ typeIcon = (
+
+ );
}
return {typeIcon};
diff --git a/superset-frontend/packages/superset-ui-chart-controls/test/components/ColumnTypeLabel.test.tsx b/superset-frontend/packages/superset-ui-chart-controls/test/components/ColumnTypeLabel.test.tsx
index fc65cd26f86f..46fc74b49367 100644
--- a/superset-frontend/packages/superset-ui-chart-controls/test/components/ColumnTypeLabel.test.tsx
+++ b/superset-frontend/packages/superset-ui-chart-controls/test/components/ColumnTypeLabel.test.tsx
@@ -64,4 +64,21 @@ describe('ColumnOption', () => {
renderColumnTypeLabel({ type: GenericDataType.Temporal });
expect(screen.getByLabelText('temporal type icon')).toBeVisible();
});
+ test('multi-value (array) type shows list icon', () => {
+ renderColumnTypeLabel({ type: GenericDataType.MultiValue });
+ expect(screen.getByLabelText('multi-value type icon')).toBeVisible();
+ });
+});
+
+describe('GenericDataType enum parity', () => {
+ // These numeric values are shared with the backend enum in
+ // superset/utils/core.py (GenericDataType). They must stay in sync because
+ // the backend serializes columns using these integers.
+ test('values match the backend contract', () => {
+ expect(GenericDataType.Numeric).toBe(0);
+ expect(GenericDataType.String).toBe(1);
+ expect(GenericDataType.Temporal).toBe(2);
+ expect(GenericDataType.Boolean).toBe(3);
+ expect(GenericDataType.MultiValue).toBe(4);
+ });
});
diff --git a/superset-frontend/src/explore/components/controls/FilterControl/AdhocFilterEditPopoverSimpleTabContent/AdhocFilterEditPopoverSimpleTabContent.test.tsx b/superset-frontend/src/explore/components/controls/FilterControl/AdhocFilterEditPopoverSimpleTabContent/AdhocFilterEditPopoverSimpleTabContent.test.tsx
index c2b5231a90dc..b6543b7e693e 100644
--- a/superset-frontend/src/explore/components/controls/FilterControl/AdhocFilterEditPopoverSimpleTabContent/AdhocFilterEditPopoverSimpleTabContent.test.tsx
+++ b/superset-frontend/src/explore/components/controls/FilterControl/AdhocFilterEditPopoverSimpleTabContent/AdhocFilterEditPopoverSimpleTabContent.test.tsx
@@ -35,6 +35,7 @@ import {
} from 'src/explore/constants';
import AdhocMetric from 'src/explore/components/controls/MetricControl/AdhocMetric';
import { FeatureFlag, isFeatureEnabled } from '@superset-ui/core';
+import { GenericDataType } from '@apache-superset/core/common';
import fetchMock from 'fetch-mock';
import { TestDataset, Dataset } from '@superset-ui/chart-controls';
@@ -252,6 +253,59 @@ test('shows boolean only operators when subject is number', () => {
].map(operator => expect(isOperatorRelevant(operator, 'value')).toBe(true));
});
+test('shows CONTAINS and null operators when subject is multi-value', () => {
+ const props = setup({
+ adhocFilter: new AdhocFilter({
+ expressionType: ExpressionTypes.Simple,
+ subject: 'skills',
+ operatorId: undefined,
+ operator: undefined,
+ comparator: undefined,
+ clause: undefined,
+ }),
+ datasource: {
+ columns: [
+ {
+ id: 3,
+ column_name: 'skills',
+ type: 'Array(String)',
+ type_generic: GenericDataType.MultiValue,
+ },
+ ],
+ },
+ });
+ const { isOperatorRelevant } = useSimpleTabFilterProps(
+ props as unknown as Props,
+ );
+ [Operators.Contains, Operators.IsNull, Operators.IsNotNull].forEach(
+ operator => expect(isOperatorRelevant(operator, 'skills')).toBe(true),
+ );
+ // scalar operators are hidden for array columns
+ [Operators.Equals, Operators.GreaterThan, Operators.Like].forEach(operator =>
+ expect(isOperatorRelevant(operator, 'skills')).toBe(false),
+ );
+});
+
+test('hides CONTAINS for non multi-value columns', () => {
+ const props = setup({
+ adhocFilter: new AdhocFilter({
+ expressionType: ExpressionTypes.Simple,
+ subject: 'value',
+ operatorId: undefined,
+ operator: undefined,
+ comparator: undefined,
+ clause: undefined,
+ }),
+ datasource: {
+ columns: [{ id: 3, column_name: 'value', type: 'STRING' }],
+ },
+ });
+ const { isOperatorRelevant } = useSimpleTabFilterProps(
+ props as unknown as Props,
+ );
+ expect(isOperatorRelevant(Operators.Contains, 'value')).toBe(false);
+});
+
test('will convert from individual comparator to array if the operator changes to multi', () => {
const props = setup();
const { onOperatorChange } = useSimpleTabFilterProps(
diff --git a/superset-frontend/src/explore/components/controls/FilterControl/AdhocFilterEditPopoverSimpleTabContent/index.tsx b/superset-frontend/src/explore/components/controls/FilterControl/AdhocFilterEditPopoverSimpleTabContent/index.tsx
index 0c66026f8e1d..845e3cbd0e67 100644
--- a/superset-frontend/src/explore/components/controls/FilterControl/AdhocFilterEditPopoverSimpleTabContent/index.tsx
+++ b/superset-frontend/src/explore/components/controls/FilterControl/AdhocFilterEditPopoverSimpleTabContent/index.tsx
@@ -32,6 +32,7 @@ import {
isDefined,
SupersetClient,
} from '@superset-ui/core';
+import { GenericDataType } from '@apache-superset/core/common';
import { styled, useTheme, css } from '@apache-superset/core/theme';
import {
Operators,
@@ -118,6 +119,8 @@ export const useSimpleTabFilterProps = (props: Props) => {
const isColumnNumber =
!!column && (column.type === 'INT' || column.type === 'INTEGER');
const isColumnFunction = !!column && !!column.expression;
+ const isColumnMultiValue =
+ !!column && column.type_generic === GenericDataType.MultiValue;
if (operator && operator === Operators.LatestPartition) {
const { partitionColumn } = props;
@@ -127,6 +130,18 @@ export const useSimpleTabFilterProps = (props: Props) => {
// hide the TEMPORAL_RANGE operator
return false;
}
+ // CONTAINS (array membership) only applies to multi-value columns.
+ if (operator === Operators.Contains) {
+ return isColumnMultiValue;
+ }
+ if (isColumnMultiValue) {
+ // multi-value columns support membership and null checks only
+ return (
+ operator === Operators.Contains ||
+ operator === Operators.IsNull ||
+ operator === Operators.IsNotNull
+ );
+ }
if (operator === Operators.IsTrue || operator === Operators.IsFalse) {
return isColumnBoolean || isColumnNumber || isColumnFunction;
}
diff --git a/superset-frontend/src/explore/components/controls/FilterControl/utils/translateToSQL.ts b/superset-frontend/src/explore/components/controls/FilterControl/utils/translateToSQL.ts
index 171ddbc6f528..c105bcda9fdd 100644
--- a/superset-frontend/src/explore/components/controls/FilterControl/utils/translateToSQL.ts
+++ b/superset-frontend/src/explore/components/controls/FilterControl/utils/translateToSQL.ts
@@ -44,6 +44,7 @@ export const OPERATORS_TO_SQL = {
'IS NULL': 'IS NULL',
'IS TRUE': 'IS TRUE',
'IS FALSE': 'IS FALSE',
+ CONTAINS: 'CONTAINS',
'LATEST PARTITION': ({
datasource,
}: {
diff --git a/superset-frontend/src/explore/constants.ts b/superset-frontend/src/explore/constants.ts
index 437c859232a4..f0c41d603756 100644
--- a/superset-frontend/src/explore/constants.ts
+++ b/superset-frontend/src/explore/constants.ts
@@ -45,6 +45,7 @@ export enum Operators {
IsTrue = 'IS_TRUE',
IsFalse = 'IS_FALSE',
TemporalRange = 'TEMPORAL_RANGE',
+ Contains = 'CONTAINS',
}
export interface OperatorType {
@@ -89,6 +90,7 @@ export const OPERATOR_ENUM_TO_OPERATOR_TYPE: {
display: t('TEMPORAL_RANGE'),
operation: 'TEMPORAL_RANGE',
},
+ [Operators.Contains]: { display: t('Contains'), operation: 'CONTAINS' },
};
export const OPERATORS_OPTIONS = Object.values(Operators) as Operators[];
diff --git a/superset/connectors/sqla/models.py b/superset/connectors/sqla/models.py
index 81b5c2fc8d9d..de7bbbaec77b 100644
--- a/superset/connectors/sqla/models.py
+++ b/superset/connectors/sqla/models.py
@@ -1711,6 +1711,63 @@ def _render_adhoc_expression_for_metadata_lookup(
)
) from ex
+ def _multivalue_column_to_sqla(
+ self,
+ col: AdhocColumn,
+ template_processor: BaseTemplateProcessor | None = None,
+ ) -> tuple[ColumnElement, utils.GenericDataType | None]:
+ """
+ Turn a multi-value (array) modifier column into a sqlalchemy column.
+
+ The column references a base array column and an operation (e.g. array
+ length); the native SQL is produced by the engine spec so the same
+ payload works across any dialect that supports array columns.
+ """
+ label = utils.get_column_name(col)
+ base_name = col.get("column")
+ operation = col.get("columnOperation")
+
+ db_engine_spec = self.db_engine_spec
+ if not db_engine_spec.supports_multivalue_columns:
+ raise QueryObjectValidationError(
+ _("This database does not support multi-value (array) columns.")
+ )
+
+ base_column = self.get_column(base_name)
+ if base_column is None:
+ raise QueryObjectValidationError(
+ _("Unknown column used as multi-value source: %(col)s", col=base_name)
+ )
+ base_sqla_col = base_column.get_sqla_col(template_processor=template_processor)
+
+ if operation == utils.MultiValueColumnOperation.LENGTH:
+ expression = db_engine_spec.array_length(base_sqla_col)
+ generic_type = utils.GenericDataType.NUMERIC
+ elif operation == utils.MultiValueColumnOperation.EXPLODE:
+ # Scalar explode (e.g. ClickHouse arrayJoin) can be projected directly.
+ # Set-returning UNNEST dialects (Postgres/Trino/BigQuery) need extra
+ # FROM/JOIN plumbing and are handled in a later phase; for those the
+ # engine spec leaves array_explode unimplemented and we surface a
+ # clear error instead of emitting invalid SQL.
+ try:
+ expression = db_engine_spec.array_explode(base_sqla_col)
+ except NotImplementedError as ex:
+ raise QueryObjectValidationError(
+ _("This database does not support exploding array columns.")
+ ) from ex
+ # The exploded value is a single array element; its type is not
+ # reliably known from the array type string, so leave it unset.
+ generic_type = None
+ else:
+ raise QueryObjectValidationError(
+ _(
+ "Unsupported multi-value column operation: %(op)s",
+ op=operation,
+ )
+ )
+
+ return self.make_sqla_column_compatible(expression, label), generic_type
+
def adhoc_column_to_sqla( # pylint: disable=too-many-locals
self,
col: AdhocColumn,
@@ -1733,6 +1790,11 @@ def adhoc_column_to_sqla( # pylint: disable=too-many-locals
Python type (e.g. numeric casts for numeric adhoc expressions).
:rtype: tuple[sqlalchemy.sql.ColumnElement, Optional[GenericDataType]]
"""
+ if utils.is_multivalue_operation_column(col):
+ return self._multivalue_column_to_sqla(
+ col, template_processor=template_processor
+ )
+
label = utils.get_column_name(col)
sql_expression = col["sqlExpression"]
time_grain = col.get("timeGrain")
diff --git a/superset/db_engine_specs/base.py b/superset/db_engine_specs/base.py
index 3b3c234c1650..08ff0aa254ee 100644
--- a/superset/db_engine_specs/base.py
+++ b/superset/db_engine_specs/base.py
@@ -55,7 +55,13 @@
from sqlalchemy.engine.url import URL
from sqlalchemy.ext.compiler import compiles
from sqlalchemy.sql import literal_column, quoted_name, text
-from sqlalchemy.sql.expression import BinaryExpression, ColumnClause, Select, TextClause
+from sqlalchemy.sql.expression import (
+ BinaryExpression,
+ ColumnClause,
+ ColumnElement,
+ Select,
+ TextClause,
+)
from sqlalchemy.types import TypeEngine
from superset import db
@@ -493,6 +499,11 @@ class BaseEngineSpec: # pylint: disable=too-many-public-methods
time_groupby_inline = False
limit_method = LimitMethod.FORCE_LIMIT
supports_multivalues_insert = False
+ # Whether this engine supports first-class multi-value (array-typed) columns.
+ # When True, array columns are classified as ``GenericDataType.MULTI_VALUE`` and
+ # the ``array_*`` capability methods below must be implemented. Defaults to
+ # False so engines that have not opted in keep treating arrays as strings.
+ supports_multivalue_columns = False
allows_joins = True
allows_subqueries = True
allows_alias_in_select = True
@@ -2393,6 +2404,51 @@ def update_params_from_encrypted_extra( # pylint: disable=invalid-name
logger.error(ex, exc_info=True)
raise
+ @classmethod
+ def array_contains(cls, col: ColumnElement, value: Any) -> ColumnElement:
+ """
+ Build a boolean expression testing whether array column ``col`` contains
+ ``value``. Engines that set ``supports_multivalue_columns = True`` must
+ override this with their native array-membership function.
+
+ :param col: SQLAlchemy column element for the array column
+ :param value: scalar value to look for inside the array
+ :return: a SQLAlchemy boolean expression
+ """
+ raise NotImplementedError(
+ f"{cls.engine} does not support multi-value (array) columns"
+ )
+
+ @classmethod
+ def array_length(cls, col: ColumnElement) -> ColumnElement:
+ """
+ Build a numeric expression returning the number of elements in array
+ column ``col``. Engines that set ``supports_multivalue_columns = True``
+ must override this with their native array-length function.
+
+ :param col: SQLAlchemy column element for the array column
+ :return: a SQLAlchemy numeric expression
+ """
+ raise NotImplementedError(
+ f"{cls.engine} does not support multi-value (array) columns"
+ )
+
+ @classmethod
+ def array_explode(cls, col: ColumnElement) -> ColumnElement:
+ """
+ Build an expression that expands array column ``col`` into one scalar
+ value per element (e.g. ClickHouse ``arrayJoin``). Set-returning dialects
+ (Postgres/Trino/BigQuery ``UNNEST``) need additional FROM/JOIN plumbing
+ that is handled by the query builder; this method only returns the
+ element-producing expression.
+
+ :param col: SQLAlchemy column element for the array column
+ :return: a SQLAlchemy expression yielding individual array elements
+ """
+ raise NotImplementedError(
+ f"{cls.engine} does not support multi-value (array) columns"
+ )
+
@classmethod
def get_column_spec( # pylint: disable=unused-argument
cls,
diff --git a/superset/db_engine_specs/clickhouse.py b/superset/db_engine_specs/clickhouse.py
index a998dce75e5c..66b5ac7eebc4 100644
--- a/superset/db_engine_specs/clickhouse.py
+++ b/superset/db_engine_specs/clickhouse.py
@@ -26,8 +26,9 @@
from flask_babel import gettext as __
from marshmallow import fields, Schema
from marshmallow.validate import Range
-from sqlalchemy import types
+from sqlalchemy import func, types
from sqlalchemy.engine.url import URL
+from sqlalchemy.sql.expression import ColumnElement
from urllib3.exceptions import NewConnectionError
from superset.databases.utils import make_url_safe
@@ -55,6 +56,7 @@ class ClickHouseBaseEngineSpec(BaseEngineSpec):
time_groupby_inline = True
supports_multivalues_insert = True
+ supports_multivalue_columns = True
_time_grain_expressions = {
None: "{col}",
@@ -80,7 +82,7 @@ class ClickHouseBaseEngineSpec(BaseEngineSpec):
(
re.compile(r".*Array.*", re.IGNORECASE),
types.String(),
- GenericDataType.STRING,
+ GenericDataType.MULTI_VALUE,
),
(
re.compile(r".*UUID.*", re.IGNORECASE),
@@ -119,6 +121,22 @@ class ClickHouseBaseEngineSpec(BaseEngineSpec):
),
)
+ @classmethod
+ def array_contains(cls, col: ColumnElement, value: Any) -> ColumnElement:
+ # ClickHouse: has(arr, value) -> 1 if the array contains value
+ return func.has(col, value)
+
+ @classmethod
+ def array_length(cls, col: ColumnElement) -> ColumnElement:
+ # ClickHouse: length(arr) -> number of elements
+ return func.length(col)
+
+ @classmethod
+ def array_explode(cls, col: ColumnElement) -> ColumnElement:
+ # ClickHouse: arrayJoin(arr) is a scalar function usable directly in
+ # SELECT/GROUP BY (no JOIN needed, unlike UNNEST dialects).
+ return func.arrayJoin(col)
+
@classmethod
def epoch_to_dttm(cls) -> str:
return "{col}"
diff --git a/superset/models/helpers.py b/superset/models/helpers.py
index b603f0bb3cea..09280de3d5b1 100644
--- a/superset/models/helpers.py
+++ b/superset/models/helpers.py
@@ -3278,6 +3278,13 @@ def get_sqla_query( # pylint: disable=too-many-arguments,too-many-locals,too-ma
select_exprs.append(outer)
elif columns:
for selected in columns:
+ if utils.is_multivalue_operation_column(selected):
+ outer, _unused = self.adhoc_column_to_sqla(
+ col=selected,
+ template_processor=template_processor,
+ )
+ select_exprs.append(outer)
+ continue
if is_adhoc_column(selected):
_sql = selected["sqlExpression"]
_column_label = selected["label"]
@@ -3615,6 +3622,17 @@ def get_sqla_query( # pylint: disable=too-many-arguments,too-many-locals,too-ma
target_clause_list.append(sqla_col.not_like(eq))
else:
target_clause_list.append(sqla_col.not_ilike(eq))
+ elif op == utils.FilterOperator.CONTAINS:
+ if not db_engine_spec.supports_multivalue_columns:
+ raise QueryObjectValidationError(
+ _(
+ "The CONTAINS operator is only supported for "
+ "multi-value (array) columns on this database."
+ )
+ )
+ target_clause_list.append(
+ db_engine_spec.array_contains(sqla_col, eq)
+ )
elif (
op == utils.FilterOperator.TEMPORAL_RANGE
and isinstance(eq, str)
diff --git a/superset/superset_typing.py b/superset/superset_typing.py
index 39ff9ab13478..31294a5af3d7 100644
--- a/superset/superset_typing.py
+++ b/superset/superset_typing.py
@@ -104,6 +104,10 @@ class AdhocColumn(TypedDict, total=False):
isColumnReference: bool | None
columnType: Literal["BASE_AXIS", "SERIES"] | None
timeGrain: str | None
+ # Multi-value (array) column modifiers: ``column`` names the base array
+ # column and ``columnOperation`` the operation to apply (e.g. "LENGTH").
+ column: str
+ columnOperation: str
class SQLAColumnType(TypedDict):
diff --git a/superset/utils/core.py b/superset/utils/core.py
index f49a874f2e70..08253956a5b2 100644
--- a/superset/utils/core.py
+++ b/superset/utils/core.py
@@ -187,7 +187,7 @@ class GenericDataType(IntEnum):
STRING = 1
TEMPORAL = 2
BOOLEAN = 3
- # ARRAY = 4 # Mapping all the complex data types to STRING for now
+ MULTI_VALUE = 4 # array-typed columns (e.g. ClickHouse Array, Postgres ARRAY)
# JSON = 5 # and leaving these as a reminder.
# MAP = 6
# ROW = 7
@@ -277,6 +277,7 @@ class FilterOperator(StrEnum):
IS_TRUE = "IS TRUE"
IS_FALSE = "IS FALSE"
TEMPORAL_RANGE = "TEMPORAL_RANGE"
+ CONTAINS = "CONTAINS" # array membership, for MULTI_VALUE columns
class FilterStringOperators(StrEnum):
@@ -295,6 +296,7 @@ class FilterStringOperators(StrEnum):
LATEST_PARTITION = ("LATEST_PARTITION",)
IS_TRUE = ("IS_TRUE",)
IS_FALSE = ("IS_FALSE",)
+ CONTAINS = ("CONTAINS",)
class PostProcessingBoxplotWhiskerType(StrEnum):
@@ -1271,6 +1273,22 @@ def is_adhoc_column(column: Column) -> TypeGuard[AdhocColumn]:
)
+class MultiValueColumnOperation(StrEnum):
+ """Operations that can be applied to a multi-value (array) column."""
+
+ LENGTH = "LENGTH"
+ EXPLODE = "EXPLODE"
+
+
+def is_multivalue_operation_column(column: Column) -> bool:
+ """Whether ``column`` is a multi-value modifier (e.g. array length).
+
+ These columns carry a base ``column`` plus a ``columnOperation`` instead of a
+ ``sqlExpression``; the actual SQL is produced by the engine spec.
+ """
+ return isinstance(column, dict) and "columnOperation" in column
+
+
def is_base_axis(column: Column) -> bool:
return is_adhoc_column(column) and column.get("columnType") == "BASE_AXIS"
@@ -1676,7 +1694,7 @@ def get_column_name_from_column(column: Column) -> str | None:
:param column: Physical and ad-hoc column
:return: column name if physical column, otherwise None
"""
- if is_adhoc_column(column):
+ if is_adhoc_column(column) or is_multivalue_operation_column(column):
return None
return column # type: ignore
diff --git a/tests/unit_tests/db_engine_specs/test_base.py b/tests/unit_tests/db_engine_specs/test_base.py
index e46517a9927e..18b456a3837a 100644
--- a/tests/unit_tests/db_engine_specs/test_base.py
+++ b/tests/unit_tests/db_engine_specs/test_base.py
@@ -1360,3 +1360,19 @@ def test_resolve_column_type_falls_back_to_pa_mapped() -> None:
def test_resolve_column_type_returns_none_when_both_absent() -> None:
"""None is returned when neither source provides a type."""
assert BaseEngineSpec.resolve_column_type(None, None) is None
+
+
+def test_multivalue_columns_disabled_by_default() -> None:
+ """Engines must opt in to multi-value support; base defaults to off."""
+ assert BaseEngineSpec.supports_multivalue_columns is False
+
+
+@pytest.mark.parametrize("method", ["array_contains", "array_length", "array_explode"])
+def test_array_capabilities_raise_when_unsupported(method: str) -> None:
+ """Array capability methods raise NotImplementedError unless overridden."""
+ from sqlalchemy import column
+
+ fn = getattr(BaseEngineSpec, method)
+ args = (column("c"), "v") if method == "array_contains" else (column("c"),)
+ with pytest.raises(NotImplementedError):
+ fn(*args)
diff --git a/tests/unit_tests/db_engine_specs/test_clickhouse.py b/tests/unit_tests/db_engine_specs/test_clickhouse.py
index 5f56554e82d1..d77b03aacae6 100644
--- a/tests/unit_tests/db_engine_specs/test_clickhouse.py
+++ b/tests/unit_tests/db_engine_specs/test_clickhouse.py
@@ -109,7 +109,16 @@ def test_connect_convert_dttm(
GenericDataType.STRING,
False,
),
- ("Array(UInt8)", String, None, GenericDataType.STRING, False),
+ ("Array(UInt8)", String, None, GenericDataType.MULTI_VALUE, False),
+ ("Array(String)", String, None, GenericDataType.MULTI_VALUE, False),
+ ("Array(UInt64)", String, None, GenericDataType.MULTI_VALUE, False),
+ (
+ "Array(LowCardinality(String))",
+ String,
+ None,
+ GenericDataType.MULTI_VALUE,
+ False,
+ ),
("Enum('hello', 'world')", String, None, GenericDataType.STRING, False),
("Enum('UInt32', 'Bool')", String, None, GenericDataType.STRING, False),
(
@@ -233,3 +242,62 @@ def test_adjust_engine_params_fully_qualified(
uri = spec.adjust_engine_params(url, {}, None, schema)[0]
assert str(uri) == expected_result
+
+
+def _compile(expr) -> str:
+ return str(expr.compile(compile_kwargs={"literal_binds": True}))
+
+
+def test_clickhouse_supports_multivalue_columns() -> None:
+ from superset.db_engine_specs.clickhouse import ( # noqa: N813
+ ClickHouseEngineSpec as spec,
+ )
+
+ assert spec.supports_multivalue_columns is True
+
+
+def test_multivalue_contains_sql() -> None:
+ from sqlalchemy import column
+
+ from superset.db_engine_specs.clickhouse import ( # noqa: N813
+ ClickHouseEngineSpec as spec,
+ )
+
+ expr = spec.array_contains(column("skills"), "Driver")
+ assert _compile(expr) == "has(skills, 'Driver')"
+
+
+def test_multivalue_contains_binds_parameters() -> None:
+ """Value must be a bound parameter, not inlined (SQL-injection safety)."""
+ from sqlalchemy import column
+
+ from superset.db_engine_specs.clickhouse import ( # noqa: N813
+ ClickHouseEngineSpec as spec,
+ )
+
+ expr = spec.array_contains(column("skills"), "Driver")
+ compiled = expr.compile()
+ assert "Driver" not in str(compiled)
+ assert "Driver" in compiled.params.values()
+
+
+def test_multivalue_length_sql() -> None:
+ from sqlalchemy import column
+
+ from superset.db_engine_specs.clickhouse import ( # noqa: N813
+ ClickHouseEngineSpec as spec,
+ )
+
+ expr = spec.array_length(column("skills"))
+ assert _compile(expr) == "length(skills)"
+
+
+def test_multivalue_explode_sql() -> None:
+ from sqlalchemy import column
+
+ from superset.db_engine_specs.clickhouse import ( # noqa: N813
+ ClickHouseEngineSpec as spec,
+ )
+
+ expr = spec.array_explode(column("skills"))
+ assert _compile(expr) == "arrayJoin(skills)"
diff --git a/tests/unit_tests/models/test_multivalue_explode.py b/tests/unit_tests/models/test_multivalue_explode.py
new file mode 100644
index 000000000000..79645acdd3bd
--- /dev/null
+++ b/tests/unit_tests/models/test_multivalue_explode.py
@@ -0,0 +1,169 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+"""Explode (group by array elements) for multi-value columns — ClickHouse MVP.
+
+Set-returning UNNEST dialects (Postgres/Trino/BigQuery) need CROSS JOIN UNNEST
+plumbing and are handled in a later phase; here we cover the scalar ClickHouse
+``arrayJoin`` path plus the guard that keeps unimplemented dialects from emitting
+invalid SQL.
+"""
+
+from __future__ import annotations
+
+import pytest
+from flask import Flask
+from pytest_mock import MockerFixture
+
+from superset.connectors.sqla.models import SqlaTable, SqlMetric, TableColumn
+from superset.db_engine_specs.base import BaseEngineSpec
+from superset.exceptions import QueryObjectValidationError
+from superset.models.core import Database
+from superset.superset_typing import AdhocColumn, Column, QueryObjectDict
+from superset.utils.core import MultiValueColumnOperation
+
+
+class _UnnestOnlySpec(BaseEngineSpec):
+ """Simulates a future array dialect that has not implemented scalar explode."""
+
+ supports_multivalue_columns = True
+ # array_explode intentionally left as the base NotImplementedError
+
+
+def _make_dataset(mocker: MockerFixture) -> SqlaTable:
+ database = Database(
+ id=1,
+ database_name="test_db",
+ sqlalchemy_uri="sqlite://",
+ )
+ columns = [
+ TableColumn(column_name="skills", type="Array(String)"),
+ TableColumn(column_name="city", type="VARCHAR(100)"),
+ ]
+ dataset = SqlaTable(
+ table_name="jobs",
+ columns=columns,
+ database=database,
+ metrics=[SqlMetric(metric_name="count", expression="COUNT(*)")],
+ )
+ mocker.patch(
+ "superset.connectors.sqla.models.security_manager.get_guest_rls_filters",
+ return_value=[],
+ )
+ mocker.patch(
+ "superset.connectors.sqla.models.security_manager.is_guest_user",
+ return_value=False,
+ )
+ return dataset
+
+
+def _explode_dimension() -> AdhocColumn:
+ return {
+ "label": "skill",
+ "column": "skills",
+ "columnOperation": MultiValueColumnOperation.EXPLODE.value,
+ }
+
+
+def _query_obj(dimension: Column) -> QueryObjectDict:
+ return {
+ "granularity": None,
+ "from_dttm": None,
+ "to_dttm": None,
+ "is_timeseries": False,
+ "groupby": [dimension],
+ "metrics": ["count"],
+ "filter": [],
+ "columns": [],
+ }
+
+
+def _clickhouse_spec() -> type:
+ # Imported lazily: clickhouse.py touches app.config at import time, which
+ # is unavailable at pytest collection once clickhouse-connect is installed.
+ from superset.db_engine_specs.clickhouse import ClickHouseEngineSpec
+
+ return ClickHouseEngineSpec
+
+
+def _with_spec(mocker: MockerFixture, dataset: SqlaTable, spec: type) -> None:
+ mocker.patch.object(
+ SqlaTable,
+ "db_engine_spec",
+ new=property(lambda self: spec),
+ )
+
+
+def test_explode_dimension_generates_arrayjoin(
+ mocker: MockerFixture,
+ app: Flask,
+) -> None:
+ """An explode dimension routes through array_explode -> arrayJoin(skills)."""
+ dataset = _make_dataset(mocker)
+ _with_spec(mocker, dataset, _clickhouse_spec())
+ with app.test_request_context():
+ sql = dataset.get_query_str_extended(
+ _query_obj(_explode_dimension()), mutate=False
+ ).sql.lower()
+
+ assert "arrayjoin(skills)" in sql
+ assert "skill" in sql # the label is applied
+
+
+def test_explode_changes_generated_sql(
+ mocker: MockerFixture,
+ app: Flask,
+) -> None:
+ """Exploding a column yields different SQL than grouping by it raw.
+
+ The explode modifier is part of the query object, so a query that explodes
+ cannot collide with one that does not (distinct cache keys downstream).
+ """
+ dataset = _make_dataset(mocker)
+ _with_spec(mocker, dataset, _clickhouse_spec())
+ with app.test_request_context():
+ exploded = dataset.get_query_str_extended(
+ _query_obj(_explode_dimension()), mutate=False
+ ).sql
+ plain = dataset.get_query_str_extended(_query_obj("city"), mutate=False).sql
+
+ assert exploded != plain
+ assert "arrayJoin" in exploded
+
+
+def test_explode_unsupported_engine_raises(
+ mocker: MockerFixture,
+ app: Flask,
+) -> None:
+ """On an engine without array support (sqlite) explode is rejected."""
+ dataset = _make_dataset(mocker)
+ with app.test_request_context(): # noqa: SIM117
+ with pytest.raises(QueryObjectValidationError):
+ dataset.get_query_str_extended(
+ _query_obj(_explode_dimension()), mutate=False
+ )
+
+
+def test_explode_unimplemented_dialect_raises_clean_error(
+ mocker: MockerFixture,
+ app: Flask,
+) -> None:
+ """A multi-value dialect lacking scalar explode gets a clear error, not bad SQL."""
+ dataset = _make_dataset(mocker)
+ _with_spec(mocker, dataset, _UnnestOnlySpec)
+ with app.test_request_context(): # noqa: SIM117
+ with pytest.raises(QueryObjectValidationError):
+ dataset.adhoc_column_to_sqla(col=_explode_dimension())
diff --git a/tests/unit_tests/models/test_multivalue_filter.py b/tests/unit_tests/models/test_multivalue_filter.py
new file mode 100644
index 000000000000..db52e67b017a
--- /dev/null
+++ b/tests/unit_tests/models/test_multivalue_filter.py
@@ -0,0 +1,122 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+"""Stage B: the CONTAINS filter operator for multi-value (array) columns."""
+
+from __future__ import annotations
+
+import pytest
+from flask import Flask
+from pytest_mock import MockerFixture
+
+from superset.connectors.sqla.models import SqlaTable, SqlMetric, TableColumn
+from superset.exceptions import QueryObjectValidationError
+from superset.models.core import Database
+from superset.superset_typing import QueryObjectDict
+from superset.utils.core import FilterOperator
+
+
+def _make_dataset(mocker: MockerFixture) -> SqlaTable:
+ database = Database(
+ id=1,
+ database_name="test_db",
+ sqlalchemy_uri="sqlite://",
+ )
+ columns = [
+ TableColumn(column_name="skills", type="Array(String)"),
+ TableColumn(column_name="city", type="VARCHAR(100)"),
+ ]
+ dataset = SqlaTable(
+ table_name="jobs",
+ columns=columns,
+ database=database,
+ metrics=[SqlMetric(metric_name="count", expression="COUNT(*)")],
+ )
+ mocker.patch(
+ "superset.connectors.sqla.models.security_manager.get_guest_rls_filters",
+ return_value=[],
+ )
+ mocker.patch(
+ "superset.connectors.sqla.models.security_manager.is_guest_user",
+ return_value=False,
+ )
+ return dataset
+
+
+def _query_obj() -> QueryObjectDict:
+ return {
+ "granularity": None,
+ "from_dttm": None,
+ "to_dttm": None,
+ "is_timeseries": False,
+ "groupby": ["city"],
+ "metrics": ["count"],
+ "filter": [
+ {
+ "col": "skills",
+ "op": FilterOperator.CONTAINS.value,
+ "val": "Driver",
+ }
+ ],
+ "columns": [],
+ }
+
+
+def test_contains_operator_is_registered() -> None:
+ """The operator round-trips through the enum used to parse filter payloads."""
+ assert FilterOperator("CONTAINS") is FilterOperator.CONTAINS
+ assert FilterOperator.CONTAINS.value == "CONTAINS"
+
+
+def test_contains_filter_unsupported_engine_raises(
+ mocker: MockerFixture,
+ app: Flask,
+) -> None:
+ """On an engine without array support (sqlite) CONTAINS is rejected."""
+ dataset = _make_dataset(mocker)
+ with app.test_request_context(): # noqa: SIM117
+ with pytest.raises(QueryObjectValidationError):
+ dataset.get_query_str_extended(_query_obj(), mutate=False)
+
+
+def test_contains_filter_generates_native_sql(
+ mocker: MockerFixture,
+ app: Flask,
+) -> None:
+ """On a multi-value-capable engine CONTAINS compiles to the native call.
+
+ The dataset compiles against sqlite (so no ClickHouse driver is needed), but
+ its engine spec is ClickHouse, so the operator routes through
+ ``ClickHouseEngineSpec.array_contains`` -> ``has(...)``. ``func.has`` renders
+ the same regardless of dialect, which lets us assert on it here.
+ """
+ # Imported lazily: clickhouse.py touches app.config at import time, which
+ # is unavailable at pytest collection once clickhouse-connect is installed.
+ from superset.db_engine_specs.clickhouse import ClickHouseEngineSpec
+
+ dataset = _make_dataset(mocker)
+ mocker.patch.object(
+ SqlaTable,
+ "db_engine_spec",
+ new=property(lambda self: ClickHouseEngineSpec),
+ )
+
+ with app.test_request_context():
+ extended = dataset.get_query_str_extended(_query_obj(), mutate=False)
+ sql = extended.sql
+
+ assert "has(" in sql.lower()
+ assert "driver" in sql.lower()
diff --git a/tests/unit_tests/models/test_multivalue_length.py b/tests/unit_tests/models/test_multivalue_length.py
new file mode 100644
index 000000000000..67b4db1b3ac2
--- /dev/null
+++ b/tests/unit_tests/models/test_multivalue_length.py
@@ -0,0 +1,179 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+"""Stage D: the array-length virtual dimension for multi-value columns."""
+
+from __future__ import annotations
+
+import pytest
+from flask import Flask
+from pytest_mock import MockerFixture
+
+from superset.connectors.sqla.models import SqlaTable, SqlMetric, TableColumn
+from superset.exceptions import QueryObjectValidationError
+from superset.models.core import Database
+from superset.superset_typing import AdhocColumn, Column, QueryObjectDict
+from superset.utils.core import (
+ GenericDataType,
+ get_column_name_from_column,
+ get_column_names_from_columns,
+ MultiValueColumnOperation,
+)
+
+
+def _make_dataset(mocker: MockerFixture) -> SqlaTable:
+ database = Database(
+ id=1,
+ database_name="test_db",
+ sqlalchemy_uri="sqlite://",
+ )
+ columns = [
+ TableColumn(column_name="skills", type="Array(String)"),
+ TableColumn(column_name="city", type="VARCHAR(100)"),
+ ]
+ dataset = SqlaTable(
+ table_name="jobs",
+ columns=columns,
+ database=database,
+ metrics=[SqlMetric(metric_name="count", expression="COUNT(*)")],
+ )
+ mocker.patch(
+ "superset.connectors.sqla.models.security_manager.get_guest_rls_filters",
+ return_value=[],
+ )
+ mocker.patch(
+ "superset.connectors.sqla.models.security_manager.is_guest_user",
+ return_value=False,
+ )
+ return dataset
+
+
+def _length_dimension() -> AdhocColumn:
+ return {
+ "label": "skills_length",
+ "column": "skills",
+ "columnOperation": MultiValueColumnOperation.LENGTH.value,
+ }
+
+
+def _query_obj(dimension: Column) -> QueryObjectDict:
+ return {
+ "granularity": None,
+ "from_dttm": None,
+ "to_dttm": None,
+ "is_timeseries": False,
+ "groupby": [dimension],
+ "metrics": ["count"],
+ "filter": [],
+ "columns": [],
+ }
+
+
+def _clickhouse_dataset(mocker: MockerFixture) -> SqlaTable:
+ # Imported lazily: clickhouse.py touches app.config at import time, which
+ # is unavailable at pytest collection once clickhouse-connect is installed.
+ from superset.db_engine_specs.clickhouse import ClickHouseEngineSpec
+
+ dataset = _make_dataset(mocker)
+ mocker.patch.object(
+ SqlaTable,
+ "db_engine_spec",
+ new=property(lambda self: ClickHouseEngineSpec),
+ )
+ return dataset
+
+
+def test_multivalue_column_not_treated_as_physical_column() -> None:
+ """Regression: multi-value modifier columns must be ignored by the
+ physical-column extraction used by query-context validation, otherwise they
+ are wrongly reported as "Columns missing in dataset" on execution.
+ """
+ col = _length_dimension()
+ assert get_column_name_from_column(col) is None
+ assert get_column_names_from_columns([col, "city"]) == ["city"]
+
+
+def test_length_dimension_generates_native_sql(
+ mocker: MockerFixture,
+ app: Flask,
+) -> None:
+ """A length dimension routes through array_length -> length(skills)."""
+ dataset = _clickhouse_dataset(mocker)
+ with app.test_request_context():
+ extended = dataset.get_query_str_extended(
+ _query_obj(_length_dimension()), mutate=False
+ )
+ sql = extended.sql.lower()
+
+ assert "length(skills)" in sql
+ assert "skills_length" in sql # the label is applied
+
+
+def test_length_dimension_type_is_numeric(
+ mocker: MockerFixture,
+ app: Flask,
+) -> None:
+ """The derived length column is typed NUMERIC so it behaves like a number."""
+ dataset = _clickhouse_dataset(mocker)
+ with app.test_request_context():
+ _expr, generic_type = dataset.adhoc_column_to_sqla(col=_length_dimension())
+
+ assert generic_type == GenericDataType.NUMERIC
+
+
+def test_length_dimension_unsupported_engine_raises(
+ mocker: MockerFixture,
+ app: Flask,
+) -> None:
+ """On an engine without array support (sqlite) the length op is rejected."""
+ dataset = _make_dataset(mocker)
+ with app.test_request_context(): # noqa: SIM117
+ with pytest.raises(QueryObjectValidationError):
+ dataset.get_query_str_extended(
+ _query_obj(_length_dimension()), mutate=False
+ )
+
+
+def test_length_dimension_unknown_base_column_raises(
+ mocker: MockerFixture,
+ app: Flask,
+) -> None:
+ """A length op referencing a missing base column is rejected."""
+ dataset = _clickhouse_dataset(mocker)
+ bad: AdhocColumn = {
+ "label": "nope_length",
+ "column": "does_not_exist",
+ "columnOperation": MultiValueColumnOperation.LENGTH.value,
+ }
+ with app.test_request_context(): # noqa: SIM117
+ with pytest.raises(QueryObjectValidationError):
+ dataset.get_query_str_extended(_query_obj(bad), mutate=False)
+
+
+def test_unsupported_operation_raises(
+ mocker: MockerFixture,
+ app: Flask,
+) -> None:
+ """An unknown columnOperation is rejected rather than silently ignored."""
+ dataset = _clickhouse_dataset(mocker)
+ bad: AdhocColumn = {
+ "label": "skills_x",
+ "column": "skills",
+ "columnOperation": "NOT_A_REAL_OP",
+ }
+ with app.test_request_context(): # noqa: SIM117
+ with pytest.raises(QueryObjectValidationError):
+ dataset.adhoc_column_to_sqla(col=bad)