Skip to content
Original file line number Diff line number Diff line change
Expand Up @@ -130,6 +130,7 @@ export enum GenericDataType {
String = 1,
Temporal = 2,
Boolean = 3,
MultiValue = 4,
}

/**
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@ import {
FieldBinaryOutlined,
FieldStringOutlined,
NumberOutlined,
UnorderedListOutlined,
} from '@ant-design/icons';
import { Icons } from '@superset-ui/core/components';

Expand Down Expand Up @@ -72,6 +73,10 @@ export function ColumnTypeLabel({ type }: ColumnTypeLabelProps) {
typeIcon = <FieldBinaryOutlined aria-label={t('boolean type icon')} />;
} else if (type === GenericDataType.Temporal) {
typeIcon = <ClockCircleOutlined aria-label={t('temporal type icon')} />;
} else if (type === GenericDataType.MultiValue) {
typeIcon = (
<UnorderedListOutlined aria-label={t('multi-value type icon')} />
);
}

return <TypeIconWrapper>{typeIcon}</TypeIconWrapper>;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -64,4 +64,21 @@ describe('ColumnOption', () => {
renderColumnTypeLabel({ type: GenericDataType.Temporal });
expect(screen.getByLabelText('temporal type icon')).toBeVisible();
});
test('multi-value (array) type shows list icon', () => {
  // Multi-value columns are rendered with an icon that is located through
  // its accessible label.
  renderColumnTypeLabel({ type: GenericDataType.MultiValue });
  const listIcon = screen.getByLabelText('multi-value type icon');
  expect(listIcon).toBeVisible();
});
});

describe('GenericDataType enum parity', () => {
  // The backend enum (GenericDataType in superset/utils/core.py) serializes
  // columns using these integers, so every frontend member has to keep the
  // exact same numeric value.
  test('values match the backend contract', () => {
    // [frontend member, integer expected on the wire], in ascending order.
    const backendContract: [GenericDataType, number][] = [
      [GenericDataType.Numeric, 0],
      [GenericDataType.String, 1],
      [GenericDataType.Temporal, 2],
      [GenericDataType.Boolean, 3],
      [GenericDataType.MultiValue, 4],
    ];
    backendContract.forEach(([member, wireValue]) => {
      expect(member).toBe(wireValue);
    });
  });
});
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,7 @@ import {
} from 'src/explore/constants';
import AdhocMetric from 'src/explore/components/controls/MetricControl/AdhocMetric';
import { FeatureFlag, isFeatureEnabled } from '@superset-ui/core';
import { GenericDataType } from '@apache-superset/core/common';
import fetchMock from 'fetch-mock';

import { TestDataset, Dataset } from '@superset-ui/chart-controls';
Expand Down Expand Up @@ -252,6 +253,59 @@ test('shows boolean only operators when subject is number', () => {
].map(operator => expect(isOperatorRelevant(operator, 'value')).toBe(true));
});

test('shows CONTAINS and null operators when subject is multi-value', () => {
  // A single array-typed column that the filter subject points at.
  const skillsColumn = {
    id: 3,
    column_name: 'skills',
    type: 'Array(String)',
    type_generic: GenericDataType.MultiValue,
  };
  // Simple filter with only the subject chosen; no operator or comparator yet.
  const adhocFilter = new AdhocFilter({
    expressionType: ExpressionTypes.Simple,
    subject: 'skills',
    operatorId: undefined,
    operator: undefined,
    comparator: undefined,
    clause: undefined,
  });
  const props = setup({
    adhocFilter,
    datasource: { columns: [skillsColumn] },
  });
  const { isOperatorRelevant } = useSimpleTabFilterProps(
    props as unknown as Props,
  );

  // membership and null checks are offered for array columns
  const offeredOperators = [
    Operators.Contains,
    Operators.IsNull,
    Operators.IsNotNull,
  ];
  offeredOperators.forEach(operator => {
    expect(isOperatorRelevant(operator, 'skills')).toBe(true);
  });

  // scalar operators are hidden for array columns
  const hiddenOperators = [
    Operators.Equals,
    Operators.GreaterThan,
    Operators.Like,
  ];
  hiddenOperators.forEach(operator => {
    expect(isOperatorRelevant(operator, 'skills')).toBe(false);
  });
});

test('hides CONTAINS for non multi-value columns', () => {
  // Plain string column: no type_generic is supplied, so it is not flagged
  // as multi-value.
  const stringColumn = { id: 3, column_name: 'value', type: 'STRING' };
  const adhocFilter = new AdhocFilter({
    expressionType: ExpressionTypes.Simple,
    subject: 'value',
    operatorId: undefined,
    operator: undefined,
    comparator: undefined,
    clause: undefined,
  });
  const props = setup({
    adhocFilter,
    datasource: { columns: [stringColumn] },
  });
  const { isOperatorRelevant } = useSimpleTabFilterProps(
    props as unknown as Props,
  );

  const containsIsOffered = isOperatorRelevant(Operators.Contains, 'value');
  expect(containsIsOffered).toBe(false);
});

test('will convert from individual comparator to array if the operator changes to multi', () => {
const props = setup();
const { onOperatorChange } = useSimpleTabFilterProps(
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@ import {
isDefined,
SupersetClient,
} from '@superset-ui/core';
import { GenericDataType } from '@apache-superset/core/common';
import { styled, useTheme, css } from '@apache-superset/core/theme';
import {
Operators,
Expand Down Expand Up @@ -118,6 +119,8 @@ export const useSimpleTabFilterProps = (props: Props) => {
const isColumnNumber =
!!column && (column.type === 'INT' || column.type === 'INTEGER');
const isColumnFunction = !!column && !!column.expression;
const isColumnMultiValue =
!!column && column.type_generic === GenericDataType.MultiValue;

if (operator && operator === Operators.LatestPartition) {
const { partitionColumn } = props;
Expand All @@ -127,6 +130,18 @@ export const useSimpleTabFilterProps = (props: Props) => {
// hide the TEMPORAL_RANGE operator
return false;
}
// CONTAINS (array membership) only applies to multi-value columns.
if (operator === Operators.Contains) {
return isColumnMultiValue;
}
if (isColumnMultiValue) {
// multi-value columns support membership and null checks only
return (
operator === Operators.Contains ||
operator === Operators.IsNull ||
operator === Operators.IsNotNull
);
}
if (operator === Operators.IsTrue || operator === Operators.IsFalse) {
return isColumnBoolean || isColumnNumber || isColumnFunction;
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,7 @@ export const OPERATORS_TO_SQL = {
'IS NULL': 'IS NULL',
'IS TRUE': 'IS TRUE',
'IS FALSE': 'IS FALSE',
CONTAINS: 'CONTAINS',
'LATEST PARTITION': ({
datasource,
}: {
Expand Down
2 changes: 2 additions & 0 deletions superset-frontend/src/explore/constants.ts
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,7 @@ export enum Operators {
IsTrue = 'IS_TRUE',
IsFalse = 'IS_FALSE',
TemporalRange = 'TEMPORAL_RANGE',
Contains = 'CONTAINS',
}

export interface OperatorType {
Expand Down Expand Up @@ -89,6 +90,7 @@ export const OPERATOR_ENUM_TO_OPERATOR_TYPE: {
display: t('TEMPORAL_RANGE'),
operation: 'TEMPORAL_RANGE',
},
[Operators.Contains]: { display: t('Contains'), operation: 'CONTAINS' },
};

export const OPERATORS_OPTIONS = Object.values(Operators) as Operators[];
Expand Down
62 changes: 62 additions & 0 deletions superset/connectors/sqla/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -1711,6 +1711,63 @@ def _render_adhoc_expression_for_metadata_lookup(
)
) from ex

def _multivalue_column_to_sqla(
    self,
    col: AdhocColumn,
    template_processor: BaseTemplateProcessor | None = None,
) -> tuple[ColumnElement, utils.GenericDataType | None]:
    """
    Convert a multi-value (array) modifier column into a sqlalchemy column.

    ``col`` names a base array column (key ``column``) and an operation to
    apply to it (key ``columnOperation``). The SQL for the operation is
    produced by the engine spec's ``array_*`` methods, so the same payload
    can be served by any engine spec that implements them.

    :param col: adhoc column payload describing the base column and operation
    :param template_processor: forwarded to the base column's ``get_sqla_col``
    :return: the labelled expression and its generic type (``NUMERIC`` for
        the length operation, ``None`` for explode)
    :raises QueryObjectValidationError: if the engine does not support
        multi-value columns, the base column is unknown, the engine does not
        implement explode, or the operation is not recognised
    """
    label = utils.get_column_name(col)
    source_name = col.get("column")
    requested_operation = col.get("columnOperation")

    engine_spec = self.db_engine_spec
    if not engine_spec.supports_multivalue_columns:
        raise QueryObjectValidationError(
            _("This database does not support multi-value (array) columns.")
        )

    source_column = self.get_column(source_name)
    if source_column is None:
        raise QueryObjectValidationError(
            _("Unknown column used as multi-value source: %(col)s", col=source_name)
        )
    source_sqla_col = source_column.get_sqla_col(
        template_processor=template_processor
    )

    if requested_operation == utils.MultiValueColumnOperation.LENGTH:
        length_expression = engine_spec.array_length(source_sqla_col)
        return (
            self.make_sqla_column_compatible(length_expression, label),
            utils.GenericDataType.NUMERIC,
        )

    if requested_operation != utils.MultiValueColumnOperation.EXPLODE:
        raise QueryObjectValidationError(
            _(
                "Unsupported multi-value column operation: %(op)s",
                op=requested_operation,
            )
        )

    # Scalar explode (e.g. ClickHouse arrayJoin) can be projected directly.
    # Set-returning UNNEST dialects (Postgres/Trino/BigQuery) need extra
    # FROM/JOIN plumbing that is handled in a later phase; their engine specs
    # leave array_explode unimplemented, which is turned into a clear
    # validation error here instead of emitting invalid SQL.
    try:
        exploded_expression = engine_spec.array_explode(source_sqla_col)
    except NotImplementedError as ex:
        raise QueryObjectValidationError(
            _("This database does not support exploding array columns.")
        ) from ex

    # The exploded value is a single array element; its type is not reliably
    # known from the array type string, so no generic type is reported.
    return self.make_sqla_column_compatible(exploded_expression, label), None

def adhoc_column_to_sqla( # pylint: disable=too-many-locals
self,
col: AdhocColumn,
Expand All @@ -1733,6 +1790,11 @@ def adhoc_column_to_sqla( # pylint: disable=too-many-locals
Python type (e.g. numeric casts for numeric adhoc expressions).
:rtype: tuple[sqlalchemy.sql.ColumnElement, Optional[GenericDataType]]
"""
if utils.is_multivalue_operation_column(col):
return self._multivalue_column_to_sqla(
col, template_processor=template_processor
)

label = utils.get_column_name(col)
sql_expression = col["sqlExpression"]
time_grain = col.get("timeGrain")
Expand Down
58 changes: 57 additions & 1 deletion superset/db_engine_specs/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -55,7 +55,13 @@
from sqlalchemy.engine.url import URL
from sqlalchemy.ext.compiler import compiles
from sqlalchemy.sql import literal_column, quoted_name, text
from sqlalchemy.sql.expression import BinaryExpression, ColumnClause, Select, TextClause
from sqlalchemy.sql.expression import (
BinaryExpression,
ColumnClause,
ColumnElement,
Select,
TextClause,
)
from sqlalchemy.types import TypeEngine

from superset import db
Expand Down Expand Up @@ -493,6 +499,11 @@ class BaseEngineSpec: # pylint: disable=too-many-public-methods
time_groupby_inline = False
limit_method = LimitMethod.FORCE_LIMIT
supports_multivalues_insert = False
# Whether this engine supports first-class multi-value (array-typed) columns.
# When True, array columns are classified as ``GenericDataType.MULTI_VALUE`` and
# the ``array_contains`` / ``array_length`` methods below must be implemented;
# ``array_explode`` is optional and may be left unimplemented by engines that
# cannot express explode as a plain projected expression. Defaults to False so
# engines that have not opted in keep treating arrays as strings.
supports_multivalue_columns = False
allows_joins = True
allows_subqueries = True
allows_alias_in_select = True
Expand Down Expand Up @@ -2393,6 +2404,51 @@ def update_params_from_encrypted_extra( # pylint: disable=invalid-name
logger.error(ex, exc_info=True)
raise

@classmethod
def array_contains(cls, col: ColumnElement, value: Any) -> ColumnElement:
    """
    Return a boolean expression that is true when array column ``col``
    contains ``value``.

    This base implementation always raises. Engines that set
    ``supports_multivalue_columns = True`` must override it with their
    native array-membership function.

    :param col: SQLAlchemy column element for the array column
    :param value: scalar value to look for inside the array
    :return: a SQLAlchemy boolean expression
    :raises NotImplementedError: always, unless overridden by the engine spec
    """
    message = f"{cls.engine} does not support multi-value (array) columns"
    raise NotImplementedError(message)

@classmethod
def array_length(cls, col: ColumnElement) -> ColumnElement:
    """
    Return a numeric expression giving the number of elements in array
    column ``col``.

    This base implementation always raises. Engines that set
    ``supports_multivalue_columns = True`` must override it with their
    native array-length function.

    :param col: SQLAlchemy column element for the array column
    :return: a SQLAlchemy numeric expression
    :raises NotImplementedError: always, unless overridden by the engine spec
    """
    message = f"{cls.engine} does not support multi-value (array) columns"
    raise NotImplementedError(message)

@classmethod
def array_explode(cls, col: ColumnElement) -> ColumnElement:
    """
    Build an expression that expands array column ``col`` into one scalar
    value per element (e.g. ClickHouse ``arrayJoin``).

    Implementing this hook is only appropriate when explode can be written
    as a plain expression that is projected directly. Set-returning dialects
    (Postgres/Trino/BigQuery ``UNNEST``) need additional FROM/JOIN plumbing
    and should leave this method unimplemented; callers are expected to
    catch ``NotImplementedError`` and report that explode is unavailable.

    :param col: SQLAlchemy column element for the array column
    :return: a SQLAlchemy expression yielding individual array elements
    :raises NotImplementedError: always, unless overridden by the engine spec
    """
    # An engine may support array columns in general and still lack explode,
    # so the message names the missing capability rather than arrays at large.
    raise NotImplementedError(
        f"{cls.engine} does not support exploding multi-value (array) columns"
    )

@classmethod
def get_column_spec( # pylint: disable=unused-argument
cls,
Expand Down
22 changes: 20 additions & 2 deletions superset/db_engine_specs/clickhouse.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,8 +26,9 @@
from flask_babel import gettext as __
from marshmallow import fields, Schema
from marshmallow.validate import Range
from sqlalchemy import types
from sqlalchemy import func, types
from sqlalchemy.engine.url import URL
from sqlalchemy.sql.expression import ColumnElement
from urllib3.exceptions import NewConnectionError

from superset.databases.utils import make_url_safe
Expand Down Expand Up @@ -55,6 +56,7 @@ class ClickHouseBaseEngineSpec(BaseEngineSpec):

time_groupby_inline = True
supports_multivalues_insert = True
supports_multivalue_columns = True

_time_grain_expressions = {
None: "{col}",
Expand All @@ -80,7 +82,7 @@ class ClickHouseBaseEngineSpec(BaseEngineSpec):
(
re.compile(r".*Array.*", re.IGNORECASE),
types.String(),
GenericDataType.STRING,
GenericDataType.MULTI_VALUE,
),
(
re.compile(r".*UUID.*", re.IGNORECASE),
Expand Down Expand Up @@ -119,6 +121,22 @@ class ClickHouseBaseEngineSpec(BaseEngineSpec):
),
)

@classmethod
def array_contains(cls, col: ColumnElement, value: Any) -> ColumnElement:
    """
    ClickHouse array membership: ``has(arr, value)``.

    :param col: SQLAlchemy column element for the array column
    :param value: value to look for inside the array
    :return: a ``has(col, value)`` call, which ClickHouse evaluates to 1 when
        the array contains the value
    """
    membership_check = func.has(col, value)
    return membership_check

@classmethod
def array_length(cls, col: ColumnElement) -> ColumnElement:
    """
    ClickHouse array size: ``length(arr)``.

    :param col: SQLAlchemy column element for the array column
    :return: a ``length(col)`` call yielding the number of elements
    """
    element_count = func.length(col)
    return element_count

@classmethod
def array_explode(cls, col: ColumnElement) -> ColumnElement:
    """
    ClickHouse array expansion: ``arrayJoin(arr)``.

    ``arrayJoin`` is a scalar function that can be used directly in
    SELECT/GROUP BY, so no JOIN is needed (unlike UNNEST dialects).

    :param col: SQLAlchemy column element for the array column
    :return: an ``arrayJoin(col)`` call yielding individual array elements
    """
    exploded = func.arrayJoin(col)
    return exploded

@classmethod
def epoch_to_dttm(cls) -> str:
return "{col}"
Expand Down
Loading
Loading