diff --git a/superset/security/manager.py b/superset/security/manager.py index c8a44a6bffb3..8846be1deeac 100644 --- a/superset/security/manager.py +++ b/superset/security/manager.py @@ -520,47 +520,111 @@ def _native_filter_request_modified(query_context: "QueryContext") -> bool: ) -def query_context_modified(query_context: "QueryContext") -> bool: +def _collect_sortable_identifiers( + stored_chart: "Slice", + stored_query_context: Optional[dict[str, Any]], +) -> set[str]: """ - Check if a query context has been modified. - - This is used to ensure guest users don't modify the payload and fetch data - different from what was shared with them in dashboards. + Frozen column names and metric labels/definitions a guest may legitimately + sort by: every column or metric the stored chart already references. + + Order-by only changes the ordering of the result, not which data is read, so + any column or metric already part of the chart is a safe sort target. A term + that is not present in the stored chart (for example a free-form ``random()`` + expression) cannot be validated and must be rejected. Order-by entries are + ``(column_or_metric, ascending)`` pairs, so only their first element is + collected. """ - form_data = query_context.form_data - stored_chart = query_context.slice_ + allowed: set[str] = set() - # Native-filter data requests have no associated chart (no slice_id). Rather - # than accepting any payload, constrain them to the column(s) the dashboard's - # native filter is allowed to target; other chartless paths keep prior - # behavior (see _native_filter_request_modified). - if stored_chart is None: - return _native_filter_request_modified(query_context) + def add(values: Any) -> None: + for value in values or []: + allowed.add(freeze_value(value)) - if form_data is None: - return False + def add_orderby(entries: Any) -> None: + for entry in entries or []: + if isinstance(entry, (list, tuple)) and entry: + allowed.add(freeze_value(entry[0])) - # cannot request a different chart - if form_data.get("slice_id") != stored_chart.id: - return True + params = stored_chart.params_dict + for key in ("columns", "groupby", "metrics", "all_columns"): + add(params.get(key)) + # Legacy charts store a single metric under the singular ``metric`` key. + add([params["metric"]] if params.get("metric") is not None else None) + add_orderby(params.get("orderby")) + + if stored_query_context: + for query in stored_query_context.get("queries") or []: + for key in ("columns", "groupby", "metrics", "all_columns"): + add(query.get(key)) + add_orderby(query.get("orderby")) + + return allowed - stored_query_context = ( - json.loads(cast(str, stored_chart.query_context)) - if stored_chart.query_context - else None - ) - # compare columns and metrics in form_data with stored values +def _orderby_modified( + query_context: "QueryContext", + stored_chart: "Slice", + stored_query_context: Optional[dict[str, Any]], +) -> bool: + """ + Whether any order-by clause sorts by a term the stored chart does not already + reference. + + A guest reordering an embedded chart by one of its existing columns or + metrics is legitimate and must not read as tampering; introducing a new + expression is not, and is rejected. + """ + allowed = _collect_sortable_identifiers(stored_chart, stored_query_context) + form_data = query_context.form_data or {} + # Both ``form_data`` and each ``QueryObject`` can carry an order-by, and in + # the common frontend path they carry the same one. Either source could + # smuggle an unauthorized term, so validate the union of both rather than + # trusting one over the other; the duplication is harmless. + requested = list(form_data.get("orderby") or []) + for query in query_context.queries: + requested.extend(getattr(query, "orderby", None) or []) + + for entry in requested: + # Order-by entries must be ``(column_or_metric, ascending)`` pairs. A + # malformed shape (e.g. a bare string or nested list) is not a valid + # sort the chart could have produced, so treat it as tampering rather + # than letting it crash query building when it is later unpacked. + if not ( + isinstance(entry, (list, tuple)) + and len(entry) == 2 + and isinstance(entry[1], bool) + ): + return True + if freeze_value(entry[0]) not in allowed: + return True + return False + + +def _columns_metrics_modified( + query_context: "QueryContext", + form_data: dict[str, Any], + stored_chart: "Slice", + stored_query_context: Optional[dict[str, Any]], +) -> bool: + """ + Whether the requested columns/metrics/group-by read beyond what the stored + chart exposes. Each requested set must be a subset of the values stored on + the chart (params and, when present, the stored query context). + """ for key, equivalent in [ ("metrics", ["metrics"]), ("columns", ["columns", "groupby"]), ("groupby", ["columns", "groupby"]), - ("orderby", ["orderby"]), ]: requested_values = {freeze_value(value) for value in form_data.get(key) or []} stored_values = { freeze_value(value) for value in stored_chart.params_dict.get(key) or [] } + # ``form_data`` values are checked against ``params_dict`` alone; + # ``query_context`` values are checked below against the fuller set that + # also includes the stored query context. This asymmetry is intentional: + # each requested source is compared to its corresponding stored source. if not requested_values.issubset(stored_values): return True @@ -572,9 +636,9 @@ def query_context_modified(query_context: "QueryContext") -> bool: } if stored_query_context: for query in stored_query_context.get("queries") or []: - for key in equivalent: + for equiv_key in equivalent: stored_values.update( - {freeze_value(value) for value in query.get(key) or []} + {freeze_value(value) for value in query.get(equiv_key) or []} ) if not queries_values.issubset(stored_values): @@ -583,6 +647,52 @@ def query_context_modified(query_context: "QueryContext") -> bool: return False +def query_context_modified(query_context: "QueryContext") -> bool: + """ + Check if a query context has been modified. + + This is used to ensure guest users don't modify the payload and fetch data + different from what was shared with them in dashboards. + """ + form_data = query_context.form_data + stored_chart = query_context.slice_ + + # Native-filter data requests have no associated chart (no slice_id). Rather + # than accepting any payload, constrain them to the column(s) the dashboard's + # native filter is allowed to target; other chartless paths keep prior + # behavior (see _native_filter_request_modified). + if stored_chart is None: + return _native_filter_request_modified(query_context) + + if form_data is None: + return False + + # cannot request a different chart + if form_data.get("slice_id") != stored_chart.id: + return True + + stored_query_context = ( + json.loads(cast(str, stored_chart.query_context)) + if stored_chart.query_context + else None + ) + + # compare columns and metrics in form_data with stored values. Order-by is + # handled separately: a strict subset check there would reject a guest + # legitimately sorting an embedded chart by one of its existing columns. + if _columns_metrics_modified( + query_context, form_data, stored_chart, stored_query_context + ): + return True + + # Order-by may sort only by columns/metrics already present in the stored + # chart; new expressions (e.g. ``random()``) are still rejected. + if _orderby_modified(query_context, stored_chart, stored_query_context): + return True + + return False + + class SupersetSecurityManager( # pylint: disable=too-many-public-methods SecurityManager ): diff --git a/tests/unit_tests/security/manager_test.py b/tests/unit_tests/security/manager_test.py index f4441630f685..d4c3aa8fe9c9 100644 --- a/tests/unit_tests/security/manager_test.py +++ b/tests/unit_tests/security/manager_test.py @@ -19,7 +19,7 @@ import json # noqa: TID251 from types import SimpleNamespace -from typing import Any +from typing import Any, Optional import pytest from flask_appbuilder.security.sqla.models import Role, User @@ -31,6 +31,8 @@ from superset.extensions import appbuilder from superset.models.slice import Slice from superset.security.manager import ( + _collect_sortable_identifiers, + freeze_value, query_context_modified, SupersetSecurityManager, ) @@ -1218,6 +1220,129 @@ def test_query_context_modified_orderby(mocker: MockerFixture) -> None: assert query_context_modified(query_context) +def _table_sort_query_context( + mocker: MockerFixture, + orderby: list[Any], + *, + stored_metrics: Optional[list[Any]] = None, + with_stored_query_context: bool = True, +) -> Any: + """ + Build a minimal table-chart query context for a guest that sorts by + ``orderby``. The stored chart groups by ``gender`` and aggregates ``count``. + """ + metrics: list[Any] = stored_metrics if stored_metrics is not None else ["count"] + query_context = mocker.MagicMock() + query_context.queries = [ + QueryObject(columns=["gender"], metrics=metrics, orderby=orderby), + ] + query_context.form_data = {"slice_id": 101, "groupby": ["gender"]} + query_context.slice_.id = 101 + query_context.slice_.params_dict = {"groupby": ["gender"], "metrics": metrics} + query_context.slice_.query_context = ( + json.dumps({"queries": [{"columns": ["gender"], "metrics": metrics}]}) + if with_stored_query_context + else None + ) + return query_context + + +def test_query_context_modified_orderby_sort_by_column(mocker: MockerFixture) -> None: + """A guest sorting an embedded table by an existing column is allowed.""" + query_context = _table_sort_query_context(mocker, orderby=[("gender", True)]) + assert not query_context_modified(query_context) + + +def test_query_context_modified_orderby_sort_by_metric(mocker: MockerFixture) -> None: + """A guest sorting by an existing metric is allowed.""" + query_context = _table_sort_query_context(mocker, orderby=[("count", False)]) + assert not query_context_modified(query_context) + + +def test_query_context_modified_orderby_sort_by_adhoc_metric( + mocker: MockerFixture, +) -> None: + """A guest sorting by an existing adhoc metric definition is allowed.""" + adhoc_metric: AdhocMetric = { + "aggregate": "SUM", + "column": {"column_name": "num"}, + "expressionType": "SIMPLE", + "hasCustomLabel": False, + "label": "SUM(num)", + } + query_context = _table_sort_query_context( + mocker, + orderby=[(adhoc_metric, False)], + stored_metrics=[adhoc_metric], + ) + assert not query_context_modified(query_context) + + +def test_query_context_modified_orderby_unknown_column(mocker: MockerFixture) -> None: + """A guest sorting by a column the chart does not reference is rejected.""" + query_context = _table_sort_query_context(mocker, orderby=[("salary", True)]) + assert query_context_modified(query_context) + + +def test_query_context_modified_orderby_empty(mocker: MockerFixture) -> None: + """An empty order-by is not a modification.""" + query_context = _table_sort_query_context(mocker, orderby=[]) + assert not query_context_modified(query_context) + + +def test_query_context_modified_orderby_legacy_singular_metric( + mocker: MockerFixture, +) -> None: + """A guest sorting by a legacy ``metric`` (singular) field is allowed.""" + query_context = _table_sort_query_context( + mocker, + orderby=[("num_sold", False)], + stored_metrics=[], + with_stored_query_context=False, + ) + query_context.slice_.params_dict = { + "columns": ["gender"], + "groupby": ["gender"], + "metric": "num_sold", + } + assert not query_context_modified(query_context) + + +def test_query_context_modified_orderby_malformed_entry( + mocker: MockerFixture, +) -> None: + """A malformed order-by entry (not a ``(term, bool)`` pair) is rejected.""" + query_context = _table_sort_query_context(mocker, orderby=[["gender"]]) + assert query_context_modified(query_context) + + +def test_query_context_modified_orderby_sort_by_stored_qc_only_column( + mocker: MockerFixture, +) -> None: + """A column present only in the stored query context is an allowed sort target.""" + query_context = _table_sort_query_context(mocker, orderby=[("age", True)]) + # "age" is absent from params_dict but exposed via the stored query context, + # so sorting by it must be allowed. + query_context.slice_.params_dict = {"groupby": ["gender"], "metrics": ["count"]} + query_context.slice_.query_context = json.dumps( + {"queries": [{"columns": ["gender", "age"], "metrics": ["count"]}]} + ) + assert not query_context_modified(query_context) + + +def test_collect_sortable_identifiers_includes_stored_qc_all_columns( + mocker: MockerFixture, +) -> None: + """``all_columns`` exposed via the stored query context is a valid sort target.""" + stored_chart = mocker.MagicMock() + stored_chart.params_dict = {"groupby": ["gender"], "metrics": ["count"]} + allowed = _collect_sortable_identifiers( + stored_chart, + {"queries": [{"all_columns": ["gender", "age"], "metrics": ["count"]}]}, + ) + assert freeze_value("age") in allowed + + def test_query_context_modified_time_grain_native_filter( mocker: MockerFixture, ) -> None: