Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
164 changes: 137 additions & 27 deletions superset/security/manager.py
Original file line number Diff line number Diff line change
Expand Up @@ -520,47 +520,111 @@ def _native_filter_request_modified(query_context: "QueryContext") -> bool:
)


def query_context_modified(query_context: "QueryContext") -> bool:
def _collect_sortable_identifiers(
stored_chart: "Slice",
stored_query_context: Optional[dict[str, Any]],
) -> set[str]:
"""
Check if a query context has been modified.

This is used to ensure guest users don't modify the payload and fetch data
different from what was shared with them in dashboards.
Frozen column names and metric labels/definitions a guest may legitimately
sort by: every column or metric the stored chart already references.

Order-by only changes the ordering of the result, not which data is read, so
any column or metric already part of the chart is a safe sort target. A term
that is not present in the stored chart (for example a free-form ``random()``
expression) cannot be validated and must be rejected. Order-by entries are
``(column_or_metric, ascending)`` pairs, so only their first element is
collected.
"""
form_data = query_context.form_data
stored_chart = query_context.slice_
allowed: set[str] = set()

# Native-filter data requests have no associated chart (no slice_id). Rather
# than accepting any payload, constrain them to the column(s) the dashboard's
# native filter is allowed to target; other chartless paths keep prior
# behavior (see _native_filter_request_modified).
if stored_chart is None:
return _native_filter_request_modified(query_context)
def add(values: Any) -> None:
for value in values or []:
allowed.add(freeze_value(value))
Comment thread
rusackas marked this conversation as resolved.

if form_data is None:
return False
def add_orderby(entries: Any) -> None:
for entry in entries or []:
if isinstance(entry, (list, tuple)) and entry:
allowed.add(freeze_value(entry[0]))
Comment thread
rusackas marked this conversation as resolved.

# cannot request a different chart
if form_data.get("slice_id") != stored_chart.id:
return True
params = stored_chart.params_dict
for key in ("columns", "groupby", "metrics", "all_columns"):
add(params.get(key))
Comment thread
rusackas marked this conversation as resolved.
# Legacy charts store a single metric under the singular ``metric`` key.
add([params["metric"]] if params.get("metric") is not None else None)
add_orderby(params.get("orderby"))

if stored_query_context:
for query in stored_query_context.get("queries") or []:
for key in ("columns", "groupby", "metrics", "all_columns"):
add(query.get(key))
add_orderby(query.get("orderby"))

return allowed

stored_query_context = (
json.loads(cast(str, stored_chart.query_context))
if stored_chart.query_context
else None
)

# compare columns and metrics in form_data with stored values
def _orderby_modified(
query_context: "QueryContext",
stored_chart: "Slice",
stored_query_context: Optional[dict[str, Any]],
) -> bool:
"""
Whether any order-by clause sorts by a term the stored chart does not already
reference.

A guest reordering an embedded chart by one of its existing columns or
metrics is legitimate and must not read as tampering; introducing a new
expression is not, and is rejected.
"""
allowed = _collect_sortable_identifiers(stored_chart, stored_query_context)
form_data = query_context.form_data or {}
# Both ``form_data`` and each ``QueryObject`` can carry an order-by, and in
# the common frontend path they carry the same one. Either source could
# smuggle an unauthorized term, so validate the union of both rather than
# trusting one over the other; the duplication is harmless.
requested = list(form_data.get("orderby") or [])
for query in query_context.queries:
requested.extend(getattr(query, "orderby", None) or [])

for entry in requested:
# Order-by entries must be ``(column_or_metric, ascending)`` pairs. A
# malformed shape (e.g. a bare string or nested list) is not a valid
# sort the chart could have produced, so treat it as tampering rather
# than letting it crash query building when it is later unpacked.
if not (
isinstance(entry, (list, tuple))
and len(entry) == 2
and isinstance(entry[1], bool)
):
return True
if freeze_value(entry[0]) not in allowed:
return True
Comment thread
rusackas marked this conversation as resolved.
return False


def _columns_metrics_modified(
query_context: "QueryContext",
form_data: dict[str, Any],
stored_chart: "Slice",
stored_query_context: Optional[dict[str, Any]],
) -> bool:
"""
Whether the requested columns/metrics/group-by read beyond what the stored
chart exposes. Each requested set must be a subset of the values stored on
the chart (params and, when present, the stored query context).
"""
for key, equivalent in [
("metrics", ["metrics"]),
("columns", ["columns", "groupby"]),
("groupby", ["columns", "groupby"]),
("orderby", ["orderby"]),
]:
requested_values = {freeze_value(value) for value in form_data.get(key) or []}
stored_values = {
freeze_value(value) for value in stored_chart.params_dict.get(key) or []
}
# ``form_data`` values are checked against ``params_dict`` alone;
# ``query_context`` values are checked below against the fuller set that
# also includes the stored query context. This asymmetry is intentional:
# each requested source is compared to its corresponding stored source.
if not requested_values.issubset(stored_values):
return True

Expand All @@ -572,9 +636,9 @@ def query_context_modified(query_context: "QueryContext") -> bool:
}
if stored_query_context:
for query in stored_query_context.get("queries") or []:
for key in equivalent:
for equiv_key in equivalent:
stored_values.update(
{freeze_value(value) for value in query.get(key) or []}
{freeze_value(value) for value in query.get(equiv_key) or []}
)

if not queries_values.issubset(stored_values):
Expand All @@ -583,6 +647,52 @@ def query_context_modified(query_context: "QueryContext") -> bool:
return False


def query_context_modified(query_context: "QueryContext") -> bool:
"""
Check if a query context has been modified.

This is used to ensure guest users don't modify the payload and fetch data
different from what was shared with them in dashboards.
"""
form_data = query_context.form_data
stored_chart = query_context.slice_

# Native-filter data requests have no associated chart (no slice_id). Rather
# than accepting any payload, constrain them to the column(s) the dashboard's
# native filter is allowed to target; other chartless paths keep prior
# behavior (see _native_filter_request_modified).
if stored_chart is None:
return _native_filter_request_modified(query_context)

if form_data is None:
return False

# cannot request a different chart
if form_data.get("slice_id") != stored_chart.id:
return True

stored_query_context = (
json.loads(cast(str, stored_chart.query_context))
if stored_chart.query_context
else None
)

# compare columns and metrics in form_data with stored values. Order-by is
# handled separately: a strict subset check there would reject a guest
# legitimately sorting an embedded chart by one of its existing columns.
if _columns_metrics_modified(
query_context, form_data, stored_chart, stored_query_context
):
return True

# Order-by may sort only by columns/metrics already present in the stored
# chart; new expressions (e.g. ``random()``) are still rejected.
if _orderby_modified(query_context, stored_chart, stored_query_context):
return True

return False


class SupersetSecurityManager( # pylint: disable=too-many-public-methods
SecurityManager
):
Expand Down
127 changes: 126 additions & 1 deletion tests/unit_tests/security/manager_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@

import json # noqa: TID251
from types import SimpleNamespace
from typing import Any
from typing import Any, Optional

import pytest
from flask_appbuilder.security.sqla.models import Role, User
Expand All @@ -31,6 +31,8 @@
from superset.extensions import appbuilder
from superset.models.slice import Slice
from superset.security.manager import (
_collect_sortable_identifiers,
freeze_value,
query_context_modified,
SupersetSecurityManager,
)
Expand Down Expand Up @@ -1218,6 +1220,129 @@ def test_query_context_modified_orderby(mocker: MockerFixture) -> None:
assert query_context_modified(query_context)


def _table_sort_query_context(
mocker: MockerFixture,
orderby: list[Any],
*,
stored_metrics: Optional[list[Any]] = None,
with_stored_query_context: bool = True,
) -> Any:
"""
Build a minimal table-chart query context for a guest that sorts by
``orderby``. The stored chart groups by ``gender`` and aggregates ``count``.
"""
metrics: list[Any] = stored_metrics if stored_metrics is not None else ["count"]
query_context = mocker.MagicMock()
query_context.queries = [
QueryObject(columns=["gender"], metrics=metrics, orderby=orderby),
]
query_context.form_data = {"slice_id": 101, "groupby": ["gender"]}
query_context.slice_.id = 101
query_context.slice_.params_dict = {"groupby": ["gender"], "metrics": metrics}
query_context.slice_.query_context = (
json.dumps({"queries": [{"columns": ["gender"], "metrics": metrics}]})
if with_stored_query_context
else None
)
return query_context


def test_query_context_modified_orderby_sort_by_column(mocker: MockerFixture) -> None:
"""A guest sorting an embedded table by an existing column is allowed."""
query_context = _table_sort_query_context(mocker, orderby=[("gender", True)])
assert not query_context_modified(query_context)


def test_query_context_modified_orderby_sort_by_metric(mocker: MockerFixture) -> None:
"""A guest sorting by an existing metric is allowed."""
query_context = _table_sort_query_context(mocker, orderby=[("count", False)])
assert not query_context_modified(query_context)


def test_query_context_modified_orderby_sort_by_adhoc_metric(
mocker: MockerFixture,
) -> None:
"""A guest sorting by an existing adhoc metric definition is allowed."""
adhoc_metric: AdhocMetric = {
"aggregate": "SUM",
"column": {"column_name": "num"},
"expressionType": "SIMPLE",
"hasCustomLabel": False,
"label": "SUM(num)",
}
query_context = _table_sort_query_context(
mocker,
orderby=[(adhoc_metric, False)],
stored_metrics=[adhoc_metric],
)
assert not query_context_modified(query_context)


def test_query_context_modified_orderby_unknown_column(mocker: MockerFixture) -> None:
"""A guest sorting by a column the chart does not reference is rejected."""
query_context = _table_sort_query_context(mocker, orderby=[("salary", True)])
assert query_context_modified(query_context)


def test_query_context_modified_orderby_empty(mocker: MockerFixture) -> None:
"""An empty order-by is not a modification."""
query_context = _table_sort_query_context(mocker, orderby=[])
assert not query_context_modified(query_context)


def test_query_context_modified_orderby_legacy_singular_metric(
mocker: MockerFixture,
) -> None:
"""A guest sorting by a legacy ``metric`` (singular) field is allowed."""
query_context = _table_sort_query_context(
mocker,
orderby=[("num_sold", False)],
stored_metrics=[],
with_stored_query_context=False,
)
query_context.slice_.params_dict = {
"columns": ["gender"],
"groupby": ["gender"],
"metric": "num_sold",
}
assert not query_context_modified(query_context)


def test_query_context_modified_orderby_malformed_entry(
mocker: MockerFixture,
) -> None:
"""A malformed order-by entry (not a ``(term, bool)`` pair) is rejected."""
query_context = _table_sort_query_context(mocker, orderby=[["gender"]])
assert query_context_modified(query_context)


def test_query_context_modified_orderby_sort_by_stored_qc_only_column(
mocker: MockerFixture,
) -> None:
"""A column present only in the stored query context is an allowed sort target."""
query_context = _table_sort_query_context(mocker, orderby=[("age", True)])
# "age" is absent from params_dict but exposed via the stored query context,
# so sorting by it must be allowed.
query_context.slice_.params_dict = {"groupby": ["gender"], "metrics": ["count"]}
query_context.slice_.query_context = json.dumps(
{"queries": [{"columns": ["gender", "age"], "metrics": ["count"]}]}
)
assert not query_context_modified(query_context)


def test_collect_sortable_identifiers_includes_stored_qc_all_columns(
mocker: MockerFixture,
) -> None:
"""``all_columns`` exposed via the stored query context is a valid sort target."""
stored_chart = mocker.MagicMock()
stored_chart.params_dict = {"groupby": ["gender"], "metrics": ["count"]}
allowed = _collect_sortable_identifiers(
stored_chart,
{"queries": [{"all_columns": ["gender", "age"], "metrics": ["count"]}]},
)
assert freeze_value("age") in allowed


def test_query_context_modified_time_grain_native_filter(
mocker: MockerFixture,
) -> None:
Expand Down
Loading