Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions UPDATING.md
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,10 @@ assists people when migrating to a new version.

## Next

### Guest-token RLS rules reject unknown fields

The `rls` rules passed to `POST /api/v1/security/guest_token/` are now validated strictly: a rule may only contain `dataset` and `clause`. Previously unknown fields were silently dropped, so a mistyped or legacy scope key (most commonly `datasource` instead of `dataset`) produced a rule with no `dataset`, which is treated as a *global* rule applied to every dataset the embedded resource can reach. Such a request now returns HTTP 400 identifying the offending field instead of issuing a token with an unintended global rule. Integrators that were sending extra fields in RLS rules must remove them; valid dataset-scoped (`{"dataset": 41, "clause": "..."}`) and global (`{"clause": "..."}`) rules are unaffected.

### Pivot table First/Last aggregations follow data order

The pivot table chart's `First` and `Last` aggregations now return the first and last value in data (query result) order, instead of effectively returning the minimum and maximum. Existing pivot tables that use these aggregations for totals/subtotals may show different values after upgrading. For deterministic results, ensure the underlying query has a stable sort order.
Expand Down
39 changes: 36 additions & 3 deletions superset/security/api.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,15 @@
from flask_appbuilder.security.decorators import permission_name, protect
from flask_appbuilder.security.sqla.models import RegisterUser, Role
from flask_wtf.csrf import generate_csrf
from marshmallow import EXCLUDE, fields, post_load, Schema, ValidationError
from marshmallow import (
EXCLUDE,
fields,
post_load,
RAISE,
Schema,
validate,
ValidationError,
)
from sqlalchemy import asc, desc
from sqlalchemy.orm import selectinload

Expand Down Expand Up @@ -74,8 +82,33 @@ def convert_enum_to_value( # pylint: disable=unused-argument
return data


class RlsRuleSchema(PermissiveSchema):
dataset = fields.Integer()
class RlsRuleSchema(Schema):
"""
Schema for a single row-level security rule attached to a guest token.

Unlike the other guest-token schemas, this one rejects unknown fields
instead of silently dropping them. A rule is scoped to a dataset only when
it carries a valid positive integer ``dataset`` key; a rule with no
``dataset`` is treated as global and its ``clause`` is applied to every
dataset the embedded resource can reach (see ``get_guest_rls_filters``).
Silently excluding an unexpected field -- most commonly a mistyped or
legacy scope key such as ``datasource`` -- would therefore turn an intended
dataset-scoped rule into a global one without any feedback to the caller.
Raising on unknown fields surfaces the mistake as an HTTP 400 before a
token is ever issued and keeps the accepted payload aligned with the
documented ``RlsRule`` contract (``dataset`` and ``clause``).

For the same reason ``dataset`` is constrained to strict, positive
integers: a falsy value such as ``0`` (or ``false``, which marshmallow
coerces to ``0``) would pass a bare ``Integer`` field but then read as
falsy in ``get_guest_rls_filters``, silently widening a scoped rule to
every dataset.
"""

class Meta: # pylint: disable=too-few-public-methods
unknown = RAISE

dataset = fields.Integer(strict=True, validate=validate.Range(min=1))
clause = fields.String(required=True) # todo other options?


Expand Down
55 changes: 55 additions & 0 deletions tests/unit_tests/security/api_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,8 +17,10 @@
from typing import Any

import pytest
from marshmallow import ValidationError

from superset.extensions import csrf
from superset.security.api import RlsRuleSchema


@pytest.mark.parametrize(
Expand Down Expand Up @@ -60,3 +62,56 @@ def test_csrf_exempt_blueprints_with_api_key(app: Any, app_context: None) -> Non
config is enabled.
"""
assert "ApiKeyApi" in {blueprint.name for blueprint in csrf._exempt_blueprints}


def test_rls_rule_schema_accepts_dataset_scoped_rule() -> None:
"""A rule with an integer ``dataset`` and a ``clause`` loads unchanged."""
result = RlsRuleSchema().load({"dataset": 41, "clause": "tenant_id = 1"})
assert result == {"dataset": 41, "clause": "tenant_id = 1"}


def test_rls_rule_schema_accepts_global_rule() -> None:
"""A rule with no ``dataset`` (a global rule) is still valid."""
result = RlsRuleSchema().load({"clause": "tenant_id = 1"})
assert result == {"clause": "tenant_id = 1"}


def test_rls_rule_schema_rejects_unknown_scope_key() -> None:
"""
A mistyped or legacy scope key (e.g. ``datasource`` instead of ``dataset``)
used to be silently dropped, turning the rule into an unintended global rule.
It now raises a ``ValidationError`` that names the offending field.
"""
with pytest.raises(ValidationError) as exc_info:
RlsRuleSchema().load(
{"datasource": {"id": 41, "type": "table"}, "clause": "tenant_id = 1"}
)
assert "datasource" in exc_info.value.messages


def test_rls_rule_schema_rejects_unknown_fields() -> None:
"""Any unexpected field on an RLS rule is rejected."""
with pytest.raises(ValidationError) as exc_info:
RlsRuleSchema().load(
{"dataset": 41, "clause": "tenant_id = 1", "extra": "nope"}
)
assert "extra" in exc_info.value.messages


def test_rls_rule_schema_requires_clause() -> None:
"""``clause`` remains required."""
with pytest.raises(ValidationError) as exc_info:
RlsRuleSchema().load({"dataset": 41})
assert "clause" in exc_info.value.messages


@pytest.mark.parametrize("dataset", [0, -1, False])
def test_rls_rule_schema_rejects_falsy_dataset(dataset: Any) -> None:
"""
A falsy ``dataset`` (``0``, a negative id, or ``false`` which marshmallow
coerces to ``0``) would read as falsy in ``get_guest_rls_filters`` and
silently widen a scoped rule to every dataset. It is rejected at load time.
"""
with pytest.raises(ValidationError) as exc_info:
RlsRuleSchema().load({"dataset": dataset, "clause": "tenant_id = 1"})
assert "dataset" in exc_info.value.messages
Loading