diff --git a/UPDATING.md b/UPDATING.md index 01e59b4f6fb0..e02f9b9542db 100644 --- a/UPDATING.md +++ b/UPDATING.md @@ -24,6 +24,10 @@ assists people when migrating to a new version. ## Next +### Guest-token RLS rules reject unknown fields + +The `rls` rules passed to `POST /api/v1/security/guest_token/` are now validated strictly: a rule may only contain `dataset` and `clause`. Previously unknown fields were silently dropped, so a mistyped or legacy scope key (most commonly `datasource` instead of `dataset`) produced a rule with no `dataset`, which is treated as a *global* rule applied to every dataset the embedded resource can reach. Such a request now returns HTTP 400 identifying the offending field instead of issuing a token with an unintended global rule. Integrators that were sending extra fields in RLS rules must remove them; valid dataset-scoped (`{"dataset": 41, "clause": "..."}`) and global (`{"clause": "..."}`) rules are unaffected. + ### Pivot table First/Last aggregations follow data order The pivot table chart's `First` and `Last` aggregations now return the first and last value in data (query result) order, instead of effectively returning the minimum and maximum. Existing pivot tables that use these aggregations for totals/subtotals may show different values after upgrading. For deterministic results, ensure the underlying query has a stable sort order. diff --git a/superset/security/api.py b/superset/security/api.py index c38b3c590fe0..96f94b1b0942 100644 --- a/superset/security/api.py +++ b/superset/security/api.py @@ -24,7 +24,15 @@ from flask_appbuilder.security.decorators import permission_name, protect from flask_appbuilder.security.sqla.models import RegisterUser, Role from flask_wtf.csrf import generate_csrf -from marshmallow import EXCLUDE, fields, post_load, Schema, ValidationError +from marshmallow import ( + EXCLUDE, + fields, + post_load, + RAISE, + Schema, + validate, + ValidationError, +) from sqlalchemy import asc, desc from sqlalchemy.orm import selectinload @@ -74,8 +82,33 @@ def convert_enum_to_value( # pylint: disable=unused-argument return data -class RlsRuleSchema(PermissiveSchema): - dataset = fields.Integer() +class RlsRuleSchema(Schema): + """ + Schema for a single row-level security rule attached to a guest token. + + Unlike the other guest-token schemas, this one rejects unknown fields + instead of silently dropping them. A rule is scoped to a dataset only when + it carries a valid positive integer ``dataset`` key; a rule with no + ``dataset`` is treated as global and its ``clause`` is applied to every + dataset the embedded resource can reach (see ``get_guest_rls_filters``). + Silently excluding an unexpected field -- most commonly a mistyped or + legacy scope key such as ``datasource`` -- would therefore turn an intended + dataset-scoped rule into a global one without any feedback to the caller. + Raising on unknown fields surfaces the mistake as an HTTP 400 before a + token is ever issued and keeps the accepted payload aligned with the + documented ``RlsRule`` contract (``dataset`` and ``clause``). + + For the same reason ``dataset`` is constrained to strict, positive + integers: a falsy value such as ``0`` (or ``false``, which marshmallow + coerces to ``0``) would pass a bare ``Integer`` field but then read as + falsy in ``get_guest_rls_filters``, silently widening a scoped rule to + every dataset. + """ + + class Meta: # pylint: disable=too-few-public-methods + unknown = RAISE + + dataset = fields.Integer(strict=True, validate=validate.Range(min=1)) clause = fields.String(required=True) # todo other options? diff --git a/tests/unit_tests/security/api_test.py b/tests/unit_tests/security/api_test.py index cc7ecaa77d2a..88ea25948a50 100644 --- a/tests/unit_tests/security/api_test.py +++ b/tests/unit_tests/security/api_test.py @@ -17,8 +17,10 @@ from typing import Any import pytest +from marshmallow import ValidationError from superset.extensions import csrf +from superset.security.api import RlsRuleSchema @pytest.mark.parametrize( @@ -60,3 +62,56 @@ def test_csrf_exempt_blueprints_with_api_key(app: Any, app_context: None) -> Non config is enabled. """ assert "ApiKeyApi" in {blueprint.name for blueprint in csrf._exempt_blueprints} + + +def test_rls_rule_schema_accepts_dataset_scoped_rule() -> None: + """A rule with an integer ``dataset`` and a ``clause`` loads unchanged.""" + result = RlsRuleSchema().load({"dataset": 41, "clause": "tenant_id = 1"}) + assert result == {"dataset": 41, "clause": "tenant_id = 1"} + + +def test_rls_rule_schema_accepts_global_rule() -> None: + """A rule with no ``dataset`` (a global rule) is still valid.""" + result = RlsRuleSchema().load({"clause": "tenant_id = 1"}) + assert result == {"clause": "tenant_id = 1"} + + +def test_rls_rule_schema_rejects_unknown_scope_key() -> None: + """ + A mistyped or legacy scope key (e.g. ``datasource`` instead of ``dataset``) + used to be silently dropped, turning the rule into an unintended global rule. + It now raises a ``ValidationError`` that names the offending field. + """ + with pytest.raises(ValidationError) as exc_info: + RlsRuleSchema().load( + {"datasource": {"id": 41, "type": "table"}, "clause": "tenant_id = 1"} + ) + assert "datasource" in exc_info.value.messages + + +def test_rls_rule_schema_rejects_unknown_fields() -> None: + """Any unexpected field on an RLS rule is rejected.""" + with pytest.raises(ValidationError) as exc_info: + RlsRuleSchema().load( + {"dataset": 41, "clause": "tenant_id = 1", "extra": "nope"} + ) + assert "extra" in exc_info.value.messages + + +def test_rls_rule_schema_requires_clause() -> None: + """``clause`` remains required.""" + with pytest.raises(ValidationError) as exc_info: + RlsRuleSchema().load({"dataset": 41}) + assert "clause" in exc_info.value.messages + + +@pytest.mark.parametrize("dataset", [0, -1, False]) +def test_rls_rule_schema_rejects_falsy_dataset(dataset: Any) -> None: + """ + A falsy ``dataset`` (``0``, a negative id, or ``false`` which marshmallow + coerces to ``0``) would read as falsy in ``get_guest_rls_filters`` and + silently widen a scoped rule to every dataset. It is rejected at load time. + """ + with pytest.raises(ValidationError) as exc_info: + RlsRuleSchema().load({"dataset": dataset, "clause": "tenant_id = 1"}) + assert "dataset" in exc_info.value.messages