From 89678dd44e5ba39609f5191e7084cc156af5b372 Mon Sep 17 00:00:00 2001 From: Claude Code Date: Thu, 18 Jun 2026 21:26:56 -0700 Subject: [PATCH 1/2] fix(security): reject unknown fields on guest-token RLS rules RlsRuleSchema inherited from PermissiveSchema, which silently drops unknown fields. A guest-token rls rule that used a mistyped or legacy scope key -- most commonly datasource instead of dataset -- therefore had that key stripped during deserialization, leaving a rule with no dataset. get_guest_rls_filters treats a rule with no dataset as global, so the clause was applied to every dataset the embedded resource can reach rather than the single dataset the caller intended. Make RlsRuleSchema strict (Meta.unknown = RAISE) so an unexpected field raises a ValidationError (HTTP 400) before a token is issued, instead of producing a silently-global rule. This aligns runtime validation with the documented RlsRule contract (dataset and clause). Valid dataset-scoped and global rules are unaffected; the rest of the guest-token payload remains permissive. Co-Authored-By: Claude Fable 5 --- UPDATING.md | 4 +++ superset/security/api.py | 23 ++++++++++++-- tests/unit_tests/security/api_test.py | 43 +++++++++++++++++++++++++++ 3 files changed, 68 insertions(+), 2 deletions(-) diff --git a/UPDATING.md b/UPDATING.md index 01e59b4f6fb0..e02f9b9542db 100644 --- a/UPDATING.md +++ b/UPDATING.md @@ -24,6 +24,10 @@ assists people when migrating to a new version. ## Next +### Guest-token RLS rules reject unknown fields + +The `rls` rules passed to `POST /api/v1/security/guest_token/` are now validated strictly: a rule may only contain `dataset` and `clause`. Previously unknown fields were silently dropped, so a mistyped or legacy scope key (most commonly `datasource` instead of `dataset`) produced a rule with no `dataset`, which is treated as a *global* rule applied to every dataset the embedded resource can reach. Such a request now returns HTTP 400 identifying the offending field instead of issuing a token with an unintended global rule. Integrators that were sending extra fields in RLS rules must remove them; valid dataset-scoped (`{"dataset": 41, "clause": "..."}`) and global (`{"clause": "..."}`) rules are unaffected. + ### Pivot table First/Last aggregations follow data order The pivot table chart's `First` and `Last` aggregations now return the first and last value in data (query result) order, instead of effectively returning the minimum and maximum. Existing pivot tables that use these aggregations for totals/subtotals may show different values after upgrading. For deterministic results, ensure the underlying query has a stable sort order. diff --git a/superset/security/api.py b/superset/security/api.py index c38b3c590fe0..c49535d46e79 100644 --- a/superset/security/api.py +++ b/superset/security/api.py @@ -24,7 +24,7 @@ from flask_appbuilder.security.decorators import permission_name, protect from flask_appbuilder.security.sqla.models import RegisterUser, Role from flask_wtf.csrf import generate_csrf -from marshmallow import EXCLUDE, fields, post_load, Schema, ValidationError +from marshmallow import EXCLUDE, fields, post_load, RAISE, Schema, ValidationError from sqlalchemy import asc, desc from sqlalchemy.orm import selectinload @@ -74,7 +74,26 @@ def convert_enum_to_value( # pylint: disable=unused-argument return data -class RlsRuleSchema(PermissiveSchema): +class RlsRuleSchema(Schema): + """ + Schema for a single row-level security rule attached to a guest token. + + Unlike the other guest-token schemas, this one rejects unknown fields + instead of silently dropping them. A rule is scoped to a dataset only when + it carries a valid integer ``dataset`` key; a rule with no ``dataset`` is + treated as global and its ``clause`` is applied to every dataset the + embedded resource can reach (see ``get_guest_rls_filters``). Silently + excluding an unexpected field -- most commonly a mistyped or legacy scope + key such as ``datasource`` -- would therefore turn an intended + dataset-scoped rule into a global one without any feedback to the caller. + Raising on unknown fields surfaces the mistake as an HTTP 400 before a + token is ever issued and keeps the accepted payload aligned with the + documented ``RlsRule`` contract (``dataset`` and ``clause``). + """ + + class Meta: # pylint: disable=too-few-public-methods + unknown = RAISE + dataset = fields.Integer() clause = fields.String(required=True) # todo other options? diff --git a/tests/unit_tests/security/api_test.py b/tests/unit_tests/security/api_test.py index cc7ecaa77d2a..99ce11a834ab 100644 --- a/tests/unit_tests/security/api_test.py +++ b/tests/unit_tests/security/api_test.py @@ -17,8 +17,10 @@ from typing import Any import pytest +from marshmallow import ValidationError from superset.extensions import csrf +from superset.security.api import RlsRuleSchema @pytest.mark.parametrize( @@ -60,3 +62,44 @@ def test_csrf_exempt_blueprints_with_api_key(app: Any, app_context: None) -> Non config is enabled. """ assert "ApiKeyApi" in {blueprint.name for blueprint in csrf._exempt_blueprints} + + +def test_rls_rule_schema_accepts_dataset_scoped_rule() -> None: + """A rule with an integer ``dataset`` and a ``clause`` loads unchanged.""" + result = RlsRuleSchema().load({"dataset": 41, "clause": "tenant_id = 1"}) + assert result == {"dataset": 41, "clause": "tenant_id = 1"} + + +def test_rls_rule_schema_accepts_global_rule() -> None: + """A rule with no ``dataset`` (a global rule) is still valid.""" + result = RlsRuleSchema().load({"clause": "tenant_id = 1"}) + assert result == {"clause": "tenant_id = 1"} + + +def test_rls_rule_schema_rejects_unknown_scope_key() -> None: + """ + A mistyped or legacy scope key (e.g. ``datasource`` instead of ``dataset``) + used to be silently dropped, turning the rule into an unintended global rule. + It now raises a ``ValidationError`` that names the offending field. + """ + with pytest.raises(ValidationError) as exc_info: + RlsRuleSchema().load( + {"datasource": {"id": 41, "type": "table"}, "clause": "tenant_id = 1"} + ) + assert "datasource" in exc_info.value.messages + + +def test_rls_rule_schema_rejects_unknown_fields() -> None: + """Any unexpected field on an RLS rule is rejected.""" + with pytest.raises(ValidationError) as exc_info: + RlsRuleSchema().load( + {"dataset": 41, "clause": "tenant_id = 1", "extra": "nope"} + ) + assert "extra" in exc_info.value.messages + + +def test_rls_rule_schema_requires_clause() -> None: + """``clause`` remains required.""" + with pytest.raises(ValidationError) as exc_info: + RlsRuleSchema().load({"dataset": 41}) + assert "clause" in exc_info.value.messages From ee21f35d21cb9fabdeeb8cfb02d9f302e31566bc Mon Sep 17 00:00:00 2001 From: Evan Date: Thu, 18 Jun 2026 22:56:45 -0700 Subject: [PATCH 2/2] fix(security): reject falsy dataset ids on guest-token RLS rules A bare Integer field allowed dataset=0 (and false, coerced to 0) which reads as falsy in get_guest_rls_filters and silently widens a scoped rule to every dataset. Constrain dataset to strict, positive integers. Co-Authored-By: Claude Opus 4.8 --- superset/security/api.py | 28 ++++++++++++++++++++------- tests/unit_tests/security/api_test.py | 12 ++++++++++++ 2 files changed, 33 insertions(+), 7 deletions(-) diff --git a/superset/security/api.py b/superset/security/api.py index c49535d46e79..96f94b1b0942 100644 --- a/superset/security/api.py +++ b/superset/security/api.py @@ -24,7 +24,15 @@ from flask_appbuilder.security.decorators import permission_name, protect from flask_appbuilder.security.sqla.models import RegisterUser, Role from flask_wtf.csrf import generate_csrf -from marshmallow import EXCLUDE, fields, post_load, RAISE, Schema, ValidationError +from marshmallow import ( + EXCLUDE, + fields, + post_load, + RAISE, + Schema, + validate, + ValidationError, +) from sqlalchemy import asc, desc from sqlalchemy.orm import selectinload @@ -80,21 +88,27 @@ class RlsRuleSchema(Schema): Unlike the other guest-token schemas, this one rejects unknown fields instead of silently dropping them. A rule is scoped to a dataset only when - it carries a valid integer ``dataset`` key; a rule with no ``dataset`` is - treated as global and its ``clause`` is applied to every dataset the - embedded resource can reach (see ``get_guest_rls_filters``). Silently - excluding an unexpected field -- most commonly a mistyped or legacy scope - key such as ``datasource`` -- would therefore turn an intended + it carries a valid positive integer ``dataset`` key; a rule with no + ``dataset`` is treated as global and its ``clause`` is applied to every + dataset the embedded resource can reach (see ``get_guest_rls_filters``). + Silently excluding an unexpected field -- most commonly a mistyped or + legacy scope key such as ``datasource`` -- would therefore turn an intended dataset-scoped rule into a global one without any feedback to the caller. Raising on unknown fields surfaces the mistake as an HTTP 400 before a token is ever issued and keeps the accepted payload aligned with the documented ``RlsRule`` contract (``dataset`` and ``clause``). + + For the same reason ``dataset`` is constrained to strict, positive + integers: a falsy value such as ``0`` (or ``false``, which marshmallow + coerces to ``0``) would pass a bare ``Integer`` field but then read as + falsy in ``get_guest_rls_filters``, silently widening a scoped rule to + every dataset. """ class Meta: # pylint: disable=too-few-public-methods unknown = RAISE - dataset = fields.Integer() + dataset = fields.Integer(strict=True, validate=validate.Range(min=1)) clause = fields.String(required=True) # todo other options? diff --git a/tests/unit_tests/security/api_test.py b/tests/unit_tests/security/api_test.py index 99ce11a834ab..88ea25948a50 100644 --- a/tests/unit_tests/security/api_test.py +++ b/tests/unit_tests/security/api_test.py @@ -103,3 +103,15 @@ def test_rls_rule_schema_requires_clause() -> None: with pytest.raises(ValidationError) as exc_info: RlsRuleSchema().load({"dataset": 41}) assert "clause" in exc_info.value.messages + + +@pytest.mark.parametrize("dataset", [0, -1, False]) +def test_rls_rule_schema_rejects_falsy_dataset(dataset: Any) -> None: + """ + A falsy ``dataset`` (``0``, a negative id, or ``false`` which marshmallow + coerces to ``0``) would read as falsy in ``get_guest_rls_filters`` and + silently widen a scoped rule to every dataset. It is rejected at load time. + """ + with pytest.raises(ValidationError) as exc_info: + RlsRuleSchema().load({"dataset": dataset, "clause": "tenant_id = 1"}) + assert "dataset" in exc_info.value.messages