From 7ca0875fa871423fa4d5753e4913d4bf9fb9271e Mon Sep 17 00:00:00 2001 From: Kornel Date: Tue, 24 Mar 2026 01:03:22 +0000 Subject: [PATCH] Limit ns stack depth --- src/parser/lexer/actions.rs | 2 +- src/parser/lexer/mod.rs | 8 ++- src/parser/tag_scanner/mod.rs | 3 + .../tree_builder_simulator/ambiguity_guard.rs | 69 ++++++++++++++----- src/parser/tree_builder_simulator/mod.rs | 10 ++- src/rewriter/mod.rs | 23 +++++++ 6 files changed, 94 insertions(+), 21 deletions(-) diff --git a/src/parser/lexer/actions.rs b/src/parser/lexer/actions.rs index ead181fc..2012962c 100644 --- a/src/parser/lexer/actions.rs +++ b/src/parser/lexer/actions.rs @@ -91,7 +91,7 @@ impl StateMachineActions for Lexer { self.set_last_text_type(TextType::Data); if let Some(feedback) = feedback { - self.handle_tree_builder_feedback(context, feedback, &lexeme); + self.handle_tree_builder_feedback(context, feedback, &lexeme)?; } if let StartTag { diff --git a/src/parser/lexer/mod.rs b/src/parser/lexer/mod.rs index 56b28071..c552a7f9 100644 --- a/src/parser/lexer/mod.rs +++ b/src/parser/lexer/mod.rs @@ -84,17 +84,21 @@ impl Lexer { context: &mut ParserContext, feedback: TreeBuilderFeedback, lexeme: &TagLexeme<'_>, - ) { + ) -> Result<(), ParsingAmbiguityError> { match feedback { TreeBuilderFeedback::SwitchTextType(text_type) => self.set_last_text_type(text_type), TreeBuilderFeedback::SetAllowCdata(cdata_allowed) => self.cdata_allowed = cdata_allowed, TreeBuilderFeedback::RequestLexeme(mut callback) => { let feedback = callback(&mut context.tree_builder_simulator, lexeme); - self.handle_tree_builder_feedback(context, feedback, lexeme); + self.handle_tree_builder_feedback(context, feedback, lexeme)?; + } + TreeBuilderFeedback::DepthExceeded => { + return Err(ParsingAmbiguityError::depth_exceeded()); } TreeBuilderFeedback::None => (), } + Ok(()) } #[inline] diff --git a/src/parser/tag_scanner/mod.rs b/src/parser/tag_scanner/mod.rs index 2f3cf81b..beea8580 100644 --- a/src/parser/tag_scanner/mod.rs +++ b/src/parser/tag_scanner/mod.rs @@ -124,6 +124,9 @@ impl TagScanner { } TreeBuilderFeedback::RequestLexeme(_) => Some(feedback), TreeBuilderFeedback::None => None, + TreeBuilderFeedback::DepthExceeded => { + return Err(ParsingAmbiguityError::depth_exceeded()); + } }) } diff --git a/src/parser/tree_builder_simulator/ambiguity_guard.rs b/src/parser/tree_builder_simulator/ambiguity_guard.rs index cd83796c..4a59790b 100644 --- a/src/parser/tree_builder_simulator/ambiguity_guard.rs +++ b/src/parser/tree_builder_simulator/ambiguity_guard.rs @@ -66,27 +66,64 @@ use thiserror::Error; /// correct parsing context. /// /// [`strict`]: ../struct.Settings.html#structfield.strict -#[derive(Error, Debug, Eq, PartialEq)] +#[derive(Error, Eq, PartialEq)] pub struct ParsingAmbiguityError { - on_tag_name: Box, + cause: Cause, } -impl Display for ParsingAmbiguityError { +impl ParsingAmbiguityError { + pub(crate) fn depth_exceeded() -> Self { + Self { + cause: Cause::DepthExceeded, + } + } +} + +#[derive(Eq, PartialEq)] +pub(crate) enum Cause { + AmbiguousTextTypeSwitch(Box), + DepthExceeded, +} + +impl Display for Cause { #[cold] fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - write!( - f, - concat!( - "The parser has encountered a text content tag (`<{}>`) in the context where it is ", - "ambiguous whether this tag should be ignored or not. And, thus, it is unclear whether ", - "consequent content should be parsed as raw text or HTML markup.", - "\n\n", - "This error occurs due to the limited capabilities of the streaming parsing. However, ", - "almost all of the cases of this error are caused by a non-conforming markup (e.g. a ", - "`