diff --git a/Changelog.md b/Changelog.md index 8b23f7ff..6b1fbdc4 100644 --- a/Changelog.md +++ b/Changelog.md @@ -16,8 +16,23 @@ ### New Features +- [#970]: Add `NsReader::resolver_mut()` and + `NamespaceResolver::{max_declarations_per_element, set_max_declarations_per_element}`. + ### Bug Fixes +- [#970]: `NamespaceResolver::push` (and hence every `NsReader` `Start`/`Empty` + event) now rejects a start tag that declares more than + `DEFAULT_MAX_DECLARATIONS_PER_ELEMENT` (256) `xmlns` / `xmlns:*` namespace + bindings, returning the new `NamespaceError::TooManyDeclarations`. Previously + `push` allocated one `NamespaceBinding` per declaration with no upper bound, + before the event was returned to the caller, so an `NsReader` consumer could + not bound its memory exposure on untrusted input. The limit is configurable + via `NamespaceResolver::set_max_declarations_per_element` (use `usize::MAX` + to disable). + +[#970]: https://github.com/tafia/quick-xml/issues/970 + ### Misc Changes diff --git a/src/name.rs b/src/name.rs index 055d1f03..ab92a4c6 100644 --- a/src/name.rs +++ b/src/name.rs @@ -40,6 +40,13 @@ pub enum NamespaceError { /// /// Contains the prefix that is tried to be bound. InvalidPrefixForXmlns(Vec), + /// A single start tag declared more `xmlns` / `xmlns:*` namespace bindings + /// than the configured [`NamespaceResolver::max_declarations_per_element`] + /// limit. Contains the configured limit. + /// + /// This bounds the heap allocated by [`NamespaceResolver::push`] (and hence + /// by [`NsReader`](crate::reader::NsReader)) on untrusted input. + TooManyDeclarations(usize), } impl fmt::Display for NamespaceError { @@ -70,6 +77,14 @@ impl fmt::Display for NamespaceError { write_byte_string(f, prefix)?; f.write_str("' cannot be bound to 'http://www.w3.org/2000/xmlns/'") } + Self::TooManyDeclarations(limit) => { + write!( + f, + "start tag declares more than {} namespace bindings; \ + raise the limit with NamespaceResolver::set_max_declarations_per_element", + limit, + ) + } } } } @@ -497,14 +512,32 @@ pub struct NamespaceResolver { /// The number of open tags at the moment. We need to keep track of this to know which namespace /// declarations to remove when we encounter an `End` event. nesting_level: u16, + /// Maximum number of `xmlns` / `xmlns:*` declarations [`push`](Self::push) + /// will accept on a single start tag before returning + /// [`NamespaceError::TooManyDeclarations`]. See + /// [`set_max_declarations_per_element`](Self::set_max_declarations_per_element). + max_declarations_per_element: usize, } +/// Default limit on the number of `xmlns` / `xmlns:*` declarations +/// [`NamespaceResolver::push`] will accept on a single start tag. +/// +/// Real-world XML dialects (XHTML, SVG, SOAP, RSS, RRDP, ...) declare a handful +/// of namespaces per element; 256 is orders of magnitude above any legitimate +/// document while bounding the heap allocated for one `<... xmlns:...>` tag to +/// a few kilobytes regardless of input size. +pub const DEFAULT_MAX_DECLARATIONS_PER_ELEMENT: usize = 256; + impl Debug for NamespaceResolver { fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result { f.debug_struct("NamespaceResolver") .field("buffer", &Bytes(&self.buffer)) .field("bindings", &self.bindings) .field("nesting_level", &self.nesting_level) + .field( + "max_declarations_per_element", + &self.max_declarations_per_element, + ) .finish() } } @@ -555,6 +588,7 @@ impl Default for NamespaceResolver { buffer, bindings, nesting_level: 0, + max_declarations_per_element: DEFAULT_MAX_DECLARATIONS_PER_ELEMENT, } } } @@ -673,11 +707,18 @@ impl NamespaceResolver { /// [namespace bindings]: https://www.w3.org/TR/xml-names11/#dt-NSDecl pub fn push(&mut self, start: &BytesStart) -> Result<(), NamespaceError> { self.nesting_level += 1; + let mut count = 0usize; // adds new namespaces for attributes starting with 'xmlns:' and for the 'xmlns' // (default namespace) attribute. for a in start.attributes().with_checks(false) { if let Ok(Attribute { key: k, value: v }) = a { if let Some(prefix) = k.as_namespace_binding() { + if count >= self.max_declarations_per_element { + return Err(NamespaceError::TooManyDeclarations( + self.max_declarations_per_element, + )); + } + count += 1; self.add(prefix, Namespace(&v))?; } } else { @@ -687,6 +728,32 @@ impl NamespaceResolver { Ok(()) } + /// Returns the maximum number of `xmlns` / `xmlns:*` declarations that + /// [`push`](Self::push) will accept on a single start tag before returning + /// [`NamespaceError::TooManyDeclarations`]. + /// + /// Defaults to [`DEFAULT_MAX_DECLARATIONS_PER_ELEMENT`]. + #[inline] + pub const fn max_declarations_per_element(&self) -> usize { + self.max_declarations_per_element + } + + /// Sets the maximum number of `xmlns` / `xmlns:*` declarations that + /// [`push`](Self::push) will accept on a single start tag. + /// + /// `push` is called by [`NsReader`](crate::reader::NsReader) for every + /// `Start`/`Empty` event *before* the event is returned to the caller, so + /// without this limit a start tag with many `xmlns:*` attributes drives + /// unbounded heap allocation that the caller cannot intercept. See + /// . + /// + /// Pass `usize::MAX` to disable the limit. + #[inline] + pub fn set_max_declarations_per_element(&mut self, limit: usize) -> &mut Self { + self.max_declarations_per_element = limit; + self + } + /// Ends a top-most scope by popping all [namespace bindings], that was added by /// last call to [`Self::push()`] and [`Self::add()`]. /// @@ -1191,6 +1258,52 @@ mod namespaces { use pretty_assertions::assert_eq; use ResolveResult::*; + /// Regression test for : + /// `push()` previously allocated one `NamespaceBinding` per `xmlns:*` + /// attribute with no upper bound, before the caller ever sees the event. + #[test] + fn push_rejects_too_many_declarations() { + let mut tag = String::from("e"); + for i in 0..=DEFAULT_MAX_DECLARATIONS_PER_ELEMENT { + tag.push_str(&format!(" xmlns:p{}=''", i)); + } + let mut resolver = NamespaceResolver::default(); + assert_eq!( + resolver.push(&BytesStart::from_content(&tag, 1)), + Err(NamespaceError::TooManyDeclarations( + DEFAULT_MAX_DECLARATIONS_PER_ELEMENT + )), + ); + + // Exactly at the limit is accepted. + let mut tag = String::from("e"); + for i in 0..DEFAULT_MAX_DECLARATIONS_PER_ELEMENT { + tag.push_str(&format!(" xmlns:p{}=''", i)); + } + let mut resolver = NamespaceResolver::default(); + assert_eq!(resolver.push(&BytesStart::from_content(&tag, 1)), Ok(())); + + // The limit is configurable, and `usize::MAX` disables it. + let mut resolver = NamespaceResolver::default(); + resolver.set_max_declarations_per_element(2); + assert_eq!( + resolver.push(&BytesStart::from_content( + "e xmlns:a='' xmlns:b='' xmlns:c=''", + 1, + )), + Err(NamespaceError::TooManyDeclarations(2)), + ); + let mut resolver = NamespaceResolver::default(); + resolver.set_max_declarations_per_element(usize::MAX); + assert_eq!( + resolver.push(&BytesStart::from_content( + "e xmlns:a='' xmlns:b='' xmlns:c=''", + 1, + )), + Ok(()), + ); + } + /// Unprefixed attribute names (resolved with `false` flag) never have a namespace /// according to : /// diff --git a/src/reader/ns_reader.rs b/src/reader/ns_reader.rs index 46858cc8..e0c89d64 100644 --- a/src/reader/ns_reader.rs +++ b/src/reader/ns_reader.rs @@ -121,6 +121,16 @@ impl NsReader { pub const fn resolver(&self) -> &NamespaceResolver { &self.ns_resolver } + + /// Returns a mutable reference to the storage of namespace bindings + /// associated with this reader. + /// + /// Useful for configuring the resolver, e.g. to change the + /// [per-element namespace-declaration limit](NamespaceResolver::set_max_declarations_per_element). + #[inline] + pub fn resolver_mut(&mut self) -> &mut NamespaceResolver { + &mut self.ns_resolver + } } impl NsReader {