Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,10 @@
- Renamed the internal-use feature `integration_test` to `_integration_test`. The leading
underscore signals to `cargo-semver-checks` and similar tools that the feature is not
part of the public API.
- `Comment::set_text` now also rejects `--!>`, a leading `>`, and a leading `->`, which
WHATWG-conformant browsers treat as comment terminators. Previously only `-->` was
rejected, so a caller passing attacker-influenced data could let an attacker break out
of the comment and inject HTML (security fix).

## v2.9.0

Expand Down
68 changes: 46 additions & 22 deletions src/rewritable_units/tokens/comment.rs
Original file line number Diff line number Diff line change
Expand Up @@ -12,8 +12,9 @@ use thiserror::Error;
/// An error that occurs when invalid value is provided for the HTML comment text.
#[derive(Error, Debug, Eq, PartialEq, Copy, Clone)]
pub enum CommentTextError {
/// The provided value contains the `-->` character sequence that preemptively closes the comment.
#[error("Comment text shouldn't contain comment closing sequence (`-->`).")]
/// The provided value contains a byte sequence that a WHATWG-conformant browser
/// treats as a comment terminator (`-->`, `--!>`, a leading `>`, or a leading `->`).
#[error("Comment text shouldn't contain a comment-closing sequence.")]
CommentClosingSequence,

/// The provided value contains a character that can't be represented in the document's [`encoding`].
Expand All @@ -36,6 +37,20 @@ pub struct Comment<'i> {
user_data: Box<dyn Any>,
}

/// Returns `true` if `text`, when emitted between `<!--` and `-->`, would let a
/// WHATWG-conformant browser close the comment earlier than the trailing `-->`
/// marker, leaking whatever follows as live markup.
///
/// Four sequences trigger this:
///
/// 1. `-->` anywhere in the body (comment-end state).
/// 2. `--!>` anywhere in the body (comment-end-bang state).
/// 3. A leading `>` (abrupt-closing-of-empty-comment from comment-start).
/// 4. A leading `->` (abrupt-closing-of-empty-comment from comment-start-dash).
fn contains_comment_closing_sequence(text: &str) -> bool {
text.contains("-->") || text.contains("--!>") || text.starts_with('>') || text.starts_with("->")
}

impl<'i> Comment<'i> {
#[inline]
#[must_use]
Expand Down Expand Up @@ -68,22 +83,22 @@ impl<'i> Comment<'i> {
/// Sets the text of the comment.
#[inline]
pub fn set_text(&mut self, text: &str) -> Result<(), CommentTextError> {
if text.contains("-->") {
Err(CommentTextError::CommentClosingSequence)
} else {
// NOTE: if character can't be represented in the given
// encoding then encoding_rs replaces it with a numeric
// character reference. Character references are not
// supported in comments, so we need to bail.
match BytesCow::owned_from_str_without_replacements(text, self.encoding) {
Ok(text) => {
self.text = text;
self.raw.set_modified();

Ok(())
}
Err(_) => Err(CommentTextError::UnencodableCharacter),
if contains_comment_closing_sequence(text) {
return Err(CommentTextError::CommentClosingSequence);
}

// NOTE: if character can't be represented in the given
// encoding then encoding_rs replaces it with a numeric
// character reference. Character references are not
// supported in comments, so we need to bail.
match BytesCow::owned_from_str_without_replacements(text, self.encoding) {
Ok(text) => {
self.text = text;
self.raw.set_modified();

Ok(())
}
Err(_) => Err(CommentTextError::UnencodableCharacter),
}
}

Expand Down Expand Up @@ -314,11 +329,20 @@ mod tests {

#[test]
fn comment_closing_sequence_in_text() {
rewrite_comment(b"<!-- foo -->", UTF_8, |c| {
let err = c.set_text("foo -- bar --> baz").unwrap_err();

assert_eq!(err, CommentTextError::CommentClosingSequence);
});
// Every byte sequence here is treated as a comment terminator by a
// WHATWG-conformant browser when emitted between `<!--` and `-->`.
// See `contains_comment_closing_sequence`.
for payload in ["foo -- bar --> baz", "foo --!> bar", ">foo", "->foo"] {
rewrite_comment(b"<!-- foo -->", UTF_8, |c| {
let err = c.set_text(payload).unwrap_err();

assert_eq!(
err,
CommentTextError::CommentClosingSequence,
"expected `{payload}` to be rejected",
);
});
}
}

#[test]
Expand Down
Loading