Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
18 changes: 18 additions & 0 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -10,3 +10,4 @@ license = "MIT"
keywords = ["html", "parser", "editor", "dom"]

[dependencies]
html-escape = "0.2.13"
6 changes: 6 additions & 0 deletions src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,12 @@ pub enum Doctype {
#[derive(Debug, Clone)]
pub enum Node {
Element(Element),
/// A text node in the DOM. The contents of a `Text` node have all entities expanded.
/// For example, parsing `I &lt;3 HTML` results in `Text("I <3 HTML")`.
///
/// Note that `<script>` and `<style>` are considered special, and entities inside
/// are not expanded. This is also respected when serializing a `<script>` or
/// `<style>` element using [Htmlifiable::html](operation::Htmlifiable::html).
Text(String),
Comment(String),
Doctype(Doctype),
Expand Down
24 changes: 20 additions & 4 deletions src/operation/html.rs
Original file line number Diff line number Diff line change
Expand Up @@ -29,11 +29,27 @@ pub trait Htmlifiable {

impl Htmlifiable for Element {
fn html(&self) -> String {
let children_html = match self.name.as_str() {
"style" | "script" => {
// <style> and <script> tags should not have their contents escaped
let mut html = String::new();
for node in &self.children {
if let Node::Text(text) = node {
html.push_str(text.as_str());
} else {
html.push_str(node.html().as_str());
}
}
html
}
_ => self.children.html(),
};

if self.attrs.is_empty() {
return if VOID_TAGS.contains(&self.name.as_str()) {
format!("<{}>", self.name)
} else {
format!("<{}>{}</{}>", self.name, self.children.html(), self.name)
format!("<{}>{}</{}>", self.name, children_html, self.name)
};
}
let attrs = self
Expand All @@ -43,7 +59,7 @@ impl Htmlifiable for Element {
if v.is_empty() {
k.to_string()
} else {
format!(r#"{}="{}""#, k, v)
format!(r#"{}="{}""#, k, html_escape::encode_double_quoted_attribute(&v).into_owned())
}
})
.collect::<Vec<_>>()
Expand All @@ -56,7 +72,7 @@ impl Htmlifiable for Element {
"<{} {}>{}</{}>",
self.name,
attrs,
self.children.html(),
children_html,
self.name
)
}
Expand All @@ -67,7 +83,7 @@ impl Htmlifiable for Node {
fn html(&self) -> String {
match self {
Node::Element(element) => element.html(),
Node::Text(text) => text.to_string(),
Node::Text(text) => html_escape::encode_text(text).into_owned(),
Node::Comment(comment) => format!("<!--{}-->", comment),
Node::Doctype(doctype) => match &doctype {
Doctype::Html => "<!DOCTYPE html>".to_string(),
Expand Down
4 changes: 2 additions & 2 deletions src/parse.rs
Original file line number Diff line number Diff line change
Expand Up @@ -75,7 +75,7 @@ fn html_to_stack(html: &str) -> Result<Vec<Token>, String> {
let txt_text = String::from_iter(chars_stack);
chars_stack = Vec::new();
// Push the text we just got to the token stack.
token_stack.push(Token::Text(txt_text));
token_stack.push(Token::from_raw_text(txt_text));
}
chars_stack.push(ch);
}
Expand Down Expand Up @@ -120,7 +120,7 @@ fn html_to_stack(html: &str) -> Result<Vec<Token>, String> {
}
if !chars_stack.is_empty() {
let text = String::from_iter(chars_stack);
token_stack.push(Token::Text(text));
token_stack.push(Token::from_raw_text(text));
}
Ok(token_stack)
}
Expand Down
2 changes: 1 addition & 1 deletion src/parse/attrs.rs
Original file line number Diff line number Diff line change
Expand Up @@ -70,7 +70,7 @@ pub fn parse(attr_str: String) -> Vec<(String, String)> {
attr_pos = AttrPos::Space;
let value = String::from_iter(chars_stack);
chars_stack = Vec::new();
value_stack.push(value)
value_stack.push(html_escape::decode_html_entities(&value).into_owned())
}
} else {
chars_stack.push(ch)
Expand Down
8 changes: 8 additions & 0 deletions src/parse/token.rs
Original file line number Diff line number Diff line change
Expand Up @@ -83,6 +83,14 @@ impl Token {
Self::Comment(comment[4..comment.len() - 3].to_string())
}

/// Builds a `Text` token from raw (still escaped) text, decoding its HTML entities.
///
/// ```ignore
/// assert_eq!(
///     Token::from_raw_text("hello &amp; goodbye".to_string()),
///     Token::Text("hello & goodbye".to_string())
/// );
/// ```
pub fn from_raw_text(text: String) -> Self {
    let decoded = html_escape::decode_html_entities(&text);
    Self::Text(decoded.into_owned())
}

pub fn node(&self) -> Node {
self.clone().into_node()
}
Expand Down
126 changes: 126 additions & 0 deletions tests/escapes.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,126 @@
use html_editor::operation::*;
use html_editor::{parse, Node, Element};

// Parsing must decode character references in text nodes (`&lt;`, `&#34;`)
// and in attribute values (`&quot;`).
#[test]
fn test_parse() {
    const HTML: &str = r#"
<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="UTF-8">
<title>I &lt;3 &#34;escaping&#34;</title>
</head>
<body>
<div id="testee" attr="id-with-&quot;quotes&quot;-inside"></div>
</body>
</html>"#;

    let html = parse(HTML).unwrap();

    // Text content: entities inside <title> are expanded in the resulting node.
    let title_selector = Selector::from("title");
    let Some(title) = html.query(&title_selector) else {
        panic!("title selector failed to match");
    };
    assert_eq!(title.name, "title");

    let Some(Node::Text(title_content)) = title.children.first() else {
        panic!("<title> with no text child");
    };
    assert_eq!(title_content, "I <3 \"escaping\"");

    // Attribute values: `&quot;` is decoded to a literal double quote.
    let div_selector = Selector::from("#testee");
    let Some(div) = html.query(&div_selector) else {
        panic!("div selector failed to match");
    };
    assert_eq!(
        div.attrs,
        vec![
            ("attr".into(), "id-with-\"quotes\"-inside".into()),
            ("id".into(), "testee".into()),
        ]
    );
}

// Serializing must escape special characters in both attribute values and text nodes.
#[test]
fn test_generate() {
    let element = Element::new(
        "dummy-tag",
        vec![(
            "attr-1".into(),
            r#"attribute containing < and " and &"#.into(),
        )],
        vec![Node::Text("fake <tag>".into())],
    );

    let expected = r#"<dummy-tag attr-1="attribute containing &lt; and &quot; and &amp;">fake &lt;tag&gt;</dummy-tag>"#;
    assert_eq!(element.html(), expected);
}

// Nothing inside script and style tags should be escaped: when parsing, their
// text content must come back exactly as it was written in the document.
#[test]
fn no_unescapes_in_script_and_style() {
    const HTML: &str = r#"
<!DOCTYPE html>
<html lang="en">
<head>
<script>let text = "this tag shouldn't be escaped -> <p> hi </p>"</script>
<style>main:before { content: "fake <b>tag</b>"; }</style>
</head>
</html>"#;

    let html = parse(HTML).unwrap();

    // <script>: the markup-looking string literal stays a single raw text child.
    let script_selector = Selector::from("script");
    let Some(script) = html.query(&script_selector) else {
        panic!("script selector failed to match");
    };
    assert_eq!(script.name, "script");

    let Some(Node::Text(script_content)) = script.children.first() else {
        panic!("script had no text children");
    };
    assert_eq!(
        script_content,
        r#"let text = "this tag shouldn't be escaped -> <p> hi </p>""#
    );

    // <style>: same expectation for CSS content.
    let style_selector = Selector::from("style");
    let Some(style) = html.query(&style_selector) else {
        panic!("Couldn't find style");
    };

    let Some(Node::Text(style_content)) = style.children.first() else {
        panic!("style had no text children");
    };
    assert_eq!(
        style_content,
        r#"main:before { content: "fake <b>tag</b>"; }"#
    );
}

// Serializing <script> and <style> elements must emit their text children verbatim,
// without entity-escaping `<`, `>` or `"`.
#[test]
fn no_escapes_in_script_and_style() {
    let script = Element::new(
        "script",
        vec![],
        vec![Node::Text(
            r#"let text = "this tag shouldn't be escaped -> <p> hi </p>""#.into(),
        )],
    );
    let style = Element::new(
        "style",
        vec![],
        vec![Node::Text(
            r#"main:before { content: "fake <b>tag</b>"; }"#.into(),
        )],
    );
    let head = Element::new(
        "head",
        vec![],
        vec![Node::Element(script), Node::Element(style)],
    );

    let expected = r#"<head><script>let text = "this tag shouldn't be escaped -> <p> hi </p>"</script><style>main:before { content: "fake <b>tag</b>"; }</style></head>"#;
    assert_eq!(head.html(), expected);
}