diff --git a/.github/workflows/CI.yml b/.github/workflows/CI.yml index 5e073ac..b52ab78 100644 --- a/.github/workflows/CI.yml +++ b/.github/workflows/CI.yml @@ -26,7 +26,7 @@ jobs: - os: macOS-latest arch: x86 steps: - - uses: actions/checkout@v2 + - uses: actions/checkout@v4 - uses: julia-actions/setup-julia@v2 with: version: ${{ matrix.version }} @@ -41,9 +41,13 @@ jobs: ${{ runner.os }}-test-${{ env.cache-name }}- ${{ runner.os }}-test- ${{ runner.os }}- + - uses: actions/cache@v4 + with: + path: test/data/w3c + key: w3c-xmlconf-v20130923 - uses: julia-actions/julia-buildpkg@v1 - uses: julia-actions/julia-runtest@v1 - uses: julia-actions/julia-processcoverage@v1 - - uses: codecov/codecov-action@v1 + - uses: codecov/codecov-action@v5 with: - file: lcov.info + files: lcov.info diff --git a/.gitignore b/.gitignore index b000475..929dfc2 100644 --- a/.gitignore +++ b/.gitignore @@ -1,5 +1,7 @@ *Manifest.toml -*generated_xsd.jl -*.xml *.gz +*.tar *.DS_Store +*.claude +test/data/w3c/ +benchmarks/data/ diff --git a/CHANGELOG.md b/CHANGELOG.md new file mode 100644 index 0000000..13d6e29 --- /dev/null +++ b/CHANGELOG.md @@ -0,0 +1,158 @@ +# Changelog + +All notable changes to XML.jl will be documented in this file. + +The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/), +and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). + +## [Unreleased] + +### Added +- New streaming tokenizer (`XMLTokenizer` module) for fine-grained XML token iteration. +- XPath support via `xpath(node, path)`. +- `test/test_libxml2_testcases.jl`: 243 test cases borrowed from the [libxml2](https://github.com/GNOME/libxml2) test suite covering CDATA, comments, processing instructions, attributes, namespaces, DTD internal subsets, entity references, whitespace handling, Unicode, error cases, and real-world document patterns. 
+- `AbstractTrees` package extension: loading both `XML` and `AbstractTrees` enables `print_tree`, `PreOrderDFS`, `Leaves`, etc. on `Node` and `LazyNode`. + +### Fixed +- **Tokenizer: multi-byte UTF-8 in attribute values** — Parsing attribute values containing multi-byte UTF-8 characters (e.g., `value="café"`) could produce a `StringIndexError` because `attr_value()` used byte arithmetic (`ncodeunits - 1`) instead of `prevind` to strip quotes. The same issue existed in `_read_attr_value!`. +- **Tokenizer: quotes inside DTD comments** — A `"` or `'` character inside a `<!-- ... -->` comment within a DTD internal subset caused the tokenizer to misinterpret it as a quoted string delimiter, leading to an "Unterminated quoted string" error. The DOCTYPE body parser now correctly skips comment content. + +## [0.3.8] + +### Fixed +- `XML.write` now respects `xml:space="preserve"` and suppresses indentation for elements with this attribute ([#49]). + +## [0.3.7] + +### Fixed +- Resolved remaining issues from [#45] and fixed [#46] (whitespace preservation edge cases) ([#47]). + +## [0.3.6] + +### Added +- `XML.write` respects `xml:space="preserve"` on elements, suppressing automatic indentation ([#45]). + +### Fixed +- `String` type ambiguity on Julia nightly resolved ([#38]). + +## [0.3.5] + +### Fixed +- `depth` and `parent` functions corrected to work properly with the DOM tree API ([#37]). +- `escape` updated to no longer be idempotent — every `&` is now escaped, matching spec behavior ([#32], addressing [#31]). +- `pushfirst!` support added for `Node` children ([#29]). + +## [0.3.4] + +### Fixed +- Fixed [#26]. +- CI updated to use `julia-actions/cache@v4` and `lts` Julia version. + +## [0.3.3] + +### Added +- `h` constructor for concise element creation (e.g., `h.div("hello"; class="main")`). + +### Fixed +- Path definition error in README example ([#20]). + +## [0.3.2] + +### Fixed +- Minor typos. + +## [0.3.1] + +### Added +- Julia 1.6 compatibility ([#16]). 
+ +### Changed +- Smarter escaping logic. + +## [0.3.0] + +### Changed +- Attribute internal representation changed from `Dict` to `OrderedDict` (later reverted to `Vector{Pair}`). + +## [0.2.3] + +### Fixed +- Parse method fix. + +## [0.2.2] + +### Added +- DTD parsing via `parse_dtd`. +- `is_simple` and `simple_value` exports. +- `setindex!` methods for modifying attributes. +- `unescape` function. + +### Fixed +- DOCTYPE parsing made case-insensitive. + +## [0.2.1] + +### Fixed +- Write output fixes. + +## [0.2.0] + +### Changed +- Major rewrite: introduced `NodeType` enum, `Node{S}` parametric struct, callable `NodeType` constructors, and `XML.write`. +- Processing instruction support. +- Benchmarks added. + +## [0.1.3] + +### Changed +- Improved print output for `AbstractXMLNode`. + +## [0.1.2] + +### Added +- AbstractTrees 0.4 compatibility ([#5]). + +## [0.1.1] + +### Added +- `Node` implementation with `print_tree`. +- Color output in REPL display. +- Stopped stripping whitespace from text nodes. + +## [0.1.0] + +- Initial release. 
+ +[Unreleased]: https://github.com/JuliaComputing/XML.jl/compare/v0.3.8...HEAD +[0.3.8]: https://github.com/JuliaComputing/XML.jl/compare/v0.3.7...v0.3.8 +[0.3.7]: https://github.com/JuliaComputing/XML.jl/compare/v0.3.6...v0.3.7 +[0.3.6]: https://github.com/JuliaComputing/XML.jl/compare/v0.3.5...v0.3.6 +[0.3.5]: https://github.com/JuliaComputing/XML.jl/compare/v0.3.4...v0.3.5 +[0.3.4]: https://github.com/JuliaComputing/XML.jl/compare/v0.3.3...v0.3.4 +[0.3.3]: https://github.com/JuliaComputing/XML.jl/compare/v0.3.2...v0.3.3 +[0.3.2]: https://github.com/JuliaComputing/XML.jl/compare/v0.3.1...v0.3.2 +[0.3.1]: https://github.com/JuliaComputing/XML.jl/compare/v0.3.0...v0.3.1 +[0.3.0]: https://github.com/JuliaComputing/XML.jl/compare/v0.2.3...v0.3.0 +[0.2.3]: https://github.com/JuliaComputing/XML.jl/compare/v0.2.2...v0.2.3 +[0.2.2]: https://github.com/JuliaComputing/XML.jl/compare/v0.2.1...v0.2.2 +[0.2.1]: https://github.com/JuliaComputing/XML.jl/compare/v0.2.0...v0.2.1 +[0.2.0]: https://github.com/JuliaComputing/XML.jl/compare/v0.1.3...v0.2.0 +[0.1.3]: https://github.com/JuliaComputing/XML.jl/compare/v0.1.2...v0.1.3 +[0.1.2]: https://github.com/JuliaComputing/XML.jl/compare/v0.1.1...v0.1.2 +[0.1.1]: https://github.com/JuliaComputing/XML.jl/compare/v0.1.0...v0.1.1 +[0.1.0]: https://github.com/JuliaComputing/XML.jl/releases/tag/v0.1.0 + +[#5]: https://github.com/JuliaComputing/XML.jl/pull/5 +[#16]: https://github.com/JuliaComputing/XML.jl/pull/16 +[#20]: https://github.com/JuliaComputing/XML.jl/pull/20 +[#26]: https://github.com/JuliaComputing/XML.jl/issues/26 +[#29]: https://github.com/JuliaComputing/XML.jl/pull/29 +[#31]: https://github.com/JuliaComputing/XML.jl/issues/31 +[#32]: https://github.com/JuliaComputing/XML.jl/pull/32 +[#37]: https://github.com/JuliaComputing/XML.jl/pull/37 +[#38]: https://github.com/JuliaComputing/XML.jl/pull/38 +[#43]: https://github.com/JuliaComputing/XML.jl/issues/43 +[#45]: https://github.com/JuliaComputing/XML.jl/pull/45 +[#46]: 
https://github.com/JuliaComputing/XML.jl/issues/46 +[#47]: https://github.com/JuliaComputing/XML.jl/pull/47 +[#49]: https://github.com/JuliaComputing/XML.jl/pull/49 diff --git a/Project.toml b/Project.toml index 49b96c0..a42a821 100644 --- a/Project.toml +++ b/Project.toml @@ -1,12 +1,14 @@ name = "XML" uuid = "72c71f33-b9b6-44de-8c94-c961784809e2" +version = "0.4.0" authors = ["Josh Day and contributors"] -version = "0.3.8" -[deps] -Mmap = "a63ad114-7e13-5084-954f-fe012c677804" -OrderedCollections = "bac558e1-5e72-5ebc-8fee-abe8a469f55d" +[weakdeps] +AbstractTrees = "1520ce14-60c1-5f80-bbc7-55ef81b5835c" + +[extensions] +XMLAbstractTreesExt = "AbstractTrees" [compat] -OrderedCollections = "1.4, 1.5" -julia = "1.6" +AbstractTrees = "0.4" +julia = "1.9" diff --git a/README.md b/README.md index ddb1156..067c06d 100644 --- a/README.md +++ b/README.md @@ -4,39 +4,8 @@

Read and write XML in pure Julia.

-

- -# Introduction - -This package offers fast data structures for reading and writing XML files with a consistent interface: - -
- -### `Node`/`LazyNode` Interface: - -``` -nodetype(node) → XML.NodeType (an enum type) -tag(node) → String or Nothing -attributes(node) → OrderedDict{String, String} or Nothing -value(node) → String or Nothing -children(node) → Vector{typeof(node)} -is_simple(node) → Bool (whether node is simple .e.g. item) -simple_value(node) → e.g. "item" from item) -``` -
-### Extended Interface for `LazyNode` - -``` -depth(node) → Int -next(node) → typeof(node) -prev(node) → typeof(node) -parent(node) → typeof(node) -``` - -

- # Quickstart ```julia @@ -58,79 +27,76 @@ doc[end][2] # Second child of root # Node Element (6 children) ``` -

- -# Data Structures that Represent XML Nodes +
-## Preliminary: `NodeType` +# `Node` Interface -- Each item in an XML DOM is classified by its `NodeType`. -- Every `XML.jl` struct defines a `nodetype(x)` method that returns its `NodeType`. +Every node in the XML DOM is represented by `Node`, a single type parametrized on its string storage. -| NodeType | XML Representation | `Node` Constructor | -|----------|--------------------|------------------| -| `Document` | An entire document | `Document(children...)` -| `DTD` | `` | `DTD(...) ` -| `Declaration` | `` | `Declaration(; attrs...)` -| `ProcessingInstruction` | `` | `ProcessingInstruction(tag; attrs...)` -| `Comment` | `` | `Comment(text)` -| `CData` | `` | `CData(text)` -| `Element` | ` children... ` | `Element(tag, children...; attrs...)` -| `Text` | the `text` part of `text` | `Text(text)` +``` +nodetype(node) -> XML.NodeType (an enum) +tag(node) -> String or Nothing +attributes(node) -> XML.Attributes{String} or Nothing +value(node) -> String or Nothing +children(node) -> Vector{Node} +is_simple(node) -> Bool (e.g. text) +simple_value(node) -> e.g. "text" from text +```
-## `Node`: Probably What You're Looking For +## `NodeType` -- `read`-ing a `Node` loads the entire XML DOM in memory. -- See the table above for convenience constructors. -- `Node`s have some additional methods that aid in construction/mutation: +Each item in an XML DOM is classified by its `NodeType`: -```julia -# Add a child: -push!(parent::Node, child::Node) - -# Replace a child: -parent[2] = child - -# Add/change an attribute: -node["key"] = value +| NodeType | XML Representation | Constructor | +|----------|--------------------|-------------| +| `Document` | An entire document | `Document(children...)` | +| `DTD` | `<!DOCTYPE ...>` | `DTD(...)` | +| `Declaration` | `<?xml ...?>` | `Declaration(; attrs...)` | +| `ProcessingInstruction` | `<?tag ...?>` | `ProcessingInstruction(tag; attrs...)` | +| `Comment` | `<!-- ... -->` | `Comment(text)` | +| `CData` | `<![CDATA[...]]>` | `CData(text)` | +| `Element` | `<tag attrs> children... </tag>` | `Element(tag, children...; attrs...)` | +| `Text` | the `text` part of `<tag>text</tag>` | `Text(text)` | -node["key"] -``` +
-- `Node` is an immutable type. However, you can easily create a copy with one or more field values changed by using the `Node(::Node, children...; attrs...)` constructor where `children` are appended to the source node's children and `attrs` are appended to the node's attributes. +## Mutation ```julia -node = XML.Element("tag", "child") -# Node Element (1 child) +push!(parent, child) # Add a child +parent[2] = child # Replace a child +node["key"] = "value" # Add/change an attribute +node["key"] # Get an attribute +``` -simple_value(node) -# "child" +
-node2 = Node(node, "added"; id="my-id") -# Node Element (2 children) +## Tree Navigation -node2.children -# 2-element Vector{Node}: -# Node Text "child" -# Node Text "added" +```julia +depth(child, root) # Depth of child relative to root +parent(child, root) # Parent of child within root's tree +siblings(child, root) # Siblings of child within root's tree ``` -### Writing `Element` `Node`s with `XML.h` +
+ +## Writing Elements with `XML.h` Similar to [Cobweb.jl](https://github.com/JuliaComputing/Cobweb.jl#-creating-nodes-with-cobwebh), `XML.h` enables you to write elements with a simpler syntax: ```julia using XML: h -julia> node = h.parent( - h.child("first child content", id="id1"), - h.child("second child content", id="id2") - ) +node = h.parent( + h.child("first child content", id="id1"), + h.child("second child content", id="id2") +) # Node Element (2 children) -julia> print(XML.write(node)) +print(XML.write(node)) # # first child content # second child content @@ -139,111 +105,228 @@ julia> print(XML.write(node))
-## `XML.LazyNode`: For Fast Iteration through an XML File - -A lazy data structure that just keeps track of the position in the raw data (`Vector{UInt8}`) to read from. - -- You can iterate over a `LazyNode` to "read" through an XML file: - -```julia -doc = read(filename, LazyNode) - -foreach(println, doc) -# LazyNode Declaration -# LazyNode Element -# LazyNode Element -# LazyNode Element -# LazyNode Text "Gambardella, Matthew" -# LazyNode Element -# ⋮ -``` - -<br><br> - # Reading ```julia -# Reading from file: +# From a file: read(filename, Node) -read(filename, LazyNode) - -# Parsing from string: -parse(Node, str) -parse(LazyNode, str) +# From a string: +parse(str, Node) ``` -<br><br> +<br> # Writing ```julia XML.write(filename::String, node) # write to file +XML.write(io::IO, node) # write to stream +XML.write(node) # return String +``` + +`XML.write` respects `xml:space="preserve"` on elements, suppressing automatic indentation. + +<br> -XML.write(io::IO, node) # write to stream +# XPath -XML.write(node) # String +Query nodes using a subset of XPath 1.0 via `xpath(node, path)`: + +```julia +doc = parse(""" +<root> + <a id="1"><b>hello</b></a> + <a id="2"><b>world</b></a> +</root> +""", Node) + +root = doc[end] + +xpath(root, "//b") # All <b> descendants +xpath(root, "a[@id='2']/b") # <b> inside <a id="2"> +xpath(root, "a[1]") # First <a> child +xpath(root, "//b/text()") # Text nodes inside all <b>s ``` +### Supported syntax + +| Expression | Description | +|------------|-------------| +| `/` | Root / path separator | +| `tag` | Child element by name | +| `*` | Any child element | +| `//` | Descendant-or-self (recursive) | +| `.` | Current node | +| `..` | Parent node | +| `[n]` | Positional predicate (1-based) | +| `[@attr]` | Has-attribute predicate | +| `[@attr='v']` | Attribute-value predicate | +| `text()` | Text node children | +| `node()` | All node children | +| `@attr` | Attribute value (returns strings) | -<br><br> +<br> -# Performance +# Streaming 
Tokenizer -- XML.jl performs comparatively to [EzXML.jl](https://github.com/JuliaIO/EzXML.jl), which wraps the C library [libxml2](https://gitlab.gnome.org/GNOME/libxml2/-/wikis/home). -- See the `benchmarks/suite.jl` for the code to produce these results. -- The following output was generated in a Julia session with the following `versioninfo`: +For large files or when you need fine-grained control, `XML.XMLTokenizer` provides a streaming tokenizer that yields tokens without building a DOM. Token kinds live in the `XML.XMLTokenizer.TokenKinds` baremodule (e.g. `TokenKinds.OPEN_TAG`, `TokenKinds.TEXT`). -``` -julia> versioninfo() -Julia Version 1.9.4 -Commit 8e5136fa297 (2023-11-14 08:46 UTC) -Build Info: - Official https://julialang.org/ release -Platform Info: - OS: macOS (arm64-apple-darwin22.4.0) - CPU: 10 × Apple M1 Pro - WORD_SIZE: 64 - LIBM: libopenlibm - LLVM: libLLVM-14.0.6 (ORCJIT, apple-m1) - Threads: 8 on 8 virtual cores +```julia +using XML.XMLTokenizer: tokenize + +for token in tokenize("<root><child attr=\"val\">text</child></root>") + println(token.kind, " => ", repr(String(token.raw))) +end +# OPEN_TAG => "<root" +# TAG_CLOSE => ">" +# OPEN_TAG => "<child" +# ATTR_NAME => "attr" +# ATTR_VALUE => "\"val\"" +# TAG_CLOSE => ">" +# TEXT => "text" +# CLOSE_TAG => "</child" +# TAG_CLOSE => ">" +# CLOSE_TAG => "</root" +# TAG_CLOSE => ">" ``` +<br> -### Reading an XML File +# `LazyNode` -``` - XML.LazyNode 0.009583 - XML.Node ■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■ 1071.32 - EzXML.readxml ■■■■■■■■■ 284.346 - XMLDict.xml_dict ■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■ 1231.47 +For read-only access without building a full DOM tree, use `LazyNode`. 
It stores only a reference to the source string and re-tokenizes on demand, using significantly less memory: + +```julia +doc = parse(xml_string, LazyNode) +doc = read("file.xml", LazyNode) ``` -### Writing an XML File +`LazyNode` supports the same read-only interface as `Node`: `nodetype`, `tag`, `attributes`, `value`, `children`, `is_simple`, `simple_value`, plus integer and string indexing. -``` - Write: XML ■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■ 289.638 - Write: EzXML ■■■■■■■■■■■■■ 93.4631 -``` +For streaming and high-throughput workloads, several extra accessors avoid materializing intermediate collections: -### Lazily Iterating over Each Node -``` - LazyNode ■■■■■■■■■ 51.752 - EzXML.StreamReader ■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■ 226.271 +```julia +sourcetext(n) # zero-copy SubString view of the node's raw source bytes +eachchildnode(n) # lazy iterator over children — no Vector allocation +children!(buf, n) # collect children into a reusable buffer +eachattribute(n) # lazy iterator over attribute name=>value pairs +is_simple_value(n) # combined is_simple + simple_value (one tokenizer pass) +get(n, key, default) # single-attribute read without building Attributes +XML.write(n) # zero-copy: returns node's original source text +XML.write(n; normalize=true) # re-parse + pretty-print, collapses source whitespace ``` -### Collecting All Names/Tags in an XML File -``` - XML.LazyNode ■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■ 210.482 - EzXML.StreamReader ■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■ 276.238 - EzXML.readxml ■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■ 263.269 +### Memory-mapped files + +For very large files, combine `LazyNode` with memory mapping to avoid reading the entire file into heap memory: + +```julia +using XML, Mmap, StringViews + +doc = open("very_large.xml") do io + sv = StringView(Mmap.mmap(io)) + parse(sv, LazyNode) +end ``` <br> + +# AbstractTrees Integration + +Loading [`AbstractTrees`](https://github.com/JuliaCollections/AbstractTrees.jl) 
alongside XML enables tree-walking utilities (`print_tree`, `PreOrderDFS`, `Leaves`, etc.) on both `Node` and `LazyNode`: + +```julia +using XML, AbstractTrees + +doc = parse("<a><b/><c><d/></c></a>", Node) +print_tree(doc) +# Document +# └─ <a> +# ├─ <b> +# └─ <c> +# └─ <d> + +for n in PreOrderDFS(doc) + nodetype(n) == Element && println(tag(n)) +end +``` + <br> -# Possible Gotchas +# Benchmarks + +Benchmark source: [benchmarks.jl](benchmarks/benchmarks.jl). Test data: `books.xml` (small, ~4 KB) and a generated XMark auction XML (medium, ~14 MB). + + + +``` + Parse (small) — median time (ms) + + XML.jl ■■■■■■■ 0.0374 + XML.jl (SS) ■■■■■■■ 0.0339 + EzXML ■■■■ 0.0218 + LightXML ■■■■ 0.0218 + XMLDict ■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■ 0.200 + + + Parse (medium) — median time (ms) + + XML.jl ■■■■■■■■■■■■■■ 185.0 + XML.jl (SS) ■■■■■■■■■■■■■ 168.0 + EzXML ■■■■■■ 81.5 + LightXML ■■■■■■■■ 107.0 + XMLDict ■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■ 520.0 + + + Write (small) — median time (ms) + + XML.jl ■■■■ 0.00929 + EzXML ■■■■ 0.0103 + LightXML ■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■ 0.101 + + + Write (medium) — median time (ms) + + XML.jl ■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■ 48.0 + EzXML ■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■ 52.6 + LightXML ■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■ 56.1 + -- XML.jl doesn't automatically escape special characters (`<`, `>`, `&`, `"`, and `'` ) for you. However, we provide utility functions for doing the conversions back and forth: - - `XML.escape(::String)` and `XML.unescape(::String)` - - `XML.escape!(::Node)` and `XML.unescape!(::Node)`. 
+ Read file — median time (ms) + + XML.jl ■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■ 193.0 + EzXML ■■■■■■■■■■■■■■■■■■■■■■■■■ 121.0 + LightXML ■■■■■■■■■■■■■■■■■■■■ 95.6 + + + Collect tags (small) — median time (ms) + + XML.jl ■■■■■■ 0.000586 + EzXML ■■■■■■■■■■■■■■■■■■■■■■ 0.00205 + LightXML ■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■ 0.00368 + + + Collect tags (medium) — median time (ms) + + XML.jl ■■■■■■■■■■■■■■■■■■ 13.1 + EzXML ■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■ 29.4 + LightXML ■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■ 23.2 +``` + +```julia +versioninfo() +# Julia Version 1.12.6 +# Commit 15346901f00 (2026-04-09 19:20 UTC) +# Build Info: +# Official https://julialang.org release +# Platform Info: +# OS: macOS (arm64-apple-darwin24.0.0) +# CPU: 10 × Apple M1 Pro +# WORD_SIZE: 64 +# LLVM: libLLVM-18.1.7 (ORCJIT, apple-m1) +# GC: Built with stock GC +# Threads: 8 default, 1 interactive, 8 GC (on 8 virtual cores) +# Environment: +# JULIA_NUM_THREADS = auto +``` diff --git a/benchmarks/Project.toml b/benchmarks/Project.toml index ed90996..043988c 100644 --- a/benchmarks/Project.toml +++ b/benchmarks/Project.toml @@ -2,7 +2,8 @@ BenchmarkTools = "6e4b80f9-dd63-53aa-95a3-0cdb28fa8baf" DataFrames = "a93c6f00-e57d-5684-b7b6-d8193f3e46c0" EzXML = "8f5d6c58-4d21-5cfd-889c-e3ad7ee6a615" -OrderedCollections = "bac558e1-5e72-5ebc-8fee-abe8a469f55d" +InteractiveUtils = "b77e0a4c-d291-57a0-90e8-8db25a27a240" +LightXML = "9c8b4983-aa76-5018-a973-4c85ecc9e179" UnicodePlots = "b8865327-cd53-5732-bb35-84acbb429228" XML = "72c71f33-b9b6-44de-8c94-c961784809e2" XMLDict = "228000da-037f-5747-90a9-8195ccbf91a5" diff --git a/benchmarks/XMarkGenerator.jl b/benchmarks/XMarkGenerator.jl new file mode 100644 index 0000000..7f780a0 --- /dev/null +++ b/benchmarks/XMarkGenerator.jl @@ -0,0 +1,377 @@ +""" + XMarkGenerator + +XMark-inspired XML benchmark data generator. Produces well-formed XML documents modeling an +internet auction site, following the XMark benchmark DTD structure. 
+ + include("XMarkGenerator.jl") + using .XMarkGenerator + + xml = generate_xmark(1.0) # return String (~14 MB) + generate_xmark("out.xml", 5.0) # write to file (~68 MB) + generate_xmark(stdout, 0.1; seed=123) # write to IO (~1.4 MB) +""" +module XMarkGenerator + +using Random + +export generate_xmark + +#-----------------------------------------------------------------# Word lists +const WORDS = [ + "about", "above", "across", "after", "again", "against", "along", "already", "also", + "always", "among", "another", "answer", "around", "asked", "away", "back", "because", + "become", "been", "before", "began", "behind", "being", "below", "between", "body", + "book", "both", "brought", "build", "built", "business", "came", "cannot", "carry", + "cause", "certain", "change", "children", "city", "close", "come", "complete", "could", + "country", "course", "cover", "current", "dark", "days", "deep", "development", + "different", "direction", "does", "done", "door", "down", "draw", "during", "each", + "early", "earth", "east", "education", "effort", "eight", "either", "else", "end", + "enough", "even", "every", "example", "experience", "face", "fact", "family", "feel", + "field", "find", "first", "five", "follow", "food", "force", "form", "found", "four", + "from", "full", "gave", "general", "give", "going", "gone", "good", "government", + "great", "green", "ground", "group", "grow", "half", "hand", "happen", "hard", "have", + "head", "help", "here", "high", "himself", "hold", "home", "hope", "house", "however", + "hundred", "idea", "important", "inch", "include", "increase", "island", "just", "keep", + "kind", "knew", "know", "land", "large", "last", "later", "learn", "left", "less", + "letter", "life", "light", "like", "line", "list", "little", "live", "long", "look", + "lost", "made", "main", "make", "many", "mark", "matter", "mean", "might", "mind", + "miss", "money", "morning", "most", "mother", "move", "much", "music", "must", "name", + "near", "need", "never", 
"next", "night", "nothing", "notice", "number", "often", + "once", "only", "open", "order", "other", "over", "page", "paper", "part", "past", + "pattern", "people", "perhaps", "period", "person", "picture", "place", "plan", "plant", + "play", "point", "position", "possible", "power", "present", "problem", "produce", + "product", "program", "public", "pull", "purpose", "question", "quite", "reach", "read", + "real", "receive", "record", "remember", "rest", "result", "right", "river", "room", + "round", "rule", "same", "school", "second", "seem", "sentence", "service", "seven", + "several", "shall", "short", "should", "show", "side", "since", "sing", "size", "small", + "social", "some", "song", "soon", "south", "space", "stand", "start", "state", "still", + "stood", "story", "strong", "study", "such", "sure", "system", "table", "take", "tell", + "test", "their", "them", "then", "there", "these", "thing", "think", "those", "thought", + "three", "through", "time", "together", "took", "toward", "travel", "tree", "true", + "turn", "under", "unit", "until", "upon", "usually", "value", "very", "voice", "walk", + "want", "watch", "water", "well", "went", "were", "west", "what", "where", "which", + "while", "white", "whole", "will", "with", "without", "woman", "word", "work", "world", + "would", "write", "year", "young", +] +const FIRST_NAMES = ["James", "John", "Robert", "Michael", "William", "David", "Richard", + "Joseph", "Thomas", "Charles", "Mary", "Patricia", "Jennifer", "Linda", "Barbara", + "Elizabeth", "Susan", "Jessica", "Sarah", "Karen"] +const LAST_NAMES = ["Smith", "Johnson", "Williams", "Brown", "Jones", "Garcia", "Miller", + "Davis", "Rodriguez", "Martinez", "Wilson", "Anderson", "Taylor", "Thomas", "Hernandez", + "Moore", "Martin", "Jackson", "Thompson", "White"] +const COUNTRIES = ["United States", "Germany", "France", "Japan", "Australia", "Brazil", + "Canada", "India", "China", "Mexico", "Argentina", "Spain", "Italy", "United Kingdom", + "Netherlands", 
"Sweden", "Norway", "Finland", "Denmark", "Belgium"] +const CITIES = ["New York", "London", "Paris", "Tokyo", "Sydney", "Berlin", "Rome", + "Madrid", "Amsterdam", "Toronto", "Moscow", "Beijing", "Seoul", "Mumbai", "Cairo", + "Dublin", "Prague", "Vienna", "Warsaw", "Budapest"] +const STREETS = ["Main", "Oak", "Elm", "Maple", "Pine", "Cedar", "Birch", "Walnut", + "Cherry", "Ash", "Spruce", "Willow", "Poplar", "Laurel", "Juniper"] +const EDUCATIONS = ["High School", "College", "Graduate", "Associate", "Master", "Doctorate"] +const GENDERS = ["male", "female"] +const PAYMENTS = ["Creditcard", "Money order", "Personal check", "Cash"] +const SHIPPING = ["Will ship only within country", "Will ship internationally", + "Buyer pays fixed shipping costs", "Free shipping", "See description for shipping"] +const REGIONS = ["africa", "asia", "australia", "europe", "namerica", "samerica"] + +#-----------------------------------------------------------------# Random data helpers +rand_word(rng) = rand(rng, WORDS) +rand_date(rng) = string(rand(rng, 1999:2025), "/", lpad(rand(rng, 1:12), 2, '0'), "/", lpad(rand(rng, 1:28), 2, '0')) +rand_time(rng) = string(lpad(rand(rng, 0:23), 2, '0'), ":", lpad(rand(rng, 0:59), 2, '0'), ":", lpad(rand(rng, 0:59), 2, '0')) +rand_price(rng) = string(rand(rng, 1:9999), ".", lpad(rand(rng, 0:99), 2, '0')) +rand_phone(rng) = string("+", rand(rng, 1:99), " (", rand(rng, 100:999), ") ", rand(rng, 1000000:9999999)) +rand_zip(rng) = string(lpad(rand(rng, 0:99999), 5, '0')) +rand_cc(rng) = join(rand(rng, 1000:9999, 4), " ") +rand_email(rng) = string(lowercase(rand(rng, FIRST_NAMES)), rand(rng, 1:999), "@", lowercase(rand(rng, LAST_NAMES)), ".com") + +#-----------------------------------------------------------------# XML writing helpers +function xml_escape_char(io::IO, c::Char) + if c == '&'; print(io, "&amp;") + elseif c == '<'; print(io, "&lt;") + elseif c == '>'; print(io, "&gt;") + elseif c == '"'; print(io, "&quot;") + else; print(io, c) + end +end + +function 
write_escaped(io::IO, s::AbstractString) + for c in s + xml_escape_char(io, c) + end +end + +function write_text_content(rng, io; min_words=10, max_words=50) + n = rand(rng, min_words:max_words) + for i in 1:n + i > 1 && print(io, ' ') + w = rand_word(rng) + r = rand(rng) + if r < 0.03 + print(io, "<bold>", w, "</bold>") + elseif r < 0.06 + print(io, "<emph>", w, "</emph>") + elseif r < 0.08 + print(io, "<keyword>", w, "</keyword>") + else + print(io, w) + end + end +end + +function write_description(rng, io, indent) + println(io, indent, "<description>") + if rand(rng) < 0.7 + print(io, indent, " <text>") + write_text_content(rng, io; min_words=15, max_words=80) + println(io, "</text>") + else + println(io, indent, " <parlist>") + for _ in 1:rand(rng, 2:6) + print(io, indent, " <listitem><text>") + write_text_content(rng, io; min_words=8, max_words=40) + println(io, "</text></listitem>") + end + println(io, indent, " </parlist>") + end + println(io, indent, "</description>") +end + +function write_annotation(rng, io, indent, n_people) + println(io, indent, "<annotation>") + println(io, indent, " <author person=\"", string("person",rand(rng, 1:n_people)), "\"/>") + write_description(rng, io, string(indent, " ")) + println(io, indent, " <happiness>", rand(rng, 1:10), "</happiness>") + println(io, indent, "</annotation>") +end + +#-----------------------------------------------------------------# Section writers +function write_item(rng, io, id, n_categories) + featured = rand(rng) < 0.1 ? 
" featured=\"yes\"" : "" + println(io, " <item id=\"", string("item",id), "\"", featured, ">") + println(io, " <location>", rand(rng, CITIES), "</location>") + println(io, " <quantity>", rand(rng, 1:50), "</quantity>") + println(io, " <name>", rand_word(rng), " ", rand_word(rng), " ", rand_word(rng), "</name>") + println(io, " <payment>", rand(rng, PAYMENTS), "</payment>") + write_description(rng, io, " ") + println(io, " <shipping>", rand(rng, SHIPPING), "</shipping>") + for _ in 1:rand(rng, 1:3) + println(io, " <incategory category=\"", string("category",rand(rng, 1:n_categories)), "\"/>") + end + println(io, " <mailbox>") + for _ in 1:rand(rng, 0:5) + println(io, " <mail>") + println(io, " <from>", rand_email(rng), "</from>") + println(io, " <to>", rand_email(rng), "</to>") + println(io, " <date>", rand_date(rng), "</date>") + print(io, " <text>") + write_text_content(rng, io; min_words=10, max_words=60) + println(io, "</text>") + println(io, " </mail>") + end + println(io, " </mailbox>") + println(io, " </item>") +end + +function write_categories(rng, io, n) + println(io, " <categories>") + for i in 1:n + println(io, " <category id=\"", string("category",i), "\">") + println(io, " <name>", rand_word(rng), " ", rand_word(rng), "</name>") + write_description(rng, io, " ") + println(io, " </category>") + end + println(io, " </categories>") +end + +function write_catgraph(rng, io, n_edges, n_categories) + println(io, " <catgraph>") + for _ in 1:n_edges + from = string("category",rand(rng, 1:n_categories)) + to = string("category",rand(rng, 1:n_categories)) + println(io, " <edge from=\"", from, "\" to=\"", to, "\"/>") + end + println(io, " </catgraph>") +end + +function write_people(rng, io, n, n_categories, n_open) + println(io, " <people>") + for i in 1:n + println(io, " <person id=\"", string("person",i), "\">") + println(io, " <name>", rand(rng, FIRST_NAMES), " ", rand(rng, LAST_NAMES), "</name>") + println(io, " <emailaddress>", rand_email(rng), 
"</emailaddress>") + if rand(rng) < 0.8 + println(io, " <phone>", rand_phone(rng), "</phone>") + end + if rand(rng) < 0.7 + println(io, " <address>") + println(io, " <street>", rand(rng, 1:9999), " ", rand(rng, STREETS), " St</street>") + println(io, " <city>", rand(rng, CITIES), "</city>") + println(io, " <country>", rand(rng, COUNTRIES), "</country>") + if rand(rng) < 0.5 + println(io, " <province>", rand_word(rng), "</province>") + end + println(io, " <zipcode>", rand_zip(rng), "</zipcode>") + println(io, " </address>") + end + if rand(rng) < 0.5 + println(io, " <homepage>http://www.", lowercase(rand(rng, LAST_NAMES)), ".com/~", + lowercase(rand(rng, FIRST_NAMES)), "</homepage>") + end + if rand(rng) < 0.6 + println(io, " <creditcard>", rand_cc(rng), "</creditcard>") + end + if rand(rng) < 0.7 + income = rand(rng) < 0.8 ? string(" income=\"", rand(rng, 10000.0:0.01:250000.0), "\"") : "" + println(io, " <profile", income, ">") + for _ in 1:rand(rng, 0:4) + println(io, " <interest category=\"", string("category",rand(rng, 1:n_categories)), "\"/>") + end + if rand(rng) < 0.8 + println(io, " <education>", rand(rng, EDUCATIONS), "</education>") + end + if rand(rng) < 0.7 + println(io, " <gender>", rand(rng, GENDERS), "</gender>") + end + println(io, " <business>", rand_word(rng), "</business>") + if rand(rng) < 0.8 + println(io, " <age>", rand(rng, 18:85), "</age>") + end + println(io, " </profile>") + end + if n_open > 0 && rand(rng) < 0.3 + println(io, " <watches>") + for _ in 1:rand(rng, 1:5) + println(io, " <watch open_auction=\"", string("open_auction",rand(rng, 1:n_open)), "\"/>") + end + println(io, " </watches>") + end + println(io, " </person>") + end + println(io, " </people>") +end + +function write_open_auctions(rng, io, n, n_items, n_people) + println(io, " <open_auctions>") + for i in 1:n + println(io, " <open_auction id=\"", string("open_auction",i), "\">") + println(io, " <initial>", rand_price(rng), "</initial>") + if rand(rng) < 0.5 + println(io, " 
<reserve>", rand_price(rng), "</reserve>") + end + for _ in 1:rand(rng, 0:12) + println(io, " <bidder>") + println(io, " <date>", rand_date(rng), "</date>") + println(io, " <time>", rand_time(rng), "</time>") + println(io, " <personref person=\"", string("person",rand(rng, 1:n_people)), "\"/>") + println(io, " <increase>", rand_price(rng), "</increase>") + println(io, " </bidder>") + end + println(io, " <current>", rand_price(rng), "</current>") + if rand(rng) < 0.3 + println(io, " <privacy>", rand(rng, ["Yes", "No"]), "</privacy>") + end + println(io, " <itemref item=\"", string("item",rand(rng, 1:n_items)), "\"/>") + println(io, " <seller person=\"", string("person",rand(rng, 1:n_people)), "\"/>") + write_annotation(rng, io, " ", n_people) + println(io, " <quantity>", rand(rng, 1:10), "</quantity>") + println(io, " <type>", rand(rng, ["Regular", "Featured"]), "</type>") + println(io, " <interval>") + println(io, " <start>", rand_date(rng), "</start>") + println(io, " <end>", rand_date(rng), "</end>") + println(io, " </interval>") + println(io, " </open_auction>") + end + println(io, " </open_auctions>") +end + +function write_closed_auctions(rng, io, n, n_open, n_items, n_people) + println(io, " <closed_auctions>") + for i in 1:n + println(io, " <closed_auction>") + println(io, " <seller person=\"", string("person",rand(rng, 1:n_people)), "\"/>") + println(io, " <buyer person=\"", string("person",rand(rng, 1:n_people)), "\"/>") + # Use item IDs that don't overlap with open auctions + item_id = n_open + i + item_id = item_id <= n_items ? 
item_id : rand(rng, 1:n_items) + println(io, " <itemref item=\"", string("item",item_id), "\"/>") + println(io, " <price>", rand_price(rng), "</price>") + println(io, " <date>", rand_date(rng), "</date>") + println(io, " <quantity>", rand(rng, 1:10), "</quantity>") + println(io, " <type>", rand(rng, ["Regular", "Featured"]), "</type>") + if rand(rng) < 0.7 + write_annotation(rng, io, " ", n_people) + end + println(io, " </closed_auction>") + end + println(io, " </closed_auctions>") +end + +#-----------------------------------------------------------------# Main entry points +""" + generate_xmark([io_or_filename], factor; seed=42) + +Generate an XMark-style auction XML document. `factor` scales all entity counts linearly. + +Approximate output sizes (may vary slightly): +- `factor=0.1` → ~1.4 MB +- `factor=1.0` → ~14 MB +- `factor=2.0` → ~27 MB +- `factor=5.0` → ~68 MB +""" +function generate_xmark(io::IO, factor::Real; seed::Int=42) + factor > 0 || throw(ArgumentError("factor must be positive, got $factor")) + rng = Xoshiro(seed) + + n_per_region = max(1, round(Int, 500 * factor)) + n_people = max(1, round(Int, 5000 * factor)) + n_categories = max(1, round(Int, 200 * factor)) + n_open = max(1, round(Int, 2000 * factor)) + n_closed = max(1, round(Int, 1500 * factor)) + n_edges = max(1, round(Int, 1000 * factor)) + n_items = n_per_region * 6 + + # Clamp auctions to available items + n_open = min(n_open, n_items) + n_closed = min(n_closed, max(1, n_items - n_open)) + + println(io, "<?xml version=\"1.0\" encoding=\"UTF-8\"?>") + println(io, "<site>") + + # Regions with items + println(io, " <regions>") + item_id = 0 + for region in REGIONS + println(io, " <", region, ">") + for _ in 1:n_per_region + item_id += 1 + write_item(rng, io, item_id, n_categories) + end + println(io, " </", region, ">") + end + println(io, " </regions>") + + write_categories(rng, io, n_categories) + write_catgraph(rng, io, n_edges, n_categories) + write_people(rng, io, n_people, n_categories, 
n_open) + write_open_auctions(rng, io, n_open, n_items, n_people) + write_closed_auctions(rng, io, n_closed, n_open, n_items, n_people) + + println(io, "</site>") + nothing +end + +function generate_xmark(filename::AbstractString, factor::Real; seed::Int=42) + open(filename, "w") do io + generate_xmark(io, factor; seed) + end + filename +end + +function generate_xmark(factor::Real; seed::Int=42) + io = IOBuffer() + generate_xmark(io, factor; seed) + String(take!(io)) +end + +end # module diff --git a/benchmarks/benchmarks.jl b/benchmarks/benchmarks.jl new file mode 100644 index 0000000..7bd2cb1 --- /dev/null +++ b/benchmarks/benchmarks.jl @@ -0,0 +1,527 @@ +using XML +using XML: Element, nodetype, tag, children +using EzXML: EzXML +using XMLDict: XMLDict +using LightXML: LightXML +using BenchmarkTools +using DataFrames +using InteractiveUtils + +include("XMarkGenerator.jl") +using .XMarkGenerator + +BenchmarkTools.DEFAULT_PARAMETERS.seconds = 10 +BenchmarkTools.DEFAULT_PARAMETERS.samples = 20000 + +#-----------------------------------------------------------------------------# Test data +# Small file (~120 lines) +small_file = joinpath(@__DIR__, "..", "test", "data", "books.xml") +small_xml = read(small_file, String) + +# Medium file (generated XMark auction XML, ~14 MB) +medium_file = joinpath(@__DIR__, "data", "xmark.xml") +if !isfile(medium_file) + mkpath(dirname(medium_file)) + @info "Generating XMark benchmark XML..." + generate_xmark(medium_file, 1.0) +end +medium_xml = read(medium_file, String) + +df = DataFrame(kind=String[], name=String[], bench=BenchmarkTools.Trial[]) + +macro add_benchmark(kind, name, expr...) + esc(:(let + @info string($kind, " - ", $name) + bench = @benchmark $(expr...) 
+ push!(df, (; kind=$kind, name=$name, bench)) + end)) +end + +const SSNode = Node{SubString{String}} + +#-----------------------------------------------------------------------------# Parse (small) +@add_benchmark "Parse (small)" "XML.jl" parse($small_xml, Node) +@add_benchmark "Parse (small)" "XML.jl (SS)" parse($small_xml, SSNode) +@add_benchmark "Parse (small)" "EzXML" EzXML.parsexml($small_xml) +@add_benchmark "Parse (small)" "LightXML" LightXML.parse_string($small_xml) +@add_benchmark "Parse (small)" "XMLDict" XMLDict.xml_dict($small_xml) + +#-----------------------------------------------------------------------------# Parse (medium) +@add_benchmark "Parse (medium)" "XML.jl" parse($medium_xml, Node) +@add_benchmark "Parse (medium)" "XML.jl (SS)" parse($medium_xml, SSNode) +@add_benchmark "Parse (medium)" "EzXML" EzXML.parsexml($medium_xml) +@add_benchmark "Parse (medium)" "LightXML" LightXML.parse_string($medium_xml) +@add_benchmark "Parse (medium)" "XMLDict" XMLDict.xml_dict($medium_xml) + +#-----------------------------------------------------------------------------# Write (small) +@add_benchmark "Write (small)" "XML.jl" XML.write(o) setup=(o = parse(small_xml, Node)) +@add_benchmark "Write (small)" "EzXML" sprint(print, o) setup=(o = EzXML.parsexml(small_xml)) +@add_benchmark "Write (small)" "LightXML" LightXML.save_file(o, f) setup=(o = LightXML.parse_string(small_xml); f = tempname()) teardown=(LightXML.free(o); rm(f, force=true)) + +#-----------------------------------------------------------------------------# Write (medium) +@add_benchmark "Write (medium)" "XML.jl" XML.write(o) setup=(o = parse(medium_xml, Node)) +@add_benchmark "Write (medium)" "EzXML" sprint(print, o) setup=(o = EzXML.parsexml(medium_xml)) +@add_benchmark "Write (medium)" "LightXML" LightXML.save_file(o, f) setup=(o = LightXML.parse_string(medium_xml); f = tempname()) teardown=(LightXML.free(o); rm(f, force=true)) + 
+#-----------------------------------------------------------------------------# Read from file +@add_benchmark "Read file" "XML.jl" read($medium_file, Node) +@add_benchmark "Read file" "EzXML" EzXML.readxml($medium_file) +@add_benchmark "Read file" "LightXML" LightXML.parse_file($medium_file) + +#-----------------------------------------------------------------------------# Collect element tags +function xml_collect_tags(node) + out = String[] + _xml_collect_tags!(out, node) + out +end +function _xml_collect_tags!(out, node) + for c in children(node) + if nodetype(c) === Element + push!(out, tag(c)) + _xml_collect_tags!(out, c) + end + end +end + +function ezxml_collect_tags(node::EzXML.Node) + out = String[] + _ezxml_collect_tags!(out, node) + out +end +function _ezxml_collect_tags!(out, node::EzXML.Node) + for child in EzXML.eachelement(node) + push!(out, child.name) + _ezxml_collect_tags!(out, child) + end +end + +function lightxml_collect_tags(root::LightXML.XMLElement) + out = String[] + _lightxml_collect_tags!(out, root) + out +end +function _lightxml_collect_tags!(out, el::LightXML.XMLElement) + for child in LightXML.child_elements(el) + push!(out, LightXML.name(child)) + _lightxml_collect_tags!(out, child) + end +end + +@add_benchmark "Collect tags (small)" "XML.jl" xml_collect_tags(o) setup=(o = parse(small_xml, Node)) +@add_benchmark "Collect tags (small)" "EzXML" ezxml_collect_tags(o.root) setup=(o = EzXML.parsexml(small_xml)) +@add_benchmark "Collect tags (small)" "LightXML" lightxml_collect_tags(LightXML.root(o)) setup=(o = LightXML.parse_string(small_xml)) teardown=(LightXML.free(o)) + +@add_benchmark "Collect tags (medium)" "XML.jl" xml_collect_tags(o) setup=(o = parse(medium_xml, Node)) +@add_benchmark "Collect tags (medium)" "EzXML" ezxml_collect_tags(o.root) setup=(o = EzXML.parsexml(medium_xml)) +@add_benchmark "Collect tags (medium)" "LightXML" lightxml_collect_tags(LightXML.root(o)) setup=(o = LightXML.parse_string(medium_xml)) 
teardown=(LightXML.free(o)) + +#-----------------------------------------------------------------------------# XLSX-pattern fixtures +# These fixtures mirror the shapes that XLSX.jl exercises: +# - `sst_xml` matches `xl/sharedStrings.xml` (lots of small `<si><t>…</t></si>` entries +# separated by whitespace — the layout that exposes the LazyNode write/normalize choice) +# - `ws_xml` matches `xl/sheetN.xml` (a `<sheetData>` with many `<row>`s of `<c r=… s=… t=…><v>…</v></c>`) + +@info "Generating XLSX-pattern fixtures..." + +sst_xml = let buf = IOBuffer() + print(buf, "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n") + print(buf, "<sst xmlns=\"http://schemas.openxmlformats.org/spreadsheetml/2006/main\" count=\"50000\" uniqueCount=\"50000\">\n") + for i in 1:50000 + print(buf, " <si><t>shared string value number ", i, "</t></si>\n") + end + print(buf, "</sst>") + String(take!(buf)) +end + +ws_xml = let buf = IOBuffer() + print(buf, "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n") + print(buf, "<worksheet xmlns=\"http://schemas.openxmlformats.org/spreadsheetml/2006/main\">\n") + print(buf, "<sheetData>\n") + for r in 1:3000 + print(buf, " <row r=\"", r, "\">") + for c in 1:15 + col = Char(UInt32('A') + c - 1) + print(buf, "<c r=\"", col, r, "\" s=\"3\" t=\"n\"><v>", r * c, "</v></c>") + end + print(buf, "</row>\n") + end + print(buf, "</sheetData></worksheet>") + String(take!(buf)) +end + +# String-heavy worksheet: cells reference the shared string table (`t="s"`, `<v>` = SST +# index). This is the most common real-world shape and the one where the `has_entities` +# short-circuit and zero-copy accessors matter most for XLSX.jl `readtable`. 
+ws_str_xml = let buf = IOBuffer() + print(buf, "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n") + print(buf, "<worksheet xmlns=\"http://schemas.openxmlformats.org/spreadsheetml/2006/main\">\n") + print(buf, "<sheetData>\n") + for r in 1:5000 + print(buf, " <row r=\"", r, "\">") + for c in 1:8 + col = Char(UInt32('A') + c - 1) + print(buf, "<c r=\"", col, r, "\" s=\"2\" t=\"s\"><v>", (r * c) % 50000, "</v></c>") + end + print(buf, "</row>\n") + end + print(buf, "</sheetData></worksheet>") + String(take!(buf)) +end + +# Entity-heavy SST: every <t> needs decoding, exercising the `has_entities` slow path. +sst_entity_xml = let buf = IOBuffer() + print(buf, "<sst count=\"50000\" uniqueCount=\"50000\">") + for i in 1:50000 + print(buf, "<si><t>A & B <tag> #", i, "</t></si>") + end + print(buf, "</sst>") + String(take!(buf)) +end + +@info " sst_xml: $(round(length(sst_xml) / 1024 / 1024, digits=2)) MB ($(50000) <si>)" +@info " ws_xml: $(round(length(ws_xml) / 1024 / 1024, digits=2)) MB ($(3000) <row> × $(15) <c>)" +@info " ws_str_xml: $(round(length(ws_str_xml) / 1024 / 1024, digits=2)) MB ($(5000) <row> × $(8) string <c>)" +@info " sst_entity_xml: $(round(length(sst_entity_xml) / 1024 / 1024, digits=2)) MB (entity-heavy)" + +# Helper: walk a Node-based <si> subtree and concatenate its <t> text content. 
+function _node_unformatted(io::IO, el::Node{String}) + XML.tag(el) == "rPh" && return + if XML.tag(el) == "t" + if XML.is_simple(el) + write(io, XML.simple_value(el)) + else + v = XML.value(el) + isnothing(v) || write(io, v) + end + return + end + for c in XML.children(el) + _node_unformatted(io, c) + end +end +_node_unformatted(el::Node{String}) = sprint(_node_unformatted, el) + +#-----------------------------------------------------------------------------# Parse: XLSX shapes +@add_benchmark "Parse SST (LazyNode)" "XML.jl" parse($sst_xml, LazyNode) +@add_benchmark "Parse SST (LazyNode)" "Node (for ref)" parse($sst_xml, Node) +@add_benchmark "Parse worksheet (LazyNode)" "XML.jl" parse($ws_xml, LazyNode) +@add_benchmark "Parse worksheet (LazyNode)" "Node (for ref)" parse($ws_xml, Node) + +#-----------------------------------------------------------------------------# SST loading (XLSX.jl sst.jl pattern) +# Mirrors `sst_load!`: stream <si> children, capture raw XML + unformatted text per entry. 
+ +@add_benchmark "SST: write each <si>" "LazyNode + write (zero-copy)" begin + out = String[] + sst_el = doc[end] + for si in XML.eachchildnode(sst_el) + XML.nodetype(si) === XML.Element || continue + push!(out, XML.write(si)) + end + out +end setup=(doc = parse(sst_xml, LazyNode)) + +@add_benchmark "SST: write each <si>" "LazyNode + write (normalize)" begin + out = String[] + sst_el = doc[end] + for si in XML.eachchildnode(sst_el) + XML.nodetype(si) === XML.Element || continue + push!(out, XML.write(si; normalize=true)) + end + out +end setup=(doc = parse(sst_xml, LazyNode)) + +@add_benchmark "SST: write each <si>" "Node (for ref)" begin + out = String[] + sst_el = doc[end] + for si in XML.children(sst_el) + XML.tag(si) == "si" || continue + push!(out, XML.write(si)) + end + out +end setup=(doc = parse(sst_xml, Node)) + +@add_benchmark "SST: unformatted text" "LazyNode + is_simple_value" begin + out = Vector{Union{Nothing,SubString{String},String}}() + sst_el = doc[end] + for si in XML.eachchildnode(sst_el) + XML.nodetype(si) === XML.Element || continue + for t in XML.eachchildnode(si) + XML.nodetype(t) === XML.Element || continue + XML.tag(t) == "t" || continue + push!(out, XML.is_simple_value(t)) + end + end + out +end setup=(doc = parse(sst_xml, LazyNode)) + +@add_benchmark "SST: unformatted text" "Node (for ref)" begin + out = String[] + sst_el = doc[end] + for si in XML.children(sst_el) + XML.tag(si) == "si" || continue + push!(out, _node_unformatted(si)) + end + out +end setup=(doc = parse(sst_xml, Node)) + +#-----------------------------------------------------------------------------# Worksheet: nested row/cell loops (XLSX.jl cell.jl pattern) +# Mirrors `Cell(c::LazyNode, ws)` and `get_rowcells!`: iterate <row>, then <c>, then attrs + <v>. 
+ +@add_benchmark "Worksheet: collect rows" "children() (fresh Vector each call)" begin + sd = doc[end][1] # <sheetData> + XML.children(sd) +end setup=(doc = parse(ws_xml, LazyNode)) + +@add_benchmark "Worksheet: collect rows" "children!(buf, n) (reused buffer)" begin + sd = doc[end][1] + XML.children!(buf, sd) +end setup=(doc = parse(ws_xml, LazyNode); buf = XML.LazyNode{String}[]) + +@add_benchmark "Worksheet: attribute scan" "eachattribute" begin + n = 0 + sd = doc[end][1] + for row in XML.eachchildnode(sd) + XML.nodetype(row) === XML.Element || continue + for c in XML.eachchildnode(row) + XML.nodetype(c) === XML.Element || continue + for (k, v) in XML.eachattribute(c) + n += sizeof(v) + end + end + end + n +end setup=(doc = parse(ws_xml, LazyNode)) + +@add_benchmark "Worksheet: attribute scan" "attributes() (materialize dict)" begin + n = 0 + sd = doc[end][1] + for row in XML.eachchildnode(sd) + XML.nodetype(row) === XML.Element || continue + for c in XML.eachchildnode(row) + XML.nodetype(c) === XML.Element || continue + a = XML.attributes(c) + isnothing(a) && continue + for (_, v) in a + n += sizeof(v) + end + end + end + n +end setup=(doc = parse(ws_xml, LazyNode)) + +@add_benchmark "Worksheet: single attr fetch" "get(c, \"r\", \"\")" begin + n = 0 + sd = doc[end][1] + for row in XML.eachchildnode(sd) + XML.nodetype(row) === XML.Element || continue + for c in XML.eachchildnode(row) + XML.nodetype(c) === XML.Element || continue + n += sizeof(get(c, "r", "")) + end + end + n +end setup=(doc = parse(ws_xml, LazyNode)) + +@add_benchmark "Worksheet: single attr fetch" "attributes(c)[\"r\"]" begin + n = 0 + sd = doc[end][1] + for row in XML.eachchildnode(sd) + XML.nodetype(row) === XML.Element || continue + for c in XML.eachchildnode(row) + XML.nodetype(c) === XML.Element || continue + a = XML.attributes(c) + isnothing(a) && continue + n += sizeof(a["r"]) + end + end + n +end setup=(doc = parse(ws_xml, LazyNode)) + +@add_benchmark "Worksheet: <v> value" 
"is_simple_value" begin + n = 0 + sd = doc[end][1] + for row in XML.eachchildnode(sd) + XML.nodetype(row) === XML.Element || continue + for c in XML.eachchildnode(row) + XML.nodetype(c) === XML.Element || continue + for v in XML.eachchildnode(c) + XML.nodetype(v) === XML.Element || continue + val = XML.is_simple_value(v) + isnothing(val) || (n += sizeof(val)) + end + end + end + n +end setup=(doc = parse(ws_xml, LazyNode)) + +@add_benchmark "Worksheet: <v> value" "is_simple + simple_value" begin + n = 0 + sd = doc[end][1] + for row in XML.eachchildnode(sd) + XML.nodetype(row) === XML.Element || continue + for c in XML.eachchildnode(row) + XML.nodetype(c) === XML.Element || continue + for v in XML.eachchildnode(c) + XML.nodetype(v) === XML.Element || continue + if XML.is_simple(v) + n += sizeof(XML.simple_value(v)) + end + end + end + end + n +end setup=(doc = parse(ws_xml, LazyNode)) + +#-----------------------------------------------------------------------------# End-to-end XLSX.jl hot loops +# The micro-benchmarks above isolate single operations. These mirror the *combined* work +# XLSX.jl actually does per entry, so a regression in any sub-operation (parse, accessor, +# entity short-circuit, iterator allocation) shows up where it matters for spreadsheet read +# performance. + +# Mirrors XLSX.jl `sst.jl` `unformatted_text` / `gather_strings!`: recursively walk an +# <si> subtree concatenating <t> text content. +function _xlsx_unformatted(io::IO, e::XML.LazyNode) + t = XML.tag(e) + t == "rPh" && return nothing + if t == "t" + v = XML.is_simple_value(e) + isnothing(v) || write(io, v) + else + for ch in XML.eachchildnode(e) + XML.nodetype(ch) === XML.Element && _xlsx_unformatted(io, ch) + end + end + nothing +end + +# Mirrors XLSX.jl `sst.jl` `sst_load!`: stream <si>, capture raw XML + unformatted text. +@add_benchmark "XLSX sst_load! 
(end-to-end)" "LazyNode" begin + sst_el = doc[end] + shared = String[] + unformatted = String[] + for si in XML.eachchildnode(sst_el) + XML.nodetype(si) === XML.Element || continue + XML.tag(si) == "si" || continue + push!(shared, XML.write(si)) + io = IOBuffer() + _xlsx_unformatted(io, si) + push!(unformatted, String(take!(io))) + end + (length(shared), length(unformatted)) +end setup=(doc = parse(sst_xml, LazyNode)) + +# Mirrors XLSX.jl `cell.jl` `Cell(c, ws)` + `get_rowcells!`: per cell, read the r/s/t +# attributes and the <v> value, exactly as the reader does. Numeric worksheet. +@add_benchmark "XLSX cell read (end-to-end)" "numeric ws" begin + sd = doc[end][1] + ncells = 0 + acc = 0 + for row in XML.eachchildnode(sd) + XML.nodetype(row) === XML.Element || continue + for c in XML.eachchildnode(row) + XML.nodetype(c) === XML.Element || continue + ref = get(c, "r", "") + t = get(c, "t", "") + s = get(c, "s", "") + acc += sizeof(ref) + sizeof(t) + sizeof(s) + for child in XML.eachchildnode(c) + XML.nodetype(child) === XML.Element || continue + if XML.tag(child) == "v" + v = XML.is_simple_value(child) + isnothing(v) || (acc += sizeof(v)) + end + end + ncells += 1 + end + end + (ncells, acc) +end setup=(doc = parse(ws_xml, LazyNode)) + +# Same loop on the string-heavy worksheet (t="s", SST-indexed) — the common real shape +# and the one most sensitive to the entity short-circuit / zero-copy accessors. 
+@add_benchmark "XLSX cell read (end-to-end)" "string ws" begin + sd = doc[end][1] + ncells = 0 + acc = 0 + for row in XML.eachchildnode(sd) + XML.nodetype(row) === XML.Element || continue + for c in XML.eachchildnode(row) + XML.nodetype(c) === XML.Element || continue + ref = get(c, "r", "") + t = get(c, "t", "") + s = get(c, "s", "") + acc += sizeof(ref) + sizeof(t) + sizeof(s) + for child in XML.eachchildnode(c) + XML.nodetype(child) === XML.Element || continue + if XML.tag(child) == "v" + v = XML.is_simple_value(child) + isnothing(v) || (acc += sizeof(v)) + end + end + ncells += 1 + end + end + (ncells, acc) +end setup=(doc = parse(ws_str_xml, LazyNode)) + +# Realistic-string SST: entries containing characters that DO need entity decoding, so the +# `has_entities` slow path is exercised (catches regressions in the decode branch). +@add_benchmark "XLSX sst_load! (end-to-end)" "LazyNode (entity-heavy)" begin + sst_el = doc[end] + n = 0 + for si in XML.eachchildnode(sst_el) + XML.nodetype(si) === XML.Element || continue + XML.tag(si) == "si" || continue + for t in XML.eachchildnode(si) + XML.nodetype(t) === XML.Element || continue + v = XML.is_simple_value(t) + isnothing(v) || (n += sizeof(v)) + end + end + n +end setup=(doc = parse(sst_entity_xml, LazyNode)) + +#-----------------------------------------------------------------------------# Write benchmarks_results.md +_fmt_ms(t) = string(round(t, sigdigits=3), " ms") + +function _compare_indicator(xml_ms, other_ms) + ratio = xml_ms / other_ms + pct = abs(round((ratio - 1) * 100, digits=1)) + ratio > 1.05 ? "(XML.jl $(pct)% slower)" : ratio < 0.95 ? 
"(XML.jl $(pct)% faster)" : "(~same)" +end + +outfile = joinpath(@__DIR__, "benchmarks_results.md") +open(outfile, "w") do io + println(io, "# XML.jl Benchmarks\n") + println(io, "```") + for kind in unique(df.kind) + g = groupby(df, :kind) + haskey(g, (;kind)) || continue + sub = g[(;kind)] + println(io, kind) + # Find XML.jl baseline (first row starting with "XML.jl") + xml_row = findfirst(r -> startswith(r.name, "XML.jl") && !contains(r.name, "(SS)"), eachrow(sub)) + xml_ms = isnothing(xml_row) ? nothing : median(sub[xml_row, :bench]).time / 1e6 + for row in eachrow(sub) + ms = median(row.bench).time / 1e6 + indicator = "" + if !isnothing(xml_ms) && !startswith(row.name, "XML.jl") + indicator = " " * _compare_indicator(xml_ms, ms) + end + println(io, "\t", rpad(row.name, 16), lpad(_fmt_ms(ms), 12), indicator) + end + println(io) + end + println(io, "```") + + println(io, "\n```julia") + println(io, "versioninfo()") + buf = IOBuffer() + InteractiveUtils.versioninfo(buf) + for line in eachline(IOBuffer(take!(buf))) + println(io, "# ", line) + end + println(io, "```") +end + +println("Results written to $outfile") diff --git a/benchmarks/benchmarks_results.md b/benchmarks/benchmarks_results.md new file mode 100644 index 0000000..60c6ae0 --- /dev/null +++ b/benchmarks/benchmarks_results.md @@ -0,0 +1,101 @@ +# XML.jl Benchmarks + +``` +Parse (small) + XML.jl 0.0378 ms + XML.jl (SS) 0.0349 ms + EzXML 0.0224 ms (XML.jl 68.8% slower) + LightXML 0.022 ms (XML.jl 72.3% slower) + XMLDict 0.209 ms (XML.jl 81.9% faster) + +Parse (medium) + XML.jl 201.0 ms + XML.jl (SS) 190.0 ms + EzXML 80.3 ms (XML.jl 150.7% slower) + LightXML 114.0 ms (XML.jl 76.1% slower) + XMLDict 608.0 ms (XML.jl 66.9% faster) + +Write (small) + XML.jl 0.00957 ms + EzXML 0.0108 ms (XML.jl 11.7% faster) + LightXML 0.105 ms (XML.jl 90.9% faster) + +Write (medium) + XML.jl 48.3 ms + EzXML 36.9 ms (XML.jl 30.9% slower) + LightXML 56.2 ms (XML.jl 14.1% faster) + +Read file + XML.jl 191.0 ms + EzXML 115.0 ms 
(XML.jl 67.2% slower) + LightXML 97.4 ms (XML.jl 96.6% slower) + +Collect tags (small) + XML.jl 0.000602 ms + EzXML 0.0021 ms (XML.jl 71.4% faster) + LightXML 0.00381 ms (XML.jl 84.2% faster) + +Collect tags (medium) + XML.jl 12.7 ms + EzXML 16.3 ms (XML.jl 21.8% faster) + LightXML 23.5 ms (XML.jl 45.9% faster) + +Parse SST (LazyNode) + XML.jl 5.29e-6 ms + Node (for ref) 45.8 ms (XML.jl 100.0% faster) + +Parse worksheet (LazyNode) + XML.jl 5.21e-6 ms + Node (for ref) 69.6 ms (XML.jl 100.0% faster) + +SST: write each <si> + LazyNode + write (zero-copy) 93.0 ms + LazyNode + write (normalize) 157.0 ms + Node (for ref) 9.83 ms + +SST: unformatted text + LazyNode + is_simple_value 102.0 ms + Node (for ref) 5.31 ms + +Worksheet: collect rows + children() (fresh Vector each call) 87.9 ms + children!(buf, n) (reused buffer) 87.9 ms + +Worksheet: attribute scan + eachattribute 87.8 ms + attributes() (materialize dict) 87.2 ms + +Worksheet: single attr fetch + get(c, "r", "") 87.6 ms + attributes(c)["r"] 88.0 ms + +Worksheet: <v> value + is_simple_value 87.1 ms + is_simple + simple_value 87.8 ms + +XLSX sst_load! (end-to-end) + LazyNode 149.0 ms + LazyNode (entity-heavy) 113.0 ms + +XLSX cell read (end-to-end) + numeric ws 87.9 ms + string ws 80.2 ms + +``` + +```julia +versioninfo() +# Julia Version 1.12.6 +# Commit 15346901f00 (2026-04-09 19:20 UTC) +# Build Info: +# Official https://julialang.org release +# Platform Info: +# OS: macOS (arm64-apple-darwin24.0.0) +# CPU: 10 × Apple M1 Pro +# WORD_SIZE: 64 +# LLVM: libLLVM-18.1.7 (ORCJIT, apple-m1) +# GC: Built with stock GC +# Threads: 8 default, 1 interactive, 8 GC (on 8 virtual cores) +# Environment: +# JULIA_NUM_THREADS = auto +``` diff --git a/benchmarks/compare.jl b/benchmarks/compare.jl new file mode 100644 index 0000000..4bdc22a --- /dev/null +++ b/benchmarks/compare.jl @@ -0,0 +1,290 @@ +#= Compare current dev XML.jl against the last released version. 
+ +Usage: + julia benchmarks/compare.jl [tag] + +`tag` defaults to the latest git tag (e.g. v0.3.8). + +This script: +1. Runs benchmarks using the current (dev) code +2. Checks out the release tag into a temp worktree +3. Runs the same benchmarks against that version +4. Prints a side-by-side comparison +=# + +using BenchmarkTools, Serialization, InteractiveUtils + +BenchmarkTools.DEFAULT_PARAMETERS.seconds = 5 +BenchmarkTools.DEFAULT_PARAMETERS.samples = 10000 + +const ROOT = dirname(@__DIR__) + +const RELEASE_TAG = if length(ARGS) >= 1 + ARGS[1] +else + tags = readlines(`git -C $ROOT tag --sort=version:refname`) + filter!(t -> startswith(t, "v"), tags) + last(tags) +end + +const SMALL_FILE = joinpath(ROOT, "test", "data", "books.xml") +const SMALL_XML = read(SMALL_FILE, String) + +# Generate medium file if needed +include(joinpath(ROOT, "benchmarks", "XMarkGenerator.jl")) +using .XMarkGenerator +const MEDIUM_FILE = joinpath(ROOT, "benchmarks", "data", "xmark.xml") +if !isfile(MEDIUM_FILE) + mkpath(dirname(MEDIUM_FILE)) + @info "Generating XMark benchmark XML..." 
+ generate_xmark(MEDIUM_FILE, 1.0) +end +const MEDIUM_XML = read(MEDIUM_FILE, String) + +#-----------------------------------------------------------------------------# Helpers +function _collect_tags!(out, node) + for c in XML.children(node) + if XML.nodetype(c) === XML.Element + push!(out, XML.tag(c)) + _collect_tags!(out, c) + end + end +end + +function bench_collect_tags(node) + out = String[] + _collect_tags!(out, node) + out +end + +#-----------------------------------------------------------------------------# Run dev benchmarks +println("="^60) +println(" XML.jl Benchmark Comparison") +println(" Current (dev) vs $RELEASE_TAG") +println("="^60) +println() + +print("Running dev benchmarks...") +flush(stdout) + +using XML + +dev_results = Dict{String, BenchmarkTools.Trial}() + +const SSNode = Node{SubString{String}} + +dev_small = parse(SMALL_XML, Node) +dev_small_ss = parse(SMALL_XML, SSNode) +dev_medium = parse(MEDIUM_XML, Node) +dev_medium_ss = parse(MEDIUM_XML, SSNode) + +dev_results["Parse (small), String"] = @benchmark parse($SMALL_XML, Node) +dev_results["Parse (small), SubString"] = @benchmark parse($SMALL_XML, SSNode) +dev_results["Parse (medium), String"] = @benchmark parse($MEDIUM_XML, Node) +dev_results["Parse (medium), SubString"] = @benchmark parse($MEDIUM_XML, SSNode) +dev_results["Write (small)"] = @benchmark XML.write($dev_small) +dev_results["Write (medium)"] = @benchmark XML.write($dev_medium) +dev_results["Read file (medium), String"] = @benchmark read($MEDIUM_FILE, Node) +dev_results["Read file (medium), SubString"] = @benchmark parse(read($MEDIUM_FILE, String), SSNode) +dev_results["Collect tags (small), String"] = @benchmark bench_collect_tags($dev_small) +dev_results["Collect tags (small), SubString"] = @benchmark bench_collect_tags($dev_small_ss) +dev_results["Collect tags (medium), String"] = @benchmark bench_collect_tags($dev_medium) +dev_results["Collect tags (medium), SubString"] = @benchmark bench_collect_tags($dev_medium_ss) + +# 
LazyNode benchmarks +dev_lazy_small = parse(SMALL_XML, LazyNode) +dev_lazy_medium = parse(MEDIUM_XML, LazyNode) + +dev_results["Parse (small), LazyNode"] = @benchmark parse($SMALL_XML, LazyNode) +dev_results["Parse (medium), LazyNode"] = @benchmark parse($MEDIUM_XML, LazyNode) +dev_results["Write (small), LazyNode"] = @benchmark XML.write($(dev_lazy_small[1])) +dev_results["Write (medium), LazyNode"] = @benchmark XML.write($(dev_lazy_medium[1])) +dev_results["sourcetext, small"] = @benchmark sourcetext($(dev_lazy_small[1])) +dev_results["sourcetext, medium"] = @benchmark sourcetext($(dev_lazy_medium[1])) +dev_lazy_medium_root = let ch = children(dev_lazy_medium) + i = findfirst(c -> nodetype(c) === Element, ch) + ch[i] +end +dev_results["children vs eachchildnode, children"] = @benchmark children($dev_lazy_medium_root) +dev_results["children vs eachchildnode, eachchildnode"] = @benchmark collect(eachchildnode($dev_lazy_medium_root)) + +# SST-like benchmark: many children, write each one +const SST_N = 10_000 +const SST_XML = "<sst>" * join("""<si><t>string_$i</t></si>""" for i in 1:SST_N) * "</sst>" +dev_sst_node = parse(SST_XML, Node) +dev_sst_lazy = parse(SST_XML, LazyNode) +dev_sst_root_node = only(children(dev_sst_node)) +dev_sst_root_lazy = only(children(dev_sst_lazy)) + +function bench_sst_node(xml) + root = only(children(parse(xml, Node))) + out = String[] + for c in XML.children(root) + XML.nodetype(c) === XML.Element && push!(out, XML.write(c)) + end + out +end +function bench_sst_lazy_children(xml) + root = only(children(parse(xml, LazyNode))) + out = String[] + for c in XML.children(root) + XML.nodetype(c) === XML.Element && push!(out, XML.write(c)) + end + out +end +function bench_sst_lazy_eachchildnode(xml) + root = only(children(parse(xml, LazyNode))) + out = String[] + for c in XML.eachchildnode(root) + XML.nodetype(c) === XML.Element && push!(out, XML.write(c)) + end + out +end + +dev_results["SST (parse+iterate+write), Node"] = @benchmark 
bench_sst_node($SST_XML) +dev_results["SST (parse+iterate+write), LazyNode+children"] = @benchmark bench_sst_lazy_children($SST_XML) +dev_results["SST (parse+iterate+write), LazyNode+eachchildnode"] = @benchmark bench_sst_lazy_eachchildnode($SST_XML) + +println(" done") + +#-----------------------------------------------------------------------------# Run release benchmarks via temp worktree + separate process +print("Setting up $RELEASE_TAG worktree...") +flush(stdout) + +worktree_dir = mktempdir() +run(pipeline(`git -C $ROOT worktree add $worktree_dir $RELEASE_TAG`, stdout=devnull, stderr=devnull)) +println(" done") + +release_results_file = joinpath(worktree_dir, "_results.jls") + +release_script = joinpath(worktree_dir, "_bench.jl") +write(release_script, """ +using Pkg +Pkg.activate(; temp=true) +Pkg.develop(path=$(repr(worktree_dir))) +Pkg.add("BenchmarkTools") +Pkg.add("Serialization") + +using BenchmarkTools, Serialization, XML + +BenchmarkTools.DEFAULT_PARAMETERS.seconds = 5 +BenchmarkTools.DEFAULT_PARAMETERS.samples = 10000 + +small_xml = read($(repr(SMALL_FILE)), String) +medium_xml = read($(repr(MEDIUM_FILE)), String) +results = Dict{String, BenchmarkTools.Trial}() + +results["Parse (small)"] = @benchmark parse(\$small_xml, Node) + +try + SSNode = Node{SubString{String}} + results["Parse (small, SS)"] = @benchmark parse(\$small_xml, SSNode) + results["Parse (medium, SS)"] = @benchmark parse(\$medium_xml, SSNode) +catch +end + +results["Parse (medium)"] = @benchmark parse(\$medium_xml, Node) + +small_node = parse(small_xml, Node) +medium_node = parse(medium_xml, Node) +results["Write (small)"] = @benchmark XML.write(\$small_node) +results["Write (medium)"] = @benchmark XML.write(\$medium_node) +results["Read file (medium)"] = @benchmark read($(repr(MEDIUM_FILE)), Node) + +function _collect_tags!(out, node) + for c in XML.children(node) + if XML.nodetype(c) === XML.Element + push!(out, XML.tag(c)) + _collect_tags!(out, c) + end + end +end +function 
bench_collect_tags(node) + out = String[] + _collect_tags!(out, node) + out +end +results["Collect tags (small)"] = @benchmark bench_collect_tags(\$small_node) +results["Collect tags (medium)"] = @benchmark bench_collect_tags(\$medium_node) + +try + lazy_small = parse(small_xml, LazyNode) + lazy_medium = parse(medium_xml, LazyNode) + results["Parse (small), LazyNode"] = @benchmark parse(\$small_xml, LazyNode) + results["Parse (medium), LazyNode"] = @benchmark parse(\$medium_xml, LazyNode) +catch +end + +serialize($(repr(release_results_file)), results) +""") + +print("Running $RELEASE_TAG benchmarks...") +flush(stdout) +run(pipeline(`julia $release_script`, stdout=devnull, stderr=devnull)) +release_results = deserialize(release_results_file) +println(" done") + +# Cleanup worktree +run(pipeline(`git -C $ROOT worktree remove --force $worktree_dir`, stdout=devnull, stderr=devnull)) + +#-----------------------------------------------------------------------------# Write compare_results.md +_fmt_ms(t) = string(round(t, sigdigits=3), " ms") + +function _compare_indicator(dev_ms, rel_ms) + change = (dev_ms / rel_ms - 1) * 100 + pct = abs(round(change, digits=1)) + change < -5 ? "($(pct)% faster)" : change > 5 ? 
"($(pct)% slower)" : "(~same)" +end + +groups = [ + ("Parse (small)", "Parse (small)", ["Parse (small), String", "Parse (small), SubString", "Parse (small), LazyNode"]), + ("Parse (medium)", "Parse (medium)", ["Parse (medium), String", "Parse (medium), SubString", "Parse (medium), LazyNode"]), + ("Write (small)", "Write (small)", ["Write (small)", "Write (small), LazyNode"]), + ("Write (medium)", "Write (medium)", ["Write (medium)", "Write (medium), LazyNode"]), + ("Read file (medium)", "Read file (medium)", ["Read file (medium), String", "Read file (medium), SubString"]), + ("Collect tags (small)", "Collect tags (small)", ["Collect tags (small), String", "Collect tags (small), SubString"]), + ("Collect tags (medium)","Collect tags (medium)", ["Collect tags (medium), String", "Collect tags (medium), SubString"]), + ("sourcetext", nothing, ["sourcetext, small", "sourcetext, medium"]), + ("children vs eachchildnode (medium)", nothing, ["children vs eachchildnode, children", "children vs eachchildnode, eachchildnode"]), + ("SST-like: parse+iterate+write (10k)", nothing, ["SST (parse+iterate+write), Node", "SST (parse+iterate+write), LazyNode+children", "SST (parse+iterate+write), LazyNode+eachchildnode"]), +] + +outfile = joinpath(@__DIR__, "compare_results.md") +open(outfile, "w") do io + println(io, "# XML.jl Benchmark Comparison: dev vs $RELEASE_TAG\n") + println(io, "```") + for (title, rel_key, dev_keys) in groups + rel_ms = (!isnothing(rel_key) && haskey(release_results, rel_key)) ? median(release_results[rel_key]).time / 1e6 : nothing + any(k -> haskey(dev_results, k), dev_keys) || (isnothing(rel_ms) && continue) + + println(io, title) + if !isnothing(rel_ms) + println(io, "\t", rpad(RELEASE_TAG, 16), lpad(_fmt_ms(rel_ms), 12)) + end + for dk in dev_keys + haskey(dev_results, dk) || continue + dev_ms = median(dev_results[dk]).time / 1e6 + label = occursin(", ", dk) ? 
split(dk, ", "; limit=2)[2] : "dev" + ms_str = lpad(_fmt_ms(dev_ms), 12) + padlen = max(16, length(label) + 2) + if isnothing(rel_ms) + println(io, "\t", rpad(label, padlen), ms_str) + else + println(io, "\t", rpad(label, padlen), ms_str, " ", _compare_indicator(dev_ms, rel_ms)) + end + end + println(io) + end + println(io, "```") + + println(io, "\n```julia") + println(io, "versioninfo()") + buf = IOBuffer() + InteractiveUtils.versioninfo(buf) + for line in eachline(IOBuffer(take!(buf))) + println(io, "# ", line) + end + println(io, "```") +end + +println("Results written to $outfile") diff --git a/benchmarks/compare_results.md b/benchmarks/compare_results.md new file mode 100644 index 0000000..dffbcae --- /dev/null +++ b/benchmarks/compare_results.md @@ -0,0 +1,71 @@ +# XML.jl Benchmark Comparison: dev vs v0.3.8 + +``` +Parse (small) + v0.3.8 0.139 ms + String 0.0409 ms (70.6% faster) + SubString 0.033 ms (76.3% faster) + LazyNode 6.33e-6 ms (100.0% faster) + +Parse (medium) + v0.3.8 829.0 ms + String 200.0 ms (75.8% faster) + SubString 163.0 ms (80.4% faster) + LazyNode 6.33e-6 ms (100.0% faster) + +Write (small) + v0.3.8 0.032 ms + dev 0.0215 ms (32.6% faster) + LazyNode 0.000217 ms (99.3% faster) + +Write (medium) + v0.3.8 156.0 ms + dev 99.2 ms (36.3% faster) + LazyNode 0.000273 ms (100.0% faster) + +Read file (medium) + v0.3.8 755.0 ms + String 193.0 ms (74.4% faster) + SubString 179.0 ms (76.3% faster) + +Collect tags (small) + v0.3.8 0.00064 ms + String 0.000714 ms (11.7% slower) + SubString 0.00211 ms (230.3% slower) + +Collect tags (medium) + v0.3.8 21.6 ms + String 13.3 ms (38.7% faster) + SubString 20.3 ms (6.2% faster) + +sourcetext + small 0.000191 ms + medium 0.000248 ms + +children vs eachchildnode (medium) + children 76.8 ms + eachchildnode 80.4 ms + +SST-like: parse+iterate+write (10k) + Node 9.01 ms + LazyNode+children 9.78 ms + LazyNode+eachchildnode 10.4 ms + +``` + +```julia +versioninfo() +# Julia Version 1.12.6 +# Commit 15346901f00 
(2026-04-09 19:20 UTC) +# Build Info: +# Official https://julialang.org release +# Platform Info: +# OS: macOS (arm64-apple-darwin24.0.0) +# CPU: 10 × Apple M1 Pro +# WORD_SIZE: 64 +# LLVM: libLLVM-18.1.7 (ORCJIT, apple-m1) +# GC: Built with stock GC +# Threads: 8 default, 1 interactive, 8 GC (on 8 virtual cores) +# Environment: +# JULIA_NUM_THREADS = auto +``` diff --git a/benchmarks/dict_benchmarks.jl b/benchmarks/dict_benchmarks.jl new file mode 100644 index 0000000..7dd90a3 --- /dev/null +++ b/benchmarks/dict_benchmarks.jl @@ -0,0 +1,71 @@ +using XML +using BenchmarkTools + +BenchmarkTools.DEFAULT_PARAMETERS.seconds = 5 + +#-----------------------------------------------------------------------------# Setup +sizes = [2, 5, 10, 20] + +function make_xml(n::Int) + attrs = join((" attr$i=\"value$i\"" for i in 1:n)) + "<root$attrs/>" +end + +function make_pairs(n::Int) + Pair{String,String}["attr$i" => "value$i" for i in 1:n] +end + +pt(t) = BenchmarkTools.prettytime(t) + +function printrow(n, op, t_dict, t_attr) + pct = round(100 * (t_dict - t_attr) / t_dict, digits=1) + label = pct > 0 ? 
"$(pct)% faster" : "$(-pct)% slower" + println(rpad("$n attrs", 10), " | ", rpad(op, 22), " | ", + rpad("Dict $(pt(t_dict))", 22), " | ", + rpad("Attributes $(pt(t_attr))", 26), " | ", label) +end + +#-----------------------------------------------------------------------------# Benchmarks +println("=" ^ 110) +println(" Attributes vs Dict Benchmarks") +println("=" ^ 110) +println(rpad("Size", 10), " | ", rpad("Operation", 22), " | ", + rpad("Dict", 22), " | ", rpad("Attributes", 26), " | Change") +println("-" ^ 110) + +for n in sizes + pairs = make_pairs(n) + d = Dict(pairs) + a = XML.Attributes(pairs) + key_mid = "attr$(n ÷ 2 + 1)" + key_last = "attr$n" + + tests = [ + ("construct", () -> @benchmark(Dict($pairs)), () -> @benchmark(XML.Attributes($pairs))), + ("getindex [mid]", () -> @benchmark($d[$key_mid]), () -> @benchmark($a[$key_mid])), + ("getindex [last]", () -> @benchmark($d[$key_last]), () -> @benchmark($a[$key_last])), + ("get [miss]", () -> @benchmark(get($d, "nope", nothing)), () -> @benchmark(get($a, "nope", nothing))), + ("haskey [hit]", () -> @benchmark(haskey($d, $key_mid)), () -> @benchmark(haskey($a, $key_mid))), + ("keys", () -> @benchmark(collect(keys($d))), () -> @benchmark(keys($a))), + ("iterate", () -> @benchmark(sum(length(v) for (_,v) in $d)), () -> @benchmark(sum(length(v) for (_,v) in $a))), + ] + + for (op, bench_dict, bench_attr) in tests + t_dict = median(bench_dict()).time + t_attr = median(bench_attr()).time + printrow(n, op, t_dict, t_attr) + end + println("-" ^ 110) +end + +#-----------------------------------------------------------------------------# End-to-end: attributes() call on parsed Node +println() +println(rpad("Size", 10), " | ", rpad("Operation", 22), " | Time") +println("-" ^ 50) +for n in sizes + doc = parse(make_xml(n), Node) + el = doc[1] + t = median(@benchmark(attributes($el))).time + println(rpad("$n attrs", 10), " | ", rpad("attributes(node)", 22), " | ", pt(t)) +end +println() diff --git a/benchmarks/suite.jl 
b/benchmarks/suite.jl deleted file mode 100644 index e06dc61..0000000 --- a/benchmarks/suite.jl +++ /dev/null @@ -1,74 +0,0 @@ -using Pkg -Pkg.activate(@__DIR__) - -using XML -using EzXML: EzXML -using XMLDict: XMLDict -using BenchmarkTools -using DataFrames -using UnicodePlots -using OrderedCollections: OrderedDict - - -BenchmarkTools.DEFAULT_PARAMETERS.seconds = 10 -BenchmarkTools.DEFAULT_PARAMETERS.samples = 20000 - - -# nasa.xml was downloaded from: -# http://aiweb.cs.washington.edu/research/projects/xmltk/xmldata/www/repository.html#nasa -file = joinpath(@__DIR__, "nasa.xml") - -df = DataFrame(kind=String[], name=String[], bench=BenchmarkTools.Trial[]) - -macro add_benchmark(kind, name, expr...) - esc(:(let - @info string($kind, " - ", $name) - bench = @benchmark $(expr...) - push!(df, (; kind=$kind, name=$name, bench)) - end)) -end - -#-----------------------------------------------------------------------------# Write -@add_benchmark "Write" "XML.write" XML.write($(tempname()), o) setup = (o = read(file, Node)) -@add_benchmark "Write" "EzXML.writexml" EzXML.write($(tempname()), o) setup = (o = EzXML.readxml(file)) - -#-----------------------------------------------------------------------------# Read -@add_benchmark "Read" "XML.LazyNode" read($file, LazyNode) -@add_benchmark "Read" "XML.Node" read($file, Node) -@add_benchmark "Read" "EzXML.readxml" EzXML.readxml($file) -@add_benchmark "Read" "XMLDict.xml_dict" XMLDict.xml_dict(read($file, String)) - -#-----------------------------------------------------------------------------# Lazy Iteration -@add_benchmark "Lazy Iteration" "LazyNode" for x in read($file, LazyNode); end -@add_benchmark "Lazy Iteration" "EzXML.StreamReader" (reader = open(EzXML.StreamReader, $file); for x in reader; end; close(reader)) - -#-----------------------------------------------------------------------------# Lazy Iteration: Collect Tags -@add_benchmark "Collect Tags" "LazyNode" [tag(x) for x in o] setup = (o = read(file, LazyNode)) 
-@add_benchmark "Collect Tags" "EzXML.StreamReader" [r.name for x in r if x == EzXML.READER_ELEMENT] setup=(r=open(EzXML.StreamReader, file)) teardown=(close(r)) - -function get_tags(o::EzXML.Node) - out = String[] - for node in EzXML.eachelement(o) - push!(out, node.name) - for tag in get_tags(node) - push!(out, tag) - end - end - out -end -@add_benchmark "Collect Tags" "EzXML.readxml" get_tags(o.root) setup=(o = EzXML.readxml(file)) - - -#-----------------------------------------------------------------------------# Plots -function plot(df, kind) - g = groupby(df, :kind) - sub = g[(;kind)] - x = map(row -> "$(row.name)", eachrow(sub)) - y = map(x -> median(x).time / 1000^2, sub.bench) - display(barplot(x, y, title = "$kind Time (ms)", border=:none, width=50)) -end - -plot(df, "Read") -plot(df, "Write") -plot(df, "Lazy Iteration") -plot(df, "Collect Tags") diff --git a/ext/XMLAbstractTreesExt.jl b/ext/XMLAbstractTreesExt.jl new file mode 100644 index 0000000..60add31 --- /dev/null +++ b/ext/XMLAbstractTreesExt.jl @@ -0,0 +1,71 @@ +module XMLAbstractTreesExt + +using XML: XML, Node, LazyNode, NodeType, Element, Text, CData, Comment, + Declaration, DTD, Document, ProcessingInstruction, + nodetype, tag, value, attributes +import AbstractTrees + +#-----------------------------------------------------------------------------# children +AbstractTrees.children(n::Node) = XML.children(n) +AbstractTrees.children(n::LazyNode) = XML.children(n) + +#-----------------------------------------------------------------------------# nodevalue +AbstractTrees.nodevalue(n::Node) = n +AbstractTrees.nodevalue(n::LazyNode) = n + +#-----------------------------------------------------------------------------# printnode +# Single-line label for `print_tree`; mirrors the REPL `show` for each NodeType but +# without trailing child-count annotations (AbstractTrees draws the structure). 
+_printnode(io::IO, n::Union{Node, LazyNode}) = _printnode(io, n, nodetype(n)) + +function _printnode(io::IO, n, ::Val{Element}) + print(io, '<', tag(n)) + attrs = attributes(n) + if !isnothing(attrs) + for (k, v) in attrs + print(io, ' ', k, '=', '"', v, '"') + end + end + print(io, '>') +end + +_printnode(io::IO, n, ::Val{Text}) = show(io, value(n)) +_printnode(io::IO, n, ::Val{Comment}) = print(io, "<!--", value(n), "-->") +_printnode(io::IO, n, ::Val{CData}) = print(io, "<![CDATA[", value(n), "]]>") +_printnode(io::IO, n, ::Val{DTD}) = print(io, "<!DOCTYPE ", value(n), '>') + +function _printnode(io::IO, n, ::Val{Declaration}) + print(io, "<?xml") + attrs = attributes(n) + if !isnothing(attrs) + for (k, v) in attrs + print(io, ' ', k, '=', '"', v, '"') + end + end + print(io, "?>") +end + +function _printnode(io::IO, n, ::Val{ProcessingInstruction}) + print(io, "<?", tag(n)) + v = value(n) + !isnothing(v) && print(io, ' ', v) + print(io, "?>") +end + +_printnode(io::IO, n, ::Val{Document}) = print(io, "Document") + +# Dispatch helper: avoid an Enum branch chain by tag-dispatching on Val{NodeType}. 
+_printnode(io::IO, n, nt::NodeType) = _printnode(io, n, Val(nt)) + +AbstractTrees.printnode(io::IO, n::Node) = _printnode(io, n) +AbstractTrees.printnode(io::IO, n::LazyNode) = _printnode(io, n) + +#-----------------------------------------------------------------------------# traits +AbstractTrees.NodeType(::Type{<:Node}) = AbstractTrees.HasNodeType() +AbstractTrees.NodeType(::Type{<:LazyNode}) = AbstractTrees.HasNodeType() +AbstractTrees.nodetype(::Type{N}) where {N <: Node} = N +AbstractTrees.nodetype(::Type{L}) where {L <: LazyNode} = L + +AbstractTrees.ChildIndexing(::Type{<:Node}) = AbstractTrees.IndexedChildren() + +end # module diff --git a/src/XML.jl b/src/XML.jl index 273bfda..a431541 100644 --- a/src/XML.jl +++ b/src/XML.jl @@ -1,31 +1,66 @@ module XML -using Mmap -using OrderedCollections: OrderedDict - export - # Core Types: - Node, LazyNode, - # Interface: - children, nodetype, tag, attributes, value, is_simple, simplevalue, simple_value, - # Extended Interface for LazyNode: - parent, depth, next, prev + Node, LazyNode, NodeType, Attributes, + CData, Comment, Declaration, Document, DTD, Element, ProcessingInstruction, Text, + nodetype, tag, attributes, value, children, children!, eachchildnode, eachattribute, + is_simple, simple_value, is_simple_value, sourcetext, + depth, siblings, + xpath, + h + +include("XMLTokenizer.jl") +using .XMLTokenizer: + XMLTokenizer, tokenize, tag_name, attr_value, pi_target, + TokenKinds, Token, Tokenizer, TokenizerState #-----------------------------------------------------------------------------# escape/unescape -const escape_chars = ('&' => "&amp;", '<' => "&lt;", '>' => "&gt;", "'" => "&apos;", '"' => "&quot;") +const ESCAPE_CHARS = ('&' => "&amp;", '<' => "&lt;", '>' => "&gt;", '\'' => "&apos;", '"' => "&quot;") + +""" + escape(x::AbstractString) -> String + +Escape the five XML predefined entities: `&` `<` `>` `'` `"`. + +!!! note "Changed in v0.4" + `escape` is no longer idempotent.
In previous versions, already-escaped sequences like + `&amp;` were left untouched. Now every `&` is escaped, so `escape("&amp;")` produces + `"&amp;amp;"`. Call `escape` only on raw, unescaped text. +""" +escape(x::AbstractString) = replace(x, ESCAPE_CHARS...) + +# Replace a numeric character reference with its Unicode character. +# Numeric character references encode characters by code point: decimal (`&#233;` → é) or hex (`&#xE9;` → é). +function _unescape_charref(ref::AbstractString) + is_hex = length(ref) > 3 && ref[3] in ('x', 'X') + digits = SubString(ref, is_hex ? 4 : 3, length(ref) - 1) + cp = tryparse(UInt32, digits; base = is_hex ? 16 : 10) + !isnothing(cp) && isvalid(Char, cp) ? string(Char(cp)) : ref +end + +""" + unescape(x::AbstractString) -> String + unescape(x::SubString{String}) -> Union{SubString{String}, String} + +Unescape XML entities in `x`: the five predefined entities (`&` `<` `>` `'` +`"`) and numeric character references (`&#123;`, `&#xAB;`). Each reference is processed +exactly once (no double-unescaping). + +When `x` is a `SubString{String}` containing no `&`, the input is returned unchanged with +no allocation — the common case for typical XML attribute and text content.
+""" function unescape(x::AbstractString) - result = x - for (pat, r) in reverse.(escape_chars) - result = replace(result, pat => r) - end - return result + s = string(x) + occursin('&', s) || return s + occursin("&#", s) && (s = replace(s, r"&#[xX]?[0-9a-fA-F]+;" => _unescape_charref)) + replace(s, "<" => "<", ">" => ">", "'" => "'", """ => "\"", "&" => "&") end -function escape(x::String) - result = replace(x, r"&(?!amp;|quot;|apos;|gt;|lt;)" => "&") - for (pat, r) in escape_chars[2:end] - result = replace(result, pat => r) - end - return result + +function unescape(x::SubString{String}) + occursin('&', x) || return x + s = String(x) + occursin("&#", s) && (s = replace(s, r"&#[xX]?[0-9a-fA-F]+;" => _unescape_charref)) + replace(s, "<" => "<", ">" => ">", "'" => "'", """ => "\"", "&" => "&") end #-----------------------------------------------------------------------------# NodeType @@ -34,9 +69,9 @@ end - Document # prolog & root Element - DTD # <!DOCTYPE ...> - Declaration # <?xml attributes... ?> - - ProcessingInstruction # <?NAME attributes... ?> + - ProcessingInstruction # <?NAME content... ?> - Comment # <!-- ... --> - - CData # <![CData[...]]> + - CData # <![CDATA[...]]> - Element # <NAME attributes... > children... </NAME> - Text # text @@ -45,381 +80,1131 @@ NodeTypes can be used to construct XML.Nodes: Document(children...) DTD(value) Declaration(; attributes) - ProcessingInstruction(tag, attributes) + ProcessingInstruction(tag, content) Comment(text) CData(text) Element(tag, children...; attributes) Text(text) """ -@enum(NodeType, CData, Comment, Declaration, Document, DTD, Element, ProcessingInstruction, Text) +@enum NodeType::UInt8 CData Comment Declaration Document DTD Element ProcessingInstruction Text +#-----------------------------------------------------------------------------# Attributes +""" + Attributes{S} <: AbstractDict{S, S} + +An ordered dictionary of XML attributes backed by a `Vector{Pair{S, S}}`. +Returned by [`attributes`](@ref). 
Preserves insertion order and supports the +full `AbstractDict` interface (`get`, `haskey`, `keys`, `values`, iteration, etc.). +""" +struct Attributes{S} <: AbstractDict{S, S} + entries::Vector{Pair{S, S}} +end + +Base.length(a::Attributes) = length(a.entries) +Base.iterate(a::Attributes, state...) = iterate(a.entries, state...) + +function Base.getindex(a::Attributes, key::AbstractString) + for (k, v) in a.entries + k == key && return v + end + throw(KeyError(key)) +end -#-----------------------------------------------------------------------------# includes -include("raw.jl") -include("dtd.jl") +function Base.get(a::Attributes, key::AbstractString, default) + for (k, v) in a.entries + k == key && return v + end + default +end + +function Base.haskey(a::Attributes, key::AbstractString) + any(p -> first(p) == key, a.entries) +end -abstract type AbstractXMLNode end +Base.keys(a::Attributes) = first.(a.entries) +Base.values(a::Attributes) = last.(a.entries) -#-----------------------------------------------------------------------------# LazyNode +#-----------------------------------------------------------------------------# Node """ - LazyNode(file::AbstractString) - LazyNode(data::XML.Raw) + Node{S} + +In-memory DOM node parameterized on the string storage type `S` (typically `String`, or +`SubString{String}` for zero-copy parsing). Every kind of XML node — `Element`, `Text`, +`Comment`, `CData`, `ProcessingInstruction`, `Declaration`, `DTD`, `Document` — is +represented by a single `Node{S}` whose [`NodeType`](@ref) determines which fields are +populated. -A Lazy representation of an XML node. + parse(xml, Node) # parse a string into a Node{String} + parse(xml, Node{SubString{String}}) # zero-copy variant + read(filename, Node) # read & parse a file + +Use the accessor functions ([`nodetype`](@ref), [`tag`](@ref), [`attributes`](@ref), +[`value`](@ref), [`children`](@ref)) rather than the raw fields when navigating a tree. 
+Integer indexing returns children (`node[1]`); string indexing returns attribute values +(`node["class"]`). """ -mutable struct LazyNode <: AbstractXMLNode - raw::Raw - tag::Union{Nothing, String} - attributes::Union{Nothing, OrderedDict{String, String}} - value::Union{Nothing, String} -end -LazyNode(raw::Raw) = LazyNode(raw, nothing, nothing, nothing) +struct Node{S} + nodetype::NodeType + tag::Union{Nothing, S} + attributes::Union{Nothing, Vector{Pair{S, S}}} + value::Union{Nothing, S} + children::Union{Nothing, Vector{Node{S}}} -function Base.getproperty(o::LazyNode, x::Symbol) - x === :raw && return getfield(o, :raw) - x === :nodetype && return nodetype(o.raw) - x === :tag && return isnothing(getfield(o, x)) ? setfield!(o, x, tag(o.raw)) : getfield(o, x) - x === :attributes && return isnothing(getfield(o, x)) ? setfield!(o, x, attributes(o.raw)) : getfield(o, x) - x === :value && return isnothing(getfield(o, x)) ? setfield!(o, x, value(o.raw)) : getfield(o, x) - x === :depth && return depth(o.raw) - x === :children && return LazyNode.(children(o.raw)) - error("type LazyNode has no field $(x)") + function Node{S}(nodetype::NodeType, tag, attributes, value, children) where {S} + if nodetype in (Text, Comment, CData, DTD) + isnothing(tag) && isnothing(attributes) && !isnothing(value) && isnothing(children) || + error("$nodetype nodes only accept a value.") + elseif nodetype === Element + !isnothing(tag) && isnothing(value) || + error("Element nodes require a tag and no value.") + elseif nodetype === Declaration + isnothing(tag) && isnothing(value) && isnothing(children) || + error("Declaration nodes only accept attributes.") + elseif nodetype === ProcessingInstruction + !isnothing(tag) && isnothing(attributes) && isnothing(children) || + error("ProcessingInstruction nodes require a tag and only accept a value.") + elseif nodetype === Document + isnothing(tag) && isnothing(attributes) && isnothing(value) || + error("Document nodes only accept children.") + end + 
new{S}(nodetype, tag, attributes, value, children) + end end -Base.propertynames(o::LazyNode) = (:raw, :nodetype, :tag, :attributes, :value, :depth, :children) -Base.show(io::IO, o::LazyNode) = _show_node(io, o) +#-----------------------------------------------------------------------------# interface +""" + nodetype(node) -> NodeType + +Return the [`NodeType`](@ref) of `node` (`Element`, `Text`, `Comment`, `CData`, +`ProcessingInstruction`, `Declaration`, `DTD`, or `Document`). +""" +nodetype(o::Node) = o.nodetype + +""" + tag(node) -> Union{String, SubString{String}, Nothing} + +Return the tag name of `node`. Defined for `Element` (element name) and +`ProcessingInstruction` (target name); returns `nothing` for other node types. +""" +tag(o::Node) = o.tag + +""" + attributes(node::Node) -> Union{Nothing, Attributes{String}} + +Return the attributes of an `Element` or `Declaration` node as an [`Attributes`](@ref) dict, +or `nothing` if the node has no attributes. + +!!! note "Changed in v0.4" + In previous versions, `attributes` returned an `OrderedDict` from OrderedCollections.jl. + It now returns an [`Attributes`](@ref), an ordered `AbstractDict` backed by a + `Vector{Pair}`. +""" +attributes(o::Node) = isnothing(o.attributes) ? nothing : Attributes(o.attributes) + +""" + value(node) -> Union{String, SubString{String}, Nothing} + +Return the textual content of `node`. Defined for `Text`, `Comment`, `CData`, `DTD`, and +`ProcessingInstruction`; returns `nothing` for `Element`, `Declaration`, and `Document` +(use [`children`](@ref) for those). +""" +value(o::Node) = o.value + +""" + children(node) -> Vector{Node} or () + +Return the child nodes of `node` in document order. Returns an empty tuple `()` for nodes +that cannot have children (e.g. `Text`, `Comment`, `CData`). +""" +children(o::Node) = something(o.children, ()) + +""" + is_simple(node) -> Bool + +Return `true` if `node` is an `Element` with no attributes and exactly one `Text` or +`CData` child — i.e. 
the `<tag>content</tag>` pattern with no nested markup. See also +[`simple_value`](@ref). +""" +is_simple(o::Node) = o.nodetype === Element && + (isnothing(o.attributes) || isempty(o.attributes)) && + !isnothing(o.children) && length(o.children) == 1 && + o.children[1].nodetype in (Text, CData) + +""" + simple_value(node) -> String + +Return the textual content of a simple element (see [`is_simple`](@ref)). Errors if +`node` is not simple. +""" +simple_value(o::Node) = is_simple(o) ? o.children[1].value : + error("`simple_value` is only defined for simple nodes.") + +""" + is_simple_value(node) -> Union{Nothing, String, SubString{String}} + +Combined predicate-and-accessor: return the simple text/CData value of `node` if it is a +simple element (see [`is_simple`](@ref)), or `nothing` otherwise. Avoids the redundant +tokenization that `is_simple(n) ? simple_value(n) : ...` does on `LazyNode`. +""" +is_simple_value(o::Node) = is_simple(o) ? o.children[1].value : nothing -Base.read(io::IO, ::Type{LazyNode}) = LazyNode(read(io, Raw)) -Base.read(filename::AbstractString, ::Type{LazyNode}) = LazyNode(read(filename, Raw)) -Base.parse(x::AbstractString, ::Type{LazyNode}) = LazyNode(parse(x, Raw)) +#-----------------------------------------------------------------------------# tree navigation -children(o::LazyNode) = LazyNode.(children(o.raw)) -parent(o::LazyNode) = LazyNode(parent(o.raw)) -depth(o::LazyNode) = depth(o.raw) +""" + parent(child::Node, root::Node) -> Node -Base.IteratorSize(::Type{LazyNode}) = Base.SizeUnknown() -Base.eltype(::Type{LazyNode}) = LazyNode +Return the parent of `child` within the tree rooted at `root`. -function Base.iterate(o::LazyNode, state=o) - n = next(state) - return isnothing(n) ? nothing : (n, n) +Since `Node` does not store parent pointers, this performs a tree search from `root`. +Throws an error if `child` is not found or if `child === root`. 
+""" +function Base.parent(child::Node, root::Node) + child === root && error("Root node has no parent.") + result = _find_parent(child, root) + isnothing(result) && error("Node not found in tree.") + result end -function next(o::LazyNode) - n = next(o.raw) - isnothing(n) && return nothing - n.type === RawElementClose ? next(LazyNode(n)) : LazyNode(n) +# Depth-first search for `child` within `current`; returns the containing node or nothing. +function _find_parent(child::Node, current::Node) + for c in children(current) + c === child && return current + result = _find_parent(child, c) + isnothing(result) || return result + end + nothing end -function prev(o::LazyNode) - n = prev(o.raw) - isnothing(n) && return nothing - n.type === RawElementClose ? prev(LazyNode(n)) : LazyNode(n) + +""" + depth(child::Node, root::Node) -> Int + +Return the depth of `child` within the tree rooted at `root` (root has depth 0). + +Since `Node` does not store parent pointers, this performs a tree search from `root`. +Throws an error if `child` is not found in the tree. +""" +function depth(child::Node, root::Node) + child === root && return 0 + result = _find_depth(child, root, 0) + isnothing(result) && error("Node not found in tree.") + result +end + +# Depth-first search returning the depth of `child` relative to `current` (where children +# of `current` are at depth `d + 1`), or nothing if not found. +function _find_depth(child::Node, current::Node, d::Int) + for c in children(current) + c === child && return d + 1 + result = _find_depth(child, c, d + 1) + isnothing(result) || return result + end + nothing end -#-----------------------------------------------------------------------------# Node """ - Node(nodetype, tag, attributes, value, children) - Node(node::Node; kw...) 
# copy node with keyword overrides - Node(node::LazyNode) # un-lazy the LazyNode + siblings(child::Node, root::Node) -> Vector{Node} + +Return the siblings of `child` (other children of the same parent) within the tree rooted +at `root`. The returned vector does not include `child` itself. -A representation of an XML DOM node. For simpler construction, use `(::NodeType)(args...)` +Throws an error if `child` is the root or is not found in the tree. """ -struct Node <: AbstractXMLNode - nodetype::NodeType - tag::Union{Nothing, String} - attributes::Union{Nothing, OrderedDict{String, String}} - value::Union{Nothing, String} - children::Union{Nothing, Vector{Node}} - - function Node(nodetype::NodeType, tag=nothing, attributes=nothing, value=nothing, children=nothing) - new(nodetype, - isnothing(tag) ? nothing : string(tag), - isnothing(attributes) ? nothing : OrderedDict(string(k) => string(v) for (k, v) in pairs(attributes)), - isnothing(value) ? nothing : string(value), - isnothing(children) ? nothing : - children isa Node ? [children] : - children isa Vector{Node} ? children : - children isa Vector ? map(Node, children) : - children isa Tuple ? map(Node, collect(children)) : - [Node(children)] - ) +function siblings(child::Node, root::Node) + p = parent(child, root) + [c for c in children(p) if c !== child] +end + +include("xpath.jl") +include("lazynode.jl") + + +#-----------------------------------------------------------------------------# _to_node +# Coerce a positional argument to a Node{String}: identity for nodes, wrap non-nodes as +# Text. The middle method rejects non-String parameterizations to keep mixed-storage trees +# from being silently constructed. 
+_to_node(n::Node{String}) = n +_to_node(n::Node) = throw(ArgumentError("Expected Node{String}, got $(typeof(n))")) +_to_node(x) = Node{String}(Text, nothing, nothing, string(x), nothing) + +#-----------------------------------------------------------------------------# NodeType constructors +# Make each NodeType variant callable as a constructor: `Element("div", ...)`, +# `Text("hi")`, etc. Dispatches on `T` to validate args/kwargs and build the right Node. +function (T::NodeType)(args...; attrs...) + S = String + if T in (Text, Comment, CData, DTD) + length(args) == 1 || error("$T nodes require exactly one value argument.") + !isempty(attrs) && error("$T nodes do not accept attributes.") + Node{S}(T, nothing, nothing, string(only(args)), nothing) + elseif T === Element + isempty(args) && error("Element nodes require at least a tag.") + t = string(first(args)) + a = Pair{S,S}[String(k) => String(v) for (k, v) in pairs(attrs)] + c = Node{S}[_to_node(x) for x in args[2:end]] + Node{S}(T, t, a, nothing, c) + elseif T === Declaration + !isempty(args) && error("Declaration nodes only accept keyword attributes.") + a = isempty(attrs) ? nothing : [String(k) => String(v) for (k, v) in pairs(attrs)] + Node{S}(T, nothing, a, nothing, nothing) + elseif T === ProcessingInstruction + length(args) >= 1 || error("ProcessingInstruction nodes require a target.") + length(args) <= 2 || error("ProcessingInstruction nodes accept a target and optional content.") + !isempty(attrs) && error("ProcessingInstruction nodes do not accept attributes.") + t = string(args[1]) + v = length(args) == 2 ? string(args[2]) : nothing + Node{S}(T, t, nothing, v, nothing) + elseif T === Document + !isempty(attrs) && error("Document nodes do not accept attributes.") + c = Node{S}[_to_node(x) for x in args] + Node{S}(T, nothing, nothing, nothing, c) end end -function Node(o::Node, x...; kw...) - attrs = !isnothing(kw) ? 
- merge( - OrderedDict(string(k) => string(v) for (k, v) in pairs(kw)), - isnothing(o.attributes) ? OrderedDict{String,String}() : o.attributes - ) : - o.attributes - children = isempty(x) ? o.children : vcat(isnothing(o.children) ? [] : o.children, collect(x)) - Node(o.nodetype, o.tag, attrs, o.value, children) +#-----------------------------------------------------------------------------# equality +# Treat `nothing` and an empty collection as equivalent so that an absent attribute / +# children field compares equal to an explicitly empty one. +_eq(::Nothing, ::Nothing) = true +_eq(::Nothing, b) = isempty(b) +_eq(a, ::Nothing) = isempty(a) +_eq(a, b) = a == b + +# Attribute equality is order-insensitive per XML spec. +function _attrs_eq(a, b) + a_empty = isnothing(a) || isempty(a) + b_empty = isnothing(b) || isempty(b) + a_empty && b_empty && return true + (a_empty != b_empty) && return false + length(a) != length(b) && return false + for p in a + p in b || return false + end + true end -function Node(node::LazyNode) - nodetype = node.nodetype - tag = node.tag - attributes = node.attributes - value = node.value - c = XML.children(node) - Node(nodetype, tag, attributes, value, isempty(c) ? 
nothing : map(Node, c)) +function Base.:(==)(a::Node, b::Node) + a.nodetype == b.nodetype && + a.tag == b.tag && + _attrs_eq(a.attributes, b.attributes) && + a.value == b.value && + _eq(a.children, b.children) end -Node(data::Raw) = Node(LazyNode(data)) +#-----------------------------------------------------------------------------# indexing +Base.getindex(o::Node, i::Integer) = children(o)[i] +Base.getindex(o::Node, ::Colon) = children(o) +Base.lastindex(o::Node) = lastindex(children(o)) +Base.only(o::Node) = only(children(o)) +Base.length(o::Node) = length(children(o)) + +function Base.get(o::Node, key::AbstractString, default) + isnothing(o.attributes) && return default + for (k, v) in o.attributes + k == key && return v + end + default +end -# Anything that's not Vector{UInt8} or a (Lazy)Node is converted to a Text Node -Node(x) = Node(Text, nothing, nothing, string(x), nothing) +const _MISSING_ATTR = gensym(:missing_attr) -h(tag::Union{Symbol, String}, children...; kw...) = Node(Element, tag, kw, nothing, children) -Base.getproperty(::typeof(h), tag::Symbol) = h(tag) -(o::Node)(children...; kw...) = Node(o, Node.(children)...; kw...) +function Base.getindex(o::Node, key::AbstractString) + val = get(o, key, _MISSING_ATTR) + val === _MISSING_ATTR && throw(KeyError(key)) + val +end -# NOT in-place for Text Nodes -function escape!(o::Node, warn::Bool=true) - if o.nodetype == Text - warn && @warn "escape!() called on a Text Node creates a new node." - return Text(escape(o.value)) +function Base.haskey(o::Node, key::AbstractString) + get(o, key, _MISSING_ATTR) !== _MISSING_ATTR +end + +Base.keys(o::Node) = isnothing(o.attributes) ? 
() : first.(o.attributes) + +#-----------------------------------------------------------------------------# mutation +function Base.setindex!(o::Node, val, i::Integer) + isnothing(o.children) && error("Node has no children.") + o.children[i] = _to_node(val) +end + +function Base.setindex!(o::Node, val, key::AbstractString) + isnothing(o.attributes) && error("Node has no attributes.") + v = string(val) + for i in eachindex(o.attributes) + if first(o.attributes[i]) == key + o.attributes[i] = key => v + return v + end end - isnothing(o.children) && return o - map!(x -> escape!(x, false), o.children, o.children) - o + push!(o.attributes, key => v) + v end -function unescape!(o::Node, warn::Bool=true) - if o.nodetype == Text - warn && @warn "unescape!() called on a Text Node creates a new node." - return Text(unescape(o.value)) + +function Base.push!(a::Node, b) + isnothing(a.children) && error("Node does not accept children.") + push!(a.children, _to_node(b)) + a +end + +function Base.pushfirst!(a::Node, b) + isnothing(a.children) && error("Node does not accept children.") + pushfirst!(a.children, _to_node(b)) + a +end + +#-----------------------------------------------------------------------------# show (REPL) +function Base.show(io::IO, o::Node) + nt = o.nodetype + print(io, nt) + if nt === Text + print(io, ' ', repr(o.value)) + elseif nt === Element + print(io, " <", o.tag) + if !isnothing(o.attributes) + for (k, v) in o.attributes + print(io, ' ', k, '=', '"', v, '"') + end + end + print(io, '>') + n = length(children(o)) + n > 0 && print(io, n == 1 ? 
" (1 child)" : " ($n children)") + elseif nt === DTD + print(io, " <!DOCTYPE ", o.value, '>') + elseif nt === Declaration + print(io, " <?xml") + if !isnothing(o.attributes) + for (k, v) in o.attributes + print(io, ' ', k, '=', '"', v, '"') + end + end + print(io, "?>") + elseif nt === ProcessingInstruction + print(io, " <?", o.tag) + !isnothing(o.value) && print(io, ' ', o.value) + print(io, "?>") + elseif nt === Comment + print(io, " <!--", o.value, "-->") + elseif nt === CData + print(io, " <![CDATA[", o.value, "]]>") + elseif nt === Document + n = length(children(o)) + n > 0 && print(io, n == 1 ? " (1 child)" : " ($n children)") end - isnothing(o.children) && return o - map!(x -> unescape!(x, false), o.children, o.children) - o end +#-----------------------------------------------------------------------------# show (text/xml) -Base.read(filename::AbstractString, ::Type{Node}) = Node(read(filename, Raw)) -Base.read(io::IO, ::Type{Node}) = Node(read(io, Raw)) -Base.parse(x::AbstractString, ::Type{Node}) = Node(parse(x, Raw)) +# Write XML-escaped content directly to IO (single pass, no intermediate string) +function _write_escaped(io::IO, s::String) + start = 1 + i = 1 + n = ncodeunits(s) + @inbounds while i <= n + b = codeunit(s, i) + esc = if b == UInt8('&'); "&" + elseif b == UInt8('<'); "<" + elseif b == UInt8('>'); ">" + elseif b == UInt8('"'); """ + elseif b == UInt8('\''); "'" + else + i += 1 + continue + end + i > start && GC.@preserve s Base.unsafe_write(io, pointer(s, start), (i - start) % UInt) + print(io, esc) + i += 1 + start = i + end + start <= n && GC.@preserve s Base.unsafe_write(io, pointer(s, start), (n - start + 1) % UInt) + nothing +end -Base.setindex!(o::Node, val, i::Integer) = o.children[i] = Node(val) -Base.push!(a::Node, b::Node) = push!(a.children, b) -Base.pushfirst!(a::Node, b::Node) = pushfirst!(a.children, b) +# Cached indentation strings to avoid repeated allocation +const _MAX_CACHED_INDENT = 64 +const _INDENT_STRINGS = [" " ^ n 
for n in 0:_MAX_CACHED_INDENT] +@inline function _indent_str(n::Int) + 0 <= n <= _MAX_CACHED_INDENT && return @inbounds _INDENT_STRINGS[n + 1] + " " ^ n +end -Base.setindex!(o::Node, val, key::AbstractString) = (o.attributes[key] = string(val)) -Base.getindex(o::Node, val::AbstractString) = o.attributes[val] -Base.haskey(o::Node, key::AbstractString) = isnothing(o.attributes) ? false : haskey(o.attributes, key) -Base.keys(o::Node) = isnothing(o.attributes) ? () : keys(o.attributes) +# Serialize `key="escaped-value"` pairs for an attributes vector (no leading space outside). +# Uses byte-level `Base.write` instead of `print` to avoid the varargs-print dispatch +# overhead that shows up under profile when an element has many attributes. +function _print_attrs(io::IO, attributes) + isnothing(attributes) && return + for (k, v) in attributes + Base.write(io, UInt8(' ')) + Base.write(io, k) + Base.write(io, UInt8('=')) + Base.write(io, UInt8('"')) + _write_escaped(io, v) + Base.write(io, UInt8('"')) + end +end -Base.show(io::IO, o::Node) = _show_node(io, o) +# Whitespace-only Text — emitted by the parser to round-trip source whitespace; pretty +# printing regenerates indentation from the tree shape and drops these. +@inline function _is_ignorable_text(node::Node) + node.nodetype === Text && !isnothing(node.value) && all(isspace, node.value) +end -#-----------------------------------------------------------------------------# Node Constructors -function (T::NodeType)(args...; attr...) 
- if T === Document - !isempty(attr) && error("Document nodes do not have attributes.") - Node(T, nothing, nothing, nothing, args) - elseif T === DTD - !isempty(attr) && error("DTD nodes only accept a value.") - length(args) > 1 && error("DTD nodes only accept a value.") - Node(T, nothing, nothing, only(args)) - elseif T === Declaration - !isempty(args) && error("Declaration nodes only accept attributes") - Node(T, nothing, attr) - elseif T === ProcessingInstruction - length(args) == 1 || error("ProcessingInstruction nodes require a tag and attributes.") - Node(T, only(args), attr) - elseif T === Comment - !isempty(attr) && error("Comment nodes do not have attributes.") - length(args) > 1 && error("Comment nodes only accept a single input.") - Node(T, nothing, nothing, only(args)) - elseif T === CData - !isempty(attr) && error("CData nodes do not have attributes.") - length(args) > 1 && error("CData nodes only accept a single input.") - Node(T, nothing, nothing, only(args)) - elseif T === Text - !isempty(attr) && error("Text nodes do not have attributes.") - length(args) > 1 && error("Text nodes only accept a single input.") - Node(T, nothing, nothing, only(args)) - elseif T === Element - tag = first(args) - Node(T, tag, attr, nothing, args[2:end]) - else - error("Unreachable reached while trying to create a Node via (::NodeType)(args...; kw...).") +# Mixed content = at least one Text/CData child carrying actual (non-whitespace) data. +# In that case the original whitespace is significant and we must not reformat. +function _has_significant_text(children) + for c in children + nt = c.nodetype + if nt === Text + (!isnothing(c.value) && !all(isspace, c.value)) && return true + elseif nt === CData + return true + end end + false end -#-----------------------------------------------------------------------------# !!! common !!! -# Everything below here is common to all data structures +# Main XML serializer. 
`depth` controls indentation; `preserve` propagates `xml:space= +# "preserve"` semantics down the subtree so we don't reformat whitespace-sensitive content. +function _write_xml(io::IO, node::Node, depth::Int=0, indent::Int=2, preserve::Bool=false) + pad = preserve ? "" : _indent_str(indent * depth) + nt = node.nodetype + if nt === Text + _write_escaped(io, node.value) + elseif nt === Element + # Check xml:space on this element + child_preserve = preserve + if !isnothing(node.attributes) + for (k, v) in node.attributes + k == "xml:space" && (child_preserve = v == "preserve") + end + end + Base.write(io, pad) + Base.write(io, UInt8('<')) + Base.write(io, node.tag) + _print_attrs(io, node.attributes) + ch = node.children + if isnothing(ch) || isempty(ch) + Base.write(io, UInt8('/')) + Base.write(io, UInt8('>')) + elseif length(ch) == 1 && only(ch).nodetype === Text + Base.write(io, UInt8('>')) + _write_xml(io, only(ch), 0, 0, child_preserve) + Base.write(io, UInt8('<')) + Base.write(io, UInt8('/')) + Base.write(io, node.tag) + Base.write(io, UInt8('>')) + else + # If real Text or any CData lives among the children, treat as mixed + # content and preserve the original layout. Otherwise pretty-print + # and skip whitespace-only Text children — those were emitted by the + # parser purely to round-trip source whitespace, and the writer + # regenerates indentation from the tree shape. 
+ effective_preserve = child_preserve || _has_significant_text(ch) + if effective_preserve + Base.write(io, UInt8('>')) + else + Base.write(io, UInt8('>')) + Base.write(io, UInt8('\n')) + end + for child in ch + if !effective_preserve && _is_ignorable_text(child) + continue + end + _write_xml(io, child, depth + 1, indent, effective_preserve) + effective_preserve || Base.write(io, UInt8('\n')) + end + effective_preserve || Base.write(io, pad) + Base.write(io, UInt8('<')) + Base.write(io, UInt8('/')) + Base.write(io, node.tag) + Base.write(io, UInt8('>')) + end + elseif nt === Declaration + Base.write(io, pad) + Base.write(io, "<?xml") + _print_attrs(io, node.attributes) + Base.write(io, "?>") + elseif nt === ProcessingInstruction + Base.write(io, pad) + Base.write(io, "<?") + Base.write(io, node.tag) + if !isnothing(node.value) + Base.write(io, UInt8(' ')) + Base.write(io, node.value) + end + Base.write(io, "?>") + elseif nt === Comment + Base.write(io, pad) + Base.write(io, "<!--") + Base.write(io, node.value) + Base.write(io, "-->") + elseif nt === CData + Base.write(io, pad) + Base.write(io, "<![CDATA[") + Base.write(io, node.value) + Base.write(io, "]]>") + elseif nt === DTD + Base.write(io, pad) + Base.write(io, "<!DOCTYPE ") + Base.write(io, node.value) + Base.write(io, UInt8('>')) + elseif nt === Document + ch = node.children + if !isnothing(ch) + # Drop whitespace-only Text between top-level nodes when pretty + # printing (XML grammar disallows text at document level, so any + # such Text comes from inter-node whitespace in the source). + visible = preserve ? 
ch : filter(!_is_ignorable_text, ch) + n_visible = length(visible) + for (i, child) in enumerate(visible) + _write_xml(io, child, 0, indent, preserve) + i < n_visible && Base.write(io, UInt8('\n')) + end + end + end +end +Base.show(io::IO, ::MIME"text/xml", node::Node) = _write_xml(io, node) -#-----------------------------------------------------------------------------# interface fallbacks -nodetype(o) = o.nodetype -tag(o) = o.tag -attributes(o) = o.attributes -value(o) = o.value -children(o::T) where {T} = isnothing(o.children) ? () : o.children +#-----------------------------------------------------------------------------# write / read +write(node::Node; indentsize::Int=2) = (io = IOBuffer(); _write_xml(io, node, 0, indentsize); String(take!(io))) +write(filename::AbstractString, node::Node; kw...) = open(io -> write(io, node; kw...), filename, "w") +write(io::IO, node::Node; indentsize::Int=2) = _write_xml(io, node, 0, indentsize) -depth(o) = missing -parent(o) = missing -next(o) = missing -prev(o) = missing +Base.read(filename::AbstractString, ::Type{Node}) = parse(read(filename, String), Node) +Base.read(io::IO, ::Type{Node}) = parse(read(io, String), Node) -is_simple(o) = nodetype(o) == Element && (isnothing(attributes(o)) || isempty(attributes(o))) && - length(children(o)) == 1 && nodetype(only(o)) in (Text, CData) +#-----------------------------------------------------------------------------# parse +Base.parse(::Type{Node}, xml::AbstractString) = parse(xml, Node) -simple_value(o) = is_simple(o) ? 
value(only(o)) : error("`XML.simple_value` is only defined for simple nodes.") +function Base.parse(xml::AbstractString, ::Type{Node}) + _parse(String(xml), String, unescape) +end -Base.@deprecate_binding simplevalue simple_value +function Base.parse(xml::AbstractString, ::Type{Node{SubString{String}}}) + _parse(String(xml), SubString{String}, identity) +end -#-----------------------------------------------------------------------------# nodes_equal -function nodes_equal(a, b) - out = XML.tag(a) == XML.tag(b) - out &= XML.nodetype(a) == XML.nodetype(b) - out &= XML.attributes(a) == XML.attributes(b) - out &= XML.value(a) == XML.value(b) - out &= length(XML.children(a)) == length(XML.children(b)) - out &= all(nodes_equal(ai, bi) for (ai,bi) in zip(XML.children(a), XML.children(b))) - return out +# Convert a parser substring to the requested storage type — copy to a fresh String, or +# keep the zero-copy SubString view. +_to(::Type{String}, s::AbstractString) = String(s) +_to(::Type{SubString{String}}, s::SubString{String}) = s + +# Collapse an empty Vector to `nothing` so Node fields store "absent" canonically. +_nothingify(v::Vector) = isempty(v) ? nothing : v + +# Decode the raw bytes of a TEXT/ATTR_VALUE token into the parser's storage type. When the +# tokenizer guarantees no `&` was seen (`has_entities=false`), we skip the entity-decode +# pass entirely. The `convert_text=identity` specialization (SubString parse) skips the +# runtime branch as well — both arms would return the same value. +@inline _text_value(::Type{S}, raw, _, ::typeof(identity)) where {S} = _to(S, raw) +@inline _text_value(::Type{S}, raw, has_entities, convert_text::F) where {S, F} = + has_entities ? convert_text(raw) : _to(S, raw) + +# Token-stream → Node{S} builder. `convert_text` is `unescape` for parsed content (with +# entity decoding) and `identity` for zero-copy SubString parsing where the caller opts +# to keep raw escapes. 
+function _parse(xml::String, ::Type{S}, convert_text::F) where {S, F} + tags = S[] + attrs_stack = Vector{Pair{S,S}}[] + children_stack = Vector{Vector{Node{S}}}() + push!(children_stack, Node{S}[]) + + pending_attr_name = SubString(xml, 1, 0) + decl_attrs = nothing + pending_pi_tag = SubString(xml, 1, 0) + pending_pi_value = nothing + in_close_tag = false + + for token in tokenize(xml) + k = token.kind + + if k === TokenKinds.TEXT + v = _text_value(S, token.raw, token.has_entities, convert_text) + push!(last(children_stack), Node{S}(Text, nothing, nothing, v, nothing)) + + elseif k === TokenKinds.OPEN_TAG + push!(tags, _to(S, tag_name(token))) + push!(attrs_stack, Pair{S,S}[]) + push!(children_stack, Node{S}[]) + + elseif k === TokenKinds.SELF_CLOSE + t = pop!(tags) + a = pop!(attrs_stack) + pop!(children_stack) + push!(last(children_stack), Node{S}(Element, t, _nothingify(a), nothing, nothing)) + + elseif k === TokenKinds.TAG_CLOSE + in_close_tag && (in_close_tag = false) + + elseif k === TokenKinds.CLOSE_TAG + close_name = tag_name(token) + isempty(tags) && error("Closing tag </$close_name> with no matching open tag.") + t = pop!(tags) + t == close_name || error("Mismatched tags: expected </$t>, got </$close_name>.") + a = pop!(attrs_stack) + c = pop!(children_stack) + push!(last(children_stack), Node{S}(Element, t, _nothingify(a), nothing, isempty(c) ? 
nothing : c)) + in_close_tag = true + + elseif k === TokenKinds.ATTR_NAME + pending_attr_name = token.raw + + elseif k === TokenKinds.ATTR_VALUE + val = _text_value(S, attr_value(token), token.has_entities, convert_text) + name = _to(S, pending_attr_name) + if decl_attrs !== nothing + any(p -> first(p) == name, decl_attrs) && error("Duplicate attribute: $name") + push!(decl_attrs, name => val) + elseif !isempty(attrs_stack) + any(p -> first(p) == name, last(attrs_stack)) && error("Duplicate attribute: $name") + push!(last(attrs_stack), name => val) + end + + elseif k === TokenKinds.XML_DECL_OPEN + decl_attrs = Pair{S,S}[] + + elseif k === TokenKinds.XML_DECL_CLOSE + a = isempty(decl_attrs) ? nothing : decl_attrs + push!(last(children_stack), Node{S}(Declaration, nothing, a, nothing, nothing)) + decl_attrs = nothing + + elseif k === TokenKinds.COMMENT_CONTENT + push!(last(children_stack), Node{S}(Comment, nothing, nothing, _to(S, token.raw), nothing)) + + elseif k === TokenKinds.CDATA_CONTENT + push!(last(children_stack), Node{S}(CData, nothing, nothing, _to(S, token.raw), nothing)) + + elseif k === TokenKinds.DOCTYPE_CONTENT + push!(last(children_stack), Node{S}(DTD, nothing, nothing, _to(S, lstrip(token.raw)), nothing)) + + elseif k === TokenKinds.PI_OPEN + pending_pi_tag = pi_target(token) + pending_pi_value = nothing + + elseif k === TokenKinds.PI_CONTENT + content = strip(token.raw) + pending_pi_value = isempty(content) ? nothing : _to(S, content) + + elseif k === TokenKinds.PI_CLOSE + push!(last(children_stack), Node{S}(ProcessingInstruction, _to(S, pending_pi_tag), nothing, pending_pi_value, nothing)) + end + end + + !isempty(tags) && error("Unclosed tags: $(join(tags, ", "))") + doc_children = only(children_stack) + Node{S}(Document, nothing, nothing, nothing, isempty(doc_children) ? 
nothing : doc_children) end -Base.:(==)(a::AbstractXMLNode, b::AbstractXMLNode) = nodes_equal(a, b) +#-----------------------------------------------------------------------------# h (HTML/XML element builder) +""" + h(tag, children...; attrs...) + h.tag(children...; attrs...) -#-----------------------------------------------------------------------------# parse -Base.parse(::Type{T}, str::AbstractString) where {T <: AbstractXMLNode} = parse(str, T) +Convenience constructor for `Element` nodes. -#-----------------------------------------------------------------------------# indexing -Base.getindex(o::Union{Raw, AbstractXMLNode}) = o -Base.getindex(o::Union{Raw, AbstractXMLNode}, i::Integer) = children(o)[i] -Base.getindex(o::Union{Raw, AbstractXMLNode}, ::Colon) = children(o) -Base.lastindex(o::Union{Raw, AbstractXMLNode}) = lastindex(children(o)) - -Base.only(o::Union{Raw, AbstractXMLNode}) = only(children(o)) - -Base.length(o::AbstractXMLNode) = length(children(o)) - -#-----------------------------------------------------------------------------# printing -function _show_node(io::IO, o) - printstyled(io, typeof(o), ' '; color=:light_black) - !ismissing(depth(o)) && printstyled(io, "(depth=", depth(o), ") ", color=:light_black) - printstyled(io, nodetype(o), ; color=:light_green) - if o.nodetype === Text - printstyled(io, ' ', repr(value(o))) - elseif o.nodetype === Element - printstyled(io, " <", tag(o), color=:light_cyan) - _print_attrs(io, o; color=:light_yellow) - printstyled(io, '>', color=:light_cyan) - _print_n_children(io, o) - elseif o.nodetype === DTD - printstyled(io, " <!DOCTYPE "; color=:light_cyan) - printstyled(io, value(o), color=:light_black) - printstyled(io, '>', color=:light_cyan) - elseif o.nodetype === Declaration - printstyled(io, " <?xml", color=:light_cyan) - _print_attrs(io, o; color=:light_yellow) - printstyled(io, "?>", color=:light_cyan) - elseif o.nodetype === ProcessingInstruction - printstyled(io, " <?", tag(o), color=:light_cyan) - 
_print_attrs(io, o; color=:light_yellow) - printstyled(io, "?>", color=:light_cyan) - elseif o.nodetype === Comment - printstyled(io, " <!--", color=:light_cyan) - printstyled(io, value(o), color=:light_black) - printstyled(io, "-->", color=:light_cyan) - elseif o.nodetype === CData - printstyled(io, " <![CData[", color=:light_cyan) - printstyled(io, value(o), color=:light_black) - printstyled(io, "]]>", color=:light_cyan) - elseif o.nodetype === Document - _print_n_children(io, o) - elseif o.nodetype === UNKNOWN - printstyled(io, "Unknown", color=:light_cyan) - _print_n_children(io, o) - else - error("Unreachable reached") + h("div", "hello"; class="main") # <div class="main">hello</div> + h.div("hello"; class="main") # same thing +""" +function h(tag::Union{Symbol, AbstractString}, children...; attrs...) + t = String(tag) + a = Pair{String,String}[String(k) => String(v) for (k, v) in pairs(attrs)] + c = Node{String}[_to_node(x) for x in children] + Node{String}(Element, t, a, nothing, c) +end + +Base.getproperty(::typeof(h), tag::Symbol) = h(tag) + +function (o::Node)(args...; attrs...) + o.nodetype === Element || error("Only Element nodes are callable.") + old_children = something(o.children, ()) + old_attrs = isnothing(o.attributes) ? () : (Symbol(k) => v for (k, v) in o.attributes) + h(o.tag, old_children..., args...; old_attrs..., attrs...) +end + +#-----------------------------------------------------------------------------# DTD parsing +struct ElementDecl + name::String + content::String # "EMPTY", "ANY", or content model like "(#PCDATA)" or "(a,b,c)*" +end + +struct AttDecl + element::String + name::String + type::String # "CDATA", "ID", "(val1|val2)", "NOTATION (a|b)", etc. 
+ default::String # "#REQUIRED", "#IMPLIED", "#FIXED \"val\"", or "\"val\"" +end + +struct EntityDecl + name::String + value::Union{Nothing, String} # replacement text (internal entities) + external_id::Union{Nothing, String} # "SYSTEM \"uri\"" or "PUBLIC \"pubid\" \"uri\"" + parameter::Bool +end + +struct NotationDecl + name::String + external_id::String +end + +struct ParsedDTD + root::String + system_id::Union{Nothing, String} + public_id::Union{Nothing, String} + elements::Vector{ElementDecl} + attributes::Vector{AttDecl} + entities::Vector{EntityDecl} + notations::Vector{NotationDecl} +end + +# DTD parsing helpers — each returns (parsed_piece, new_pos) so calls compose. + +# A byte that can appear in an XML Name (letters, digits, `_`, `-`, `.`, `:`). +@inline _dtd_is_name_char(c::Char) = + ('a' <= c <= 'z') || ('A' <= c <= 'Z') || ('0' <= c <= '9') || + c == '_' || c == '-' || c == '.' || c == ':' + +# Advance past any whitespace. +function _dtd_skip_ws(s, pos) + while pos <= ncodeunits(s) && isspace(s[pos]) + pos += 1 end + pos end -function _print_attrs(io::IO, o; color=:normal) - attr = attributes(o) - isnothing(attr) && return nothing - for (k,v) in attr - # printstyled(io, ' ', k, '=', '"', v, '"'; color) - print(io, ' ', k, '=', '"', v, '"') +# Read an XML Name token; errors if no name characters are present. +function _dtd_read_name(s, pos) + pos = _dtd_skip_ws(s, pos) + start = pos + while pos <= ncodeunits(s) && _dtd_is_name_char(s[pos]) + pos += 1 end + start == pos && error("Expected name at position $pos in DTD") + SubString(s, start, pos - 1), pos end -function _print_n_children(io::IO, o::Node) - n = length(children(o)) - text = n == 0 ? "" : n == 1 ? " (1 child)" : " ($n children)" - printstyled(io, text, color=:light_black) -end -_print_n_children(io::IO, o) = nothing - -#-----------------------------------------------------------------------------# write_xml -write(x; kw...) 
= (io = IOBuffer(); write(io, x; kw...); String(take!(io))) - -write(filename::AbstractString, x; kw...) = open(io -> write(io, x; kw...), filename, "w") - -function write(io::IO, x, ctx::Vector{Bool}=[false]; indentsize::Int=2, depth::Int=1) - indent = ' ' ^ indentsize - nodetype = XML.nodetype(x) - tag = XML.tag(x) - value = XML.value(x) - children = XML.children(x) - - padding = indent ^ max(0, depth - 1) - !ctx[end] && print(io, padding) - - if nodetype === Text - print(io, value) - - elseif nodetype === Element - push!(ctx, ctx[end]) - update_ctx!(ctx, x) - print(io, '<', tag) - _print_attrs(io, x) - print(io, isempty(children) ? '/' : "", '>') - if !isempty(children) - if length(children) == 1 && XML.nodetype(only(children)) === Text - write(io, only(children), ctx; indentsize=0) - print(io, "</", tag, '>') - else - !ctx[end] && println(io) - foreach(children) do child - write(io, child, ctx; indentsize, depth=depth + 1) - !ctx[end] && println(io) - end - print(io, !ctx[end] ? padding : "", "</", tag, '>') + +# Read a `"..."` or `'...'` string and return the contents without the surrounding quotes. +function _dtd_read_quoted(s, pos) + pos = _dtd_skip_ws(s, pos) + q = s[pos] + (q == '"' || q == '\'') || error("Expected quoted string at position $pos in DTD") + pos += 1 + start = pos + while pos <= ncodeunits(s) && s[pos] != q + pos += 1 + end + val = SubString(s, start, pos - 1) + pos += 1 + val, pos +end + +# Read a balanced parenthesized expression (e.g. `(a|b|(c,d))`), returning the full +# substring including the outer `(` and `)`. Skips over quoted strings inside. 
+function _dtd_read_parens(s, pos) + pos = _dtd_skip_ws(s, pos) + s[pos] == '(' || error("Expected '(' at position $pos in DTD") + depth = 1 + start = pos + pos += 1 + while pos <= ncodeunits(s) && depth > 0 + c = s[pos] + if c == '(' + depth += 1 + elseif c == ')' + depth -= 1 + elseif c == '"' || c == '\'' + pos += 1 + while pos <= ncodeunits(s) && s[pos] != c + pos += 1 end end - pop!(ctx) + pos += 1 + end + SubString(s, start, pos - 1), pos +end - elseif nodetype === DTD - print(io, "<!DOCTYPE ", value, '>') +# Advance past the next `>` that terminates a markup declaration, ignoring `>` inside +# quoted strings. +function _dtd_skip_to_close(s, pos) + while pos <= ncodeunits(s) && s[pos] != '>' + c = s[pos] + if c == '"' || c == '\'' + pos += 1 + while pos <= ncodeunits(s) && s[pos] != c + pos += 1 + end + end + pos += 1 + end + pos <= ncodeunits(s) ? pos + 1 : pos +end - elseif nodetype === Declaration - print(io, "<?xml") - _print_attrs(io, x) - print(io, "?>") +# Parse `<!ELEMENT name content>` — content is either a name (EMPTY/ANY) or a parens +# group with an optional `*`/`+`/`?` quantifier appended. +function _dtd_parse_element(s, pos) + name, pos = _dtd_read_name(s, pos) + pos = _dtd_skip_ws(s, pos) + if s[pos] == '(' + content, pos = _dtd_read_parens(s, pos) + if pos <= ncodeunits(s) && s[pos] in ('*', '+', '?') + content = string(content, s[pos]) + pos += 1 + end + else + content, pos = _dtd_read_name(s, pos) + end + pos = _dtd_skip_to_close(s, pos) + ElementDecl(String(name), String(content)), pos +end - elseif nodetype === ProcessingInstruction - print(io, "<?", tag) - _print_attrs(io, x) - print(io, "?>") +# Parse `<!ATTLIST element name type default ...>` — emits one AttDecl per attribute. 
+function _dtd_parse_attlist(s, pos) + element, pos = _dtd_read_name(s, pos) + atts = AttDecl[] + while true + pos = _dtd_skip_ws(s, pos) + (pos > ncodeunits(s) || s[pos] == '>') && break - elseif nodetype === Comment - print(io, "<!--", value, "-->") + name, pos = _dtd_read_name(s, pos) + pos = _dtd_skip_ws(s, pos) - elseif nodetype === CData - print(io, "<![CData[", value, "]]>") + # Attribute type + if s[pos] == '(' + atype, pos = _dtd_read_parens(s, pos) + else + atype, pos = _dtd_read_name(s, pos) + if atype == "NOTATION" + pos = _dtd_skip_ws(s, pos) + parens, pos = _dtd_read_parens(s, pos) + atype = string("NOTATION ", parens) + end + end + pos = _dtd_skip_ws(s, pos) - elseif nodetype === Document - foreach(children) do child - write(io, child, ctx; indentsize) - !ctx[end] && println(io) + # Default declaration + if s[pos] == '#' + pos += 1 + keyword, pos = _dtd_read_name(s, pos) + if keyword == "FIXED" + pos = _dtd_skip_ws(s, pos) + val, pos = _dtd_read_quoted(s, pos) + default = string("#FIXED \"", val, "\"") + else + default = string("#", keyword) + end + elseif s[pos] == '"' || s[pos] == '\'' + val, pos = _dtd_read_quoted(s, pos) + default = string("\"", val, "\"") + else + error("Expected default declaration at position $pos in DTD") end + push!(atts, AttDecl(String(element), String(name), String(atype), default)) + end + pos <= ncodeunits(s) && s[pos] == '>' && (pos += 1) + atts, pos +end + +# Parse `<!ENTITY [%] name "value">` or `<!ENTITY name SYSTEM/PUBLIC ...>`. `%` marks a +# parameter entity (referenced as `%name;` in DTDs only). 
+function _dtd_parse_entity(s, pos) + pos = _dtd_skip_ws(s, pos) + parameter = false + if pos <= ncodeunits(s) && s[pos] == '%' + parameter = true + pos += 1 + end + name, pos = _dtd_read_name(s, pos) + pos = _dtd_skip_ws(s, pos) + + value = nothing + external_id = nothing + if s[pos] == '"' || s[pos] == '\'' + v, pos = _dtd_read_quoted(s, pos) + value = String(v) + else + keyword, pos = _dtd_read_name(s, pos) + pos = _dtd_skip_ws(s, pos) + if keyword == "SYSTEM" + uri, pos = _dtd_read_quoted(s, pos) + external_id = string("SYSTEM \"", uri, "\"") + elseif keyword == "PUBLIC" + pubid, pos = _dtd_read_quoted(s, pos) + pos = _dtd_skip_ws(s, pos) + uri, pos = _dtd_read_quoted(s, pos) + external_id = string("PUBLIC \"", pubid, "\" \"", uri, "\"") + else + error("Expected SYSTEM, PUBLIC, or quoted value in ENTITY declaration") + end + end + pos = _dtd_skip_to_close(s, pos) + EntityDecl(String(name), value, external_id, parameter), pos +end +# Parse `<!NOTATION name SYSTEM "uri">` / `<!NOTATION name PUBLIC "pubid" ["uri"]>`. 
+function _dtd_parse_notation(s, pos) + name, pos = _dtd_read_name(s, pos) + pos = _dtd_skip_ws(s, pos) + keyword, pos = _dtd_read_name(s, pos) + pos = _dtd_skip_ws(s, pos) + if keyword == "SYSTEM" + uri, pos = _dtd_read_quoted(s, pos) + external_id = string("SYSTEM \"", uri, "\"") + elseif keyword == "PUBLIC" + pubid, pos = _dtd_read_quoted(s, pos) + pos = _dtd_skip_ws(s, pos) + if pos <= ncodeunits(s) && (s[pos] == '"' || s[pos] == '\'') + uri, pos = _dtd_read_quoted(s, pos) + external_id = string("PUBLIC \"", pubid, "\" \"", uri, "\"") + else + external_id = string("PUBLIC \"", pubid, "\"") + end else - error("Unreachable case reached during XML.write") + error("Expected SYSTEM or PUBLIC in NOTATION declaration") end + pos = _dtd_skip_to_close(s, pos) + NotationDecl(String(name), external_id), pos +end + +""" + parse_dtd(value::AbstractString) -> ParsedDTD + parse_dtd(node::Node) -> ParsedDTD +Parse a DTD value string (from a `DTD` node) into structured declarations. +""" +function parse_dtd(value::AbstractString) + s = String(value) + pos = 1 + + root, pos = _dtd_read_name(s, pos) + pos = _dtd_skip_ws(s, pos) + + # External ID + system_id = nothing + public_id = nothing + if pos <= ncodeunits(s) && _dtd_is_name_char(s[pos]) + keyword, kpos = _dtd_read_name(s, pos) + if keyword == "SYSTEM" + pos = kpos + uri, pos = _dtd_read_quoted(s, pos) + system_id = String(uri) + elseif keyword == "PUBLIC" + pos = kpos + pubid, pos = _dtd_read_quoted(s, pos) + public_id = String(pubid) + pos = _dtd_skip_ws(s, pos) + if pos <= ncodeunits(s) && (s[pos] == '"' || s[pos] == '\'') + uri, pos = _dtd_read_quoted(s, pos) + system_id = String(uri) + end + end + end + + elements = ElementDecl[] + attributes = AttDecl[] + entities = EntityDecl[] + notations = NotationDecl[] + + # Internal subset + pos = _dtd_skip_ws(s, pos) + if pos <= ncodeunits(s) && s[pos] == '[' + pos += 1 + while pos <= ncodeunits(s) + pos = _dtd_skip_ws(s, pos) + pos > ncodeunits(s) && break + s[pos] == ']' && 
break + + rest = SubString(s, pos) + if startswith(rest, "<!--") + i = findnext("-->", s, pos + 4) + isnothing(i) && error("Unterminated comment in DTD") + pos = last(i) + 1 + elseif startswith(rest, "<?") + i = findnext("?>", s, pos + 2) + isnothing(i) && error("Unterminated PI in DTD") + pos = last(i) + 1 + elseif startswith(rest, "<!ELEMENT") + elem, pos = _dtd_parse_element(s, pos + 9) + push!(elements, elem) + elseif startswith(rest, "<!ATTLIST") + atts, pos = _dtd_parse_attlist(s, pos + 9) + append!(attributes, atts) + elseif startswith(rest, "<!ENTITY") + ent, pos = _dtd_parse_entity(s, pos + 8) + push!(entities, ent) + elseif startswith(rest, "<!NOTATION") + not, pos = _dtd_parse_notation(s, pos + 10) + push!(notations, not) + elseif s[pos] == '%' + i = findnext(';', s, pos + 1) + isnothing(i) && error("Unterminated parameter entity reference in DTD") + pos = i + 1 + else + pos += 1 + end + end + end + + ParsedDTD(String(root), system_id, public_id, elements, attributes, entities, notations) +end + +function parse_dtd(node::Node) + node.nodetype === DTD || error("parse_dtd requires a DTD node.") + parse_dtd(node.value) +end + +#-----------------------------------------------------------------------------# deprecations +Base.@deprecate_binding simplevalue simple_value false + +# Removed types — informative errors +struct Raw + Raw(args...; kw...) = error(""" + `XML.Raw` has been removed in XML.jl v0.4. + Use `parse(str, Node)` or `read(filename, Node)` instead. + The streaming Raw/LazyNode API has been replaced by a token-based parser. + See `?XML.Node` for the new API.""") +end + +# Removed functions — informative errors +const _REMOVED_LAZYNODE_MSG = """ + This function was part of the LazyNode API, which has been removed in XML.jl v0.4. + Use `parse(str, Node)` to get a full DOM tree and navigate with `children`, `tag`, + `attributes`, `value`, and integer indexing (e.g. `node[1]`).""" + +for f in (:next, :prev) + msg = "`XML.$f` has been removed. 
$_REMOVED_LAZYNODE_MSG" + @eval function $f(o::Node) + Base.depwarn($msg, $(QuoteNode(f))) + error($msg) + end +end + +# 1-arg parent/depth were part of LazyNode API; 2-arg versions are defined above +const _PARENT_1ARG_MSG = "`XML.parent(node)` (single-argument) has been removed. $_REMOVED_LAZYNODE_MSG\n Use `parent(child, root)` instead to search from a known root node." +function Base.parent(o::Node) + Base.depwarn(_PARENT_1ARG_MSG, :parent) + error(_PARENT_1ARG_MSG) +end + +const _DEPTH_1ARG_MSG = "`XML.depth(node)` (single-argument) has been removed. $_REMOVED_LAZYNODE_MSG\n Use `depth(child, root)` instead to search from a known root node." +function depth(o::Node) + Base.depwarn(_DEPTH_1ARG_MSG, :depth) + error(_DEPTH_1ARG_MSG) +end + +function nodes_equal(a, b) + msg = """`XML.nodes_equal` has been removed in XML.jl v0.4. Use `==` instead: + a == b""" + Base.depwarn(msg, :nodes_equal) + error(msg) +end + +function escape!(o::Node, warn::Bool=true) + msg = """`XML.escape!` has been removed in XML.jl v0.4. + Text is now escaped automatically during `XML.write`.""" + Base.depwarn(msg, :escape!) + error(msg) +end + +function unescape!(o::Node, warn::Bool=true) + msg = """`XML.unescape!` has been removed in XML.jl v0.4. + Text is now unescaped automatically during `parse`.""" + Base.depwarn(msg, :unescape!) 
+ error(msg) end end # module XML diff --git a/src/XMLTokenizer.jl b/src/XMLTokenizer.jl new file mode 100644 index 0000000..c84f881 --- /dev/null +++ b/src/XMLTokenizer.jl @@ -0,0 +1,543 @@ +module XMLTokenizer + +#-----------------------------------------------------------------------# TokenKinds +baremodule TokenKinds + import Base: @enum + + @enum Kind::UInt8 begin + # Character data + TEXT # text content between markup + + # Element tags + OPEN_TAG # <name + CLOSE_TAG # </name + TAG_CLOSE # > + SELF_CLOSE # /> + ATTR_NAME # attribute name + ATTR_VALUE # "value" or 'value' (with quotes in raw) + + # CDATA sections + CDATA_OPEN # <![CDATA[ + CDATA_CONTENT # raw text content + CDATA_CLOSE # ]]> + + # Comments + COMMENT_OPEN # <!-- + COMMENT_CONTENT # comment text + COMMENT_CLOSE # --> + + # Processing instructions + PI_OPEN # <?target (includes target name) + PI_CONTENT # PI body text + PI_CLOSE # ?> + + # XML declaration (<?xml ...?>) + XML_DECL_OPEN # <?xml + XML_DECL_CLOSE # ?> + # (reuses ATTR_NAME / ATTR_VALUE for pseudo-attributes) + + # DOCTYPE + DOCTYPE_OPEN # <!DOCTYPE (or other <! declarations) + DOCTYPE_CONTENT # declaration body + DOCTYPE_CLOSE # > + end +end + +#-----------------------------------------------------------------------# Token +# `has_entities` records whether the raw bytes contain a `&`. It is set by the readers for +# `TEXT` and `ATTR_VALUE` (where entity references can appear) and stays `false` for every +# other token kind. The downstream parser uses it to skip `unescape`'s redundant byte scan +# when no entities are present. +# +# Field order matters: `has_entities` lives in the alignment padding that would otherwise +# sit between the 1-byte `kind` and the 24-byte `raw`. This keeps `sizeof(Token{String})` +# at 32 bytes instead of 40, which matters because tokens are allocated by the million +# during parse. 
+struct Token{S <: AbstractString} + kind::TokenKinds.Kind + has_entities::Bool + raw::SubString{S} +end + +# Backwards-compatible constructor for the many internal call sites that emit non-entity +# tokens (markup, names, close tokens, etc.). +@inline Token(kind::TokenKinds.Kind, raw::SubString{S}) where {S} = Token{S}(kind, false, raw) + +function Base.show(io::IO, t::Token) + print(io, t.kind, ": ", repr(String(t.raw))) +end + +#-----------------------------------------------------------------------# Tokenizer mode +@enum Mode::UInt8 begin + M_DEFAULT # normal content mode + M_TAG # inside open tag, reading attributes + M_TAG_VALUE # expecting quoted attribute value + M_CLOSE_TAG # inside close tag, expecting > + M_XML_DECL # inside <?xml, reading pseudo-attributes + M_XML_DECL_VALUE # expecting quoted attr value in xml decl + M_COMMENT # after <!--, reading content + M_CDATA # after <![CDATA[, reading content + M_PI # after <?target, reading content + M_DOCTYPE # after <!DOCTYPE, reading content +end + +#-----------------------------------------------------------------------# TokenizerState (immutable, SROA-friendly) +struct TokenizerState{S <: AbstractString} + pos::Int + mode::Mode + pending::Token{S} # buffered token for constructs that emit two tokens at once (e.g. content + close) +end + +# Create an empty token (no pending token buffered) +@inline no_token(s::AbstractString) = Token(TokenKinds.TEXT, @inbounds SubString(s, 1, 0)) +# Check whether the state has a buffered pending token +@inline has_pending(st::TokenizerState) = !isempty(st.pending.raw) + + +#-----------------------------------------------------------------------# Tokenizer (immutable iterator) +""" + tokenize(xml::AbstractString) -> Tokenizer + +Return a lazy iterator of `Token`s over the XML string `xml`. 
+""" +struct Tokenizer{S <: AbstractString} + data::S + start::Int +end + +tokenize(xml::AbstractString) = Tokenizer(xml, 1) +tokenize(xml::AbstractString, pos::Int) = StatefulTokenizer(Tokenizer(xml, pos)) + +# Lightweight mutable holder that drives the immutable `Tokenizer`'s iterate protocol with +# a single state field — avoids the `Union{VS,Nothing}` field and per-iteration tuple +# storage that `Iterators.Stateful` carries. +mutable struct StatefulTokenizer{S <: AbstractString} + const t::Tokenizer{S} + state::TokenizerState{S} + done::Bool +end + +StatefulTokenizer(t::Tokenizer{S}) where {S <: AbstractString} = + StatefulTokenizer{S}(t, TokenizerState(t.start, M_DEFAULT, no_token(t.data)), false) + +Base.IteratorSize(::Type{<:StatefulTokenizer}) = Base.SizeUnknown() +Base.eltype(::Type{StatefulTokenizer{S}}) where {S} = Token{S} + +@inline function Base.iterate(st::StatefulTokenizer, _ = nothing) + st.done && return nothing + r = iterate(st.t, st.state) + if r === nothing + st.done = true + return nothing + end + st.state = r[2] + (r[1], nothing) +end + +function Base.show(io::IO, t::Tokenizer) + n = ncodeunits(t.data) + print(io, "Tokenizer(") + t.start > 1 && print(io, t.start, "/") + print(io, Base.format_bytes(n), ")") +end + +Base.IteratorSize(::Type{<:Tokenizer}) = Base.SizeUnknown() +Base.eltype(::Type{Tokenizer{S}}) where {S} = Token{S} + +function Base.iterate(t::Tokenizer, st::TokenizerState=TokenizerState(t.start, M_DEFAULT, no_token(t.data))) + (; data) = t + (; pending, pos, mode) = st + + if has_pending(st) + return (pending, TokenizerState(pos, mode, no_token(data))) + end + iseof(data, pos) && return nothing + + if mode == M_DEFAULT + peek(data, pos) == UInt8('<') ? 
read_markup(data, pos) : read_text(data, pos) + elseif mode == M_TAG || mode == M_XML_DECL + read_in_tag(data, pos, mode) + elseif mode == M_TAG_VALUE || mode == M_XML_DECL_VALUE + read_attr_value(data, pos, mode) + elseif mode == M_CLOSE_TAG + read_close_tag_end(data, pos) + elseif mode == M_COMMENT + read_comment_body(data, pos) + elseif mode == M_CDATA + read_cdata_body(data, pos) + elseif mode == M_PI + read_pi_body(data, pos) + else # M_DOCTYPE + read_doctype_body(data, pos) + end +end + +#-----------------------------------------------------------------------# Internal helpers +# Check if pos is past the end of data +@inline iseof(data::AbstractString, pos::Int)::Bool = pos > ncodeunits(data) +# Read the byte at pos without bounds checking +@inline peek(data::AbstractString, pos::Int)::UInt8 = @inbounds codeunit(data, pos) +# Check if pos + offset is within bounds +@inline canpeek(data::AbstractString, pos::Int, offset::Int)::Bool = pos + offset <= ncodeunits(data) + +# Lookup table for XML name bytes (letter, digit, _, -, ., :) +const NAME_BYTE_TABLE = let t = falses(256) + for r in (UInt8('a'):UInt8('z'), UInt8('A'):UInt8('Z'), UInt8('0'):UInt8('9')) + for b in r; t[b + 1] = true; end + end + for b in (UInt8('_'), UInt8('-'), UInt8('.'), UInt8(':')); t[b + 1] = true; end + NTuple{256,Bool}(t) +end +@inline is_name_byte(b::UInt8)::Bool = @inbounds NAME_BYTE_TABLE[b + 1] + +# Check if byte is XML whitespace (space, tab, newline, carriage return) +@inline function is_whitespace(b::UInt8)::Bool + b == UInt8(' ') || b == UInt8('\t') || b == UInt8('\n') || b == UInt8('\r') +end + +# Advance pos past any whitespace bytes +@inline function skip_whitespace(data::AbstractString, pos::Int)::Int + @inbounds while !iseof(data, pos) && is_whitespace(peek(data, pos)) + pos += 1 + end + pos +end + +# Advance pos past a quoted string (single or double quotes) +function skip_quoted(data::AbstractString, pos::Int)::Int + q = @inbounds peek(data, pos) + pos += 1 + @inbounds 
while !iseof(data, pos) + peek(data, pos) == q && return pos + 1 + pos += 1 + end + error("Unterminated quoted string") +end + +# Throw a tokenizer error with position context (noinline to keep error paths out of hot code) +@noinline err(msg::AbstractString, pos::Int) = throw(ArgumentError("XML tokenizer error at position $pos: $msg")) + +#-----------------------------------------------------------------------# Text and markup +# Read text content up to the next '<'. Uses `findnext` (memchr-backed for `String`) to +# find the end-of-text delimiter, then scans for `&` only within the text region — a full +# document `findnext('&', ...)` would be O(doc_size) per text token and degrade to +# O(doc_size²) on entity-free documents. +function read_text(data::AbstractString, pos::Int) + start = pos + n = ncodeunits(data) + lt_idx = findnext('<', data, pos) + end_pos = isnothing(lt_idx) ? n + 1 : lt_idx + raw = @inbounds SubString(data, start, prevind(data, end_pos)) + has_amp = occursin('&', raw) + tok = Token{typeof(data)}(TokenKinds.TEXT, has_amp, raw) + (tok, TokenizerState(end_pos, M_DEFAULT, no_token(data))) +end + +# Dispatch on the character after '<' to the appropriate reader +function read_markup(data::AbstractString, pos::Int) + start = pos + pos += 1 # skip '<' + iseof(data, pos) && err("unexpected end of input after '<'", start) + + b = peek(data, pos) + if b == UInt8('!') + read_bang(data, pos + 1, start) + elseif b == UInt8('?') + read_pi_start(data, pos + 1, start) + elseif b == UInt8('/') + read_close_tag_start(data, pos + 1, start) + else + read_open_tag_start(data, pos, start) + end +end + +#-----------------------------------------------------------------------# <! dispatch +# Handle '<!' 
— comment, CDATA, or DOCTYPE +function read_bang(data::AbstractString, pos::Int, start::Int) + # Comment: <!-- + if !iseof(data, pos) && peek(data, pos) == UInt8('-') + pos += 1 + (!iseof(data, pos) && peek(data, pos) == UInt8('-')) || err("expected '<!--'", start) + pos += 1 + tok = Token(TokenKinds.COMMENT_OPEN, @inbounds SubString(data, start, pos - 1)) + return (tok, TokenizerState(pos, M_COMMENT, no_token(data))) + end + + # CDATA: <![CDATA[ + if !iseof(data, pos) && peek(data, pos) == UInt8('[') + pos += 1 + for expected in (UInt8('C'), UInt8('D'), UInt8('A'), UInt8('T'), UInt8('A'), UInt8('[')) + iseof(data, pos) && err("unterminated CDATA", start) + peek(data, pos) == expected || err("invalid CDATA section", start) + pos += 1 + end + tok = Token(TokenKinds.CDATA_OPEN, @inbounds SubString(data, start, pos - 1)) + return (tok, TokenizerState(pos, M_CDATA, no_token(data))) + end + + # <!DOCTYPE ...> or other <! declaration + @inbounds while !iseof(data, pos) && is_name_byte(peek(data, pos)) + pos += 1 + end + tok = Token(TokenKinds.DOCTYPE_OPEN, @inbounds SubString(data, start, pos - 1)) + (tok, TokenizerState(pos, M_DOCTYPE, no_token(data))) +end + +#-----------------------------------------------------------------------# <? (PI / XML declaration) +# Handle '<?' 
— XML declaration or processing instruction +function read_pi_start(data::AbstractString, pos::Int, start::Int) + name_start = pos + @inbounds while !iseof(data, pos) && is_name_byte(peek(data, pos)) + pos += 1 + end + + is_xml = (pos - name_start == 3) && + codeunit(data, name_start) == UInt8('x') && + codeunit(data, name_start + 1) == UInt8('m') && + codeunit(data, name_start + 2) == UInt8('l') + + if is_xml + tok = Token(TokenKinds.XML_DECL_OPEN, @inbounds SubString(data, start, pos - 1)) + (tok, TokenizerState(pos, M_XML_DECL, no_token(data))) + else + tok = Token(TokenKinds.PI_OPEN, @inbounds SubString(data, start, pos - 1)) + (tok, TokenizerState(pos, M_PI, no_token(data))) + end +end + +#-----------------------------------------------------------------------# Tags +# Read '<name' and enter tag-attribute mode +function read_open_tag_start(data::AbstractString, pos::Int, start::Int) + @inbounds while !iseof(data, pos) && is_name_byte(peek(data, pos)) + pos += 1 + end + tok = Token(TokenKinds.OPEN_TAG, @inbounds SubString(data, start, pos - 1)) + (tok, TokenizerState(pos, M_TAG, no_token(data))) +end + +# Read '</name' and enter close-tag mode +function read_close_tag_start(data::AbstractString, pos::Int, start::Int) + @inbounds while !iseof(data, pos) && is_name_byte(peek(data, pos)) + pos += 1 + end + tok = Token(TokenKinds.CLOSE_TAG, @inbounds SubString(data, start, pos - 1)) + (tok, TokenizerState(pos, M_CLOSE_TAG, no_token(data))) +end + +# Consume the '>' that closes a '</name>' tag +function read_close_tag_end(data::AbstractString, pos::Int) + pos = skip_whitespace(data, pos) + iseof(data, pos) && err("unterminated close tag", pos) + peek(data, pos) == UInt8('>') || err("expected '>'", pos) + tok = Token(TokenKinds.TAG_CLOSE, @inbounds SubString(data, pos, pos)) + (tok, TokenizerState(pos + 1, M_DEFAULT, no_token(data))) +end + +#-----------------------------------------------------------------------# Attributes (shared by M_TAG and M_XML_DECL) +# Read 
the next attribute name or tag-close delimiter (>, />, ?>) +function read_in_tag(data::AbstractString, pos::Int, mode::Mode) + pos = skip_whitespace(data, pos) + iseof(data, pos) && err("unterminated tag", pos) + + b = peek(data, pos) + is_decl = (mode == M_XML_DECL) + + # Check for end delimiters + if is_decl + if b == UInt8('?') && canpeek(data, pos, 1) && peek(data, pos + 1) == UInt8('>') + tok = Token(TokenKinds.XML_DECL_CLOSE, @inbounds SubString(data, pos, pos + 1)) + return (tok, TokenizerState(pos + 2, M_DEFAULT, no_token(data))) + end + else + if b == UInt8('>') + tok = Token(TokenKinds.TAG_CLOSE, @inbounds SubString(data, pos, pos)) + return (tok, TokenizerState(pos + 1, M_DEFAULT, no_token(data))) + end + if b == UInt8('/') && canpeek(data, pos, 1) && peek(data, pos + 1) == UInt8('>') + tok = Token(TokenKinds.SELF_CLOSE, @inbounds SubString(data, pos, pos + 1)) + return (tok, TokenizerState(pos + 2, M_DEFAULT, no_token(data))) + end + end + + # Attribute name + name_start = pos + @inbounds while !iseof(data, pos) && is_name_byte(peek(data, pos)) + pos += 1 + end + name_end = pos - 1 + name_start > name_end && err("expected attribute name or tag close", pos) + + # Consume '=' and surrounding whitespace (not part of any token) + pos = skip_whitespace(data, pos) + (!iseof(data, pos) && peek(data, pos) == UInt8('=')) || err("expected '=' after attribute name", pos) + pos += 1 + pos = skip_whitespace(data, pos) + + next_state = is_decl ? M_XML_DECL_VALUE : M_TAG_VALUE + tok = Token(TokenKinds.ATTR_NAME, @inbounds SubString(data, name_start, name_end)) + (tok, TokenizerState(pos, next_state, no_token(data))) +end + +# Read a quoted attribute value (including the quotes). Same shape as `read_text`: use +# `findnext` for the closing quote (memchr-backed for `String`), then a bounded `occursin` +# over the value range for entity detection so we never scan past the quote. 
+function read_attr_value(data::AbstractString, pos::Int, mode::Mode) + iseof(data, pos) && err("expected attribute value", pos) + + q = peek(data, pos) + (q == UInt8('"') || q == UInt8('\'')) || err("expected quoted attribute value", pos) + + start = pos + pos += 1 # skip opening quote + quote_char = Char(q) + close_idx = findnext(quote_char, data, pos) + isnothing(close_idx) && err("unterminated attribute value", start) + # Value range is [pos, close_idx - 1]; entity check is bounded to this view. + inner = @inbounds SubString(data, pos, prevind(data, close_idx)) + has_amp = occursin('&', inner) + pos = close_idx + 1 # one past the closing quote (always ASCII) + + next_state = (mode == M_XML_DECL_VALUE) ? M_XML_DECL : M_TAG + raw = @inbounds SubString(data, start, pos - 1) + tok = Token{typeof(data)}(TokenKinds.ATTR_VALUE, has_amp, raw) + (tok, TokenizerState(pos, next_state, no_token(data))) +end + +#-----------------------------------------------------------------------# Content bodies (comment, CDATA, PI, DOCTYPE) +# Scan for '-->' and emit comment content + close tokens +function read_comment_body(data::AbstractString, pos::Int) + start = pos + @inbounds while !iseof(data, pos) + if peek(data, pos) == UInt8('-') && + canpeek(data, pos, 1) && peek(data, pos + 1) == UInt8('-') && + canpeek(data, pos, 2) && peek(data, pos + 2) == UInt8('>') + content_end = prevind(data, pos) + close_start = pos + pos += 3 + pending = Token(TokenKinds.COMMENT_CLOSE, SubString(data, close_start, pos - 1)) + tok = Token(TokenKinds.COMMENT_CONTENT, SubString(data, start, content_end)) + return (tok, TokenizerState(pos, M_DEFAULT, pending)) + end + pos += 1 + end + err("unterminated comment", start) +end + +# Scan for ']]>' and emit CDATA content + close tokens +function read_cdata_body(data::AbstractString, pos::Int) + start = pos + @inbounds while !iseof(data, pos) + if peek(data, pos) == UInt8(']') && + canpeek(data, pos, 1) && peek(data, pos + 1) == UInt8(']') && + canpeek(data, 
pos, 2) && peek(data, pos + 2) == UInt8('>') + content_end = prevind(data, pos) + close_start = pos + pos += 3 + pending = Token(TokenKinds.CDATA_CLOSE, SubString(data, close_start, pos - 1)) + tok = Token(TokenKinds.CDATA_CONTENT, SubString(data, start, content_end)) + return (tok, TokenizerState(pos, M_DEFAULT, pending)) + end + pos += 1 + end + err("unterminated CDATA section", start) +end + +# Scan for '?>' and emit PI content + close tokens +function read_pi_body(data::AbstractString, pos::Int) + start = pos + @inbounds while !iseof(data, pos) + if peek(data, pos) == UInt8('?') && canpeek(data, pos, 1) && peek(data, pos + 1) == UInt8('>') + content_end = prevind(data, pos) + close_start = pos + pos += 2 + pending = Token(TokenKinds.PI_CLOSE, SubString(data, close_start, pos - 1)) + tok = Token(TokenKinds.PI_CONTENT, SubString(data, start, content_end)) + return (tok, TokenizerState(pos, M_DEFAULT, pending)) + end + pos += 1 + end + err("unterminated processing instruction", start) +end + +# Scan DOCTYPE body, handling nested brackets, quotes, and comments +function read_doctype_body(data::AbstractString, pos::Int) + start = pos + depth = 0 + @inbounds while !iseof(data, pos) + b = peek(data, pos) + if b == UInt8('-') && canpeek(data, pos, 1) && peek(data, pos + 1) == UInt8('-') && + pos >= 3 && + codeunit(data, pos - 1) == UInt8('!') && + codeunit(data, pos - 2) == UInt8('<') + # Inside a <!-- comment: skip until --> + pos += 2 # skip "--" + while !iseof(data, pos) + if peek(data, pos) == UInt8('-') && canpeek(data, pos, 1) && peek(data, pos + 1) == UInt8('-') && + canpeek(data, pos, 2) && peek(data, pos + 2) == UInt8('>') + pos += 3 # skip "-->" + break + end + pos += 1 + end + elseif b == UInt8('"') || b == UInt8('\'') + pos = skip_quoted(data, pos) + elseif b == UInt8('[') + depth += 1 + pos += 1 + elseif b == UInt8(']') + depth -= 1 + pos += 1 + elseif b == UInt8('>') && depth == 0 + content_end = prevind(data, pos) + close_start = pos + pos += 1 + pending 
= Token(TokenKinds.DOCTYPE_CLOSE, @inbounds SubString(data, close_start, pos - 1)) + tok = Token(TokenKinds.DOCTYPE_CONTENT, @inbounds SubString(data, start, content_end)) + return (tok, TokenizerState(pos, M_DEFAULT, pending)) + else + pos += 1 + end + end + err("unterminated DOCTYPE", start) +end + +#-----------------------------------------------------------------------# Utility functions + +""" + tag_name(token::Token) -> SubString{String} + +Extract the element name from an `OPEN_TAG` or `CLOSE_TAG` token. +""" +function tag_name(token::Token) + if token.kind == TokenKinds.OPEN_TAG + @inbounds SubString(token.raw, 2, ncodeunits(token.raw)) # skip '<' + elseif token.kind == TokenKinds.CLOSE_TAG + @inbounds SubString(token.raw, 3, ncodeunits(token.raw)) # skip '</' + else + throw(ArgumentError("tag_name requires OPEN_TAG or CLOSE_TAG, got $(token.kind)")) + end +end + +""" + attr_value(token::Token) -> SubString{String} + +Strip the surrounding quotes from an `ATTR_VALUE` token. +""" +function attr_value(token::Token) + token.kind == TokenKinds.ATTR_VALUE || + throw(ArgumentError("attr_value requires ATTR_VALUE, got $(token.kind)")) + @inbounds SubString(token.raw, 2, prevind(token.raw, lastindex(token.raw))) +end + +""" + pi_target(token::Token) -> SubString{String} + +Extract the target name from a `PI_OPEN` or `XML_DECL_OPEN` token. +""" +function pi_target(token::Token) + (token.kind == TokenKinds.PI_OPEN || token.kind == TokenKinds.XML_DECL_OPEN) || + throw(ArgumentError("pi_target requires PI_OPEN or XML_DECL_OPEN, got $(token.kind)")) + @inbounds SubString(token.raw, 3, ncodeunits(token.raw)) # skip '<?' 
+end + +end # module XMLTokenizer diff --git a/src/dtd.jl b/src/dtd.jl deleted file mode 100644 index 58299f0..0000000 --- a/src/dtd.jl +++ /dev/null @@ -1,141 +0,0 @@ -# This is all a work in progress - -#-----------------------------------------------------------------------------# position_after -function position_after(needle::Vector{UInt8}, haystack::Vector{UInt8}, i) - x = findnext(needle, haystack, i) - isnothing(x) ? nothing : x[end] + 1 -end - -position_after(needle::String, haystack::Vector{UInt8}, i) = position_after(Vector{UInt8}(needle), haystack, i) - - -#-----------------------------------------------------------------------------# DeclaredElement -struct DeclaredElement - name::String - content::String # "ANY", "EMPTY", or "(children...)" - function DeclaredElement(name, content) - content in ("ANY", "EMPTY") || (content[1] == '(' && content[end] == ')') || - error("DeclaredElement `content` must be 'ANY', 'EMPTY', or '(children...)'. Got $content.") - new(name, content) - end -end -Base.show(io::IO, o::DeclaredElement) = print(io, "<!ELEMENT ", o.name, " ", o.content, ">") - -function get_declared_elements(data::Vector{UInt8}) - i = position_after("<!ELEMENT", data, 1) - out = DeclaredElement[] - while !isnothing(i) - name, i = get_name(data, i + 1) - i = findnext(!isspace, data, i) - if data[i] == UInt8('(') - j = findnext(==(UInt8(')')), data, i + 1) - content = String(data[i:j]) - else - content, i = get_name(data, i) - end - push!(out, DeclaredElement(name, content)) - i = position_after("<!ELEMENT", data, i) - end - return out -end - -#-----------------------------------------------------------------------------# DeclaredAttribute -struct DeclaredAttribute - element_name::String - attribute_name::String - attribute_type::String - attribute_value::String -end -Base.show(io::IO, o::DeclaredAttribute) = print(io, "<!ATTLIST ", o.element_name, " ", o.attribute_name, " ", o.attribute_type, " ", o.attribute_value, ">") - - -function 
get_declared_attributes(data) - i = position_after("<!ATTLIST", data, 1) - out = DeclaredAttribute[] - while !isnothing(i) - element_name, i = get_name(data, i) - attribute_name, i = get_name(data, i) - i = findnext(!isspace, data, i) - attribute_type = if data[i] == UInt('(') - j = findnext(==(UInt8(')')), data, i) - String(data[i:j]) - i = j + 1 - else - nm, i = get_name(data, i) - nm - end - i = findnext(!isspace, data, i) - is_hash = data[i] == UInt8('#') - val, i = get_name(data, i) - attribute_value = is_hash ? '#' * val : val - push!(out, DeclaredAttribute(element_name, attribute_name, attribute_type, attribute_value)) - i = position_after("<!ATTLIST", data, i) - end - return out -end - -#-----------------------------------------------------------------------------# DeclaredEntity -struct DeclaredEntity - name::String - external::Bool - value::String -end -function Base.show(io::IO, o::DeclaredEntity) - print(io, "<!ENTITY ", o.name, " ", o.external ? "SYSTEM" : "", repr(o.value), ">") -end - -function get_declared_entities(data) - i = position_after("<!ENTITY", data, 1) - out = DeclaredEntity[] - while !isnothing(i) - name, i = get_name(data, i) - value, i = get_name(data, i) - external = value == "SYSTEM" - if external - value, i = get_name(data, i) - end - push!(out, DeclaredEntity(name, external, value)) - i = position_after("<!ENTITY", data, i) - end - return out -end - -#-----------------------------------------------------------------------------# DTDBody -struct DTDBody - elements::Vector{DeclaredElement} - attributes::Vector{DeclaredAttribute} - entities::Vector{DeclaredEntity} -end - -function Base.show(io::IO, o::DTDBody) - printstyled(io, "DTDBody\n", color=:light_cyan) - printstyled(io, " DeclaredElements (", length(o.elements), ")\n", color=:light_green) - foreach(x -> println(io, " ", x), o.elements) - printstyled(io, " DeclaredAttributes (", length(o.attributes), ")\n", color=:light_green) - foreach(x -> println(io, " ", x), o.attributes) - 
printstyled(io, " DeclaredEntities (", length(o.entities), ")\n", color=:light_green) - foreach(x -> println(io, " ", x), o.entities) -end - - -function DTDBody(data::Vector{UInt8}, file = false) - file && @goto isfile - i = position_after("<!DOCTYPE", data, 1) - root, i = get_name(data, i) - - i = findnext(==(UInt8('[')), data, i) - isnothing(i) && return DTDBody(root, [], [], []) - - @label isfile - elements = get_declared_elements(data) - attributes = get_declared_attributes(data) - entities = get_declared_entities(data) - return DTDBody(root, elements, attributes, entities) -end - - -Base.read(filename::String, ::Type{DTDBody}) = DTDBody(read(filename), true) -Base.read(io::IO, ::Type{DTDBody}) = DTDBody(read(io), true) - -Base.parse(s::AbstractString, ::Type{DTDBody}) = DTDBody(Vector{UInt8}(s)) -Base.parse(::Type{DTDBody}, s::AbstractString) = parse(s, DTDBody) diff --git a/src/lazynode.jl b/src/lazynode.jl new file mode 100644 index 0000000..185b53d --- /dev/null +++ b/src/lazynode.jl @@ -0,0 +1,548 @@ +#-----------------------------------------------------------------------------# LazyNode +""" + LazyNode + +A lightweight, read-only view into an XML document that navigates the token stream on demand +instead of building a full tree in memory. + + doc = parse(xml_string, LazyNode) + doc = read("file.xml", LazyNode) + +Supports the same read-only interface as `Node`: [`nodetype`](@ref), [`tag`](@ref), +[`attributes`](@ref), [`value`](@ref), [`children`](@ref), plus integer and string indexing. + +Accessor methods (`tag`, `value`, `keys`, `attributes`) return `SubString{String}` views +into the original document rather than allocated `String`s, so reading a large document +through `LazyNode` does not duplicate its text data. 
+""" +struct LazyNode{S <: AbstractString} + data::S + token::Token{S} + nodetype::NodeType +end + +function LazyNode(data::S, nt::NodeType) where {S <: AbstractString} + LazyNode{S}(data, Token(TokenKinds.TEXT, SubString(data, 1, 0)), nt) +end + +nodetype(n::LazyNode) = n.nodetype + +_lazy_pos(n::LazyNode) = n.token.raw.offset + 1 +_lazy_tokenizer(n::LazyNode) = tokenize(n.data, _lazy_pos(n)) + +# Entity-decode a TEXT/ATTR_VALUE token only when the tokenizer actually saw a `&`. When +# `has_entities` is false the raw `SubString{String}` view is returned with no allocation +# and no byte scan — the dominant case for spreadsheet-style data. `_decode_attr` strips +# the surrounding quotes first; the flag is read from the token, not the stripped view. +@inline _decode(tok::Token) = tok.has_entities ? unescape(tok.raw) : tok.raw +@inline _decode_attr(tok::Token) = tok.has_entities ? unescape(attr_value(tok)) : attr_value(tok) + +#-----------------------------------------------------------------------------# tag / value +function tag(n::LazyNode) + nt = n.nodetype + if nt === Element + return tag_name(n.token) + elseif nt === ProcessingInstruction + return pi_target(n.token) + end + nothing +end + +function value(n::LazyNode) + nt = n.nodetype + if nt === Text + return _decode(n.token) + elseif nt === Comment + iter = _lazy_tokenizer(n) + iterate(iter) # COMMENT_OPEN + return iterate(iter)[1].raw + elseif nt === CData + iter = _lazy_tokenizer(n) + iterate(iter) # CDATA_OPEN + return iterate(iter)[1].raw + elseif nt === DTD + iter = _lazy_tokenizer(n) + iterate(iter) # DOCTYPE_OPEN + return lstrip(iterate(iter)[1].raw) + elseif nt === ProcessingInstruction + iter = _lazy_tokenizer(n) + iterate(iter) # PI_OPEN + result = iterate(iter) + result === nothing && return nothing + result[1].kind === TokenKinds.PI_CONTENT || return nothing + content = strip(result[1].raw) + return isempty(content) ? 
nothing : content + end + nothing +end + +#-----------------------------------------------------------------------------# attributes +# Promote a `String` returned from `unescape` to a SubString so the homogeneous +# `Attributes{SubString{String}}` parameterization works. The String was already +# allocated for entity decoding; the SubString wrapper is just a view on top. +@inline _as_substring(s::SubString{String}) = s +@inline _as_substring(s::String) = SubString(s, 1, lastindex(s)) + +function attributes(n::LazyNode) + n.nodetype in (Element, Declaration) || return nothing + iter = _lazy_tokenizer(n) + iterate(iter) # skip OPEN_TAG or XML_DECL_OPEN + attrs = Pair{SubString{String}, SubString{String}}[] + for tok in iter + tok.kind === TokenKinds.ATTR_NAME || break + name = tok.raw + result = iterate(iter) + result === nothing && break + push!(attrs, name => _as_substring(_decode_attr(result[1]))) + end + isempty(attrs) ? nothing : Attributes(attrs) +end + +""" + get(n::LazyNode, key::AbstractString, default) + +Return the value of attribute `key` on `n`, or `default` if absent. Walks the token stream +once — no `Attributes` allocation — so this is the recommended way to read a single +attribute from a `LazyNode`. Use [`eachattribute`](@ref) to stream all attribute pairs +without allocating, or [`attributes`](@ref) for the materialized dict. 
+""" +function Base.get(n::LazyNode, key::AbstractString, default) + n.nodetype in (Element, Declaration) || return default + iter = _lazy_tokenizer(n) + iterate(iter) # skip OPEN_TAG or XML_DECL_OPEN + for tok in iter + tok.kind === TokenKinds.ATTR_NAME || return default + if tok.raw == key + result = iterate(iter) + result === nothing && return default + return _decode_attr(result[1]) + else + iterate(iter) # skip value + end + end + default +end + +#-----------------------------------------------------------------------------# eachattribute +struct LazyAttrIterator{I} + iter::I + done::Base.RefValue{Bool} +end + +Base.IteratorSize(::Type{<:LazyAttrIterator}) = Base.SizeUnknown() +Base.eltype(::Type{<:LazyAttrIterator}) = Pair{SubString{String}, Union{SubString{String}, String}} + +""" + eachattribute(n::LazyNode) + +Lazy iterator yielding `name => value` pairs for the attributes of `n` (an `Element` or +`Declaration`). Does not allocate an [`Attributes`](@ref) dict or intermediate vector; +suitable for hot paths that only need to scan attributes. + +For a single attribute by name, prefer `get(n, key, default)` — it short-circuits as soon +as the match is found. 
+""" +function eachattribute(n::LazyNode) + iter = _lazy_tokenizer(n) + is_attrs = n.nodetype === Element || n.nodetype === Declaration + is_attrs && iterate(iter) # skip OPEN_TAG / XML_DECL_OPEN + LazyAttrIterator{typeof(iter)}(iter, Ref(!is_attrs)) +end + +function Base.iterate(it::LazyAttrIterator, _ = nothing) + it.done[] && return nothing + r = iterate(it.iter) + isnothing(r) && (it.done[] = true; return nothing) + tok = r[1] + if tok.kind !== TokenKinds.ATTR_NAME + it.done[] = true + return nothing + end + name = tok.raw + r = iterate(it.iter) + if isnothing(r) + it.done[] = true + return nothing + end + val = _decode_attr(r[1]) + ((name => val), nothing) +end + +function Base.getindex(n::LazyNode, key::AbstractString) + val = get(n, key, _MISSING_ATTR) + val === _MISSING_ATTR && throw(KeyError(key)) + val +end + +function Base.haskey(n::LazyNode, key::AbstractString) + get(n, key, _MISSING_ATTR) !== _MISSING_ATTR +end + +function Base.keys(n::LazyNode) + n.nodetype in (Element, Declaration) || return () + iter = _lazy_tokenizer(n) + iterate(iter) + result = SubString{String}[] + for tok in iter + tok.kind === TokenKinds.ATTR_NAME || break + push!(result, tok.raw) + iterate(iter) # skip value + end + result +end + +#-----------------------------------------------------------------------------# children +function children(n::LazyNode{S}) where {S} + nt = n.nodetype + (nt === Document || nt === Element) || return () + children!(LazyNode{S}[], n) +end + +""" + children!(buf::Vector{LazyNode{S}}, n::LazyNode{S}) -> buf + +Collect children of `n` into `buf` (cleared first) and return it. Lets callers reuse a +single buffer across many nodes — useful when streaming through siblings (e.g. XLSX row +iteration) to avoid one `Vector` allocation per node. 
+""" +function children!(buf::Vector{LazyNode{S}}, n::LazyNode{S}) where {S} + empty!(buf) + nt = n.nodetype + if nt === Document + return _lazy_collect_children!(buf, n.data, _lazy_tokenizer(n)) + elseif nt !== Element + return buf + end + iter = _lazy_tokenizer(n) + for tok in iter + tok.kind === TokenKinds.SELF_CLOSE && return buf + tok.kind === TokenKinds.TAG_CLOSE && break + end + _lazy_collect_children!(buf, n.data, iter) +end + +function _lazy_collect_children!(result::Vector{LazyNode{S}}, data::S, iter) where {S <: AbstractString} + for tok in iter + k = tok.kind + if k === TokenKinds.TEXT + push!(result, LazyNode(data, tok, Text)) + elseif k === TokenKinds.OPEN_TAG + push!(result, LazyNode(data, tok, Element)) + _lazy_skip_element!(iter) + elseif k === TokenKinds.COMMENT_OPEN + push!(result, LazyNode(data, tok, Comment)) + _lazy_skip_until!(iter, TokenKinds.COMMENT_CLOSE) + elseif k === TokenKinds.CDATA_OPEN + push!(result, LazyNode(data, tok, CData)) + _lazy_skip_until!(iter, TokenKinds.CDATA_CLOSE) + elseif k === TokenKinds.PI_OPEN + push!(result, LazyNode(data, tok, ProcessingInstruction)) + _lazy_skip_until!(iter, TokenKinds.PI_CLOSE) + elseif k === TokenKinds.XML_DECL_OPEN + push!(result, LazyNode(data, tok, Declaration)) + _lazy_skip_until!(iter, TokenKinds.XML_DECL_CLOSE) + elseif k === TokenKinds.DOCTYPE_OPEN + push!(result, LazyNode(data, tok, DTD)) + _lazy_skip_until!(iter, TokenKinds.DOCTYPE_CLOSE) + elseif k === TokenKinds.CLOSE_TAG + break + end + end + result +end + +function _lazy_skip_element!(iter) + depth = 1 + for tok in iter + k = tok.kind + if k === TokenKinds.OPEN_TAG + depth += 1 + elseif k === TokenKinds.SELF_CLOSE + depth -= 1 + depth == 0 && return + elseif k === TokenKinds.CLOSE_TAG + depth -= 1 + if depth == 0 + iterate(iter) # consume trailing TAG_CLOSE + return + end + end + end +end + +function _lazy_skip_until!(iter, target::TokenKinds.Kind) + for tok in iter + tok.kind === target && return + end +end + +_token_end(tok) = 
tok.raw.offset + tok.raw.ncodeunits + +function _scan_to_close(iter, close_kind::TokenKinds.Kind) + for tok in iter + tok.kind === close_kind && return _token_end(tok) + end + error("Could not find closing token") +end + +#-----------------------------------------------------------------------------# sourcetext +""" + sourcetext(n::LazyNode) -> SubString{String} + +Return the original source text of the node as a `SubString`, with no parsing, escaping, +or reformatting. This is the zero-copy counterpart of [`write`](@ref) for lazy nodes. +""" +function sourcetext(n::LazyNode) + nt = n.nodetype + start = _lazy_pos(n) + if nt === Element + iter = _lazy_tokenizer(n) + for tok in iter + tok.kind === TokenKinds.SELF_CLOSE && return SubString(n.data, start, _token_end(tok)) + tok.kind === TokenKinds.TAG_CLOSE && break + end + depth = 1 + for tok in iter + k = tok.kind + if k === TokenKinds.OPEN_TAG + depth += 1 + elseif k === TokenKinds.SELF_CLOSE + depth -= 1 + elseif k === TokenKinds.CLOSE_TAG + depth -= 1 + if depth == 0 + result = iterate(iter) + result === nothing && error("Could not find closing '>'") + return SubString(n.data, start, _token_end(result[1])) + end + end + end + error("Could not find closing tag") + elseif nt === Comment + return SubString(n.data, start, _scan_to_close(_lazy_tokenizer(n), TokenKinds.COMMENT_CLOSE)) + elseif nt === CData + return SubString(n.data, start, _scan_to_close(_lazy_tokenizer(n), TokenKinds.CDATA_CLOSE)) + elseif nt === ProcessingInstruction + return SubString(n.data, start, _scan_to_close(_lazy_tokenizer(n), TokenKinds.PI_CLOSE)) + elseif nt === Declaration + return SubString(n.data, start, _scan_to_close(_lazy_tokenizer(n), TokenKinds.XML_DECL_CLOSE)) + elseif nt === DTD + return SubString(n.data, start, _scan_to_close(_lazy_tokenizer(n), TokenKinds.DOCTYPE_CLOSE)) + elseif nt === Text + return n.token.raw + elseif nt === Document + return SubString(n.data) + end +end + 
+#-----------------------------------------------------------------------------# write +""" + write(n::LazyNode; normalize::Bool=false, indentsize::Int=2) -> String + write(io::IO, n::LazyNode; normalize::Bool=false, indentsize::Int=2) + write(filename::AbstractString, n::LazyNode; normalize::Bool=false, indentsize::Int=2) + +Serialize a `LazyNode`. With `normalize=false` (the default) the result is the node's +original source bytes (zero-copy via [`sourcetext`](@ref)) — fast, but any source-side +whitespace between tags is preserved verbatim. + +With `normalize=true` the node is parsed into a `Node` tree and re-serialized, which +collapses incidental source whitespace and pretty-prints with `indentsize`-space +indentation. +""" +function write(n::LazyNode; normalize::Bool=false, indentsize::Int=2) + normalize ? write(parse(String(sourcetext(n)), Node); indentsize) : String(sourcetext(n)) +end + +function write(io::IO, n::LazyNode; normalize::Bool=false, indentsize::Int=2) + if normalize + write(io, parse(String(sourcetext(n)), Node); indentsize) + else + Base.write(io, sourcetext(n)) + end +end + +function write(filename::AbstractString, n::LazyNode; normalize::Bool=false, indentsize::Int=2) + open(io -> write(io, n; normalize, indentsize), filename, "w") +end + +#-----------------------------------------------------------------------------# eachchildnode +struct LazyChildIterator{S <: AbstractString, I} + data::S + iter::I + done::Base.RefValue{Bool} +end + +Base.IteratorSize(::Type{<:LazyChildIterator}) = Base.SizeUnknown() +Base.eltype(::Type{LazyChildIterator{S,I}}) where {S,I} = LazyNode{S} + +""" + eachchildnode(n::LazyNode) + +Return a lazy iterator over the children of `n`, yielding one [`LazyNode`](@ref) at a time +without collecting them all into a vector. + +See also [`children`](@ref), which returns a `Vector{LazyNode}`. 
+""" +function eachchildnode(n::LazyNode{S}) where {S} + nt = n.nodetype + iter = _lazy_tokenizer(n) + if nt === Document + return LazyChildIterator{S, typeof(iter)}(n.data, iter, Ref(false)) + elseif nt === Element + for tok in iter + if tok.kind === TokenKinds.SELF_CLOSE + return LazyChildIterator{S, typeof(iter)}(n.data, iter, Ref(true)) + elseif tok.kind === TokenKinds.TAG_CLOSE + return LazyChildIterator{S, typeof(iter)}(n.data, iter, Ref(false)) + end + end + end + LazyChildIterator{S, typeof(iter)}(n.data, iter, Ref(true)) +end + +function Base.iterate(ci::LazyChildIterator, _ = nothing) + ci.done[] && return nothing + for tok in ci.iter + k = tok.kind + if k === TokenKinds.TEXT + return (LazyNode(ci.data, tok, Text), nothing) + elseif k === TokenKinds.OPEN_TAG + node = LazyNode(ci.data, tok, Element) + _lazy_skip_element!(ci.iter) + return (node, nothing) + elseif k === TokenKinds.COMMENT_OPEN + node = LazyNode(ci.data, tok, Comment) + _lazy_skip_until!(ci.iter, TokenKinds.COMMENT_CLOSE) + return (node, nothing) + elseif k === TokenKinds.CDATA_OPEN + node = LazyNode(ci.data, tok, CData) + _lazy_skip_until!(ci.iter, TokenKinds.CDATA_CLOSE) + return (node, nothing) + elseif k === TokenKinds.PI_OPEN + node = LazyNode(ci.data, tok, ProcessingInstruction) + _lazy_skip_until!(ci.iter, TokenKinds.PI_CLOSE) + return (node, nothing) + elseif k === TokenKinds.XML_DECL_OPEN + node = LazyNode(ci.data, tok, Declaration) + _lazy_skip_until!(ci.iter, TokenKinds.XML_DECL_CLOSE) + return (node, nothing) + elseif k === TokenKinds.DOCTYPE_OPEN + node = LazyNode(ci.data, tok, DTD) + _lazy_skip_until!(ci.iter, TokenKinds.DOCTYPE_CLOSE) + return (node, nothing) + elseif k === TokenKinds.CLOSE_TAG || k === TokenKinds.TAG_CLOSE + ci.done[] = true + return nothing + end + end + ci.done[] = true + return nothing +end + +#-----------------------------------------------------------------------------# is_simple / simple_value +function is_simple(n::LazyNode) + n.nodetype === Element || 
return false + attrs = attributes(n) + (!isnothing(attrs) && !isempty(attrs)) && return false + ch = children(n) + length(ch) == 1 && ch[1].nodetype in (Text, CData) +end + +function simple_value(n::LazyNode) + n.nodetype === Element || error("`simple_value` is only defined for simple nodes.") + attrs = attributes(n) + (!isnothing(attrs) && !isempty(attrs)) && error("`simple_value` is only defined for simple nodes.") + ch = children(n) + length(ch) == 1 && ch[1].nodetype in (Text, CData) || error("`simple_value` is only defined for simple nodes.") + value(ch[1]) +end + +# Single-pass combined predicate+accessor: returns the simple text/CData value, or +# `nothing` if `n` is not a simple element. Avoids the double tokenization of +# `is_simple(n) ? simple_value(n) : ...`. +function is_simple_value(n::LazyNode) + n.nodetype === Element || return nothing + iter = _lazy_tokenizer(n) + iterate(iter) # skip OPEN_TAG + found_close = false + for tok in iter + k = tok.kind + k === TokenKinds.TAG_CLOSE && (found_close = true; break) + return nothing # attributes (ATTR_NAME), self-close, or anything else => not simple + end + found_close || return nothing + result = iterate(iter) + isnothing(result) && return nothing + tok = result[1] + k = tok.kind + if k === TokenKinds.TEXT + nxt = iterate(iter) + (isnothing(nxt) || nxt[1].kind !== TokenKinds.CLOSE_TAG) && return nothing + return _decode(tok) + elseif k === TokenKinds.CDATA_OPEN + r = iterate(iter) + (isnothing(r) || r[1].kind !== TokenKinds.CDATA_CONTENT) && return nothing + content = r[1].raw + r = iterate(iter) + (isnothing(r) || r[1].kind !== TokenKinds.CDATA_CLOSE) && return nothing + r = iterate(iter) + (isnothing(r) || r[1].kind !== TokenKinds.CLOSE_TAG) && return nothing + return content + end + nothing +end + +#-----------------------------------------------------------------------------# indexing +Base.getindex(n::LazyNode, i::Integer) = children(n)[i] +Base.getindex(n::LazyNode, ::Colon) = children(n) 
+Base.lastindex(n::LazyNode) = lastindex(children(n)) +Base.only(n::LazyNode) = only(children(n)) +Base.length(n::LazyNode) = length(children(n)) + +#-----------------------------------------------------------------------------# parse / read +Base.parse(::Type{LazyNode}, xml::AbstractString) = parse(xml, LazyNode) +Base.parse(xml::AbstractString, ::Type{LazyNode}) = LazyNode(String(xml), Document) + +Base.read(filename::AbstractString, ::Type{LazyNode}) = parse(read(filename, String), LazyNode) +Base.read(io::IO, ::Type{LazyNode}) = parse(read(io, String), LazyNode) + +#-----------------------------------------------------------------------------# show +function Base.show(io::IO, n::LazyNode) + nt = n.nodetype + print(io, "Lazy ", nt) + if nt === Text + print(io, ' ', repr(value(n))) + elseif nt === Element + print(io, " <", tag(n)) + attrs = attributes(n) + if !isnothing(attrs) + for (k, v) in attrs + print(io, ' ', k, '=', '"', v, '"') + end + end + print(io, '>') + elseif nt === DTD + print(io, " <!DOCTYPE ", value(n), '>') + elseif nt === Declaration + print(io, " <?xml") + attrs = attributes(n) + if !isnothing(attrs) + for (k, v) in attrs + print(io, ' ', k, '=', '"', v, '"') + end + end + print(io, "?>") + elseif nt === ProcessingInstruction + print(io, " <?", tag(n)) + v = value(n) + !isnothing(v) && print(io, ' ', v) + print(io, "?>") + elseif nt === Comment + print(io, " <!--", value(n), "-->") + elseif nt === CData + print(io, " <![CDATA[", value(n), "]]>") + elseif nt === Document + n_ch = length(children(n)) + n_ch > 0 && print(io, n_ch == 1 ? " (1 child)" : " ($n_ch children)") + end +end diff --git a/src/raw.jl b/src/raw.jl deleted file mode 100644 index 29d0a10..0000000 --- a/src/raw.jl +++ /dev/null @@ -1,568 +0,0 @@ -#-----------------------------------------------------------------------------# RawType -""" - RawType: - - RawText # text - - RawComment # <!-- ... --> - - RawCData # <![CData[...]]> - - RawDeclaration # <?xml attributes... 
?> - - RawProcessingInstruction # <?NAME attributes... ?> - - RawDTD # <!DOCTYPE ...> - - RawElementOpen # <NAME attributes... > - - RawElementClose # </NAME> - - RawElementSelfClosed # <NAME attributes... /> - - RawDocument # Something to initialize with (not really used) -""" -@enum(RawType, RawDocument, RawText, RawComment, RawCData, RawProcessingInstruction, - RawDeclaration, RawDTD, RawElementOpen, RawElementClose, RawElementSelfClosed) - -@inline nodetype(x::RawType) = - x === RawElementOpen ? Element : - x === RawElementClose ? Element : - x === RawElementSelfClosed ? Element : - x === RawText ? Text : - x === RawComment ? Comment : - x === RawCData ? CData : - x === RawDeclaration ? Declaration : - x === RawDTD ? DTD : - x === RawProcessingInstruction ? ProcessingInstruction : - x === RawDocument ? Document : - nothing - -#-----------------------------------------------------------------------------# Raw -""" - Raw(filename::String) - -Create an iterator over raw chunks of data in an XML file. Each chunk of data represents one of: - - - RawDocument # Only used to initialize the iterator state. - - RawText # text - - RawComment # <!-- ... --> - - RawCData # <![CData[...]]> - - RawDeclaration # <?xml attributes... ?> - - RawProcessingInstruction # <?NAME attributes... ?> - - RawDTD # <!DOCTYPE ...> - - RawElementOpen # <NAME attributes... > - - RawElementClose # </NAME> - - RawElementSelfClosed # <NAME attributes... /> - -Useful functions: - - - view(o::Raw) --> view of the Vector{UInt8} chunk. - - String(o::Raw) --> String of the chunk. - - next(o::Raw) --> Raw of the next chunk (or `nothing`). - - prev(o::Raw) --> Raw of the previous chunk (or `nothing`). - - tag(o::Raw) --> String of the tag name (or `nothing`). - - attributes(o::Raw) --> OrderedDict{String, String} of the attributes (or `nothing`). - - value(o::Raw) --> String of the value (or `nothing`). - - children(o::Raw) --> Vector{Raw} of the children (or `nothing`). 
- - parent(o::Raw) --> Raw of the parent (or `nothing`) - - depth(o::Raw) --> Int of the depth of the node in the XML DOM. -""" -struct Raw - type::RawType - depth::Int - pos::Int - len::Int - data::Vector{UInt8} - ctx::Vector{Bool} # Context for xml:space (Vector to support inheritance of context) - has_xml_space::Bool # Whether data contains `xml:space` attribute at least once -end -function Raw(data::Vector{UInt8})#, ctx::Vector{Bool}=Bool[false]) - needle = Vector{UInt8}("xml:space") - has_xml_space = findfirst(needle, data) !== nothing - return Raw(RawDocument, 0, 0, 0, data, [false], has_xml_space) -end -function Raw(data::Vector{UInt8}, has_xml_space::Bool, ctx::Vector{Bool}=Bool[false]) - return Raw(RawDocument, 0, 0, 0, data, ctx, has_xml_space) -end - -const _RAW_INDEX = WeakKeyDict{Vector{UInt8}, Any}() - -struct _TokRec - type::RawType - depth::Int - pos::Int - len::Int - ctx::Vector{Bool} -end - -mutable struct _Index - recs::Vector{_TokRec} - last_raw::Raw - built_end::Int -end - -Base.read(filename::String, ::Type{Raw}) = isfile(filename) ? 
- Raw(Mmap.mmap(filename)) : - error("File \"$filename\" does not exist.") - -Base.read(io::IO, ::Type{Raw}) = Raw(read(io)) - -Base.parse(x::AbstractString, ::Type{Raw}) = Raw(Vector{UInt8}(x)) - -# Mostly for debugging -Base.peek(o::Raw, n::Int) = String(view(o.data[o.pos+o.len+1:min(end, o.pos + o.len + n + 1)])) - -function Base.show(io::IO, o::Raw) - print(io, o.type, ':', o.depth, " (pos=", o.pos, ", len=", o.len, ")") - o.len > 0 && printstyled(io, ": ", String(o); color=:light_green) -end -function Base.:(==)(a::Raw, b::Raw) - a.type == b.type && a.depth == b.depth && a.pos == b.pos && a.len == b.len && a.data === b.data && a.ctx == b.ctx && a.has_xml_space == b.has_xml_space -end - -Base.view(o::Raw) = view(o.data, o.pos:o.pos+o.len) -Base.String(o::Raw) = String(view(o)) - -Base.IteratorSize(::Type{Raw}) = Base.SizeUnknown() -Base.eltype(::Type{Raw}) = Raw - -function Base.iterate(o::Raw, state=o) - n = next(state) - return isnothing(n) ? nothing : (n, n) -end - -is_node(o::Raw) = o.type !== RawElementClose -xml_nodes(o::Raw) = Iterators.Filter(is_node, o) - -#-----------------------------------------------------------------------------# get_name -is_name_start_char(x::UInt8) = x in UInt8('A'):UInt8('Z') || x in UInt8('a'):UInt8('z') || x == UInt8('_') -is_name_char(x::UInt8) = is_name_start_char(x) || x in UInt8('0'):UInt8('9') || x == UInt8('-') || x == UInt8('.') || x == UInt8(':') - -name_start(data, i) = findnext(is_name_start_char, data, i) -name_stop(data, i) = findnext(!is_name_char, data, i) - 1 - -function get_name(data, i) - i = name_start(data, i) - j = name_stop(data, i) - @views String(data[i:j]), j + 1 -end - -#-----------------------------------------------------------------------------# get_attributes -# starting at position i, return attributes up until the next '>' or '?' 
(DTD) -function get_attributes(data, i, j) - i = name_start(data, i) - (isnothing(j) || isnothing(i) || i > j) && return nothing - out = OrderedDict{String,String}() - while !isnothing(i) && i < j - key, i = get_name(data, i) - # get quotechar the value is wrapped in (either ' or ") - i = findnext(x -> x === UInt8('"') || x === UInt8('''), data, i + 1) - quotechar = data[i] - i2 = findnext(==(quotechar), data, i + 1) - @views value = String(data[i+1:i2-1]) - out[key] = value - i = name_start(data, i2) - end - return out -end - -# ----------------------------------------------------------------------------# Utilities supporting prev -function _get_or_init_index(o::Raw) - idx = get(_RAW_INDEX, o.data, nothing) - if idx === nothing - start = Raw(o.data) # fresh RawDocument - _RAW_INDEX[o.data] = _Index(_TokRec[], start, 0) - idx = _RAW_INDEX[o.data] - end - return idx -end -function _ensure_index_upto!(o::Raw, target_pos::Int) - idx = _get_or_init_index(o) - r = idx.last_raw - while true - n = next(r) - if n === nothing - idx.built_end = typemax(Int) - idx.last_raw = r - return idx - end - push!(idx.recs, _TokRec(n.type, n.depth, n.pos, n.len, copy(n.ctx))) - endpos = n.pos + n.len - idx.built_end = endpos - idx.last_raw = n - r = n - if endpos >= target_pos - return idx - end - end -end -function _find_prev_token(recs::Vector{_TokRec}, p::Int) - lo, hi = 1, length(recs) - ans = 0 - while lo <= hi - mid = (lo + hi) >>> 1 - endpos = recs[mid].pos + recs[mid].len - if endpos < p + 1 - ans = mid - lo = mid + 1 - else - hi = mid - 1 - end - end - return ans == 0 ? 
nothing : recs[ans] -end - -#-----------------------------------------------------------------------------# update xml:space context -# check attributes for xml:space and update ctx if necessary -function get_ctx(o) - att = attributes(o) - if !isnothing(att) && haskey(att, "xml:space") - if att["xml:space"] == "preserve" - return true - elseif att["xml:space"] == "default" - return false - else - error("Invalid value for xml:space attribute: $(att["xml:space"]). Must be 'preserve' or 'default'.") - end - end - return nothing -end -function update_ctx!(ctx, o) - new_ctx = get_ctx(o) - if new_ctx !== nothing - ctx[end] = new_ctx - end - return nothing -end - -#-----------------------------------------------------------------------------# interface -""" - nodetype(node) --> XML.NodeType - -Return the `XML.NodeType` of the node. -""" -nodetype(o::Raw) = nodetype(o.type) - -""" - tag(node) --> String or Nothing - -Return the tag name of `Element` and `PROCESSING_INSTRUCTION` nodes. -""" -function tag(o::Raw) - o.type ∉ [RawElementOpen, RawElementClose, RawElementSelfClosed, RawProcessingInstruction] && return nothing - return get_name(o.data, o.pos + 1)[1] -end - -""" - attributes(node) --> OrderedDict{String, String} or Nothing - -Return the attributes of `Element`, `Declaration`, or `ProcessingInstruction` nodes. -""" -function attributes(o::Raw) - if o.type === RawElementOpen || o.type === RawElementSelfClosed || o.type === RawProcessingInstruction - i = o.pos - i = name_start(o.data, i) - i = name_stop(o.data, i) - get_attributes(o.data, i + 1, o.pos + o.len) - elseif o.type === RawDeclaration - get_attributes(o.data, o.pos + 6, o.pos + o.len) - else - nothing - end -end - -""" - value(node) --> String or Nothing - -Return the value of `Text`, `CData`, `Comment`, or `DTD` nodes. 
-""" -function value(o::Raw) - if o.type === RawText - String(o) - elseif o.type === RawCData - String(view(o.data, o.pos+length("<![CData["):o.pos+o.len-3)) - elseif o.type === RawComment - String(view(o.data, o.pos+length("<!--"):o.pos+o.len-3)) - elseif o.type === RawDTD - String(view(o.data, o.pos+length("<!DOCTYPE "):o.pos+o.len-1)) - else - nothing - end -end - -""" - children(node) --> Vector{typeof(node)} - -Return the children the node. Will only be nonempty for `Element` and `Document` nodes. -""" -function children(o::Raw) - if o.type === RawElementOpen || o.type === RawDocument - depth = o.depth - out = Raw[] - for item in xml_nodes(o) - if item.depth == depth + 1 - push!(out, item) - end - item.depth == depth && break - o.type === RawDocument && item.depth == 2 && break # break if we've seen the doc root - end - out - else - Raw[] - end -end - -""" - depth(node) --> Int - -Return the depth of the node. Will be `0` for `Document` nodes. Not defined for `XML.Node`. -""" -function depth(o::Raw) - o.depth -end - -""" - parent(node) --> typeof(node), Nothing - -Return the parent of the node. Will be `nothing` for `Document` nodes. Not defined for `XML.Node`. -""" -function parent(o::Raw) - depth = o.depth - depth === 0 && return nothing - p = prev(o) - while p.depth >= depth - p = prev(p) - end - return p -end - -#-----------------------------------------------------------------------------# next Raw -# isspace(x::UInt8) = Base.isspace(Char(x)) - -# XML whitespace per XML 1.0/1.1 production S: -# S ::= (#x20 | #x9 | #xD | #xA)+ -@inline xml_isspace(b::UInt8)::Bool = (b == 0x20) | (b == 0x09) | (b == 0x0A) | (b == 0x0D) - -""" - next(node) --> typeof(node) or Nothing - -Return the next node in the document during depth-first traversal. Depth-first is the order you -would visit nodes by reading top-down through an XML file. Not defined for `XML.Node`. 
-""" -function next(o::Raw) - if o.has_xml_space # using xml:space context at least once in data - return next_xml_space(o) - else # not using xml:space context at all (same as v0.3.5) - return next_no_xml_space(o) - end -end - -function next_xml_space(o::Raw) - i = o.pos + o.len + 1 - depth = o.depth - data = o.data - type = o.type - has_xml_space = o.has_xml_space - ctx = copy(o.ctx) - last_type = type - k = findnext(!xml_isspace, data, i) - if isnothing(k) - return nothing - end - if last_type === RawElementOpen || last_type === RawDocument - depth += 1 - push!(ctx, ctx[end]) # inherit the xml:space context from parent - last_type === RawElementOpen && update_ctx!(ctx, o) # check attributes for xml:space and update if necessary - end - i = ctx[end] ? i : k - b = i > 1 ? Char(o.data[i-1]) : Char('<') - c = Char(o.data[i]) - d = Char(o.data[k+1]) - if c !== '<' || ctx[end] && c === '<' && b === ' ' && last_type === RawElementOpen && d === '/' - type = RawText - j = findnext(==(UInt8('<')), data, i) - 1 - j = ctx[end] ? j : findprev(!xml_isspace, data, j) # preserving whitespace if needed - if last_type === RawElementClose || last_type === RawElementSelfClosed|| last_type === RawDocument - # Maybe drop pure-whitespace inter-element text nodes? - # (e.g. whitespace between a closing and an opening tag which would otherwise make an orphan text node) - #if all(xml_isspace, @view data[i:j]) && depth > 1 - # return next(Raw(type, depth, j, 0, data, ctx, has_xml_space)) - #end - end - else - i = k - j = k + 1 - if c === '<' - c2 = Char(o.data[i+1]) - if c2 === '!' 
- c3 = Char(o.data[i+2]) - if c3 === '-' - type = RawComment - j = findnext(Vector{UInt8}("-->"), data, i)[end] - elseif c3 === '[' - type = RawCData - j = findnext(Vector{UInt8}("]]>"), data, i)[end] - elseif c3 === 'D' || c3 == 'd' - type = RawDTD - j = findnext(==(UInt8('>')), data, i) - while sum(==(UInt8('>')), @view data[k:j]) != sum(==(UInt8('<')), @view data[i:j]) - j = findnext(==(UInt8('>')), data, j + 1) - end - end - elseif c2 === '?' - if get_name(data, i + 2)[1] == "xml" - type = RawDeclaration - else - type = RawProcessingInstruction - end - j = findnext(Vector{UInt8}("?>"), data, i)[end] - elseif c2 === '/' - type = RawElementClose - depth -= 1 - pop!(ctx) # revert to parent xml:space context - j = findnext(==(UInt8('>')), data, i) - else - j = findnext(==(UInt8('>')), data, i) - if data[j-1] === UInt8('/') - type = RawElementSelfClosed - else - type = RawElementOpen - end - end - end - end - return Raw(type, depth, i, j - i, data, ctx, has_xml_space) -end - -function next_no_xml_space(o::Raw) # same as v0.3.5 - i = o.pos + o.len + 1 - depth = o.depth - data = o.data - type = o.type - has_xml_space = o.has_xml_space - ctx = [false] - i = findnext(!xml_isspace, data, i) - if isnothing(i) - return nothing - end - if type === RawElementOpen || type === RawDocument - depth += 1 - end - c = Char(o.data[i]) - d = Char(o.data[i+1]) - if c !== '<' - type = RawText - j = findnext(==(UInt8('<')), data, i) - 1 - j = findprev(!xml_isspace, data, j) # "rstrip" - elseif c === '<' - c2 = Char(o.data[i+1]) - if c2 === '!' - c3 = Char(o.data[i+2]) - if c3 === '-' - type = RawComment - j = findnext(Vector{UInt8}("-->"), data, i)[end] - elseif c3 === '[' - type = RawCData - j = findnext(Vector{UInt8}("]]>"), data, i)[end] - elseif c3 === 'D' || c3 == 'd' - type = RawDTD - j = findnext(==(UInt8('>')), data, i) - while sum(==(UInt8('>')), @view data[i:j]) != sum(==(UInt8('<')), @view data[i:j]) - j = findnext(==(UInt8('>')), data, j + 1) - end - end - elseif c2 === '?' 
- if get_name(data, i + 2)[1] == "xml" - type = RawDeclaration - else - type = RawProcessingInstruction - end - j = findnext(Vector{UInt8}("?>"), data, i)[end] - elseif c2 === '/' - type = RawElementClose - depth -= 1 - j = findnext(==(UInt8('>')), data, i) - else - j = findnext(==(UInt8('>')), data, i) - if data[j-1] === UInt8('/') - type = RawElementSelfClosed - else - type = RawElementOpen - end - end - end - return Raw(type, depth, i, j - i, data, ctx, has_xml_space) -end - -#-----------------------------------------------------------------------------# prev Raw -""" - prev(node) --> typeof(node), Nothing, or Missing (only for XML.Node) - -Return the previous node in the document during depth-first traversal. Not defined for `XML.Node`. -""" -function prev(o::Raw) - if o.has_xml_space # using xml:space context at least once in data - return prev_xml_space(o) - else # not using xml:space context at all (same as v0.3.5) - return prev_no_xml_space(o) - end -end - -function prev_xml_space(o::Raw) - o.type === RawDocument && return nothing - - idx = _ensure_index_upto!(o, o.pos - 1) - rec = _find_prev_token(idx.recs, o.pos - 1) - if rec === nothing - return Raw(o.data, o.has_xml_space, copy(o.ctx)) - end - return Raw(rec.type, rec.depth, rec.pos, rec.len, o.data, copy(rec.ctx), o.has_xml_space) -end -function prev_no_xml_space(o::Raw) # same as v0.3.5 - depth = o.depth - data = o.data - type = o.type - has_xml_space = o.has_xml_space - ctx = has_xml_space ? 
copy(o.ctx) : [false] - type === RawDocument && return nothing - j = o.pos - 1 - j = findprev(!xml_isspace, data, j) - if isnothing(j) - return Raw(data, has_xml_space, ctx) # RawDocument - end - c = Char(o.data[j]) - next_type = type - if c !== '>' # text - type = RawText - i = findprev(==(UInt8('>')), data, j) + 1 - i = findnext(!xml_isspace, data, i) # "lstrip" - elseif c === '>' - c2 = Char(o.data[j-1]) - if c2 === '-' - type = RawComment - i = findprev(Vector{UInt8}("<--"), data, j)[1] - elseif c2 === ']' - type = RawCData - i = findprev(Vector{UInt8}("<![CData["), data, j)[1] - elseif c2 === '?' - i = findprev(Vector{UInt8}("<?"), data, j)[1] - if get_name(data, i + 2)[1] == "xml" - type = RawDeclaration - else - type = RawProcessingInstruction - end - else - i = findprev(==(UInt8('<')), data, j) - char = Char(data[i+1]) - if char === '/' - type = RawElementClose - elseif char === '!' - type = DTD - elseif isletter(char) || char === '_' - type = Char(o.data[j-2]) === '/' ? RawElementSelfClosed : RawElementOpen - else - error("Should be unreachable. Unexpected data: <$char ... $c3$c2$c1>.") - end - end - else - error("Unreachable reached in XML.prev") - end - if type !== RawElementOpen && next_type === RawElementClose - depth += 1 - elseif type === RawElementOpen && next_type !== RawElementClose - depth -= 1 - end - return Raw(type, depth, i, j - i, data, ctx, has_xml_space) -end - diff --git a/src/xpath.jl b/src/xpath.jl new file mode 100644 index 0000000..87da263 --- /dev/null +++ b/src/xpath.jl @@ -0,0 +1,345 @@ +#-----------------------------------------------------------------------------# XPath +# A subset of XPath 1.0 for querying XML.Node trees. +# +# Supported syntax: +# / root (absolute path) +# tag child element by name +# * any child element +# // descendant-or-self (recursive) +# . current node +# .. 
parent node +# [n] positional predicate (1-based) +# [@attr] has-attribute predicate +# [@attr='v'] attribute-value predicate +# text() text node children +# node() all node children +# @attr attribute value (returns strings) + +#-----------------------------------------------------------------------------# Token types + +""" + XPathTokenKind + +Discriminator for the kinds of tokens produced by [`_xpath_tokenize`](@ref). + +| Variant | Source syntax | +|--------------------|--------------------------| +| `XPATH_ROOT` | `/` (path separator) | +| `XPATH_DESCENDANT` | `//` | +| `XPATH_NAME` | element tag name | +| `XPATH_WILDCARD` | `*` | +| `XPATH_DOT` | `.` (self) | +| `XPATH_DOTDOT` | `..` (parent) | +| `XPATH_TEXT_FN` | `text()` | +| `XPATH_NODE_FN` | `node()` | +| `XPATH_PREDICATE` | `[...]` body | +| `XPATH_ATTRIBUTE` | `@attr` (result position) | +""" +@enum XPathTokenKind::UInt8 begin + XPATH_ROOT # / + XPATH_DESCENDANT # // + XPATH_NAME # tag name + XPATH_WILDCARD # * + XPATH_DOT # . + XPATH_DOTDOT # .. + XPATH_TEXT_FN # text() + XPATH_NODE_FN # node() + XPATH_PREDICATE # [...] + XPATH_ATTRIBUTE # @attr (in result position) +end + +""" + XPathToken + +A single token from a parsed XPath expression: a [`XPathTokenKind`](@ref) tag together with +the relevant textual payload (tag name, predicate body, attribute name, etc.). Tokens with +no payload (`XPATH_ROOT`, `XPATH_WILDCARD`, …) carry the literal source character(s) for +debuggability. +""" +struct XPathToken + kind::XPathTokenKind + value::String +end + +#-----------------------------------------------------------------------------# Tokenizer + +# Lex an XPath expression into a flat token stream. Whitespace is discarded; unterminated +# predicates / function calls and unrecognised characters raise an error. Tokens preserve +# source order and are consumed left-to-right by `xpath`. 
+function _xpath_tokenize(expr::AbstractString) + tokens = XPathToken[] + s = String(expr) + i = 1 + n = ncodeunits(s) + + while i <= n + c = s[i] + + if c == '/' + if i < n && s[i+1] == '/' + push!(tokens, XPathToken(XPATH_DESCENDANT, "//")) + i += 2 + else + push!(tokens, XPathToken(XPATH_ROOT, "/")) + i += 1 + end + + elseif c == '.' + if i < n && s[i+1] == '.' + push!(tokens, XPathToken(XPATH_DOTDOT, "..")) + i += 2 + else + push!(tokens, XPathToken(XPATH_DOT, ".")) + i += 1 + end + + elseif c == '*' + push!(tokens, XPathToken(XPATH_WILDCARD, "*")) + i += 1 + + elseif c == '[' + j = findnext(']', s, i + 1) + isnothing(j) && error("Unterminated predicate in XPath: $(repr(s))") + push!(tokens, XPathToken(XPATH_PREDICATE, SubString(s, i + 1, j - 1))) + i = j + 1 + + elseif c == '@' + j = i + 1 + while j <= n && (isletter(s[j]) || s[j] == '-' || s[j] == '_' || s[j] == ':' || isdigit(s[j])) + j += 1 + end + j == i + 1 && error("Empty attribute name after @ in XPath: $(repr(s))") + push!(tokens, XPathToken(XPATH_ATTRIBUTE, SubString(s, i + 1, j - 1))) + i = j + + elseif isletter(c) || c == '_' + j = i + 1 + while j <= n && (isletter(s[j]) || s[j] == '-' || s[j] == '_' || s[j] == ':' || isdigit(s[j]) || s[j] == '.') + j += 1 + end + name = SubString(s, i, j - 1) + # Check for function calls: text(), node() + if j <= n && s[j] == '(' + j2 = findnext(')', s, j + 1) + isnothing(j2) && error("Unterminated function call in XPath: $(repr(s))") + if name == "text" + push!(tokens, XPathToken(XPATH_TEXT_FN, "text()")) + elseif name == "node" + push!(tokens, XPathToken(XPATH_NODE_FN, "node()")) + else + error("Unknown XPath function: $name()") + end + i = j2 + 1 + else + push!(tokens, XPathToken(XPATH_NAME, String(name))) + i = j + end + + elseif isspace(c) + i += 1 + + else + error("Unexpected character '$(c)' in XPath: $(repr(s))") + end + end + tokens +end + +#-----------------------------------------------------------------------------# Predicate evaluation + +const 
_RE_ATTR_PRED = r"^@([A-Za-z_:][\w.\-:]*)$" +const _RE_ATTR_VAL_PRED = r"^@([A-Za-z_:][\w.\-:]*)\s*=\s*['\"]([^'\"]*)['\"]$" + +# Filter `nodes` by the body of a `[...]` predicate. Supports positional indices `[n]` +# (1-based; out-of-range yields empty), `[last()]`, `[@attr]` (has-attribute), and +# `[@attr='value']` / `[@attr="value"]` (attribute equals literal). Anything else errors. +# `root` is accepted for symmetry with `_xpath_step` but is unused by current predicates. +function _eval_predicate(predicate::AbstractString, nodes::Vector{Node{S}}, root::Node{S}) where S + s = strip(predicate) + + # Positional: [n] + pos = tryparse(Int, s) + if !isnothing(pos) + 1 <= pos <= length(nodes) || return Node{S}[] + return [nodes[pos]] + end + + # last() + if s == "last()" + isempty(nodes) && return Node{S}[] + return [nodes[end]] + end + + # [@attr] — has attribute + m = match(_RE_ATTR_PRED, s) + if !isnothing(m) + attr_name = m.captures[1] + return filter(n -> n.nodetype === Element && haskey(n, attr_name), nodes) + end + + # [@attr='value'] or [@attr="value"] + m = match(_RE_ATTR_VAL_PRED, s) + if !isnothing(m) + attr_name = m.captures[1] + attr_val = m.captures[2] + return filter(n -> n.nodetype === Element && get(n, attr_name, nothing) == attr_val, nodes) + end + + error("Unsupported XPath predicate: [$predicate]") +end + +#-----------------------------------------------------------------------------# Step evaluation + +# Apply a single non-predicate, non-descendant step to the current context and return the +# new context. Handles XPATH_NAME, XPATH_WILDCARD, XPATH_DOT, XPATH_DOTDOT, XPATH_TEXT_FN, +# XPATH_NODE_FN. XPATH_DESCENDANT is intentionally not handled here — the main evaluator +# expands `//` to descendant-or-self before the next step. `root` is used by `..` to avoid +# walking past the document root. 
+function _xpath_step(nodes::Vector{Node{S}}, token::XPathToken, root::Node{S}) where S + result = Node{S}[] + k = token.kind + + if k === XPATH_NAME + for n in nodes + for c in children(n) + c.nodetype === Element && c.tag == token.value && push!(result, c) + end + end + + elseif k === XPATH_WILDCARD + for n in nodes + for c in children(n) + c.nodetype === Element && push!(result, c) + end + end + + elseif k === XPATH_DOT + append!(result, nodes) + + elseif k === XPATH_DOTDOT + for n in nodes + n === root && continue + p = _find_parent(n, root) + isnothing(p) || push!(result, p) + end + + elseif k === XPATH_TEXT_FN + for n in nodes + for c in children(n) + c.nodetype === Text && push!(result, c) + end + end + + elseif k === XPATH_NODE_FN + for n in nodes + append!(result, children(n)) + end + + elseif k === XPATH_DESCENDANT + # Handled by caller — collects all descendants before next step + error("XPATH_DESCENDANT should be handled by the evaluator, not _xpath_step") + end + + result +end + +# Append every descendant of `node` (children, grandchildren, ...) to `out` in document +# order. Does not include `node` itself. +function _descendants!(out::Vector{Node{S}}, node::Node{S}) where S + for c in children(node) + push!(out, c) + _descendants!(out, c) + end +end + +# Implements XPath's descendant-or-self axis: for each input node, emit the node itself +# followed by all of its descendants in document order. +function _descendants(nodes::Vector{Node{S}}) where S + result = Node{S}[] + for n in nodes + push!(result, n) # descendant-or-self includes self + _descendants!(result, n) + end + result +end + +#-----------------------------------------------------------------------------# Main evaluator + +""" + xpath(node::Node, expr::AbstractString) -> Vector{Node} + +Evaluate an XPath expression against a `Node` tree and return matching nodes. 
+ +Supports a practical subset of XPath 1.0: +- Absolute (`/root/child`) and relative (`child/sub`) paths +- Recursive descent (`//tag`) +- Wildcards (`*`), self (`.`), parent (`..`) +- Positional predicates (`[1]`, `[last()]`) +- Attribute predicates (`[@attr]`, `[@attr='value']`) +- `text()` and `node()` functions +- Attribute selection (`@attr`) — returns `Text` nodes containing attribute values + +# Examples +```julia +doc = parse("<root><a x='1'/><a x='2'/><b/></root>", Node) +xpath(doc, "/root/a") # both <a> elements +xpath(doc, "/root/a[1]") # first <a> +xpath(doc, "//a[@x='2']") # <a x="2"/> +xpath(doc, "/root/b/@x") # attribute value as Text node (empty here) +``` +""" +function xpath(node::Node{S}, expr::AbstractString) where S + tokens = _xpath_tokenize(expr) + isempty(tokens) && return Node{S}[] + + # Determine root for .. navigation + root = node.nodetype === Document ? node : node + + i = 1 + # Start context + if tokens[1].kind === XPATH_ROOT + # Absolute path — start from the document or its root element + if node.nodetype === Document + current = Node{S}[node] + else + current = Node{S}[node] + end + i = 2 + else + current = Node{S}[node] + end + + while i <= length(tokens) + tok = tokens[i] + + if tok.kind === XPATH_PREDICATE + current = _eval_predicate(tok.value, current, root) + i += 1 + + elseif tok.kind === XPATH_DESCENDANT + current = _descendants(current) + # // must be followed by a step + i += 1 + + elseif tok.kind === XPATH_ROOT + # / as separator between steps — skip + i += 1 + + elseif tok.kind === XPATH_ATTRIBUTE + # @attr in result position — return attribute values as Text nodes + result = Node{S}[] + for n in current + v = get(n, tok.value, nothing) + !isnothing(v) && push!(result, Node{S}(Text, nothing, nothing, v, nothing)) + end + current = result + i += 1 + + else + current = _xpath_step(current, tok, root) + i += 1 + end + end + + current +end diff --git a/test/Project.toml b/test/Project.toml index d4883bd..c1703f7 100644 --- 
a/test/Project.toml +++ b/test/Project.toml @@ -1,4 +1,5 @@ [deps] AbstractTrees = "1520ce14-60c1-5f80-bbc7-55ef81b5835c" Downloads = "f43a241f-c20a-4ad4-852c-f6b1247861c6" +Tar = "a4e569a6-e804-4fa4-b0f3-eef7a1d5b13e" Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40" diff --git a/test/data/complex_dtd.xml b/test/data/complex_dtd.xml new file mode 100644 index 0000000..cb69747 --- /dev/null +++ b/test/data/complex_dtd.xml @@ -0,0 +1,105 @@ +<?xml version="1.0" encoding="UTF-8"?> + +<!DOCTYPE test [ +<!-- ===== Bookstore DTD (complex demo) ===== --> + +<!-- Reusable parameter entities --> +<!ENTITY % text "(#PCDATA | em | code | xref | br)*"> +<!ENTITY % block "p | ul | ol | figure | table"> + +<!-- Notations (used by unparsed entities) --> +<!NOTATION jpeg SYSTEM "image/jpeg"> +<!NOTATION png SYSTEM "image/png"> + +<!-- Unparsed external entities (binary media) --> +<!ENTITY cover1 SYSTEM "covers/b123.jpg" NDATA jpeg> +<!ENTITY cover2 SYSTEM "covers/b456.png" NDATA png> + +<!ELEMENT catalog (metadata?, (book | magazine)+)> +<!ATTLIST catalog + tier CDATA #FIXED "retail" + xml:lang CDATA #IMPLIED> + +<!ELEMENT metadata (publisher?, contact?)> +<!ELEMENT publisher %text;> +<!ELEMENT contact (email, phone?)> +<!ELEMENT email (#PCDATA)> +<!ELEMENT phone (#PCDATA)> + +<!ELEMENT book (title, subtitle?, authors, pubinfo, description?, section*, reviews?, related?)> +<!ATTLIST book + id ID #REQUIRED + isbn CDATA #IMPLIED + format (hardcover | paperback | ebook) "paperback" + inStock (yes | no) #REQUIRED + xml:space (default | preserve) "default"> + +<!ELEMENT magazine (title, issue, article+)> +<!ATTLIST magazine id ID #REQUIRED> +<!ELEMENT issue (#PCDATA)> + +<!ELEMENT title %text;> +<!ELEMENT subtitle %text;> + +<!ELEMENT authors (author+)> +<!ELEMENT author (name, affiliation?)> +<!ATTLIST author id ID #IMPLIED> +<!ELEMENT name %text;> +<!ELEMENT affiliation %text;> + +<!ELEMENT pubinfo (publisher, year, price?, pages?)> +<!ELEMENT year (#PCDATA)> +<!ELEMENT price 
(#PCDATA)> +<!ELEMENT pages (#PCDATA)> + +<!ELEMENT description (%block;)*> + +<!ELEMENT section (title, (%block;)*, section*)> +<!ATTLIST section id ID #IMPLIED> + +<!ELEMENT p %text;> +<!ELEMENT ul (li+)> +<!ELEMENT ol (li+)> +<!ELEMENT li %text;> + +<!ELEMENT figure (caption?, media)> +<!ATTLIST figure + entity ENTITY #IMPLIED <!-- refers to cover1/cover2 --> + notation NOTATION (jpeg | png) #IMPLIED> +<!ELEMENT caption %text;> +<!ELEMENT media EMPTY> +<!ATTLIST media + src CDATA #REQUIRED + alt CDATA #IMPLIED> + +<!ELEMENT table (thead?, tbody, tfoot?)> +<!ELEMENT thead (tr+)> +<!ELEMENT tbody (tr+)> +<!ELEMENT tfoot (tr+)> +<!ELEMENT tr (th | td)+> +<!ELEMENT th %text;> +<!ELEMENT td %text;> + +<!ELEMENT reviews (review+)> +<!ELEMENT review (rating, p+)> +<!ATTLIST review by IDREF #REQUIRED + date CDATA #IMPLIED> +<!ELEMENT rating EMPTY> +<!ATTLIST rating value (poor | avg | good | excellent) #REQUIRED> + +<!ELEMENT related (seealso*)> +<!ELEMENT seealso EMPTY> +<!ATTLIST seealso ref IDREF #REQUIRED> + +<!ELEMENT article (title, authorrefs, p+)> +<!ELEMENT authorrefs (authorref+)> +<!ELEMENT authorref EMPTY> +<!ATTLIST authorref ref IDREF #REQUIRED> + +<!-- Conditional section controlled by a parameter entity --> +<!ENTITY % longdocs "INCLUDE"> +<![%longdocs;[ + <!ELEMENT appendix (title, p+)> + <!ATTLIST appendix id ID #REQUIRED> +]]> +]> diff --git a/test/data/preserve.xml b/test/data/preserve.xml new file mode 100644 index 0000000..e77add1 --- /dev/null +++ b/test/data/preserve.xml @@ -0,0 +1,5 @@ +<?xml version="1.0" encoding="UTF-8"?> +<root xml:space="preserve"> + This node has preserved space + with <child xml:space="default"> default </child> children. 
+</root> diff --git a/test/runtests.jl b/test/runtests.jl index 89978eb..4ab562c 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -1,646 +1,3410 @@ using XML -using XML: Document, Element, Declaration, Comment, CData, DTD, ProcessingInstruction, Text, escape, unescape, OrderedDict, h -using Downloads: download +using XML: Document, Element, Declaration, Comment, CData, DTD, ProcessingInstruction, Text +using XML: escape, unescape, h, parse_dtd +using XML: ParsedDTD, ElementDecl, AttDecl, EntityDecl, NotationDecl using Test -import AbstractTrees - -AbstractTrees.children(x::Node) = children(x) - -#-----------------------------------------------------------------------------# files -xml_xsd = joinpath("data", "xml.xsd") -kml_xsd = joinpath("data", "kml.xsd") -books_xml = joinpath("data", "books.xml") -example_kml = joinpath("data", "example.kml") -simple_dtd = joinpath("data", "simple_dtd.xml") - -all_files = [xml_xsd, kml_xsd, books_xml, example_kml, simple_dtd] - -#-----------------------------------------------------------------------------# h -@testset "h function" begin - @test h.tag == XML.Element("tag") - @test h.tag(id="id") == XML.Element("tag"; id="id") - @test h.tag(1, 2, a="a", b="b") == XML.Element("tag", 1, 2; a="a", b="b") -end - -#-----------------------------------------------------------------------------# escaping/unescaping -@testset "escaping/unescaping" begin - s = "This > string < has & some \" special ' characters" - @test escape(s) == "This > string < has & some " special ' characters" - @test escape(escape(s)) == escape(s) - @test s == unescape(escape(s)) - @test s == unescape(unescape(escape(s))) - - n = Element("tag", Text(s)) - @test XML.simple_value(n) == s - - XML.escape!(n) - @test XML.simple_value(n) == escape(s) - - XML.unescape!(n) - @test XML.simple_value(n) == s -end - -#-----------------------------------------------------------------------------# DTD -# @testset "DTDBody and friends" begin -# s = read(simple_dtd, String) 
-# data = read(simple_dtd) - -# dtd = XML.DTDBody(data) -# dtd2 = parse(s, XML.DTDBody) - -# @test length(dtd.elements) == length(dtd2.elements) == 0 -# @test length(dtd.attributes) == length(dtd2.attributes) == 0 -# @test length(dtd.entities) == length(dtd2.entities) == 3 - -# o = read("data/tv.dtd", XML.DTDBody) -# end - -#-----------------------------------------------------------------------------# Raw -@testset "Raw tag/attributes/value" begin - examples = [ - (xml = "<!DOCTYPE html>", - nodetype = DTD, - tag=nothing, - attributes=nothing, - value="html"), - (xml = "<?xml version=\"1.0\" key=\"value\"?>", - nodetype = Declaration, - tag=nothing, - attributes=Dict("version" => "1.0", "key" => "value"), - value=nothing), - (xml = "<tag _id=\"1\", x=\"abc\" />", - nodetype = Element, - tag="tag", - attributes=Dict("_id" => "1", "x" => "abc"), - value=nothing), - (xml = "<!-- comment -->", - nodetype = Comment, - tag=nothing, - attributes=nothing, - value=" comment "), - (xml = "<![CData[cdata test]]>", - nodetype = CData, - tag=nothing, - attributes=nothing, - value="cdata test"), - ] - for x in examples - # @info "Testing: $(x.xml)" - data = XML.next(XML.parse(x.xml, XML.Raw)) - @test XML.nodetype(data) == x.nodetype - @test XML.tag(data) == x.tag - @test XML.attributes(data) == x.attributes - @test XML.value(data) == x.value - end -end - -@testset "Raw with books.xml" begin - data = read(books_xml, XML.Raw) - doc = collect(data) - @test length(doc) > countlines(books_xml) - # Check that the first 5 lines are correct - first_5_lines = [ - XML.RawDeclaration => """<?xml version="1.0"?>""", - XML.RawElementOpen => "<catalog>", - XML.RawElementOpen => "<book id=\"bk101\">", - XML.RawElementOpen => "<author>", - XML.RawText => "Gambardella, Matthew" - ] - for (i, (typ, str)) in enumerate(first_5_lines) - dt = doc[i] - @test dt.type == typ - @test String(dt) == str - end - # Check that the last line is correct - @test doc[end].type == XML.RawElementClose - @test 
String(doc[end]) == "</catalog>" - - @testset "next and prev" begin - @test XML.prev(doc[1]) == data # can't use === here because prev returns a copy of ctx - @test prev(data) === nothing - @test XML.next(doc[end]) === nothing - - n = length(doc) - next_res = [doc[1]] - foreach(_ -> push!(next_res, XML.next(next_res[end])), 1:n-1) - - prev_res = [doc[end]] - foreach(_ -> pushfirst!(prev_res, XML.prev(prev_res[1])), 1:n-1) - - idx = findall(next_res .!= prev_res) - - for (a,b) in zip(next_res, prev_res) - @test a == b + +#==============================================================================# +# ESCAPE / UNESCAPE # +#==============================================================================# +@testset "escape / unescape" begin + @testset "all five predefined entities" begin + @test escape("&") == "&" + @test escape("<") == "<" + @test escape(">") == ">" + @test escape("'") == "'" + @test escape("\"") == """ + end + + @testset "unescape reverses escape" begin + @test unescape("&") == "&" + @test unescape("<") == "<" + @test unescape(">") == ">" + @test unescape("'") == "'" + @test unescape(""") == "\"" + end + + @testset "roundtrip on mixed strings" begin + s = "This > string < has & some \" special ' characters" + @test unescape(escape(s)) == s + end + + @testset "idempotent unescape" begin + s = "plain text with no entities" + @test unescape(s) == s + end + + @testset "multiple entities in one string" begin + @test escape("a < b & c > d") == "a < b & c > d" + @test unescape("a < b & c > d") == "a < b & c > d" + end + + @testset "empty string" begin + @test escape("") == "" + @test unescape("") == "" + end +end + +#==============================================================================# +# XML 1.0 SPEC SECTION 2.1: Well-Formed XML Documents # +#==============================================================================# +@testset "Spec 2.1: Well-Formed XML Documents" begin + # The spec's simplest example: + # <?xml version="1.0"?> + # 
<greeting>Hello, world!</greeting> + xml = """<?xml version="1.0"?><greeting>Hello, world!</greeting>""" + doc = parse(xml, Node) + @test nodetype(doc) == Document + @test length(doc) == 2 # Declaration + Element + @test nodetype(doc[1]) == Declaration + @test nodetype(doc[2]) == Element + @test tag(doc[2]) == "greeting" + @test simple_value(doc[2]) == "Hello, world!" +end + +#==============================================================================# +# XML 1.0 SPEC SECTION 2.4: Character Data and Markup # +#==============================================================================# +@testset "Spec 2.4: Character Data and Markup" begin + @testset "text content between tags" begin + doc = parse("<root>Hello</root>", Node) + @test simple_value(doc[1]) == "Hello" + end + + @testset "entity references in text are unescaped" begin + doc = parse("<root>& < > ' "</root>", Node) + @test simple_value(doc[1]) == "& < > ' \"" + end + + @testset "mixed text and child elements" begin + doc = parse("<p>Hello <b>world</b>!</p>", Node) + root = doc[1] + @test length(root) == 3 + @test nodetype(root[1]) == Text + @test value(root[1]) == "Hello " + @test nodetype(root[2]) == Element + @test tag(root[2]) == "b" + @test simple_value(root[2]) == "world" + @test nodetype(root[3]) == Text + @test value(root[3]) == "!" 
+ end + + @testset "empty element has no text" begin + doc = parse("<empty/>", Node) + @test length(children(doc[1])) == 0 + end +end + +#==============================================================================# +# XML 1.0 SPEC SECTION 2.5: Comments # +#==============================================================================# +@testset "Spec 2.5: Comments" begin + @testset "basic comment (spec example)" begin + # Spec example: <!-- declarations for <head> & <body> --> + doc = parse("<root><!-- declarations for <head> & <body> --></root>", Node) + c = doc[1][1] + @test nodetype(c) == Comment + @test value(c) == " declarations for <head> & <body> " + end + + @testset "empty comment" begin + doc = parse("<root><!----></root>", Node) + c = doc[1][1] + @test nodetype(c) == Comment + @test value(c) == "" + end + + @testset "comment before root element" begin + doc = parse("<!-- before --><root/>", Node) + @test nodetype(doc[1]) == Comment + @test value(doc[1]) == " before " + @test nodetype(doc[2]) == Element + end + + @testset "comment after root element" begin + doc = parse("<root/><!-- after -->", Node) + @test nodetype(doc[1]) == Element + @test nodetype(doc[2]) == Comment + end + + @testset "comment with markup-like content preserved verbatim" begin + doc = parse("<root><!-- <b>not</b> a tag --></root>", Node) + @test value(doc[1][1]) == " <b>not</b> a tag " + end + + @testset "multiple comments" begin + doc = parse("<root><!-- A --><!-- B --></root>", Node) + @test length(doc[1]) == 2 + @test value(doc[1][1]) == " A " + @test value(doc[1][2]) == " B " + end +end + +#==============================================================================# +# XML 1.0 SPEC SECTION 2.6: Processing Instructions # +#==============================================================================# +@testset "Spec 2.6: Processing Instructions" begin + @testset "xml-stylesheet PI (spec example)" begin + doc = parse("""<?xml-stylesheet type="text/xsl" 
href="style.xsl"?><root/>""", Node) + pi = doc[1] + @test nodetype(pi) == ProcessingInstruction + @test tag(pi) == "xml-stylesheet" + @test contains(value(pi), "type=\"text/xsl\"") + end + + @testset "PI with no content" begin + doc = parse("<?target?><root/>", Node) + pi = doc[1] + @test nodetype(pi) == ProcessingInstruction + @test tag(pi) == "target" + @test value(pi) === nothing + end + + @testset "PI inside element" begin + doc = parse("<root><?mypi some data?></root>", Node) + pi = doc[1][1] + @test nodetype(pi) == ProcessingInstruction + @test tag(pi) == "mypi" + @test value(pi) == "some data" + end + + @testset "PI after root element" begin + doc = parse("<root/><?post-process?>", Node) + @test nodetype(doc[2]) == ProcessingInstruction + @test tag(doc[2]) == "post-process" + end +end + +#==============================================================================# +# XML 1.0 SPEC SECTION 2.7: CDATA Sections # +#==============================================================================# +@testset "Spec 2.7: CDATA Sections" begin + @testset "CDATA preserves markup characters" begin + # Spec example + doc = parse("<root><![CDATA[<greeting>Hello, world!</greeting>]]></root>", Node) + cd = doc[1][1] + @test nodetype(cd) == CData + @test value(cd) == "<greeting>Hello, world!</greeting>" + end + + @testset "empty CDATA" begin + doc = parse("<root><![CDATA[]]></root>", Node) + cd = doc[1][1] + @test nodetype(cd) == CData + @test value(cd) == "" + end + + @testset "CDATA with ampersands and less-thans" begin + doc = parse("<root><![CDATA[a < b && c > d]]></root>", Node) + @test value(doc[1][1]) == "a < b && c > d" + end + + @testset "CDATA with special characters" begin + doc = parse("<root><![CDATA[line1\nline2\ttab]]></root>", Node) + @test value(doc[1][1]) == "line1\nline2\ttab" + end + + @testset "CDATA mixed with text" begin + doc = parse("<root>before<![CDATA[inside]]>after</root>", Node) + @test length(doc[1]) == 3 + @test nodetype(doc[1][1]) == Text + 
@test value(doc[1][1]) == "before" + @test nodetype(doc[1][2]) == CData + @test value(doc[1][2]) == "inside" + @test nodetype(doc[1][3]) == Text + @test value(doc[1][3]) == "after" + end +end + +#==============================================================================# +# XML 1.0 SPEC SECTION 2.8: Prolog and Document Type Declaration # +#==============================================================================# +@testset "Spec 2.8: Prolog and Document Type Declaration" begin + @testset "XML declaration - version only" begin + doc = parse("""<?xml version="1.0"?><root/>""", Node) + decl = doc[1] + @test nodetype(decl) == Declaration + @test decl["version"] == "1.0" + end + + @testset "XML declaration - version and encoding" begin + doc = parse("""<?xml version="1.0" encoding="UTF-8"?><root/>""", Node) + decl = doc[1] + @test decl["version"] == "1.0" + @test decl["encoding"] == "UTF-8" + end + + @testset "XML declaration - all three pseudo-attributes" begin + doc = parse("""<?xml version="1.0" encoding="UTF-8" standalone="yes"?><root/>""", Node) + decl = doc[1] + @test decl["version"] == "1.0" + @test decl["encoding"] == "UTF-8" + @test decl["standalone"] == "yes" + end + + @testset "XML declaration with single quotes" begin + doc = parse("<?xml version='1.0'?><root/>", Node) + @test doc[1]["version"] == "1.0" + end + + @testset "no XML declaration" begin + doc = parse("<root/>", Node) + @test length(doc) == 1 + @test nodetype(doc[1]) == Element + end + + @testset "DOCTYPE - SYSTEM" begin + # Spec example + doc = parse("""<!DOCTYPE greeting SYSTEM "hello.dtd"><greeting/>""", Node) + dtd = doc[1] + @test nodetype(dtd) == DTD + @test contains(value(dtd), "greeting") + @test contains(value(dtd), "SYSTEM") + @test contains(value(dtd), "hello.dtd") + end + + @testset "DOCTYPE - with internal subset" begin + xml = """<!DOCTYPE greeting [ + <!ELEMENT greeting (#PCDATA)> +]><greeting>Hello, world!</greeting>""" + doc = parse(xml, Node) + dtd = doc[1] + @test 
nodetype(dtd) == DTD + @test contains(value(dtd), "greeting") + @test contains(value(dtd), "<!ELEMENT") + end + + @testset "DOCTYPE with entities (spec-like)" begin + xml = """<!DOCTYPE note [ +<!ENTITY nbsp " "> +<!ENTITY writer "Writer: Donald Duck."> +<!ENTITY copyright "Copyright: W3Schools."> +]><note/>""" + doc = parse(xml, Node) + @test nodetype(doc[1]) == DTD + @test contains(value(doc[1]), "ENTITY") + end + + @testset "full prolog: declaration + DOCTYPE" begin + xml = """<?xml version="1.0"?><!DOCTYPE root SYSTEM "root.dtd"><root/>""" + doc = parse(xml, Node) + @test nodetype(doc[1]) == Declaration + @test nodetype(doc[2]) == DTD + @test nodetype(doc[3]) == Element + end +end + +#==============================================================================# +# XML 1.0 SPEC SECTION 2.9: Standalone Document Declaration # +#==============================================================================# +@testset "Spec 2.9: Standalone Document Declaration" begin + doc = parse("""<?xml version="1.0" standalone="yes"?><root/>""", Node) + @test doc[1]["standalone"] == "yes" + + doc2 = parse("""<?xml version="1.0" standalone="no"?><root/>""", Node) + @test doc2[1]["standalone"] == "no" +end + +#==============================================================================# +# XML 1.0 SPEC SECTION 2.10: White Space Handling # +#==============================================================================# +@testset "Spec 2.10: White Space Handling" begin + @testset "parser preserves all text content verbatim" begin + doc = parse("<root> hello </root>", Node) + @test simple_value(doc[1]) == " hello " + end + + @testset "parser preserves whitespace-only text" begin + doc = parse("<root> </root>", Node) + @test simple_value(doc[1]) == " " + end + + @testset "parser preserves inter-element whitespace as Text nodes" begin + xml = "<root><a>x</a>\n <b>y</b></root>" + doc = parse(xml, Node) + @test length(doc[1]) == 3 + @test value(doc[1][1][1]) == "x" + @test 
nodetype(doc[1][2]) == Text + @test value(doc[1][2]) == "\n " + @test value(doc[1][3][1]) == "y" + end + + @testset "xml:space attribute is preserved during parsing" begin + doc = parse("""<root xml:space="preserve"><child> text </child></root>""", Node) + @test doc[1]["xml:space"] == "preserve" + @test value(doc[1][1][1]) == " text " + end + + @testset "xml:space='preserve' affects write formatting" begin + # When xml:space="preserve", writer doesn't add indentation + el = Element("s", XML.Text(" pre "), Element("t"), XML.Text(" post "); var"xml:space"="preserve") + @test XML.write(el) == "<s xml:space=\"preserve\"> pre <t/> post </s>" + end + + @testset "write formats with indentation by default" begin + el = Element("root", Element("a"), Element("b")) + s = XML.write(el) + @test contains(s, " <a/>") # indented + @test contains(s, " <b/>") # indented + end + + @testset "Unicode non-breaking space is NOT XML whitespace" begin + nbsp = "\u00A0" + xml = "<root>$(nbsp) y $(nbsp)</root>" + doc = parse(xml, Node) + @test simple_value(doc[1]) == "$(nbsp) y $(nbsp)" + end +end + +#==============================================================================# +# XML 1.0 SPEC SECTION 3.1: Start-Tags, End-Tags, Empty-Element Tags # +#==============================================================================# +@testset "Spec 3.1: Start-Tags, End-Tags, Empty-Element Tags" begin + @testset "element with attributes (spec example)" begin + # <termdef id="dt-dog" term="dog"> + doc = parse("""<termdef id="dt-dog" term="dog">A dog.</termdef>""", Node) + el = doc[1] + @test tag(el) == "termdef" + @test el["id"] == "dt-dog" + @test el["term"] == "dog" + @test value(el[1]) == "A dog." 
+ end + + @testset "self-closing tag (spec example)" begin + # <IMG align="left" src="http://www.w3.org/Icons/WWW/w3c_home"/> + doc = parse("""<IMG align="left" src="http://www.w3.org/Icons/WWW/w3c_home"/>""", Node) + el = doc[1] + @test tag(el) == "IMG" + @test el["align"] == "left" + @test el["src"] == "http://www.w3.org/Icons/WWW/w3c_home" + @test length(children(el)) == 0 + end + + @testset "simple self-closing tag" begin + doc = parse("<br/>", Node) + @test tag(doc[1]) == "br" + @test length(children(doc[1])) == 0 + end + + @testset "self-closing tag with space before />" begin + doc = parse("<br />", Node) + @test tag(doc[1]) == "br" + end + + @testset "empty element with start and end tag" begin + doc = parse("<empty></empty>", Node) + el = doc[1] + @test tag(el) == "empty" + @test isnothing(el.children) + end + + @testset "nested elements" begin + doc = parse("<a><b><c/></b></a>", Node) + @test tag(doc[1]) == "a" + @test tag(doc[1][1]) == "b" + @test tag(doc[1][1][1]) == "c" + end + + @testset "sibling elements" begin + doc = parse("<root><a/><b/><c/></root>", Node) + @test length(doc[1]) == 3 + @test tag(doc[1][1]) == "a" + @test tag(doc[1][2]) == "b" + @test tag(doc[1][3]) == "c" + end + + @testset "attributes with single quotes" begin + doc = parse("<x a='val'/>", Node) + @test doc[1]["a"] == "val" + end + + @testset "attributes with double quotes" begin + doc = parse("""<x a="val"/>""", Node) + @test doc[1]["a"] == "val" + end + + @testset "mixed quote styles in attributes" begin + doc = parse("""<x a="1" b='2'/>""", Node) + @test doc[1]["a"] == "1" + @test doc[1]["b"] == "2" + end + + @testset "attribute with > in value" begin + doc = parse("""<x a="1>2"/>""", Node) + @test doc[1]["a"] == "1>2" + end + + @testset "attribute with entity reference" begin + doc = parse("""<x a="a&b"/>""", Node) + @test doc[1]["a"] == "a&b" + end + + @testset "multiple attributes accessible via attributes()" begin + doc = parse("""<x first="1" second="2" third="3"/>""", 
Node) + attrs = attributes(doc[1]) + @test attrs isa Attributes + @test attrs["first"] == "1" + @test attrs["second"] == "2" + @test attrs["third"] == "3" + end + + @testset "whitespace around = in attributes" begin + doc = parse("""<x a = "1" />""", Node) + @test doc[1]["a"] == "1" + end +end + +#==============================================================================# +# XML 1.0 SPEC SECTION 4.1: Entity References # +#==============================================================================# +@testset "Spec 4.1: Character and Entity References" begin + @testset "predefined entity references in text" begin + doc = parse("<root><</root>", Node) + @test simple_value(doc[1]) == "<" + + doc = parse("<root>></root>", Node) + @test simple_value(doc[1]) == ">" + + doc = parse("<root>&</root>", Node) + @test simple_value(doc[1]) == "&" + + doc = parse("<root>'</root>", Node) + @test simple_value(doc[1]) == "'" + + doc = parse("<root>"</root>", Node) + @test simple_value(doc[1]) == "\"" + end + + @testset "predefined entities in attribute values" begin + doc = parse("""<x a="<>&'""/>""", Node) + @test doc[1]["a"] == "<>&'\"" + end + + @testset "multiple entity references in one text node" begin + doc = parse("<root><tag> & "value"</root>", Node) + @test simple_value(doc[1]) == "<tag> & \"value\"" + end +end + +#==============================================================================# +# NAMESPACES (Colon in Tag and Attribute Names) # +#==============================================================================# +@testset "Namespaces" begin + @testset "namespaced element" begin + doc = parse("""<ns:root xmlns:ns="http://example.com"><ns:child/></ns:root>""", Node) + @test tag(doc[1]) == "ns:root" + @test doc[1]["xmlns:ns"] == "http://example.com" + @test tag(doc[1][1]) == "ns:child" + end + + @testset "default namespace" begin + doc = parse("""<root xmlns="http://example.com"/>""", Node) + @test doc[1]["xmlns"] == "http://example.com" + end + + @testset 
"multiple namespace prefixes" begin
+        xml = """<root xmlns:a="http://a.com" xmlns:b="http://b.com"><a:x/><b:y/></root>"""
+        doc = parse(xml, Node)
+        # Prefixed tags are kept verbatim (no namespace expansion).
+        @test tag(doc[1][1]) == "a:x"
+        @test tag(doc[1][2]) == "b:y"
+    end
+end
+
+#==============================================================================#
+#                              NODE CONSTRUCTORS                               #
+#==============================================================================#
+# Direct construction of every node type via the exported constructors,
+# including the keyword-attribute forms and invalid-argument errors.
+@testset "Node Constructors" begin
+    @testset "Text" begin
+        t = Text("hello")
+        @test nodetype(t) == Text
+        @test value(t) == "hello"
+        # Text nodes carry no tag or attributes.
+        @test tag(t) === nothing
+        @test attributes(t) === nothing
+    end
+
+    @testset "Comment" begin
+        c = Comment(" a comment ")
+        @test nodetype(c) == Comment
+        # Surrounding whitespace in the comment body is preserved.
+        @test value(c) == " a comment "
+    end
+
+    @testset "CData" begin
+        cd = CData("raw <data>")
+        @test nodetype(cd) == CData
+        # CData content is stored raw, '<'/'>' included.
+        @test value(cd) == "raw <data>"
+    end
+
+    @testset "DTD" begin
+        d = DTD("html")
+        @test nodetype(d) == DTD
+        @test value(d) == "html"
+    end
+
+    @testset "Declaration" begin
+        decl = Declaration(; version="1.0", encoding="UTF-8")
+        @test nodetype(decl) == Declaration
+        @test decl["version"] == "1.0"
+        @test decl["encoding"] == "UTF-8"
+    end
+
+    @testset "Declaration with no attributes" begin
+        decl = Declaration()
+        @test nodetype(decl) == Declaration
+        @test attributes(decl) === nothing
+    end
+
+    @testset "ProcessingInstruction with content" begin
+        pi = ProcessingInstruction("target", "data here")
+        @test nodetype(pi) == ProcessingInstruction
+        # The PI target is stored as the tag; the data as the value.
+        @test tag(pi) == "target"
+        @test value(pi) == "data here"
+    end
+
+    @testset "ProcessingInstruction without content" begin
+        pi = ProcessingInstruction("target")
+        @test nodetype(pi) == ProcessingInstruction
+        @test tag(pi) == "target"
+        @test value(pi) === nothing
+    end
+
+    @testset "Element with tag only" begin
+        el = Element("div")
+        @test nodetype(el) == Element
+        @test tag(el) == "div"
+        @test length(children(el)) == 0
+    end
+
+    @testset "Element with children" begin
+        el = Element("div", Text("hello"), Element("span"))
+        @test length(el) == 2
+        @test nodetype(el[1]) == Text
+        @test nodetype(el[2]) == Element
+    end
+
+    @testset "Element with attributes" begin
+        el = Element("div"; class="main", id="content")
+        @test el["class"] == "main"
+        @test el["id"] == "content"
+    end
+
+    @testset "Element with children and attributes" begin
+        el = Element("a", "click here"; href="http://example.com")
+        @test tag(el) == "a"
+        @test el["href"] == "http://example.com"
+        @test value(el[1]) == "click here"
+    end
+
+    @testset "Element auto-converts non-Node children to Text" begin
+        # Non-Node positional args are stringified into Text children.
+        el = Element("p", 42)
+        @test nodetype(el[1]) == Text
+        @test value(el[1]) == "42"
+    end
+
+    @testset "Document" begin
+        doc = Document(
+            Declaration(; version="1.0"),
+            Element("root")
+        )
+        @test nodetype(doc) == Document
+        @test length(doc) == 2
+        @test nodetype(doc[1]) == Declaration
+        @test nodetype(doc[2]) == Element
+    end
+
+    @testset "Document with all node types" begin
+        doc = Document(
+            Declaration(; version="1.0"),
+            DTD("root"),
+            Comment("comment"),
+            ProcessingInstruction("pi", "data"),
+            Element("root", CData("cdata"), Text("text"))
+        )
+        @test map(nodetype, children(doc)) == [Declaration, DTD, Comment, ProcessingInstruction, Element]
+        @test length(doc[end]) == 2
+        @test nodetype(doc[end][1]) == CData
+        @test value(doc[end][1]) == "cdata"
+        @test nodetype(doc[end][2]) == Text
+        @test value(doc[end][2]) == "text"
+    end
+
+    @testset "invalid constructions" begin
+        @test_throws Exception Text("a", "b")                       # too many args
+        @test_throws Exception Comment("a"; x="1")                  # no attrs
+        @test_throws Exception CData("a"; x="1")                    # no attrs
+        @test_throws Exception DTD("a"; x="1")                      # no attrs
+        @test_throws Exception Element()                            # need tag
+        @test_throws Exception Declaration("bad")                   # no positional args
+        @test_throws Exception Document(; x="1")                    # no attrs
+        @test_throws Exception ProcessingInstruction()              # need target
+        @test_throws Exception ProcessingInstruction("a", "b", "c") # too many args
+    end
+end
+
+#==============================================================================#
+#                                h CONSTRUCTOR                                 #
+#==============================================================================#
+# The `h` builder: callable form h(tag, children...; attrs...) and the
+# property form h.tag(...) (tag taken from the property name).
+@testset "h constructor" begin
+    @testset "h(tag)" begin
+        el = h("div")
+        @test nodetype(el) == Element
+        @test tag(el) == "div"
+    end
+
+    @testset "h(tag, children...)" begin
+        el = h("div", "hello")
+        @test simple_value(el) == "hello"
+    end
+
+    @testset "h(tag; attrs...)" begin
+        el = h("div"; class="main")
+        @test el["class"] == "main"
+    end
+
+    @testset "h(tag, children...; attrs...)" begin
+        el = h("div", "hello"; class="main")
+        @test el["class"] == "main"
+        @test value(el[1]) == "hello"
+    end
+
+    @testset "h.tag syntax" begin
+        el = h.div("hello"; class="main")
+        @test tag(el) == "div"
+        @test el["class"] == "main"
+        @test value(el[1]) == "hello"
+    end
+
+    @testset "h.tag with no args" begin
+        el = h.br()
+        @test tag(el) == "br"
+        @test length(children(el)) == 0
+    end
+
+    @testset "h.tag with only attrs" begin
+        el = h.img(; src="image.png")
+        @test tag(el) == "img"
+        @test el["src"] == "image.png"
+    end
+
+    @testset "nested h constructors" begin
+        el = h.div(
+            h.h1("Title"),
+            h.p("Paragraph")
+        )
+        @test tag(el) == "div"
+        @test length(el) == 2
+        @test tag(el[1]) == "h1"
+        @test tag(el[2]) == "p"
+    end
+
+    @testset "h with symbol tag" begin
+        el = h(:div)
+        @test tag(el) == "div"
+    end
+end
+
+#==============================================================================#
+#                                NODE INTERFACE                                #
+#==============================================================================#
+# Accessor functions (nodetype/tag/attributes/value/children) and the
+# simple-value helpers on a small parsed document.
+@testset "Node Interface" begin
+    doc = parse("""<?xml version="1.0"?><root attr="val"><child>text</child></root>""", Node)
+
+    @testset "nodetype" begin
+        @test nodetype(doc) == Document
+        @test nodetype(doc[1]) == Declaration
+        @test nodetype(doc[2]) == Element
+    end
+
+    @testset "tag" begin
+        @test tag(doc) === nothing
+        @test tag(doc[2]) == "root"
+        @test tag(doc[2][1]) == "child"
+    end
+
+    @testset "attributes" begin
+        @test attributes(doc) === nothing
+        @test attributes(doc[2])["attr"] == "val"
+    end
+
+    @testset "value" begin
+        @test value(doc) === nothing
+        @test value(doc[2][1][1]) == "text"
+    end
+
+    @testset "children" begin
+        @test length(children(doc)) == 2
+        @test length(children(doc[2])) == 1
+    end
+
+    @testset "is_simple" begin
+        # "simple" == element whose only child is a single Text/CData node.
+        @test is_simple(doc[2][1]) == true
+        @test is_simple(doc[2]) == false
+    end
+
+    @testset "simple_value" begin
+        @test simple_value(doc[2][1]) == "text"
+        @test_throws ErrorException simple_value(doc[2])
+    end
+
+    @testset "simple_value for CData child" begin
+        el = Element("x", CData("data"))
+        @test is_simple(el)
+        @test simple_value(el) == "data"
+    end
+end
+
+#==============================================================================#
+#                                NODE INDEXING                                 #
+#==============================================================================#
+# Integer/colon/end indexing for children and string indexing for attributes.
+@testset "Node Indexing" begin
+    doc = parse("<root><a/><b/><c/></root>", Node)
+    root = doc[1]
+
+    @testset "integer indexing" begin
+        @test tag(root[1]) == "a"
+        @test tag(root[2]) == "b"
+        @test tag(root[3]) == "c"
+    end
+
+    @testset "colon indexing" begin
+        all = root[:]
+        @test length(all) == 3
+    end
+
+    @testset "lastindex" begin
+        @test tag(root[end]) == "c"
+    end
+
+    @testset "only" begin
+        single = parse("<root><only/></root>", Node)
+        @test tag(only(single[1])) == "only"
+    end
+
+    @testset "length" begin
+        @test length(root) == 3
+    end
+
+    @testset "attribute indexing" begin
+        el = parse("""<x a="1" b="2"/>""", Node)[1]
+        @test el["a"] == "1"
+        @test el["b"] == "2"
+        @test_throws KeyError el["nonexistent"]
+    end
+
+    @testset "haskey" begin
+        el = parse("""<x a="1"/>""", Node)[1]
+        @test haskey(el, "a") == true
+        @test haskey(el, "b") == false
+    end
+
+    @testset "keys" begin
+        # Attribute order is preserved as written in the document.
+        el = parse("""<x a="1" b="2"/>""", Node)[1]
+        @test collect(keys(el)) == ["a", "b"]
+    end
+
+    @testset "keys on element with no attributes" begin
+        el = parse("<x/>", Node)[1]
+        @test isempty(keys(el))
+    end
+end
+
+#==============================================================================#
+#                                NODE MUTATION                                 #
+#==============================================================================#
+# setindex!/push!/pushfirst! on children and attributes; non-Node values are
+# auto-converted to Text, matching the Element constructor behavior above.
+@testset "Node Mutation" begin
+    @testset "setindex! child" begin
+        el = Element("root", Element("old"))
+        el[1] = Element("new")
+        @test tag(el[1]) == "new"
+    end
+
+    @testset "setindex! child with auto-conversion" begin
+        el = Element("root", Text("old"))
+        el[1] = "new text"
+        @test value(el[1]) == "new text"
+    end
+
+    @testset "setindex! attribute" begin
+        el = Element("root"; a="1")
+        el["a"] = "2"
+        @test el["a"] == "2"
+    end
+
+    @testset "setindex! new attribute" begin
+        el = Element("root"; a="1")
+        el["b"] = "2"
+        @test el["b"] == "2"
+    end
+
+    @testset "push! child" begin
+        el = Element("root")
+        push!(el, Element("child"))
+        @test length(el) == 1
+        @test tag(el[1]) == "child"
+    end
+
+    @testset "push! with auto-conversion" begin
+        el = Element("root")
+        push!(el, "text")
+        @test nodetype(el[1]) == Text
+        @test value(el[1]) == "text"
+    end
+
+    @testset "pushfirst! child" begin
+        el = Element("root", Element("second"))
+        pushfirst!(el, Element("first"))
+        @test tag(el[1]) == "first"
+        @test tag(el[2]) == "second"
+    end
+
+    @testset "push! 
on non-container node errors" begin
+        t = Text("hello")
+        @test_throws ErrorException push!(t, "more")
+    end
+end
+
+#==============================================================================#
+#                                NODE EQUALITY                                 #
+#==============================================================================#
+# Structural equality: two nodes compare equal iff type, tag, attributes,
+# value, and children all match.
+@testset "Node Equality" begin
+    @testset "identical elements are equal" begin
+        a = Element("div", Text("hello"); class="main")
+        b = Element("div", Text("hello"); class="main")
+        @test a == b
+    end
+
+    @testset "different tag names are not equal" begin
+        @test Element("a") != Element("b")
+    end
+
+    @testset "different attributes are not equal" begin
+        @test Element("a"; x="1") != Element("a"; x="2")
+    end
+
+    @testset "different children are not equal" begin
+        @test Element("a", Text("x")) != Element("a", Text("y"))
+    end
+
+    @testset "different node types are not equal" begin
+        @test Text("x") != Comment("x")
+    end
+
+    @testset "empty attributes vs nothing" begin
+        a = Element("a")
+        b = Element("a")
+        @test a == b
+    end
+
+    @testset "parse equality" begin
+        xml = "<root><child>text</child></root>"
+        @test parse(xml, Node) == parse(xml, Node)
+    end
+end
+
+#==============================================================================#
+#                                 XML WRITING                                  #
+#==============================================================================#
+# XML.write serialization: self-closing elements, inlined single-text
+# children, indentation of multi-child elements, and entity escaping.
+# NOTE(review): the expected strings in the three escaping tests below were
+# corrupted by HTML-entity unescaping in transit (the testset names say
+# "escapes" but the expectations contained raw &, <, >, "). Restored to the
+# escaped forms &amp; &lt; &gt; &quot; per XML 1.0 §2.4 — confirm against the
+# writer's actual output.
+@testset "XML Writing" begin
+    @testset "write Text" begin
+        el = Element("p", "hello & goodbye")
+        # '&' in text content must be written as &amp;.
+        @test XML.write(el) == "<p>hello &amp; goodbye</p>"
+    end
+
+    @testset "write Element with attributes" begin
+        el = Element("div"; class="main", id="content")
+        s = XML.write(el)
+        @test contains(s, "<div")
+        @test contains(s, "class=\"main\"")
+        @test contains(s, "id=\"content\"")
+        @test contains(s, "/>")
+    end
+
+    @testset "write self-closing element" begin
+        @test XML.write(Element("br")) == "<br/>"
+    end
+
+    @testset "write element with single text child (inline)" begin
+        @test XML.write(Element("p", "hello")) == "<p>hello</p>"
+    end
+
+    @testset "write element with multiple children (indented)" begin
+        el = Element("div", Element("a"), Element("b"))
+        s = XML.write(el)
+        @test contains(s, "<div>")
+        # Single-space needles match any indent width >= 1.
+        @test contains(s, " <a/>")
+        @test contains(s, " <b/>")
+        @test contains(s, "</div>")
+    end
+
+    @testset "write Comment" begin
+        el = Element("root", Comment(" comment "))
+        @test contains(XML.write(el), "<!-- comment -->")
+    end
+
+    @testset "write CData" begin
+        # CData content is emitted raw — no escaping inside the section.
+        el = Element("root", CData("raw <data>"))
+        @test contains(XML.write(el), "<![CDATA[raw <data>]]>")
+    end
+
+    @testset "write ProcessingInstruction with content" begin
+        pi = ProcessingInstruction("target", "data")
+        @test XML.write(pi) == "<?target data?>"
+    end
+
+    @testset "write ProcessingInstruction without content" begin
+        pi = ProcessingInstruction("target")
+        @test XML.write(pi) == "<?target?>"
+    end
+
+    @testset "write Declaration" begin
+        decl = Declaration(; version="1.0", encoding="UTF-8")
+        s = XML.write(decl)
+        @test contains(s, "<?xml")
+        @test contains(s, "version=\"1.0\"")
+        @test contains(s, "encoding=\"UTF-8\"")
+        @test contains(s, "?>")
+    end
+
+    @testset "write DTD" begin
+        dtd = DTD("html")
+        @test XML.write(dtd) == "<!DOCTYPE html>"
+    end
+
+    @testset "write Document" begin
+        doc = Document(Declaration(; version="1.0"), Element("root"))
+        s = XML.write(doc)
+        @test startswith(s, "<?xml")
+        @test contains(s, "<root/>")
+    end
+
+    @testset "write escapes special characters in text" begin
+        el = Element("p", "a < b & c > d")
+        @test XML.write(el) == "<p>a &lt; b &amp; c &gt; d</p>"
+    end
+
+    @testset "write escapes special characters in attribute values" begin
+        el = Element("x"; a="a\"b")
+        # A literal '"' inside an attribute value must be written as &quot;.
+        @test contains(XML.write(el), "a=\"a&quot;b\"")
+    end
+
+    @testset "indentsize parameter" begin
+        el = Element("root", Element("child"))
+        s2 = XML.write(el; indentsize=2)
+        s4 = XML.write(el; indentsize=4)
+        # 2- vs 4-space needles distinguish the two indent widths
+        # (the original space runs were squashed in transit; reconstructed).
+        @test contains(s2, "  <child/>")
+        @test contains(s4, "    <child/>")
+    end
+
+    @testset "write xml:space='preserve' respects whitespace" begin
+        el 
= Element("root", Element("p", Text(" hello "); var"xml:space"="preserve"))
+        s = XML.write(el)
+        # The preserved element must not gain indentation around its text.
+        @test contains(s, "> hello </p>")
+    end
+end
+
+#==============================================================================#
+#                       WRITE TO FILE / READ FROM FILE                         #
+#==============================================================================#
+@testset "File I/O" begin
+    @testset "write and read back" begin
+        doc = Document(
+            Declaration(; version="1.0"),
+            Element("root", Element("child", "text"))
+        )
+        temp = tempname() * ".xml"
+        XML.write(temp, doc)
+        content = read(temp, String)
+        @test contains(content, "<?xml")
+        @test contains(content, "<root>")
+        @test contains(content, "<child>text</child>")
+        doc2 = read(temp, Node)
+        @test nodetype(doc2) == Document
+        # Find the root element
+        root = first(filter(x -> nodetype(x) == Element, children(doc2)))
+        child = first(filter(x -> nodetype(x) == Element, children(root)))
+        @test tag(child) == "child"
+        @test simple_value(child) == "text"
+        rm(temp)
+    end
+
+    @testset "read from IO" begin
+        xml = """<?xml version="1.0"?><root>hello</root>"""
+        doc = read(IOBuffer(xml), Node)
+        @test nodetype(doc) == Document
+        root = first(filter(x -> nodetype(x) == Element, children(doc)))
+        @test simple_value(root) == "hello"
+    end
+end
+
+#==============================================================================#
+#                        PARSE → WRITE → PARSE ROUNDTRIP                       #
+#==============================================================================#
+# Semantic (not byte-for-byte) preservation: parse, re-serialize, re-parse,
+# then compare structure while ignoring writer-introduced whitespace Text.
+@testset "Roundtrip: parse → write preserves semantics" begin
+    @testset "declaration and root" begin
+        xml = """<?xml version="1.0"?><root/>"""
+        doc = parse(xml, Node)
+        s = XML.write(doc)
+        doc2 = parse(s, Node)
+        decls = filter(x -> nodetype(x) == Declaration, children(doc2))
+        @test length(decls) == 1
+        @test decls[1]["version"] == "1.0"
+        els = filter(x -> nodetype(x) == Element, children(doc2))
+        @test length(els) == 1
+        @test tag(els[1]) == "root"
+    end
+
+    @testset "element with attributes and text" begin
+        xml = """<root><child attr="val">text</child></root>"""
+        doc = parse(xml, Node)
+        s = XML.write(doc)
+        doc2 = parse(s, Node)
+        root = first(filter(x -> nodetype(x) == Element, children(doc2)))
+        child = first(filter(x -> nodetype(x) == Element, children(root)))
+        @test tag(child) == "child"
+        @test child["attr"] == "val"
+        text_children = filter(x -> nodetype(x) == Text, children(child))
+        @test any(t -> value(t) == "text", text_children)
+    end
+
+    @testset "all special node types survive roundtrip" begin
+        xml = """<root><!-- comment --><![CDATA[data]]><?pi content?></root>"""
+        doc = parse(xml, Node)
+        s = XML.write(doc)
+        doc2 = parse(s, Node)
+        root = first(filter(x -> nodetype(x) == Element, children(doc2)))
+        types = map(nodetype, filter(x -> nodetype(x) != Text, children(root)))
+        @test Comment in types
+        @test CData in types
+        @test ProcessingInstruction in types
+    end
+
+    @testset "DOCTYPE survives roundtrip" begin
+        xml = """<!DOCTYPE html><html><body/></html>"""
+        doc = parse(xml, Node)
+        s = XML.write(doc)
+        doc2 = parse(s, Node)
+        dtds = filter(x -> nodetype(x) == DTD, children(doc2))
+        @test length(dtds) == 1
+        @test value(dtds[1]) == "html"
+    end
+
+    @testset "namespace attributes survive roundtrip" begin
+        xml = """<root xmlns:ns="http://example.com"><ns:child/></root>"""
+        doc = parse(xml, Node)
+        s = XML.write(doc)
+        doc2 = parse(s, Node)
+        root = first(filter(x -> nodetype(x) == Element, children(doc2)))
+        @test root["xmlns:ns"] == "http://example.com"
+        child = first(filter(x -> nodetype(x) == Element, children(root)))
+        @test tag(child) == "ns:child"
+    end
+
+    @testset "mixed content survives roundtrip" begin
+        xml = """<p>Hello <b>world</b>!</p>"""
+        doc = parse(xml, Node)
+        s = XML.write(doc)
+        doc2 = parse(s, Node)
+        root = first(filter(x -> nodetype(x) == Element, children(doc2)))
+        # Drop whitespace-only Text nodes the writer may have introduced.
+        non_ws = filter(x -> !(nodetype(x) == Text && isempty(strip(value(x)))), children(root))
+        texts = [value(x) for x in non_ws if 
nodetype(x) == Text]
+        @test any(t -> contains(t, "Hello"), texts)
+        @test any(t -> contains(t, "!"), texts)
+        bolds = filter(x -> nodetype(x) == Element && tag(x) == "b", non_ws)
+        @test length(bolds) == 1
+        @test simple_value(bolds[1]) == "world"
+    end
+end
+
+# Roundtrip each bundled data file that exists on disk; only structural
+# properties (node type, element count) are compared, not exact bytes.
+@testset "Roundtrip: file-based semantic preservation" begin
+    all_files = filter(isfile, [
+        joinpath(@__DIR__, "data", "xml.xsd"),
+        joinpath(@__DIR__, "data", "kml.xsd"),
+        joinpath(@__DIR__, "data", "books.xml"),
+        # example.kml uses invalid <![CData[...]]> (lowercase), skip roundtrip
+        joinpath(@__DIR__, "data", "simple_dtd.xml"),
+        joinpath(@__DIR__, "data", "preserve.xml"),
+    ])
+
+    for path in all_files
+        node = read(path, Node)
+        temp = tempname() * ".xml"
+        XML.write(temp, node)
+        node2 = read(temp, Node)
+        # Verify structural properties are preserved
+        @test nodetype(node) == nodetype(node2)
+        # Count non-whitespace elements
+        count_elements(n) = sum(1 for c in children(n) if nodetype(c) == Element; init=0)
+        @test count_elements(node) == count_elements(node2)
+        rm(temp)
+    end
+end
+
+#==============================================================================#
+#                         PARSE Node{SubString{String}}                        #
+#==============================================================================#
+@testset "Parse with SubString{String}" begin
+    xml = """<?xml version="1.0"?><root attr="val"><child>text</child></root>"""
+    doc = parse(xml, Node{SubString{String}})
+    @test nodetype(doc) == Document
+    @test tag(doc[2]) == "root"
+    @test doc[2]["attr"] == "val"
+    # SubString values
+    @test value(doc[2][1][1]) isa SubString{String}
+end
+
+#==============================================================================#
+#                           COMPLEX DOCUMENT PARSING                           #
+#==============================================================================#
+# Real-world fixture files. Each testset bails out early (`return` — legal
+# because @testset wraps its body in a function) when the fixture is absent.
+@testset "Complex Document Parsing" begin
+    @testset "books.xml" begin
+        path = joinpath(@__DIR__, "data", "books.xml")
+        isfile(path) || return
+        doc = read(path, Node)
+        @test nodetype(doc) == Document
+
+        # Should have declaration + catalog
+        decl_nodes = filter(x -> nodetype(x) == Declaration, children(doc))
+        @test length(decl_nodes) == 1
+        @test decl_nodes[1]["version"] == "1.0"
+
+        el_nodes = filter(x -> nodetype(x) == Element, children(doc))
+        @test length(el_nodes) == 1
+        catalog = el_nodes[1]
+        @test tag(catalog) == "catalog"
+
+        # Catalog has 12 books
+        books = filter(x -> nodetype(x) == Element, children(catalog))
+        @test length(books) == 12
+
+        # First book
+        book1 = books[1]
+        @test book1["id"] == "bk101"
+
+        # Each book has: author, title, genre, price, publish_date, description
+        book_children = filter(x -> nodetype(x) == Element, children(book1))
+        book_tags = map(tag, book_children)
+        @test "author" in book_tags
+        @test "title" in book_tags
+        @test "genre" in book_tags
+        @test "price" in book_tags
+        @test "publish_date" in book_tags
+        @test "description" in book_tags
+
+        author = first(filter(x -> tag(x) == "author", book_children))
+        @test simple_value(author) == "Gambardella, Matthew"
+    end
+
+    @testset "simple_dtd.xml" begin
+        path = joinpath(@__DIR__, "data", "simple_dtd.xml")
+        isfile(path) || return
+        doc = read(path, Node)
+        @test nodetype(doc) == Document
+
+        dtd_nodes = filter(x -> nodetype(x) == DTD, children(doc))
+        @test length(dtd_nodes) == 1
+        @test contains(value(dtd_nodes[1]), "ENTITY")
+    end
+
+    @testset "preserve.xml" begin
+        path = joinpath(@__DIR__, "data", "preserve.xml")
+        isfile(path) || return
+        doc = read(path, Node)
+        @test nodetype(doc) == Document
+
+        root = filter(x -> nodetype(x) == Element, children(doc))[1]
+        @test tag(root) == "root"
+        @test root["xml:space"] == "preserve"
+
+        child_els = filter(x -> nodetype(x) == Element, children(root))
+        @test length(child_els) == 1
+        @test tag(child_els[1]) == "child"
+        @test child_els[1]["xml:space"] == "default"
+    end
+
+    @testset "example.kml" begin
+        # example.kml uses invalid <![CData[...]]> (lowercase 'd') which is not valid XML
+        path = joinpath(@__DIR__, "data", "example.kml")
+        isfile(path) || return
+        @test_throws ArgumentError read(path, Node)
+    end
+
+    @testset "tv.dtd" begin
+        path = joinpath(@__DIR__, "data", "tv.dtd")
+        isfile(path) || return
+        dtd_text = read(path, String)
+        # The .dtd fixture is an internal subset only; wrap it in a DOCTYPE
+        # body so parse_dtd sees "<root> [ ... ]".
+        pd = parse_dtd("TVSCHEDULE [\n" * dtd_text * "\n]")
+        @test pd.root == "TVSCHEDULE"
+
+        @test length(pd.elements) == 10
+        elem_names = map(e -> e.name, pd.elements)
+        @test "TVSCHEDULE" in elem_names
+        @test "CHANNEL" in elem_names
+        @test "PROGRAMSLOT" in elem_names
+        @test "TITLE" in elem_names
+
+        @test length(pd.attributes) == 5
+        attr_elements = map(a -> a.element, pd.attributes)
+        @test "TVSCHEDULE" in attr_elements
+        @test "CHANNEL" in attr_elements
+        @test "TITLE" in attr_elements
+    end
+end
+
+#==============================================================================#
+#                           DTD PARSING (parse_dtd)                            #
+#==============================================================================#
+@testset "DTD Parsing (parse_dtd)" begin
+    @testset "simple DTD with entities" begin
+        path = joinpath(@__DIR__, "data", "simple_dtd.xml")
+        isfile(path) || return
+        doc = read(path, Node)
+        dtd_node = first(filter(x -> nodetype(x) == DTD, children(doc)))
+        pd = parse_dtd(dtd_node)
+        @test pd.root == "note"
+        @test length(pd.entities) == 3
+        @test pd.entities[1].name == "nbsp"
+        @test pd.entities[2].name == "writer"
+        @test pd.entities[3].name == "copyright"
+        @test pd.entities[2].value == "Writer: Donald Duck."
+    end
+
+    @testset "DTD with SYSTEM external ID" begin
+        pd = parse_dtd("""root SYSTEM "root.dtd\"""")
+        @test pd.root == "root"
+        @test pd.system_id == "root.dtd"
+        @test pd.public_id === nothing
+    end
+
+    @testset "DTD with PUBLIC external ID" begin
+        # PUBLIC takes two literals: public identifier, then system identifier.
+        pd = parse_dtd("""root PUBLIC "-//W3C//DTD XHTML 1.0//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd\"""")
+        @test pd.root == "root"
+        @test pd.public_id == "-//W3C//DTD XHTML 1.0//EN"
+        @test pd.system_id == "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd"
+    end
+
+    @testset "DTD with ELEMENT declarations" begin
+        pd = parse_dtd("""root [
+<!ELEMENT root (child)>
+<!ELEMENT child (#PCDATA)>
+<!ELEMENT empty EMPTY>
+<!ELEMENT any ANY>
+]""")
+        @test pd.root == "root"
+        @test length(pd.elements) == 4
+        # Content models are kept as raw strings, not parsed structures.
+        @test pd.elements[1].name == "root"
+        @test pd.elements[1].content == "(child)"
+        @test pd.elements[2].name == "child"
+        @test pd.elements[2].content == "(#PCDATA)"
+        @test pd.elements[3].name == "empty"
+        @test pd.elements[3].content == "EMPTY"
+        @test pd.elements[4].name == "any"
+        @test pd.elements[4].content == "ANY"
+    end
+
+    @testset "DTD with ATTLIST declarations (spec examples)" begin
+        pd = parse_dtd("""root [
+<!ATTLIST termdef id ID #REQUIRED name CDATA #IMPLIED>
+<!ATTLIST list type (bullets|ordered|glossary) "ordered">
+<!ATTLIST form method CDATA #FIXED "POST">
+]""")
+        # One ATTLIST may declare several attributes (termdef declares two).
+        @test length(pd.attributes) == 4
+        @test pd.attributes[1].element == "termdef"
+        @test pd.attributes[1].name == "id"
+        @test pd.attributes[1].type == "ID"
+        @test pd.attributes[1].default == "#REQUIRED"
+        @test pd.attributes[2].name == "name"
+        @test pd.attributes[2].type == "CDATA"
+        @test pd.attributes[2].default == "#IMPLIED"
+        @test pd.attributes[3].element == "list"
+        @test pd.attributes[3].name == "type"
+        # Defaults keep their surrounding quotes verbatim.
+        @test pd.attributes[3].default == "\"ordered\""
+        @test pd.attributes[4].element == "form"
+        @test pd.attributes[4].name == "method"
+        @test pd.attributes[4].default == "#FIXED \"POST\""
+    end
+
+    @testset "DTD with ENTITY declarations (spec examples)" begin
+        pd = parse_dtd("""root [
+<!ENTITY Pub-Status "This is a pre-release of the specification.">
+<!ENTITY open-hatch SYSTEM "http://www.textuality.com/boilerplate/OpenHatch.xml">
+<!ENTITY open-hatch2 PUBLIC "-//Textuality//TEXT Standard open-hatch boilerplate//EN" "http://www.textuality.com/boilerplate/OpenHatch.xml">
+<!ENTITY % YN '"Yes"'>
+]""")
+        @test length(pd.entities) == 4
+        @test pd.entities[1].name == "Pub-Status"
+        @test pd.entities[1].value == "This is a pre-release of the specification."
+        @test pd.entities[1].parameter == false
+
+        # External entities carry an external_id instead of a value.
+        @test pd.entities[2].name == "open-hatch"
+        @test pd.entities[2].value === nothing
+        @test contains(pd.entities[2].external_id, "SYSTEM")
+
+        @test pd.entities[3].name == "open-hatch2"
+        @test contains(pd.entities[3].external_id, "PUBLIC")
+
+        # '%' marks a parameter entity.
+        @test pd.entities[4].name == "YN"
+        @test pd.entities[4].parameter == true
+    end
+
+    @testset "DTD with NOTATION declarations (spec example)" begin
+        pd = parse_dtd("""root [
+<!NOTATION vrml PUBLIC "VRML 1.0">
+<!NOTATION jpeg SYSTEM "image/jpeg">
+]""")
+        @test length(pd.notations) == 2
+        @test pd.notations[1].name == "vrml"
+        @test contains(pd.notations[1].external_id, "PUBLIC")
+        @test pd.notations[2].name == "jpeg"
+        @test contains(pd.notations[2].external_id, "SYSTEM")
+    end
+
+    @testset "parse_dtd from Node" begin
+        dtd = DTD("root [<!ELEMENT root (#PCDATA)>]")
+        pd = parse_dtd(dtd)
+        @test pd.root == "root"
+        @test length(pd.elements) == 1
+    end
+
+    @testset "parse_dtd errors on non-DTD node" begin
+        @test_throws ErrorException parse_dtd(Element("x"))
+    end
+
+    @testset "complex DTD file (structure test)" begin
+        # complex_dtd.xml uses parameter entity references (%text;) which parse_dtd
+        # does not expand, so we just verify parsing the XML document itself works
+        path = joinpath(@__DIR__, "data", "complex_dtd.xml")
+        isfile(path) || return
+        doc = read(path, Node)
+        dtd_node = first(filter(x -> nodetype(x) == DTD, children(doc)))
+        @test nodetype(dtd_node) == DTD
+        @test contains(value(dtd_node), "test")
+        @test contains(value(dtd_node), "ELEMENT")
+        @test contains(value(dtd_node), "ATTLIST")
+        @test contains(value(dtd_node), "NOTATION")
+        @test contains(value(dtd_node), "ENTITY")
+    end
+end
+
+#==============================================================================#
+#             XML 1.0 SPEC: ELEMENT TYPE DECLARATIONS (Section 3.2)            #
+#==============================================================================#
+# Content-model forms from XML 1.0 §3.2: EMPTY, ANY, mixed, sequence, choice.
+@testset "Spec 3.2: Element Type Declarations" begin
+    @testset "EMPTY content model" begin
+        pd = parse_dtd("root [<!ELEMENT br EMPTY>]")
+        @test pd.elements[1].content == "EMPTY"
+    end
+
+    @testset "ANY content model" begin
+        pd = parse_dtd("root [<!ELEMENT container ANY>]")
+        @test pd.elements[1].content == "ANY"
+    end
+
+    @testset "#PCDATA content model" begin
+        pd = parse_dtd("root [<!ELEMENT text (#PCDATA)>]")
+        @test pd.elements[1].content == "(#PCDATA)"
+    end
+
+    @testset "mixed content model" begin
+        pd = parse_dtd("root [<!ELEMENT p (#PCDATA|emph)*>]")
+        @test pd.elements[1].content == "(#PCDATA|emph)*"
+    end
+
+    @testset "sequence content model" begin
+        pd = parse_dtd("root [<!ELEMENT spec (front, body, back?)>]")
+        @test pd.elements[1].content == "(front, body, back?)"
+    end
+
+    @testset "choice content model" begin
+        pd = parse_dtd("root [<!ELEMENT div1 (head, (p | list | note)*, div2*)>]")
+        @test pd.elements[1].content == "(head, (p | list | note)*, div2*)"
+    end
+end
+
+#==============================================================================#
+#            XML 1.0 SPEC: ATTRIBUTE-LIST DECLARATIONS (Section 3.3)           #
+#==============================================================================#
+@testset "Spec 3.3: Attribute-List Declarations" begin
+    @testset "ID attribute" begin
+        pd = parse_dtd("root [<!ATTLIST el id ID #REQUIRED>]")
+        @test pd.attributes[1].type == "ID"
+        @test pd.attributes[1].default == "#REQUIRED"
+    end
+
+    @testset "CDATA attribute with default" 
begin + pd = parse_dtd("""root [<!ATTLIST el name CDATA "default">]""") + @test pd.attributes[1].type == "CDATA" + @test pd.attributes[1].default == "\"default\"" + end + + @testset "enumerated attribute" begin + pd = parse_dtd("""root [<!ATTLIST list type (bullets|ordered|glossary) "ordered">]""") + @test contains(pd.attributes[1].type, "bullets") + @test pd.attributes[1].default == "\"ordered\"" + end + + @testset "#IMPLIED attribute" begin + pd = parse_dtd("root [<!ATTLIST el opt CDATA #IMPLIED>]") + @test pd.attributes[1].default == "#IMPLIED" + end + + @testset "#FIXED attribute" begin + pd = parse_dtd("""root [<!ATTLIST el method CDATA #FIXED "POST">]""") + @test pd.attributes[1].default == "#FIXED \"POST\"" + end + + @testset "NOTATION attribute type" begin + pd = parse_dtd("root [<!ATTLIST fig notation NOTATION (jpeg|png) #IMPLIED>]") + @test contains(pd.attributes[1].type, "NOTATION") + end + + @testset "multiple attributes in one ATTLIST" begin + pd = parse_dtd("""root [<!ATTLIST book + id ID #REQUIRED + isbn CDATA #IMPLIED + format (hardcover|paperback|ebook) "paperback">]""") + @test length(pd.attributes) == 3 + @test pd.attributes[1].name == "id" + @test pd.attributes[2].name == "isbn" + @test pd.attributes[3].name == "format" + end +end + +#==============================================================================# +# XML 1.0 SPEC: ENTITY DECLARATIONS (Section 4.2) # +#==============================================================================# +@testset "Spec 4.2: Entity Declarations" begin + @testset "internal general entity (spec example)" begin + pd = parse_dtd("""root [<!ENTITY Pub-Status "This is a pre-release of the specification.">]""") + @test pd.entities[1].name == "Pub-Status" + @test pd.entities[1].value == "This is a pre-release of the specification." 
+ @test pd.entities[1].external_id === nothing + @test pd.entities[1].parameter == false + end + + @testset "external entity with SYSTEM (spec example)" begin + pd = parse_dtd("""root [<!ENTITY open-hatch SYSTEM "http://www.textuality.com/boilerplate/OpenHatch.xml">]""") + @test pd.entities[1].name == "open-hatch" + @test pd.entities[1].value === nothing + @test contains(pd.entities[1].external_id, "SYSTEM") + @test contains(pd.entities[1].external_id, "http://www.textuality.com/boilerplate/OpenHatch.xml") + end + + @testset "external entity with PUBLIC (spec example)" begin + pd = parse_dtd("""root [<!ENTITY open-hatch PUBLIC "-//Textuality//TEXT Standard open-hatch boilerplate//EN" "http://www.textuality.com/boilerplate/OpenHatch.xml">]""") + @test pd.entities[1].name == "open-hatch" + @test contains(pd.entities[1].external_id, "PUBLIC") + end + + @testset "parameter entity" begin + pd = parse_dtd("""root [<!ENTITY % YN '"Yes"'>]""") + @test pd.entities[1].name == "YN" + @test pd.entities[1].parameter == true + end +end + +#==============================================================================# +# XML 1.0 SPEC: NOTATION DECLARATIONS (Section 4.7) # +#==============================================================================# +@testset "Spec 4.7: Notation Declarations" begin + @testset "NOTATION with PUBLIC (spec example)" begin + pd = parse_dtd("""root [<!NOTATION vrml PUBLIC "VRML 1.0">]""") + @test pd.notations[1].name == "vrml" + @test contains(pd.notations[1].external_id, "PUBLIC") + @test contains(pd.notations[1].external_id, "VRML 1.0") + end + + @testset "NOTATION with SYSTEM" begin + pd = parse_dtd("""root [<!NOTATION jpeg SYSTEM "image/jpeg">]""") + @test pd.notations[1].name == "jpeg" + @test contains(pd.notations[1].external_id, "SYSTEM") + end +end + +#==============================================================================# +# ERROR HANDLING # +#==============================================================================# 
+@testset "Error Handling" begin + @testset "mismatched tags" begin + @test_throws ErrorException parse("<a></b>", Node) + end + + @testset "unclosed tag" begin + @test_throws ErrorException parse("<a><b></a>", Node) + end + + @testset "closing tag with no open tag" begin + @test_throws ErrorException parse("</a>", Node) + end + + @testset "unclosed root element" begin + @test_throws ErrorException parse("<root>", Node) + end + + @testset "unterminated comment" begin + @test_throws Exception parse("<root><!-- no end", Node) + end + + @testset "unterminated CDATA" begin + @test_throws Exception parse("<root><![CDATA[no end", Node) + end + + @testset "unterminated PI" begin + @test_throws Exception parse("<?pi no end", Node) + end + + @testset "unterminated attribute value" begin + @test_throws Exception parse("""<a b="no end""", Node) + end +end + +#==============================================================================# +# ILL-FORMED XML (must error) # +#==============================================================================# +@testset "Ill-Formed XML" begin + # ---- Tag structure ---- + @testset "mismatched close tag" begin + @test_throws Exception parse("<a></b>", Node) + end + + @testset "overlapping elements" begin + @test_throws Exception parse("<a><b></a></b>", Node) + end + + @testset "deeply mismatched nesting" begin + @test_throws Exception parse("<a><b><c></b></c></a>", Node) + end + + @testset "multiple unclosed tags" begin + @test_throws Exception parse("<a><b><c>", Node) + end + + @testset "close tag without open" begin + @test_throws Exception parse("</a>", Node) + end + + @testset "close tag after self-closing" begin + @test_throws Exception parse("<a/></a>", Node) + end + + @testset "nested close tag without open" begin + @test_throws Exception parse("<root></inner></root>", Node) + end + + # ---- Unterminated constructs ---- + @testset "unterminated open tag at EOF" begin + @test_throws Exception parse("<root><unclosed", Node) + end + 
+ @testset "unterminated attribute value (double quote)" begin + @test_throws Exception parse("""<a x="no end""", Node) + end + + @testset "unterminated attribute value (single quote)" begin + @test_throws Exception parse("<a x='no end", Node) + end + + @testset "unterminated comment" begin + @test_throws Exception parse("<!-- no end", Node) + end + + @testset "unterminated CDATA" begin + @test_throws Exception parse("<![CDATA[no end", Node) + end + + @testset "unterminated processing instruction" begin + @test_throws Exception parse("<?pi no end", Node) + end + + @testset "unterminated DOCTYPE" begin + @test_throws Exception parse("<!DOCTYPE x", Node) + end + + # ---- Attribute errors ---- + @testset "duplicate attribute on element" begin + @test_throws Exception parse("""<a x="1" x="2"/>""", Node) + end + + @testset "duplicate attribute (different values)" begin + @test_throws Exception parse("""<root attr="a" attr="b"></root>""", Node) + end + + @testset "duplicate attribute in declaration" begin + @test_throws Exception parse("""<?xml version="1.0" version="1.1"?><a/>""", Node) + end + + @testset "attribute without value" begin + @test_throws Exception parse("<a disabled/>", Node) + end + + @testset "attribute with unquoted value" begin + @test_throws Exception parse("<a x=hello/>", Node) + end + + # ---- Tokenizer-level errors ---- + @testset "lone <" begin + @test_throws Exception parse("<", Node) + end + + @testset "lone < in text content" begin + @test_throws Exception parse("<root>a < b</root>", Node) + end + + @testset "tag with space before name" begin + @test_throws Exception parse("< root/>", Node) + end +end + +#==============================================================================# +# UNICODE SUPPORT # +#==============================================================================# +@testset "Unicode Support" begin + @testset "Unicode in text content" begin + doc = parse("<root>caf\u00e9 \u00f1 \u65e5\u672c\u8a9e</root>", Node) + @test 
simple_value(doc[1]) == "caf\u00e9 \u00f1 \u65e5\u672c\u8a9e" + end + + @testset "Unicode in attribute values" begin + doc = parse("<root name=\"\u00fcber\"/>", Node) + @test doc[1]["name"] == "\u00fcber" + end + + @testset "Unicode in comments" begin + doc = parse("<root><!-- h\u00e9llo --></root>", Node) + @test value(doc[1][1]) == " h\u00e9llo " + end + + @testset "CJK characters" begin + doc = parse("<root>\u4e2d\u6587</root>", Node) + @test simple_value(doc[1]) == "\u4e2d\u6587" + end + + @testset "emoji in text" begin + doc = parse("<root>\U0001f600\U0001f680</root>", Node) + @test simple_value(doc[1]) == "\U0001f600\U0001f680" + end + + @testset "Cyrillic characters" begin + doc = parse("<root>\u041f\u0440\u0438\u0432\u0435\u0442</root>", Node) + @test simple_value(doc[1]) == "\u041f\u0440\u0438\u0432\u0435\u0442" + end + + @testset "Arabic characters" begin + doc = parse("<root>\u0645\u0631\u062d\u0628\u0627</root>", Node) + @test simple_value(doc[1]) == "\u0645\u0631\u062d\u0628\u0627" + end +end + +#==============================================================================# +# EDGE CASES # +#==============================================================================# +@testset "Edge Cases" begin + @testset "document with only whitespace around root" begin + doc = parse(" \n <root/>\n ", Node) + # Parser preserves whitespace as Text nodes + els = filter(x -> nodetype(x) == Element, children(doc)) + @test length(els) == 1 + @test tag(els[1]) == "root" + end + + @testset "deeply nested elements" begin + xml = "<a><b><c><d><e><f>deep</f></e></d></c></b></a>" + doc = parse(xml, Node) + @test simple_value(doc[1][1][1][1][1][1]) == "deep" + end + + @testset "many siblings" begin + items = join(["<item>$i</item>" for i in 1:100]) + xml = "<root>$items</root>" + doc = parse(xml, Node) + @test length(doc[1]) == 100 + @test simple_value(doc[1][1]) == "1" + @test simple_value(doc[1][100]) == "100" + end + + @testset "element with hyphens and dots in name" 
begin + doc = parse("<my-element.name/>", Node) + @test tag(doc[1]) == "my-element.name" + end + + @testset "element with underscore in name" begin + doc = parse("<_private/>", Node) + @test tag(doc[1]) == "_private" + end + + @testset "attribute with numeric value" begin + doc = parse("""<x count="42"/>""", Node) + @test doc[1]["count"] == "42" + end + + @testset "empty text content" begin + doc = parse("<root></root>", Node) + @test isnothing(doc[1].children) + end + + @testset "adjacent CDATA and text" begin + doc = parse("<root>text<![CDATA[cdata]]>more</root>", Node) + @test length(doc[1]) == 3 + @test value(doc[1][1]) == "text" + @test value(doc[1][2]) == "cdata" + @test value(doc[1][3]) == "more" + end + + @testset "multiple CDATA sections" begin + doc = parse("<root><![CDATA[a]]><![CDATA[b]]></root>", Node) + @test length(doc[1]) == 2 + @test value(doc[1][1]) == "a" + @test value(doc[1][2]) == "b" + end + + @testset "comment between elements" begin + doc = parse("<root><a/><!-- between --><b/></root>", Node) + @test length(doc[1]) == 3 + @test nodetype(doc[1][2]) == Comment + end + + @testset "PI between elements" begin + doc = parse("<root><a/><?pi data?><b/></root>", Node) + @test length(doc[1]) == 3 + @test nodetype(doc[1][2]) == ProcessingInstruction + end + + @testset "all node types in one document" begin + xml = """<?xml version="1.0"?> +<!DOCTYPE root SYSTEM "root.dtd"> +<!-- comment --> +<?pi data?> +<root> + text + <child attr="val"/> + <!-- inner comment --> + <![CDATA[cdata]]> + <?inner-pi inner data?> +</root>""" + doc = parse(xml, Node) + types = map(nodetype, children(doc)) + @test Declaration in types + @test DTD in types + @test Comment in types + @test ProcessingInstruction in types + @test Element in types + end + + @testset "very long attribute value" begin + long_val = repeat("a", 10000) + doc = parse("""<x attr="$(long_val)"/>""", Node) + @test doc[1]["attr"] == long_val + end + + @testset "very long text content" begin + long_text = 
repeat("hello ", 10000) + doc = parse("<root>$(long_text)</root>", Node) + @test simple_value(doc[1]) == long_text + end + + @testset "CDATA with ]] but not followed by >" begin + doc = parse("<root><![CDATA[a]]b]]></root>", Node) + @test value(doc[1][1]) == "a]]b" + end +end + +#==============================================================================# +# SPEC EXAMPLES: FULL DOCUMENTS # +#==============================================================================# +@testset "Full Spec-Like Documents" begin + @testset "spec section 2.1: minimal document" begin + xml = """<?xml version="1.0"?> +<greeting>Hello, world!</greeting>""" + doc = parse(xml, Node) + @test nodetype(doc) == Document + @test simple_value(doc[end]) == "Hello, world!" + end + + @testset "spec section 2.8: document with external DTD" begin + xml = """<?xml version="1.0"?> +<!DOCTYPE greeting SYSTEM "hello.dtd"> +<greeting>Hello, world!</greeting>""" + doc = parse(xml, Node) + # Filter out whitespace text nodes to check structure + typed = filter(x -> nodetype(x) != Text, children(doc)) + @test length(typed) == 3 + @test nodetype(typed[1]) == Declaration + @test nodetype(typed[2]) == DTD + @test nodetype(typed[3]) == Element + end + + @testset "spec: document with internal subset" begin + xml = """<?xml version="1.0" encoding="UTF-8"?> +<!DOCTYPE greeting [ + <!ELEMENT greeting (#PCDATA)> +]> +<greeting>Hello, world!</greeting>""" + doc = parse(xml, Node) + typed = filter(x -> nodetype(x) != Text, children(doc)) + @test typed[1]["encoding"] == "UTF-8" + @test nodetype(typed[2]) == DTD + pd = parse_dtd(typed[2]) + @test pd.root == "greeting" + @test length(pd.elements) == 1 + @test pd.elements[1].name == "greeting" + @test pd.elements[1].content == "(#PCDATA)" + @test simple_value(typed[3]) == "Hello, world!" + end + + @testset "typical HTML5-like doctype" begin + xml = """<!DOCTYPE html><html><head><title>Test

Content

""" + doc = parse(xml, Node) + @test nodetype(doc[1]) == DTD + @test value(doc[1]) == "html" + @test tag(doc[2]) == "html" + end + + @testset "SVG document" begin + xml = """ + + + Hello SVG +""" + doc = parse(xml, Node) + svg = doc[end] + @test tag(svg) == "svg" + @test svg["xmlns"] == "http://www.w3.org/2000/svg" + @test svg["width"] == "100" + + elements = filter(x -> nodetype(x) == Element, children(svg)) + @test length(elements) == 2 + @test tag(elements[1]) == "circle" + @test elements[1]["fill"] == "red" + @test tag(elements[2]) == "text" + @test value(elements[2][1]) == "Hello SVG" + end + + @testset "SOAP-like envelope" begin + xml = """ + + + + + IBM + + +""" + doc = parse(xml, Node) + env = doc[end] + @test tag(env) == "soap:Envelope" + elements = filter(x -> nodetype(x) == Element, children(env)) + @test tag(elements[1]) == "soap:Header" + @test tag(elements[2]) == "soap:Body" + end + + @testset "RSS-like feed" begin + xml = """ + + + Example Feed + http://example.com + An example RSS feed + + Item 1 + http://example.com/1 + + + Item 2 + http://example.com/2 + + +""" + doc = parse(xml, Node) + rss = doc[end] + @test tag(rss) == "rss" + @test rss["version"] == "2.0" + channel = first(filter(x -> nodetype(x) == Element, children(rss))) + @test tag(channel) == "channel" + items = filter(x -> nodetype(x) == Element && tag(x) == "item", children(channel)) + @test length(items) == 2 + end + + @testset "Atom-like feed" begin + xml = """ + + Example Feed + + Atom-Powered Robots Run Amok + + urn:uuid:1225c695-cfb8-4ebb-aaaa-80da344efa6a + 2003-12-13T18:30:02Z + Some text. 
+ +""" + doc = parse(xml, Node) + feed = doc[end] + @test tag(feed) == "feed" + @test feed["xmlns"] == "http://www.w3.org/2005/Atom" + entries = filter(x -> nodetype(x) == Element && tag(x) == "entry", children(feed)) + @test length(entries) == 1 + end + + @testset "MathML-like document" begin + xml = """ + + + x + 2 + + + + 1 + +""" + doc = parse(xml, Node) + math = doc[1] + @test tag(math) == "math" + @test math["xmlns"] == "http://www.w3.org/1998/Math/MathML" + end + + @testset "document with processing instructions and comments mixed" begin + xml = """ + + + + + + + +""" + doc = parse(xml, Node) + types = map(nodetype, children(doc)) + @test count(==(Comment), types) == 2 + @test count(==(ProcessingInstruction), types) >= 1 + @test count(==(Element), types) == 1 + end +end + +#==============================================================================# +# SHOW / DISPLAY # +#==============================================================================# +@testset "Show (REPL display)" begin + @testset "show Text" begin + t = Text("hello") + s = sprint(show, t) + @test contains(s, "Text") + @test contains(s, "hello") + end + + @testset "show Element" begin + el = Element("div"; class="main") + s = sprint(show, el) + @test contains(s, "Element") + @test contains(s, "hello

" + end +end + +#==============================================================================# +# SHOW (text/xml MIME) ROUNDTRIP # +#==============================================================================# +@testset "text/xml MIME output" begin + doc = Document( + Declaration(; version="1.0"), + Element("root", Element("child", "text")) + ) + xml_str = sprint(show, MIME("text/xml"), doc) + @test contains(xml_str, "") + @test contains(xml_str, "text") + # Verify it's parseable + doc2 = parse(xml_str, Node) + @test nodetype(doc2) == Document + root = first(filter(x -> nodetype(x) == Element, children(doc2))) + @test tag(root) == "root" + child = first(filter(x -> nodetype(x) == Element, children(root))) + @test simple_value(child) == "text" +end + +#==============================================================================# +# CONSTRUCTION → WRITE → PARSE ROUNDTRIP # +#==============================================================================# +@testset "Construction → Write → Parse" begin + @testset "simple element: write then parse preserves semantics" begin + el = Element("greeting", "Hello, world!") + xml = XML.write(Document(el)) + doc2 = parse(xml, Node) + @test simple_value(doc2[1]) == "Hello, world!" 
+ end + + @testset "element with attributes: write then parse preserves attributes" begin + el = Element("item"; id="1", class="active") + xml = XML.write(Document(el)) + doc2 = parse(xml, Node) + @test doc2[1]["id"] == "1" + @test doc2[1]["class"] == "active" + end + + @testset "single-child text elements roundtrip" begin + doc = Document(Element("root", "text")) + xml = XML.write(doc) + doc2 = parse(xml, Node) + @test doc == doc2 + end + + @testset "self-closing elements roundtrip" begin + doc = Document(Element("root")) + xml = XML.write(doc) + doc2 = parse(xml, Node) + @test doc == doc2 + end + + @testset "all node types survive write → parse" begin + doc = Document( + Declaration(; version="1.0"), + Comment(" header "), + Element("root", + Element("child", "text"), + CData("raw "), + Comment(" inner "), + ProcessingInstruction("pi", "content") + ) + ) + xml = XML.write(doc) + doc2 = parse(xml, Node) + typed = filter(x -> nodetype(x) != Text, children(doc2)) + @test count(==(Declaration), map(nodetype, typed)) == 1 + @test count(==(Comment), map(nodetype, typed)) == 1 + @test count(==(Element), map(nodetype, typed)) == 1 + root = first(filter(x -> nodetype(x) == Element, typed)) + inner = filter(x -> nodetype(x) != Text, children(root)) + inner_types = map(nodetype, inner) + @test Element in inner_types + @test CData in inner_types + @test Comment in inner_types + @test ProcessingInstruction in inner_types + end + + @testset "special characters in text roundtrip" begin + el = Element("p", "a < b & c > d ' e \" f") + xml = XML.write(Document(el)) + doc2 = parse(xml, Node) + @test simple_value(doc2[1]) == "a < b & c > d ' e \" f" + end + + @testset "special characters in attributes roundtrip" begin + el = Element("x"; data="a&bd'e\"f") + xml = XML.write(Document(el)) + doc2 = parse(xml, Node) + @test doc2[1]["data"] == "a&bd'e\"f" + end +end + +#==============================================================================# +# KML-LIKE DOCUMENT # 
+#==============================================================================# +@testset "KML-like Document" begin + xml = """ + + + KML Sample + + Simple placemark + Attached to the ground. + + -122.0822035,37.4220033612141,0 + + + +""" + doc = parse(xml, Node) + kml = doc[end] + @test tag(kml) == "kml" + @test kml["xmlns"] == "http://www.opengis.net/kml/2.2" + + document = first(filter(x -> nodetype(x) == Element, children(kml))) + @test tag(document) == "Document" + + name = first(filter(x -> nodetype(x) == Element && tag(x) == "name", children(document))) + @test simple_value(name) == "KML Sample" + + pm = first(filter(x -> nodetype(x) == Element && tag(x) == "Placemark", children(document))) + pm_name = first(filter(x -> nodetype(x) == Element && tag(x) == "name", children(pm))) + @test simple_value(pm_name) == "Simple placemark" +end + +#==============================================================================# +# XHTML-LIKE DOCUMENT # +#==============================================================================# +@testset "XHTML-like Document" begin + xml = """ + + + + XHTML Test + + + +

Hello World

+

This is a test of XHTML.

+
+ An image + +""" + doc = parse(xml, Node) + typed = filter(x -> nodetype(x) != Text, children(doc)) + @test nodetype(typed[1]) == Declaration + @test nodetype(typed[2]) == DTD + @test contains(value(typed[2]), "PUBLIC") + + html = first(filter(x -> nodetype(x) == Element, children(doc))) + @test tag(html) == "html" + @test html["xmlns"] == "http://www.w3.org/1999/xhtml" + + head_el = first(filter(x -> nodetype(x) == Element && tag(x) == "head", children(html))) + title_el = first(filter(x -> nodetype(x) == Element && tag(x) == "title", children(head_el))) + @test simple_value(title_el) == "XHTML Test" + + body_el = first(filter(x -> nodetype(x) == Element && tag(x) == "body", children(html))) + h1_el = first(filter(x -> nodetype(x) == Element && tag(x) == "h1", children(body_el))) + @test simple_value(h1_el) == "Hello World" + + # Verify write produces valid XML that can be re-parsed + xml2 = XML.write(doc) + doc2 = parse(xml2, Node) + @test nodetype(doc2) == Document +end + +#==============================================================================# +# PLIST-LIKE DOCUMENT # +#==============================================================================# +@testset "plist-like Document" begin + xml = """ + + + + CFBundleName + MyApp + CFBundleVersion + 1.0 + LSRequiresIPhoneOS + + +""" + doc = parse(xml, Node) + plist = doc[end] + @test tag(plist) == "plist" + @test plist["version"] == "1.0" + + dict = first(filter(x -> nodetype(x) == Element, children(plist))) + @test tag(dict) == "dict" + + elements = filter(x -> nodetype(x) == Element, children(dict)) + keys_found = [simple_value(e) for e in elements if tag(e) == "key"] + @test "CFBundleName" in keys_found + @test "CFBundleVersion" in keys_found +end + +#==============================================================================# +# MAVEN POM-LIKE DOCUMENT # +#==============================================================================# +@testset "Maven POM-like Document" begin + xml = """ + + 4.0.0 + 
com.example + my-app + 1.0-SNAPSHOT + + + junit + junit + 4.13.2 + test + + +""" + doc = parse(xml, Node) + project = doc[end] + @test tag(project) == "project" + + elements = filter(x -> nodetype(x) == Element, children(project)) + version = first(filter(x -> tag(x) == "version", elements)) + @test simple_value(version) == "1.0-SNAPSHOT" + + deps = first(filter(x -> tag(x) == "dependencies", elements)) + dep_list = filter(x -> nodetype(x) == Element, children(deps)) + @test length(dep_list) == 1 + @test tag(dep_list[1]) == "dependency" +end + +#==============================================================================# +# GITHUB ISSUES REGRESSION TESTS # +#==============================================================================# +@testset "GitHub Issues" begin + + #--- Issue #7: attribute order should not affect equality --- + @testset "#7: attribute-order-insensitive ==" begin + a = Element("x"; first="1", second="2") + b = Element("x"; second="2", first="1") + @test a == b + + # Same attrs same order still works + c = Element("x"; a="1", b="2") + d = Element("x"; a="1", b="2") + @test c == d + + # Different values are still not equal + @test Element("x"; a="1") != Element("x"; a="2") + + # Different attr names are not equal + @test Element("x"; a="1") != Element("x"; b="1") + + # Different number of attrs + @test Element("x"; a="1") != Element("x"; a="1", b="2") + + # Parsed elements with same attrs in different order + doc1 = parse("""""", Node) + doc2 = parse("""""", Node) + @test doc1[1] == doc2[1] + + # No attrs vs empty attrs (both are "no attributes") + @test Element("x") == Element("x") + end + + #--- Issue #17: numeric character references --- + @testset "#17: numeric character references (&#decimal; and &#xHex;)" begin + # Decimal character references + @test unescape("<") == "<" + @test unescape(">") == ">" + @test unescape("&") == "&" + @test unescape("'") == "'" + @test unescape(""") == "\"" + + # Hex character references (lowercase x) + 
@test unescape("<") == "<" + @test unescape("<") == "<" + @test unescape(">") == ">" + @test unescape("&") == "&" + @test unescape("'") == "'" + @test unescape(""") == "\"" + + # Uppercase X also works + @test unescape("A") == "A" + + # Unicode character references + @test unescape("A") == "A" + @test unescape("A") == "A" + @test unescape("é") == "\u00e9" # é + @test unescape("é") == "\u00e9" # é + @test unescape("中") == "\u4e2d" # 中 + @test unescape("😀") == "\U0001f600" # 😀 + + # Mixed with named entities + @test unescape("&<<") == "&<<" + @test unescape("<tag>") == "" + + # In parsed XML text + doc = parse("<hello>", Node) + @test simple_value(doc[1]) == "" + + # In parsed XML attributes + doc = parse("""""", Node) + @test doc[1]["a"] == "<>" + + # Non-breaking space + @test unescape(" ") == "\u00a0" + @test unescape(" ") == "\u00a0" + + # Invalid numeric reference preserved verbatim + @test unescape("&#xZZZ;") == "&#xZZZ;" + + # Named entity references that aren't predefined are preserved verbatim + @test unescape("&foo;") == "&foo;" + + # Ampersand without semicolon is preserved + @test unescape("a & b") == "a & b" + end + + #--- Issue #33: empty attributes consistency --- + @testset "#33: empty attributes [] vs nothing" begin + # Constructed elements have empty Vector for attrs + a = Element("x") + # Parsed elements with no attrs have nothing + b = parse("", Node)[1] + # They should compare equal via _eq / _attrs_eq + @test a == b + end + + #--- Issue #35: write → parse preserves structure --- + @testset "#35: write then parse preserves structure" begin + doc = Document( + Declaration(; version="1.0"), + Element("root", + Element("child", "text"), + Element("empty") + ) + ) + xml = XML.write(doc) + doc2 = parse(xml, Node) + root = first(filter(x -> nodetype(x) == Element, children(doc2))) + child_elements = filter(x -> nodetype(x) == Element, children(root)) + @test length(child_elements) == 2 + @test tag(child_elements[1]) == "child" + @test 
tag(child_elements[2]) == "empty" + end + + #--- Issue #50: Base.get with default --- + @testset "#50: Base.get(node, key, default)" begin + el = parse("""""", Node)[1] + + # Existing keys return their values + @test get(el, "a", "default") == "1" + @test get(el, "b", "default") == "2" + + # Non-existing key returns default + @test get(el, "c", "default") == "default" + @test get(el, "c", nothing) === nothing + + # Works on elements with no attributes + el2 = parse("", Node)[1] + @test get(el2, "a", "nope") == "nope" + + # Works on constructed elements + el3 = Element("x"; foo="bar") + @test get(el3, "foo", "default") == "bar" + @test get(el3, "baz", "default") == "default" + end + + #--- Issue #52: escape double-escapes (expected behavior) --- + @testset "#52: escape is not idempotent (by design)" begin + @test escape("&") == "&" + @test escape("&") == "&amp;" # double-escaping is correct + end + + #--- Issue #53: unescape works correctly --- + @testset "#53: unescape works correctly on parsed content" begin + doc = parse("&", Node) + @test simple_value(doc[1]) == "&" + doc = parse("<tag>", Node) + @test simple_value(doc[1]) == "" + end +end + +#==============================================================================# +# TREE NAVIGATION: parent, depth, siblings # +#==============================================================================# +@testset "Tree Navigation" begin + doc = parse("", Node) + root = doc[1] + a = root[1] + a1 = a[1] + a2 = a[2] + b = root[2] + c = root[3] + c1 = c[1] + c1a = c1[1] + + @testset "parent" begin + @test parent(root, doc) === doc + @test parent(a, doc) === root + @test parent(a1, doc) === a + @test parent(c1a, doc) === c1 + @test parent(b, root) === root + @test_throws ErrorException parent(doc, doc) # root has no parent + @test_throws ErrorException parent(Element("x"), doc) # not in tree + end + + @testset "depth" begin + @test depth(doc, doc) == 0 + @test depth(root, doc) == 1 + @test depth(a, doc) == 2 + @test 
depth(a1, doc) == 3 + @test depth(c1a, doc) == 4 + @test depth(b, root) == 1 + @test_throws ErrorException depth(Element("x"), doc) + end + + @testset "siblings" begin + @test siblings(a, doc) == [b, c] + @test siblings(b, doc) == [a, c] + @test siblings(a1, doc) == [a2] + @test siblings(a2, doc) == [a1] + @test isempty(siblings(c1, doc)) + @test_throws ErrorException siblings(doc, doc) # root has no parent + end + + @testset "1-arg parent/depth errors" begin + @test_throws ErrorException parent(a) + @test_throws ErrorException depth(a) + end +end + +#==============================================================================# +# DEPRECATIONS / REMOVED API # +#==============================================================================# +@testset "Deprecations and Removed API" begin + node = Element("test") + node2 = Element("other") + + @testset "XML.next errors" begin + @test_throws ErrorException XML.next(node) + end + + @testset "XML.prev errors" begin + @test_throws ErrorException XML.prev(node) + end + + @testset "XML.nodes_equal errors" begin + @test_throws ErrorException XML.nodes_equal(node, node2) + end + + @testset "XML.escape! errors" begin + @test_throws ErrorException XML.escape!(node) + @test_throws ErrorException XML.escape!(node, false) + end + + @testset "XML.unescape! 
errors" begin + @test_throws ErrorException XML.unescape!(node) + @test_throws ErrorException XML.unescape!(node, false) + end + + @testset "XML.Raw errors" begin + @test_throws ErrorException XML.Raw() + @test_throws ErrorException XML.Raw("arg") + end + + @testset "simplevalue binding redirects to simple_value" begin + el = Element("x", "val") + @test XML.simplevalue(el) == simple_value(el) + end +end + +#==============================================================================# +# XPATH # +#==============================================================================# +@testset "XPath" begin + doc = parse(""" + + Alice + Bob + Carol + + dark + """, Node) + + @testset "absolute path" begin + results = xpath(doc, "/root/users/user") + @test length(results) == 3 + @test all(n -> tag(n) == "user", results) + end + + @testset "single child" begin + results = xpath(doc, "/root/settings/theme") + @test length(results) == 1 + @test tag(results[1]) == "theme" + end + + @testset "positional predicate [n]" begin + results = xpath(doc, "/root/users/user[1]") + @test length(results) == 1 + @test results[1]["id"] == "1" + + results = xpath(doc, "/root/users/user[3]") + @test length(results) == 1 + @test results[1]["id"] == "3" + end + + @testset "[last()]" begin + results = xpath(doc, "/root/users/user[last()]") + @test length(results) == 1 + @test results[1]["id"] == "3" + end + + @testset "out of bounds predicate" begin + results = xpath(doc, "/root/users/user[99]") + @test isempty(results) + end + + @testset "has-attribute predicate [@attr]" begin + results = xpath(doc, "/root/users/user[@role]") + @test length(results) == 3 + end + + @testset "attribute-value predicate [@attr='v']" begin + results = xpath(doc, "/root/users/user[@role='admin']") + @test length(results) == 2 + ids = sort([n["id"] for n in results]) + @test ids == ["1", "3"] + end + + @testset "attribute-value with double quotes" begin + results = xpath(doc, """/root/users/user[@id="2"]""") + @test 
length(results) == 1 + @test results[1]["id"] == "2" + end + + @testset "descendant //" begin + results = xpath(doc, "//name") + @test length(results) == 3 + @test all(n -> tag(n) == "name", results) + end + + @testset "// with predicate" begin + results = xpath(doc, "//user[@role='admin']/name") + @test length(results) == 2 + end + + @testset "wildcard *" begin + results = xpath(doc, "/root/*") + @test length(results) == 2 + @test Set(tag.(results)) == Set(["users", "settings"]) + end + + @testset "text()" begin + results = xpath(doc, "/root/settings/theme/text()") + @test length(results) == 1 + @test value(results[1]) == "dark" + end + + @testset "node()" begin + results = xpath(doc, "/root/users/user[1]/node()") + @test length(results) >= 1 + end + + @testset "attribute selection @attr" begin + results = xpath(doc, "//user/@id") + @test length(results) == 3 + vals = sort([value(n) for n in results]) + @test vals == ["1", "2", "3"] + end + + @testset "self ." begin + results = xpath(doc, ".") + @test length(results) == 1 + @test results[1] === doc + end + + @testset "no match returns empty" begin + @test isempty(xpath(doc, "/root/nonexistent")) + @test isempty(xpath(doc, "//nonexistent")) + end + + @testset "empty expression" begin + @test isempty(xpath(doc, "")) + end + + @testset "deep // with path" begin + results = xpath(doc, "//theme/text()") + @test length(results) == 1 + @test value(results[1]) == "dark" + end + + @testset "error: unterminated predicate" begin + @test_throws ErrorException xpath(doc, "/root/user[1") + end + + @testset "error: unsupported predicate" begin + @test_throws ErrorException xpath(doc, "/root/user[position()>1]") + end + + @testset "self-closing elements" begin + doc2 = parse("", Node) + @test length(xpath(doc2, "/root/*")) == 3 + end + + @testset "relative path" begin + root = xpath(doc, "/root")[1] + results = xpath(root, "users/user") + @test length(results) == 3 + end + + @testset ".. 
parent navigation" begin + # /root/users/user[1]/.. goes back to + results = xpath(doc, "/root/users/user[1]/..") + @test length(results) == 1 + @test tag(results[1]) == "users" + end + + @testset ".. in mid-path" begin + # /root/users/.. should go back to root + results = xpath(doc, "/root/users/..") + @test length(results) == 1 + @test tag(results[1]) == "root" + end + + @testset "// mid-path" begin + # /root//name finds all elements anywhere under root + results = xpath(doc, "/root//name") + @test length(results) == 3 + @test all(n -> tag(n) == "name", results) + end + + @testset "// with wildcard //*" begin + doc2 = parse("", Node) + results = xpath(doc2, "//*") + tags = [tag(n) for n in results if nodetype(n) === Element] + @test "r" in tags + @test "a" in tags + @test "b" in tags + @test "c" in tags + end + + @testset "// with text()" begin + results = xpath(doc, "//text()") + @test length(results) >= 3 # at least Alice, Bob, Carol + vals = [value(n) for n in results] + @test "Alice" in vals + @test "Bob" in vals + @test "dark" in vals + end + + @testset "multiple // segments" begin + results = xpath(doc, "//users//name") + @test length(results) == 3 + @test all(n -> tag(n) == "name", results) + end + + @testset "chained predicates" begin + results = xpath(doc, "/root/users/user[@role='admin'][1]") + @test length(results) == 1 + @test results[1]["id"] == "1" + end + + @testset "@attr with no match" begin + results = xpath(doc, "//user/@nonexistent") + @test isempty(results) + end + + @testset "namespaced tag" begin + doc2 = parse("""val""", Node) + results = xpath(doc2, "/root/ns:item") + @test length(results) == 1 + @test tag(results[1]) == "ns:item" + end + + @testset "whitespace in expression" begin + results = xpath(doc, " / root / users / user ") + @test length(results) == 3 + end + + @testset "error: empty @" begin + @test_throws ErrorException xpath(doc, "/root/@") + end + + @testset "error: unknown function" begin + @test_throws ErrorException 
xpath(doc, "/root/foo()") + end + + @testset "error: unexpected character" begin + @test_throws ErrorException xpath(doc, "/root/!bad") + end + + @testset "deep nesting" begin + doc2 = parse("deep", Node) + results = xpath(doc2, "//e/text()") + @test length(results) == 1 + @test value(results[1]) == "deep" + end + + @testset "wildcard with predicate" begin + doc2 = parse("""""", Node) + results = xpath(doc2, "/r/*[@x]") + @test length(results) == 2 + end + + @testset "// from non-document node" begin + root = xpath(doc, "/root")[1] + results = xpath(root, "//name") + @test length(results) == 3 + end +end + +#==============================================================================# +# LAZYNODE # +#==============================================================================# +@testset "LazyNode" begin + @testset "parse and nodetype" begin + doc = parse("", LazyNode) + @test nodetype(doc) == Document + + doc2 = parse(LazyNode, "") + @test nodetype(doc2) == Document + end + + @testset "read from IO" begin + xml = """hello""" + doc = read(IOBuffer(xml), LazyNode) + @test nodetype(doc) == Document + end + + @testset "read from file" begin + path = joinpath(@__DIR__, "data", "books.xml") + isfile(path) || return + doc = read(path, LazyNode) + @test nodetype(doc) == Document + @test length(children(doc)) > 0 + end + + @testset "Document children" begin + xml = """""" + doc = parse(xml, LazyNode) + ch = children(doc) + @test length(ch) == 2 + @test nodetype(ch[1]) == Declaration + @test nodetype(ch[2]) == Element + end + + @testset "Document with all prolog node types" begin + xml = """""" + doc = parse(xml, LazyNode) + ch = children(doc) + types = map(nodetype, ch) + @test Declaration in types + @test DTD in types + @test Comment in types + @test ProcessingInstruction in types + @test Element in types + end + + @testset "Element tag" begin + doc = parse("", LazyNode) + @test tag(doc[1]) == "root" + end + + @testset "tag returns nothing for non-element/PI" begin + 
doc = parse("text", LazyNode) + text_node = children(doc[1])[1] + @test nodetype(text_node) == Text + @test tag(text_node) === nothing + end + + @testset "Element attributes" begin + doc = parse("""""", LazyNode) + attrs = attributes(doc[1]) + @test attrs isa Attributes + @test attrs["a"] == "1" + @test attrs["b"] == "2" + end + + @testset "Element with no attributes" begin + doc = parse("", LazyNode) + @test attributes(doc[1]) === nothing + end + + @testset "attributes returns nothing for non-element" begin + doc = parse("text", LazyNode) + @test attributes(children(doc[1])[1]) === nothing + end + + @testset "attributes unescape entity references" begin + doc = parse("""""", LazyNode) + @test doc[1]["a"] == "a&b" + end + + @testset "Declaration attributes" begin + doc = parse("""""", LazyNode) + decl = doc[1] + @test nodetype(decl) == Declaration + attrs = attributes(decl) + @test attrs["version"] == "1.0" + @test attrs["encoding"] == "UTF-8" + end + + @testset "get with default" begin + doc = parse("""""", LazyNode) + el = doc[1] + @test get(el, "a", "nope") == "1" + @test get(el, "b", "nope") == "nope" + end + + @testset "get on non-element returns default" begin + doc = parse("text", LazyNode) + text_node = children(doc[1])[1] + @test get(text_node, "a", "default") == "default" + end + + @testset "getindex with string key" begin + doc = parse("""""", LazyNode) + @test doc[1]["a"] == "1" + @test_throws KeyError doc[1]["nonexistent"] + end + + @testset "haskey" begin + doc = parse("""""", LazyNode) + @test haskey(doc[1], "a") == true + @test haskey(doc[1], "b") == false + end + + @testset "keys" begin + doc = parse("""""", LazyNode) + @test keys(doc[1]) == ["a", "b"] + end + + @testset "keys on element with no attributes" begin + doc = parse("", LazyNode) + @test isempty(keys(doc[1])) + end + + @testset "keys on non-element" begin + doc = parse("text", LazyNode) + @test keys(children(doc[1])[1]) == () + end + + @testset "Text value" begin + doc = parse("hello", 
LazyNode) + ch = children(doc[1]) + @test nodetype(ch[1]) == Text + @test value(ch[1]) == "hello" + end + + @testset "Text value unescapes entities" begin + doc = parse("& < >", LazyNode) + @test value(children(doc[1])[1]) == "& < >" + end + + @testset "has_entities short-circuit (zero-copy, correctness)" begin + # Entity-free Text: returns the raw SubString view, no allocation. + doc = parse("plain text no entities", LazyNode) + v = value(children(doc[1])[1]) + @test v isa SubString{String} + @test v == "plain text no entities" + @test (@allocated value(children(doc[1])[1])) ≥ 0 # smoke + + # Entity-bearing Text: still decodes byte-for-byte like unescape. + d2 = parse("a & b A A <", LazyNode) + tv = value(children(d2[1])[1]) + @test tv == unescape(SubString("a & b A A <")) + @test tv == "a & b A A <" + + # Entity-free attribute: zero-copy SubString view. + d3 = parse("""""", LazyNode) + c = d3[1] + @test get(c, "r", nothing) isa SubString{String} + @test get(c, "r", nothing) == "A1" + a = attributes(c) + @test a["s"] == "3" + @test a["s"] isa SubString{String} + pairs_collected = collect(eachattribute(c)) + @test pairs_collected == ["r" => "A1", "s" => "3", "t" => "n"] + @test all(p -> last(p) isa SubString{String}, pairs_collected) + + # Entity-bearing attribute: decoded. + d4 = parse("""""", LazyNode) + x = d4[1] + @test x["a"] == "x & y" + @test get(x, "b", nothing) == "plain" + @test get(x, "b", nothing) isa SubString{String} + @test attributes(x)["a"] == "x & y" + @test Dict(eachattribute(x)) == Dict("a" => "x & y", "b" => "plain") + + # CDATA carries markup characters verbatim — never entity-decoded. + d5 = parse("", LazyNode) + cd = children(d5[1])[1] + @test nodetype(cd) == CData + @test value(cd) == "a & b < c & d" + + # is_simple_value: entity-free returns view, entity-bearing decodes. 
+ s1 = parse("simple", LazyNode)[1] + @test XML.is_simple_value(s1) == "simple" + @test XML.is_simple_value(s1) isa SubString{String} + s2 = parse("a & b", LazyNode)[1] + @test XML.is_simple_value(s2) == "a & b" + end + + @testset "Comment value" begin + doc = parse("", LazyNode) + c = children(doc[1])[1] + @test nodetype(c) == Comment + @test value(c) == " a comment " + end + + @testset "CData value" begin + doc = parse("]]>", LazyNode) + cd = children(doc[1])[1] + @test nodetype(cd) == CData + @test value(cd) == "raw " + end + + @testset "DTD value" begin + doc = parse("""""", LazyNode) + dtd = doc[1] + @test nodetype(dtd) == DTD + @test contains(value(dtd), "greeting") + end + + @testset "ProcessingInstruction tag and value" begin + doc = parse("", LazyNode) + pi = doc[1] + @test nodetype(pi) == ProcessingInstruction + @test tag(pi) == "mypi" + @test value(pi) == "some data" + end + + @testset "ProcessingInstruction with no content" begin + doc = parse("", LazyNode) + pi = doc[1] + @test tag(pi) == "target" + @test value(pi) === nothing + end + + @testset "value returns nothing for Element/Document" begin + doc = parse("", LazyNode) + @test value(doc) === nothing + @test value(doc[1]) === nothing + end + + @testset "Element children" begin + doc = parse("", LazyNode) + root = doc[1] + @test length(children(root)) == 3 + @test tag(children(root)[1]) == "a" + @test tag(children(root)[2]) == "b" + @test tag(children(root)[3]) == "c" + end + + @testset "self-closing element has no children" begin + doc = parse("
", LazyNode) + br = children(doc[1])[1] + @test isempty(children(br)) + end + + @testset "non-element children returns empty tuple" begin + doc = parse("text", LazyNode) + text_node = children(doc[1])[1] + @test children(text_node) == () + end + + @testset "nested elements" begin + doc = parse("
deep", LazyNode) + @test tag(doc[1]) == "a" + @test tag(doc[1][1]) == "b" + @test tag(doc[1][1][1]) == "c" + @test simple_value(doc[1][1][1]) == "deep" + end + + @testset "mixed content children" begin + xml = "text" + doc = parse(xml, LazyNode) + ch = children(doc[1]) + types = map(nodetype, ch) + @test Text in types + @test Comment in types + @test CData in types + @test ProcessingInstruction in types + @test Element in types + end + + @testset "integer indexing" begin + doc = parse("", LazyNode) + @test tag(doc[1][1]) == "a" + @test tag(doc[1][2]) == "b" + @test tag(doc[1][3]) == "c" + end + + @testset "colon indexing" begin + doc = parse("", LazyNode) + all = doc[1][:] + @test length(all) == 2 + end + + @testset "lastindex" begin + doc = parse("", LazyNode) + @test tag(doc[1][end]) == "c" + end + + @testset "only" begin + doc = parse("", LazyNode) + @test tag(only(doc[1])) == "only" + end + + @testset "length" begin + doc = parse("", LazyNode) + @test length(doc[1]) == 3 + end + + @testset "is_simple" begin + doc = parse("text", LazyNode) + simple = children(doc[1])[1] + complex = children(doc[1])[2] + @test is_simple(simple) + @test !is_simple(complex) + end + + @testset "is_simple with attributes" begin + doc = parse("""text""", LazyNode) + @test !is_simple(children(doc[1])[1]) + end + + @testset "is_simple with CData child" begin + doc = parse("", LazyNode) + @test is_simple(children(doc[1])[1]) + end + + @testset "is_simple returns false for non-element" begin + doc = parse("text", LazyNode) + @test !is_simple(children(doc[1])[1]) + end + + @testset "simple_value" begin + doc = parse("hello", LazyNode) + @test simple_value(children(doc[1])[1]) == "hello" + end + + @testset "simple_value errors on non-simple" begin + doc = parse("", LazyNode) + @test_throws ErrorException simple_value(children(doc[1])[1]) + end + + @testset "simple_value errors on non-element" begin + doc = parse("text", LazyNode) + @test_throws ErrorException 
simple_value(children(doc[1])[1]) + end + + @testset "show Document" begin + doc = parse("", LazyNode) + s = sprint(show, doc) + @test contains(s, "Lazy") + @test contains(s, "Document") + @test contains(s, "1 child") + end + + @testset "show Document multiple children" begin + doc = parse("", LazyNode) + s = sprint(show, doc) + @test contains(s, "2 children") + end + + @testset "show Element" begin + doc = parse("""""", LazyNode) + s = sprint(show, doc[1]) + @test contains(s, "Lazy Element") + @test contains(s, "hello", LazyNode) + s = sprint(show, children(doc[1])[1]) + @test contains(s, "Lazy Text") + @test contains(s, "hello") + end + + @testset "show Comment" begin + doc = parse("", LazyNode) + s = sprint(show, children(doc[1])[1]) + @test contains(s, "Lazy Comment") + @test contains(s, " + + + text content + inner + + + + +""" + doc = parse(xml, LazyNode) + @test nodetype(doc) == Document + + typed = filter(x -> nodetype(x) != Text, children(doc)) + @test nodetype(typed[1]) == Declaration + @test nodetype(typed[2]) == DTD + @test nodetype(typed[3]) == Comment + @test nodetype(typed[4]) == ProcessingInstruction + @test nodetype(typed[5]) == Element + + root = typed[5] + @test tag(root) == "root" + @test root["attr"] == "val" + + inner = children(root) + inner_types = map(nodetype, inner) + @test Text in inner_types + @test Element in inner_types + @test CData in inner_types + @test Comment in inner_types + @test ProcessingInstruction in inner_types + + child_els = filter(x -> nodetype(x) == Element, inner) + @test length(child_els) == 2 + @test tag(child_els[1]) == "child" + @test simple_value(child_els[1]) == "inner" + @test tag(child_els[2]) == "empty" + end + + @testset "sourcetext" begin + @testset "self-closing element" begin + doc = parse("", LazyNode) + @test sourcetext(doc[1]) == "" end - lzxml = """ hello hello preserve """ - lz = XML.parse(XML.LazyNode, lzxml) - n=XML.next(lz) - n=XML.next(n) - text_content = XML.write(n) - @test text_content == "" - 
n=XML.next(n) - text_content = XML.write(n) - @test text_content == "hello" - n=XML.next(n) - text_content = XML.write(n) - @test text_content == "hello" - n=XML.next(n) - text_content = XML.write(n) - @test text_content == " hello preserve " - n=XML.prev(n) - text_content = XML.write(n) - @test text_content == "hello" - n=XML.next(n) - text_content = XML.write(n) - @test text_content == " hello preserve " - n=XML.next(n) - text_content = XML.write(n) - @test text_content == " hello " - n=XML.next(n) - text_content = XML.write(n) - @test text_content == " preserve " - n=XML.next(n) - text_content = XML.write(n) - @test text_content == " preserve " - n=XML.next(n) - text_content = XML.write(n) - @test text_content == "" - n=XML.next(n) - text_content = XML.write(n) - @test text_content == "" - n=XML.prev(n) - text_content = XML.write(n) - @test text_content == "" - n=XML.prev(n) - text_content = XML.write(n) - @test text_content == " preserve " - n=XML.prev(n) - text_content = XML.write(n) - @test text_content == " preserve " - n=XML.prev(n) - text_content = XML.write(n) - @test text_content == " hello " - n=XML.prev(n) - text_content = XML.write(n) - @test text_content == " hello preserve " - n=XML.next(n) - text_content = XML.write(n) - @test text_content == " hello " - n=XML.prev(n) - text_content = XML.write(n) - @test text_content == " hello preserve " - n=XML.prev(n) - text_content = XML.write(n) - @test text_content == "hello" - n=XML.prev(n) - text_content = XML.write(n) - @test text_content == "hello" - n=XML.prev(n) - text_content = XML.write(n) - @test text_content == "" - n=XML.prev(n) - text_content = XML.write(n) - @test text_content == "\n \n hello\n hello preserve \n \n \n" - end - - @testset "depth and parent" begin - @test XML.depth(data) == 0 - @test isnothing(XML.parent(data)) - @test XML.depth(doc[1]) == 1 - @test XML.parent(doc[1]) == data - @test XML.depth(doc[2]) == 1 - @test XML.depth(doc[3]) == 2 - @test XML.parent(doc[3]) == doc[2] - @test 
XML.depth(doc[end]) == 1 - @test XML.parent(doc[end]) == data - end - - @testset "tag/attributes/value" begin - x = doc[1] # - @test XML.tag(x) === nothing - @test XML.attributes(x) == Dict("version" => "1.0") - @test XML.value(x) === nothing - - x = XML.next(x) # - @test XML.tag(x) == "catalog" - @test XML.attributes(x) === nothing - @test XML.value(x) === nothing - - x = XML.next(x) # - @test XML.tag(x) == "book" - @test XML.attributes(x) == Dict("id" => "bk101") - @test XML.value(x) === nothing - - x = XML.next(x) # - @test XML.tag(x) == "author" - @test XML.attributes(x) === nothing - @test XML.value(x) === nothing - - x = XML.next(x) # Gambardella, Matthew - @test XML.tag(x) === nothing - @test XML.attributes(x) === nothing - @test XML.value(x) == "Gambardella, Matthew" - end -end - -#-----------------------------------------------------------------------------# Preserve whitespace -@testset "xml:space" begin - @testset "Basic xml:space functionality" begin - - # Test 1: xml:space="preserve" should preserve entirely empty whitespace - xml1 = """ """ - doc1 = parse(XML.Node, xml1) - text_content = XML.value(doc1[1][1][1]) - @test text_content == " " - - # Test 2: xml:space="preserve" should preserve leading and trailing whitespace - xml2 = """ leading and trailing spaces """ - doc2 = parse(XML.Node, xml2) - text_content = XML.value(doc2[1][1][1]) - @test text_content == " leading and trailing spaces " - - # Test 3: Entirely empty tags with and without xml:space="preserve" become self-closing - xml3 = """ """ - doc3 = XML.parse(XML.Node, xml3) - text_content = XML.write(doc3[1][1]) - @test text_content == "" # without xml:space="preserve", empty text becomes self-closing - text_content = XML.value(doc3[1][2][1]) - @test text_content == " " # with xml:space, whitespace is preserved - text_content = XML.write(doc3[1][3]) - @test text_content == "" # with xml:space="preserve", empty text becomes self-closing - - # Test 4: Without xml:space, whitespace should be 
normalized - xml4 = """ gets normalized """ - doc4 = XML.parse(XML.Node, xml4) - text_content = XML.value(doc4[1][1][1]) - @test text_content == "gets normalized" - - # Test 5: xml:space="default" should normalize even with preserve_xml_space=true - xml5 = """ gets normalized """ - doc5 = XML.parse(XML.Node, xml5) - text_content = XML.value(doc5[1][1][1]) - @test text_content == "gets normalized" - end - - @testset "xml:space inheritance" begin - # Test 6: Children inherit parent's xml:space="preserve" - xml6 = """ - parent text - child text - - """ - doc6 = XML.parse(XML.Node, xml6) - # Both parent and child should preserve whitespace - @test contains(XML.value(doc6[1][2][1]), "parent text \n") - @test XML.value(doc6[1][2][2][1]) == " child text " - - # Test 7: xml:space="default" overrides parent's "preserve" - xml7 = """ - normalized despite parent - """ - doc7 = XML.parse(XML.Node, xml7) - @test XML.value(doc7[1][2][1]) == "normalized despite parent" - end - - @testset "Nesting scenarios" begin - # Test 8: Multiple levels of xml:space changes - xml8 = """ - preserved - normalized - preserved again - - - """ - doc8 = XML.parse(XML.Node, xml8) - - # level1 should preserve (inherits from root) - level1_text = XML.value(doc8[1][2][1]) - @test level1_text == " preserved \n " - - # level2 should normalize (explicit xml:space="default") - level2_text = XML.value(doc8[1][2][2][1]) - @test level2_text == "normalized" - - # level3 should preserve (explicit xml:space="preserve") - level3_text = XML.value(doc8[1][2][2][2][1]) - @test level3_text == " preserved again " - - # Test 9: repeated multiple levels of xml:space changes - xml9 = """ - preserved - normalized - preserved again - - - preserved b - normalized b - preserved again b - - - """ - doc9 = XML.parse(XML.Node, xml9) - - # level1b should preserve (inherits from root) - level1b_text = XML.value(doc9[1][4][1]) - @test level1b_text == " preserved b \n " - - # level2 should normalize (explicit xml:space="default") - 
level2b_text = XML.value(doc9[1][4][2][1]) - @test level2b_text == "normalized b" - - # level3 should preserve (explicit xml:space="preserve") - level3b_text = XML.value(doc9[1][4][2][2][1]) - @test level3b_text == " preserved again b " - - # Test 10: futher repeated multiple levels of xml:space changes - xml10 = """ - normalized - normalized b - preserved - - - normalized c - preserved b - normalized again b - preserved c - - - - normalized d - """ - doc10 = XML.parse(XML.Node, xml10) - - # level1 should normalize (as root) - level1_text = XML.value(doc10[end][1][1]) - @test level1_text == "normalized" - - # level2 should normalize (as root and level1) - level2_text = XML.value(doc10[end][1][2][1]) - @test level2_text == "normalized b" - - # level3 should preserve (explicit xml:space="preserve") - level3_text = XML.value(doc10[end][1][2][2][1]) - @test level3_text == " preserved " - - # level1b should normalize (as root) - level1b_text = XML.value(doc10[end][2][1]) - @test level1b_text == "normalized c" - - # level2b should preserve (explicit xml:space="preserve") - level2b_text = XML.value(doc10[end][2][2][1]) - @test level2b_text == " preserved b \n " - - # level3 should normalize (explicit xml:space="default") - level3b_text = XML.value(doc10[end][2][2][2][1]) - @test level3b_text == "normalized again b" - - # level3c should preserve (inherited from level2b) - level3c_text = XML.value(doc10[end][2][2][4][1]) - @test level3c_text == " preserved c \n " - - # level1c should normalize (as root) - level1c_text = XML.value(doc10[end][3][1]) - @test level1c_text == "normalized d" - end - @testset "inter-element gap semantics" begin - # Default parent: gap between siblings should be dropped - s1 = """ x - y """ - d1 = XML.parse(XML.Node, s1) - @test length(d1[1]) == 2 - @test XML.value(d1[1][1][1]) == "x" - @test XML.value(d1[1][2][1]) == "y" - - # Preserve parent, default child ends: gap after default child dropped - s2 = """ -

keep

- norm - after default gap -
""" - d2 = XML.parse(XML.Node, s2) - @test length(d2[1]) == 7 - @test XML.value(d2[1][1]) == "\n " - @test XML.value(d2[1][2][1]) == " keep " - @test XML.value(d2[1][3]) == "\n " - @test XML.value(d2[1][4][1]) == "norm" - @test XML.value(d2[1][5]) == "\n " - @test XML.value(d2[1][6][1]) == " after default gap " - @test XML.value(d2[1][7]) == "\n" - end - @testset "XML whitespace vs Unicode whitespace" begin - nbsp = "\u00A0" - s = """ - x\t\n - $(nbsp) y $(nbsp) - $(nbsp) z $(nbsp) - """ - d = XML.parse(XML.Node, s) - @test XML.value(d[1][1][1]) == "x" - @test XML.value(d[1][2][1]) == "$(nbsp) y $(nbsp)" - @test XML.value(d[1][3][1]) == "$(nbsp) z $(nbsp)" - end - - @testset "CDATA/Comment/PI boundaries" begin - s = """ - pre post - pre post - - """ - d = XML.parse(XML.Node, s) - @test XML.value(d[1][1][1]) == "pre" - @test nodetype(d[1][1][2]) == XML.CData - @test XML.value(d[1][1][3]) == "post" - @test XML.value(d[1][2][1]) == " pre " - @test nodetype(d[1][2][2]) == XML.Comment - @test XML.value(d[1][2][3]) == " post " - @test nodetype(d[1][3]) == XML.ProcessingInstruction - end - - @testset "nested toggles and sibling sequences" begin - s = """ - a - b - c - - d - e - - """ - d = XML.parse(XML.Node, s) - @test XML.value(d[1][2][1]) == " a \n " - @test XML.value(d[1][2][2][1]) == "b" - @test XML.value(d[1][2][2][2][1]) == " c " - @test d[1][2][4].tag == "y2" - @test XML.value(d[1][2][4][1]) == "d" - @test d[1][2][6].tag == "w" - @test XML.value(d[1][2][6][1]) == " e " - end - - @testset "root/document boundaries" begin - s = "\n \n a \n \t " - d = XML.parse(XML.Node, s) - @test length(d) == 1 - @test XML.value(d[1][1]) == "a" - end - - @testset "entities expanding to whitespace" begin - chr1="\u0020" - chr2="\u000A" - chr3="\u00A0" - - s = """ - $(chr1) a $(chr2) - $(chr1) b $(chr2) - $(chr3)c$(chr3) - """ - d = XML.parse(XML.Node, s) - @test XML.value(d[1][1][1]) == "a" - @test XML.value(d[1][2][1]) == " b \n" - @test XML.value(d[1][3][1]) == "$(chr3)c$(chr3)" - 
end - - @testset "invalid values and placement" begin - s_bad = """ t """ - @test_throws ErrorException XML.parse(XML.Node, s_bad) - - s_pi = """ t """ - d = XML.parse(XML.Node, s_pi) - @test XML.value(d[end][1]) == "t" - - s_dup = """ t """ -# @test_throws ErrorException XML.parse(XML.Node, s_dup) - end - - @testset "prev()/next() symmetry" begin - xml = """ - a b c - d e f - i - """ - r = XML.parse(XML.LazyNode, xml).raw - toks=XML.Raw[] - while true - n = XML.next(r) - n === nothing && break - push!(toks, n) - r=n + @testset "element with attributes" begin + xml = """""" + doc = parse(xml, LazyNode) + @test sourcetext(doc[1]) == xml end - back = XML.Raw[] - r = toks[end] - while true - p = XML.prev(r) - p === nothing && break - push!(back, p) - r = p + + @testset "element with children" begin + xml = "text" + doc = parse(xml, LazyNode) + @test sourcetext(doc[1]) == xml + root = doc[1] + child = first(c for c in children(root) if nodetype(c) == Element) + @test sourcetext(child) == "text" end - @test reverse(back)[2:end] == toks[1:end-1] - end - - @testset "write/read roundtrip extremes" begin - xml = """ -

- - r - pre post -
""" - n = XML.parse(XML.Node, xml) - io = IOBuffer(); XML.write(io, n) - n2 = XML.parse(XML.Node, String(take!(io))) - @test n == n2 - @test XML.write(n2[1][1]) == "

" - @test XML.write(n2[1][2]) == "" - @test XML.value(n2[1][3][1]) == "r" - @test XML.write(n2[1][4]) == " pre post " - end - - @testset "self-closing/empty/whitespace-only children" begin - s = """ - - - - - x y - """ - d = XML.parse(XML.Node, s) - @test XML.write(d[1][1]) == "" - @test XML.write(d[1][2]) == "" - @test XML.value(d[1][3][1]) == " " - @test XML.value(d[1][5][1]) == "x" - @test XML.value(d[1][5][3]) == "y" - end - - @testset "allocation guard: small xml:space doc" begin - xml = " x y
" - f() = XML.parse(XML.Node, xml) - a = @allocated f() - @test a < 500_000 # tune for CI - end - -end - -#-----------------------------------------------------------------------------# roundtrip -@testset "read/write/read roundtrip" begin - for path in all_files - node = read(path, Node) - temp = tempname() * ".xml" - XML.write(temp, node) - node2 = read(temp, Node) - @test node == node2 - - #For debugging: - for (a,b) in zip(AbstractTrees.Leaves(node), AbstractTrees.Leaves(node2)) - if a != b - @info path - @info a - @info b - error() - end + + @testset "nested elements" begin + xml = "deep" + doc = parse(xml, LazyNode) + a = doc[1] + @test sourcetext(a) == xml + b = first(c for c in children(a) if nodetype(c) == Element) + @test sourcetext(b) == "deep" + end + + @testset "comment" begin + xml = "" + doc = parse(xml, LazyNode) + @test sourcetext(doc[1]) == "" + end + + @testset "cdata" begin + xml = "]]>" + doc = parse(xml, LazyNode) + cdata = first(c for c in children(doc[1]) if nodetype(c) == CData) + @test sourcetext(cdata) == "]]>" + end + + @testset "processing instruction" begin + xml = "" + doc = parse(xml, LazyNode) + @test sourcetext(doc[1]) == "" + end + + @testset "declaration" begin + xml = """""" + doc = parse(xml, LazyNode) + @test sourcetext(doc[1]) == """""" + end + + @testset "DTD" begin + xml = """""" + doc = parse(xml, LazyNode) + @test sourcetext(doc[1]) == """""" + end + + @testset "text node" begin + doc = parse("hello world", LazyNode) + txt = first(c for c in children(doc[1]) if nodetype(c) == Text) + @test sourcetext(txt) == "hello world" + end + + @testset "document" begin + xml = "hello" + doc = parse(xml, LazyNode) + @test sourcetext(doc) == xml + end + + @testset "mixed content" begin + xml = "

Hello world and more

" + doc = parse(xml, LazyNode) + @test sourcetext(doc[1]) == xml end end -end -#-----------------------------------------------------------------------------# Node writing -@testset "Node writing" begin - doc = Document( - DTD("root_tag"), - Declaration(version=1.0), - Comment("comment"), - ProcessingInstruction("xml-stylesheet", href="mystyle.css", type="text/css"), - Element("root_tag", CData("cdata"), Text("text")) - ) - @test map(nodetype, children(doc)) == [DTD,Declaration,Comment,ProcessingInstruction,Element] - @test length(children(doc[end])) == 2 - @test nodetype(doc[end][1]) == XML.CData - @test nodetype(doc[end][2]) == XML.Text - @test value(doc[end][1]) == "cdata" - @test value(doc[end][2]) == "text" - - #set/get index for attributes - o = doc[end] - @test isempty(keys(o)) - o["id"] = 1 - @test o["id"] == "1" - @test keys(o) == keys(Dict("id" => "1")) -end - -#-----------------------------------------------------------------------------# Issues -@testset "Issues" begin - # https://github.com/JuliaComputing/XML.jl/issues/12: DTD content was cut short - s = """ - - - - ]> - """ - - doc = parse(Node, s) - @test value(only(doc)) == s[11:end-2] # note [...] - - # https://github.com/JuliaComputing/XML.jl/issues/14 (Sorted Attributes) - kw = NamedTuple(OrderedDict(Symbol(k) => Int(k) for k in 'a':'z')) - xyz = XML.Element("point"; kw...) 
- @test collect(keys(attributes(xyz))) == string.(collect('a':'z')) + @testset "write(::LazyNode)" begin + @testset "write returns String" begin + xml = "text" + doc = parse(xml, LazyNode) + @test XML.write(doc[1]) == xml + @test XML.write(doc[1]) isa String + end + + @testset "write to IO" begin + xml = "text" + doc = parse(xml, LazyNode) + io = IOBuffer() + XML.write(io, doc[1]) + @test String(take!(io)) == xml + end + end + + @testset "eachchildnode" begin + @testset "matches children for element" begin + xml = "text" + doc = parse(xml, LazyNode) + root = doc[1] + eager = children(root) + lazy = collect(eachchildnode(root)) + @test length(eager) == length(lazy) + @test map(nodetype, eager) == map(nodetype, lazy) + @test map(tag, eager) == map(tag, lazy) + end + + @testset "self-closing element has no children" begin + doc = parse("", LazyNode) + @test isempty(collect(eachchildnode(doc[1]))) + end + + @testset "document children" begin + xml = """""" + doc = parse(xml, LazyNode) + eager = children(doc) + lazy = collect(eachchildnode(doc)) + @test length(eager) == length(lazy) + @test map(nodetype, eager) == map(nodetype, lazy) + end + + @testset "mixed content types" begin + xml = """text""" + doc = parse(xml, LazyNode) + root = doc[1] + types = [nodetype(c) for c in eachchildnode(root)] + @test Text in types + @test Comment in types + @test CData in types + @test ProcessingInstruction in types + @test Element in types + end + + @testset "sourcetext works on eachchildnode results" begin + xml = "helloworld" + doc = parse(xml, LazyNode) + root = doc[1] + results = [XML.write(c) for c in eachchildnode(root)] + @test results == ["hello", "world"] + end + + @testset "non-element/document returns empty" begin + xml = "" + doc = parse(xml, LazyNode) + comment = doc[1] + @test nodetype(comment) == Comment + @test isempty(collect(eachchildnode(comment))) + end + end end +include("test_abstracttrees_ext.jl") +include("test_pugixml.jl") +include("test_libexpat.jl") 
+include("test_w3c.jl") diff --git a/test/test_abstracttrees_ext.jl b/test/test_abstracttrees_ext.jl new file mode 100644 index 0000000..e30bc5c --- /dev/null +++ b/test/test_abstracttrees_ext.jl @@ -0,0 +1,89 @@ +import AbstractTrees + +@testset "AbstractTrees extension" begin + xml = """ + + + + + One + Alice + + + Two + + + """ + + @testset "extension is loaded" begin + @test Base.get_extension(XML, :XMLAbstractTreesExt) !== nothing + end + + @testset "children (Node)" begin + doc = parse(xml, Node) + @test AbstractTrees.children(doc) == XML.children(doc) + lib = first(filter(c -> nodetype(c) == Element, XML.children(doc))) + @test AbstractTrees.children(lib) == XML.children(lib) + + title = first(filter(c -> nodetype(c) == Element, XML.children(lib)))[1] + # `One` — title element's only child is a Text node with no children + @test isempty(AbstractTrees.children(title)) + end + + @testset "children (LazyNode)" begin + ldoc = parse(xml, LazyNode) + @test length(AbstractTrees.children(ldoc)) == length(XML.children(ldoc)) + lib = first(filter(c -> nodetype(c) == Element, XML.children(ldoc))) + @test length(AbstractTrees.children(lib)) == length(XML.children(lib)) + end + + @testset "nodevalue identity" begin + doc = parse(xml, Node) + @test AbstractTrees.nodevalue(doc) === doc + ldoc = parse(xml, LazyNode) + @test AbstractTrees.nodevalue(ldoc) === ldoc + end + + @testset "traits" begin + @test AbstractTrees.NodeType(Node) === AbstractTrees.HasNodeType() + @test AbstractTrees.NodeType(LazyNode) === AbstractTrees.HasNodeType() + @test AbstractTrees.nodetype(Node{String}) === Node{String} + @test AbstractTrees.ChildIndexing(Node) === AbstractTrees.IndexedChildren() + end + + @testset "PreOrderDFS visits every node" begin + doc = parse(xml, Node) + elements = [n for n in AbstractTrees.PreOrderDFS(doc) if nodetype(n) == Element] + @test map(tag, elements) == ["library", "book", "title", "author", "book", "title"] + + ldoc = parse(xml, LazyNode) + lelements = [n for n 
in AbstractTrees.PreOrderDFS(ldoc) if nodetype(n) == Element] + @test map(tag, lelements) == ["library", "book", "title", "author", "book", "title"] + end + + @testset "printnode labels" begin + @test sprint(AbstractTrees.printnode, Element("div", "hi"; class="main")) == "
" + @test sprint(AbstractTrees.printnode, Text("hello")) == "\"hello\"" + @test sprint(AbstractTrees.printnode, Comment("c")) == "" + @test sprint(AbstractTrees.printnode, CData("xyz")) == "" + @test sprint(AbstractTrees.printnode, DTD("note")) == "" + @test sprint(AbstractTrees.printnode, ProcessingInstruction("xml-stylesheet", "type=\"text/xsl\"")) == + "" + @test sprint(AbstractTrees.printnode, Declaration(version="1.0")) == "" + @test sprint(AbstractTrees.printnode, Document()) == "Document" + + ldoc = parse("hi", LazyNode) + a = ldoc[1] + @test sprint(AbstractTrees.printnode, a) == "" + end + + @testset "print_tree round-trips structure" begin + doc = parse("", Node) + out = sprint(AbstractTrees.print_tree, doc) + @test occursin("Document", out) + @test occursin("", out) + @test occursin("", out) + @test occursin("", out) + @test occursin("", out) + end +end diff --git a/test/test_libexpat.jl b/test/test_libexpat.jl new file mode 100644 index 0000000..9ac8955 --- /dev/null +++ b/test/test_libexpat.jl @@ -0,0 +1,389 @@ +# Test cases inspired by libexpat (https://github.com/libexpat/libexpat, MIT license) +# Translated from expat/tests/basic_tests.c + +using XML +using XML: Node, nodetype, Document, Element, Comment, CData, ProcessingInstruction, Text, Declaration, DTD +using XML: tag, value, children, attributes, simple_value +using Test + +@testset "libexpat-inspired" begin + + #==========================================================================# + # Character References # + #==========================================================================# + @testset "Decimal character references" begin + doc = parse("éè", Node) + @test simple_value(children(doc)[1]) == "éè" + end + + @testset "Hex character references" begin + doc = parse("éè", Node) + @test simple_value(children(doc)[1]) == "éè" + end + + @testset "Mixed char refs and text" begin + doc = parse("abcdef", Node) + @test simple_value(children(doc)[1]) == "abcdef" + end + + @testset "Large 
Unicode code points" begin + # CJK Unified Ideograph + doc = parse("世界", Node) + @test simple_value(children(doc)[1]) == "世界" + end + + #==========================================================================# + # UTF-8 Content # + #==========================================================================# + @testset "UTF-8 BOM" begin + bom = "\xef\xbb\xbf" + doc = parse(bom * "", Node) + @test nodetype(doc) == Document + end + + @testset "UTF-8 element content" begin + doc = parse("Ünïcödé", Node) + @test simple_value(children(doc)[1]) == "Ünïcödé" + end + + @testset "UTF-8 in attribute values" begin + doc = parse("", Node) + @test children(doc)[1]["attr"] == "café" + end + + @testset "UTF-8 element names" begin + # XML.jl tokenizer does not yet support non-ASCII characters in element names + @test_broken try + parse("", Node) + true + catch + false + end + end + + @testset "Multi-byte UTF-8 sequences" begin + # 2-byte: ñ (U+00F1) + doc = parse("ñ", Node) + @test simple_value(children(doc)[1]) == "ñ" + + # 3-byte: 世 (U+4E16) + doc = parse("", Node) + @test simple_value(children(doc)[1]) == "世" + + # 4-byte: 𤭢 (U+24B62) + doc = parse("𤭢", Node) + @test simple_value(children(doc)[1]) == "𤭢" + end + + #==========================================================================# + # CDATA # + #==========================================================================# + @testset "Basic CDATA" begin + doc = parse("Hello!]]>", Node) + root = children(doc)[1] + cdata = filter(x -> nodetype(x) == CData, children(root)) + @test length(cdata) == 1 + @test value(cdata[1]) == "Hello!" 
+ end + + @testset "CDATA with special characters" begin + doc = parse("\"']]>", Node) + root = children(doc)[1] + cdata = filter(x -> nodetype(x) == CData, children(root)) + @test value(cdata[1]) == "&<>\"'" + end + + @testset "Multiple CDATA sections" begin + doc = parse("", Node) + root = children(doc)[1] + cdata = filter(x -> nodetype(x) == CData, children(root)) + @test length(cdata) == 2 + @test value(cdata[1]) == "first" + @test value(cdata[2]) == "second" + end + + @testset "CDATA containing ]]" begin + # ]] without > is valid inside CDATA + doc = parse("", Node) + root = children(doc)[1] + cdata = filter(x -> nodetype(x) == CData, children(root)) + @test value(cdata[1]) == "data]]with]]brackets" + end + + @testset "CDATA errors" begin + @test_throws Exception parse("", Node) + decls = filter(x -> nodetype(x) == Declaration, children(doc)) + @test length(decls) == 1 + @test decls[1]["version"] == "1.0" + end + + @testset "XML declaration with encoding" begin + doc = parse("", Node) + decls = filter(x -> nodetype(x) == Declaration, children(doc)) + @test decls[1]["encoding"] == "UTF-8" + end + + @testset "XML declaration with standalone" begin + doc = parse("", Node) + decls = filter(x -> nodetype(x) == Declaration, children(doc)) + @test decls[1]["standalone"] == "yes" + end + + @testset "Full XML declaration" begin + doc = parse("", Node) + decls = filter(x -> nodetype(x) == Declaration, children(doc)) + @test decls[1]["version"] == "1.0" + @test decls[1]["encoding"] == "UTF-8" + @test decls[1]["standalone"] == "no" + end + + #==========================================================================# + # Processing Instructions # + #==========================================================================# + @testset "Processing instructions" begin + doc = parse("", Node) + pis = filter(x -> nodetype(x) == ProcessingInstruction, children(doc)) + @test length(pis) == 1 + + doc = parse("", Node) + root = children(doc)[1] + pis = filter(x -> nodetype(x) == 
ProcessingInstruction, children(root)) + @test length(pis) == 1 + end + + @testset "PI with no data" begin + doc = parse("", Node) + pis = filter(x -> nodetype(x) == ProcessingInstruction, children(doc)) + @test length(pis) == 1 + end + + #==========================================================================# + # Comments # + #==========================================================================# + @testset "Comments in various positions" begin + # In prolog + doc = parse("", Node) + comments = filter(x -> nodetype(x) == Comment, children(doc)) + @test length(comments) == 1 + + # Inside element + doc = parse("", Node) + root = children(doc)[1] + comments = filter(x -> nodetype(x) == Comment, children(root)) + @test length(comments) == 1 + + # After root element + doc = parse("", Node) + comments = filter(x -> nodetype(x) == Comment, children(doc)) + @test length(comments) == 1 + end + + @testset "Comment with special content" begin + doc = parse("", Node) + root = children(doc)[1] + comments = filter(x -> nodetype(x) == Comment, children(root)) + @test contains(value(comments[1]), "") + @test contains(value(comments[1]), "¬-entity;") + end + + #==========================================================================# + # DTD / DOCTYPE # + #==========================================================================# + @testset "DOCTYPE with internal subset" begin + xml = """ + +]> +text""" + doc = parse(xml, Node) + @test nodetype(doc) == Document + dtd_nodes = filter(x -> nodetype(x) == DTD, children(doc)) + @test length(dtd_nodes) == 1 + root = filter(x -> nodetype(x) == Element, children(doc))[1] + @test tag(root) == "doc" + @test root["attr"] == "value" + text_nodes = filter(x -> nodetype(x) == Text, children(root)) + @test length(text_nodes) == 1 + @test value(text_nodes[1]) == "text" + end + + @testset "DOCTYPE with SYSTEM" begin + doc = parse("", Node) + dtd_nodes = filter(x -> nodetype(x) == DTD, children(doc)) + @test length(dtd_nodes) == 1 + end 
+ + @testset "DOCTYPE with PUBLIC" begin + doc = parse("""""", Node) + dtd_nodes = filter(x -> nodetype(x) == DTD, children(doc)) + @test length(dtd_nodes) == 1 + end + + #==========================================================================# + # Entity Handling # + #==========================================================================# + @testset "Predefined entities" begin + doc = parse("<>&'"", Node) + @test simple_value(children(doc)[1]) == "<>&'\"" + end + + @testset "Entities in attribute values" begin + doc = parse("", Node) + @test children(doc)[1]["attr"] == "" + end + + @testset "Mixed entities and text" begin + doc = parse("Hello & welcome <user>", Node) + @test simple_value(children(doc)[1]) == "Hello & welcome " + end + + #==========================================================================# + # Attribute Edge Cases # + #==========================================================================# + @testset "Empty attribute value" begin + doc = parse("", Node) + @test children(doc)[1]["attr"] == "" + + doc = parse("""""", Node) + @test children(doc)[1]["attr"] == "" + end + + @testset "Attribute with entities" begin + doc = parse("", Node) + @test children(doc)[1]["attr"] == "a&b" + end + + @testset "Multiple attributes" begin + doc = parse("""""", Node) + el = children(doc)[1] + @test el["a"] == "1" + @test el["b"] == "2" + @test el["c"] == "3" + @test el["d"] == "4" + @test el["e"] == "5" + end + + @testset "Attribute error: duplicate" begin + @test_throws Exception parse("""""", Node) + end + + #==========================================================================# + # Nesting & Structure # + #==========================================================================# + @testset "Deeply nested elements" begin + xml = "deep" + doc = parse(xml, Node) + @test nodetype(doc) == Document + end + + @testset "Many sibling elements" begin + items = join(["$i" for i in 1:100]) + xml = "$items" + doc = parse(xml, Node) + root = 
children(doc)[1] + els = filter(x -> nodetype(x) == Element, children(root)) + @test length(els) == 100 + @test simple_value(els[1]) == "1" + @test simple_value(els[100]) == "100" + end + + @testset "Mismatched tags" begin + @test_throws Exception parse("", Node) + @test_throws Exception parse("", Node) + @test_throws Exception parse("", Node) + end + + @testset "Unclosed elements" begin + @test_throws Exception parse("", Node) + @test_throws Exception parse("text", Node) + end + + #==========================================================================# + # Line Endings # + #==========================================================================# + @testset "Various line endings in content" begin + # CR, LF, CRLF should all work + doc = parse("line1\nline2", Node) + @test nodetype(doc) == Document + + doc = parse("line1\rline2", Node) + @test nodetype(doc) == Document + + doc = parse("line1\r\nline2", Node) + @test nodetype(doc) == Document + end + + #==========================================================================# + # Empty Document Parts # + #==========================================================================# + @testset "Empty root element" begin + doc = parse("", Node) + root = children(doc)[1] + @test tag(root) == "doc" + @test isempty(filter(x -> nodetype(x) == Element, children(root))) + end + + @testset "Element with only whitespace" begin + doc = parse(" \n\t ", Node) + @test nodetype(doc) == Document + end + + @testset "Element with only comments" begin + doc = parse("", Node) + root = children(doc)[1] + els = filter(x -> nodetype(x) == Element, children(root)) + @test isempty(els) + comments = filter(x -> nodetype(x) == Comment, children(root)) + @test length(comments) == 2 + end + + #==========================================================================# + # Namespace-like Attributes # + #==========================================================================# + @testset "xmlns declarations" begin + doc = parse("""""", 
Node) + root = children(doc)[1] + @test root["xmlns"] == "http://example.com" + @test root["xmlns:ns"] == "http://example.com/ns" + els = filter(x -> nodetype(x) == Element, children(root)) + @test tag(els[1]) == "ns:child" + end + + @testset "Namespaced attributes" begin + doc = parse("""""", Node) + root = children(doc)[1] + @test root["xml:lang"] == "en" + @test root["xml:space"] == "preserve" + end + + #==========================================================================# + # Large Content # + #==========================================================================# + @testset "Long attribute value" begin + long_val = repeat("x", 10_000) + doc = parse("", Node) + @test children(doc)[1]["attr"] == long_val + end + + @testset "Long text content" begin + long_text = repeat("Hello World! ", 1000) + doc = parse("$long_text", Node) + @test simple_value(children(doc)[1]) == long_text + end + + @testset "Long CDATA" begin + long_cdata = repeat("data<>& ", 1000) + doc = parse("", Node) + root = children(doc)[1] + cdata = filter(x -> nodetype(x) == CData, children(root)) + @test value(cdata[1]) == long_cdata + end +end diff --git a/test/test_libxml2_testcases.jl b/test/test_libxml2_testcases.jl new file mode 100644 index 0000000..0b8a89a --- /dev/null +++ b/test/test_libxml2_testcases.jl @@ -0,0 +1,1578 @@ +# Test cases borrowed from the libxml2 test suite (https://github.com/GNOME/libxml2). +# +# libxml2 is Copyright (C) the GNOME Project and contributors, licensed under the MIT License. +# These test cases are adapted for the XML.jl Julia package. 
+# +# Categories mirror the libxml2 test/ directory structure: +# - CDATA handling +# - Comments +# - Processing instructions +# - Attributes (normalization, entities, quoting) +# - Namespaces +# - DTD / internal subset +# - Entity references (character refs, predefined, internal general) +# - Whitespace / blank handling +# - Well-formedness (boundaries, big names, mixed content) +# - Error cases (must fail to parse) + +using XML +using XML: Document, Element, Declaration, Comment, CData, DTD, ProcessingInstruction, Text +using XML: escape, unescape +using Test + +@testset "libxml2 test cases" begin + +#==============================================================================# +# CDATA SECTIONS # +# From: test/cdata, test/cdata2, test/adjacent-cdata.xml, # +# test/emptycdata.xml, test/cdata-*-byte-UTF-8.xml # +#==============================================================================# +@testset "CDATA" begin + @testset "cdata: basic CDATA with markup characters" begin + # libxml2 test/cdata + xml = """\nHello, world!]]>\n""" + doc = parse(xml, Node) + root = doc[1] + cdata_nodes = filter(x -> nodetype(x) == CData, children(root)) + @test length(cdata_nodes) >= 1 + @test value(cdata_nodes[1]) == "Hello, world!" 
+ end + + @testset "cdata2: nested CDATA-like content" begin + # libxml2 test/cdata2 - tests ]]> escaping pattern + xml = """ + + ]> +""" + doc = parse(xml, Node) + root = first(filter(x -> nodetype(x) == Element, children(doc))) + @test tag(root) == "collection" + end + + @testset "adjacent-cdata: three adjacent CDATA sections" begin + # libxml2 test/adjacent-cdata.xml + xml = "" + doc = parse(xml, Node) + root = doc[1] + cdata_nodes = filter(x -> nodetype(x) == CData, children(root)) + @test length(cdata_nodes) == 3 + @test value(cdata_nodes[1]) == "abc" + @test value(cdata_nodes[2]) == "def" + @test value(cdata_nodes[3]) == "ghi" + end + + @testset "emptycdata: empty CDATA section in namespaced doc" begin + # libxml2 test/emptycdata.xml + xml = """ + + +""" + doc = parse(xml, Node) + root = first(filter(x -> nodetype(x) == Element, children(doc))) + @test tag(root) == "html" + cdata_nodes = filter(x -> nodetype(x) == CData, children(root)) + @test length(cdata_nodes) >= 1 + @test value(cdata_nodes[1]) == "" + end + + @testset "cdata-2-byte-UTF-8: two-byte chars across buffer boundary" begin + # libxml2 test/cdata-2-byte-UTF-8.xml - tests Č (U+010C, 2 bytes in UTF-8) + long_c = repeat("Č", 400) + xml = """\n\n

\n
""" + doc = parse(xml, Node) + root = first(filter(x -> nodetype(x) == Element, children(doc))) + p = first(filter(x -> nodetype(x) == Element, children(root))) + cdata = first(filter(x -> nodetype(x) == CData, children(p))) + @test value(cdata) == long_c + end + + @testset "cdata-3-byte-UTF-8: three-byte chars across buffer boundary" begin + # libxml2 test/cdata-3-byte-UTF-8.xml - tests 牛 (U+725B, 3 bytes in UTF-8) + long_cow = repeat("牛", 400) + xml = """\n\n

\n
""" + doc = parse(xml, Node) + root = first(filter(x -> nodetype(x) == Element, children(doc))) + p = first(filter(x -> nodetype(x) == Element, children(root))) + cdata = first(filter(x -> nodetype(x) == CData, children(p))) + @test value(cdata) == long_cow + end + + @testset "cdata-4-byte-UTF-8: four-byte chars across buffer boundary" begin + # libxml2 test/cdata-4-byte-UTF-8.xml - tests 🍦 (U+1F366, 4 bytes in UTF-8) + long_ice = repeat("🍦", 334) + xml = """\n\n

\n
""" + doc = parse(xml, Node) + root = first(filter(x -> nodetype(x) == Element, children(doc))) + p = first(filter(x -> nodetype(x) == Element, children(root))) + cdata = first(filter(x -> nodetype(x) == CData, children(p))) + @test value(cdata) == long_ice + end +end + +#==============================================================================# +# COMMENTS # +# From: test/comment.xml through test/comment6.xml, test/badcomment.xml # +#==============================================================================# +@testset "Comments" begin + @testset "comment: comments inside element" begin + # libxml2 test/comment.xml + xml = """ + + + + +""" + doc = parse(xml, Node) + root = first(filter(x -> nodetype(x) == Element, children(doc))) + comments = filter(x -> nodetype(x) == Comment, children(root)) + @test length(comments) == 2 + @test contains(value(comments[1]), "document start") + @test contains(value(comments[2]), "document end") + end + + @testset "comment2: comments outside root element" begin + # libxml2 test/comment2.xml + xml = """ + + + + +""" + doc = parse(xml, Node) + top_comments = filter(x -> nodetype(x) == Comment, children(doc)) + @test length(top_comments) == 2 + @test contains(value(top_comments[1]), "document start") + @test contains(value(top_comments[2]), "document end") + end + + @testset "comment3: very long comment (buffer boundary test)" begin + # libxml2 test/comment3.xml - 150+ lines of repeated digits + lines = join([repeat("01234567890123456789012345678901234567890123456789", 1) for _ in 1:150], "\n") + comment_text = " test of very very long comments and buffer limits\n" * lines * "\n" + xml = """\n\n""" + doc = parse(xml, Node) + comments = filter(x -> nodetype(x) == Comment, children(doc)) + @test length(comments) >= 1 + @test length(value(comments[1])) > 7000 + end + + @testset "comment5: hyphens and line breaks in comments" begin + # libxml2 test/comment5.xml + xml = """ + +""" + doc = parse(xml, Node) + comments = filter(x -> 
nodetype(x) == Comment, children(doc)) + @test length(comments) == 1 + @test contains(value(comments[1]), "hyphen") + @test contains(value(comments[1]), "- - -") + end + + @testset "comment6: comment before DOCTYPE" begin + # libxml2 test/comment6.xml + xml = """ + +]> +
""" + doc = parse(xml, Node) + typed = filter(x -> nodetype(x) != Text, children(doc)) + @test nodetype(typed[1]) == Comment + @test nodetype(typed[2]) == DTD + @test nodetype(typed[3]) == Element + end + + @testset "badcomment: comment with markup-like content" begin + # libxml2 test/badcomment.xml - note: libxml2 considers this valid XML + xml = """ + + + +""" + doc = parse(xml, Node) + root = first(filter(x -> nodetype(x) == Element, children(doc))) + @test tag(root) == "foo" + comments = filter(x -> nodetype(x) == Comment, children(root)) + @test length(comments) >= 1 + end + + @testset "comment4: non-ASCII characters in comments" begin + # libxml2 test/comment4.xml (adapted from ISO-8859-1 to UTF-8) + xml = """ + + + +""" + doc = parse(xml, Node) + comments = filter(x -> nodetype(x) == Comment, children(doc)) + @test length(comments) == 3 + @test contains(value(comments[1]), "là") + @test contains(value(comments[2]), "à") + end +end + +#==============================================================================# +# PROCESSING INSTRUCTIONS # +# From: test/pi.xml, test/pi2.xml # +#==============================================================================# +@testset "Processing Instructions" begin + @testset "pi: PIs inside root element" begin + # libxml2 test/pi.xml + xml = """ + + + + +""" + doc = parse(xml, Node) + root = first(filter(x -> nodetype(x) == Element, children(doc))) + pis = filter(x -> nodetype(x) == ProcessingInstruction, children(root)) + @test length(pis) == 2 + @test tag(pis[1]) == "document-start" + @test value(pis[1]) == "doc" + @test tag(pis[2]) == "document-end" + @test value(pis[2]) == "doc" + end + + @testset "pi2: PIs outside root element" begin + # libxml2 test/pi2.xml + xml = """ + + + + +""" + doc = parse(xml, Node) + top_pis = filter(x -> nodetype(x) == ProcessingInstruction, children(doc)) + @test length(top_pis) == 2 + @test tag(top_pis[1]) == "document-start" + @test tag(top_pis[2]) == "document-end" + end +end + 
+#==============================================================================# +# ATTRIBUTES # +# From: test/att1 through test/att11, test/attrib.xml, # +# test/def-xml-attr.xml, test/defattr.xml # +#==============================================================================# +@testset "Attributes" begin + @testset "att1: attribute with newlines (whitespace normalization)" begin + # libxml2 test/att1 + xml = "" + doc = parse(xml, Node) + @test tag(doc[1]) == "doc" + @test haskey(doc[1], "attr") + end + + @testset "att2: attribute with multiple spaces" begin + # libxml2 test/att2 + xml = """""" + doc = parse(xml, Node) + @test doc[1]["attr"] == "to normalize with a space" + end + + @testset "att3: attribute with character references" begin + # libxml2 test/att3 + xml = """""" + doc = parse(xml, Node) + @test tag(doc[1]) == "select" + @test haskey(doc[1], "onclick") + end + + @testset "att4: complex document with many attributes" begin + # Adapted from libxml2 test/att4 (electroxml document) + xml = """ + + + + +""" + doc = parse(xml, Node) + root = first(filter(x -> nodetype(x) == Element, children(doc))) + @test tag(root) == "electroxml" + @test root["modified"] == "20021216T072726" + end + + @testset "attrib: attribute with entities and char refs" begin + # libxml2 test/attrib.xml + xml = """""" + doc = parse(xml, Node) + @test tag(doc[1]) == "item" + @test doc[1]["url"] == "http://example.com/" + @test doc[1]["visits"] == "1" + end + + @testset "att5: attribute with empty value" begin + # Adapted from libxml2 test/att5 + xml = """ +""" + doc = parse(xml, Node) + root = first(filter(x -> nodetype(x) == Element, children(doc))) + @test root["a"] == "" + @test root["b"] == "val" + end + + @testset "att9: attribute with single quotes in double-quoted value" begin + # libxml2 test/att9 pattern + xml = """""" + doc = parse(xml, Node) + @test doc[1]["attr"] == "it's a test" + end + + @testset "att10: attribute with double quotes in single-quoted value" begin + xml 
= """""" + doc = parse(xml, Node) + @test doc[1]["attr"] == "he said \"hello\"" + end + + @testset "att11: attribute values with entity refs" begin + xml = """""" + doc = parse(xml, Node) + @test doc[1]["a"] == "" + @test doc[1]["b"] == "a&b" + end + + @testset "def-xml-attr: xml:lang default attribute in DTD" begin + # libxml2 test/def-xml-attr.xml (just verify parsing doesn't fail) + xml = """ + + +]> + + +""" + doc = parse(xml, Node) + root = first(filter(x -> nodetype(x) == Element, children(doc))) + @test tag(root) == "root" + end +end + +#==============================================================================# +# NAMESPACES # +# From: test/ns through test/ns7, test/namespaces/err_*.xml, # +# test/nsclean.xml, test/entity-in-ns-uri.xml # +#==============================================================================# +@testset "Namespaces" begin + @testset "ns: namespace with prefix on element and attribute" begin + # libxml2 test/ns + xml = """ + + +""" + doc = parse(xml, Node) + root = first(filter(x -> nodetype(x) == Element, children(doc))) + @test tag(root) == "dia:diagram" + @test root["xmlns:dia"] == "http://www.lysator.liu.se/~alla/dia/" + child = first(filter(x -> nodetype(x) == Element, children(root))) + @test tag(child) == "dia:diagramdata" + @test child["dia:testattr"] == "test" + end + + @testset "ns2: namespace on self-closing element" begin + # libxml2 test/ns2 + xml = """ +""" + doc = parse(xml, Node) + root = first(filter(x -> nodetype(x) == Element, children(doc))) + @test tag(root) == "dia:diagram" + @test root["dia:testattr"] == "test" + end + + @testset "ns3: xmlns declared after prefixed attribute" begin + # libxml2 test/ns3 + xml = """ +""" + doc = parse(xml, Node) + root = first(filter(x -> nodetype(x) == Element, children(doc))) + @test root["dia:testattr"] == "test" + @test root["xmlns:dia"] == "http://www.lysator.liu.se/~alla/dia/" + end + + @testset "ns4: xml:lang, xml:link, xml:space built-in attributes" begin + # libxml2 
test/ns4 + xml = """ +""" + doc = parse(xml, Node) + root = first(filter(x -> nodetype(x) == Element, children(doc))) + @test root["xml:lang"] == "en" + @test root["xml:space"] == "preserve" + end + + @testset "ns5: default namespace on element with prefix on another" begin + # libxml2 test/ns5 + xml = """ + +""" + doc = parse(xml, Node) + root = doc[1] + @test root["xmlns"] == "http://example.org/ns/1" + @test root["xmlns:rng"] == "http://example.org/ns/1" + @test root["name"] == "foo" + end + + @testset "ns6: default namespace on child, not on sibling" begin + # libxml2 test/ns6 + xml = """ + + +""" + doc = parse(xml, Node) + root = doc[1] + elements = filter(x -> nodetype(x) == Element, children(root)) + @test tag(elements[1]) == "foo" + @test elements[1]["xmlns"] == "http://abc" + @test tag(elements[2]) == "bar" + end + + @testset "ns7: xml: prefix element (built-in)" begin + # libxml2 test/ns7 + xml = "" + doc = parse(xml, Node) + @test tag(doc[1]) == "xml:test" + end + + @testset "multiple namespace prefixes" begin + xml = """ + + +""" + doc = parse(xml, Node) + root = doc[1] + elements = filter(x -> nodetype(x) == Element, children(root)) + @test tag(elements[1]) == "a:child" + @test elements[1]["a:attr"] == "1" + @test tag(elements[2]) == "b:child" + @test elements[2]["b:attr"] == "2" + end + + @testset "namespace redeclaration on nested element" begin + xml = """ + + + +""" + doc = parse(xml, Node) + root = doc[1] + child = first(filter(x -> nodetype(x) == Element, children(root))) + @test child["xmlns:a"] == "http://second.com" + end +end + +#==============================================================================# +# DTD / INTERNAL SUBSET # +# From: test/dtd1 through test/dtd13, test/intsubset.xml, # +# test/intsubset2.xml # +#==============================================================================# +@testset "DTD / Internal Subset" begin + @testset "dtd1: DOCTYPE with PUBLIC id" begin + # libxml2 test/dtd1 + xml = """ + + +""" + doc = 
parse(xml, Node) + dtd = first(filter(x -> nodetype(x) == DTD, children(doc))) + @test contains(value(dtd), "MEMO") + @test contains(value(dtd), "PUBLIC") + end + + @testset "dtd2: simple internal subset with ELEMENT declaration" begin + # libxml2 test/dtd2 + xml = """ +]> +This is a valid document !""" + doc = parse(xml, Node) + dtd = first(filter(x -> nodetype(x) == DTD, children(doc))) + @test contains(value(dtd), "ELEMENT") + root = first(filter(x -> nodetype(x) == Element, children(doc))) + @test simple_value(root) == "This is a valid document !" + end + + @testset "dtd3: ANY content model" begin + # libxml2 test/dtd3 + xml = """ +]> +This is a valid document !""" + doc = parse(xml, Node) + dtd = first(filter(x -> nodetype(x) == DTD, children(doc))) + @test contains(value(dtd), "ANY") + end + + @testset "dtd4: EMPTY content model" begin + # libxml2 test/dtd4 + xml = """ +]> +""" + doc = parse(xml, Node) + root = first(filter(x -> nodetype(x) == Element, children(doc))) + @test tag(root) == "doc" + @test length(children(root)) == 0 + end + + @testset "dtd5: mixed content model" begin + # libxml2 test/dtd5 + xml = """ + + +]> +This is a valid document""" + doc = parse(xml, Node) + root = first(filter(x -> nodetype(x) == Element, children(doc))) + @test tag(root) == "doc" + elements = filter(x -> nodetype(x) == Element, children(root)) + @test length(elements) == 2 + @test tag(elements[1]) == "a" + @test tag(elements[2]) == "b" + end + + @testset "dtd6: choice content model" begin + # libxml2 test/dtd6 + xml = """ + + +]> +This is a valid document""" + doc = parse(xml, Node) + root = first(filter(x -> nodetype(x) == Element, children(doc))) + elements = filter(x -> nodetype(x) == Element, children(root)) + @test length(elements) == 3 + end + + @testset "dtd7: sequence content model" begin + # libxml2 test/dtd7 + xml = """ + + +]> +This is a valid document""" + doc = parse(xml, Node) + root = first(filter(x -> nodetype(x) == Element, children(doc))) + elements = 
filter(x -> nodetype(x) == Element, children(root)) + @test length(elements) == 2 + @test tag(elements[1]) == "a" + @test tag(elements[2]) == "b" + end + + @testset "dtd8: nested choice and sequence" begin + # libxml2 test/dtd8 + xml = """ + + + + +]> +This is a valid document""" + doc = parse(xml, Node) + root = first(filter(x -> nodetype(x) == Element, children(doc))) + elements = filter(x -> nodetype(x) == Element, children(root)) + @test tag(elements[1]) == "b" + @test tag(elements[2]) == "c" + end + + @testset "dtd9: optional content model" begin + # libxml2 test/dtd9 + xml = """ + + + + +]> +This is a valid document""" + doc = parse(xml, Node) + root = first(filter(x -> nodetype(x) == Element, children(doc))) + elements = filter(x -> nodetype(x) == Element, children(root)) + @test length(elements) == 2 + end + + @testset "dtd10: mixed repetition content model" begin + # libxml2 test/dtd10 + xml = """ + + + + +]> +This is a valid document""" + doc = parse(xml, Node) + root = first(filter(x -> nodetype(x) == Element, children(doc))) + elements = filter(x -> nodetype(x) == Element, children(root)) + @test length(elements) == 3 + end + + @testset "dtd11: ATTLIST with CDATA #IMPLIED" begin + # libxml2 test/dtd11 + xml = """ + +]> +""" + doc = parse(xml, Node) + root = first(filter(x -> nodetype(x) == Element, children(doc))) + @test root["val"] == "v1" + end + + @testset "dtd12: nested entity references" begin + # libxml2 test/dtd12 - entity referencing another entity + xml = """ + +]> +&WhatHeSaid;""" + # This may or may not expand depending on XML.jl's entity handling + doc = parse(xml, Node) + root = first(filter(x -> nodetype(x) == Element, children(doc))) + @test tag(root) == "doc" + end + + @testset "dtd13: comments before and after DOCTYPE" begin + # libxml2 test/dtd13 + xml = """ + +]> + +""" + doc = parse(xml, Node) + typed = filter(x -> nodetype(x) != Text, children(doc)) + @test nodetype(typed[1]) == Comment + @test nodetype(typed[2]) == DTD + @test 
nodetype(typed[3]) == Comment + @test nodetype(typed[4]) == Element + end + + @testset "intsubset: internal subset with comment containing quote" begin + # libxml2 test/intsubset.xml + xml = """ + + +]> +""" + doc = parse(xml, Node) + root = first(filter(x -> nodetype(x) == Element, children(doc))) + @test tag(root) == "root" + dtd = first(filter(x -> nodetype(x) == DTD, children(doc))) + @test contains(value(dtd), "ELEMENT") + end +end + +#==============================================================================# +# ENTITY REFERENCES # +# From: test/ent1 through test/ent11, test/ent6hex # +#==============================================================================# +@testset "Entity References" begin + @testset "ent1: internal general entity declaration and use" begin + # libxml2 test/ent1 + xml = """ + +]> + + &xml; +""" + doc = parse(xml, Node) + root = first(filter(x -> nodetype(x) == Element, children(doc))) + @test tag(root) == "EXAMPLE" + end + + @testset "ent3: entity refs in attribute values" begin + # libxml2 test/ent3 + xml = """ + +]> + +""" + doc = parse(xml, Node) + root = first(filter(x -> nodetype(x) == Element, children(doc))) + @test root["prop1"] == "a&b" + @test root["prop2"] == "c + + This is an inverted exclamation sign ¡ + This is a space +""" + doc = parse(xml, Node) + root = first(filter(x -> nodetype(x) == Element, children(doc))) + text = join([value(c) for c in children(root) if nodetype(c) == Text]) + @test contains(text, "\u00A1") # ¡ + @test contains(text, " ") # space ( ) + end + + @testset "ent6: predefined entities with double-escaping" begin + # libxml2 test/ent6 + xml = """ + + + + +]> +<""" + doc = parse(xml, Node) + root = first(filter(x -> nodetype(x) == Element, children(doc))) + @test tag(root) == "doc" + end + + @testset "ent8: multiple entities in one document" begin + # libxml2 test/ent8 + xml = """ + +]> + +&test1;&test2; +""" + doc = parse(xml, Node) + root = first(filter(x -> nodetype(x) == Element, 
children(doc))) + @test tag(root) == "doc" + end + + @testset "predefined entities in text content" begin + xml = "& < > ' "" + doc = parse(xml, Node) + @test simple_value(doc[1]) == "& < > ' \"" + end + + @testset "predefined entities in attributes" begin + xml = """""" + doc = parse(xml, Node) + @test doc[1]["a"] == "&" + @test doc[1]["b"] == "<" + @test doc[1]["c"] == ">" + @test doc[1]["d"] == "'" + @test doc[1]["e"] == "\"" + end + + @testset "decimal character references" begin + xml = "ABC" + doc = parse(xml, Node) + @test simple_value(doc[1]) == "ABC" + end + + @testset "hexadecimal character references" begin + xml = "ABC" + doc = parse(xml, Node) + @test simple_value(doc[1]) == "ABC" + end + + @testset "mixed hex and decimal char refs" begin + xml = "Hello" + doc = parse(xml, Node) + @test simple_value(doc[1]) == "Hello" + end + + @testset "char ref for non-ASCII: inverted exclamation" begin + xml = "¡" + doc = parse(xml, Node) + @test simple_value(doc[1]) == "\u00A1" + end + + @testset "char ref for CJK character" begin + xml = "" + doc = parse(xml, Node) + @test simple_value(doc[1]) == "中" + end + + @testset "char ref for emoji" begin + xml = "😀" + doc = parse(xml, Node) + @test simple_value(doc[1]) == "\U0001F600" + end +end + +#==============================================================================# +# WHITESPACE / BLANK HANDLING # +# From: test/tstblanks.xml, test/title.xml # +#==============================================================================# +@testset "Whitespace / Blank Handling" begin + @testset "title: simple document with encoding" begin + # libxml2 test/title.xml + xml = """ +my title""" + doc = parse(xml, Node) + root = first(filter(x -> nodetype(x) == Element, children(doc))) + @test tag(root) == "title" + @test simple_value(root) == "my title" + end + + @testset "whitespace preservation in text content" begin + xml = " hello world " + doc = parse(xml, Node) + @test simple_value(doc[1]) == " hello world " + end + + 
@testset "tab and newline preservation" begin + xml = "\t\n\ttabbed\n" + doc = parse(xml, Node) + @test simple_value(doc[1]) == "\t\n\ttabbed\n" + end + + @testset "whitespace-only text node" begin + xml = " " + doc = parse(xml, Node) + @test simple_value(doc[1]) == " " + end + + @testset "inter-element whitespace preserved" begin + xml = "\n \n \n" + doc = parse(xml, Node) + root = doc[1] + text_nodes = filter(x -> nodetype(x) == Text, children(root)) + @test length(text_nodes) >= 1 + end +end + +#==============================================================================# +# WELL-FORMED DOCUMENTS # +# From: test/boundaries1.xml, test/bigname.xml, test/bigname2.xml, # +# test/slashdot.xml, test/eve.xml, test/wap.xml, etc. # +#==============================================================================# +@testset "Well-Formed Documents" begin + @testset "boundaries1: boundary conditions with entities and CDATA" begin + # libxml2 test/boundaries1.xml (simplified - without DTD entity expansion) + xml = """ +"> + '> +]> + + +text + + + +""" + doc = parse(xml, Node) + root = first(filter(x -> nodetype(x) == Element, children(doc))) + @test tag(root) == "d" + @test root["a"] == ">" + @test root["b"] == ">" + cdata_nodes = filter(x -> nodetype(x) == CData, children(root)) + @test length(cdata_nodes) == 1 + @test value(cdata_nodes[1]) == "cdata" + end + + @testset "bigname: very long element name" begin + # libxml2 test/bigname.xml - element name with >10000 characters + longname = "this_is_a_very_large_name_" * repeat("0123456789", 500) * "_end" + xml = "<$(longname)/>" + doc = parse(xml, Node) + @test tag(doc[1]) == longname + end + + @testset "slashdot: real-world XML (ultramode feed)" begin + # libxml2 test/slashdot.xml (simplified) + xml = """ + + 100 Mbit/s on Fibre to the home + http://slashdot.org/articles/99/06/06/1440211.shtml + + CmdrTaco + wouldn't-it-be-nice + internet + 20 +
articles
+ topicinternet.jpg +
+ + Gimp 1.2 Preview + http://slashdot.org/articles/99/06/06/1438246.shtml + + CmdrTaco + stuff-to-read + gimp + 12 +
articles
+ topicgimp.gif +
+
""" + doc = parse(xml, Node) + root = doc[1] + @test tag(root) == "ultramode" + stories = filter(x -> nodetype(x) == Element && tag(x) == "story", children(root)) + @test length(stories) == 2 + title1 = first(filter(x -> nodetype(x) == Element && tag(x) == "title", + children(stories[1]))) + @test simple_value(title1) == "100 Mbit/s on Fibre to the home" + end + + @testset "eve: document with external DTD reference and internal entity" begin + # libxml2 test/eve.xml + xml = """ + +]> + +""" + doc = parse(xml, Node) + dtd = first(filter(x -> nodetype(x) == DTD, children(doc))) + @test contains(value(dtd), "PUBLIC") + @test contains(value(dtd), "ENTITY") + root = first(filter(x -> nodetype(x) == Element, children(doc))) + @test tag(root) == "spec" + end + + @testset "deeply nested document" begin + xml = "
deep" + doc = parse(xml, Node) + @test simple_value(doc[1][1][1][1][1][1][1][1][1][1]) == "deep" + end + + @testset "many sibling elements" begin + items = join(["Item $i" for i in 1:200]) + xml = "$items" + doc = parse(xml, Node) + elements = filter(x -> nodetype(x) == Element, children(doc[1])) + @test length(elements) == 200 + @test elements[1]["n"] == "1" + @test elements[200]["n"] == "200" + end + + @testset "mixed content: text, elements, CDATA, comments, PIs" begin + xml = """ + text before + child text + + + + text after +""" + doc = parse(xml, Node) + root = doc[1] + types = Set(nodetype(c) for c in children(root)) + @test Text in types + @test Element in types + @test Comment in types + @test CData in types + @test ProcessingInstruction in types + end + + @testset "self-closing elements" begin + xml = "

" + doc = parse(xml, Node) + elements = filter(x -> nodetype(x) == Element, children(doc[1])) + @test length(elements) == 3 + @test tag(elements[1]) == "br" + @test tag(elements[2]) == "hr" + @test tag(elements[3]) == "img" + @test all(x -> length(children(x)) == 0, elements) + end + + @testset "empty element: start-tag and end-tag" begin + xml = "" + doc = parse(xml, Node) + el = first(filter(x -> nodetype(x) == Element, children(doc[1]))) + @test tag(el) == "empty" + end + + @testset "element names with hyphens, dots, underscores" begin + xml = "<_private/>" + doc = parse(xml, Node) + @test tag(doc[1]) == "my-root" + elements = filter(x -> nodetype(x) == Element, children(doc[1])) + @test tag(elements[1]) == "sub.element" + @test tag(elements[2]) == "_private" + end + + @testset "element names starting with underscore" begin + xml = "<_root><__child/>" + doc = parse(xml, Node) + @test tag(doc[1]) == "_root" + end + + @testset "numeric element names (with letter prefix)" begin + xml = "

heading

" + doc = parse(xml, Node) + @test tag(doc[1]) == "h1" + @test simple_value(doc[1]) == "heading" + end +end + +#==============================================================================# +# ROUNDTRIP: PARSE → WRITE → PARSE # +# Tests that libxml2-style documents survive roundtrip processing # +#==============================================================================# +@testset "Roundtrip" begin + @testset "roundtrip: namespaced document" begin + xml = """ + + +""" + doc = parse(xml, Node) + s = XML.write(doc) + doc2 = parse(s, Node) + root = first(filter(x -> nodetype(x) == Element, children(doc2))) + @test root["xmlns:dia"] == "http://www.lysator.liu.se/~alla/dia/" + end + + @testset "roundtrip: DTD with internal subset" begin + xml = """ +]> +text""" + doc = parse(xml, Node) + s = XML.write(doc) + doc2 = parse(s, Node) + dtd = first(filter(x -> nodetype(x) == DTD, children(doc2))) + @test contains(value(dtd), "ELEMENT") + end + + @testset "roundtrip: adjacent CDATA sections" begin + xml = "" + doc = parse(xml, Node) + s = XML.write(doc) + doc2 = parse(s, Node) + root = first(filter(x -> nodetype(x) == Element, children(doc2))) + cdata_nodes = filter(x -> nodetype(x) == CData, children(root)) + @test length(cdata_nodes) == 2 + end + + @testset "roundtrip: processing instructions" begin + xml = """ + + +""" + doc = parse(xml, Node) + s = XML.write(doc) + doc2 = parse(s, Node) + pis = filter(x -> nodetype(x) == ProcessingInstruction, children(doc2)) + @test length(pis) == 2 + end + + @testset "roundtrip: comments with special characters" begin + xml = "" + doc = parse(xml, Node) + s = XML.write(doc) + doc2 = parse(s, Node) + root = first(filter(x -> nodetype(x) == Element, children(doc2))) + comments = filter(x -> nodetype(x) == Comment, children(root)) + @test length(comments) == 1 + end + + @testset "roundtrip: entities in attributes" begin + xml = """""" + doc = parse(xml, Node) + s = XML.write(doc) + doc2 = parse(s, Node) + root = first(filter(x -> 
nodetype(x) == Element, children(doc2))) + @test root["a"] == "a&b" + @test root["b"] == "cx", Node) + @test nodetype(doc) == Document + end + + @testset "errors/invalid-start-tag-1: text-only document" begin + # libxml2 test/errors/invalid-start-tag-1.xml + # XML.jl is lenient: treats bare text as a Text node + doc = parse("x", Node) + @test nodetype(doc) == Document + end + + @testset "errors/invalid-start-tag-2: lone <" begin + # libxml2 test/errors/invalid-start-tag-2.xml + @test_throws Exception parse("<", Node) + end + + @testset "errors/doctype1: malformed DOCTYPE" begin + # libxml2 test/errors/doctype1.xml - "[]>" + # XML.jl is lenient: parses the DOCTYPE and treats []> as text + doc = parse("[]>\n", Node) + @test nodetype(doc) == Document + end + + @testset "errors/dup-xml-attr: duplicate xml: attribute" begin + # libxml2 test/errors/dup-xml-attr.xml + @test_throws Exception parse("""""", Node) + end + + @testset "errors/attr5: duplicate attribute" begin + # libxml2 test/errors/attr5.xml + @test_throws Exception parse(""" + +""", Node) + end + + @testset "mismatched tags" begin + @test_throws Exception parse("
", Node) + end + + @testset "overlapping elements" begin + @test_throws Exception parse("", Node) + end + + @testset "unclosed root element" begin + @test_throws Exception parse("", Node) + end + + @testset "close tag without open" begin + @test_throws Exception parse("", Node) + end + + @testset "unclosed comment" begin + @test_throws Exception parse("
" + doc = parse(xml, Node) + comments = filter(x -> nodetype(x) == Comment, children(doc[1])) + @test contains(value(comments[1]), "héllo") + end + + @testset "Unicode in CDATA" begin + xml = "" + doc = parse(xml, Node) + cdata = first(filter(x -> nodetype(x) == CData, children(doc[1]))) + @test value(cdata) == "日本語テスト" + end + + @testset "Unicode in PI content" begin + xml = "" + doc = parse(xml, Node) + pi = first(filter(x -> nodetype(x) == ProcessingInstruction, children(doc[1]))) + @test contains(value(pi), "données") + end + + @testset "UTF-8 BOM handling" begin + # libxml2 test/utf8bom.xml pattern + xml = "\xef\xbb\xbf\n" + doc = parse(xml, Node) + root = first(filter(x -> nodetype(x) == Element, children(doc))) + @test tag(root) == "doc" + end +end + +#==============================================================================# +# REAL-WORLD DOCUMENT PATTERNS # +# Patterns commonly tested by libxml2 (DAV, RDF, SOAP, SVG, etc.) # +#==============================================================================# +@testset "Real-World Document Patterns" begin + @testset "WebDAV-like document" begin + # Inspired by libxml2 test/dav* series + xml = """ + + + /container/ + + + Example collection + + + HTTP/1.1 200 OK + + +""" + doc = parse(xml, Node) + root = first(filter(x -> nodetype(x) == Element, children(doc))) + @test tag(root) == "D:multistatus" + @test root["xmlns:D"] == "DAV:" + end + + @testset "RDF-like document" begin + # Inspired by libxml2 test/rdf1, test/rdf2 + xml = """ + + + Example Resource + John Doe + +""" + doc = parse(xml, Node) + root = first(filter(x -> nodetype(x) == Element, children(doc))) + @test tag(root) == "rdf:RDF" + desc = first(filter(x -> nodetype(x) == Element, children(root))) + @test desc["rdf:about"] == "http://example.org/resource" + end + + @testset "SVG-like document" begin + # Inspired by libxml2 test/svg1, test/svg2, test/svg3 + xml = """ + + + + + + + + + + Hello SVG +""" + doc = parse(xml, Node) + root = 
first(filter(x -> nodetype(x) == Element, children(doc))) + @test tag(root) == "svg" + @test root["xmlns"] == "http://www.w3.org/2000/svg" + @test root["width"] == "200" + end + + @testset "SOAP-like envelope" begin + xml = """ + + + + New York + US + + +""" + doc = parse(xml, Node) + root = first(filter(x -> nodetype(x) == Element, children(doc))) + @test tag(root) == "soap:Envelope" + end + + @testset "Atom feed" begin + xml = """ + + Example Feed + + 2003-12-13T18:30:02Z + + John Doe + + urn:uuid:60a76c80-d399-11d9-b93C-0003939e0af6 + + Atom-Powered Robots Run Amok + + urn:uuid:1225c695-cfb8-4ebb-aaaa-80da344efa6a + 2003-12-13T18:30:02Z + Some text. + +""" + doc = parse(xml, Node) + root = first(filter(x -> nodetype(x) == Element, children(doc))) + @test tag(root) == "feed" + @test root["xmlns"] == "http://www.w3.org/2005/Atom" + end + + @testset "plist-like document" begin + xml = """ + + + + Name + Example + Version + 42 + Enabled + + Tags + + alpha + beta + + +""" + doc = parse(xml, Node) + plist = first(filter(x -> nodetype(x) == Element, children(doc))) + @test tag(plist) == "plist" + @test plist["version"] == "1.0" + end + + @testset "XHTML with mixed content" begin + xml = """ + + Test + +

This is emphasized and strong text.

+

A link: click here.

+
+
  preformatted  text  
+ +""" + doc = parse(xml, Node) + root = first(filter(x -> nodetype(x) == Element, children(doc))) + @test tag(root) == "html" + @test root["xmlns"] == "http://www.w3.org/1999/xhtml" + end + + @testset "MathML-like document" begin + xml = """ + + x2 + + + y2 + = + z2 + +""" + doc = parse(xml, Node) + root = doc[1] + @test tag(root) == "math" + @test root["xmlns"] == "http://www.w3.org/1998/Math/MathML" + end + + @testset "WML-like document (mobile)" begin + # Inspired by libxml2 test/wml.xml + xml = """ + + + +

Welcome to WML

+
+
""" + doc = parse(xml, Node) + root = first(filter(x -> nodetype(x) == Element, children(doc))) + @test tag(root) == "wml" + end +end + +#==============================================================================# +# EDGE CASES # +# Additional edge cases inspired by libxml2 test patterns # +#==============================================================================# +@testset "Edge Cases" begin + @testset "CDATA containing ]] not followed by >" begin + xml = "" + doc = parse(xml, Node) + cdata = first(filter(x -> nodetype(x) == CData, children(doc[1]))) + @test value(cdata) == "a]]b" + end + + @testset "comment containing --" begin + # Note: -- inside comments is technically not well-formed per spec, + # but many parsers tolerate single - characters + xml = "" + doc = parse(xml, Node) + comments = filter(x -> nodetype(x) == Comment, children(doc[1])) + @test length(comments) == 1 + end + + @testset "attribute value containing >" begin + xml = """""" + doc = parse(xml, Node) + @test doc[1]["attr"] == "a>b" + end + + @testset "attribute value containing single quote in double quotes" begin + xml = """""" + doc = parse(xml, Node) + @test doc[1]["attr"] == "it's" + end + + @testset "attribute value containing double quote in single quotes" begin + xml = "" + doc = parse(xml, Node) + @test doc[1]["attr"] == "say \"hello\"" + end + + @testset "very long attribute value" begin + long_val = repeat("x", 10000) + xml = """""" + doc = parse(xml, Node) + @test doc[1]["attr"] == long_val + end + + @testset "very long text content" begin + long_text = repeat("word ", 5000) + xml = "$(long_text)" + doc = parse(xml, Node) + @test simple_value(doc[1]) == long_text + end + + @testset "many attributes on one element" begin + attrs = join(["a$i=\"v$i\"" for i in 1:50], " ") + xml = "" + doc = parse(xml, Node) + @test doc[1]["a1"] == "v1" + @test doc[1]["a50"] == "v50" + end + + @testset "whitespace around = in attributes" begin + xml = """""" + doc = parse(xml, Node) + @test 
doc[1]["a"] == "1" + @test doc[1]["b"] == "2" + end + + @testset "tab and newline in tag whitespace" begin + xml = "" + doc = parse(xml, Node) + @test doc[1]["a"] == "1" + @test doc[1]["b"] == "2" + end + + @testset "empty element: self-closing vs open-close" begin + xml1 = "" + xml2 = "" + doc1 = parse(xml1, Node) + doc2 = parse(xml2, Node) + # Both should produce empty elements + el1 = first(filter(x -> nodetype(x) == Element, children(doc1[1]))) + el2 = first(filter(x -> nodetype(x) == Element, children(doc2[1]))) + @test tag(el1) == tag(el2) == "x" + end + + @testset "document with all prolog components" begin + xml = """ + + + + +]> + + +&greeting;""" + doc = parse(xml, Node) + typed = filter(x -> nodetype(x) != Text, children(doc)) + type_list = map(nodetype, typed) + @test Declaration in type_list + @test DTD in type_list + @test Comment in type_list + @test ProcessingInstruction in type_list + @test Element in type_list + end +end + +end # top-level @testset diff --git a/test/test_pugixml.jl b/test/test_pugixml.jl new file mode 100644 index 0000000..6e46d5a --- /dev/null +++ b/test/test_pugixml.jl @@ -0,0 +1,308 @@ +# Test cases inspired by pugixml (https://github.com/zeux/pugixml, MIT license) +# Translated from tests/test_parse.cpp and tests/test_xpath.cpp + +using XML +using XML: Node, nodetype, Document, Element, Comment, CData, ProcessingInstruction, Text, Declaration +using XML: tag, value, children, attributes, simple_value, xpath +using Test + +@testset "pugixml-inspired" begin + + #==========================================================================# + # Processing Instructions # + #==========================================================================# + @testset "PI parsing" begin + doc = parse("", Node) + pis = filter(x -> nodetype(x) == ProcessingInstruction, children(doc)) + @test length(pis) == 1 + + doc = parse("", Node) + pis = filter(x -> nodetype(x) == ProcessingInstruction, children(doc)) + @test length(pis) == 1 + + doc = 
parse("", Node) + pis = filter(x -> nodetype(x) == ProcessingInstruction, children(doc)) + @test length(pis) == 1 + end + + @testset "PI errors" begin + # XML.jl is lenient about incomplete PIs without a root element, + # but these should fail when embedded in a document + @test_throws Exception parse("", Node) + @test_throws Exception parse("", Node) + end + + #==========================================================================# + # Comments # + #==========================================================================# + @testset "Comment parsing" begin + doc = parse("", Node) + comments = filter(x -> nodetype(x) == Comment, children(doc)) + @test length(comments) == 1 + @test value(comments[1]) == "" + + doc = parse("", Node) + comments = filter(x -> nodetype(x) == Comment, children(doc)) + @test value(comments[1]) == "value" + + doc = parse("", Node) + root = filter(x -> nodetype(x) == Element, children(doc))[1] + comments = filter(x -> nodetype(x) == Comment, children(root)) + @test contains(value(comments[1]), "multi") + end + + @testset "Comment errors" begin + @test_throws Exception parse("", Node) + end + + #==========================================================================# + # CDATA # + #==========================================================================# + @testset "CDATA parsing" begin + doc = parse("", Node) + root = filter(x -> nodetype(x) == Element, children(doc))[1] + cdata = filter(x -> nodetype(x) == CData, children(root)) + @test length(cdata) == 1 + @test value(cdata[1]) == "" + + doc = parse("", Node) + root = filter(x -> nodetype(x) == Element, children(doc))[1] + cdata = filter(x -> nodetype(x) == CData, children(root)) + @test value(cdata[1]) == "value" + + # CDATA preserves markup characters + doc = parse("Hello!]]>", Node) + root = filter(x -> nodetype(x) == Element, children(doc))[1] + cdata = filter(x -> nodetype(x) == CData, children(root)) + @test value(cdata[1]) == "Hello!" 
+ end + + @testset "CDATA errors" begin + @test_throws Exception parse("", Node) + @test tag(children(doc)[1]) == "node" + + doc = parse("", Node) + @test tag(children(doc)[1]) == "node" + + doc = parse("", Node) + @test tag(children(doc)[1]) == "node" + end + + @testset "Tag hierarchy" begin + doc = parse("", Node) + root = children(doc)[1] + @test tag(root) == "node" + root_els = filter(x -> nodetype(x) == Element, children(root)) + @test length(root_els) == 2 + @test tag(root_els[1]) == "n1" + @test tag(root_els[2]) == "n3" + end + + @testset "Tag errors" begin + @test_throws Exception parse("<", Node) + @test_throws Exception parse("", Node) + @test_throws Exception parse("", Node) + @test_throws Exception parse("", Node) + end + + #==========================================================================# + # Attribute Parsing # + #==========================================================================# + @testset "Attribute quotes" begin + doc = parse("", Node) + el = children(doc)[1] + @test el["id1"] == "v1" + @test el["id2"] == "v2" + end + + @testset "Attribute spaces around =" begin + doc = parse("", Node) + el = children(doc)[1] + @test el["id1"] == "v1" + @test el["id2"] == "v2" + @test el["id3"] == "v3" + @test el["id4"] == "v4" + end + + @testset "Attribute errors" begin + @test_throws Exception parse("", Node) + @test children(doc)[1]["id"] == "<>&'\"" + end + + @testset "Predefined entities in text" begin + doc = parse("<>&'"", Node) + @test simple_value(children(doc)[1]) == "<>&'\"" + end + + @testset "Numeric character references" begin + doc = parse(" ", Node) + @test simple_value(children(doc)[1]) == " " + end + + @testset "Unicode character references" begin + # Greek gamma + doc = parse("γ", Node) + @test simple_value(children(doc)[1]) == "γ" + + # Same char, lowercase hex + doc = parse("γ", Node) + @test simple_value(children(doc)[1]) == "γ" + end + + #==========================================================================# + # 
Whitespace # + #==========================================================================# + @testset "Whitespace text nodes preserved" begin + doc = parse(" ", Node) + root = children(doc)[1] + # Should have text nodes with whitespace + text_nodes = filter(x -> nodetype(x) == Text, children(root)) + @test length(text_nodes) >= 1 + end + + @testset "PCDATA content" begin + doc = parse("text content", Node) + @test simple_value(children(doc)[1]) == "text content" + end + + #==========================================================================# + # Unicode / CJK Content # + #==========================================================================# + @testset "Unicode element names (CJK)" begin + # XML.jl tokenizer does not yet support CJK characters in element/attribute names + @test_broken try + parse("<汉语>世界", Node) + true + catch + false + end + end + + @testset "Unicode text content" begin + doc = parse("Ünïcödé café naïve", Node) + @test simple_value(children(doc)[1]) == "Ünïcödé café naïve" + end + + #==========================================================================# + # Mixed Content # + #==========================================================================# + @testset "Mixed text, CDATA, comments" begin + xml = "First textSecond textLast text" + doc = parse(xml, Node) + root = children(doc)[1] + child_types = map(nodetype, children(root)) + @test Text in child_types + @test Comment in child_types + @test CData in child_types + end + + #==========================================================================# + # Complex Document # + #==========================================================================# + @testset "Complex document with all node types" begin + xml = """ + + + + + + some text + + + + + + +""" + doc = parse(xml, Node) + @test nodetype(doc) == Document + + root_els = filter(x -> nodetype(x) == Element, children(doc)) + @test length(root_els) == 1 + mesh = root_els[1] + @test tag(mesh) == "mesh" + @test mesh["name"] 
== "mesh_root" + + # Check inner content types + inner = children(mesh) + @test any(x -> nodetype(x) == Comment, inner) + @test any(x -> nodetype(x) == Text, inner) + @test any(x -> nodetype(x) == CData, inner) + @test any(x -> nodetype(x) == ProcessingInstruction, inner) + + nodes = filter(x -> nodetype(x) == Element && tag(x) == "node", inner) + @test length(nodes) == 2 + @test nodes[1]["attr1"] == "value1" + @test nodes[1]["attr2"] == "value2" + end + + #==========================================================================# + # XPath # + #==========================================================================# + @testset "XPath" begin + @testset "descendant with attribute predicate" begin + doc = parse("", Node) + results = xpath(doc, "//c[@id='b']") + @test length(results) == 1 + @test results[1]["id"] == "b" + end + + @testset "child with attribute" begin + doc = parse("", Node) + results = xpath(doc, "/a/c[@id]") + @test length(results) == 1 + @test results[1]["id"] == "b" + end + + @testset "wildcard with attribute predicate" begin + doc = parse("""test""", Node) + results = xpath(doc, "/node/*[@attr1]") + @test length(results) == 2 + end + + @testset "descendant-or-self with text()" begin + doc = parse("deep", Node) + results = xpath(doc, "//e/text()") + @test length(results) == 1 + @test value(results[1]) == "deep" + end + + @testset "positional predicate" begin + doc = parse("", Node) + results = xpath(doc, "/root/*[1]") + @test length(results) == 1 + @test tag(results[1]) == "a" + + results = xpath(doc, "/root/*[last()]") + @test length(results) == 1 + @test tag(results[1]) == "c" + end + + @testset "nested predicates" begin + doc = parse("""""", Node) + results = xpath(doc, "//subchild[@id]") + @test length(results) == 2 + end + end +end diff --git a/test/test_remote_files.jl b/test/test_remote_files.jl new file mode 100644 index 0000000..ed2b3e3 --- /dev/null +++ b/test/test_remote_files.jl @@ -0,0 +1,77 @@ +using XML +using XML: Document, 
Element, Declaration, Comment, CData, DTD, ProcessingInstruction, Text +using Downloads: download +using Test + +#==============================================================================# +# REMOTE XML FILE PARSING TESTS # +#==============================================================================# +# These tests download publicly available XML files and verify that XML.jl can +# parse them without error. A failed download (network issues, CI without +# internet, URL gone) is silently skipped — only parsing failures count as test +# failures. +# +# Not included in runtests.jl — run standalone: julia --project test/test_remote_files.jl + +function _try_download(url::AbstractString)::Union{String, Nothing} + try + path = download(url) + return read(path, String) + catch + return nothing + end +end + +const REMOTE_XML_URLS = [ + # ---- W3Schools example files ---- + ("W3Schools note.xml", "https://www.w3schools.com/xml/note.xml"), + ("W3Schools cd_catalog.xml", "https://www.w3schools.com/xml/cd_catalog.xml"), + ("W3Schools plant_catalog.xml", "https://www.w3schools.com/xml/plant_catalog.xml"), + ("W3Schools simple.xml", "https://www.w3schools.com/xml/simple.xml"), + ("W3Schools books.xml", "https://www.w3schools.com/xml/books.xml"), + + # ---- W3C SVG samples ---- + ("W3C SVG helloworld.svg", "https://dev.w3.org/SVG/tools/svgweb/samples/svg-files/helloworld.svg"), + ("W3C SVG tiger.svg", "https://dev.w3.org/SVG/tools/svgweb/samples/svg-files/tiger.svg"), + ("W3C SVG w3c.svg", "https://dev.w3.org/SVG/tools/svgweb/samples/svg-files/w3c.svg"), + ("W3C SVG lineargradient2.svg", "https://dev.w3.org/SVG/tools/svgweb/samples/svg-files/lineargradient2.svg"), + ("W3C SVG heart.svg", "https://dev.w3.org/SVG/tools/svgweb/samples/svg-files/heart.svg"), + + # ---- GitHub-hosted XML files ---- + ("JUnit XML complete example", "https://raw.githubusercontent.com/testmoapp/junitxml/main/examples/junit-complete.xml"), + ("JUnit XML basic example", 
"https://raw.githubusercontent.com/testmoapp/junitxml/main/examples/junit-basic.xml"), + ("PEPPOL invoice base example", "https://raw.githubusercontent.com/OpenPEPPOL/peppol-bis-invoice-3/master/rules/examples/base-example.xml"), + + # ---- Maven Central POM (real-world XML with namespaces) ---- + ("Maven JUnit 4.13.2 POM", "https://repo1.maven.org/maven2/junit/junit/4.13.2/junit-4.13.2.pom"), + ("Maven Guava 33.0 POM", "https://repo1.maven.org/maven2/com/google/guava/guava/33.0.0-jre/guava-33.0.0-jre.pom"), + + # ---- NASA RSS feed (live XML) ---- + ("NASA news RSS feed", "https://www.nasa.gov/news-release/feed/"), +] + +@testset "Remote XML Parsing" begin + for (label, url) in REMOTE_XML_URLS + @testset "$label" begin + xml_str = _try_download(url) + if isnothing(xml_str) + @info "Skipping $label — download failed" url + @test_skip false + else + doc = parse(xml_str, Node) + @test nodetype(doc) == Document + @test length(children(doc)) > 0 + + # Verify at least one Element exists somewhere in the document + has_element = any(x -> nodetype(x) == Element, children(doc)) + @test has_element + + # Verify write produces output and can be re-parsed + xml_out = XML.write(doc) + @test length(xml_out) > 0 + doc2 = parse(xml_out, Node) + @test nodetype(doc2) == Document + end + end + end +end diff --git a/test/test_tokenizer.jl b/test/test_tokenizer.jl new file mode 100644 index 0000000..89c7145 --- /dev/null +++ b/test/test_tokenizer.jl @@ -0,0 +1,425 @@ +using Test, XML + +using XML.XMLTokenizer + +# Convenience: collect token kinds from a string +kinds(xml) = [t.kind for t in tokenize(xml)] +raws(xml) = [String(t.raw) for t in tokenize(xml)] + +@testset "XMLTokenizer" begin + +#-----------------------------------------------------------------------# Basic text +@testset "plain text" begin + toks = collect(tokenize("hello world")) + @test length(toks) == 1 + @test toks[1].kind == TokenKinds.TEXT + @test toks[1].raw == "hello world" +end + +@testset "empty string" begin + 
@test isempty(collect(tokenize(""))) +end + +#-----------------------------------------------------------------------# Open tags +@testset "open tag without attributes" begin + @test kinds("
") == [TokenKinds.OPEN_TAG, TokenKinds.TAG_CLOSE] + @test raws("
") == [""] +end + +@testset "open tag with attributes" begin + xml = """""" + toks = collect(tokenize(xml)) + @test [t.kind for t in toks] == [ + TokenKinds.OPEN_TAG, + TokenKinds.ATTR_NAME, TokenKinds.ATTR_VALUE, + TokenKinds.ATTR_NAME, TokenKinds.ATTR_VALUE, + TokenKinds.TAG_CLOSE, + ] + @test tag_name(toks[1]) == "a" + @test toks[2].raw == "href" + @test attr_value(toks[3]) == "url" + @test toks[4].raw == "class" + @test attr_value(toks[5]) == "main" +end + +@testset "whitespace around =" begin + xml = """""" + toks = collect(tokenize(xml)) + @test [t.kind for t in toks] == [ + TokenKinds.OPEN_TAG, TokenKinds.ATTR_NAME, TokenKinds.ATTR_VALUE, TokenKinds.TAG_CLOSE, + ] + @test attr_value(toks[3]) == "1" +end + +#-----------------------------------------------------------------------# Self-closing tags +@testset "self-closing tag" begin + @test kinds("
") == [TokenKinds.OPEN_TAG, TokenKinds.SELF_CLOSE] + @test raws("
") == [""] +end + +@testset "self-closing tag with attributes" begin + xml = """""" + toks = collect(tokenize(xml)) + @test [t.kind for t in toks] == [ + TokenKinds.OPEN_TAG, TokenKinds.ATTR_NAME, TokenKinds.ATTR_VALUE, TokenKinds.SELF_CLOSE, + ] + @test tag_name(toks[1]) == "img" + @test attr_value(toks[3]) == "a.png" +end + +#-----------------------------------------------------------------------# Close tags +@testset "close tag" begin + toks = collect(tokenize("
")) + @test [t.kind for t in toks] == [TokenKinds.CLOSE_TAG, TokenKinds.TAG_CLOSE] + @test tag_name(toks[1]) == "div" + @test toks[2].raw == ">" +end + +@testset "close tag with whitespace" begin + toks = collect(tokenize("
")) + @test [t.kind for t in toks] == [TokenKinds.CLOSE_TAG, TokenKinds.TAG_CLOSE] + @test tag_name(toks[1]) == "div" +end + +#-----------------------------------------------------------------------# Open + close round-trip +@testset "element with text" begin + xml = "

hello

" + @test kinds(xml) == [ + TokenKinds.OPEN_TAG, TokenKinds.TAG_CLOSE, + TokenKinds.TEXT, + TokenKinds.CLOSE_TAG, TokenKinds.TAG_CLOSE, + ] + toks = collect(tokenize(xml)) + @test tag_name(toks[1]) == "p" + @test toks[3].raw == "hello" + @test tag_name(toks[4]) == "p" +end + +#-----------------------------------------------------------------------# Namespaced tags +@testset "namespaced tag" begin + xml = """""" + toks = collect(tokenize(xml)) + @test tag_name(toks[1]) == "ns:el" + @test toks[2].raw == "xmlns:ns" +end + +#-----------------------------------------------------------------------# Comments +@testset "comment" begin + xml = "" + toks = collect(tokenize(xml)) + @test [t.kind for t in toks] == [TokenKinds.COMMENT_OPEN, TokenKinds.COMMENT_CONTENT, TokenKinds.COMMENT_CLOSE] + @test toks[1].raw == "" +end + +@testset "empty comment" begin + toks = collect(tokenize("")) + @test [t.kind for t in toks] == [TokenKinds.COMMENT_OPEN, TokenKinds.COMMENT_CONTENT, TokenKinds.COMMENT_CLOSE] + @test toks[2].raw == "" +end + +@testset "comment with markup-like content" begin + toks = collect(tokenize("")) + @test toks[2].raw == " not a tag " +end + +#-----------------------------------------------------------------------# CDATA +@testset "CDATA" begin + xml = "]]>" + toks = collect(tokenize(xml)) + @test [t.kind for t in toks] == [TokenKinds.CDATA_OPEN, TokenKinds.CDATA_CONTENT, TokenKinds.CDATA_CLOSE] + @test toks[1].raw == "" + @test toks[3].raw == "]]>" +end + +@testset "empty CDATA" begin + toks = collect(tokenize("")) + @test [t.kind for t in toks] == [TokenKinds.CDATA_OPEN, TokenKinds.CDATA_CONTENT, TokenKinds.CDATA_CLOSE] + @test toks[2].raw == "" +end + +#-----------------------------------------------------------------------# Processing instructions +@testset "processing instruction" begin + xml = """""" + toks = collect(tokenize(xml)) + @test [t.kind for t in toks] == [TokenKinds.PI_OPEN, TokenKinds.PI_CONTENT, TokenKinds.PI_CLOSE] + @test toks[1].raw == "" 
+end + +@testset "PI with no content" begin + toks = collect(tokenize("")) + @test [t.kind for t in toks] == [TokenKinds.PI_OPEN, TokenKinds.PI_CONTENT, TokenKinds.PI_CLOSE] + @test pi_target(toks[1]) == "target" + @test toks[2].raw == "" +end + +#-----------------------------------------------------------------------# XML declaration +@testset "XML declaration" begin + xml = """""" + toks = collect(tokenize(xml)) + @test [t.kind for t in toks] == [ + TokenKinds.XML_DECL_OPEN, + TokenKinds.ATTR_NAME, TokenKinds.ATTR_VALUE, + TokenKinds.ATTR_NAME, TokenKinds.ATTR_VALUE, + TokenKinds.XML_DECL_CLOSE, + ] + @test pi_target(toks[1]) == "xml" + @test toks[1].raw == "" +end + +@testset "XML declaration with single quotes" begin + xml = "" + toks = collect(tokenize(xml)) + @test toks[3].raw == "'1.0'" + @test attr_value(toks[3]) == "1.0" +end + +#-----------------------------------------------------------------------# DOCTYPE +@testset "DOCTYPE simple" begin + xml = """""" + toks = collect(tokenize(xml)) + @test [t.kind for t in toks] == [TokenKinds.DOCTYPE_OPEN, TokenKinds.DOCTYPE_CONTENT, TokenKinds.DOCTYPE_CLOSE] + @test toks[1].raw == "" +end + +@testset "DOCTYPE with internal subset" begin + xml = """]>""" + toks = collect(tokenize(xml)) + @test [t.kind for t in toks] == [TokenKinds.DOCTYPE_OPEN, TokenKinds.DOCTYPE_CONTENT, TokenKinds.DOCTYPE_CLOSE] + @test toks[2].raw == " note []" +end + +@testset "DOCTYPE with quoted > in internal subset" begin + xml = """b">]>""" + toks = collect(tokenize(xml)) + @test [t.kind for t in toks] == [TokenKinds.DOCTYPE_OPEN, TokenKinds.DOCTYPE_CONTENT, TokenKinds.DOCTYPE_CLOSE] + @test occursin("a>b", toks[2].raw) +end + +#-----------------------------------------------------------------------# Full document +@testset "full document" begin + xml = """ + + + text + + + + +""" + toks = collect(tokenize(xml)) + tok_kinds = [t.kind for t in toks] + + # XML declaration + @test tok_kinds[1] == TokenKinds.XML_DECL_OPEN + # DOCTYPE present + 
@test TokenKinds.DOCTYPE_OPEN in tok_kinds + # All open tags have matching closes + open_names = [tag_name(t) for t in toks if t.kind == TokenKinds.OPEN_TAG] + close_names = [tag_name(t) for t in toks if t.kind == TokenKinds.CLOSE_TAG] + @test open_names == ["root", "child", "empty"] + @test close_names == ["child", "root"] + # CDATA is present + cdata_content = [t.raw for t in toks if t.kind == TokenKinds.CDATA_CONTENT] + @test cdata_content == ["data"] + # Comment is present + comment_content = [t.raw for t in toks if t.kind == TokenKinds.COMMENT_CONTENT] + @test comment_content == [" comment "] + # PI is present + pi_opens = [t for t in toks if t.kind == TokenKinds.PI_OPEN] + @test length(pi_opens) == 1 + @test pi_target(pi_opens[1]) == "pi" +end + +#-----------------------------------------------------------------------# Raw round-trip +@testset "concatenated raw reproduces input" begin + # Round-trip works for inputs where no whitespace/= is consumed between tokens. + # Whitespace around `=` in attributes is consumed and not part of any token. + for xml in [ + """
""", + """""", + """""", + """]>""", + """

text

""", + ] + reconstructed = join(t.raw for t in tokenize(xml)) + @test reconstructed == xml + end +end + +@testset "attribute whitespace is not preserved" begin + # Whitespace around `=` and between attrs is consumed, not emitted as tokens. + xml = """
""" + toks = collect(tokenize(xml)) + @test [t.kind for t in toks] == [ + TokenKinds.OPEN_TAG, TokenKinds.ATTR_NAME, TokenKinds.ATTR_VALUE, + TokenKinds.ATTR_NAME, TokenKinds.ATTR_VALUE, TokenKinds.SELF_CLOSE, + ] +end + +#-----------------------------------------------------------------------# Iterator protocol +@testset "iterator protocol" begin + t = tokenize("") + @test Base.IteratorSize(typeof(t)) == Base.SizeUnknown() + @test Base.eltype(typeof(t)) == Token + toks = collect(t) + @test length(toks) == 2 +end + +#-----------------------------------------------------------------------# Utility error handling +@testset "tag_name errors on wrong kind" begin + tok = first(tokenize("hello")) + @test_throws ArgumentError tag_name(tok) +end + +@testset "attr_value errors on wrong kind" begin + tok = first(tokenize("")) + @test_throws ArgumentError attr_value(tok) +end + +@testset "pi_target errors on wrong kind" begin + tok = first(tokenize("")) + @test_throws ArgumentError pi_target(tok) +end + +#-----------------------------------------------------------------------# Error cases +@testset "error: unterminated comment" begin + @test_throws ArgumentError collect(tokenize("")) + @test toks[2].raw == " héllo " +end + +#-----------------------------------------------------------------------# Edge cases +@testset "adjacent tags" begin + xml = "" + toks = collect(tokenize(xml)) + open_names = [tag_name(t) for t in toks if t.kind == TokenKinds.OPEN_TAG] + close_names = [tag_name(t) for t in toks if t.kind == TokenKinds.CLOSE_TAG] + @test open_names == ["a", "b"] + @test close_names == ["a", "b"] + # No text tokens between them + @test !any(t -> t.kind == TokenKinds.TEXT, toks) +end + +@testset "text between adjacent tags" begin + xml = "xy" + texts = [t.raw for t in tokenize(xml) if t.kind == TokenKinds.TEXT] + @test texts == ["x", "y"] +end + +@testset "multiple attributes" begin + xml = """
""" + names = [String(t.raw) for t in tokenize(xml) if t.kind == TokenKinds.ATTR_NAME] + vals = [String(attr_value(t)) for t in tokenize(xml) if t.kind == TokenKinds.ATTR_VALUE] + @test names == ["a", "b", "c"] + @test vals == ["1", "2", "3"] +end + +@testset "attribute with > in value" begin + xml = """""" + toks = collect(tokenize(xml)) + @test attr_value(toks[3]) == "1>2" + @test toks[end].kind == TokenKinds.TAG_CLOSE +end + +@testset "attribute with single quotes" begin + xml = "" + toks = collect(tokenize(xml)) + @test toks[3].raw == "'val'" + @test attr_value(toks[3]) == "val" +end + +@testset "mixed quote styles" begin + xml = """""" + vals = [attr_value(t) for t in tokenize(xml) if t.kind == TokenKinds.ATTR_VALUE] + @test vals == ["1", "2"] +end + +@testset "whitespace-only text" begin + xml = " \n\t " + texts = [t for t in tokenize(xml) if t.kind == TokenKinds.TEXT] + @test length(texts) == 1 + @test texts[1].raw == " \n\t " +end + +@testset "entities preserved verbatim" begin + xml = "

& < A

" + texts = [t.raw for t in tokenize(xml) if t.kind == TokenKinds.TEXT] + @test texts == ["& < A"] +end + +@testset "show method" begin + tok = first(tokenize("hello")) + buf = IOBuffer() + show(buf, tok) + s = String(take!(buf)) + @test occursin("TEXT", s) + @test occursin("hello", s) +end + +end # top-level testset diff --git a/test/test_w3c.jl b/test/test_w3c.jl new file mode 100644 index 0000000..16587ed --- /dev/null +++ b/test/test_w3c.jl @@ -0,0 +1,154 @@ +# W3C XML Conformance Test Suite +# https://www.w3.org/XML/Test/xmlts20130923.tar +# +# Test types: +# - "valid": well-formed XML that is also valid (should parse successfully) +# - "invalid": well-formed but not valid per DTD (should still parse — we're non-validating) +# - "not-wf": not well-formed XML (should fail to parse) +# - "error": optional errors (parser may or may not reject) +# +# We only run tests with ENTITIES="none" since XML.jl does not expand external entities. +# We skip XML 1.1 tests (VERSION="1.1" or RECOMMENDATION="XML1.1"). + +using XML +using XML: Node, nodetype, Document +using Test +using Downloads: download +using Tar + +const W3C_URL = "https://www.w3.org/XML/Test/xmlts20130923.tar" +const W3C_DIR = joinpath(@__DIR__, "data", "w3c") +const W3C_TAR = joinpath(@__DIR__, "data", "xmlts20130923.tar") + +function ensure_w3c_suite() + isdir(joinpath(W3C_DIR, "xmlconf")) && return + mkpath(W3C_DIR) + if !isfile(W3C_TAR) + @info "Downloading W3C XML Conformance Test Suite..." + download(W3C_URL, W3C_TAR) + end + @info "Extracting W3C XML Conformance Test Suite..." 
+ open(W3C_TAR) do io + Tar.extract(io, W3C_DIR) + end +end + +# Parse a test catalog XML and extract TEST entries +function parse_catalog(catalog_path::String) + isfile(catalog_path) || return NamedTuple[] + doc = read(catalog_path, Node) + tests = NamedTuple[] + _collect_tests!(tests, doc, dirname(catalog_path)) + return tests +end + +function _collect_tests!(tests, node, base_dir) + for child in XML.children(node) + nodetype(child) !== XML.Element && continue + if XML.tag(child) == "TEST" + attrs = XML.attributes(child) + haskey(attrs, "URI") || continue + push!(tests, ( + type = get(attrs, "TYPE", ""), + entities = get(attrs, "ENTITIES", ""), + id = get(attrs, "ID", ""), + uri = joinpath(base_dir, attrs["URI"]), + version = get(attrs, "VERSION", "1.0"), + recommendation = get(attrs, "RECOMMENDATION", ""), + )) + elseif XML.tag(child) == "TESTCASES" + # TESTCASES may have xml:base to adjust paths + sub_base = get(XML.attributes(child), "xml:base", "") + child_base = isempty(sub_base) ? 
base_dir : joinpath(base_dir, sub_base) + _collect_tests!(tests, child, child_base) + else + _collect_tests!(tests, child, base_dir) + end + end +end + +function is_xml11(test) + test.version == "1.1" || + test.recommendation == "XML1.1" || + contains(test.recommendation, "XML1.1") +end + +ensure_w3c_suite() + +# Catalogs for XML 1.0 tests +const XMLCONF_DIR = joinpath(W3C_DIR, "xmlconf") +const CATALOGS = filter(isfile, [ + joinpath(XMLCONF_DIR, "xmltest", "xmltest.xml"), + joinpath(XMLCONF_DIR, "sun", "sun-valid.xml"), + joinpath(XMLCONF_DIR, "sun", "sun-invalid.xml"), + joinpath(XMLCONF_DIR, "sun", "sun-not-wf.xml"), + joinpath(XMLCONF_DIR, "sun", "sun-error.xml"), + joinpath(XMLCONF_DIR, "oasis", "oasis.xml"), + joinpath(XMLCONF_DIR, "ibm", "ibm_oasis_not-wf.xml"), + joinpath(XMLCONF_DIR, "ibm", "ibm_oasis_valid.xml"), + joinpath(XMLCONF_DIR, "ibm", "ibm_oasis_invalid.xml"), + joinpath(XMLCONF_DIR, "eduni", "errata-2e", "errata2e.xml"), + joinpath(XMLCONF_DIR, "eduni", "errata-3e", "errata3e.xml"), + joinpath(XMLCONF_DIR, "eduni", "errata-4e", "errata4e.xml"), + joinpath(XMLCONF_DIR, "eduni", "namespaces", "1.0", "rmt-ns10.xml"), + joinpath(XMLCONF_DIR, "eduni", "misc", "ht-bh.xml"), + joinpath(XMLCONF_DIR, "japanese", "japanese.xml"), +]) + +# Collect all tests +all_tests = NamedTuple[] +for catalog in CATALOGS + append!(all_tests, parse_catalog(catalog)) +end + +# Filter: only ENTITIES="none", skip XML 1.1 +xml10_tests = filter(t -> t.entities == "none" && !is_xml11(t), all_tests) + +valid_tests = filter(t -> t.type in ("valid", "invalid"), xml10_tests) +notwf_tests = filter(t -> t.type == "not-wf", xml10_tests) + +@info "W3C tests: $(length(valid_tests)) valid/invalid, $(length(notwf_tests)) not-wf (from $(length(all_tests)) total)" + +@testset "W3C Conformance" begin + @testset "Well-formed documents should parse" begin + n_pass = 0 + n_fail = 0 + failures = String[] + for test in valid_tests + isfile(test.uri) || continue + try + doc = read(test.uri, Node) 
+ @test nodetype(doc) == Document + n_pass += 1 + catch e + n_fail += 1 + push!(failures, "$(test.id): $e") + end + end + if n_fail > 0 + @warn "W3C well-formed: $n_pass passed, $n_fail failed" failures=first(failures, 20) + end + @info "W3C well-formed: $n_pass / $(n_pass + n_fail) passed" + end + + @testset "Not-well-formed documents should fail to parse" begin + n_pass = 0 + n_fail = 0 + failures = String[] + for test in notwf_tests + isfile(test.uri) || continue + try + read(test.uri, Node) + n_fail += 1 + push!(failures, test.id) + catch + @test true + n_pass += 1 + end + end + if n_fail > 0 + @warn "W3C not-well-formed: $n_pass rejected, $n_fail incorrectly accepted" failures=first(failures, 20) + end + @info "W3C not-well-formed: $n_pass / $(n_pass + n_fail) correctly rejected" + end +end