From 6e295f94b7a6b1ea37174b8292714b313a53ec5d Mon Sep 17 00:00:00 2001 From: Ian Wilson Date: Thu, 11 Jun 2026 13:35:28 -0700 Subject: [PATCH 01/25] Add simple copy method to allow simulating removing placeholders. --- tdom/placeholders.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/tdom/placeholders.py b/tdom/placeholders.py index 1cf47128..eabe4583 100644 --- a/tdom/placeholders.py +++ b/tdom/placeholders.py @@ -61,6 +61,9 @@ class PlaceholderState: config: PlaceholderConfig = field(default_factory=make_placeholder_config) """Collection of currently 'known and active' placeholder indexes.""" + def copy(self): + return PlaceholderState(known=self.known.copy(), config=self.config) + @property def is_empty(self) -> bool: return len(self.known) == 0 From 4077f3e6f5481f5784bd5a16da0b4441147e7d40 Mon Sep 17 00:00:00 2001 From: Ian Wilson Date: Thu, 11 Jun 2026 13:41:36 -0700 Subject: [PATCH 02/25] Typeguard against bogus empty starttag_text. --- tdom/parser.py | 21 ++++++++++++++++----- 1 file changed, 16 insertions(+), 5 deletions(-) diff --git a/tdom/parser.py b/tdom/parser.py index 2eec36d8..63207022 100644 --- a/tdom/parser.py +++ b/tdom/parser.py @@ -189,11 +189,9 @@ def make_open_tag(self, tag: str, attrs: Sequence[HTMLAttribute]) -> OpenTag: # @NOTE: This must be called when the tag is handled since it is # populated based on the most recently finished start tag. Otherwise # the value will be out of sync. - starttag_text = self.get_starttag_text() - if starttag_text is None: - raise AssertionError( - f"Expected startag_text to be set when parsing component at {i_index}." - ) + starttag_text = self.always_get_starttag_text( + f"Expected startag_text to be set when parsing component at {i_index}." + ) tattrs = self.make_tattrs(attrs) @@ -371,6 +369,19 @@ def validate_end_tag(self, tag: str, open_tag: OpenTag) -> int | None: # any of this in the parser, instead relying on higher layers. 
return tag_ref.i_indexes[0] + def always_get_starttag_text( + self, msg: str = "Expecting starttag text to be set." + ) -> str: + """ + Wrap get_starttag_text and just raise if None is returned. + + Do this so we don't guard for `None` everywhere. + """ + starttag_text = self.get_starttag_text() + if starttag_text is None: + raise AssertionError(msg) + return starttag_text + # ------------------------------------------ # HTMLParser tag callbacks # ------------------------------------------ From b95eaa81db02e1b771a2b2317765e186ba97d6dd Mon Sep 17 00:00:00 2001 From: Ian Wilson Date: Thu, 11 Jun 2026 13:45:25 -0700 Subject: [PATCH 03/25] Add debugging/introspection info to open tags. --- tdom/parser.py | 29 ++++++++++++++++++++++++++--- 1 file changed, 26 insertions(+), 3 deletions(-) diff --git a/tdom/parser.py b/tdom/parser.py index 63207022..3cafc45e 100644 --- a/tdom/parser.py +++ b/tdom/parser.py @@ -27,6 +27,12 @@ @dataclass class OpenTElement: + starttag_text: str + " Entire starttag as parsed, includes placeholders, used for debugging. " + raw_attrs: Sequence[HTMLAttribute] + " Attrs as parsed, includes placeholders, used for debugging. " + startend: bool + " Was parsed as startend tag, ie. , used for debugging. " tag: str attrs: tuple[TAttribute, ...] children: list[TNode] = field(default_factory=list) @@ -39,6 +45,12 @@ class OpenTFragment: @dataclass class OpenTComponent: + starttag_text: str + " Entire starttag as parsed, includes placeholders, used for debugging. " + raw_attrs: Sequence[HTMLAttribute] + " Attrs as parsed, includes placeholders, used for debugging. " + startend: bool + " Was parsed as startend tag, ie. , used for debugging. 
" start_i_index: int children_start_s_index: int """The strings index where the component's children template starts.""" @@ -159,12 +171,20 @@ def make_tattrs(self, attrs: Sequence[HTMLAttribute]) -> tuple[TAttribute, ...]: # Tag Helpers # ------------------------------------------ - def make_open_tag(self, tag: str, attrs: Sequence[HTMLAttribute]) -> OpenTag: + def make_open_tag( + self, tag: str, attrs: Sequence[HTMLAttribute], startend: bool = False + ) -> OpenTag: """Build an OpenTag from a raw tag and attribute tuples.""" tag_ref = self.placeholders.remove_placeholders(tag) if tag_ref.is_literal: - return OpenTElement(tag=tag, attrs=self.make_tattrs(attrs)) + return OpenTElement( + starttag_text=self.always_get_starttag_text(), + raw_attrs=attrs, + startend=startend, + tag=tag, + attrs=self.make_tattrs(attrs), + ) if not tag_ref.is_singleton: raise ValueError( @@ -203,6 +223,9 @@ def make_open_tag(self, tag: str, attrs: Sequence[HTMLAttribute]) -> OpenTag: ) return OpenTComponent( + starttag_text=starttag_text, + raw_attrs=attrs, + startend=startend, start_i_index=i_index, children_start_s_index=children_start_s_index, offset_into_children_start_s=offset_into_children_start_s, @@ -396,7 +419,7 @@ def handle_starttag(self, tag: str, attrs: Sequence[HTMLAttribute]) -> None: def handle_startendtag(self, tag: str, attrs: Sequence[HTMLAttribute]) -> None: """Dispatch a self-closing tag, `` to specialized handlers.""" - open_tag = self.make_open_tag(tag, attrs) + open_tag = self.make_open_tag(tag, attrs, startend=True) final_tag = self.finalize_tag(open_tag) self.append_child(final_tag) From 908415044241c096e52068d2a62836684a5d0505 Mon Sep 17 00:00:00 2001 From: Ian Wilson Date: Thu, 11 Jun 2026 14:22:13 -0700 Subject: [PATCH 04/25] Improve unclosed tags message for ambiguous slash case. 
--- tdom/parser.py | 46 +++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 45 insertions(+), 1 deletion(-) diff --git a/tdom/parser.py b/tdom/parser.py index 3cafc45e..ce0f40ab 100644 --- a/tdom/parser.py +++ b/tdom/parser.py @@ -471,6 +471,33 @@ def reset(self): self.placeholders = PlaceholderState() self.source = None + def has_ambiguous_forward_slash(self, open_tag: OpenTag) -> bool: + """ + Detect when an unquoted attribute value consumes a trailing "/" that + *might* have been meant to attempt to self-close a tag, ie. "/>". + + This can come up with literal values or values with interpolations. + + Such as "
" or "<{Component} title=test/>". + + Or more often "<{Component} title={title}/>" which should be corrected + with "<{Component} title={title} />". + """ + if isinstance(open_tag, (OpenTElement, OpenTComponent)): + return ( + # has attributes + len(open_tag.raw_attrs) > 0 + # last attr not bare attribute + and open_tag.raw_attrs[-1][1] is not None + # last char of last attr is "/" + and open_tag.raw_attrs[-1][1][-1] == "/" + # parsed starttag ends with "/>" + and open_tag.starttag_text.endswith("/>") + # if parsed as startend then its not ambiguous + and not open_tag.startend + ) + return False + def close(self) -> None: if self.waiting_for_data(): # We apply heuristics here to try to guess why the parser didn't finish. @@ -483,7 +510,24 @@ def close(self) -> None: "Parser expects more data, is the template valid html?" ) if self.stack: - raise ValueError("Invalid HTML structure: unclosed tags remain.") + e = ValueError("Invalid HTML structure: unclosed tags remain.") + # Check for tags that might have meant to self-close but whose + # unquoted last attribute value consumed a "/", ie.
. + parent = self.stack[-1] + if isinstance(parent, (OpenTElement, OpenTComponent)): + if isinstance(parent, OpenTElement): + starttag = parent.tag + elif isinstance(parent, OpenTComponent): + starttag = ( + f"{{{self.get_source().format_starttag(parent.start_i_index)}}}" + ) + if self.has_ambiguous_forward_slash(parent): + e.add_note( + f'Did you mean to quote the last attribute or put a space before "/>" for "<{starttag} .../>"?' + ) + else: + e.add_note(f"Most recently unclosed tag is <{starttag} ...>") + raise e if not self.placeholders.is_empty: raise ValueError("Some placeholders were never resolved.") super().close() From 8b226fc36fcd66e4dd01400580725c0eaca2c7d8 Mon Sep 17 00:00:00 2001 From: Ian Wilson Date: Fri, 12 Jun 2026 13:26:54 -0700 Subject: [PATCH 05/25] Fix method defn order. --- tdom/parser.py | 54 +++++++++++++++++++++++++------------------------- 1 file changed, 27 insertions(+), 27 deletions(-) diff --git a/tdom/parser.py b/tdom/parser.py index ce0f40ab..148fa34e 100644 --- a/tdom/parser.py +++ b/tdom/parser.py @@ -405,6 +405,33 @@ def always_get_starttag_text( raise AssertionError(msg) return starttag_text + def has_ambiguous_forward_slash(self, open_tag: OpenTag) -> bool: + """ + Detect when an unquoted attribute value consumes a trailing "/" that + *might* have been meant to attempt to self-close a tag, ie. "/>". + + This can come up with literal values or values with interpolations. + + Such as "
" or "<{Component} title=test/>". + + Or more often "<{Component} title={title}/>" which should be corrected + with "<{Component} title={title} />". + """ + if isinstance(open_tag, (OpenTElement, OpenTComponent)): + return ( + # has attributes + len(open_tag.raw_attrs) > 0 + # last attr not bare attribute + and open_tag.raw_attrs[-1][1] is not None + # last char of last attr is "/" + and open_tag.raw_attrs[-1][1][-1] == "/" + # parsed starttag ends with "/>" + and open_tag.starttag_text.endswith("/>") + # if parsed as startend then its not ambiguous + and not open_tag.startend + ) + return False + # ------------------------------------------ # HTMLParser tag callbacks # ------------------------------------------ @@ -471,33 +498,6 @@ def reset(self): self.placeholders = PlaceholderState() self.source = None - def has_ambiguous_forward_slash(self, open_tag: OpenTag) -> bool: - """ - Detect when an unquoted attribute value consumes a trailing "/" that - *might* have been meant to attempt to self-close a tag, ie. "/>". - - This can come up with literal values or values with interpolations. - - Such as "
" or "<{Component} title=test/>". - - Or more often "<{Component} title={title}/>" which should be corrected - with "<{Component} title={title} />". - """ - if isinstance(open_tag, (OpenTElement, OpenTComponent)): - return ( - # has attributes - len(open_tag.raw_attrs) > 0 - # last attr not bare attribute - and open_tag.raw_attrs[-1][1] is not None - # last char of last attr is "/" - and open_tag.raw_attrs[-1][1][-1] == "/" - # parsed starttag ends with "/>" - and open_tag.starttag_text.endswith("/>") - # if parsed as startend then its not ambiguous - and not open_tag.startend - ) - return False - def close(self) -> None: if self.waiting_for_data(): # We apply heuristics here to try to guess why the parser didn't finish. From f1b57390b9568aa764cee71100c25c5484cafd8a Mon Sep 17 00:00:00 2001 From: Ian Wilson Date: Fri, 12 Jun 2026 13:27:56 -0700 Subject: [PATCH 06/25] Always fallback to tag str for error, fixes typecheck. --- tdom/parser.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tdom/parser.py b/tdom/parser.py index 148fa34e..f95bc821 100644 --- a/tdom/parser.py +++ b/tdom/parser.py @@ -515,12 +515,12 @@ def close(self) -> None: # unquoted last attribute value consumed a "/", ie.
. parent = self.stack[-1] if isinstance(parent, (OpenTElement, OpenTComponent)): - if isinstance(parent, OpenTElement): - starttag = parent.tag - elif isinstance(parent, OpenTComponent): + if isinstance(parent, OpenTComponent): starttag = ( f"{{{self.get_source().format_starttag(parent.start_i_index)}}}" ) + else: + starttag = parent.tag if self.has_ambiguous_forward_slash(parent): e.add_note( f'Did you mean to quote the last attribute or put a space before "/>" for "<{starttag} .../>"?' ) From 922c2bf5b2c31870496539dbd6a62b53431b243d Mon Sep 17 00:00:00 2001 From: Ian Wilson Date: Fri, 12 Jun 2026 14:38:36 -0700 Subject: [PATCH 07/25] Refine error messages for other cases where a trailing slash is consumed instead of self-closing. --- tdom/parser.py | 49 +++++++++++++++++++++++++++++++++++++++++++------ tdom/parser_test.py | 40 +++++++++++++++++++++++++++++++++++++--- 2 files changed, 80 insertions(+), 9 deletions(-) diff --git a/tdom/parser.py b/tdom/parser.py index f95bc821..35d5e43c 100644 --- a/tdom/parser.py +++ b/tdom/parser.py @@ -84,6 +84,26 @@ class SourceTracker: def interpolations(self) -> tuple[Interpolation, ...]: return self.template.interpolations + def _check_indices(self, index1: int, index2: int): + last_index = len(self.interpolations) - 1 + if max(index1, index2) > last_index or min(index1, index2) < 0: + raise ValueError( + f"Interpolation indices exceed bounds: {index1} {index2}: [0...{last_index}]" + ) + + def expressions_match(self, i_index1: int, i_index2: int) -> bool: + self._check_indices(i_index1, i_index2) + return ( + self.interpolations[i_index1].expression + == self.interpolations[i_index2].expression + ) + + def values_match(self, i_index1: int, i_index2: int) -> bool: + self._check_indices(i_index1, i_index2) + return ( + self.interpolations[i_index1].value == self.interpolations[i_index2].value + ) + def advance_interpolation(self) -> int: """Call before processing an interpolation to move to the next one.""" self.i_index += 1 @@ -360,7
+380,7 @@ def extract_component_children_ref( def validate_end_tag(self, tag: str, open_tag: OpenTag) -> int | None: """Validate that closing tag matches open tag. Return component end index if applicable.""" - assert self.source, "Parser source tracker not initialized." + source = self.get_source() tag_ref = self.placeholders.remove_placeholders(tag) match open_tag: @@ -380,16 +400,33 @@ def validate_end_tag(self, tag: str, open_tag: OpenTag) -> int | None: case OpenTComponent(start_i_index=start_i_index): if tag_ref.is_literal: - raise ValueError( - f"Mismatched closing tag for component starting at {self.source.format_starttag(start_i_index)}." + starttag = source.format_starttag(start_i_index) + e = ValueError( + f"Mismatched closing tag for component with tag {{{starttag}}}." ) + if self.has_ambiguous_forward_slash(open_tag): + e.add_note( + f'Did you mean to quote the last attribute or put a space before "/>" for "<{{{starttag}}} .../>"?' + ) + raise e if not tag_ref.is_singleton: raise ValueError( "Component end tags must have exactly one interpolation." ) - # HERE BE DRAGONS: the interpolation at end_i_index shuld be a - # component callable that matches the start tag. We do not check - # any of this in the parser, instead relying on higher layers. + if not source.expressions_match( + open_tag.start_i_index, tag_ref.i_indexes[0] + ) and not source.values_match( + open_tag.start_i_index, tag_ref.i_indexes[0] + ): + e = TypeError( + "Component start and end tags must contain the same callable." + ) + if self.has_ambiguous_forward_slash(open_tag): + starttag = source.format_starttag(start_i_index) + e.add_note( + f'Did you mean to quote the last attribute or put a space before "/>" for "<{{{starttag}}} .../>"?' 
+ ) + raise e return tag_ref.i_indexes[0] def always_get_starttag_text( diff --git a/tdom/parser_test.py b/tdom/parser_test.py index d1650ae0..e457a3a2 100644 --- a/tdom/parser_test.py +++ b/tdom/parser_test.py @@ -425,15 +425,17 @@ def Component(): assert node == TComponent(start_i_index=0, end_i_index=1) -def test_component_element_special_case_mismatched_closing_tag_still_parses(): +def test_component_element_special_case_mismatched_closing_tag_error(): def Component1(): pass def Component2(): pass - node = TemplateParser.parse(t"<{Component1}>") - assert node == TComponent(start_i_index=0, end_i_index=1) + with pytest.raises( + TypeError, match="Component start and end tags must contain the same callable." + ): + _ = TemplateParser.parse(t"<{Component1}>") def test_component_element_invalid_closing_tag(): @@ -602,3 +604,35 @@ def test_extract_with_templated_attr_gt_char(self, Component): strings=("
Hello, World!
",), i_indexes=() ), ) + + +class TestComponentUnquotedAttrValue: + @pytest.fixture + def Comp(self): + def _Comp(children: Template, title: str) -> Template: + return children + + return _Comp + + @pytest.fixture + def Comp2(self): + def _Comp2(children: Template, title: str) -> Template: + return children + + return _Comp2 + + def test_comp_unquoted_attr_value_error_root(self, Comp): + with pytest.raises( + ValueError, match="Did you mean to quote the last attribute" + ): + _ = TemplateParser.parse(t"<{Comp} title=today/>") + + def test_comp_unquoted_attr_value_error_nested_in_el(self, Comp): + with pytest.raises( + ValueError, match="Did you mean to quote the last attribute" + ): + _ = TemplateParser.parse(t"
<{Comp} title=today/>
") + + def test_comp_unquoted_attr_value_error_nested_in_comp(self, Comp, Comp2): + with pytest.raises(TypeError, match="Did you mean to quote the last attribute"): + _ = TemplateParser.parse(t"<{Comp2}><{Comp} title=today/>") From 2b03b4eafe068d9929e401ccada92974c8bdd98b Mon Sep 17 00:00:00 2001 From: Ian Wilson Date: Fri, 12 Jun 2026 14:52:40 -0700 Subject: [PATCH 08/25] Use getter directly but still guard against None. --- tdom/parser.py | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/tdom/parser.py b/tdom/parser.py index 35d5e43c..16b9719f 100644 --- a/tdom/parser.py +++ b/tdom/parser.py @@ -199,7 +199,7 @@ def make_open_tag( if tag_ref.is_literal: return OpenTElement( - starttag_text=self.always_get_starttag_text(), + starttag_text=self.get_starttag_text(), raw_attrs=attrs, startend=startend, tag=tag, @@ -229,7 +229,7 @@ def make_open_tag( # @NOTE: This must be called when the tag is handled since it is # populated based on the most recently finished start tag. Otherwise # the value will be out of sync. - starttag_text = self.always_get_starttag_text( + starttag_text = self.get_starttag_text( f"Expected startag_text to be set when parsing component at {i_index}." ) @@ -429,15 +429,13 @@ def validate_end_tag(self, tag: str, open_tag: OpenTag) -> int | None: raise e return tag_ref.i_indexes[0] - def always_get_starttag_text( - self, msg: str = "Expecting starttag text to be set." - ) -> str: + def get_starttag_text(self, msg: str = "Expecting starttag text to be set.") -> str: """ Wrap get_starttag_text and just raise if None is returned. Do this so we don't guard for `None` everywhere. 
""" - starttag_text = self.get_starttag_text() + starttag_text = super().get_starttag_text() if starttag_text is None: raise AssertionError(msg) return starttag_text From a10f209abe3f4e082f8520145f20cc588c0d3324 Mon Sep 17 00:00:00 2001 From: Ian Wilson Date: Fri, 12 Jun 2026 15:07:54 -0700 Subject: [PATCH 09/25] Restrict self-close suggestion to components. --- tdom/parser.py | 23 ++++++++++------------- 1 file changed, 10 insertions(+), 13 deletions(-) diff --git a/tdom/parser.py b/tdom/parser.py index 16b9719f..0960ad38 100644 --- a/tdom/parser.py +++ b/tdom/parser.py @@ -549,19 +549,16 @@ def close(self) -> None: # Check for tags that might have meant to self-close but whose # unquoted last attribute value consumed a "/", ie.
. parent = self.stack[-1] - if isinstance(parent, (OpenTElement, OpenTComponent)): - if isinstance(parent, OpenTComponent): - starttag = ( - f"{{{self.get_source().format_starttag(parent.start_i_index)}}}" - ) - else: - starttag = parent.tag - if self.has_ambiguous_forward_slash(parent): - e.add_note( - f'Did you mean to quote the last attribute or put a space before "/>" for "<{starttag} .../>"?' - ) - else: - e.add_note(f"Most recently unclosed tag is <{starttag} ...>") + # @TODO: We need to determine which tags this might apply to, this only applies to components. + if isinstance(parent, OpenTComponent) and self.has_ambiguous_forward_slash( + parent + ): + starttag = ( + f"{{{self.get_source().format_starttag(parent.start_i_index)}}}" + ) + e.add_note( + f'Did you mean to quote the last attribute or put a space before "/>" for "<{starttag} .../>"?' + ) raise e if not self.placeholders.is_empty: raise ValueError("Some placeholders were never resolved.") From f7b82cd15863626b0d1021e08062e85dbba1c3af Mon Sep 17 00:00:00 2001 From: Ian Wilson Date: Thu, 11 Jun 2026 13:34:11 -0700 Subject: [PATCH 10/25] Add namespace type. --- tdom/htmlspec.py | 4 ++++ tdom/processor.py | 5 +++-- 2 files changed, 7 insertions(+), 2 deletions(-) diff --git a/tdom/htmlspec.py b/tdom/htmlspec.py index 941dfb21..feef2202 100644 --- a/tdom/htmlspec.py +++ b/tdom/htmlspec.py @@ -1,3 +1,5 @@ +from typing import Literal + # See https://developer.mozilla.org/en-US/docs/Glossary/Void_element VOID_ELEMENTS = frozenset( [ @@ -129,3 +131,5 @@ # Used for fragments that do not have a tag # to assume that text is inside this element. 
DEFAULT_NORMAL_TEXT_ELEMENT = "div" + +type NamespaceType = Literal["html", "math", "svg"] diff --git a/tdom/processor.py b/tdom/processor.py index b52e6869..fed656fc 100644 --- a/tdom/processor.py +++ b/tdom/processor.py @@ -28,6 +28,7 @@ SVG_ATTR_FIX, SVG_TAG_FIX, VOID_ELEMENTS, + NamespaceType, ) from .parser import ( HTMLAttribute, @@ -476,11 +477,11 @@ def _fix_svg_attrs(html_attrs: Iterable[HTMLAttribute]) -> Iterable[HTMLAttribut @dataclass(frozen=True, slots=True) class ProcessContext: parent_tag: str = DEFAULT_NORMAL_TEXT_ELEMENT - ns: str = "html" + ns: NamespaceType = "html" def copy( self, - ns: str | None = None, + ns: NamespaceType | None = None, parent_tag: str | None = None, ) -> ProcessContext: return ProcessContext( From f29e1eb9f03f661a72aec6e73c63dd7a1ce4cbe4 Mon Sep 17 00:00:00 2001 From: Ian Wilson Date: Thu, 11 Jun 2026 14:23:30 -0700 Subject: [PATCH 11/25] Be more intentional about when tags can and cannot self-close. --- tdom/parser.py | 65 ++++++++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 63 insertions(+), 2 deletions(-) diff --git a/tdom/parser.py b/tdom/parser.py index 0960ad38..9d03e39e 100644 --- a/tdom/parser.py +++ b/tdom/parser.py @@ -3,7 +3,7 @@ from html.parser import HTMLParser from string.templatelib import Interpolation, Template -from .htmlspec import VOID_ELEMENTS +from .htmlspec import MATH_TAGS, SVG_TAGS, VOID_ELEMENTS, NamespaceType from .placeholders import PlaceholderConfig, PlaceholderState from .template_utils import TemplateRef, combine_template_refs from .tnodes import ( @@ -128,6 +128,12 @@ def format_starttag(self, i_index: int) -> str: return self.get_expression(i_index, fallback_prefix="component-starttag") +XML_SELF_CLOSE_TAGS = frozenset(SVG_TAGS | MATH_TAGS) + + +DEFAULT_NS: NamespaceType = "html" # Namespace to fall back to if we don't know the namespace. 
+ + class TemplateParser(HTMLParser): root: OpenTFragment stack: list[OpenTag] @@ -473,14 +479,69 @@ def has_ambiguous_forward_slash(self, open_tag: OpenTag) -> bool: def handle_starttag(self, tag: str, attrs: Sequence[HTMLAttribute]) -> None: open_tag = self.make_open_tag(tag, attrs) - if isinstance(open_tag, OpenTElement) and open_tag.tag in VOID_ELEMENTS: + # @NOTE: We only auto-close void elements when the effective namespace is html. + # Ie. should fail. + if isinstance(open_tag, OpenTElement) and open_tag.tag in VOID_ELEMENTS and self.get_effective_current_ns() == "html": final_tag = self.finalize_tag(open_tag) self.append_child(final_tag) else: self.stack.append(open_tag) + def get_current_ns(self) -> None | NamespaceType: + for container in reversed(self.stack): + if isinstance(container, OpenTElement) and container.tag == "svg": + return "svg" + elif isinstance(container, OpenTElement) and container.tag == "math": + return "math" + elif ( + isinstance(container, OpenTElement) and container.tag == "foreignobject" + ): + return "html" + elif isinstance(container, OpenTComponent): + return None # Unknown + elif isinstance(container, OpenTFragment): + for sib in container.children: + if isinstance(sib, TDocumentType): + return "html" + return None # Unknown + return None # Unknown + + def get_effective_current_ns(self) -> NamepsaceType: + ns = self.get_current_ns() + return ns if ns is not None else DEFAULT_NS + + def is_literal_tag(self, tag: str): + return self.placeholders.copy().remove_placeholders(tag).is_literal + + def validate_self_close_attempt(self, ns: NamespaceType | None, tag: str): + if ( + ns is None + and tag not in VOID_ELEMENTS + # @NOTE: We permit xml tags to close when NS is implicitly html. + and tag not in XML_SELF_CLOSE_TAGS + ): + e = ValueError( + "Self-closing tags are only supported for components, void tags, svg tags or math tags in an ambigous namespace." 
+ ) + e.add_note(f"Cannot self-close {tag}.") + raise e + elif ( + ns == "html" + # @NOTE: Only void tags can be losed when NS is explictly html. + and tag not in VOID_ELEMENTS + ): + e = ValueError( + "Self-closing tags are only supported for components and void tags in html." + ) + e.add_note(f"Cannot self-close {tag}.") + raise e + def handle_startendtag(self, tag: str, attrs: Sequence[HTMLAttribute]) -> None: """Dispatch a self-closing tag, `` to specialized handlers.""" + if self.is_literal_tag(tag): + ns = self.get_current_ns() + self.validate_self_close_attempt(ns, tag) + open_tag = self.make_open_tag(tag, attrs, startend=True) final_tag = self.finalize_tag(open_tag) self.append_child(final_tag) From 57cd776870d837f43290abdce2779a9fe05e5c44 Mon Sep 17 00:00:00 2001 From: Ian Wilson Date: Thu, 11 Jun 2026 14:24:05 -0700 Subject: [PATCH 12/25] Update tests. --- tdom/parser_test.py | 130 ++++++++++++++++++++++++++++++++--------- tdom/processor_test.py | 20 +++---- 2 files changed, 111 insertions(+), 39 deletions(-) diff --git a/tdom/parser_test.py b/tdom/parser_test.py index e457a3a2..5c1f2f2e 100644 --- a/tdom/parser_test.py +++ b/tdom/parser_test.py @@ -24,7 +24,7 @@ def test_parse_mixed_literal_content(): t"" t"" t'
' - t"Hello,
world !" + t"Hello,
world !" t"
" ) assert node == TFragment( @@ -95,9 +95,10 @@ def test_parse_void_element(): assert node == TElement("br") -def test_parse_void_element_self_closed(): - node = TemplateParser.parse(t"
") - assert node == TElement("br") +def test_parse_void_element_with_optional_solidus(): + for el in (t"
", t"
"): + node = TemplateParser.parse(el) + assert node == TElement("br") def test_parse_uppercase_void_element(): @@ -139,7 +140,7 @@ def test_parse_element_with_template_singleton(): def test_parse_multiple_voids(): - node = TemplateParser.parse(t"






") + node = TemplateParser.parse(t"






") assert node == TFragment( children=( TElement("br"), @@ -222,32 +223,25 @@ def test_parse_unexpected_closing_tag(): _ = TemplateParser.parse(t"Unopened
") -def test_self_closing_tags(): - node = TemplateParser.parse(t"

") - assert node == TFragment( - children=( - TElement("div"), - TElement("p"), - ) - ) +def test_self_invalid_self_closing_tags(): + with pytest.raises(ValueError, match="Self-closing tags are only supported for"): + _ = TemplateParser.parse(t"

") -def test_nested_self_closing_tags(): - node = TemplateParser.parse(t"


") - assert node == TElement( - "div", children=(TElement("br"), TElement("div"), TElement("br")) - ) - node = TemplateParser.parse(t"
") - assert node == TElement("div", children=(TElement("div"),)) +def test_nested_invalid_self_closing_tags(): + with pytest.raises(ValueError, match="Self-closing tags are only supported for"): + _ = TemplateParser.parse(t"


") + with pytest.raises(ValueError, match="Self-closing tags are only supported for"): + _ = TemplateParser.parse(t"
") def test_self_closing_tags_unexpected_closing_tag(): - with pytest.raises(ValueError): + with pytest.raises(ValueError, match="Self-closing tags are only supported for"): _ = TemplateParser.parse(t"
") def test_self_closing_void_tags_unexpected_closing_tag(): - with pytest.raises(ValueError): + with pytest.raises(ValueError, match="Unexpected closing tag"): _ = TemplateParser.parse(t"") @@ -276,6 +270,11 @@ def test_literal_attrs(): ) +def test_void_element_with_attr_value_endswith_solidus(): + node = TemplateParser.parse(t"") + assert node == TElement("img", attrs=(TLiteralAttribute("src", "/"),)) + + def test_literal_attr_entities(): node = TemplateParser.parse(t'Link') assert node == TElement( @@ -298,7 +297,7 @@ def test_literal_attr_order(): def test_interpolated_attr(): value1 = 42 value2 = 99 - node = TemplateParser.parse(t'
') + node = TemplateParser.parse(t'
') assert node == TElement( "div", attrs=( @@ -313,7 +312,7 @@ def test_templated_attr(): value1 = 42 value2 = 99 node = TemplateParser.parse( - t'
' + t'
' ) value1_ref = TemplateRef(strings=("", "-burrito"), i_indexes=(0,)) value2_ref = TemplateRef(strings=("neato-", "-wow"), i_indexes=(1,)) @@ -329,7 +328,7 @@ def test_templated_attr(): def test_spread_attr(): spread_attrs = {} - node = TemplateParser.parse(t"
") + node = TemplateParser.parse(t"
") assert node == TElement( "div", attrs=(TSpreadAttribute(i_index=0),), @@ -340,21 +339,21 @@ def test_spread_attr(): def test_templated_attribute_name_error(): with pytest.raises(ValueError): attr_name = "some-attr" - _ = TemplateParser.parse(t'
') + _ = TemplateParser.parse(t'
') def test_templated_attribute_name_and_value_error(): with pytest.raises(ValueError): attr_name = "some-attr" value = "value" - _ = TemplateParser.parse(t'
') + _ = TemplateParser.parse(t'
') def test_adjacent_spread_attrs_error(): with pytest.raises(ValueError): attrs1 = {} attrs2 = {} - _ = TemplateParser.parse(t"
") + _ = TemplateParser.parse(t"
") # @@ -636,3 +635,78 @@ def test_comp_unquoted_attr_value_error_nested_in_el(self, Comp): def test_comp_unquoted_attr_value_error_nested_in_comp(self, Comp, Comp2): with pytest.raises(TypeError, match="Did you mean to quote the last attribute"): _ = TemplateParser.parse(t"<{Comp2}><{Comp} title=today/>") + + +class TestAmbiguousSelfCloseCheck: + @pytest.fixture + def comp(self): + def component( + active: bool = False, title: str = "Title", children: Template = t"" + ) -> Template: + dataset = {"active": active} + return t"
{children}
" + + return component + + def test_component_ok(self, comp): + dynamic = "dynamic" + attrs = {"active": True} + for template in [ + t"<{comp}/>abc", + t"<{comp} active/>abc", # Still ok because attr name cannot contain / + t"<{comp} {attrs}/>abc", # Still ok because attr name cannot contain / + t"<{comp} />abc", + t"<{comp} title=literal />abc", + t"<{comp} title=literal/ >abc", # This is really gross but shouldn't be common. + t'<{comp} title="literal"/>abc', + t"<{comp} title={dynamic} />abc", + t'<{comp} title="{dynamic}"/>abc', + t"<{comp} title={dynamic}literal />abc", + t'<{comp} title="{dynamic}literal"/>abc', + ]: + tnode = TemplateParser.parse(template) + assert ( + isinstance(tnode, TFragment) + and len(tnode.children) == 2 + and isinstance(tnode.children[0], TComponent) + ) + + def test_component_ambiguous_error(self, comp): + dynamic = "dynamic" + for template in ( + t"<{comp} title=literal/>", + t"<{comp} title=literal/>abc", + t"<{comp} title={dynamic}/>", + t"<{comp} title={dynamic}/>abc", + t"<{comp} title=prefix{dynamic}/>", + t"<{comp} title=prefix{dynamic}/>abc", + t"<{comp} title={dynamic}literal/>", + t"<{comp} title={dynamic}literal/>abc", + t"<{comp} title=/>abc", + t"<{comp} title= />abc", # WS between = and value is ignored, so title=/ + ): + with pytest.raises( + ValueError, match="Invalid HTML structure: unclosed tags remain" + ): + _ = TemplateParser.parse(template) + + def test_element_self_closing_error(self): + dynamic = "dynamic" + attrs = {"active": True} + for template in ( + t"
abc", + t"
abc", + t"
abc", + t"
abc", + t"
abc", + t'
abc', + t"
abc", + t'
abc', + t"
abc", + t'
abc', + ): + with pytest.raises( + ValueError, match="Self-closing tags are only supported" + ): + _ = TemplateParser.parse(template) + diff --git a/tdom/processor_test.py b/tdom/processor_test.py index 971a659e..0956d139 100644 --- a/tdom/processor_test.py +++ b/tdom/processor_test.py @@ -231,16 +231,10 @@ class TestVoidElementLiteral: def test_void(self): assert html(t"
") == "
" - def test_void_self_closed(self): - assert html(t"
") == "
" - - def test_void_mixed_closing(self): - assert html(t"
Is this content?
") == "
Is this content?
" - def test_chain_of_void_elements(self): # Make sure our handling of CPython issue #69445 is reasonable. assert ( - html(t"



") + html(t"



") == '



' ) @@ -1684,7 +1678,7 @@ def __call__(self) -> Template: return ( t"
" t"" - t"{f" + t"{f" t"" t"{self.user_name}" t"{self.children}" @@ -1725,7 +1719,7 @@ def __call__(self) -> Template: return ( t"
" t"" - t"{f" + t"{f" t"" t"{self.user_name}" t"ignore children" @@ -1906,8 +1900,12 @@ def test_process_template_internal_cache(): # miss the cache. If this element is used elsewhere than the global # cache might cache it and it will ruin our counting, specifically # the first miss will instead be a hit. - sample_t = t"
{'content'}
" - sample_diff_t = t"
{'diffcontent'}
" + sample_t = ( + t"
{'content'}
" + ) + sample_diff_t = ( + t"
{'diffcontent'}
" + ) alt_t = t"{'content'}" process_api = TemplateProcessor(parser_api=TemplateParserProxy()) cached_process_api = TemplateProcessor(parser_api=CachedTemplateParserProxy()) From c762d59dd43a8b20010c27a652f866cf18171a06 Mon Sep 17 00:00:00 2001 From: Ian Wilson Date: Thu, 11 Jun 2026 14:28:17 -0700 Subject: [PATCH 13/25] Typecheck fixes. --- tdom/parser.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tdom/parser.py b/tdom/parser.py index 9d03e39e..2e904ba1 100644 --- a/tdom/parser.py +++ b/tdom/parser.py @@ -506,7 +506,7 @@ def get_current_ns(self) -> None | NamespaceType: return None # Unknown return None # Unknown - def get_effective_current_ns(self) -> NamepsaceType: + def get_effective_current_ns(self) -> NamespaceType: ns = self.get_current_ns() return ns if ns is not None else DEFAULT_NS From c70cb32bdf3eb512910dfe84d562f9c3e78ce1d9 Mon Sep 17 00:00:00 2001 From: Ian Wilson Date: Thu, 11 Jun 2026 14:28:50 -0700 Subject: [PATCH 14/25] Format parser. --- tdom/parser.py | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/tdom/parser.py b/tdom/parser.py index 2e904ba1..95283381 100644 --- a/tdom/parser.py +++ b/tdom/parser.py @@ -131,7 +131,9 @@ def format_starttag(self, i_index: int) -> str: XML_SELF_CLOSE_TAGS = frozenset(SVG_TAGS | MATH_TAGS) -DEFAULT_NS: NamespaceType = "html" # Namespace to fall back to if we don't know the namespace. +DEFAULT_NS: NamespaceType = ( + "html" # Namespace to fall back to if we don't know the namespace. +) class TemplateParser(HTMLParser): @@ -481,7 +483,11 @@ def handle_starttag(self, tag: str, attrs: Sequence[HTMLAttribute]) -> None: open_tag = self.make_open_tag(tag, attrs) # @NOTE: We only auto-close void elements when the effective namespace is html. # Ie. should fail. 
- if isinstance(open_tag, OpenTElement) and open_tag.tag in VOID_ELEMENTS and self.get_effective_current_ns() == "html": + if ( + isinstance(open_tag, OpenTElement) + and open_tag.tag in VOID_ELEMENTS + and self.get_effective_current_ns() == "html" + ): final_tag = self.finalize_tag(open_tag) self.append_child(final_tag) else: From 1f9e0b26141494e8c822d0249ab000cdb3815262 Mon Sep 17 00:00:00 2001 From: Ian Wilson Date: Thu, 11 Jun 2026 14:36:45 -0700 Subject: [PATCH 15/25] Manually bring back a few void tests. --- tdom/parser_test.py | 2 +- tdom/processor_test.py | 8 +++++++- 2 files changed, 8 insertions(+), 2 deletions(-) diff --git a/tdom/parser_test.py b/tdom/parser_test.py index 5c1f2f2e..6b2533d0 100644 --- a/tdom/parser_test.py +++ b/tdom/parser_test.py @@ -140,7 +140,7 @@ def test_parse_element_with_template_singleton(): def test_parse_multiple_voids(): - node = TemplateParser.parse(t"






") + node = TemplateParser.parse(t"






") assert node == TFragment( children=( TElement("br"), diff --git a/tdom/processor_test.py b/tdom/processor_test.py index 0956d139..82f13914 100644 --- a/tdom/processor_test.py +++ b/tdom/processor_test.py @@ -231,10 +231,16 @@ class TestVoidElementLiteral: def test_void(self): assert html(t"
") == "
" + def test_void_self_closed(self): + assert html(t"
") == "
" + + def test_void_mixed_closing(self): + assert html(t"
Is this content?
") == "
Is this content?
" + def test_chain_of_void_elements(self): # Make sure our handling of CPython issue #69445 is reasonable. assert ( - html(t"



") + html(t"



") == '



' ) From 8164d44ed5a929487fbeca5283dd1c99c0c5a6ef Mon Sep 17 00:00:00 2001 From: Ian Wilson Date: Thu, 11 Jun 2026 14:40:59 -0700 Subject: [PATCH 16/25] Fix method order. --- tdom/parser.py | 90 +++++++++++++++++++++++++------------------------- 1 file changed, 45 insertions(+), 45 deletions(-) diff --git a/tdom/parser.py b/tdom/parser.py index 95283381..8ed20fce 100644 --- a/tdom/parser.py +++ b/tdom/parser.py @@ -448,51 +448,6 @@ def get_starttag_text(self, msg: str = "Expecting starttag text to be set.") -> raise AssertionError(msg) return starttag_text - def has_ambiguous_forward_slash(self, open_tag: OpenTag) -> bool: - """ - Detect when an unquoted attribute value consumes a trailing "/" that - *might* have been meant to attempt to self-close a tag, ie. "/>". - - This can come up with literal values or values with interpolations. - - Such as "
" or "<{Component} title=test/>". - - Or more often "<{Component} title={title}/>" which should be corrected - with "<{Component} title={title} />". - """ - if isinstance(open_tag, (OpenTElement, OpenTComponent)): - return ( - # has attributes - len(open_tag.raw_attrs) > 0 - # last attr not bare attribute - and open_tag.raw_attrs[-1][1] is not None - # last char of last attr is "/" - and open_tag.raw_attrs[-1][1][-1] == "/" - # parsed starttag ends with "/>" - and open_tag.starttag_text.endswith("/>") - # if parsed as startend then its not ambiguous - and not open_tag.startend - ) - return False - - # ------------------------------------------ - # HTMLParser tag callbacks - # ------------------------------------------ - - def handle_starttag(self, tag: str, attrs: Sequence[HTMLAttribute]) -> None: - open_tag = self.make_open_tag(tag, attrs) - # @NOTE: We only auto-close void elements when the effective namespace is html. - # Ie. should fail. - if ( - isinstance(open_tag, OpenTElement) - and open_tag.tag in VOID_ELEMENTS - and self.get_effective_current_ns() == "html" - ): - final_tag = self.finalize_tag(open_tag) - self.append_child(final_tag) - else: - self.stack.append(open_tag) - def get_current_ns(self) -> None | NamespaceType: for container in reversed(self.stack): if isinstance(container, OpenTElement) and container.tag == "svg": @@ -542,6 +497,51 @@ def validate_self_close_attempt(self, ns: NamespaceType | None, tag: str): e.add_note(f"Cannot self-close {tag}.") raise e + def has_ambiguous_forward_slash(self, open_tag: OpenTag) -> bool: + """ + Detect when an unquoted attribute value consumes a trailing "/" that + *might* have been meant to attempt to self-close a tag, ie. "/>". + + This can come up with literal values or values with interpolations. + + Such as "
" or "<{Component} title=test/>". + + Or more often "<{Component} title={title}/>" which should be corrected + with "<{Component} title={title} />". + """ + if isinstance(open_tag, (OpenTElement, OpenTComponent)): + return ( + # has attributes + len(open_tag.raw_attrs) > 0 + # last attr not bare attribute + and open_tag.raw_attrs[-1][1] is not None + # last char of last attr is "/" + and open_tag.raw_attrs[-1][1][-1] == "/" + # parsed starttag ends with "/>" + and open_tag.starttag_text.endswith("/>") + # if parsed as startend then its not ambiguous + and not open_tag.startend + ) + return False + + # ------------------------------------------ + # HTMLParser tag callbacks + # ------------------------------------------ + + def handle_starttag(self, tag: str, attrs: Sequence[HTMLAttribute]) -> None: + open_tag = self.make_open_tag(tag, attrs) + # @NOTE: We only auto-close void elements when the effective namespace is html. + # Ie. should fail. + if ( + isinstance(open_tag, OpenTElement) + and open_tag.tag in VOID_ELEMENTS + and self.get_effective_current_ns() == "html" + ): + final_tag = self.finalize_tag(open_tag) + self.append_child(final_tag) + else: + self.stack.append(open_tag) + def handle_startendtag(self, tag: str, attrs: Sequence[HTMLAttribute]) -> None: """Dispatch a self-closing tag, `` to specialized handlers.""" if self.is_literal_tag(tag): From 5dafd5bae6320f7402ced3745a1853163cd30769 Mon Sep 17 00:00:00 2001 From: Ian Wilson Date: Thu, 11 Jun 2026 23:23:21 -0700 Subject: [PATCH 17/25] Attempt to parse templates within a certain context. 
--- tdom/parser.py | 131 +++++++++++++++++++++++++++-------------- tdom/processor.py | 17 +++--- tdom/processor_test.py | 16 +++-- 3 files changed, 105 insertions(+), 59 deletions(-) diff --git a/tdom/parser.py b/tdom/parser.py index 8ed20fce..b6697059 100644 --- a/tdom/parser.py +++ b/tdom/parser.py @@ -136,13 +136,53 @@ def format_starttag(self, i_index: int) -> str: ) +@dataclass(frozen=True) +class ParseContext: + """ + This is the context that was used to parse a given template. + """ + + # @TODO: slots might have issue with weakref, check if caching that + # is an issue. + + ns: NamespaceType = "html" + + def copy(self, ns: NamespaceType | None = None) -> ParseContext: + return ParseContext(ns=ns if ns is not None else self.ns) + + +@dataclass(frozen=True) +class InternalParseContext: + """ + This is the context that was used to parse a given template. + """ + + ns: NamespaceType = "html" + in_component: bool = False + + def copy( + self, ns: NamespaceType | None = None, in_component: bool | None = None + ) -> InternalParseContext: + return InternalParseContext( + ns=ns if ns is not None else self.ns, + in_component=in_component + if in_component is not None + else self.in_component, + ) + + class TemplateParser(HTMLParser): root: OpenTFragment - stack: list[OpenTag] + stack: list[tuple[OpenTag, InternalParseContext]] placeholders: PlaceholderState source: SourceTracker | None + root_ctx: InternalParseContext + " Assume that template parsing *starts* in this context. " - def __init__(self, *, convert_charrefs: bool = True): + def __init__( + self, *, root_ctx: InternalParseContext, convert_charrefs: bool = True + ): + self.root_ctx = root_ctx # This calls HTMLParser.reset() which we override to set up our state. 
super().__init__(convert_charrefs=convert_charrefs) @@ -152,7 +192,7 @@ def __init__(self, *, convert_charrefs: bool = True): def get_parent(self) -> OpenTag: """Return the current parent node to which new children should be added.""" - return self.stack[-1] if self.stack else self.root + return self.stack[-1][0] if self.stack else self.root def append_child(self, child: TNode) -> None: parent = self.get_parent() @@ -448,46 +488,19 @@ def get_starttag_text(self, msg: str = "Expecting starttag text to be set.") -> raise AssertionError(msg) return starttag_text - def get_current_ns(self) -> None | NamespaceType: - for container in reversed(self.stack): - if isinstance(container, OpenTElement) and container.tag == "svg": - return "svg" - elif isinstance(container, OpenTElement) and container.tag == "math": - return "math" - elif ( - isinstance(container, OpenTElement) and container.tag == "foreignobject" - ): - return "html" - elif isinstance(container, OpenTComponent): - return None # Unknown - elif isinstance(container, OpenTFragment): - for sib in container.children: - if isinstance(sib, TDocumentType): - return "html" - return None # Unknown - return None # Unknown - - def get_effective_current_ns(self) -> NamespaceType: - ns = self.get_current_ns() - return ns if ns is not None else DEFAULT_NS + def get_last_ctx(self) -> InternalParseContext: + if self.stack: + return self.stack[-1][1] + else: + return self.root_ctx def is_literal_tag(self, tag: str): return self.placeholders.copy().remove_placeholders(tag).is_literal - def validate_self_close_attempt(self, ns: NamespaceType | None, tag: str): + def validate_self_close_attempt(self, last_ctx: InternalParseContext, tag: str): if ( - ns is None - and tag not in VOID_ELEMENTS - # @NOTE: We permit xml tags to close when NS is implicitly html. - and tag not in XML_SELF_CLOSE_TAGS - ): - e = ValueError( - "Self-closing tags are only supported for components, void tags, svg tags or math tags in an ambigous namespace." 
- ) - e.add_note(f"Cannot self-close {tag}.") - raise e - elif ( - ns == "html" + not last_ctx.in_component + and last_ctx.ns == "html" # @NOTE: Only void tags can be losed when NS is explictly html. and tag not in VOID_ELEMENTS ): @@ -530,23 +543,47 @@ def has_ambiguous_forward_slash(self, open_tag: OpenTag) -> bool: def handle_starttag(self, tag: str, attrs: Sequence[HTMLAttribute]) -> None: open_tag = self.make_open_tag(tag, attrs) + """<{c}> # what namespace is the input in? + <{c}>""" + last_ctx = self.get_last_ctx() # @NOTE: We only auto-close void elements when the effective namespace is html. # Ie. should fail. if ( isinstance(open_tag, OpenTElement) and open_tag.tag in VOID_ELEMENTS - and self.get_effective_current_ns() == "html" + and ( + last_ctx.ns == "html" + # @TODO: Maybe backtracking when it looks like we needed + # to close it would be better? We just need this HTML to + # get out of the way when parsing a component. + or last_ctx.in_component + ) ): final_tag = self.finalize_tag(open_tag) self.append_child(final_tag) else: - self.stack.append(open_tag) + last_ctx = self.get_last_ctx() + if isinstance(open_tag, OpenTElement): + if open_tag.tag == "svg": + next_ctx = last_ctx.copy(ns="svg") + elif open_tag.tag == "math": + next_ctx = last_ctx.copy(ns="math") + elif open_tag.tag == "foreignobject" and last_ctx.ns in ("svg", "math"): + next_ctx = last_ctx.copy(ns="html") + else: + next_ctx = last_ctx + elif isinstance(open_tag, OpenTComponent): + next_ctx = last_ctx.copy(in_component=True) + else: + next_ctx = last_ctx + print(f"push: {(open_tag, next_ctx)=}") + self.stack.append((open_tag, next_ctx)) def handle_startendtag(self, tag: str, attrs: Sequence[HTMLAttribute]) -> None: """Dispatch a self-closing tag, `` to specialized handlers.""" if self.is_literal_tag(tag): - ns = self.get_current_ns() - self.validate_self_close_attempt(ns, tag) + last_ctx = self.get_last_ctx() + self.validate_self_close_attempt(last_ctx, tag) open_tag = 
self.make_open_tag(tag, attrs, startend=True) final_tag = self.finalize_tag(open_tag) @@ -556,7 +593,8 @@ def handle_endtag(self, tag: str) -> None: if not self.stack: raise ValueError(f"Unexpected closing tag with no open tag.") - open_tag = self.stack.pop() + open_tag, last_ctx = self.stack.pop() + print(f"pop: {(open_tag, last_ctx)=}") endtag_i_index = self.validate_end_tag(tag, open_tag) final_tag = self.finalize_tag(open_tag, endtag_i_index) self.append_child(final_tag) @@ -674,6 +712,7 @@ def feed_interpolation(self, index: int) -> None: def feed_template(self, template: Template) -> None: """Feed a Template's content to the parser.""" + print(f"assume: {self.root_ctx=}") assert self.source is None, "Did you forget to call reset?" self.source = SourceTracker(template) for i_index in range(len(template.interpolations)): @@ -685,13 +724,15 @@ def feed_template(self, template: Template) -> None: self.feed_str(template.strings[-1]) @staticmethod - def parse(t: Template) -> TNode: + def parse(t: Template, assume_ctx: ParseContext | None = None) -> TNode: """ Parse a Template containing valid HTML and substitutions and return a TNode tree representing its structure. This cachable structure can later be resolved against actual interpolation values to produce a Node tree. """ - parser = TemplateParser() + if assume_ctx is None: + assume_ctx = ParseContext() + parser = TemplateParser(root_ctx=InternalParseContext(ns=assume_ctx.ns)) parser.feed_template(t) parser.close() return parser.get_tnode() diff --git a/tdom/processor.py b/tdom/processor.py index fed656fc..4da6fc5c 100644 --- a/tdom/processor.py +++ b/tdom/processor.py @@ -32,6 +32,7 @@ ) from .parser import ( HTMLAttribute, + ParseContext, TAttribute, TComment, TComponent, @@ -523,23 +524,23 @@ def copy( class ITemplateParserProxy(t.Protocol): - def to_tnode(self, template: Template) -> TNode: ... + def to_tnode(self, template: Template, assume_ctx: ParseContext) -> TNode: ... 
@dataclass(frozen=True) class TemplateParserProxy(ITemplateParserProxy): - def to_tnode(self, template: Template) -> TNode: - return TemplateParser.parse(template) + def to_tnode(self, template: Template, assume_ctx: ParseContext) -> TNode: + return TemplateParser.parse(template, assume_ctx) @dataclass(frozen=True) class CachedTemplateParserProxy(TemplateParserProxy): @lru_cache(512) # noqa: B019 - def _to_tnode(self, ct: CachableTemplate) -> TNode: - return super().to_tnode(ct.template) + def _to_tnode(self, ct: CachableTemplate, assume_ctx: ParseContext) -> TNode: + return super().to_tnode(ct.template, assume_ctx) - def to_tnode(self, template: Template) -> TNode: - return self._to_tnode(CachableTemplate(template)) + def to_tnode(self, template: Template, assume_ctx: ParseContext) -> TNode: + return self._to_tnode(CachableTemplate(template), assume_ctx) class IComponentProcessor(t.Protocol): @@ -668,7 +669,7 @@ def process( return self._process_template(root_template, assume_ctx) def _process_template(self, template: Template, last_ctx: ProcessContext) -> str: - root = self.parser_api.to_tnode(template) + root = self.parser_api.to_tnode(template, ParseContext(ns=last_ctx.ns)) return self._process_tnode(template, last_ctx, root) def _process_tnode( diff --git a/tdom/processor_test.py b/tdom/processor_test.py index 82f13914..115eb0e5 100644 --- a/tdom/processor_test.py +++ b/tdom/processor_test.py @@ -11,6 +11,7 @@ from .callables import get_callable_info from .escaping import escape_html_text +from .parser import ParseContext from .processor import ( CachedTemplateParserProxy, ProcessContext, @@ -1921,11 +1922,14 @@ def test_process_template_internal_cache(): assert isinstance(cached_process_api, TemplateProcessor) assert isinstance(cached_process_api.parser_api, CachedTemplateParserProxy) start_ci = cached_process_api.parser_api._to_tnode.cache_info() - tnode1 = process_api.parser_api.to_tnode(sample_t) - tnode2 = process_api.parser_api.to_tnode(sample_t) - 
cached_tnode1 = cached_process_api.parser_api.to_tnode(sample_t) - cached_tnode2 = cached_process_api.parser_api.to_tnode(sample_t) - cached_tnode3 = cached_process_api.parser_api.to_tnode(sample_diff_t) + default_parse_ctx = ParseContext() + tnode1 = process_api.parser_api.to_tnode(sample_t, default_parse_ctx) + tnode2 = process_api.parser_api.to_tnode(sample_t, default_parse_ctx) + cached_tnode1 = cached_process_api.parser_api.to_tnode(sample_t, default_parse_ctx) + cached_tnode2 = cached_process_api.parser_api.to_tnode(sample_t, default_parse_ctx) + cached_tnode3 = cached_process_api.parser_api.to_tnode( + sample_diff_t, default_parse_ctx + ) # Check that the uncached and cached services are actually # returning non-identical results. assert tnode1 is not cached_tnode1 @@ -1948,7 +1952,7 @@ def test_process_template_internal_cache(): assert ci.hits - start_ci.hits == 2 # cached_tf1 was a miss because cache was empty (brand new) assert ci.misses - start_ci.misses == 1 - cached_tnode4 = cached_process_api.parser_api.to_tnode(alt_t) + cached_tnode4 = cached_process_api.parser_api.to_tnode(alt_t, default_parse_ctx) # A different template produces a brand new tf. assert cached_tnode1 is not cached_tnode4 # The template is new AND has a different structure so it also From 94608058bc3b9a20b604dc5d83c011e13f74dbe7 Mon Sep 17 00:00:00 2001 From: Ian Wilson Date: Thu, 11 Jun 2026 23:25:13 -0700 Subject: [PATCH 18/25] Remove unused. 
--- tdom/parser.py | 10 +--------- 1 file changed, 1 insertion(+), 9 deletions(-) diff --git a/tdom/parser.py b/tdom/parser.py index b6697059..ffba4277 100644 --- a/tdom/parser.py +++ b/tdom/parser.py @@ -3,7 +3,7 @@ from html.parser import HTMLParser from string.templatelib import Interpolation, Template -from .htmlspec import MATH_TAGS, SVG_TAGS, VOID_ELEMENTS, NamespaceType +from .htmlspec import VOID_ELEMENTS, NamespaceType from .placeholders import PlaceholderConfig, PlaceholderState from .template_utils import TemplateRef, combine_template_refs from .tnodes import ( @@ -128,14 +128,6 @@ def format_starttag(self, i_index: int) -> str: return self.get_expression(i_index, fallback_prefix="component-starttag") -XML_SELF_CLOSE_TAGS = frozenset(SVG_TAGS | MATH_TAGS) - - -DEFAULT_NS: NamespaceType = ( - "html" # Namespace to fall back to if we don't know the namespace. -) - - @dataclass(frozen=True) class ParseContext: """ From 169c7e83aea67b6eb16a1577402df8fa5122a50a Mon Sep 17 00:00:00 2001 From: Ian Wilson Date: Fri, 12 Jun 2026 00:36:27 -0700 Subject: [PATCH 19/25] Cleanup cruft. --- tdom/parser.py | 13 ++++--------- 1 file changed, 4 insertions(+), 9 deletions(-) diff --git a/tdom/parser.py b/tdom/parser.py index ffba4277..859a8f9a 100644 --- a/tdom/parser.py +++ b/tdom/parser.py @@ -535,19 +535,16 @@ def has_ambiguous_forward_slash(self, open_tag: OpenTag) -> bool: def handle_starttag(self, tag: str, attrs: Sequence[HTMLAttribute]) -> None: open_tag = self.make_open_tag(tag, attrs) - """<{c}> # what namespace is the input in? - <{c}>""" last_ctx = self.get_last_ctx() - # @NOTE: We only auto-close void elements when the effective namespace is html. - # Ie. should fail. if ( isinstance(open_tag, OpenTElement) and open_tag.tag in VOID_ELEMENTS and ( last_ctx.ns == "html" # @TODO: Maybe backtracking when it looks like we needed - # to close it would be better? We just need this HTML to - # get out of the way when parsing a component. 
+ # to close it would be better? We just need the component's + # children to parse out and get out of the way because that + # isn't the template we are trying to parse and cache. or last_ctx.in_component ) ): @@ -568,7 +565,6 @@ def handle_starttag(self, tag: str, attrs: Sequence[HTMLAttribute]) -> None: next_ctx = last_ctx.copy(in_component=True) else: next_ctx = last_ctx - print(f"push: {(open_tag, next_ctx)=}") self.stack.append((open_tag, next_ctx)) def handle_startendtag(self, tag: str, attrs: Sequence[HTMLAttribute]) -> None: @@ -585,8 +581,7 @@ def handle_endtag(self, tag: str) -> None: if not self.stack: raise ValueError(f"Unexpected closing tag with no open tag.") - open_tag, last_ctx = self.stack.pop() - print(f"pop: {(open_tag, last_ctx)=}") + open_tag, _ = self.stack.pop() endtag_i_index = self.validate_end_tag(tag, open_tag) final_tag = self.finalize_tag(open_tag, endtag_i_index) self.append_child(final_tag) From 24e04a606a34cf2901c9990b8b3b857a18bb4ab8 Mon Sep 17 00:00:00 2001 From: Ian Wilson Date: Fri, 12 Jun 2026 00:37:11 -0700 Subject: [PATCH 20/25] Add test for relaxed 'in_component' rules. --- tdom/parser_test.py | 36 ++++++++++++++++++++++++++++++++++++ 1 file changed, 36 insertions(+) diff --git a/tdom/parser_test.py b/tdom/parser_test.py index 6b2533d0..b9636bfc 100644 --- a/tdom/parser_test.py +++ b/tdom/parser_test.py @@ -710,3 +710,39 @@ def test_element_self_closing_error(self): ): _ = TemplateParser.parse(template) + +class TestRelaxingComponentChildrenRules: + @pytest.fixture + def PTC(self): + def PassThru(children: Template) -> Template: + return children + + return PassThru + + def test_html_wrapped_component_ok(self, PTC): + # html rules relaxed during component parsing + templates = ( + t"
<{PTC}>
", + t"
<{PTC}>
", + t"
<{PTC}>
", + t"
<{PTC}>
<{PTC}/><{PTC}>
", + ) + for tf in templates: + node = TemplateParser.parse(tf) + assert isinstance(node, TElement) and node.tag == "div" + assert len(node.children) == 1 and isinstance(node.children[0], TComponent) + + def test_xml_wrapped_component_ok(self, PTC): + # xml (svg/mathml) rules relaxed during component parsing + templates = ( + t"<{PTC}>", # allow void in svg + t"<{PTC}>", # allow void in svg IN svg + t"<{PTC}>", # allow void in math + t"<{PTC}>", # its crazy out here! + t"<{PTC}>", + ) + for tf in templates: + node = TemplateParser.parse(tf) + assert isinstance(node, TElement) and node.tag in ("svg", "math") + assert len(node.children) == 1 and isinstance(node.children[0], TComponent) + From 1bd38b5ba5fa66975a0dc63b53e0c360f20d9a77 Mon Sep 17 00:00:00 2001 From: Ian Wilson Date: Fri, 12 Jun 2026 13:08:57 -0700 Subject: [PATCH 21/25] Move tests into dedicated file and add symmatric follow up tests for processor. --- tdom/parser_test.py | 37 --------------- tdom/relaxed_parser_processor_test.py | 68 +++++++++++++++++++++++++++ 2 files changed, 68 insertions(+), 37 deletions(-) create mode 100644 tdom/relaxed_parser_processor_test.py diff --git a/tdom/parser_test.py b/tdom/parser_test.py index b9636bfc..8b99d4af 100644 --- a/tdom/parser_test.py +++ b/tdom/parser_test.py @@ -709,40 +709,3 @@ def test_element_self_closing_error(self): ValueError, match="Self-closing tags are only supported" ): _ = TemplateParser.parse(template) - - -class TestRelaxingComponentChildrenRules: - @pytest.fixture - def PTC(self): - def PassThru(children: Template) -> Template: - return children - - return PassThru - - def test_html_wrapped_component_ok(self, PTC): - # html rules relaxed during component parsing - templates = ( - t"
<{PTC}>
", - t"
<{PTC}>
", - t"
<{PTC}>
", - t"
<{PTC}>
<{PTC}/><{PTC}>
", - ) - for tf in templates: - node = TemplateParser.parse(tf) - assert isinstance(node, TElement) and node.tag == "div" - assert len(node.children) == 1 and isinstance(node.children[0], TComponent) - - def test_xml_wrapped_component_ok(self, PTC): - # xml (svg/mathml) rules relaxed during component parsing - templates = ( - t"<{PTC}>", # allow void in svg - t"<{PTC}>", # allow void in svg IN svg - t"<{PTC}>", # allow void in math - t"<{PTC}>", # its crazy out here! - t"<{PTC}>", - ) - for tf in templates: - node = TemplateParser.parse(tf) - assert isinstance(node, TElement) and node.tag in ("svg", "math") - assert len(node.children) == 1 and isinstance(node.children[0], TComponent) - diff --git a/tdom/relaxed_parser_processor_test.py b/tdom/relaxed_parser_processor_test.py new file mode 100644 index 00000000..e7d788c5 --- /dev/null +++ b/tdom/relaxed_parser_processor_test.py @@ -0,0 +1,68 @@ +from collections.abc import Callable +from string.templatelib import Template + +import pytest + +from tdom import html +from tdom.parser import TemplateParser +from tdom.tnodes import TComponent, TElement + + +def PTC(children: Template) -> Template: + """Pass children through.""" + return children + + +def get_relaxed_html_templates( + Comp: Callable[[Template], Template] = PTC, +) -> tuple[Template, ...]: + return ( + t"
<{Comp}>
", + t"
<{Comp}>
", + t"
<{Comp}>
", + t"
<{Comp}>
<{Comp}/><{Comp}>
", + ) + + +def get_relaxed_xml_templates( + Comp: Callable[[Template], Template] = PTC, +) -> tuple[Template, ...]: + return ( + t"<{Comp}>", + t"<{Comp}>", + t"<{Comp}>", + t"<{Comp}>", + t"<{Comp}>", + ) + + +class TestRelaxedRulesInParser: + """These templates work in the parser because we don't have enough ctx.""" + + def test_html_wrapped(self): + # html rules relaxed during component parsing + for tf in get_relaxed_html_templates(): + node = TemplateParser.parse(tf) + assert isinstance(node, TElement) and node.tag == "div" + assert len(node.children) == 1 and isinstance(node.children[0], TComponent) + + def test_xml_wrapped(self): + # xml (svg/mathml) rules relaxed during component parsing + for tf in get_relaxed_xml_templates(): + node = TemplateParser.parse(tf) + assert isinstance(node, TElement) and node.tag in ("svg", "math") + assert len(node.children) == 1 and isinstance(node.children[0], TComponent) + + +class TestRelaxedRulesInProcessor: + """These templates fail in the processor when only passed through.""" + + def test_html_wrapped_component_ok(self): + for tf in get_relaxed_html_templates(): + with pytest.raises(ValueError): + _ = html(tf) + + def test_xml_wrapped_component_ok(self): + for tf in get_relaxed_xml_templates(): + with pytest.raises(ValueError): + _ = html(tf) From 5c90a1c88444347c8393b98776b86c08c49fd844 Mon Sep 17 00:00:00 2001 From: Ian Wilson Date: Fri, 12 Jun 2026 15:33:32 -0700 Subject: [PATCH 22/25] Unpack tuple. --- tdom/parser.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tdom/parser.py b/tdom/parser.py index 859a8f9a..08ca5eae 100644 --- a/tdom/parser.py +++ b/tdom/parser.py @@ -640,7 +640,7 @@ def close(self) -> None: e = ValueError("Invalid HTML structure: unclosed tags remain.") # Check for tags that might have meant to self-close but whose # unquoted last attribute value consumed a "/", ie.
. - parent = self.stack[-1] + parent, _ = self.stack[-1] # @TODO: We need to determine which tags this might apply to, this only applies to components. if isinstance(parent, OpenTComponent) and self.has_ambiguous_forward_slash( parent From 699ed913256d5ebe92c6ae107edfff3a1f952f4a Mon Sep 17 00:00:00 2001 From: Ian Wilson Date: Sat, 13 Jun 2026 15:56:25 -0700 Subject: [PATCH 23/25] Remove debugging print. --- tdom/parser.py | 1 - 1 file changed, 1 deletion(-) diff --git a/tdom/parser.py b/tdom/parser.py index 08ca5eae..18c63144 100644 --- a/tdom/parser.py +++ b/tdom/parser.py @@ -699,7 +699,6 @@ def feed_interpolation(self, index: int) -> None: def feed_template(self, template: Template) -> None: """Feed a Template's content to the parser.""" - print(f"assume: {self.root_ctx=}") assert self.source is None, "Did you forget to call reset?" self.source = SourceTracker(template) for i_index in range(len(template.interpolations)): From 84a1711abebc1730ddd00fddd741c37e4aa36455 Mon Sep 17 00:00:00 2001 From: Ian Wilson Date: Sat, 13 Jun 2026 16:06:28 -0700 Subject: [PATCH 24/25] Clump parse details together into object. --- tdom/parser.py | 39 +++++++++++++++++++++------------------ 1 file changed, 21 insertions(+), 18 deletions(-) diff --git a/tdom/parser.py b/tdom/parser.py index 18c63144..191a03f3 100644 --- a/tdom/parser.py +++ b/tdom/parser.py @@ -26,13 +26,18 @@ @dataclass -class OpenTElement: +class ParseInfo: starttag_text: str " Entire starttag as parsed, includes placeholders, used for debugging. " raw_attrs: Sequence[HTMLAttribute] " Attrs as parsed, includes placeholders, used for debugging. " startend: bool " Was parsed as startend tag, ie. , used for debugging. " + + +@dataclass +class OpenTElement: + parse_info: ParseInfo tag: str attrs: tuple[TAttribute, ...] 
children: list[TNode] = field(default_factory=list) @@ -45,12 +50,7 @@ class OpenTFragment: @dataclass class OpenTComponent: - starttag_text: str - " Entire starttag as parsed, includes placeholders, used for debugging. " - raw_attrs: Sequence[HTMLAttribute] - " Attrs as parsed, includes placeholders, used for debugging. " - startend: bool - " Was parsed as startend tag, ie. , used for debugging. " + parse_info: ParseInfo start_i_index: int children_start_s_index: int """The strings index where the component's children template starts.""" @@ -239,9 +239,11 @@ def make_open_tag( if tag_ref.is_literal: return OpenTElement( - starttag_text=self.get_starttag_text(), - raw_attrs=attrs, - startend=startend, + parse_info=ParseInfo( + starttag_text=self.get_starttag_text(), + raw_attrs=attrs, + startend=startend, + ), tag=tag, attrs=self.make_tattrs(attrs), ) @@ -283,9 +285,9 @@ def make_open_tag( ) return OpenTComponent( - starttag_text=starttag_text, - raw_attrs=attrs, - startend=startend, + parse_info=ParseInfo( + starttag_text=starttag_text, raw_attrs=attrs, startend=startend + ), start_i_index=i_index, children_start_s_index=children_start_s_index, offset_into_children_start_s=offset_into_children_start_s, @@ -515,17 +517,18 @@ def has_ambiguous_forward_slash(self, open_tag: OpenTag) -> bool: with "<{Component} title={title} />". 
""" if isinstance(open_tag, (OpenTElement, OpenTComponent)): + info = open_tag.parse_info return ( # has attributes - len(open_tag.raw_attrs) > 0 + len(info.raw_attrs) > 0 # last attr not bare attribute - and open_tag.raw_attrs[-1][1] is not None + and info.raw_attrs[-1][1] is not None # last char of last attr is "/" - and open_tag.raw_attrs[-1][1][-1] == "/" + and info.raw_attrs[-1][1][-1] == "/" # parsed starttag ends with "/>" - and open_tag.starttag_text.endswith("/>") + and info.starttag_text.endswith("/>") # if parsed as startend then its not ambiguous - and not open_tag.startend + and not info.startend ) return False From c23e4b1bd6d807838a63bdbab4762dc48496d33d Mon Sep 17 00:00:00 2001 From: Ian Wilson Date: Sun, 28 Jun 2026 13:30:18 -0700 Subject: [PATCH 25/25] Remove relaxed parsing. --- tdom/parser.py | 51 +++++--------------- tdom/parser_test.py | 38 +++++++++++++++ tdom/relaxed_parser_processor_test.py | 68 --------------------------- 3 files changed, 49 insertions(+), 108 deletions(-) delete mode 100644 tdom/relaxed_parser_processor_test.py diff --git a/tdom/parser.py b/tdom/parser.py index 191a03f3..0068a9e3 100644 --- a/tdom/parser.py +++ b/tdom/parser.py @@ -143,37 +143,15 @@ def copy(self, ns: NamespaceType | None = None) -> ParseContext: return ParseContext(ns=ns if ns is not None else self.ns) -@dataclass(frozen=True) -class InternalParseContext: - """ - This is the context that was used to parse a given template. 
- """ - - ns: NamespaceType = "html" - in_component: bool = False - - def copy( - self, ns: NamespaceType | None = None, in_component: bool | None = None - ) -> InternalParseContext: - return InternalParseContext( - ns=ns if ns is not None else self.ns, - in_component=in_component - if in_component is not None - else self.in_component, - ) - - class TemplateParser(HTMLParser): root: OpenTFragment - stack: list[tuple[OpenTag, InternalParseContext]] + stack: list[tuple[OpenTag, ParseContext]] placeholders: PlaceholderState source: SourceTracker | None - root_ctx: InternalParseContext + root_ctx: ParseContext " Assume that template parsing *starts* in this context. " - def __init__( - self, *, root_ctx: InternalParseContext, convert_charrefs: bool = True - ): + def __init__(self, *, root_ctx: ParseContext, convert_charrefs: bool = True): self.root_ctx = root_ctx # This calls HTMLParser.reset() which we override to set up our state. super().__init__(convert_charrefs=convert_charrefs) @@ -482,7 +460,7 @@ def get_starttag_text(self, msg: str = "Expecting starttag text to be set.") -> raise AssertionError(msg) return starttag_text - def get_last_ctx(self) -> InternalParseContext: + def get_last_ctx(self) -> ParseContext: if self.stack: return self.stack[-1][1] else: @@ -491,11 +469,10 @@ def get_last_ctx(self) -> InternalParseContext: def is_literal_tag(self, tag: str): return self.placeholders.copy().remove_placeholders(tag).is_literal - def validate_self_close_attempt(self, last_ctx: InternalParseContext, tag: str): + def validate_self_close_attempt(self, last_ctx: ParseContext, tag: str): if ( - not last_ctx.in_component - and last_ctx.ns == "html" - # @NOTE: Only void tags can be losed when NS is explictly html. + last_ctx.ns == "html" + # @NOTE: Only void tags can be closed when NS is explictly html. 
and tag not in VOID_ELEMENTS ): e = ValueError( @@ -542,14 +519,7 @@ def handle_starttag(self, tag: str, attrs: Sequence[HTMLAttribute]) -> None: if ( isinstance(open_tag, OpenTElement) and open_tag.tag in VOID_ELEMENTS - and ( - last_ctx.ns == "html" - # @TODO: Maybe backtracking when it looks like we needed - # to close it would be better? We just need the component's - # children to parse out and get out of the way because that - # isn't the template we are trying to parse and cache. - or last_ctx.in_component - ) + and last_ctx.ns == "html" ): final_tag = self.finalize_tag(open_tag) self.append_child(final_tag) @@ -565,7 +535,8 @@ def handle_starttag(self, tag: str, attrs: Sequence[HTMLAttribute]) -> None: else: next_ctx = last_ctx elif isinstance(open_tag, OpenTComponent): - next_ctx = last_ctx.copy(in_component=True) + # @NOTE: We "reset" the ns to html when parsing component children. + next_ctx = last_ctx.copy(ns="html") else: next_ctx = last_ctx self.stack.append((open_tag, next_ctx)) @@ -721,7 +692,7 @@ def parse(t: Template, assume_ctx: ParseContext | None = None) -> TNode: """ if assume_ctx is None: assume_ctx = ParseContext() - parser = TemplateParser(root_ctx=InternalParseContext(ns=assume_ctx.ns)) + parser = TemplateParser(root_ctx=assume_ctx) parser.feed_template(t) parser.close() return parser.get_tnode() diff --git a/tdom/parser_test.py b/tdom/parser_test.py index 8b99d4af..07d71509 100644 --- a/tdom/parser_test.py +++ b/tdom/parser_test.py @@ -709,3 +709,41 @@ def test_element_self_closing_error(self): ValueError, match="Self-closing tags are only supported" ): _ = TemplateParser.parse(template) + + +class TestResetRulesInParser: + @pytest.fixture + def wrap_in_svg(self): + def _wrap_in_svg(children: Template) -> Template: + return t"{children}" + + return _wrap_in_svg + + @pytest.fixture + def wrap_in_fo(self): + def _wrap_in_fo(children: Template) -> Template: + return t"{children}" + + return _wrap_in_fo + + def 
test_parser_fails_when_processor_might_succeed_svg_in_html(self, wrap_in_svg): + with pytest.raises(ValueError, match="void"): + _ = TemplateParser.parse( + t"
<{wrap_in_svg}>
" + ) + + def test_parser_works_by_accident_html_in_svg(self, wrap_in_fo): + node = TemplateParser.parse(t"<{wrap_in_fo}>") + assert isinstance(node, TElement) + + def test_parser_fails_when_it_should_anyways_svg_in_svg(self, wrap_in_fo): + with pytest.raises(ValueError, match="void"): + _ = TemplateParser.parse( + t"<{wrap_in_fo}>" + ) + + def test_parser_works_when_processor_should_fail(self, wrap_in_svg): + node = TemplateParser.parse( + t"
<{wrap_in_svg}>
" + ) + assert isinstance(node, TElement) diff --git a/tdom/relaxed_parser_processor_test.py b/tdom/relaxed_parser_processor_test.py deleted file mode 100644 index e7d788c5..00000000 --- a/tdom/relaxed_parser_processor_test.py +++ /dev/null @@ -1,68 +0,0 @@ -from collections.abc import Callable -from string.templatelib import Template - -import pytest - -from tdom import html -from tdom.parser import TemplateParser -from tdom.tnodes import TComponent, TElement - - -def PTC(children: Template) -> Template: - """Pass children through.""" - return children - - -def get_relaxed_html_templates( - Comp: Callable[[Template], Template] = PTC, -) -> tuple[Template, ...]: - return ( - t"
<{Comp}>
", - t"
<{Comp}>
", - t"
<{Comp}>
", - t"
<{Comp}>
<{Comp}/><{Comp}>
", - ) - - -def get_relaxed_xml_templates( - Comp: Callable[[Template], Template] = PTC, -) -> tuple[Template, ...]: - return ( - t"<{Comp}>", - t"<{Comp}>", - t"<{Comp}>", - t"<{Comp}>", - t"<{Comp}>", - ) - - -class TestRelaxedRulesInParser: - """These templates work in the parser because we don't have enough ctx.""" - - def test_html_wrapped(self): - # html rules relaxed during component parsing - for tf in get_relaxed_html_templates(): - node = TemplateParser.parse(tf) - assert isinstance(node, TElement) and node.tag == "div" - assert len(node.children) == 1 and isinstance(node.children[0], TComponent) - - def test_xml_wrapped(self): - # xml (svg/mathml) rules relaxed during component parsing - for tf in get_relaxed_xml_templates(): - node = TemplateParser.parse(tf) - assert isinstance(node, TElement) and node.tag in ("svg", "math") - assert len(node.children) == 1 and isinstance(node.children[0], TComponent) - - -class TestRelaxedRulesInProcessor: - """These templates fail in the processor when only passed through.""" - - def test_html_wrapped_component_ok(self): - for tf in get_relaxed_html_templates(): - with pytest.raises(ValueError): - _ = html(tf) - - def test_xml_wrapped_component_ok(self): - for tf in get_relaxed_xml_templates(): - with pytest.raises(ValueError): - _ = html(tf)