From 82161c63ddc275ae97bfc80e576917e76c559b81 Mon Sep 17 00:00:00 2001 From: Chris Huber Date: Tue, 23 Jun 2026 00:13:23 -0400 Subject: [PATCH] Extract HTML pattern detectors --- .../src/HtmlToBlocks/HtmlTransformer.php | 166 ++++++------------ .../HtmlToBlocks/Patterns/ButtonsPattern.php | 78 ++++++++ .../HtmlToBlocks/Patterns/DetailsPattern.php | 41 +++++ .../Patterns/NavigationPattern.php | 108 ++++++++++++ 4 files changed, 279 insertions(+), 114 deletions(-) create mode 100644 php-transformer/src/HtmlToBlocks/Patterns/ButtonsPattern.php create mode 100644 php-transformer/src/HtmlToBlocks/Patterns/DetailsPattern.php create mode 100644 php-transformer/src/HtmlToBlocks/Patterns/NavigationPattern.php diff --git a/php-transformer/src/HtmlToBlocks/HtmlTransformer.php b/php-transformer/src/HtmlToBlocks/HtmlTransformer.php index 21fa6be..47f16e2 100644 --- a/php-transformer/src/HtmlToBlocks/HtmlTransformer.php +++ b/php-transformer/src/HtmlToBlocks/HtmlTransformer.php @@ -6,6 +6,9 @@ use Automattic\BlocksEngine\PhpTransformer\Contract\ConversionReportProjection; use Automattic\BlocksEngine\PhpTransformer\Contract\TransformationOptions; use Automattic\BlocksEngine\PhpTransformer\Contract\TransformerResult; +use Automattic\BlocksEngine\PhpTransformer\HtmlToBlocks\Patterns\ButtonsPattern; +use Automattic\BlocksEngine\PhpTransformer\HtmlToBlocks\Patterns\DetailsPattern; +use Automattic\BlocksEngine\PhpTransformer\HtmlToBlocks\Patterns\NavigationPattern; use Automattic\BlocksEngine\PhpTransformer\WordPress\Runtime; use DOMDocument; use DOMElement; @@ -17,6 +20,12 @@ final class HtmlTransformer private readonly BlockFactory $blockFactory; + private readonly ButtonsPattern $buttonsPattern; + + private readonly DetailsPattern $detailsPattern; + + private readonly NavigationPattern $navigationPattern; + /** * @var array */ @@ -46,7 +55,10 @@ final class HtmlTransformer public function __construct(private readonly Runtime $runtime = new Runtime()) { - $this->blockFactory = new BlockFactory(); + $this->blockFactory = new BlockFactory(); + $this->buttonsPattern = new ButtonsPattern(); + $this->detailsPattern = new DetailsPattern(); + $this->navigationPattern = new NavigationPattern(); } /** @@ -391,15 +403,14 @@ private function convertElement(DOMElement $element, array &$fallbacks, bool $ca } if ( 'details' === $tagName ) { - $summary = $this->firstChildElement($element, 'summary'); - $children = $this->convertChildrenWithoutTags($element, $fallbacks, array( 'summary' )); - if ( null === $summary && array() === $children ) { - return null; - } - - return $this->createBlock('core/details', array_filter(array_merge($this->presentationAttributes($element), array( - 'summary' => $summary instanceof DOMElement ? $this->innerHtml($summary) : '', - )), static fn ($value): bool => '' !== $value), $children, $element); + return $this->detailsPattern->match( + $element, + $fallbacks, + fn (DOMElement $sourceElement, array &$sourceFallbacks, array $excludedTags): array => $this->convertChildrenWithoutTags($sourceElement, $sourceFallbacks, $excludedTags), + fn (DOMElement $sourceElement): array => $this->presentationAttributes($sourceElement), + fn (DOMElement $sourceElement): string => $this->innerHtml($sourceElement), + fn (string $name, array $attrs = array(), array $innerBlocks = array(), ?DOMElement $sourceElement = null): array => $this->createBlock($name, $attrs, $innerBlocks, $sourceElement) + ); } if ( 'img' === $tagName ) { @@ -419,16 +430,23 @@ private function convertElement(DOMElement $element, array &$fallbacks, bool $ca } if ( 'a' === $tagName && '' !== trim($element->textContent ?? '') ) { - $fileBlock = $this->fileBlockFromAnchor($element); - if ( null !== $fileBlock ) { - return $fileBlock; - } - - return $this->createBlock('core/buttons', array(), array( $this->buttonBlockFromAnchor($element) ), $element); + return $this->buttonsPattern->matchAnchor( + $element, + fn (DOMElement $anchor): ?array => $this->fileBlockFromAnchor($anchor), + fn (DOMElement $sourceElement): array => $this->presentationAttributes($sourceElement), + fn (DOMElement $sourceElement): string => $this->innerHtml($sourceElement), + fn (DOMElement $sourceElement, string $name): string => $this->attr($sourceElement, $name), + fn (string $name, array $attrs = array(), array $innerBlocks = array(), ?DOMElement $sourceElement = null): array => $this->createBlock($name, $attrs, $innerBlocks, $sourceElement) + ); } if ( 'button' === $tagName ) { - return $this->createBlock('core/buttons', array(), array( $this->createBlock('core/button', array_merge($this->presentationAttributes($element), array( 'text' => $this->innerHtml($element) )), array(), $element) ), $element); + return $this->buttonsPattern->matchButton( + $element, + fn (DOMElement $sourceElement): array => $this->presentationAttributes($sourceElement), + fn (DOMElement $sourceElement): string => $this->innerHtml($sourceElement), + fn (string $name, array $attrs = array(), array $innerBlocks = array(), ?DOMElement $sourceElement = null): array => $this->createBlock($name, $attrs, $innerBlocks, $sourceElement) + ); } if ( 'svg' === $tagName ) { @@ -468,9 +486,14 @@ private function convertElement(DOMElement $element, array &$fallbacks, bool $ca } if ( 'nav' === $tagName ) { - $navigationLinks = $this->navigationLinks($element); - if ( array() !== $navigationLinks ) { - return $this->createBlock('core/navigation', $this->presentationAttributes($element), $navigationLinks, $element); + $navigation = $this->navigationPattern->match( + $element, + fn (DOMElement $sourceElement): array => $this->presentationAttributes($sourceElement), + fn (DOMElement $sourceElement): string => $this->innerHtml($sourceElement), + fn (string $name, array $attrs = array(), array $innerBlocks = array(), ?DOMElement $sourceElement = null): array => $this->createBlock($name, $attrs, $innerBlocks, $sourceElement) + ); + if ( null !== $navigation ) { + return $navigation; } } @@ -495,9 +518,15 @@ private function convertElement(DOMElement $element, array &$fallbacks, bool $ca return $codeWindow; } - $buttonChildren = $this->buttonChildren($element); - if ( array() !== $buttonChildren ) { - return $this->createBlock('core/buttons', $this->presentationAttributes($element), $buttonChildren, $element); + $buttons = $this->buttonsPattern->matchContainer( + $element, + fn (DOMElement $sourceElement): array => $this->presentationAttributes($sourceElement), + fn (DOMElement $sourceElement): string => $this->innerHtml($sourceElement), + fn (DOMElement $sourceElement, string $name): string => $this->attr($sourceElement, $name), + fn (string $name, array $attrs = array(), array $innerBlocks = array(), ?DOMElement $sourceElement = null): array => $this->createBlock($name, $attrs, $innerBlocks, $sourceElement) + ); + if ( null !== $buttons ) { + return $buttons; } $children = $this->convertChildren($element, $fallbacks, true); @@ -2230,14 +2259,6 @@ private function mergeClassNames(string ...$classNames): string return implode(' ', $classes); } - private function buttonBlockFromAnchor(DOMElement $anchor): array - { - return $this->createBlock('core/button', array_filter(array_merge($this->presentationAttributes($anchor), array( - 'text' => $this->innerHtml($anchor), - 'url' => $this->attr($anchor, 'href'), - )), static fn ($value): bool => is_array($value) ? array() !== $value : '' !== $value), array(), $anchor); - } - /** * @return array */ @@ -2379,87 +2400,4 @@ private function eventMetadata(DOMElement $element): array return $events; } - /** - * @return array> - */ - private function navigationLinks(DOMElement $element): array - { - $links = array(); - foreach ( $this->directNavigationAnchors($element) as $anchor ) { - $links[] = $this->createBlock('core/navigation-link', array_filter(array( - 'label' => $this->innerHtml($anchor), - 'url' => $this->safeNavigationUrl($this->attr($anchor, 'href')), - 'kind' => 'custom', - ), static fn ($value): bool => '' !== $value), array(), $anchor); - } - - return $links; - } - - private function safeNavigationUrl(string $url): string - { - $url = trim($url); - if ( '' === $url || preg_match('/[\x00-\x1f\x7f]|javascript\s*:/i', $url) ) { - return ''; - } - - return $url; - } - - /** - * @return array - */ - private function directNavigationAnchors(DOMElement $element): array - { - $anchors = array(); - foreach ( $element->childNodes as $child ) { - if ( XML_TEXT_NODE === $child->nodeType && '' === trim($child->textContent ?? '') ) { - continue; - } - - if ( $child instanceof DOMElement && 'a' === strtolower($child->tagName) && '' !== trim($child->textContent ?? '') ) { - $anchors[] = $child; - continue; - } - - if ( $child instanceof DOMElement && in_array(strtolower($child->tagName), array( 'ul', 'ol' ), true) ) { - foreach ( $child->childNodes as $item ) { - if ( XML_TEXT_NODE === $item->nodeType && '' === trim($item->textContent ?? '') ) { - continue; - } - - if ( ! $item instanceof DOMElement || 'li' !== strtolower($item->tagName) ) { - return array(); - } - - $anchor = $this->firstChildElement($item, 'a'); - if ( ! $anchor instanceof DOMElement || '' === trim($anchor->textContent ?? '') || 1 !== $this->childElementCount($item) ) { - return array(); - } - - $anchors[] = $anchor; - } - continue; - } - - return array(); - } - - return $anchors; - } - - /** - * @return array> - */ - private function buttonChildren(DOMElement $element): array - { - $buttons = array(); - foreach ( $element->childNodes as $child ) { - if ( $child instanceof DOMElement && 'a' === strtolower($child->tagName) && '' !== trim($child->textContent ?? '') ) { - $buttons[] = $this->buttonBlockFromAnchor($child); - } - } - return 1 < count($buttons) ? $buttons : array(); - } - } diff --git a/php-transformer/src/HtmlToBlocks/Patterns/ButtonsPattern.php b/php-transformer/src/HtmlToBlocks/Patterns/ButtonsPattern.php new file mode 100644 index 0000000..72f27dc --- /dev/null +++ b/php-transformer/src/HtmlToBlocks/Patterns/ButtonsPattern.php @@ -0,0 +1,78 @@ +|null $fileBlockFromAnchor + * @param callable(DOMElement): array $presentationAttributes + * @param callable(DOMElement): string $innerHtml + * @param callable(DOMElement, string): string $attr + * @param callable(string, array, array>, DOMElement|null): array $createBlock + * @return array|null + */ + public function matchAnchor(DOMElement $anchor, callable $fileBlockFromAnchor, callable $presentationAttributes, callable $innerHtml, callable $attr, callable $createBlock): ?array + { + $fileBlock = $fileBlockFromAnchor($anchor); + if ( null !== $fileBlock ) { + return $fileBlock; + } + + return $createBlock('core/buttons', array(), array( $this->buttonBlockFromAnchor($anchor, $presentationAttributes, $innerHtml, $attr, $createBlock) ), $anchor); + } + + /** + * @param callable(DOMElement): array $presentationAttributes + * @param callable(DOMElement): string $innerHtml + * @param callable(string, array, array>, DOMElement|null): array $createBlock + * @return array + */ + public function matchButton(DOMElement $button, callable $presentationAttributes, callable $innerHtml, callable $createBlock): array + { + return $createBlock('core/buttons', array(), array( + $createBlock('core/button', array_merge($presentationAttributes($button), array( 'text' => $innerHtml($button) )), array(), $button), + ), $button); + } + + /** + * @param callable(DOMElement): array $presentationAttributes + * @param callable(DOMElement): string $innerHtml + * @param callable(DOMElement, string): string $attr + * @param callable(string, array, array>, DOMElement|null): array $createBlock + * @return array|null + */ + public function matchContainer(DOMElement $element, callable $presentationAttributes, callable $innerHtml, callable $attr, callable $createBlock): ?array + { + $buttons = array(); + foreach ( $element->childNodes as $child ) { + if ( $child instanceof DOMElement && 'a' === strtolower($child->tagName) && '' !== trim($child->textContent ?? '') ) { + $buttons[] = $this->buttonBlockFromAnchor($child, $presentationAttributes, $innerHtml, $attr, $createBlock); + } + } + + if ( count($buttons) <= 1 ) { + return null; + } + + return $createBlock('core/buttons', $presentationAttributes($element), $buttons, $element); + } + + /** + * @param callable(DOMElement): array $presentationAttributes + * @param callable(DOMElement): string $innerHtml + * @param callable(DOMElement, string): string $attr + * @param callable(string, array, array>, DOMElement|null): array $createBlock + * @return array + */ + private function buttonBlockFromAnchor(DOMElement $anchor, callable $presentationAttributes, callable $innerHtml, callable $attr, callable $createBlock): array + { + return $createBlock('core/button', array_filter(array_merge($presentationAttributes($anchor), array( + 'text' => $innerHtml($anchor), + 'url' => $attr($anchor, 'href'), + )), static fn ($value): bool => is_array($value) ? array() !== $value : '' !== $value), array(), $anchor); + } +} diff --git a/php-transformer/src/HtmlToBlocks/Patterns/DetailsPattern.php b/php-transformer/src/HtmlToBlocks/Patterns/DetailsPattern.php new file mode 100644 index 0000000..b30c4d1 --- /dev/null +++ b/php-transformer/src/HtmlToBlocks/Patterns/DetailsPattern.php @@ -0,0 +1,41 @@ +> $fallbacks + * @param callable(DOMElement, array>&, array): array> $convertChildrenWithoutTags + * @param callable(DOMElement): array $presentationAttributes + * @param callable(DOMElement): string $innerHtml + * @param callable(string, array, array>, DOMElement|null): array $createBlock + * @return array|null + */ + public function match(DOMElement $element, array &$fallbacks, callable $convertChildrenWithoutTags, callable $presentationAttributes, callable $innerHtml, callable $createBlock): ?array + { + $summary = $this->firstChildElement($element, 'summary'); + $children = $convertChildrenWithoutTags($element, $fallbacks, array( 'summary' )); + if ( null === $summary && array() === $children ) { + return null; + } + + return $createBlock('core/details', array_filter(array_merge($presentationAttributes($element), array( + 'summary' => $summary instanceof DOMElement ? $innerHtml($summary) : '', + )), static fn ($value): bool => '' !== $value), $children, $element); + } + + private function firstChildElement(DOMElement $element, string $tagName): ?DOMElement + { + foreach ( $element->childNodes as $child ) { + if ( $child instanceof DOMElement && strtolower($child->tagName) === $tagName ) { + return $child; + } + } + + return null; + } +} diff --git a/php-transformer/src/HtmlToBlocks/Patterns/NavigationPattern.php b/php-transformer/src/HtmlToBlocks/Patterns/NavigationPattern.php new file mode 100644 index 0000000..66f7afa --- /dev/null +++ b/php-transformer/src/HtmlToBlocks/Patterns/NavigationPattern.php @@ -0,0 +1,108 @@ + $presentationAttributes + * @param callable(DOMElement): string $innerHtml + * @param callable(string, array, array>, DOMElement|null): array $createBlock + * @return array|null + */ + public function match(DOMElement $element, callable $presentationAttributes, callable $innerHtml, callable $createBlock): ?array + { + $links = array(); + foreach ( $this->directNavigationAnchors($element) as $anchor ) { + $links[] = $createBlock('core/navigation-link', array_filter(array( + 'label' => $innerHtml($anchor), + 'url' => $this->safeNavigationUrl($anchor->hasAttribute('href') ? $anchor->getAttribute('href') : ''), + 'kind' => 'custom', + ), static fn ($value): bool => '' !== $value), array(), $anchor); + } + + if ( array() === $links ) { + return null; + } + + return $createBlock('core/navigation', $presentationAttributes($element), $links, $element); + } + + private function safeNavigationUrl(string $url): string + { + $url = trim($url); + if ( '' === $url || preg_match('/[\x00-\x1f\x7f]|javascript\s*:/i', $url) ) { + return ''; + } + + return $url; + } + + /** + * @return array + */ + private function directNavigationAnchors(DOMElement $element): array + { + $anchors = array(); + foreach ( $element->childNodes as $child ) { + if ( XML_TEXT_NODE === $child->nodeType && '' === trim($child->textContent ?? '') ) { + continue; + } + + if ( $child instanceof DOMElement && 'a' === strtolower($child->tagName) && '' !== trim($child->textContent ?? '') ) { + $anchors[] = $child; + continue; + } + + if ( $child instanceof DOMElement && in_array(strtolower($child->tagName), array( 'ul', 'ol' ), true) ) { + foreach ( $child->childNodes as $item ) { + if ( XML_TEXT_NODE === $item->nodeType && '' === trim($item->textContent ?? '') ) { + continue; + } + + if ( ! $item instanceof DOMElement || 'li' !== strtolower($item->tagName) ) { + return array(); + } + + $anchor = $this->firstChildElement($item, 'a'); + if ( ! $anchor instanceof DOMElement || '' === trim($anchor->textContent ?? '') || 1 !== $this->childElementCount($item) ) { + return array(); + } + + $anchors[] = $anchor; + } + continue; + } + + return array(); + } + + return $anchors; + } + + private function firstChildElement(DOMElement $element, string $tagName): ?DOMElement + { + foreach ( $element->childNodes as $child ) { + if ( $child instanceof DOMElement && strtolower($child->tagName) === $tagName ) { + return $child; + } + } + + return null; + } + + private function childElementCount(DOMElement $element): int + { + $count = 0; + foreach ( $element->childNodes as $child ) { + if ( $child instanceof DOMElement ) { + ++$count; + } + } + + return $count; + } +}