Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
15 changes: 14 additions & 1 deletion php-transformer/src/HtmlToBlocks/BlockFactory.php
Original file line number Diff line number Diff line change
Expand Up @@ -29,13 +29,26 @@ public function create(string $name, array $attrs = array(), array $innerBlocks

return array(
'blockName' => $name,
'attrs' => $attrs,
'attrs' => $this->commentAttrs($name, $attrs),
'innerBlocks' => $innerBlocks,
'innerHTML' => $innerHtml,
'innerContent' => $innerContent,
);
}

/**
 * Returns the attribute map after dropping a paragraph's `content` entry
 * when that content opens with an anchor tag.
 *
 * Only `core/paragraph` is inspected. The `content` value is cast to string
 * and matched case-insensitively against an `<a` tag that may be preceded by
 * whitespace; on a match the `content` key is removed. Every other block
 * name, and every other attribute, passes through untouched.
 *
 * @param array<string, mixed> $attrs
 * @return array<string, mixed>
 */
private function commentAttrs(string $name, array $attrs): array
{
    if ( 'core/paragraph' !== $name ) {
        return $attrs;
    }

    $content = (string) ($attrs['content'] ?? '');
    if ( 1 === preg_match('/^\s*<a\b/i', $content) ) {
        unset($attrs['content']);
    }

    return $attrs;
}

/**
* @param array<string, mixed> $attrs
* @param array<int, array<string, mixed>> $innerBlocks
Expand Down
87 changes: 85 additions & 2 deletions php-transformer/src/HtmlToBlocks/HtmlTransformer.php
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,11 @@ final class HtmlTransformer
*/
private array $structureProvenance = array();

/**
* @var array<int, array<string, mixed>>
*/
private array $scriptMetadata = array();

/**
* @var array<string, array<string, mixed>>
*/
Expand Down Expand Up @@ -86,6 +91,7 @@ public function transform(string $html, array $options = array()): TransformerRe
$this->presentationProvenance = array();
$this->sourceProvenance = array();
$this->structureProvenance = array();
$this->scriptMetadata = array();
$this->assetMetadata = $this->assetMetadataFromOptions($options);
$this->staticClassPromotions = $this->detectStaticClassPromotions($html);
$this->staticStyleRules = $this->staticStyleRules($html, (string) ($options['static_css'] ?? ''));
Expand Down Expand Up @@ -167,6 +173,18 @@ public function transform(string $html, array $options = array()): TransformerRe
),
);

foreach ( $this->scriptMetadata as $metadata ) {
$diagnostics[] = array(
'code' => 'html_static_script_metadata',
'message' => 'Static script data was preserved as bounded metadata and does not require client script execution.',
'source' => self::class,
'reason' => 'script_static_metadata',
'tag' => 'script',
'selector' => $metadata['selector'] ?? null,
'script_role' => $metadata['script_role'] ?? null,
);
}

foreach ( $fallbacks as $fallback ) {
if ( ! empty($fallback['diagnostic_code']) ) {
$diagnostics[] = array(
Expand Down Expand Up @@ -203,6 +221,7 @@ public function transform(string $html, array $options = array()): TransformerRe
'presentation_signals' => $this->presentationProvenance,
'source_provenance' => $sourceProvenance,
'structure_signals' => $this->structureProvenance,
'script_metadata' => $this->scriptMetadata,
),
);
$sourceReports['conversion_report'] = ConversionReportProjection::fromResultParts('html', $blocks, $fallbacks, $sourceReports, array(), $provenance, $metrics);
Expand Down Expand Up @@ -596,6 +615,15 @@ private function convertElement(DOMElement $element, array &$fallbacks, bool $ca
return $this->createBlock('core/preformatted', array_merge($this->presentationAttributes($element), array( 'content' => $this->innerHtmlPreservingWhitespace($element) )), array(), $element);
}

if ( 'plaintext' === $tagName ) {
$content = $this->runtime->escapeHtml($element->textContent ?? '');
if ( '' === trim($content) ) {
return null;
}

return $this->createBlock('core/preformatted', array_merge($this->presentationAttributes($element), array( 'content' => $content )), array(), $element);
}

if ( 'table' === $tagName ) {
return $this->createBlock('core/table', array_merge($this->presentationAttributes($element), $this->tableAttributes($element)), array(), $element);
}
Expand Down Expand Up @@ -718,6 +746,10 @@ private function convertElement(DOMElement $element, array &$fallbacks, bool $ca
}

if ( 'script' === $tagName ) {
if ( $this->captureStaticScriptMetadata($element) ) {
return null;
}

$this->captureScriptFallback($element, $fallbacks);
return null;
}
Expand Down Expand Up @@ -773,7 +805,7 @@ private function convertElement(DOMElement $element, array &$fallbacks, bool $ca
}
}

if ( in_array($tagName, array( 'article', 'aside', 'body', 'div', 'footer', 'header', 'main', 'nav', 'section' ), true) ) {
if ( in_array($tagName, array( 'article', 'aside', 'body', 'center', 'div', 'footer', 'header', 'main', 'nav', 'section' ), true) ) {
$logo = $this->logoPattern->match(
$element,
fn (DOMElement $sourceElement): array => $this->presentationAttributes($sourceElement),
Expand Down Expand Up @@ -1438,7 +1470,7 @@ private function isNonContentRuntimeControl(DOMElement $element): bool

/**
 * Reports whether a tag name is on the list of inline content elements.
 *
 * The list includes the legacy `font` element and the ruby annotation
 * elements (`ruby`, `rt`, `rp`) alongside the usual phrasing tags. The
 * comparison is strict, so callers are expected to pass an already
 * lower-cased tag name.
 *
 * @param string $tagName Tag name to look up.
 * @return bool True when the tag is in the inline list.
 */
private function isInlineContentElement(string $tagName): bool
{
    // A single return: a second, stale list placed before this one would make it unreachable.
    return in_array($tagName, array( 'abbr', 'b', 'cite', 'code', 'em', 'font', 'i', 'mark', 'rp', 'rt', 'ruby', 'small', 'span', 'strong', 'sub', 'sup', 'time' ), true);
}

private function hasBlockContentChildren(DOMElement $element): bool
Expand Down Expand Up @@ -2434,6 +2466,7 @@ private function captureScriptFallback(DOMElement $element, array &$fallbacks):
{
$boundedHtml = $this->boundedFallbackHtml($this->safeFallbackHtml($element));
$boundedBody = $this->boundedFallbackText(trim($element->textContent ?? ''));
$scriptRole = $this->scriptRole($element);
$fallbacks[] = FallbackDiagnostic::build(array(
'type' => 'html',
'reason' => 'script_requires_runtime',
Expand All @@ -2445,6 +2478,8 @@ private function captureScriptFallback(DOMElement $element, array &$fallbacks):
'attributes' => $this->safeScriptAttributes($element),
'context' => $this->sourceContext($element),
'events' => $this->eventMetadata($element),
'script_role' => $scriptRole,
'script_source_kind' => '' !== trim($this->attr($element, 'src')) ? 'external' : 'inline',
'text_length' => strlen(trim($element->textContent ?? '')),
'child_count' => $this->childElementCount($element),
'html' => $boundedHtml['html'],
Expand All @@ -2456,6 +2491,54 @@ private function captureScriptFallback(DOMElement $element, array &$fallbacks):
), $this->fallbackProvenance);
}

/**
 * Tries to record an inline <script> element as static metadata.
 *
 * Nothing is recorded, and false is returned, when the element carries a
 * non-blank `src` attribute or when scriptRole() reports anything other
 * than 'data'. Otherwise one entry describing the script is appended to
 * $this->scriptMetadata and true is returned.
 *
 * @return bool True when a metadata entry was appended.
 */
private function captureStaticScriptMetadata(DOMElement $element): bool
{
    // The role is only resolved for scripts without an external source.
    $source = trim($this->attr($element, 'src'));
    $role   = '' === $source ? $this->scriptRole($element) : null;
    if ( 'data' !== $role ) {
        return false;
    }

    // The trimmed script text goes through the bounded-text helper, which
    // supplies the text, byte count and truncation flag stored below.
    $body = $this->boundedFallbackText(trim($element->textContent ?? ''));

    $entry = array(
        'type' => 'script_metadata',
        'reason' => 'script_static_metadata',
        'source_format' => 'html',
        'tag' => 'script',
        'selector' => $this->elementSelector($element),
        'attributes' => $this->safeScriptAttributes($element),
        'context' => $this->sourceContext($element),
        'script_role' => $role,
        'script_source_kind' => 'inline',
        'body' => $body['text'],
        'body_bytes' => $body['bytes'],
        'body_truncated' => $body['truncated'],
    );
    $this->scriptMetadata[] = $entry;

    return true;
}

/**
 * Classifies a <script> element as 'runtime' (executable) or 'data' (inert)
 * from its `type` attribute.
 *
 * The attribute is trimmed and lower-cased before comparison.
 *  - A missing/empty type, `module`, or any JavaScript MIME type yields 'runtime'.
 *  - `application/ld+json` (prefix match), `application/json`, `importmap`
 *    and `speculationrules` yield 'data'.
 *  - Any remaining `text/*` type yields 'data'.
 *  - Everything else yields 'runtime'.
 *
 * @return string Either 'runtime' or 'data'.
 */
private function scriptRole(DOMElement $element): string
{
    $type = strtolower(trim($this->attr($element, 'type')));
    if ( '' === $type || 'module' === $type ) {
        return 'runtime';
    }

    // Browsers decide executability from the MIME essence only, so parameters such as
    // "; charset=utf-8" must not turn an executable script into static data.
    $essence = trim(explode(';', $type, 2)[0]);

    // JavaScript MIME types per the WHATWG MIME Sniffing Standard. The legacy text/*
    // entries matter most: without them they would match the generic text/* data rule.
    $javascriptMimeTypes = array(
        'application/ecmascript',
        'application/javascript',
        'application/x-ecmascript',
        'application/x-javascript',
        'text/ecmascript',
        'text/javascript',
        'text/javascript1.0',
        'text/javascript1.1',
        'text/javascript1.2',
        'text/javascript1.3',
        'text/javascript1.4',
        'text/javascript1.5',
        'text/jscript',
        'text/livescript',
        'text/x-ecmascript',
        'text/x-javascript',
    );
    if ( in_array($essence, $javascriptMimeTypes, true) ) {
        return 'runtime';
    }

    if ( str_starts_with($type, 'application/ld+json') || in_array($type, array( 'application/json', 'importmap', 'speculationrules' ), true) ) {
        return 'data';
    }

    // Executable text/* types were returned above, so what remains is inert to the browser.
    if ( str_starts_with($type, 'text/') ) {
        return 'data';
    }

    return 'runtime';
}

/**
* @return array<string, string>
*/
Expand Down
52 changes: 52 additions & 0 deletions php-transformer/tests/contract/run.php
Original file line number Diff line number Diff line change
Expand Up @@ -244,6 +244,33 @@ function serialize_blocks(array $blocks): string
$assert(! str_contains((string) $buttonResult['serialized_blocks'], '\\u003c'), 'button serialization avoids escaped nested HTML attrs');
$assert('pass' === ($buttonResult['source_reports']['wp_block_validity']['status'] ?? ''), 'HTML transform exposes passing WordPress block validity report for generated buttons');

$rubyResult = ( new HtmlTransformer() )->transform(
'<main><blockquote><ruby>翻訳<rt>ほんやく</rt></ruby> keeps pronunciation visible.</blockquote></main>'
)->toArray();
$rubyQuote = $rubyResult['blocks'][0] ?? array();
$assert(array() === ($rubyResult['fallbacks'] ?? array()), 'ruby phrasing content does not create unsupported fallbacks');
$assert('core/quote' === ($rubyQuote['blockName'] ?? ''), 'ruby phrasing content remains inside quote block');
$assert(str_contains((string) ($rubyResult['serialized_blocks'] ?? ''), '<ruby>翻訳<rt>ほんやく</rt></ruby>'), 'ruby markup is preserved in quote content');

$plaintextResult = ( new HtmlTransformer() )->transform(
'<main><plaintext>Plain legacy text with &lt;b&gt;literal tags&lt;/b&gt;</plaintext></main>'
)->toArray();
$plaintextBlock = $plaintextResult['blocks'][0] ?? array();
$assert(array() === ($plaintextResult['fallbacks'] ?? array()), 'plaintext content does not create unsupported fallbacks');
$assert('core/preformatted' === ($plaintextBlock['blockName'] ?? ''), 'plaintext content converts to a preformatted block');
$assert(str_contains((string) ($plaintextBlock['innerHTML'] ?? ''), '&lt;b&gt;literal tags&lt;/b&gt;'), 'plaintext literal tags are escaped in preformatted content');

$linkedLogoResult = ( new HtmlTransformer() )->transform(
'<main><a class="site-logo" href="/">Mara Vale</a></main>'
)->toArray();
$linkedLogoBlock = $linkedLogoResult['blocks'][0] ?? array();
$linkedLogoSerialized = (string) ($linkedLogoResult['serialized_blocks'] ?? '');
$assert('core/paragraph' === ($linkedLogoBlock['blockName'] ?? ''), 'linked logo text converts to a paragraph block');
$assert(! array_key_exists('content', is_array($linkedLogoBlock['attrs'] ?? null) ? $linkedLogoBlock['attrs'] : array()), 'paragraph source content is not serialized as a block comment attribute');
$assert(str_contains($linkedLogoSerialized, '<p class="site-logo"><a class="site-logo" href="/">Mara Vale</a></p>'), 'linked logo paragraph preserves anchor markup in saved HTML');
$assert(! str_contains($linkedLogoSerialized, '\\u003ca'), 'linked logo paragraph avoids raw anchor HTML in delimiter JSON');
$assert('pass' === ($linkedLogoResult['source_reports']['wp_block_validity']['status'] ?? ''), 'linked logo paragraph passes generated block validity checks');

$invalidButtonBlocks = array(
array(
'blockName' => 'core/button',
Expand Down Expand Up @@ -474,6 +501,31 @@ function serialize_blocks(array $blocks): string
$assert(($staticPlan['totals']['routes'] ?? null) === ($staticSummary['route_count'] ?? null), 'conversion report route count matches materialization plan totals');
$assert(($staticPlan['totals']['navigation_links'] ?? null) === ($staticSummary['navigation_link_count'] ?? null), 'conversion report navigation link count matches materialization plan totals');
$assert(($staticPlan['totals']['menus'] ?? null) === ($staticSummary['menu_count'] ?? null), 'conversion report menu count matches materialization plan totals');

$legacyFrontPageSite = $compiler->compile(
array(
'entrypoint' => 'index.html',
'files' => array(
'index.html' => '<main><h1>Home</h1></main>',
'about-us.html' => '<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd"><HTML><HEAD><META HTTP-EQUIV="Content-Type" CONTENT="text/html; charset=windows-1252"><TITLE>About Us</TITLE></HEAD><BODY BGCOLOR="#FFFFFF" TEXT="#003366"><CENTER><TABLE BORDER="0" WIDTH="600"><TR><TD><CENTER><FONT FACE="Times New Roman" SIZE="6"><B>About Hank\'s Tool Rental</B></FONT></CENTER><FONT FACE="Arial" SIZE="2">Family owned since 1987.<BR>We answer the phone.</FONT></TD></TR></TABLE></CENTER></BODY></HTML>',
),
)
)->toArray();
$legacyPlanPage = null;
foreach ( $legacyFrontPageSite['source_reports']['materialization_plan']['pages'] ?? array() as $planPage ) {
if ( 'about-us.html' === ($planPage['source_path'] ?? '') ) {
$legacyPlanPage = $planPage;
}
}
$legacyBlockMarkup = (string) ($legacyPlanPage['block_markup'] ?? '');
$assert('' !== trim($legacyBlockMarkup), 'legacy HTML 4 FrontPage-era documents produce non-empty materialization block markup');
$assert(str_contains($legacyBlockMarkup, 'About Hank\'s Tool Rental'), 'legacy HTML 4 FrontPage-era table/font/center content is preserved');
$assert(str_contains($legacyBlockMarkup, '<!-- wp:table'), 'legacy HTML 4 layout tables convert to table block markup instead of empty fallback metadata');

$legacyInline = ( new HtmlTransformer() )->transform('<CENTER><FONT FACE="Arial" SIZE="2">Visible legacy inline copy</FONT></CENTER>')->toArray();
$assert(str_contains((string) ($legacyInline['serialized_blocks'] ?? ''), 'Visible legacy inline copy'), 'center/font-only legacy fragments preserve visible text');
$assert(str_contains((string) ($legacyInline['serialized_blocks'] ?? ''), '<!-- wp:paragraph'), 'center/font-only legacy fragments convert to semantic paragraph blocks');

$logoAssetPlanRow = null;
$cssAssetPlanRow = null;
foreach ( $staticPlan['assets'] ?? array() as $assetPlanRow ) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,8 @@
{ "path": "status", "assert": "equals", "value": "success_with_warnings" },
{ "path": "fallbacks", "assert": "count", "count": 1 },
{ "path": "fallbacks.0.diagnostic_code", "assert": "equals", "value": "html_script_fallback" },
{ "path": "fallbacks.0.script_role", "assert": "equals", "value": "runtime" },
{ "path": "fallbacks.0.script_source_kind", "assert": "equals", "value": "inline" },
{ "path": "fallbacks.0.body", "assert": "equals", "value": "window.inlineRuntime = true;" },
{ "path": "serialized_blocks", "assert": "contains", "value": "Content remains block-convertible." },
{ "path": "serialized_blocks", "assert": "not_contains", "value": "assets/site.js" },
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@
},
"expected_blocks": [
{ "path": "blocks.0", "name": "core/image", "attrs": { "url": "assets/photos/thumb-stop-motion.png", "href": "single.html", "linkDestination": "custom", "linkAriaHidden": "true", "linkTabIndex": "-1" } },
{ "path": "blocks.1", "name": "core/paragraph", "attrs": { "className": "bp-logo", "content": "<a class=\"bp-logo\" href=\"index.html\" aria-label=\"The Baseplate - home\"></a>" } }
{ "path": "blocks.1", "name": "core/paragraph", "attrs": { "className": "bp-logo" } }
],
"expected_fallbacks": [],
"expect": [
Expand All @@ -26,6 +26,7 @@
{ "path": "serialized_blocks", "assert": "contains", "value": "<!-- wp:image" },
{ "path": "serialized_blocks", "assert": "contains", "value": "<a href=\"single.html\" aria-hidden=\"true\" tabindex=\"-1\"><img src=\"assets/photos/thumb-stop-motion.png\" alt=\"\"/></a>" },
{ "path": "serialized_blocks", "assert": "contains", "value": "<a class=\"bp-logo\" href=\"index.html\" aria-label=\"The Baseplate - home\"></a>" },
{ "path": "serialized_blocks", "assert": "not_contains", "value": "\\u003ca class=\\u0022bp-logo" },
{ "path": "fallbacks", "assert": "count", "count": 0 },
{ "path": "coverage.0.fallback_count", "assert": "equals", "value": 0 }
]
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@
{ "path": "blocks.1", "name": "core/details", "attrs": { "summary": "What ships?", "className": "faq" } },
{ "path": "blocks.1.innerBlocks.0", "name": "core/paragraph", "attrs": { "content": "Native primitives." } },
{ "path": "blocks.2", "name": "core/group", "attrs": { "className": "site-footer" } },
{ "path": "blocks.2.innerBlocks.0", "name": "core/paragraph", "attrs": { "content": "<a href=\"/contact\">Contact</a>" } }
{ "path": "blocks.2.innerBlocks.0", "name": "core/paragraph" }
],
"expected_fallbacks": [],
"expect": [
Expand All @@ -36,6 +36,8 @@
{ "path": "fallbacks", "assert": "count", "count": 0 },
{ "path": "serialized_blocks", "assert": "contains", "value": "<!-- wp:details" },
{ "path": "serialized_blocks", "assert": "contains", "value": "<!-- wp:navigation-link" },
{ "path": "serialized_blocks", "assert": "contains", "value": "<a href=\"/contact\">Contact</a>" },
{ "path": "serialized_blocks", "assert": "not_contains", "value": "\\u003ca href=\\u0022/contact" },
{ "path": "source_reports.html.source_provenance", "assert": "count", "count": 9 },
{ "path": "source_reports.html.source_provenance.3.block_path", "assert": "equals", "value": "blocks.0.innerBlocks.1.innerBlocks.0" },
{ "path": "source_reports.html.source_provenance.3.block_name", "assert": "equals", "value": "core/navigation-link" },
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@
{ "path": "blocks.0.innerBlocks.1.innerBlocks.0", "name": "core/navigation-link", "attrs": { "url": "/docs", "kind": "custom" } },
{ "path": "blocks.0.innerBlocks.1.innerBlocks.1", "name": "core/navigation-link", "attrs": { "url": "/pricing", "kind": "custom" } },
{ "path": "blocks.1", "name": "core/group" },
{ "path": "blocks.1.innerBlocks.0", "name": "core/paragraph", "attrs": { "content": "<a href=\"/about\">About us</a>" } },
{ "path": "blocks.1.innerBlocks.0", "name": "core/paragraph" },
{ "path": "blocks.1.innerBlocks.1", "name": "core/buttons" },
{ "path": "blocks.1.innerBlocks.1.innerBlocks.0", "name": "core/button", "attrs": { "text": "Start now", "url": "/start", "className": "cta" } },
{ "path": "blocks.2", "name": "core/navigation" },
Expand All @@ -37,15 +37,16 @@
{ "path": "status", "assert": "equals", "value": "success" },
{ "path": "blocks", "assert": "count", "count": 4 },
{ "path": "blocks.0.innerBlocks", "assert": "count", "count": 2 },
{ "path": "blocks.0.innerBlocks.0.attrs.content", "assert": "contains", "value": "site-logo" },
{ "path": "blocks.0.innerBlocks.0.attrs.content", "assert": "contains", "value": "<svg" },
{ "path": "serialized_blocks", "assert": "contains", "value": "site-logo" },
{ "path": "serialized_blocks", "assert": "contains", "value": "<svg" },
{ "path": "blocks.0.innerBlocks.1.innerBlocks", "assert": "count", "count": 2 },
{ "path": "blocks.1.innerBlocks", "assert": "count", "count": 2 },
{ "path": "blocks.2.innerBlocks", "assert": "count", "count": 2 },
{ "path": "blocks.3.innerBlocks", "assert": "count", "count": 2 },
{ "path": "fallbacks", "assert": "count", "count": 0 },
{ "path": "serialized_blocks", "assert": "contains", "value": "<!-- wp:navigation-link" },
{ "path": "serialized_blocks", "assert": "contains", "value": "<!-- wp:button" },
{ "path": "serialized_blocks", "assert": "not_contains", "value": "\\u003ca href=\\u0022/about" },
{ "path": "coverage.0.fallback_count", "assert": "equals", "value": 0 }
]
}
Loading
Loading