Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
300 changes: 5 additions & 295 deletions php-transformer/src/ArtifactCompiler/ArtifactCompiler.php
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@

namespace Automattic\BlocksEngine\PhpTransformer\ArtifactCompiler;

use Automattic\BlocksEngine\PhpTransformer\AssetAnalysis\ReferenceAnalyzer;
use Automattic\BlocksEngine\PhpTransformer\Contract\ConversionReportProjection;
use Automattic\BlocksEngine\PhpTransformer\Contract\TransformerResult;
use Automattic\BlocksEngine\PhpTransformer\FormatBridge\FormatBridge;
Expand Down Expand Up @@ -183,256 +184,11 @@ private function safeEntryImageHtml(string $html, string $entryPath, array $file
*/
private function referenceReports(array $files): array
{
    // Three report buckets, returned under the keys 'internal_links',
    // 'asset_references' and 'image_references'.
    $links  = array();
    $assets = array();
    $images = array();

    foreach ( $files as $file ) {
        // Binary payloads are never scanned for references.
        if ( ! empty($file['binary']) ) {
            continue;
        }

        $kind     = $file['kind'] ?? '';
        $isMarkup = 'html' === $kind || 'blocks' === $kind;

        // Pick the scanner that fits the file kind; other kinds contribute nothing.
        if ( $isMarkup ) {
            $candidates = $this->htmlReferenceCandidates((string) ($file['content'] ?? ''), (string) ($file['path'] ?? ''));
        } elseif ( 'css' === $kind ) {
            $candidates = $this->cssReferenceCandidates((string) ($file['content'] ?? ''), (string) ($file['path'] ?? ''));
        } else {
            continue;
        }

        foreach ( $candidates as $candidate ) {
            // Only non-empty URLs that point inside the artifact are reported.
            if ( '' === $candidate['url'] || ! $this->isArtifactLocalReference($candidate['url']) ) {
                continue;
            }

            $reference = $this->normalizeReferenceCandidate($candidate, $files);
            $target    = $reference['target'] ?? null;

            // References that did not resolve to a file in $files are not reported.
            if ( ! is_array($target) ) {
                continue;
            }

            // The resolved file record is internal bookkeeping and is stripped from the report entry.
            unset($reference['target']);

            if ( $this->isLinkableDocument($target) ) {
                // A linkable document counts as an internal link only when reached from an <a> in markup.
                if ( $isMarkup && 'a' === $candidate['element'] ) {
                    $links[] = $reference;
                }
                continue;
            }

            $assets[] = $reference;

            // <img src> references in markup are additionally mirrored in the legacy image report shape.
            if ( $isMarkup && 'img' === $candidate['element'] && 'src' === $candidate['attribute'] ) {
                $images[] = $this->legacyImageReference($reference, count($images));
            }
        }
    }

    return array(
        'internal_links'   => $links,
        'asset_references' => $assets,
        'image_references' => $images,
    );
}

/**
 * Lists every URL carried by the opening tags of a, audio, img, script, link, source and video elements.
 *
 * One candidate is produced per URL, so a single srcset attribute can yield several
 * candidates that share the same selector, attribute and raw value. The selector counter
 * is kept per element name across the whole string, in match order.
 *
 * @return array<int, array{source_path: string, selector: string, element: string, attribute: string, value: string, url: string}>
 */
private function htmlReferenceCandidates(string $html, string $sourcePath): array
{
    if ( '' === trim($html) ) {
        return array();
    }

    $tagCount = preg_match_all('/<\s*(a|audio|img|script|link|source|video)\b([^>]*)>/i', $html, $tags, PREG_SET_ORDER);
    if ( ! $tagCount ) {
        return array();
    }

    $found          = array();
    $seenPerElement = array();

    foreach ( $tags as $tag ) {
        $tagName       = strtolower((string) $tag[1]);
        $tagAttributes = $this->htmlAttributes((string) $tag[2]);

        // Running occurrence number for this element name, used to build the selector.
        $seenPerElement[$tagName] = 1 + ($seenPerElement[$tagName] ?? 0);
        $tagSelector              = sprintf('%s:nth-of-type(%d)', $tagName, $seenPerElement[$tagName]);

        foreach ( $this->referenceAttributesForElement($tagName, $tagAttributes) as $attributeName ) {
            $rawValue = (string) ($tagAttributes[$attributeName] ?? '');

            foreach ( $this->urlsFromAttributeValue($attributeName, $rawValue) as $singleUrl ) {
                $found[] = array(
                    'source_path' => $sourcePath,
                    'selector'    => $tagSelector,
                    'element'     => $tagName,
                    'attribute'   => $attributeName,
                    'value'       => $rawValue,
                    'url'         => $singleUrl,
                );
            }
        }
    }

    return $found;
}

/**
 * Parses the attribute portion of an opening tag into a name => value map.
 *
 * Only name=value pairs are recognised (double-quoted, single-quoted or unquoted);
 * names are lower-cased and values are HTML-entity decoded. When a name repeats,
 * the last occurrence wins.
 *
 * @return array<string, string>
 */
private function htmlAttributes(string $attributeText): array
{
    $pairCount = preg_match_all('/([A-Za-z_:][-A-Za-z0-9_:.]*)\s*=\s*(?:(["\'])(.*?)\2|([^\s"\'>]+))/s', $attributeText, $pairs, PREG_SET_ORDER);
    if ( ! $pairCount ) {
        return array();
    }

    $parsed = array();
    foreach ( $pairs as $pair ) {
        $name = strtolower((string) $pair[1]);

        // Group 3 holds a quoted value, group 4 an unquoted one; an empty quoted
        // value falls through to group 4, which is then absent and yields ''.
        $quoted = $pair[3] ?? '';
        $raw    = '' !== $quoted ? $quoted : ($pair[4] ?? '');

        $parsed[$name] = html_entity_decode((string) $raw, ENT_QUOTES | ENT_HTML5);
    }

    return $parsed;
}

/**
 * Returns the URL-bearing attribute names for an element that are actually set on it.
 *
 * The result keeps the fixed per-element order (for example src before srcset) and is
 * empty for elements that are not tracked.
 *
 * @param array<string, string> $attributes
 * @return array<int, string>
 */
private function referenceAttributesForElement(string $element, array $attributes): array
{
    $tracked = match ( $element ) {
        'a', 'link'       => array('href'),
        'audio', 'script' => array('src'),
        'img', 'source'   => array('src', 'srcset'),
        'video'           => array('src', 'poster'),
        default           => array(),
    };

    $present = array();
    foreach ( $tracked as $name ) {
        if ( isset($attributes[$name]) ) {
            $present[] = $name;
        }
    }

    return $present;
}

/**
 * Extracts the URL(s) held by an attribute value.
 *
 * For srcset, the value is split on commas and the first whitespace-delimited token of
 * each entry is taken; empty entries are dropped. Any other attribute yields exactly one
 * element: the trimmed value, even when that is an empty string.
 *
 * @return array<int, string>
 */
private function urlsFromAttributeValue(string $attribute, string $value): array
{
    if ( 'srcset' === $attribute ) {
        $collected = array();

        foreach ( explode(',', $value) as $entry ) {
            $tokens = preg_split('/\s+/', trim($entry));
            $first  = is_array($tokens) ? ($tokens[0] ?? '') : '';

            if ( '' !== $first ) {
                $collected[] = $first;
            }
        }

        return $collected;
    }

    return array(trim($value));
}

/**
 * Lists the URLs referenced by a stylesheet: first every @import, then every url(...) token.
 *
 * Import candidates use the context 'css-import'. url(...) candidates use 'css-font-face'
 * when cssRuleContext() reports 'font-face' for their offset, and 'css-url' otherwise.
 * Selector ordinals are 1-based and counted separately for the two passes.
 *
 * Note: an `@import url(...)` statement is matched by both patterns, so it is listed
 * once as an import and once more as a plain url(...) candidate.
 *
 * @return array<int, array{source_path: string, selector: string, element: string, attribute: string, value: string, url: string, context?: string}>
 */
private function cssReferenceCandidates(string $css, string $sourcePath): array
{
    if ( '' === trim($css) ) {
        return array();
    }

    $found = array();

    // Pass 1: @import statements, with or without a url(...) wrapper.
    $importCount = preg_match_all('/@import\s+(?:url\(\s*)?(["\']?)([^"\'\)\s;]+)\1\s*\)?[^;]*;/i', $css, $imports, PREG_SET_ORDER);
    if ( $importCount ) {
        foreach ( $imports as $position => $import ) {
            $importUrl = html_entity_decode(trim((string) $import[2]), ENT_QUOTES | ENT_HTML5);
            $found[]   = array(
                'source_path' => $sourcePath,
                'selector'    => 'css:@import(' . ($position + 1) . ')',
                'element'     => 'style',
                'attribute'   => '@import',
                'value'       => $importUrl,
                'url'         => $importUrl,
                'context'     => 'css-import',
            );
        }
    }

    // Pass 2: every url(...) token; byte offsets are captured so the enclosing rule can be inspected.
    $urlCount = preg_match_all('/url\(\s*(["\']?)([^"\')]+)\1\s*\)/i', $css, $tokens, PREG_SET_ORDER | PREG_OFFSET_CAPTURE);
    if ( $urlCount ) {
        foreach ( $tokens as $position => $token ) {
            $tokenUrl   = html_entity_decode(trim((string) $token[2][0]), ENT_QUOTES | ENT_HTML5);
            $inFontFace = 'font-face' === $this->cssRuleContext($css, (int) $token[0][1]);
            $prefix     = $inFontFace ? 'css:@font-face:url(' : 'css:url(';

            $found[] = array(
                'source_path' => $sourcePath,
                'selector'    => $prefix . ($position + 1) . ')',
                'element'     => 'style',
                'attribute'   => 'url',
                'value'       => $tokenUrl,
                'url'         => $tokenUrl,
                'context'     => $inFontFace ? 'css-font-face' : 'css-url',
            );
        }
    }

    return $found;
}

/**
 * Reports whether the given byte offset lies inside an @font-face block.
 *
 * The check is purely textual: it finds the last '{' before the offset and looks for
 * '@font-face' directly in front of it (within the preceding 256 bytes). Braces that
 * appear inside comments or strings are not treated specially.
 *
 * @param string $css    Stylesheet text.
 * @param int    $offset Byte offset into $css of the token being classified.
 * @return string 'font-face' when the offset is inside an @font-face block, '' otherwise.
 */
private function cssRuleContext(string $css, int $offset): string
{
    $before    = substr($css, 0, $offset);
    $ruleStart = strrpos($before, '{');
    if ( false === $ruleStart ) {
        return '';
    }

    // A '}' after the last '{' means that block has already ended, so the offset is
    // outside it. Without this check a url() following a finished @font-face block
    // (with no new '{' in between) would be misreported as font-face.
    $lastClose = strrpos($before, '}');
    if ( false !== $lastClose && $lastClose > $ruleStart ) {
        return '';
    }

    // Inspect only a bounded window in front of the brace for the at-rule name.
    $windowStart = max(0, $ruleStart - 256);
    $prefix      = substr($css, $windowStart, $ruleStart - $windowStart);

    return 1 === preg_match('/@font-face\s*$/i', $prefix) ? 'font-face' : '';
}

/**
* @param array{source_path: string, selector: string, element: string, attribute: string, value: string, url: string} $candidate
* @param array<int, array<string, mixed>> $files
* @return array<string, mixed>
*/
private function normalizeReferenceCandidate(array $candidate, array $files): array
{
$resolvedPath = $this->resolveHtmlReferencePath($candidate['url'], $candidate['source_path']);
$target = '' === $resolvedPath ? null : $this->findFileByPath($resolvedPath, $files);
$reference = array_filter(
array(
'source_path' => $candidate['source_path'],
'selector' => $candidate['selector'],
'element' => $candidate['element'],
'attribute' => $candidate['attribute'],
'value' => $candidate['value'],
'url' => $candidate['url'],
'context' => $candidate['context'] ?? '',
'resolved_path' => $resolvedPath,
),
static fn (mixed $value): bool => '' !== $value
return ( new ReferenceAnalyzer() )->referenceReports(
$files,
fn (array $file): bool => $this->isLinkableDocument($file),
fn (array $asset): bool => $this->isSafeImageAsset($asset)
);

if ( is_array($target) ) {
$targetPath = (string) ($target['path'] ?? '');
if ( $this->isLinkableDocument($target) ) {
$reference['target_path'] = $targetPath;
} else {
$reference['asset_path'] = $targetPath;
}
$reference['kind'] = $target['kind'] ?? '';
$reference['role'] = $target['role'] ?? '';
$reference['mime_type'] = $target['mime_type'] ?? '';
$reference['bytes'] = $target['bytes'] ?? 0;
if ( str_starts_with((string) ($target['mime_type'] ?? ''), 'image/') ) {
$reference['safe'] = $this->isSafeImageAsset($target);
}
$reference['target'] = $target;
}

return $reference;
}

/**
Expand All @@ -443,52 +199,6 @@ private function isLinkableDocument(array $file): bool
return in_array($file['kind'] ?? '', array('html', 'blocks'), true) && ! $this->isTemplatePartFile($file);
}

/**
 * Looks up the first file record whose 'path' equals $path exactly.
 *
 * Records without a 'path' key are compared as if their path were an empty string.
 *
 * @param array<int, array<string, mixed>> $files
 * @return array<string, mixed>|null The matching record, or null when none matches.
 */
private function findFileByPath(string $path, array $files): ?array
{
    $match = null;

    foreach ( $files as $candidate ) {
        $candidatePath = $candidate['path'] ?? '';
        if ( $candidatePath !== $path ) {
            continue;
        }

        $match = $candidate;
        break;
    }

    return $match;
}

/**
 * Tells whether a reference can point at a file inside the artifact.
 *
 * Rejected: empty values, fragment-only references ('#...'), protocol-relative URLs
 * ('//...') and anything that starts with a URI scheme such as 'https:' or 'mailto:'.
 * Everything else is accepted.
 */
private function isArtifactLocalReference(string $reference): bool
{
    $trimmed = trim($reference);
    if ( '' === $trimmed ) {
        return false;
    }

    foreach ( array('#', '//') as $rejectedPrefix ) {
        if ( str_starts_with($trimmed, $rejectedPrefix) ) {
            return false;
        }
    }

    // Anything other than a positive scheme match counts as local.
    return 1 !== preg_match('#^[a-z][a-z0-9+.-]*:#i', $trimmed);
}

/**
 * Converts a reference entry into the legacy image-report shape.
 *
 * The selector is rebuilt from the 1-based position ($index + 1) and the entry's 'url'
 * is exposed as 'src'. Fields whose value is null or an empty string are omitted;
 * other falsy values such as 0 or false are kept.
 *
 * @param array<string, mixed> $reference
 * @return array<string, mixed>
 */
private function legacyImageReference(array $reference, int $index): array
{
    $entry = array(
        'source_path'   => $reference['source_path'] ?? '',
        'selector'      => 'img:nth-of-type(' . ($index + 1) . ')',
        'src'           => $reference['url'] ?? '',
        'resolved_path' => $reference['resolved_path'] ?? '',
        'asset_path'    => $reference['asset_path'] ?? '',
        'mime_type'     => $reference['mime_type'] ?? '',
        'bytes'         => $reference['bytes'] ?? 0,
        'safe'          => $reference['safe'] ?? null,
    );

    foreach ( $entry as $field => $fieldValue ) {
        if ( null === $fieldValue || '' === $fieldValue ) {
            unset($entry[$field]);
        }
    }

    return $entry;
}

/**
* @param array{files: array<int, array<string, mixed>>, bytes: int, hash_payload: string} $artifact
* @param array<int, array<string, mixed>> $documents
Expand Down
Loading
Loading