diff --git a/docs/developers/incremental-builds-architecture.rst b/docs/developers/incremental-builds-architecture.rst new file mode 100644 index 000000000..9049100c4 --- /dev/null +++ b/docs/developers/incremental-builds-architecture.rst @@ -0,0 +1,304 @@ +==================================== +Incremental Builds: Architecture +==================================== + +This document describes the internal architecture, design decisions, and +security considerations of the incremental build system. For usage documentation, +see :doc:`incremental-builds`. + +Design Goals +============ + +The incremental build system was designed with these priorities: + +1. **Correctness** - Never skip a document that needs re-rendering +2. **Performance** - O(1) operations where possible, efficient memory usage +3. **Security** - Prevent resource exhaustion and path traversal attacks +4. **Parallelization** - Support parallel compilation workflows + +Architecture Overview +===================== + +.. code-block:: text + + ┌─────────────────────────────────────────────────────────────┐ + │ IncrementalBuildCache │ + │ (Orchestrates caching, persistence, and state management) │ + └─────────────────────────────────────────────────────────────┘ + │ │ │ + ▼ ▼ ▼ + ┌───────────────────┐ ┌──────────────────┐ ┌─────────────────┐ + │ DependencyGraph │ │ DocumentExports │ │ CacheVersioning│ + │ (Import/export │ │ (Per-document │ │ (Version │ + │ relationships) │ │ public API) │ │ validation) │ + └───────────────────┘ └──────────────────┘ └─────────────────┘ + │ │ + ▼ ▼ + ┌───────────────────┐ ┌──────────────────┐ + │ DirtyPropagator │ │ ChangeDetector │ + │ (Cascade dirty │ │ (File-based │ + │ state) │ │ detection) │ + └───────────────────┘ └──────────────────┘ + +Component Responsibilities +========================== + +IncrementalBuildCache +--------------------- + +The central orchestrator for cache persistence. 
Uses **sharded storage** with +256 buckets (2-character hex prefix from MD5 hash) for efficient incremental saves. + +**Design decisions:** + +- Sharded storage: Only modified documents are rewritten, not the entire cache +- Hash-based filenames: Prevents path traversal and handles special characters +- Separate metadata file: ``_build_meta.json`` is always loaded; exports are lazy-loaded + +DependencyGraph +--------------- + +Bidirectional graph tracking import/dependent relationships. Uses keyed arrays +for O(1) lookup performance. + +**Design decisions:** + +- Bidirectional: Stores both ``imports[A] = [B, C]`` and ``dependents[B] = [A]`` +- Keyed arrays: ``$imports[$doc][$target] = true`` for O(1) add/remove/lookup +- Depth-limited traversal: Maximum 100 levels to prevent stack overflow on cycles + +DirtyPropagator +--------------- + +Propagates dirty state through the dependency graph when exports change. + +**Design decisions:** + +- Uses ``SplQueue`` for O(1) enqueue/dequeue (vs ``array_shift`` which is O(n)) +- Export comparison: Only propagates when *exports* change, not just content +- Visited tracking: Prevents infinite loops in cyclic dependencies + +GlobalInvalidationDetector +-------------------------- + +Detects changes that require a full rebuild (config, theme, toctree structure). + +**Design decisions:** + +- Configurable patterns: Default patterns can be overridden per-project +- Directory patterns: Must match complete path segments (``foo/`` matches + ``path/foo/bar`` but not ``prefix_foo/bar``) + +Security Model +============== + +The incremental build system processes untrusted cache files and must defend +against malicious input. + +Resource Limits +--------------- + +All components enforce consistent limits to prevent memory exhaustion: + +.. 
code-block:: php + + // Consistent across all classes + MAX_DOCUMENTS = 100_000 + MAX_EXPORTS = 100_000 + MAX_OUTPUT_PATHS = 100_000 + MAX_PROPAGATION_VISITS = 100_000 + + // DependencyGraph-specific + MAX_TOTAL_EDGES = 2_000_000 + MAX_IMPORTS_PER_DOCUMENT = 1_000 + + // GlobalInvalidationDetector + MAX_PATTERN_LENGTH = 256 + +**Important:** These limits are intentionally kept in sync. If you change one, +consider whether related limits should also change. + +Input Validation +---------------- + +All ``fromArray()`` deserialization methods validate: + +1. **Type checking**: All values must match expected types +2. **Size limits**: Arrays must not exceed maximum sizes +3. **Format validation**: Hashes must be valid hex strings +4. **Character validation**: Document paths reject control characters + +Path Traversal Prevention +------------------------- + +The sharded cache system prevents path traversal attacks: + +.. code-block:: php + + // Shard directory names validated with regex + private function isValidShardName(string $name): bool + { + return preg_match('/^[0-9a-f]{2}$/', $name) === 1; + } + + // Document paths become hash-based filenames + $hash = md5($docPath); // e.g., "d41d8cd98f00b204" + $prefix = substr($hash, 0, 2); // e.g., "d4" + $filename = $hash . '.json'; // Full hash as filename + +Thread Safety +============= + +The incremental build classes are **NOT thread-safe**. They are designed for +single-threaded build processes. + +For parallel builds, use the extract/merge pattern: + +.. 
code-block:: php + + // Parent process + $cache = new IncrementalBuildCache($versioning); + $cache->load($outputDir); + + // Fork child processes, each with their own state + foreach ($chunks as $chunk) { + $childState = $state->extractState(); + // Pass $childState to child process + } + + // After children complete, merge results sequentially + foreach ($childResults as $result) { + $cache->mergeState($result); + } + + $cache->save($outputDir); + +Algorithm Complexity +==================== + +.. list-table:: + :header-rows: 1 + + * - Operation + - Complexity + - Notes + * - ``DependencyGraph::addImport()`` + - O(1) + - Keyed array insertion + * - ``DependencyGraph::getImports()`` + - O(1) + - Direct array access + * - ``DependencyGraph::propagateDirty()`` + - O(V + E) + - BFS traversal + * - ``DirtyPropagator::propagate()`` + - O(V + E) + - Uses SplQueue + * - ``IncrementalBuildCache::save()`` + - O(dirty) + - Only writes changed exports + * - ``ChangeDetector::detectChanges()`` + - O(n) + - Checks each document + +Cache Format +============ + +The cache uses two storage formats: + +Metadata File (``_build_meta.json``) +------------------------------------ + +Always loaded. Contains version info, dependency graph, and output paths. + +.. code-block:: json + + { + "metadata": { + "version": 1, + "phpVersion": "8.1.0", + "packageVersion": "1.0.0", + "settingsHash": "abc123...", + "createdAt": 1706140800 + }, + "dependencies": { + "imports": {"doc1": {"doc2": true}}, + "dependents": {"doc2": {"doc1": true}} + }, + "outputs": { + "doc1": "/output/doc1.html" + } + } + +Export Files (``_exports//.json``) +----------------------------------------------------- + +Loaded on demand. One file per document, sharded into 256 directories. + +.. 
code-block:: json + + { + "path": "getting-started", + "documentPath": "getting-started", + "contentHash": "a1b2c3...", + "exportsHash": "d4e5f6...", + "anchors": {"installation": "Installation"}, + "sectionTitles": {"installation": "Installation"}, + "citations": [], + "lastModified": 1706140800, + "documentTitle": "Getting Started" + } + +Testing Guidelines +================== + +When modifying the incremental build system: + +1. **Security tests**: Add tests for limit enforcement and validation +2. **Edge cases**: Test cycles, empty graphs, maximum sizes +3. **Serialization round-trips**: Test ``toArray()``/``fromArray()`` compatibility +4. **Algorithm correctness**: Verify dirty propagation finds all affected documents + +Example test patterns: + +.. code-block:: php + + // Test limit enforcement + public function testRejectsExcessiveDocuments(): void + { + $this->expectException(InvalidArgumentException::class); + // ... create data exceeding MAX_DOCUMENTS + } + + // Test cycle handling + public function testHandlesCyclicDependencies(): void + { + $graph->addImport('a', 'b'); + $graph->addImport('b', 'c'); + $graph->addImport('c', 'a'); // Cycle! + + $result = $graph->propagateDirty(['a']); + // Should not infinite loop, should find all three + } + +Extending the System +==================== + +Adding New Dependency Types +--------------------------- + +To track a new type of cross-reference: + +1. Update ``DependencyGraphPass`` to detect the new reference type +2. Call ``$graph->addImport($source, $target)`` for each reference +3. Add tests for the new reference detection + +Adding New Export Types +----------------------- + +To track additional exported symbols: + +1. Update ``DocumentExports`` to include the new field +2. Update ``ExportsCollectorPass`` to collect the new data +3. Update ``ContentHasher::hashExports()`` to include in the hash +4. 
Add tests for export change detection diff --git a/docs/developers/incremental-builds.rst b/docs/developers/incremental-builds.rst new file mode 100644 index 000000000..79edb0878 --- /dev/null +++ b/docs/developers/incremental-builds.rst @@ -0,0 +1,275 @@ +================= +Incremental Builds +================= + +The guides library provides infrastructure for incremental builds, allowing +applications to skip re-rendering unchanged documents. This can dramatically +improve build times for large documentation sets. + +This document covers usage and integration. For architecture details, security +considerations, and maintainer information, see :doc:`incremental-builds-architecture`. + +Overview +======== + +Incremental build support consists of several components: + +**Infrastructure Classes:** + +- **DependencyGraph**: Tracks inter-document dependencies (includes, references) +- **ContentHasher**: Computes fast content hashes for change detection +- **ChangeDetector**: Determines which documents need re-rendering +- **DocumentExports**: Tracks exported symbols (anchors, titles, citations) +- **IncrementalBuildState**: Holds state during compilation + +**Compiler Passes:** + +- **ExportsCollectorPass**: Collects exports from each document during compilation +- **DependencyGraphPass**: Builds the dependency graph from cross-references + +Dependency Graph +================ + +The ``DependencyGraph`` class tracks relationships between documents to enable +dirty propagation. When a document changes, all documents that depend on it +must also be re-rendered. + +.. 
code-block:: php + + use phpDocumentor\Guides\Build\IncrementalBuild\DependencyGraph; + + $graph = new DependencyGraph(); + + // Record that index.rst includes getting-started.rst + $graph->addImport('index', 'getting-started'); + + // Record that tutorials/basics.rst references index.rst + $graph->addImport('tutorials/basics', 'index'); + + // When index.rst changes, find all affected documents + $dirtyDocs = ['index']; + $allAffected = $graph->propagateDirty($dirtyDocs); + // Returns: ['index', 'tutorials/basics'] + +Merging Graphs +-------------- + +When parsing documents in parallel, each worker builds a partial dependency graph. +These can be merged after parsing completes: + +.. code-block:: php + + $mainGraph = new DependencyGraph(); + $workerGraph = new DependencyGraph(); + + // ... workers add imports to their graphs ... + + $mainGraph->merge($workerGraph); + +Persistence +----------- + +The dependency graph can be serialized for caching between builds: + +.. code-block:: php + + // Save to cache + $data = $graph->toArray(); + file_put_contents('cache/deps.json', json_encode($data)); + + // Load from cache + $data = json_decode(file_get_contents('cache/deps.json'), true); + $graph = DependencyGraph::fromArray($data); + +Content Hasher +============== + +The ``ContentHasher`` class provides fast content hashing using xxh128 (if available) +or SHA-256 as a fallback. + +.. 
code-block:: php + + use phpDocumentor\Guides\Build\IncrementalBuild\ContentHasher; + + $hasher = new ContentHasher(); + + // Hash a file + $fileHash = $hasher->hashFile('/path/to/document.rst'); + + // Hash string content + $contentHash = $hasher->hashContent($documentContent); + + // Hash document exports (for dependency invalidation) + $exportsHash = $hasher->hashExports( + anchors: ['section-1' => 'Section One', 'section-2' => 'Section Two'], + sectionTitles: ['section-1' => 'Section One'], + citations: ['ref1', 'ref2'], + documentTitle: 'My Document', + ); + +Change Detection +================ + +The ``ChangeDetector`` class determines which documents need re-rendering by +comparing current file state against cached exports. + +.. code-block:: php + + use phpDocumentor\Guides\Build\IncrementalBuild\ChangeDetector; + use phpDocumentor\Guides\Build\IncrementalBuild\ContentHasher; + + $hasher = new ContentHasher(); + $detector = new ChangeDetector($hasher); + + // Get list of document paths + $documentPaths = ['index', 'getting-started', 'tutorials/basics']; + + // Load cached exports from previous build + $cachedExports = loadCachedExports(); // array + + // Resolver function to get file path from document path + $fileResolver = fn(string $docPath) => "/docs/{$docPath}.rst"; + + // Detect changes + $result = $detector->detectChangesWithResolver( + $documentPaths, + $cachedExports, + $fileResolver, + ); + + // Get documents that need re-rendering + $changedDocs = $result->getChangedDocuments(); + $unchangedDocs = $result->getUnchangedDocuments(); + +Document Exports +================ + +The ``DocumentExports`` class tracks the "public interface" of a document - +the anchors, section titles, and citations it exports. When exports change, +dependent documents must be re-rendered even if their content hasn't changed. + +.. 
code-block:: php + + use phpDocumentor\Guides\Build\IncrementalBuild\DocumentExports; + + $exports = new DocumentExports( + documentPath: 'getting-started', + contentHash: $hasher->hashFile($filePath), + exportsHash: $hasher->hashExports($anchors, $titles, $citations), + anchors: ['installation' => 'Installation', 'first-steps' => 'First Steps'], + sectionTitles: ['installation' => 'Installation'], + citations: [], + lastModified: filemtime($filePath), + documentTitle: 'Getting Started', + ); + + // Check if exports changed (triggers dependency re-render) + if ($exports->hasExportsChanged($previousExports)) { + // Dependent documents need re-rendering + } + + // Check if content changed (document itself needs re-rendering) + if ($exports->hasContentChanged($previousExports)) { + // This document needs re-rendering + } + +Incremental Build State +======================= + +The ``IncrementalBuildState`` class holds all incremental build state during a +single compilation run. It stores the dependency graph and document exports, +and can be serialized for persistence between builds. + +.. 
code-block:: php + + use phpDocumentor\Guides\Build\IncrementalBuild\IncrementalBuildState; + + $state = new IncrementalBuildState(); + + // Set the input directory for source file hashing + $state->setInputDir('/path/to/docs'); + + // Load previous exports for change detection + $state->setPreviousExports($cachedExports); + + // After compilation, get the current state + $graph = $state->getDependencyGraph(); + $exports = $state->getAllExports(); + + // Serialize for caching + $data = $state->toArray(); + file_put_contents('cache/build_state.json', json_encode($data)); + + // Restore from cache + $data = json_decode(file_get_contents('cache/build_state.json'), true); + $state = IncrementalBuildState::fromArray($data); + +Compiler Passes +=============== + +The library includes two compiler passes that automatically build the dependency +graph and collect exports during the compilation phase. + +ExportsCollectorPass +-------------------- + +Priority: 10 (runs late, after all document and menu processing) + +Collects anchors, section titles, citations, and document titles from each +compiled document. Computes content and exports hashes for change detection. + +DependencyGraphPass +------------------- + +Priority: 9 (runs after ``ExportsCollectorPass``) + +Analyzes all documents for cross-references (``:doc:``, ``:ref:``, etc.) and +records dependencies in the graph. This enables dirty propagation when a +document's exports change. + +Integration Example +=================== + +Here's a complete example of implementing incremental builds: + +.. 
code-block:: php + + use phpDocumentor\Guides\Build\IncrementalBuild\ChangeDetector; + use phpDocumentor\Guides\Build\IncrementalBuild\ContentHasher; + use phpDocumentor\Guides\Build\IncrementalBuild\DependencyGraph; + + class IncrementalBuilder + { + private ContentHasher $hasher; + private ChangeDetector $detector; + private DependencyGraph $graph; + + public function __construct() + { + $this->hasher = new ContentHasher(); + $this->detector = new ChangeDetector($this->hasher); + $this->graph = new DependencyGraph(); + } + + public function build(array $documentPaths, array $cachedExports): array + { + // 1. Detect changed documents + $result = $this->detector->detectChangesWithResolver( + $documentPaths, + $cachedExports, + fn($doc) => "docs/{$doc}.rst" + ); + + // 2. Propagate changes through dependency graph + $dirtyDocs = $this->graph->propagateDirty( + $result->getChangedDocuments() + ); + + // 3. Render only dirty documents + foreach ($dirtyDocs as $docPath) { + $this->renderDocument($docPath); + } + + return $dirtyDocs; + } + } diff --git a/docs/developers/index.rst b/docs/developers/index.rst index c51818060..f6c364143 100644 --- a/docs/developers/index.rst +++ b/docs/developers/index.rst @@ -14,3 +14,5 @@ it in some other way that is not possible with the ``guides`` command line tool. 
extensions/index compiler directive + incremental-builds + incremental-builds-architecture diff --git a/packages/guides/resources/config/guides.php b/packages/guides/resources/config/guides.php index 02e9657a3..3c9cce9b6 100644 --- a/packages/guides/resources/config/guides.php +++ b/packages/guides/resources/config/guides.php @@ -3,6 +3,8 @@ declare(strict_types=1); use League\Tactician\CommandBus; +use phpDocumentor\Guides\Build\IncrementalBuild\ContentHasher; +use phpDocumentor\Guides\Build\IncrementalBuild\IncrementalBuildState; use phpDocumentor\Guides\Compiler\Compiler; use phpDocumentor\Guides\Compiler\CompilerPass; use phpDocumentor\Guides\Compiler\DocumentNodeTraverser; @@ -252,5 +254,9 @@ ->arg('$environmentBuilder', new Reference(EnvironmentBuilder::class)) ->set('phpdoc.guides.raw_node.sanitizer.default', HtmlSanitizerConfig::class) - ->call('allowSafeElements', [], true); + ->call('allowSafeElements', [], true) + + // Incremental build infrastructure + ->set(ContentHasher::class) + ->set(IncrementalBuildState::class); }; diff --git a/packages/guides/src/Build/IncrementalBuild/CacheVersioning.php b/packages/guides/src/Build/IncrementalBuild/CacheVersioning.php new file mode 100644 index 000000000..9a8900875 --- /dev/null +++ b/packages/guides/src/Build/IncrementalBuild/CacheVersioning.php @@ -0,0 +1,149 @@ + $metadata Cached metadata + * + * @return bool True if cache is valid + */ + public function isCacheValid(array $metadata): bool + { + // Check cache version + if (($metadata['version'] ?? 0) !== self::CACHE_VERSION) { + return false; + } + + // Check PHP major.minor version (patch changes are compatible) + $cachedPhpVersion = $metadata['phpVersion'] ?? ''; + if (!is_string($cachedPhpVersion)) { + return false; + } + + $currentPhpMajor = PHP_MAJOR_VERSION . '.' . 
PHP_MINOR_VERSION; + if (!str_starts_with($cachedPhpVersion, $currentPhpMajor)) { + return false; + } + + // Check package major version (major version changes may break cache compatibility) + $cachedPackageVersion = $metadata['packageVersion'] ?? ''; + if (!is_string($cachedPackageVersion)) { + return false; + } + + return $this->isMajorVersionCompatible($cachedPackageVersion, $this->packageVersion); + } + + /** + * Check if two version strings have the same major version. + */ + private function isMajorVersionCompatible(string $cached, string $current): bool + { + $cachedMajor = $this->extractMajorVersion($cached); + $currentMajor = $this->extractMajorVersion($current); + + // If either can't be parsed, assume incompatible + if ($cachedMajor === null || $currentMajor === null) { + return false; + } + + return $cachedMajor === $currentMajor; + } + + /** + * Extract major version from semver string. + * + * @return int|null Major version or null if unparseable + */ + private function extractMajorVersion(string $version): int|null + { + // Handle versions with 'v' prefix (e.g., 'v1.2.3') + if (str_starts_with($version, 'v')) { + $version = substr($version, 1); + } + + $parts = explode('.', $version); + if ($parts === [] || !is_numeric($parts[0])) { + return null; + } + + $major = (int) $parts[0]; + + return $major >= self::MIN_VERSION_MAJOR ? $major : null; + } + + /** + * Create metadata for cache persistence. + * + * @param string $settingsHash Hash of project settings + * + * @return array + */ + public function createMetadata(string $settingsHash = ''): array + { + return [ + 'version' => self::CACHE_VERSION, + 'phpVersion' => PHP_VERSION, + 'packageVersion' => $this->packageVersion, + 'settingsHash' => $settingsHash, + 'createdAt' => time(), + ]; + } + + /** + * Get current cache version. 
+ */ + public function getCacheVersion(): int + { + return self::CACHE_VERSION; + } +} diff --git a/packages/guides/src/Build/IncrementalBuild/ChangeDetectionResult.php b/packages/guides/src/Build/IncrementalBuild/ChangeDetectionResult.php new file mode 100644 index 000000000..ea326068e --- /dev/null +++ b/packages/guides/src/Build/IncrementalBuild/ChangeDetectionResult.php @@ -0,0 +1,78 @@ +dirty, $this->new); + } + + /** + * Check if any changes were detected. + */ + public function hasChanges(): bool + { + return $this->dirty !== [] || $this->new !== [] || $this->deleted !== []; + } + + /** + * Get total count of changed items. + */ + public function getChangeCount(): int + { + return count($this->dirty) + count($this->new) + count($this->deleted); + } + + /** + * Serialize to array. + * + * @return array + */ + public function toArray(): array + { + return [ + 'dirty' => $this->dirty, + 'clean' => $this->clean, + 'new' => $this->new, + 'deleted' => $this->deleted, + ]; + } +} diff --git a/packages/guides/src/Build/IncrementalBuild/ChangeDetector.php b/packages/guides/src/Build/IncrementalBuild/ChangeDetector.php new file mode 100644 index 000000000..796f8ebf6 --- /dev/null +++ b/packages/guides/src/Build/IncrementalBuild/ChangeDetector.php @@ -0,0 +1,167 @@ + $cachedExports Previous build's exports + * @param callable(string): string $fileResolver Resolves document path to actual file path + */ + public function detectChangesWithResolver( + array $documentPaths, + array $cachedExports, + callable $fileResolver, + ): ChangeDetectionResult { + $dirty = []; + $clean = []; + $new = []; + $this->fastPathHits = 0; + $this->hashComputations = 0; + + foreach ($documentPaths as $docPath) { + $filePath = $fileResolver($docPath); + $cached = $cachedExports[$docPath] ?? 
null; + + if ($cached === null) { + $new[] = $docPath; + continue; + } + + // Timestamp-first optimization + $currentMtime = $this->getFileMtime($filePath); + + if ($currentMtime === $cached->lastModified && $cached->lastModified > 0) { + // Fast path: timestamp unchanged, assume content unchanged + $this->fastPathHits++; + $clean[] = $docPath; + continue; + } + + // Timestamp changed - verify with content hash + $this->hashComputations++; + $currentHash = $this->hasher->hashFile($filePath); + + if ($currentHash === $cached->contentHash) { + // Content same despite mtime change (git checkout, touch, etc.) + $clean[] = $docPath; + } else { + $dirty[] = $docPath; + } + } + + // Detect deleted files + $deleted = []; + $currentSet = array_flip($documentPaths); + foreach (array_keys($cachedExports) as $cachedPath) { + if (isset($currentSet[$cachedPath])) { + continue; + } + + $deleted[] = $cachedPath; + } + + return new ChangeDetectionResult($dirty, $clean, $new, $deleted); + } + + /** + * Compare current source files against cached state (legacy method). + * + * @param string[] $sourceFiles Current source file paths + * @param array $cachedExports Previous build's exports + */ + public function detectChanges(array $sourceFiles, array $cachedExports): ChangeDetectionResult + { + return $this->detectChangesWithResolver($sourceFiles, $cachedExports, static fn ($path) => $path); + } + + /** + * Quick check if a single file has changed using timestamp-first approach. + */ + public function hasFileChanged(string $filePath, DocumentExports|null $cached): bool + { + if ($cached === null) { + return true; + } + + // Timestamp-first check + $currentMtime = $this->getFileMtime($filePath); + if ($currentMtime === $cached->lastModified && $cached->lastModified > 0) { + return false; + } + + // Verify with hash + $currentHash = $this->hasher->hashFile($filePath); + + return $currentHash !== $cached->contentHash; + } + + /** + * Get file modification time. 
+ * + * Uses @ error suppression to avoid TOCTOU race between existence check + * and mtime retrieval. If file doesn't exist or was deleted, returns 0. + */ + public function getFileMtime(string $filePath): int + { + // Suppress warning if file vanishes; filemtime returns false for non-existent files + $mtime = @filemtime($filePath); + + return $mtime !== false ? $mtime : 0; + } + + /** + * Get performance statistics for last detection run. + * + * @return array{fastPathHits: int, hashComputations: int} + */ + public function getStats(): array + { + return [ + 'fastPathHits' => $this->fastPathHits, + 'hashComputations' => $this->hashComputations, + ]; + } +} diff --git a/packages/guides/src/Build/IncrementalBuild/ContentHasher.php b/packages/guides/src/Build/IncrementalBuild/ContentHasher.php new file mode 100644 index 000000000..0d46d1361 --- /dev/null +++ b/packages/guides/src/Build/IncrementalBuild/ContentHasher.php @@ -0,0 +1,129 @@ +algorithm = in_array('xxh128', hash_algos(), true) ? 'xxh128' : 'sha256'; + } + + /** + * Hash a file's contents. + * + * Returns empty string if file doesn't exist (expected case for new documents). + * + * Note: Uses @ error suppression to avoid TOCTOU race condition between + * file_exists() and hash_file(). The hash_file() call handles non-existent + * files by returning false, which we then check. + * + * @throws RuntimeException If hashing fails for an existing, readable file + */ + public function hashFile(string $filePath): string + { + // Suppress warnings from hash_file() for non-existent files to avoid + // TOCTOU race between file_exists() check and hash_file() call. + // If file doesn't exist or was deleted, hash_file returns false. 
+ $hash = @hash_file($this->algorithm, $filePath); + + if ($hash === false) { + // Check if file exists now - if not, return empty (expected for new docs) + // This is a race-condition-safe approach: we try first, check existence after + if (!file_exists($filePath)) { + return ''; + } + + // File exists but hashing failed (permissions, I/O error, etc.) + // Use basename to avoid leaking full system paths in error messages + throw new RuntimeException(sprintf( + 'ContentHasher: failed to hash file "%s" with algorithm "%s"', + basename($filePath), + $this->algorithm, + )); + } + + return $hash; + } + + /** + * Hash arbitrary string content. + */ + public function hashContent(string $content): string + { + return hash($this->algorithm, $content); + } + + /** + * Compute hash of document exports for dependency invalidation. + * + * @param array $anchors + * @param array $sectionTitles + * @param string[] $citations + * @param string $documentTitle Required to ensure consistent hashing + */ + public function hashExports( + array $anchors, + array $sectionTitles, + array $citations, + string $documentTitle, + ): string { + // Sort keys for deterministic hashing + ksort($anchors); + ksort($sectionTitles); + sort($citations); + + $data = json_encode([ + 'anchors' => $anchors, + 'sectionTitles' => $sectionTitles, + 'citations' => $citations, + 'documentTitle' => $documentTitle, + ], JSON_THROW_ON_ERROR); + + return hash($this->algorithm, $data); + } + + /** + * Get the algorithm being used. 
+ */ + public function getAlgorithm(): string + { + return $this->algorithm; + } +} diff --git a/packages/guides/src/Build/IncrementalBuild/DependencyGraph.php b/packages/guides/src/Build/IncrementalBuild/DependencyGraph.php new file mode 100644 index 000000000..9816df0d9 --- /dev/null +++ b/packages/guides/src/Build/IncrementalBuild/DependencyGraph.php @@ -0,0 +1,622 @@ + [imported docPath => true] + * "Document A imports from documents B, C, D" + * Uses keyed arrays for O(1) lookup instead of in_array O(n). + * + * @var array> + */ + private array $imports = []; + + /** + * Reverse edges: docPath -> [dependent docPath => true] + * "Document B is depended on by documents A, E, F" + * Uses keyed arrays for O(1) lookup instead of in_array O(n). + * + * @var array> + */ + private array $dependents = []; + + /** + * Record that $fromDoc imports/references something from $toDoc. + * O(1) operation using keyed arrays. + * + * Enforces runtime limits to prevent memory exhaustion during build: + * - MAX_DOCUMENTS: Maximum total documents in the graph + * - MAX_IMPORTS_PER_DOCUMENT: Maximum imports per document + * + * Returns false if the import was rejected due to limits being reached. 
+ */ + public function addImport(string $fromDoc, string $toDoc): bool + { + // Don't add self-references + if ($fromDoc === $toDoc) { + return true; // Not an error, just a no-op + } + + // Check if this edge already exists (no limits needed for existing edges) + if (isset($this->imports[$fromDoc][$toDoc])) { + return true; + } + + // Enforce global edge limit to prevent memory exhaustion + if ($this->edgeCount >= self::MAX_TOTAL_EDGES) { + return false; + } + + // Enforce per-document import limit + if (isset($this->imports[$fromDoc]) && count($this->imports[$fromDoc]) >= self::MAX_IMPORTS_PER_DOCUMENT) { + return false; + } + + // Enforce total document limit (only check when adding a new document) + if (!isset($this->imports[$fromDoc]) && count($this->imports) >= self::MAX_DOCUMENTS) { + return false; + } + + // Add forward edge (O(1) with isset check) + $this->imports[$fromDoc][$toDoc] = true; + + // Add reverse edge (O(1) with isset check) + $this->dependents[$toDoc][$fromDoc] = true; + + // Track total edges + $this->edgeCount++; + + return true; + } + + /** + * Get all documents that $docPath imports from. + * + * Note: Uses array_map to ensure string return type since PHP converts + * numeric string keys to integers in arrays. + * + * @return string[] + */ + public function getImports(string $docPath): array + { + return array_map('strval', array_keys($this->imports[$docPath] ?? [])); + } + + /** + * Get all documents that depend on $docPath. + * + * Note: Uses array_map to ensure string return type since PHP converts + * numeric string keys to integers in arrays. + * + * @return string[] + */ + public function getDependents(string $docPath): array + { + return array_map('strval', array_keys($this->dependents[$docPath] ?? [])); + } + + /** + * Given a set of dirty documents, propagate to find all affected documents. + * Uses transitive closure: if A depends on B, and B is dirty, A is dirty. 
+ * + * Optimized to O(V+E) using SplQueue for O(1) dequeue operations. + * + * Note on memory usage: This method builds a complete result array in memory. + * For very large graphs (e.g., 100k+ documents), consider using + * propagateDirtyIterator() which yields results one at a time. + * + * @param string[] $dirtyDocs Initially dirty documents + * + * @return string[] All documents that need re-rendering + */ + public function propagateDirty(array $dirtyDocs): array + { + $result = []; + $visited = []; + + // Use SplQueue for O(1) enqueue/dequeue instead of array_shift O(n) + /** @var SplQueue $queue */ + $queue = new SplQueue(); + foreach ($dirtyDocs as $doc) { + $queue->enqueue($doc); + } + + while (!$queue->isEmpty()) { + $current = $queue->dequeue(); + assert(is_string($current)); + + if (isset($visited[$current])) { + continue; + } + + $visited[$current] = true; + $result[] = $current; + + // Add all dependents to the queue + foreach ($this->getDependents($current) as $dependent) { + if (isset($visited[$dependent])) { + continue; + } + + $queue->enqueue($dependent); + } + } + + return $result; + } + + /** + * Generator version of propagateDirty for memory-efficient processing of large graphs. + * + * Yields documents one at a time instead of building a full result array. + * Use this for very large dependency graphs where memory is a concern. 
+ * + * @param string[] $dirtyDocs Initially dirty documents + * + * @return Generator + */ + public function propagateDirtyIterator(array $dirtyDocs): Generator + { + $visited = []; + + /** @var SplQueue $queue */ + $queue = new SplQueue(); + foreach ($dirtyDocs as $doc) { + $queue->enqueue($doc); + } + + while (!$queue->isEmpty()) { + $current = $queue->dequeue(); + assert(is_string($current)); + + if (isset($visited[$current])) { + continue; + } + + $visited[$current] = true; + + yield $current; + + foreach ($this->getDependents($current) as $dependent) { + if (isset($visited[$dependent])) { + continue; + } + + $queue->enqueue($dependent); + } + } + } + + /** + * Remove a document from the graph (when deleted). + * O(k) where k is edges involving this document (uses reverse index for efficiency). + */ + public function removeDocument(string $docPath): void + { + // 1. Remove edges pointing TO this document (using reverse index for O(k) instead of O(N)) + // The dependents index tells us which documents import this one + $parents = array_keys($this->dependents[$docPath] ?? []); + foreach ($parents as $from) { + if (!isset($this->imports[$from][$docPath])) { + continue; + } + + unset($this->imports[$from][$docPath]); + $this->edgeCount--; + + if ($this->imports[$from] !== []) { + continue; + } + + unset($this->imports[$from]); + } + + // 2. Remove edges originating FROM this document + $children = $this->imports[$docPath] ?? []; + $this->edgeCount -= count($children); + + // Remove this document from the dependents list of its imports + foreach (array_keys($children) as $to) { + unset($this->dependents[$to][$docPath]); + if (!isset($this->dependents[$to]) || $this->dependents[$to] !== []) { + continue; + } + + unset($this->dependents[$to]); + } + + // 3. 
Remove own entries + unset($this->imports[$docPath]); + unset($this->dependents[$docPath]); + + // Safety: ensure non-negative edge count + if ($this->edgeCount >= 0) { + return; + } + + $this->edgeCount = 0; + } + + /** + * Clear all edges for a document (before re-computing). + * O(I) where I is number of imports for this document. + */ + public function clearImportsFor(string $docPath): void + { + $oldImports = $this->imports[$docPath] ?? []; + $edgesRemoved = count($oldImports); + unset($this->imports[$docPath]); + + // Remove this doc from dependents of its old imports + foreach (array_keys($oldImports) as $importedDoc) { + unset($this->dependents[$importedDoc][$docPath]); + if (!isset($this->dependents[$importedDoc]) || $this->dependents[$importedDoc] !== []) { + continue; + } + + unset($this->dependents[$importedDoc]); + } + + // Update edge count + $this->edgeCount -= $edgesRemoved; + if ($this->edgeCount >= 0) { + return; + } + + $this->edgeCount = 0; // Safety: ensure non-negative + } + + /** + * Get all document paths in the graph. + * + * @return string[] + */ + public function getAllDocuments(): array + { + // Use array union for O(1) uniqueness per key instead of array_unique O(n log n) + // array_map ensures string return type (PHP converts numeric keys to int) + return array_map('strval', array_keys($this->imports + $this->dependents)); + } + + /** + * Serialize to array for JSON persistence. + * + * Note: Keys are explicitly cast to strings for consistent serialization, + * since PHP converts numeric string keys to integers in arrays. 
+ * + * @return array{imports: array, dependents: array} + */ + public function toArray(): array + { + $imports = []; + foreach ($this->imports as $from => $toMap) { + // Cast keys to strings for consistent serialization + $imports[(string) $from] = array_map('strval', array_keys($toMap)); + } + + $dependents = []; + foreach ($this->dependents as $to => $fromMap) { + // Cast keys to strings for consistent serialization + $dependents[(string) $to] = array_map('strval', array_keys($fromMap)); + } + + return [ + 'imports' => $imports, + 'dependents' => $dependents, + ]; + } + + /** + * Deserialize from array. + * + * Security: This method validates all input to protect against maliciously crafted + * cache files. It enforces size limits (MAX_DOCUMENTS, MAX_IMPORTS_PER_DOCUMENT) + * to prevent memory exhaustion attacks from corrupted or attacker-controlled JSON. + * + * @param array $data + * + * @throws InvalidArgumentException If data format is invalid or size limits exceeded + */ + public static function fromArray(array $data): self + { + $graph = new self(); + + $imports = $data['imports'] ?? 
[]; + if (!is_array($imports)) { + throw new InvalidArgumentException(sprintf( + 'DependencyGraph: expected imports to be array, got %s', + get_debug_type($imports), + )); + } + + // Enforce document count limit + if (count($imports) > self::MAX_DOCUMENTS) { + throw new InvalidArgumentException(sprintf( + 'DependencyGraph: imports exceed maximum of %d documents', + self::MAX_DOCUMENTS, + )); + } + + foreach ($imports as $from => $toList) { + // Convert PHP integer keys (from JSON numeric strings) to string + $fromKey = (string) $from; + + if (!is_array($toList)) { + throw new InvalidArgumentException(sprintf( + 'DependencyGraph: expected import value for "%s" to be array, got %s', + $fromKey, + get_debug_type($toList), + )); + } + + // Enforce per-document import limit + if (count($toList) > self::MAX_IMPORTS_PER_DOCUMENT) { + throw new InvalidArgumentException(sprintf( + 'DependencyGraph: imports for "%s" exceed maximum of %d', + $fromKey, + self::MAX_IMPORTS_PER_DOCUMENT, + )); + } + + // Validate all values in toList are strings + $validated = []; + foreach ($toList as $value) { + if (!is_string($value)) { + throw new InvalidArgumentException(sprintf( + 'DependencyGraph: expected import target for "%s" to be string, got %s', + $fromKey, + get_debug_type($value), + )); + } + + $validated[$value] = true; + } + + $graph->imports[$fromKey] = $validated; + } + + $dependents = $data['dependents'] ?? 
[]; + if (!is_array($dependents)) { + throw new InvalidArgumentException(sprintf( + 'DependencyGraph: expected dependents to be array, got %s', + get_debug_type($dependents), + )); + } + + // Enforce document count limit for dependents + if (count($dependents) > self::MAX_DOCUMENTS) { + throw new InvalidArgumentException(sprintf( + 'DependencyGraph: dependents exceed maximum of %d documents', + self::MAX_DOCUMENTS, + )); + } + + foreach ($dependents as $to => $fromList) { + // Convert PHP integer keys (from JSON numeric strings) to string + $toKey = (string) $to; + + if (!is_array($fromList)) { + throw new InvalidArgumentException(sprintf( + 'DependencyGraph: expected dependent value for "%s" to be array, got %s', + $toKey, + get_debug_type($fromList), + )); + } + + // Enforce per-document dependent limit + if (count($fromList) > self::MAX_IMPORTS_PER_DOCUMENT) { + throw new InvalidArgumentException(sprintf( + 'DependencyGraph: dependents for "%s" exceed maximum of %d', + $toKey, + self::MAX_IMPORTS_PER_DOCUMENT, + )); + } + + // Validate all values in fromList are strings + $validated = []; + foreach ($fromList as $value) { + if (!is_string($value)) { + throw new InvalidArgumentException(sprintf( + 'DependencyGraph: expected dependent source for "%s" to be string, got %s', + $toKey, + get_debug_type($value), + )); + } + + $validated[$value] = true; + } + + $graph->dependents[$toKey] = $validated; + } + + // Compute total edge count from imports + $totalEdges = 0; + foreach ($graph->imports as $toMap) { + $totalEdges += count($toMap); + } + + // Enforce total edge limit + if ($totalEdges > self::MAX_TOTAL_EDGES) { + throw new InvalidArgumentException(sprintf( + 'DependencyGraph: total edges (%d) exceed maximum of %d', + $totalEdges, + self::MAX_TOTAL_EDGES, + )); + } + + $graph->edgeCount = $totalEdges; + + return $graph; + } + + /** + * Get statistics about the graph. 
*
+     * @return array{documents: int, edges: int, avgImportsPerDoc: float}
+     */
+    public function getStats(): array
+    {
+        $importCount = count($this->imports);
+
+        return [
+            // Count distinct documents directly via array union (O(1) per key)
+            // instead of materializing the strval-mapped key list that
+            // getAllDocuments() builds just to be counted and discarded.
+            'documents' => count($this->imports + $this->dependents),
+            'edges' => $this->edgeCount,
+            // Every edge lives under some imports entry, so edgeCount > 0
+            // implies $importCount >= 1; a single guard suffices.
+            'avgImportsPerDoc' => $importCount > 0 ? (float) ($this->edgeCount / $importCount) : 0.0,
+        ];
+    }
+
+    /**
+     * Merge another dependency graph into this one.
+     * Used to combine results from parallel child processes.
+     *
+     * Thread Safety: This method is NOT thread-safe. It should only be called
+     * from a single-threaded context after child processes have completed and
+     * returned their results. Do not call from multiple threads simultaneously.
+     *
+     * Size Limits: After merging, the resulting graph may exceed MAX_DOCUMENTS
+     * or MAX_IMPORTS_PER_DOCUMENT limits. Call validateLimits() after merging
+     * if strict limit enforcement is required.
+     */
+    public function merge(self $other): void
+    {
+        // Union per document: existing edges win ties, new edges are appended.
+        foreach ($other->imports as $from => $toMap) {
+            if (!isset($this->imports[$from])) {
+                $this->imports[$from] = $toMap;
+            } else {
+                $this->imports[$from] += $toMap;
+            }
+        }
+
+        foreach ($other->dependents as $to => $fromMap) {
+            if (!isset($this->dependents[$to])) {
+                $this->dependents[$to] = $fromMap;
+            } else {
+                $this->dependents[$to] += $fromMap;
+            }
+        }
+
+        // Recalculate edge count after merge (duplicates collapse in the
+        // keyed-array union, so a simple sum of per-document counts is exact).
+        $this->edgeCount = 0;
+        foreach ($this->imports as $toMap) {
+            $this->edgeCount += count($toMap);
+        }
+    }
+
+    /**
+     * Validate that the graph does not exceed size limits.
+     *
+     * Call this after merge() if strict limit enforcement is required.
+ * + * @throws InvalidArgumentException If any limits are exceeded + */ + public function validateLimits(): void + { + // Validate imports + if (count($this->imports) > self::MAX_DOCUMENTS) { + throw new InvalidArgumentException(sprintf( + 'DependencyGraph: imports exceed maximum of %d documents', + self::MAX_DOCUMENTS, + )); + } + + foreach ($this->imports as $from => $toMap) { + if (count($toMap) > self::MAX_IMPORTS_PER_DOCUMENT) { + throw new InvalidArgumentException(sprintf( + 'DependencyGraph: imports for "%s" exceed maximum of %d', + $from, + self::MAX_IMPORTS_PER_DOCUMENT, + )); + } + } + + // Validate dependents (reverse edges) + if (count($this->dependents) > self::MAX_DOCUMENTS) { + throw new InvalidArgumentException(sprintf( + 'DependencyGraph: dependents exceed maximum of %d documents', + self::MAX_DOCUMENTS, + )); + } + + foreach ($this->dependents as $to => $fromMap) { + if (count($fromMap) > self::MAX_IMPORTS_PER_DOCUMENT) { + throw new InvalidArgumentException(sprintf( + 'DependencyGraph: dependents for "%s" exceed maximum of %d', + $to, + self::MAX_IMPORTS_PER_DOCUMENT, + )); + } + } + + // Validate total edge count + if ($this->edgeCount > self::MAX_TOTAL_EDGES) { + throw new InvalidArgumentException(sprintf( + 'DependencyGraph: total edges (%d) exceed maximum of %d', + $this->edgeCount, + self::MAX_TOTAL_EDGES, + )); + } + } +} diff --git a/packages/guides/src/Build/IncrementalBuild/DirtyPropagator.php b/packages/guides/src/Build/IncrementalBuild/DirtyPropagator.php new file mode 100644 index 000000000..d8a1f1755 --- /dev/null +++ b/packages/guides/src/Build/IncrementalBuild/DirtyPropagator.php @@ -0,0 +1,155 @@ + $oldExports Previous build's exports + * @param array $newExports Current build's exports (for dirty docs) + */ + public function propagate( + ChangeDetectionResult $changes, + DependencyGraph $graph, + array $oldExports, + array $newExports, + ): PropagationResult { + // Start with directly dirty/new documents + $dirtySet = 
array_flip(array_merge($changes->dirty, $changes->new));
+        $propagatedFrom = [];
+
+        // Handle deleted files - their dependents become dirty
+        foreach ($changes->deleted as $deletedPath) {
+            $dependents = $graph->getDependents($deletedPath);
+            foreach ($dependents as $dependent) {
+                if (isset($dirtySet[$dependent])) {
+                    continue;
+                }
+
+                $dirtySet[$dependent] = true;
+                $propagatedFrom[] = $deletedPath;
+            }
+        }
+
+        // Check if exports changed for dirty docs
+        // If so, propagate to dependents
+        /** @var SplQueue $queue */
+        $queue = new SplQueue();
+        foreach (array_keys($dirtySet) as $doc) {
+            // array_keys() yields int for numeric string keys (PHP key
+            // conversion); cast back so getDependents(string) below does not
+            // receive an int under strict_types for docs named e.g. "2024".
+            $queue->enqueue((string) $doc);
+        }
+
+        $visited = [];
+
+        while (!$queue->isEmpty()) {
+            $current = $queue->dequeue();
+
+            if (isset($visited[$current])) {
+                continue;
+            }
+
+            $visited[$current] = true;
+
+            // Defense-in-depth: prevent runaway propagation
+            if (count($visited) >= self::MAX_PROPAGATION_VISITS) {
+                break;
+            }
+
+            // Check if exports changed
+            $old = $oldExports[$current] ?? null;
+            $new = $newExports[$current] ?? null;
+
+            $exportsChanged = false;
+            if ($old === null || $new === null) {
+                // New or deleted - definitely changed
+                $exportsChanged = true;
+            } elseif ($old->hasExportsChanged($new)) {
+                $exportsChanged = true;
+            }
+
+            if (!$exportsChanged) {
+                continue;
+            }
+
+            // Propagate to dependents
+            foreach ($graph->getDependents($current) as $dependent) {
+                if (isset($dirtySet[$dependent])) {
+                    continue;
+                }
+
+                $dirtySet[$dependent] = true;
+                $propagatedFrom[] = $current;
+
+                // Add to queue for further propagation
+                if (isset($visited[$dependent])) {
+                    continue;
+                }
+
+                $queue->enqueue($dependent);
+            }
+        }
+
+        // Compute final sets
+        // strval mapping ensures string[] even when numeric doc names became
+        // int keys in $dirtySet (consistent with DependencyGraph accessors).
+        $documentsToRender = array_map('strval', array_keys($dirtySet));
+        $documentsToSkip = array_diff($changes->clean, $documentsToRender);
+
+        return new PropagationResult(
+            documentsToRender: array_values($documentsToRender),
+            documentsToSkip: array_values($documentsToSkip),
+            propagatedFrom: array_unique($propagatedFrom),
+        );
+    }
+
+    /**
+     * Simple propagation without export comparison.
+     * Used when exports aren't available yet (during initial compile).
+ * + * @param string[] $dirtyDocs Initially dirty documents + * @param DependencyGraph $graph Dependency relationships + * + * @return string[] All documents that need rendering + */ + public function propagateSimple(array $dirtyDocs, DependencyGraph $graph): array + { + return $graph->propagateDirty($dirtyDocs); + } +} diff --git a/packages/guides/src/Build/IncrementalBuild/DocumentExports.php b/packages/guides/src/Build/IncrementalBuild/DocumentExports.php new file mode 100644 index 000000000..a8374337e --- /dev/null +++ b/packages/guides/src/Build/IncrementalBuild/DocumentExports.php @@ -0,0 +1,389 @@ + $anchors Anchor name => title mapping + * @param array $sectionTitles Section ID => title mapping + * @param string[] $citations Citation names defined in this document + * @param int $lastModified Unix timestamp of last modification + * @param string $documentTitle Document title (first heading) + */ + public function __construct( + public readonly string $documentPath, + public readonly string $contentHash, + public readonly string $exportsHash, + public readonly array $anchors, + public readonly array $sectionTitles, + public readonly array $citations, + public readonly int $lastModified, + public readonly string $documentTitle = '', + ) { + } + + /** + * Check if the exports (public interface) changed compared to another version. + * Content can change without exports changing (e.g., fixing a typo in body text). + */ + public function hasExportsChanged(self $other): bool + { + return $this->exportsHash !== $other->exportsHash; + } + + /** + * Check if any content changed. + */ + public function hasContentChanged(self $other): bool + { + return $this->contentHash !== $other->contentHash; + } + + /** + * Get all anchor names exported by this document. + * + * Note: Uses array_map to ensure string return type since PHP converts + * numeric string keys to integers in arrays. 
+ * + * @return string[] + */ + public function getAnchorNames(): array + { + return array_map('strval', array_keys($this->anchors)); + } + + /** + * Serialize to array for JSON persistence. + * + * @return array + */ + public function toArray(): array + { + return [ + 'documentPath' => $this->documentPath, + 'contentHash' => $this->contentHash, + 'exportsHash' => $this->exportsHash, + 'anchors' => $this->anchors, + 'sectionTitles' => $this->sectionTitles, + 'citations' => $this->citations, + 'lastModified' => $this->lastModified, + 'documentTitle' => $this->documentTitle, + ]; + } + + /** + * Deserialize from array. + * + * @param array $data + * + * @throws InvalidArgumentException If data format is invalid + */ + public static function fromArray(array $data): self + { + $anchors = self::validateStringMap($data, 'anchors'); + $sectionTitles = self::validateStringMap($data, 'sectionTitles'); + $citations = self::validateStringList($data, 'citations'); + + $documentPath = self::validateDocumentPath($data); + $contentHash = self::validateHash($data, 'contentHash'); + $exportsHash = self::validateHash($data, 'exportsHash'); + $documentTitle = self::validateString($data, 'documentTitle'); + + $lastModified = $data['lastModified'] ?? 0; + if (!is_int($lastModified) || $lastModified < 0 || $lastModified > self::MAX_TIMESTAMP) { + throw new InvalidArgumentException(sprintf( + 'DocumentExports: expected lastModified to be int between 0 and %d, got %s', + self::MAX_TIMESTAMP, + is_int($lastModified) ? (string) $lastModified : get_debug_type($lastModified), + )); + } + + return new self( + documentPath: $documentPath, + contentHash: $contentHash, + exportsHash: $exportsHash, + anchors: $anchors, + sectionTitles: $sectionTitles, + citations: $citations, + lastModified: $lastModified, + documentTitle: $documentTitle, + ); + } + + /** + * Validate and extract a string field with length checking. 
+ * + * @param array $data + * + * @throws InvalidArgumentException If value is not a string or exceeds max length + */ + private static function validateString(array $data, string $field): string + { + $value = $data[$field] ?? ''; + + if (!is_string($value)) { + throw new InvalidArgumentException(sprintf( + 'DocumentExports: expected %s to be string, got %s', + $field, + get_debug_type($value), + )); + } + + if (strlen($value) > self::MAX_STRING_LENGTH) { + throw new InvalidArgumentException(sprintf( + 'DocumentExports: %s exceeds maximum length of %d bytes', + $field, + self::MAX_STRING_LENGTH, + )); + } + + return $value; + } + + /** + * Validate and extract a document path with additional safety checks. + * + * Rejects control characters (including null bytes) that could cause issues + * in filesystem operations, log output, or other string processing. + * + * @param array $data + * + * @throws InvalidArgumentException If path is invalid + */ + private static function validateDocumentPath(array $data): string + { + $value = self::validateString($data, 'documentPath'); + + // Allow empty path (documented as valid for test/fallback scenarios) + if ($value === '') { + return ''; + } + + // Reject control characters (0x00-0x1F and 0x7F) that could cause issues + // in filesystem operations, log output, or terminal display + if (preg_match('/[\x00-\x1F\x7F]/', $value) === 1) { + throw new InvalidArgumentException( + 'DocumentExports: documentPath contains invalid control characters', + ); + } + + return $value; + } + + /** Valid hash lengths: xxh128 (32 hex chars) or sha256 (64 hex chars) */ + private const VALID_HASH_LENGTHS = [32, 64]; + + /** + * Validate and extract a hash field (hexadecimal string or empty). 
+ * + * Accepts: + * - Empty string (for new documents or when hash wasn't computed) + * - 32 hex chars (xxh128 algorithm) + * - 64 hex chars (sha256 algorithm) + * + * @param array $data + * + * @throws InvalidArgumentException If value is not a valid hash format or length + */ + private static function validateHash(array $data, string $field): string + { + $value = self::validateString($data, $field); + + // Allow empty hash (for new documents or when hash wasn't computed) + if ($value === '') { + return ''; + } + + // Validate hexadecimal format + if (preg_match('/^[a-f0-9]+$/i', $value) !== 1) { + throw new InvalidArgumentException(sprintf( + 'DocumentExports: %s must be a hexadecimal string, got invalid format', + $field, + )); + } + + // Validate hash length matches known algorithms + $length = strlen($value); + if (!in_array($length, self::VALID_HASH_LENGTHS, true)) { + throw new InvalidArgumentException(sprintf( + 'DocumentExports: %s must be 32 (xxh128) or 64 (sha256) hex chars, got %d', + $field, + $length, + )); + } + + return $value; + } + + /** + * Validate a string-to-string map array field. + * + * Note on key collision: PHP arrays can have both integer key 123 and string key "123" + * which would collide when cast to string. However, JSON-decoded arrays cannot have + * both forms of the same key, so this is not a concern in practice. The later value + * would overwrite the earlier one if such a collision occurred. + * + * @param array $data + * + * @return array + * + * @throws InvalidArgumentException If validation fails + */ + private static function validateStringMap(array $data, string $field): array + { + $value = $data[$field] ?? 
[]; + + if (!is_array($value)) { + throw new InvalidArgumentException(sprintf( + 'DocumentExports: expected %s to be array, got %s', + $field, + get_debug_type($value), + )); + } + + if (count($value) > self::MAX_ARRAY_ITEMS) { + throw new InvalidArgumentException(sprintf( + 'DocumentExports: %s exceeds maximum of %d items', + $field, + self::MAX_ARRAY_ITEMS, + )); + } + + $result = []; + foreach ($value as $key => $item) { + $stringKey = (string) $key; + + if (strlen($stringKey) > self::MAX_STRING_LENGTH) { + throw new InvalidArgumentException(sprintf( + 'DocumentExports: key in %s exceeds maximum length', + $field, + )); + } + + if (!is_string($item)) { + throw new InvalidArgumentException(sprintf( + 'DocumentExports: expected %s value to be string, got %s', + $field, + get_debug_type($item), + )); + } + + if (strlen($item) > self::MAX_STRING_LENGTH) { + throw new InvalidArgumentException(sprintf( + 'DocumentExports: value in %s exceeds maximum length', + $field, + )); + } + + $result[$stringKey] = $item; + } + + return $result; + } + + /** + * Validate a string list array field. + * + * @param array $data + * + * @return string[] + * + * @throws InvalidArgumentException If validation fails + */ + private static function validateStringList(array $data, string $field): array + { + $value = $data[$field] ?? 
[]; + + if (!is_array($value)) { + throw new InvalidArgumentException(sprintf( + 'DocumentExports: expected %s to be array, got %s', + $field, + get_debug_type($value), + )); + } + + if (count($value) > self::MAX_ARRAY_ITEMS) { + throw new InvalidArgumentException(sprintf( + 'DocumentExports: %s exceeds maximum of %d items', + $field, + self::MAX_ARRAY_ITEMS, + )); + } + + $result = []; + foreach ($value as $item) { + if (!is_string($item)) { + throw new InvalidArgumentException(sprintf( + 'DocumentExports: expected %s item to be string, got %s', + $field, + get_debug_type($item), + )); + } + + if (strlen($item) > self::MAX_STRING_LENGTH) { + throw new InvalidArgumentException(sprintf( + 'DocumentExports: item in %s exceeds maximum length', + $field, + )); + } + + $result[] = $item; + } + + return $result; + } +} diff --git a/packages/guides/src/Build/IncrementalBuild/GlobalInvalidationDetector.php b/packages/guides/src/Build/IncrementalBuild/GlobalInvalidationDetector.php new file mode 100644 index 000000000..7dbf23ee5 --- /dev/null +++ b/packages/guides/src/Build/IncrementalBuild/GlobalInvalidationDetector.php @@ -0,0 +1,201 @@ + self::MAX_PATTERN_LENGTH) { + throw new InvalidArgumentException( + 'Global pattern exceeds maximum length of ' . self::MAX_PATTERN_LENGTH, + ); + } + } + + $this->globalPatterns = $globalPatterns; + } else { + $this->globalPatterns = self::DEFAULT_PATTERNS; + } + } + + /** + * Check if any changes require a full rebuild. 
+ * + * @param ChangeDetectionResult $changes Detected changes + * @param string|null $settingsHash Current settings hash + * @param string|null $cachedSettingsHash Previous settings hash + * + * @return bool True if full rebuild is required + */ + public function requiresFullRebuild( + ChangeDetectionResult $changes, + string|null $settingsHash = null, + string|null $cachedSettingsHash = null, + ): bool { + // Check if settings changed + if ($settingsHash !== null && $cachedSettingsHash !== null) { + if ($settingsHash !== $cachedSettingsHash) { + return true; + } + } + + // Check if any global files changed + $allChangedFiles = array_merge($changes->dirty, $changes->new, $changes->deleted); + + foreach ($allChangedFiles as $file) { + if ($this->isGlobalFile($file)) { + return true; + } + } + + return false; + } + + /** + * Check if a file is a global file that affects all documents. + */ + public function isGlobalFile(string $filePath): bool + { + $normalizedPath = str_replace('\\', '/', $filePath); + + foreach ($this->globalPatterns as $pattern) { + if (str_ends_with($pattern, '/')) { + // Directory pattern - must match as complete path segment + // e.g., pattern "foo/" matches "/path/foo/bar.txt" but not "/path/prefix_foo/bar.txt" + if ( + str_starts_with($normalizedPath, $pattern) || + str_contains($normalizedPath, '/' . $pattern) + ) { + return true; + } + } else { + // File pattern + if (str_ends_with($normalizedPath, '/' . $pattern) || $normalizedPath === $pattern) { + return true; + } + } + } + + return false; + } + + /** + * Check if toctree structure changed. + * This is detected by comparing the document hierarchy. 
+ * + * @param array $oldToctree Previous toctree structure + * @param array $newToctree Current toctree structure + * + * @return bool True if structure changed + */ + public function hasToctreeChanged(array $oldToctree, array $newToctree): bool + { + // Simple comparison - if keys or values differ, structure changed + if (count($oldToctree) !== count($newToctree)) { + return true; + } + + foreach ($oldToctree as $parent => $children) { + if (!isset($newToctree[$parent])) { + return true; + } + + $oldChildren = $children; + $newChildren = $newToctree[$parent]; + + // Sort before comparing - order changes within a toctree entry + // don't affect the dependency graph structure, only the rendered + // navigation order. Navigation rendering is handled separately. + sort($oldChildren); + sort($newChildren); + + if ($oldChildren !== $newChildren) { + return true; + } + } + + return false; + } + + /** + * Get the configured global patterns. + * + * @return string[] + */ + public function getGlobalPatterns(): array + { + return $this->globalPatterns; + } +} diff --git a/packages/guides/src/Build/IncrementalBuild/IncrementalBuildCache.php b/packages/guides/src/Build/IncrementalBuild/IncrementalBuildCache.php new file mode 100644 index 000000000..2e4e5fcec --- /dev/null +++ b/packages/guides/src/Build/IncrementalBuild/IncrementalBuildCache.php @@ -0,0 +1,629 @@ +/.json: Per-document exports (loaded on demand) + * + * Sharding benefits: + * - O(1) save per changed document instead of O(n) full rewrite + * - Better git diffs (only changed files appear) + * - Reduced memory for large projects (can load exports on demand) + * + * Security considerations: + * - All input data is validated before use + * - Maximum limits enforced on document counts + * - Path traversal prevented in shard operations + * + * Thread safety: + * - NOT thread-safe. Designed for single-threaded build processes. 
+ * - For parallel builds: each child process should use extractState()/mergeState() + * to serialize state, with the parent process merging results sequentially + * after all children complete. + */ +final class IncrementalBuildCache +{ + private const BUILD_META_FILE = '_build_meta.json'; + private const EXPORTS_DIR = '_exports'; + + /** + * Maximum number of exports. + * Consistent with PropagationResult::MAX_DOCUMENTS and DirtyPropagator::MAX_PROPAGATION_VISITS. + */ + private const MAX_EXPORTS = 100_000; + + /** + * Maximum number of output path mappings. + * Consistent with MAX_EXPORTS. + */ + private const MAX_OUTPUT_PATHS = 100_000; + + /** @var array */ + private array $exports = []; + + private DependencyGraph $dependencyGraph; + + /** @var array docPath -> rendered output path */ + private array $outputPaths = []; + + /** @var array */ + private array $metadata = []; + + private bool $loaded = false; + + /** Input directory for file path resolution */ + private string $inputDir = ''; + + /** @var array Tracks which exports have been modified (for incremental save) */ + private array $dirtyExports = []; + + /** Output directory (stored for incremental saves) */ + private string|null $outputDir = null; + + public function __construct( + private readonly CacheVersioning $versioning, + ) { + $this->dependencyGraph = new DependencyGraph(); + } + + /** + * Load cache from output directory. + * + * Supports both legacy (monolithic) and sharded cache formats. + * + * @param string $outputDir The output directory where _build_meta.json is stored + * + * @return bool True if cache was loaded and is valid + */ + public function load(string $outputDir): bool + { + $this->outputDir = $outputDir; + $metaPath = $outputDir . '/' . 
self::BUILD_META_FILE;
+
+        if (!file_exists($metaPath)) {
+            return false;
+        }
+
+        $json = file_get_contents($metaPath);
+        if ($json === false) {
+            return false;
+        }
+
+        try {
+            $data = json_decode($json, true, 512, JSON_THROW_ON_ERROR);
+        } catch (JsonException) {
+            return false;
+        }
+
+        if (!is_array($data)) {
+            return false;
+        }
+
+        // Load and validate metadata
+        /** @var array $metadata */
+        $metadata = $data['metadata'] ?? [];
+        if (!is_array($metadata)) {
+            return false;
+        }
+
+        $this->metadata = $metadata;
+        if (!$this->versioning->isCacheValid($this->metadata)) {
+            return false;
+        }
+
+        // Check if using sharded exports (new format)
+        $exportsDir = $outputDir . '/' . self::EXPORTS_DIR;
+        $isSharded = is_dir($exportsDir) && !isset($data['exports']);
+
+        if ($isSharded) {
+            // Load exports from sharded files
+            $this->loadShardedExports($exportsDir);
+        } else {
+            // Legacy: Load exports from main file
+            $exportsData = $data['exports'] ?? [];
+            if (!is_array($exportsData)) {
+                return false;
+            }
+
+            if (count($exportsData) > self::MAX_EXPORTS) {
+                throw new InvalidArgumentException('exports exceed maximum of ' . self::MAX_EXPORTS);
+            }
+
+            foreach ($exportsData as $path => $exportData) {
+                if (!is_array($exportData)) {
+                    continue;
+                }
+
+                // json_decode() converts numeric string keys to int; cast the
+                // key back to string instead of skipping it, so exports for
+                // numeric document names (e.g. "2024") are not silently lost.
+                // (Matches DependencyGraph::fromArray, which casts (string) $from.)
+                $this->exports[(string) $path] = DocumentExports::fromArray($exportData);
+            }
+        }
+
+        // Load dependencies
+        $depsData = $data['dependencies'] ?? [];
+        if (!is_array($depsData)) {
+            return false;
+        }
+
+        $this->dependencyGraph = DependencyGraph::fromArray($depsData);
+
+        // Load output paths
+        $outputPaths = $data['outputs'] ?? [];
+        if (!is_array($outputPaths)) {
+            return false;
+        }
+
+        if (count($outputPaths) > self::MAX_OUTPUT_PATHS) {
+            throw new InvalidArgumentException('output paths exceed maximum of ' . 
self::MAX_OUTPUT_PATHS);
+        }
+
+        foreach ($outputPaths as $docPath => $outputPath) {
+            if (!is_string($outputPath)) {
+                continue;
+            }
+
+            // json_decode() converts numeric string keys to int; cast the key
+            // back to string instead of skipping, so output mappings for
+            // numeric document names (e.g. "2024") are preserved.
+            $this->outputPaths[(string) $docPath] = $outputPath;
+        }
+
+        $this->loaded = true;
+        $this->dirtyExports = []; // Reset dirty tracking after load
+
+        return true;
+    }
+
+    /**
+     * Load exports from sharded directory structure.
+     */
+    private function loadShardedExports(string $exportsDir): void
+    {
+        // Iterate through shard directories (2-char hash prefixes)
+        $shardDirs = glob($exportsDir . '/*', GLOB_ONLYDIR);
+        if ($shardDirs === false) {
+            return;
+        }
+
+        $loadedCount = 0;
+
+        foreach ($shardDirs as $shardDir) {
+            // Validate shard directory name (must be 2 hex chars)
+            $shardName = basename($shardDir);
+            if (!$this->isValidShardName($shardName)) {
+                continue;
+            }
+
+            $files = glob($shardDir . '/*.json');
+            if ($files === false) {
+                continue;
+            }
+
+            foreach ($files as $file) {
+                if ($loadedCount >= self::MAX_EXPORTS) {
+                    throw new InvalidArgumentException('exports exceed maximum of ' . self::MAX_EXPORTS);
+                }
+
+                $json = file_get_contents($file);
+                if ($json === false) {
+                    continue;
+                }
+
+                try {
+                    $data = json_decode($json, true, 512, JSON_THROW_ON_ERROR);
+                } catch (JsonException) {
+                    continue;
+                }
+
+                if (!is_array($data) || !isset($data['path']) || !is_string($data['path'])) {
+                    continue;
+                }
+
+                $docPath = $data['path'];
+                unset($data['path']); // Remove path from export data
+                $this->exports[$docPath] = DocumentExports::fromArray($data);
+                $loadedCount++;
+            }
+        }
+    }
+
+    /**
+     * Validate shard directory name (2 lowercase hex characters).
+     */
+    private function isValidShardName(string $name): bool
+    {
+        if (strlen($name) !== 2) {
+            return false;
+        }
+
+        return preg_match('/^[0-9a-f]{2}$/', $name) === 1;
+    }
+
+    /**
+     * Save cache to output directory.
+     *
+     * Uses sharded storage for exports (each document in separate file).
+     * Only writes changed exports for incremental efficiency.
+ * + * @param string $outputDir The output directory + * @param string $settingsHash Hash of current settings for invalidation + * + * @throws RuntimeException If write operations fail + */ + public function save(string $outputDir, string $settingsHash = ''): void + { + $this->outputDir = $outputDir; + + if (!is_dir($outputDir) && !mkdir($outputDir, 0o755, true)) { + throw new RuntimeException('Failed to create cache directory: ' . $outputDir); + } + + // Save sharded exports (only dirty ones) + $this->saveShardedExports($outputDir); + + // Build main metadata file (no exports - they're sharded) + $this->metadata = $this->versioning->createMetadata($settingsHash); + + $data = [ + 'metadata' => $this->metadata, + 'dependencies' => $this->dependencyGraph->toArray(), + 'outputs' => $this->outputPaths, + ]; + + $metaPath = $outputDir . '/' . self::BUILD_META_FILE; + $result = file_put_contents( + $metaPath, + json_encode($data, JSON_PRETTY_PRINT | JSON_THROW_ON_ERROR), + ); + + if ($result === false) { + throw new RuntimeException('Failed to write cache metadata: ' . $metaPath); + } + + // Clear dirty tracking after successful save + $this->dirtyExports = []; + } + + /** + * Save exports to sharded directory structure. + * + * Directory structure: _exports//.json + * Only writes files that have been modified (tracked in dirtyExports). + */ + private function saveShardedExports(string $outputDir): void + { + $exportsDir = $outputDir . '/' . self::EXPORTS_DIR; + + // On first save or full rebuild, write all exports + $writeAll = !is_dir($exportsDir) || $this->dirtyExports === []; + + if (!is_dir($exportsDir) && !mkdir($exportsDir, 0o755, true)) { + throw new RuntimeException('Failed to create exports directory: ' . 
$exportsDir); + } + + foreach ($this->exports as $docPath => $exports) { + // Skip unchanged exports (incremental save) + if (!$writeAll && !isset($this->dirtyExports[$docPath])) { + continue; + } + + $this->writeExportFile($exportsDir, $docPath, $exports); + } + } + + /** + * Write a single export file to the sharded directory. + */ + private function writeExportFile(string $exportsDir, string $docPath, DocumentExports $exports): void + { + // Use hash prefix for distribution (2 chars = 256 buckets) + $hash = md5($docPath); + $prefix = substr($hash, 0, 2); + $shardDir = $exportsDir . '/' . $prefix; + + if (!is_dir($shardDir) && !mkdir($shardDir, 0o755, true)) { + throw new RuntimeException('Failed to create shard directory: ' . $shardDir); + } + + // Use hash as filename to handle special chars in doc paths + $filename = $hash . '.json'; + $filePath = $shardDir . '/' . $filename; + + // Include path in the data for loading + $data = $exports->toArray(); + $data['path'] = $docPath; + + $result = file_put_contents( + $filePath, + json_encode($data, JSON_THROW_ON_ERROR), + ); + + if ($result === false) { + throw new RuntimeException('Failed to write export file: ' . $filePath); + } + } + + /** + * Get the shard file path for a document. + */ + private function getExportFilePath(string $outputDir, string $docPath): string + { + $hash = md5($docPath); + $prefix = substr($hash, 0, 2); + + return $outputDir . '/' . self::EXPORTS_DIR . '/' . $prefix . '/' . $hash . '.json'; + } + + /** + * Get exports for a document. + */ + public function getExports(string $docPath): DocumentExports|null + { + return $this->exports[$docPath] ?? null; + } + + /** + * Set exports for a document. + * Marks the export as dirty for incremental save. 
+ * + * @throws InvalidArgumentException If maximum exports limit would be exceeded + */ + public function setExports(string $docPath, DocumentExports $exports): void + { + if (!isset($this->exports[$docPath]) && count($this->exports) >= self::MAX_EXPORTS) { + throw new InvalidArgumentException('exports exceed maximum of ' . self::MAX_EXPORTS); + } + + $this->exports[$docPath] = $exports; + $this->dirtyExports[$docPath] = true; + } + + /** + * Get all cached exports. + * + * @return array + */ + public function getAllExports(): array + { + return $this->exports; + } + + /** + * Get all cached document paths. + * + * @return string[] + */ + public function getAllDocPaths(): array + { + return array_keys($this->exports); + } + + /** + * Get the dependency graph. + */ + public function getDependencyGraph(): DependencyGraph + { + return $this->dependencyGraph; + } + + /** + * Set output path for a document. + * + * @throws InvalidArgumentException If maximum output paths limit would be exceeded + */ + public function setOutputPath(string $docPath, string $outputPath): void + { + if (!isset($this->outputPaths[$docPath]) && count($this->outputPaths) >= self::MAX_OUTPUT_PATHS) { + throw new InvalidArgumentException('output paths exceed maximum of ' . self::MAX_OUTPUT_PATHS); + } + + $this->outputPaths[$docPath] = $outputPath; + } + + /** + * Get output path for a document. + */ + public function getOutputPath(string $docPath): string|null + { + return $this->outputPaths[$docPath] ?? null; + } + + /** + * Remove a document from all cache structures. + * Also deletes the sharded export file if it exists. 
+ * + * @throws RuntimeException If the export file cannot be deleted + */ + public function removeDocument(string $docPath): void + { + unset($this->exports[$docPath]); + unset($this->outputPaths[$docPath]); + unset($this->dirtyExports[$docPath]); + $this->dependencyGraph->removeDocument($docPath); + + // Delete sharded export file if output directory is known + if ($this->outputDir === null) { + return; + } + + $exportFile = $this->getExportFilePath($this->outputDir, $docPath); + if (!file_exists($exportFile)) { + return; + } + + if (!@unlink($exportFile)) { + throw new RuntimeException('Failed to delete export file: ' . $exportFile); + } + } + + /** + * Check if cache was loaded from disk. + */ + public function isLoaded(): bool + { + return $this->loaded; + } + + /** + * Get cached settings hash. + */ + public function getSettingsHash(): string|null + { + $hash = $this->metadata['settingsHash'] ?? null; + + return is_string($hash) ? $hash : null; + } + + /** + * Clear all cached data. + */ + public function clear(): void + { + $this->exports = []; + $this->dirtyExports = []; + $this->dependencyGraph = new DependencyGraph(); + $this->outputPaths = []; + $this->metadata = []; + $this->loaded = false; + } + + /** + * Get cache statistics. + * + * @return array + */ + public function getStats(): array + { + return [ + 'documents' => count($this->exports), + 'outputs' => count($this->outputPaths), + 'graph' => $this->dependencyGraph->getStats(), + 'loaded' => $this->loaded, + ]; + } + + /** + * Extract cache state for serialization (used in parallel compilation). 
+ * + * @return array{exports: array>, dependencies: array, outputPaths: array} + */ + public function extractState(): array + { + $exportsData = []; + foreach ($this->exports as $path => $exports) { + $exportsData[$path] = $exports->toArray(); + } + + return [ + 'exports' => $exportsData, + 'dependencies' => $this->dependencyGraph->toArray(), + 'outputPaths' => $this->outputPaths, + ]; + } + + /** + * Merge state from another cache instance (used after parallel compilation). + * + * @param array{exports?: array>, dependencies?: array, outputPaths?: array} $state State from extractState() + */ + public function mergeState(array $state): void + { + // Merge exports + $exportsData = $state['exports'] ?? []; + foreach ($exportsData as $path => $exportData) { + if (!is_string($path) || !is_array($exportData)) { + continue; + } + + // Only add if not already present (first write wins) + if (isset($this->exports[$path])) { + continue; + } + + if (count($this->exports) >= self::MAX_EXPORTS) { + throw new InvalidArgumentException('exports exceed maximum during merge'); + } + + $this->exports[$path] = DocumentExports::fromArray($exportData); + } + + // Merge dependency graph + $depsData = $state['dependencies'] ?? []; + if (is_array($depsData) && $depsData !== []) { + $childGraph = DependencyGraph::fromArray($depsData); + $this->dependencyGraph->merge($childGraph); + } + + // Merge output paths + $outputPaths = $state['outputPaths'] ?? []; + foreach ($outputPaths as $docPath => $outputPath) { + if (!is_string($docPath) || !is_string($outputPath)) { + continue; + } + + if (isset($this->outputPaths[$docPath])) { + continue; + } + + if (count($this->outputPaths) >= self::MAX_OUTPUT_PATHS) { + throw new InvalidArgumentException('output paths exceed maximum during merge'); + } + + $this->outputPaths[$docPath] = $outputPath; + } + } + + /** + * Set the input directory for file path resolution. 
+ */ + public function setInputDir(string $inputDir): void + { + $this->inputDir = $inputDir; + } + + /** + * Get the input directory for file path resolution. + */ + public function getInputDir(): string + { + return $this->inputDir; + } +} diff --git a/packages/guides/src/Build/IncrementalBuild/IncrementalBuildState.php b/packages/guides/src/Build/IncrementalBuild/IncrementalBuildState.php new file mode 100644 index 000000000..9555acf3f --- /dev/null +++ b/packages/guides/src/Build/IncrementalBuild/IncrementalBuildState.php @@ -0,0 +1,339 @@ + Document path => exports */ + private array $exports = []; + + /** @var array Cached exports from previous build */ + private array $previousExports = []; + + /** Directory containing input documentation files */ + private string $inputDir = ''; + + /** Hash algorithm used when state was serialized (for compatibility checking) */ + private string $hashAlgorithm = ''; + + public function __construct() + { + $this->dependencyGraph = new DependencyGraph(); + } + + /** + * Get the dependency graph for this build. + */ + public function getDependencyGraph(): DependencyGraph + { + return $this->dependencyGraph; + } + + /** + * Set the dependency graph (e.g., loaded from cache). + */ + public function setDependencyGraph(DependencyGraph $graph): void + { + $this->dependencyGraph = $graph; + } + + /** + * Set exports for a document. + * + * Enforces runtime limits to prevent memory exhaustion during build. 
+ * + * @throws InvalidArgumentException If adding a new document would exceed MAX_EXPORTS limit + */ + public function setExports(string $documentPath, DocumentExports $exports): void + { + // Allow updating existing documents without counting against the limit + if (!isset($this->exports[$documentPath]) && count($this->exports) >= self::MAX_EXPORTS) { + throw new InvalidArgumentException(sprintf( + 'IncrementalBuildState: exports exceed maximum of %d documents', + self::MAX_EXPORTS, + )); + } + + $this->exports[$documentPath] = $exports; + } + + /** + * Get exports for a document. + */ + public function getExports(string $documentPath): DocumentExports|null + { + return $this->exports[$documentPath] ?? null; + } + + /** + * Get all current exports. + * + * Note: Returns the internal array. While PHP uses copy-on-write semantics, + * callers should not rely on modifications to the returned array affecting + * internal state. DocumentExports objects are immutable (readonly properties). + * + * @return array + */ + public function getAllExports(): array + { + return $this->exports; + } + + /** + * Set exports from a previous build (for change detection). + * + * @param array $exports + * + * @throws InvalidArgumentException If exports exceed maximum allowed size + */ + public function setPreviousExports(array $exports): void + { + if (count($exports) > self::MAX_EXPORTS) { + throw new InvalidArgumentException(sprintf( + 'IncrementalBuildState: previousExports exceed maximum of %d documents', + self::MAX_EXPORTS, + )); + } + + $this->previousExports = $exports; + } + + /** + * Get exports from the previous build. + * + * @return array + */ + public function getPreviousExports(): array + { + return $this->previousExports; + } + + /** + * Get previous exports for a specific document. + */ + public function getPreviousExportsFor(string $documentPath): DocumentExports|null + { + return $this->previousExports[$documentPath] ?? 
null; + } + + /** + * Set the input directory. + */ + public function setInputDir(string $inputDir): void + { + $this->inputDir = $inputDir; + } + + /** + * Get the input directory. + */ + public function getInputDir(): string + { + return $this->inputDir; + } + + /** + * Set the hash algorithm used for this state. + * + * Should be called when creating new state to record the current algorithm. + * + * @throws InvalidArgumentException If algorithm is not in ALLOWED_ALGORITHMS + */ + public function setHashAlgorithm(string $algorithm): void + { + if (!in_array($algorithm, self::ALLOWED_ALGORITHMS, true)) { + throw new InvalidArgumentException(sprintf( + 'IncrementalBuildState: invalid hash algorithm "%s", allowed: %s', + $algorithm, + implode(', ', self::ALLOWED_ALGORITHMS), + )); + } + + $this->hashAlgorithm = $algorithm; + } + + /** + * Get the hash algorithm used when this state was created/serialized. + * + * Returns empty string if algorithm was not recorded (legacy state). + */ + public function getHashAlgorithm(): string + { + return $this->hashAlgorithm; + } + + /** + * Check if this state's hash algorithm is compatible with the given algorithm. + * + * Returns true if: + * - The algorithms match exactly, OR + * - This state has no recorded algorithm (legacy state created with sha256) + * AND the current algorithm is sha256 + * + * Returns false if algorithms differ, indicating cached hashes are invalid + * and a full rebuild is needed. + * + * Note: Legacy state (empty hashAlgorithm) assumes sha256 because it was the + * only algorithm available before xxh128 support was added. This ensures + * correct cache invalidation when xxh128 becomes available on a system. 
+ */ + public function isHashAlgorithmCompatible(string $currentAlgorithm): bool + { + // Legacy state without recorded algorithm - assume sha256 was used + // (sha256 was the only option before xxh128 support was added) + if ($this->hashAlgorithm === '') { + return $currentAlgorithm === 'sha256'; + } + + return $this->hashAlgorithm === $currentAlgorithm; + } + + /** + * Reset state for a new build. + */ + public function reset(): void + { + $this->dependencyGraph = new DependencyGraph(); + $this->exports = []; + // Note: previousExports is intentionally NOT reset - it's the reference for change detection + } + + /** + * Serialize state to array for persistence. + * + * @return array + */ + public function toArray(): array + { + $exportsArray = []; + foreach ($this->exports as $path => $exports) { + $exportsArray[$path] = $exports->toArray(); + } + + return [ + 'dependencyGraph' => $this->dependencyGraph->toArray(), + 'exports' => $exportsArray, + 'hashAlgorithm' => $this->hashAlgorithm, + ]; + } + + /** + * Restore state from array. 
+ * + * @param array $data + * + * @throws InvalidArgumentException If data format is invalid + */ + public static function fromArray(array $data): self + { + $state = new self(); + + if (isset($data['dependencyGraph'])) { + $graphData = $data['dependencyGraph']; + if (!is_array($graphData)) { + throw new InvalidArgumentException(sprintf( + 'IncrementalBuildState: expected dependencyGraph to be array, got %s', + get_debug_type($graphData), + )); + } + + $state->dependencyGraph = DependencyGraph::fromArray($graphData); + } + + if (isset($data['exports'])) { + $exportsData = $data['exports']; + if (!is_array($exportsData)) { + throw new InvalidArgumentException(sprintf( + 'IncrementalBuildState: expected exports to be array, got %s', + get_debug_type($exportsData), + )); + } + + // Enforce size limit to prevent memory exhaustion + if (count($exportsData) > self::MAX_EXPORTS) { + throw new InvalidArgumentException(sprintf( + 'IncrementalBuildState: exports exceed maximum of %d documents', + self::MAX_EXPORTS, + )); + } + + foreach ($exportsData as $path => $exportData) { + // PHP array keys are always int or string, so just validate value + if (!is_array($exportData)) { + throw new InvalidArgumentException(sprintf( + 'IncrementalBuildState: expected export data for "%s" to be array, got %s', + $path, + get_debug_type($exportData), + )); + } + + $state->exports[(string) $path] = DocumentExports::fromArray($exportData); + } + } + + // Restore hash algorithm if present (may be empty for legacy state) + if (isset($data['hashAlgorithm'])) { + $algorithm = $data['hashAlgorithm']; + if (!is_string($algorithm)) { + throw new InvalidArgumentException(sprintf( + 'IncrementalBuildState: expected hashAlgorithm to be string, got %s', + get_debug_type($algorithm), + )); + } + + // Validate algorithm if not empty (empty is valid for legacy state) + if ($algorithm !== '' && !in_array($algorithm, self::ALLOWED_ALGORITHMS, true)) { + throw new InvalidArgumentException(sprintf( + 
'IncrementalBuildState: invalid hash algorithm "%s", allowed: %s', + $algorithm, + implode(', ', self::ALLOWED_ALGORITHMS), + )); + } + + $state->hashAlgorithm = $algorithm; + } + + return $state; + } +} diff --git a/packages/guides/src/Build/IncrementalBuild/PropagationResult.php b/packages/guides/src/Build/IncrementalBuild/PropagationResult.php new file mode 100644 index 000000000..6b000e3f9 --- /dev/null +++ b/packages/guides/src/Build/IncrementalBuild/PropagationResult.php @@ -0,0 +1,155 @@ +documentsToRender, true); + } + + /** + * Get count of documents to render. + */ + public function getRenderCount(): int + { + return count($this->documentsToRender); + } + + /** + * Get count of documents to skip. + */ + public function getSkipCount(): int + { + return count($this->documentsToSkip); + } + + /** + * Get savings ratio (0.0 - 1.0). + * + * Returns the proportion of documents that can be skipped. + */ + public function getSavingsRatio(): float + { + $total = $this->getRenderCount() + $this->getSkipCount(); + if ($total === 0) { + return 0.0; + } + + return $this->getSkipCount() / $total; + } + + /** + * Serialize to array. + * + * @return array{documentsToRender: string[], documentsToSkip: string[], propagatedFrom: string[]} + */ + public function toArray(): array + { + return [ + 'documentsToRender' => array_values($this->documentsToRender), + 'documentsToSkip' => array_values($this->documentsToSkip), + 'propagatedFrom' => array_values($this->propagatedFrom), + ]; + } + + /** + * Create from serialized array with validation. + * + * @param array $data + */ + public static function fromArray(array $data): self + { + $documentsToRender = self::validateStringArray( + array_key_exists('documentsToRender', $data) ? $data['documentsToRender'] : [], + 'documentsToRender', + ); + $documentsToSkip = self::validateStringArray( + array_key_exists('documentsToSkip', $data) ? 
$data['documentsToSkip'] : [], + 'documentsToSkip', + ); + $propagatedFrom = self::validateStringArray( + array_key_exists('propagatedFrom', $data) ? $data['propagatedFrom'] : [], + 'propagatedFrom', + ); + + return new self($documentsToRender, $documentsToSkip, $propagatedFrom); + } + + /** + * Validate that a value is an array of strings within size limits. + * + * @param mixed $value The value (may be any type including null if key was set to null) + * + * @return string[] + */ + private static function validateStringArray(mixed $value, string $fieldName): array + { + if (!is_array($value)) { + throw new InvalidArgumentException('expected ' . $fieldName . ' to be array'); + } + + if (count($value) > self::MAX_DOCUMENTS) { + throw new InvalidArgumentException($fieldName . ' exceed maximum of ' . self::MAX_DOCUMENTS); + } + + $result = []; + foreach ($value as $item) { + if (!is_string($item)) { + throw new InvalidArgumentException('expected ' . $fieldName . ' item to be string'); + } + + $result[] = $item; + } + + return $result; + } +} diff --git a/packages/guides/src/Compiler/Passes/DependencyGraphPass.php b/packages/guides/src/Compiler/Passes/DependencyGraphPass.php new file mode 100644 index 000000000..3bf5206ad --- /dev/null +++ b/packages/guides/src/Compiler/Passes/DependencyGraphPass.php @@ -0,0 +1,246 @@ +getProjectNode(); + $graph = $this->buildState->getDependencyGraph(); + $this->rejectedImports = 0; + + foreach ($documents as $document) { + $filePath = $document->getFilePath(); + + // Clear old imports for this document + $graph->clearImportsFor($filePath); + + // Find all references in this document + $imports = $this->findImports($document, $projectNode); + + // Add edges to the graph, tracking rejections + foreach ($imports as $importedDocPath) { + if ($graph->addImport($filePath, $importedDocPath)) { + continue; + } + + $this->rejectedImports++; + } + } + + return $documents; + } + + /** + * Get the count of imports that were rejected due to 
limits.
     *
     * If this is non-zero, the dependency graph may be incomplete and
     * incremental builds may miss some dependencies.
     */
    public function getRejectedImportCount(): int
    {
        return $this->rejectedImports;
    }

    /**
     * Check if any imports were rejected due to limits.
     *
     * If true, the dependency graph is incomplete and incremental builds
     * may miss some dependencies; a full rebuild is recommended.
     */
    public function hasRejectedImports(): bool
    {
        return $this->rejectedImports > 0;
    }

    /**
     * Find all documents that this document imports from.
     *
     * @return string[] Imported document paths
     */
    private function findImports(DocumentNode $document, ProjectNode $projectNode): array
    {
        $imports = [];
        $filePath = $document->getFilePath();

        $collect = function (Node $node) use (&$imports, $projectNode, $filePath): void {
            // Dispatch on the most specific reference node type first.
            if ($node instanceof DocReferenceNode) {
                $target = $this->resolveDocReference($node, $projectNode);
            } elseif ($node instanceof ReferenceNode) {
                $target = $this->resolveRefReference($node, $projectNode);
            } elseif ($node instanceof CrossReferenceNode) {
                $target = $this->resolveCrossReference($node, $projectNode);
            } else {
                return;
            }

            // Self-references carry no dependency information.
            if ($target !== null && $target !== $filePath) {
                $imports[] = $target;
            }
        };

        $this->traverseNodes($document->getChildren(), $collect);

        return array_unique($imports);
    }

    /**
     * Resolve a :doc: reference to its target document.
     *
     * Note: When the target document is not found in the project, the raw
     * target path is returned. This is intentional, as it tracks intended
     * dependencies even for unresolved references (e.g. references to
     * documents being added in the same build); the rendering phase handles
     * error reporting for actually missing documents.
     */
    private function resolveDocReference(DocReferenceNode $node, ProjectNode $projectNode): string|null
    {
        // Interlink references point at external projects - not a local dependency.
        if ($node->getInterlinkDomain() !== '') {
            return null;
        }

        $target = $node->getTargetReference();

        // Drop a trailing "#anchor" - only the document path matters here.
        $hashPos = strpos($target, '#');
        if ($hashPos !== false) {
            $target = substr($target, 0, $hashPos);
        }

        $entry = $projectNode->findDocumentEntry($target);
        if ($entry !== null) {
            return $entry->getFile();
        }

        // Unresolved: fall back to the raw target (see method doc).
        return $target !== '' ? $target : null;
    }

    /**
     * Resolve a :ref: reference to its target document.
     */
    private function resolveRefReference(ReferenceNode $node, ProjectNode $projectNode): string|null
    {
        // Interlink references point at external projects - not a local dependency.
        if ($node->getInterlinkDomain() !== '') {
            return null;
        }

        $anchor = $node->getTargetReference();

        // Prefer the node's own link type, then fall back to the default type.
        $target = $projectNode->getInternalTarget($anchor, $node->getLinkType())
            ?? $projectNode->getInternalTarget($anchor);

        return $target?->getDocumentPath();
    }

    /**
     * Resolve any CrossReferenceNode to its target document.
+ */ + private function resolveCrossReference(CrossReferenceNode $node, ProjectNode $projectNode): string|null + { + // Skip interlink references to external projects + if ($node->getInterlinkDomain() !== '') { + return null; + } + + // Try to resolve using the reference target + $targetAnchor = $node->getTargetReference(); + + $target = $projectNode->getInternalTarget($targetAnchor); + + return $target?->getDocumentPath(); + } +} diff --git a/packages/guides/src/Compiler/Passes/ExportsCollectorPass.php b/packages/guides/src/Compiler/Passes/ExportsCollectorPass.php new file mode 100644 index 000000000..1bcd45538 --- /dev/null +++ b/packages/guides/src/Compiler/Passes/ExportsCollectorPass.php @@ -0,0 +1,238 @@ +getProjectNode(); + $inputDir = $this->buildState->getInputDir(); + + foreach ($documents as $document) { + $docPath = $document->getFilePath(); + + // Collect anchors from this document + $anchors = $this->collectAnchors($document, $projectNode); + + // Collect section titles + $sectionTitles = $this->collectSectionTitles($document); + + // Collect citations (if any) + $citations = $this->collectCitations($document); + + // Compute content hash and mtime from the actual source file + // Logic flow: + // 1. If source file found and readable: use file hash + file mtime + // 2. If filemtime fails (permissions, etc.): use file hash + 0 mtime + // 3. If no source file or path traversal blocked: use document serialize hash + current time + $contentHash = ''; + $lastModified = 0; + + if ($inputDir !== '') { + $sourceFilePath = $this->findSourceFile($inputDir, $docPath); + if ($sourceFilePath !== null) { + // hashFile() already handles TOCTOU race (returns '' if file vanishes) + $contentHash = $this->hasher->hashFile($sourceFilePath); + // Suppress warning if file vanishes between hashFile and filemtime (TOCTOU) + $mtime = @filemtime($sourceFilePath); + $lastModified = $mtime !== false ? 
$mtime : 0; + } + } + + // Fallback: hash the document structure when source file not available + // This happens when: no inputDir set, file not found, or path traversal blocked + if ($contentHash === '') { + $contentHash = $this->hasher->hashContent(serialize($document)); + $lastModified = time(); + } + + // Get document title (first heading, used by :doc: references) + $documentTitle = $document->getTitle()?->toString() ?? ''; + + $exportsHash = $this->hasher->hashExports($anchors, $sectionTitles, $citations, $documentTitle); + + $exports = new DocumentExports( + documentPath: $docPath, + contentHash: $contentHash, + exportsHash: $exportsHash, + anchors: $anchors, + sectionTitles: $sectionTitles, + citations: $citations, + lastModified: $lastModified, + documentTitle: $documentTitle, + ); + + $this->buildState->setExports($docPath, $exports); + } + + return $documents; + } + + /** + * Find the source file for a document path. + * + * Includes path traversal protection to ensure the resolved path + * stays within the input directory. + */ + private function findSourceFile(string $inputDir, string $docPath): string|null + { + $inputDir = rtrim($inputDir, '/'); + + // Resolve the real path of the input directory for comparison + $realInputDir = realpath($inputDir); + if ($realInputDir === false) { + return null; + } + + // Try common extensions + foreach (['.rst', '.md', '.txt', ''] as $ext) { + $tryPath = $inputDir . '/' . $docPath . $ext; + if (!file_exists($tryPath)) { + continue; + } + + // Resolve the real path and verify it's within the input directory + // This prevents path traversal attacks via "../" in docPath + $realPath = realpath($tryPath); + if ($realPath === false) { + continue; + } + + // Check path is within input directory using trailing slash to prevent + // prefix attacks (e.g., /docs vs /docs-internal) + if (!str_starts_with($realPath . '/', $realInputDir . 
'/')) { + // Path traversal attempt detected - path is outside input directory + continue; + } + + return $realPath; + } + + return null; + } + + /** + * Collect all anchors defined in this document. + * + * @return array Anchor name => title + */ + private function collectAnchors(DocumentNode $document, ProjectNode $projectNode): array + { + $anchors = []; + $filePath = $document->getFilePath(); + + // Get all internal targets from the project node for this document + $allTargets = $projectNode->getAllInternalTargets(); + + foreach ($allTargets as $targets) { + foreach ($targets as $anchorName => $target) { + if ($target->getDocumentPath() !== $filePath) { + continue; + } + + $anchors[(string) $anchorName] = $target->getTitle() ?? (string) $anchorName; + } + } + + return $anchors; + } + + /** + * Collect section titles from this document. + * + * @return array Section ID => title + */ + private function collectSectionTitles(DocumentNode $document): array + { + $titles = []; + + $this->traverseNodes(array_values($document->getChildren()), static function (Node $node) use (&$titles): void { + if (!($node instanceof SectionNode)) { + return; + } + + $titles[$node->getId()] = $node->getTitle()->toString(); + }); + + return $titles; + } + + /** + * Collect citations defined in this document. 
+ * + * @return string[] + */ + private function collectCitations(DocumentNode $document): array + { + $citations = []; + + $this->traverseNodes(array_values($document->getChildren()), static function (Node $node) use (&$citations): void { + if (!($node instanceof CitationNode)) { + return; + } + + $citations[] = $node->getName(); + }); + + return $citations; + } +} diff --git a/packages/guides/src/Compiler/Passes/NodeTraversalTrait.php b/packages/guides/src/Compiler/Passes/NodeTraversalTrait.php new file mode 100644 index 000000000..3d0074091 --- /dev/null +++ b/packages/guides/src/Compiler/Passes/NodeTraversalTrait.php @@ -0,0 +1,75 @@ + $nodes + * @param callable(Node): void $callback + * @param int $depth Current recursion depth (internal use) + */ + private function traverseNodes(iterable $nodes, callable $callback, int $depth = 0): void + { + if ($depth > $this->getMaxTraversalDepth()) { + // Security: Silently stop traversal to prevent stack overflow from + // maliciously crafted deeply nested documents. Nodes beyond this + // depth will not have their callbacks invoked. This is intentional + // behavior - 100 levels is sufficient for any legitimate document. 
+ return; + } + + foreach ($nodes as $node) { + $callback($node); + + // Use is_callable to ensure the method is both present AND accessible + // (method_exists alone returns true for private/protected methods) + if (!is_callable([$node, 'getChildren'])) { + continue; + } + + $children = $node->getChildren(); + if (!is_iterable($children)) { + continue; + } + + $this->traverseNodes($children, $callback, $depth + 1); + } + } +} diff --git a/packages/guides/tests/unit/Build/IncrementalBuild/CacheVersioningTest.php b/packages/guides/tests/unit/Build/IncrementalBuild/CacheVersioningTest.php new file mode 100644 index 000000000..453c81bac --- /dev/null +++ b/packages/guides/tests/unit/Build/IncrementalBuild/CacheVersioningTest.php @@ -0,0 +1,234 @@ +createMetadata(); + + self::assertTrue($versioning->isCacheValid($metadata)); + } + + public function testIsCacheValidWithOldVersion(): void + { + $versioning = new CacheVersioning(); + $metadata = [ + 'version' => 0, // Old/different version + 'phpVersion' => PHP_VERSION, + ]; + + self::assertFalse($versioning->isCacheValid($metadata)); + } + + public function testIsCacheValidWithDifferentPhpMajorVersion(): void + { + $versioning = new CacheVersioning(); + $metadata = $versioning->createMetadata(); + $metadata['phpVersion'] = '7.4.0'; // Different major.minor version + + // Only false if current PHP is not 7.4 + if (PHP_MAJOR_VERSION !== 7 || PHP_MINOR_VERSION !== 4) { + self::assertFalse($versioning->isCacheValid($metadata)); + } else { + self::assertTrue($versioning->isCacheValid($metadata)); + } + } + + public function testIsCacheValidWithSameMajorMinorVersion(): void + { + $versioning = new CacheVersioning(); + $metadata = $versioning->createMetadata(); + // Simulate different patch version (should still be valid) + $metadata['phpVersion'] = PHP_MAJOR_VERSION . '.' . PHP_MINOR_VERSION . 
'.999'; + + self::assertTrue($versioning->isCacheValid($metadata)); + } + + public function testIsCacheValidWithEmptyMetadata(): void + { + $versioning = new CacheVersioning(); + + self::assertFalse($versioning->isCacheValid([])); + } + + public function testIsCacheValidWithMissingPhpVersion(): void + { + $versioning = new CacheVersioning(); + $metadata = ['version' => 1]; // Missing phpVersion + + self::assertFalse($versioning->isCacheValid($metadata)); + } + + public function testCreateMetadata(): void + { + $versioning = new CacheVersioning('2.0.0'); + $metadata = $versioning->createMetadata('settings-hash-123'); + + self::assertSame(1, $metadata['version']); + self::assertSame(PHP_VERSION, $metadata['phpVersion']); + self::assertSame('2.0.0', $metadata['packageVersion']); + self::assertSame('settings-hash-123', $metadata['settingsHash']); + self::assertIsInt($metadata['createdAt']); + self::assertGreaterThan(0, $metadata['createdAt']); + } + + public function testCreateMetadataWithEmptySettingsHash(): void + { + $versioning = new CacheVersioning(); + $metadata = $versioning->createMetadata(); + + self::assertSame('', $metadata['settingsHash']); + } + + public function testGetCacheVersion(): void + { + $versioning = new CacheVersioning(); + + self::assertSame(1, $versioning->getCacheVersion()); + } + + public function testDefaultPackageVersion(): void + { + $versioning = new CacheVersioning(); + $metadata = $versioning->createMetadata(); + + self::assertSame('1.0.0', $metadata['packageVersion']); + } + + public function testCustomPackageVersion(): void + { + $versioning = new CacheVersioning('3.14.159'); + $metadata = $versioning->createMetadata(); + + self::assertSame('3.14.159', $metadata['packageVersion']); + } + + public function testCreatedAtTimestamp(): void + { + $before = time(); + $versioning = new CacheVersioning(); + $metadata = $versioning->createMetadata(); + $after = time(); + + self::assertGreaterThanOrEqual($before, $metadata['createdAt']); + 
self::assertLessThanOrEqual($after, $metadata['createdAt']); + } + + public function testIsCacheValidWithSameMajorPackageVersion(): void + { + $versioning = new CacheVersioning('1.5.0'); + $metadata = $versioning->createMetadata(); + // Simulate cache from different minor version + $metadata['packageVersion'] = '1.2.3'; + + self::assertTrue($versioning->isCacheValid($metadata)); + } + + public function testIsCacheValidWithDifferentMajorPackageVersion(): void + { + $versioning = new CacheVersioning('2.0.0'); + $metadata = $versioning->createMetadata(); + // Simulate cache from v1 + $metadata['packageVersion'] = '1.9.9'; + + self::assertFalse($versioning->isCacheValid($metadata)); + } + + public function testIsCacheValidWithVPrefixedVersion(): void + { + $versioning = new CacheVersioning('v1.5.0'); + $metadata = $versioning->createMetadata(); + // Both with v prefix + $metadata['packageVersion'] = 'v1.2.3'; + + self::assertTrue($versioning->isCacheValid($metadata)); + } + + public function testIsCacheValidWithMixedVPrefix(): void + { + $versioning = new CacheVersioning('1.5.0'); + $metadata = $versioning->createMetadata(); + // Cache has v prefix, current doesn't + $metadata['packageVersion'] = 'v1.2.3'; + + self::assertTrue($versioning->isCacheValid($metadata)); + } + + public function testIsCacheValidWithInvalidPackageVersion(): void + { + $versioning = new CacheVersioning('1.0.0'); + $metadata = $versioning->createMetadata(); + // Invalid version string + $metadata['packageVersion'] = 'not-a-version'; + + self::assertFalse($versioning->isCacheValid($metadata)); + } + + public function testIsCacheValidWithMissingPackageVersion(): void + { + $versioning = new CacheVersioning('1.0.0'); + $metadata = $versioning->createMetadata(); + unset($metadata['packageVersion']); + + self::assertFalse($versioning->isCacheValid($metadata)); + } + + public function testIsCacheValidWithNonStringPackageVersion(): void + { + $versioning = new CacheVersioning('1.0.0'); + $metadata = 
$versioning->createMetadata(); + $metadata['packageVersion'] = 123; // Non-string + + self::assertFalse($versioning->isCacheValid($metadata)); + } + + public function testIsCacheValidWithMajorVersionZero(): void + { + $versioning = new CacheVersioning('0.9.0'); + $metadata = $versioning->createMetadata(); + $metadata['packageVersion'] = '0.1.0'; + + self::assertTrue($versioning->isCacheValid($metadata)); + } + + public function testIsCacheValidWithPreReleaseVersion(): void + { + $versioning = new CacheVersioning('1.0.0-alpha'); + $metadata = $versioning->createMetadata(); + $metadata['packageVersion'] = '1.2.3-beta.1'; + + // Major version is still 1, so compatible + self::assertTrue($versioning->isCacheValid($metadata)); + } + + public function testIsCacheValidWithMajorOnlyVersion(): void + { + $versioning = new CacheVersioning('2'); + $metadata = $versioning->createMetadata(); + $metadata['packageVersion'] = '2.0.0'; + + self::assertTrue($versioning->isCacheValid($metadata)); + } +} diff --git a/packages/guides/tests/unit/Build/IncrementalBuild/ChangeDetectionResultTest.php b/packages/guides/tests/unit/Build/IncrementalBuild/ChangeDetectionResultTest.php new file mode 100644 index 000000000..2be08c227 --- /dev/null +++ b/packages/guides/tests/unit/Build/IncrementalBuild/ChangeDetectionResultTest.php @@ -0,0 +1,138 @@ +getFilesToProcess(), + ); + } + + public function testHasChangesReturnsTrueForDirtyFiles(): void + { + $result = new ChangeDetectionResult( + dirty: ['changed.rst'], + clean: [], + new: [], + deleted: [], + ); + + self::assertTrue($result->hasChanges()); + } + + public function testHasChangesReturnsTrueForNewFiles(): void + { + $result = new ChangeDetectionResult( + dirty: [], + clean: [], + new: ['new.rst'], + deleted: [], + ); + + self::assertTrue($result->hasChanges()); + } + + public function testHasChangesReturnsTrueForDeletedFiles(): void + { + $result = new ChangeDetectionResult( + dirty: [], + clean: [], + new: [], + deleted: 
['deleted.rst'], + ); + + self::assertTrue($result->hasChanges()); + } + + public function testHasChangesReturnsFalseWhenOnlyClean(): void + { + $result = new ChangeDetectionResult( + dirty: [], + clean: ['file1.rst', 'file2.rst'], + new: [], + deleted: [], + ); + + self::assertFalse($result->hasChanges()); + } + + public function testGetChangeCountSumsCorrectly(): void + { + $result = new ChangeDetectionResult( + dirty: ['d1.rst', 'd2.rst'], + clean: ['c1.rst', 'c2.rst', 'c3.rst'], + new: ['n1.rst'], + deleted: ['del1.rst', 'del2.rst', 'del3.rst'], + ); + + // dirty(2) + new(1) + deleted(3) = 6 + self::assertSame(6, $result->getChangeCount()); + } + + public function testGetChangeCountDoesNotCountClean(): void + { + $result = new ChangeDetectionResult( + dirty: [], + clean: ['c1.rst', 'c2.rst', 'c3.rst'], + new: [], + deleted: [], + ); + + self::assertSame(0, $result->getChangeCount()); + } + + public function testToArrayReturnsAllCategories(): void + { + $result = new ChangeDetectionResult( + dirty: ['dirty.rst'], + clean: ['clean.rst'], + new: ['new.rst'], + deleted: ['deleted.rst'], + ); + + self::assertSame([ + 'dirty' => ['dirty.rst'], + 'clean' => ['clean.rst'], + 'new' => ['new.rst'], + 'deleted' => ['deleted.rst'], + ], $result->toArray()); + } + + public function testEmptyResult(): void + { + $result = new ChangeDetectionResult( + dirty: [], + clean: [], + new: [], + deleted: [], + ); + + self::assertSame([], $result->getFilesToProcess()); + self::assertFalse($result->hasChanges()); + self::assertSame(0, $result->getChangeCount()); + } +} diff --git a/packages/guides/tests/unit/Build/IncrementalBuild/ChangeDetectorTest.php b/packages/guides/tests/unit/Build/IncrementalBuild/ChangeDetectorTest.php new file mode 100644 index 000000000..12e2f11be --- /dev/null +++ b/packages/guides/tests/unit/Build/IncrementalBuild/ChangeDetectorTest.php @@ -0,0 +1,405 @@ +hasher = new ContentHasher(); + $this->detector = new ChangeDetector($this->hasher); + $this->tempDir = 
sys_get_temp_dir() . '/change-detector-test-' . uniqid(); + mkdir($this->tempDir, 0755, true); + } + + protected function tearDown(): void + { + foreach ($this->tempFiles as $file) { + if (!file_exists($file) && !is_link($file)) { + continue; + } + + unlink($file); + } + + if (!is_dir($this->tempDir)) { + return; + } + + rmdir($this->tempDir); + } + + private function createTempFile(string $name, string $content): string + { + $path = $this->tempDir . '/' . $name; + file_put_contents($path, $content); + $this->tempFiles[] = $path; + + return $path; + } + + public function testDetectsNewFiles(): void + { + $result = $this->detector->detectChanges( + ['doc1', 'doc2'], + [], // No cached exports = all files are new + ); + + self::assertSame(['doc1', 'doc2'], $result->new); + self::assertSame([], $result->dirty); + self::assertSame([], $result->clean); + self::assertSame([], $result->deleted); + } + + public function testDetectsDeletedFiles(): void + { + $cachedExports = [ + 'existing' => $this->createExports('existing', 'hash1', 100), + 'deleted' => $this->createExports('deleted', 'hash2', 100), + ]; + + $result = $this->detector->detectChanges( + ['existing'], // Only 'existing' is present + $cachedExports, + ); + + self::assertSame(['deleted'], $result->deleted); + } + + public function testDetectsChangedContentWithHashMismatch(): void + { + $filePath = $this->createTempFile('doc.rst', 'new content'); + clearstatcache(true, $filePath); + $currentMtime = (int) filemtime($filePath); + + // Cache has different content hash but same mtime + $cachedExports = [ + $filePath => $this->createExports($filePath, 'old-hash', $currentMtime + 1), + ]; + + $result = $this->detector->detectChanges([$filePath], $cachedExports); + + self::assertSame([$filePath], $result->dirty); + self::assertSame([], $result->clean); + } + + public function testFastPathWhenMtimeUnchanged(): void + { + $filePath = $this->createTempFile('unchanged.rst', 'content'); + clearstatcache(true, $filePath); + 
$mtime = (int) filemtime($filePath); + $hash = $this->hasher->hashFile($filePath); + + $cachedExports = [ + $filePath => $this->createExports($filePath, $hash, $mtime), + ]; + + $result = $this->detector->detectChanges([$filePath], $cachedExports); + + self::assertSame([], $result->dirty); + self::assertSame([$filePath], $result->clean); + + // Verify fast path was used + $stats = $this->detector->getStats(); + self::assertSame(1, $stats['fastPathHits']); + self::assertSame(0, $stats['hashComputations']); + } + + public function testMtimeChangeWithSameContentIsClean(): void + { + $filePath = $this->createTempFile('touched.rst', 'same content'); + clearstatcache(true, $filePath); + $hash = $this->hasher->hashFile($filePath); + $oldMtime = (int) filemtime($filePath) - 100; // Pretend old mtime was different + + $cachedExports = [ + $filePath => $this->createExports($filePath, $hash, $oldMtime), + ]; + + $result = $this->detector->detectChanges([$filePath], $cachedExports); + + // Should be clean because content hash matches + self::assertSame([], $result->dirty); + self::assertSame([$filePath], $result->clean); + + // Hash computation was needed because mtime changed + $stats = $this->detector->getStats(); + self::assertSame(0, $stats['fastPathHits']); + self::assertSame(1, $stats['hashComputations']); + } + + public function testHasFileChangedReturnsTrueForNewFile(): void + { + self::assertTrue($this->detector->hasFileChanged('/some/path', null)); + } + + public function testHasFileChangedReturnsFalseWhenMtimeMatches(): void + { + $filePath = $this->createTempFile('check.rst', 'content'); + clearstatcache(true, $filePath); + $mtime = (int) filemtime($filePath); + $hash = $this->hasher->hashFile($filePath); + + $cached = $this->createExports($filePath, $hash, $mtime); + + self::assertFalse($this->detector->hasFileChanged($filePath, $cached)); + } + + public function testHasFileChangedReturnsTrueWhenContentDiffers(): void + { + $filePath = 
$this->createTempFile('changed.rst', 'new content'); + clearstatcache(true, $filePath); + $mtime = (int) filemtime($filePath); + + // Cached has different hash + $cached = $this->createExports($filePath, 'different-hash', $mtime + 1); + + self::assertTrue($this->detector->hasFileChanged($filePath, $cached)); + } + + public function testGetFileMtimeReturnsZeroForNonexistent(): void + { + self::assertSame(0, $this->detector->getFileMtime('/nonexistent/file.rst')); + } + + public function testGetFileMtimeReturnsActualMtime(): void + { + $filePath = $this->createTempFile('mtime.rst', 'content'); + clearstatcache(true, $filePath); + $expected = (int) filemtime($filePath); + + self::assertSame($expected, $this->detector->getFileMtime($filePath)); + } + + public function testDetectChangesWithResolverUsesCustomResolver(): void + { + $filePath = $this->createTempFile('resolved.rst', 'content'); + + $result = $this->detector->detectChangesWithResolver( + ['doc-path'], + [], + static fn (string $docPath) => $filePath, // Custom resolver + ); + + self::assertSame(['doc-path'], $result->new); + } + + public function testStatsResetBetweenDetectionRuns(): void + { + $file1 = $this->createTempFile('file1.rst', 'content1'); + $file2 = $this->createTempFile('file2.rst', 'content2'); + clearstatcache(true); + + // First run with two files + $this->detector->detectChanges([$file1, $file2], []); + + // Second run with one file + $this->detector->detectChanges([$file1], []); + + $stats = $this->detector->getStats(); + // Stats should reflect only the last run + self::assertSame(0, $stats['fastPathHits']); + self::assertSame(0, $stats['hashComputations']); + } + + public function testEmptyFileIsDetectedCorrectly(): void + { + $filePath = $this->createTempFile('empty.rst', ''); + clearstatcache(true, $filePath); + $hash = $this->hasher->hashFile($filePath); + $mtime = (int) filemtime($filePath); + + $cachedExports = [ + $filePath => $this->createExports($filePath, $hash, $mtime), + ]; + + 
$result = $this->detector->detectChanges([$filePath], $cachedExports); + + // Empty file should be clean if hash matches + self::assertSame([], $result->dirty); + self::assertSame([$filePath], $result->clean); + } + + public function testZeroMtimeForcesHashComputation(): void + { + $filePath = $this->createTempFile('zero-mtime.rst', 'content'); + clearstatcache(true, $filePath); + $hash = $this->hasher->hashFile($filePath); + + // Cache with zero mtime should force hash computation + $cachedExports = [ + $filePath => $this->createExports($filePath, $hash, 0), + ]; + + $result = $this->detector->detectChanges([$filePath], $cachedExports); + + // Should be clean because content hash matches + self::assertSame([], $result->dirty); + self::assertSame([$filePath], $result->clean); + + // Hash was computed because cached mtime was 0 + $stats = $this->detector->getStats(); + self::assertSame(0, $stats['fastPathHits']); + self::assertSame(1, $stats['hashComputations']); + } + + public function testResolverReturnsNonExistentPath(): void + { + $cachedExports = [ + 'doc-path' => $this->createExports('doc-path', 'old-hash', 12_345), + ]; + + $result = $this->detector->detectChangesWithResolver( + ['doc-path'], + $cachedExports, + static fn (string $docPath) => '/nonexistent/path/' . $docPath . 
'.rst', + ); + + // Non-existent file should be detected as dirty (mtime=0 differs from cache) + self::assertSame(['doc-path'], $result->dirty); + self::assertSame([], $result->clean); + } + + public function testMixedChangesInSingleRun(): void + { + // Set up files + $cleanFile = $this->createTempFile('clean.rst', 'clean content'); + $dirtyFile = $this->createTempFile('dirty.rst', 'new dirty content'); + clearstatcache(true); + + $cleanMtime = (int) filemtime($cleanFile); + $cleanHash = $this->hasher->hashFile($cleanFile); + + $cachedExports = [ + $cleanFile => $this->createExports($cleanFile, $cleanHash, $cleanMtime), + $dirtyFile => $this->createExports($dirtyFile, 'old-hash', (int) filemtime($dirtyFile) + 1), + 'deleted-file' => $this->createExports('deleted-file', 'hash', 12_345), + ]; + + $result = $this->detector->detectChanges( + [$cleanFile, $dirtyFile, 'new-file'], + $cachedExports, + ); + + self::assertSame([$dirtyFile], $result->dirty); + self::assertSame([$cleanFile], $result->clean); + self::assertSame(['new-file'], $result->new); + self::assertSame(['deleted-file'], $result->deleted); + } + + public function testSymlinkTargetIsHashed(): void + { + $targetFile = $this->createTempFile('target.rst', 'symlink content'); + $symlinkPath = $this->tempDir . 
'/symlink.rst'; + + // Create symlink if possible + if (!@symlink($targetFile, $symlinkPath)) { + self::markTestSkipped('Symlinks not supported on this system'); + } + + $this->tempFiles[] = $symlinkPath; + clearstatcache(true, $symlinkPath); + clearstatcache(true, $targetFile); + + // Verify symlink was created + self::assertTrue(is_link($symlinkPath)); + + $mtime = (int) filemtime($symlinkPath); + $hash = $this->hasher->hashFile($symlinkPath); + + $cachedExports = [ + $symlinkPath => $this->createExports($symlinkPath, $hash, $mtime), + ]; + + $result = $this->detector->detectChanges([$symlinkPath], $cachedExports); + + self::assertSame([], $result->dirty); + self::assertSame([$symlinkPath], $result->clean); + } + + public function testFileDeletedBetweenMtimeAndHashCheck(): void + { + // This tests behavior when mtime check passes but hash fails due to deletion + // In practice, this is a race condition edge case + $filePath = '/nonexistent/deleted-during-check.rst'; + + $cachedExports = [ + $filePath => $this->createExports($filePath, 'old-hash', 0), // mtime=0 forces hash check + ]; + + $result = $this->detector->detectChanges([$filePath], $cachedExports); + + // File doesn't exist, hash will be empty, different from 'old-hash' -> dirty + self::assertSame([$filePath], $result->dirty); + } + + public function testHasFileChangedWithZeroMtimeCache(): void + { + $filePath = $this->createTempFile('has-changed.rst', 'content'); + clearstatcache(true, $filePath); + $hash = $this->hasher->hashFile($filePath); + + // Cached mtime is 0, should force hash comparison + $cached = $this->createExports($filePath, $hash, 0); + + // Content same, so should report unchanged + self::assertFalse($this->detector->hasFileChanged($filePath, $cached)); + } + + public function testHasFileChangedWithZeroMtimeCacheAndDifferentHash(): void + { + $filePath = $this->createTempFile('has-changed2.rst', 'new content'); + clearstatcache(true, $filePath); + + // Cached mtime is 0, different hash 
+ $cached = $this->createExports($filePath, 'old-hash', 0); + + self::assertTrue($this->detector->hasFileChanged($filePath, $cached)); + } + + private function createExports(string $path, string $contentHash, int $mtime): DocumentExports + { + return new DocumentExports( + documentPath: $path, + contentHash: $contentHash, + exportsHash: 'exports-hash', + anchors: [], + sectionTitles: [], + citations: [], + lastModified: $mtime, + documentTitle: 'Test', + ); + } +} diff --git a/packages/guides/tests/unit/Build/IncrementalBuild/ContentHasherTest.php b/packages/guides/tests/unit/Build/IncrementalBuild/ContentHasherTest.php new file mode 100644 index 000000000..993d1c142 --- /dev/null +++ b/packages/guides/tests/unit/Build/IncrementalBuild/ContentHasherTest.php @@ -0,0 +1,111 @@ +hasher = new ContentHasher(); + $this->tempFile = sys_get_temp_dir() . '/content-hasher-test-' . uniqid() . '.txt'; + } + + protected function tearDown(): void + { + if (!file_exists($this->tempFile)) { + return; + } + + unlink($this->tempFile); + } + + public function testHashFileReturnsConsistentHash(): void + { + file_put_contents($this->tempFile, 'test content'); + + $hash1 = $this->hasher->hashFile($this->tempFile); + $hash2 = $this->hasher->hashFile($this->tempFile); + + self::assertSame($hash1, $hash2); + self::assertNotEmpty($hash1); + } + + public function testHashFileReturnsDifferentHashForDifferentContent(): void + { + file_put_contents($this->tempFile, 'content A'); + $hash1 = $this->hasher->hashFile($this->tempFile); + + file_put_contents($this->tempFile, 'content B'); + $hash2 = $this->hasher->hashFile($this->tempFile); + + self::assertNotSame($hash1, $hash2); + } + + public function testHashFileReturnsEmptyForNonexistentFile(): void + { + $hash = $this->hasher->hashFile('/nonexistent/file.txt'); + + self::assertSame('', $hash); + } + + public function testHashContentReturnsConsistentHash(): void + { + $hash1 = $this->hasher->hashContent('test string'); + $hash2 = 
$this->hasher->hashContent('test string'); + + self::assertSame($hash1, $hash2); + self::assertNotEmpty($hash1); + } + + public function testHashExportsIsDeterministic(): void + { + $anchors = ['section-a' => 'Section A', 'section-b' => 'Section B']; + $titles = ['h1' => 'Title One', 'h2' => 'Title Two']; + $citations = ['cite1', 'cite2']; + + $hash1 = $this->hasher->hashExports($anchors, $titles, $citations, 'Doc Title'); + $hash2 = $this->hasher->hashExports($anchors, $titles, $citations, 'Doc Title'); + + self::assertSame($hash1, $hash2); + } + + public function testHashExportsChangesWithDifferentAnchors(): void + { + $titles = ['h1' => 'Title']; + $citations = []; + + $hash1 = $this->hasher->hashExports(['a' => 'A'], $titles, $citations, ''); + $hash2 = $this->hasher->hashExports(['b' => 'B'], $titles, $citations, ''); + + self::assertNotSame($hash1, $hash2); + } + + public function testGetAlgorithmReturnsValidAlgorithm(): void + { + $algorithm = $this->hasher->getAlgorithm(); + + self::assertContains($algorithm, ['xxh128', 'sha256']); + } +} diff --git a/packages/guides/tests/unit/Build/IncrementalBuild/DependencyGraphTest.php b/packages/guides/tests/unit/Build/IncrementalBuild/DependencyGraphTest.php new file mode 100644 index 000000000..030d95db2 --- /dev/null +++ b/packages/guides/tests/unit/Build/IncrementalBuild/DependencyGraphTest.php @@ -0,0 +1,564 @@ +addImport('docA', 'docB'); + + self::assertSame(['docB'], $graph->getImports('docA')); + self::assertSame(['docA'], $graph->getDependents('docB')); + } + + public function testSelfReferencesAreIgnored(): void + { + $graph = new DependencyGraph(); + $graph->addImport('docA', 'docA'); + + self::assertSame([], $graph->getImports('docA')); + self::assertSame([], $graph->getDependents('docA')); + } + + public function testPropagateDirtyFindsDependents(): void + { + $graph = new DependencyGraph(); + // A imports B, C imports A + $graph->addImport('docA', 'docB'); + $graph->addImport('docC', 'docA'); + + // If B 
is dirty, both A and C should be dirty (transitive) + $dirty = $graph->propagateDirty(['docB']); + + self::assertContains('docB', $dirty); + self::assertContains('docA', $dirty); + self::assertContains('docC', $dirty); + self::assertCount(3, $dirty); + } + + public function testPropagateDirtyHandlesCycles(): void + { + $graph = new DependencyGraph(); + // Circular: A -> B -> C -> A + $graph->addImport('docA', 'docB'); + $graph->addImport('docB', 'docC'); + $graph->addImport('docC', 'docA'); + + // Should not infinite loop + $dirty = $graph->propagateDirty(['docA']); + + self::assertContains('docA', $dirty); + self::assertContains('docB', $dirty); + self::assertContains('docC', $dirty); + self::assertCount(3, $dirty); + } + + public function testRemoveDocumentClearsAllEdges(): void + { + $graph = new DependencyGraph(); + $graph->addImport('docA', 'docB'); + $graph->addImport('docC', 'docA'); + + $graph->removeDocument('docA'); + + self::assertSame([], $graph->getImports('docA')); + self::assertSame([], $graph->getDependents('docA')); + self::assertSame([], $graph->getDependents('docB')); // A was removed + self::assertSame([], $graph->getImports('docC')); // A was removed + } + + public function testClearImportsForRemovesOnlyImports(): void + { + $graph = new DependencyGraph(); + $graph->addImport('docA', 'docB'); + $graph->addImport('docA', 'docC'); + $graph->addImport('docD', 'docA'); // A is also a dependency + + $graph->clearImportsFor('docA'); + + self::assertSame([], $graph->getImports('docA')); + self::assertSame([], $graph->getDependents('docB')); + self::assertSame([], $graph->getDependents('docC')); + // But A should still be a dependency for D + self::assertSame(['docA'], $graph->getImports('docD')); + } + + public function testSerializationRoundTrip(): void + { + $graph = new DependencyGraph(); + $graph->addImport('docA', 'docB'); + $graph->addImport('docA', 'docC'); + $graph->addImport('docD', 'docA'); + + $array = $graph->toArray(); + $restored = 
DependencyGraph::fromArray($array); + + self::assertSame($graph->getImports('docA'), $restored->getImports('docA')); + self::assertSame($graph->getDependents('docA'), $restored->getDependents('docA')); + self::assertSame($graph->getDependents('docB'), $restored->getDependents('docB')); + } + + public function testMerge(): void + { + $graph1 = new DependencyGraph(); + $graph1->addImport('docA', 'docB'); + + $graph2 = new DependencyGraph(); + $graph2->addImport('docC', 'docD'); + $graph2->addImport('docA', 'docE'); // Additional import for docA + + $graph1->merge($graph2); + + self::assertContains('docB', $graph1->getImports('docA')); + self::assertContains('docE', $graph1->getImports('docA')); + self::assertSame(['docD'], $graph1->getImports('docC')); + } + + public function testGetAllDocuments(): void + { + $graph = new DependencyGraph(); + $graph->addImport('docA', 'docB'); + $graph->addImport('docC', 'docD'); + + $allDocs = $graph->getAllDocuments(); + + self::assertContains('docA', $allDocs); + self::assertContains('docB', $allDocs); + self::assertContains('docC', $allDocs); + self::assertContains('docD', $allDocs); + } + + public function testGetStats(): void + { + $graph = new DependencyGraph(); + $graph->addImport('docA', 'docB'); + $graph->addImport('docA', 'docC'); + $graph->addImport('docD', 'docA'); + + $stats = $graph->getStats(); + + self::assertSame(4, $stats['documents']); + self::assertSame(3, $stats['edges']); + } + + public function testFromArrayThrowsOnInvalidImports(): void + { + $this->expectException(InvalidArgumentException::class); + $this->expectExceptionMessage('expected imports to be array'); + + DependencyGraph::fromArray(['imports' => 'not-an-array']); + } + + public function testFromArrayThrowsOnInvalidDependents(): void + { + $this->expectException(InvalidArgumentException::class); + $this->expectExceptionMessage('expected dependents to be array'); + + DependencyGraph::fromArray(['dependents' => 123]); + } + + public function 
testFromArrayThrowsOnInvalidImportValue(): void + { + $this->expectException(InvalidArgumentException::class); + $this->expectExceptionMessage('expected import value for "docA" to be array'); + + DependencyGraph::fromArray(['imports' => ['docA' => 'not-an-array']]); + } + + public function testFromArrayThrowsOnInvalidImportTarget(): void + { + $this->expectException(InvalidArgumentException::class); + $this->expectExceptionMessage('expected import target for "docA" to be string'); + + DependencyGraph::fromArray(['imports' => ['docA' => [123]]]); + } + + public function testFromArrayThrowsOnInvalidDependentValue(): void + { + $this->expectException(InvalidArgumentException::class); + $this->expectExceptionMessage('expected dependent value for "docB" to be array'); + + DependencyGraph::fromArray(['dependents' => ['docB' => 'not-an-array']]); + } + + public function testFromArrayThrowsOnInvalidDependentSource(): void + { + $this->expectException(InvalidArgumentException::class); + $this->expectExceptionMessage('expected dependent source for "docB" to be string'); + + DependencyGraph::fromArray(['dependents' => ['docB' => [456]]]); + } + + public function testPropagateDirtyIteratorYieldsSameResultsAsPropageDirty(): void + { + $graph = new DependencyGraph(); + $graph->addImport('docA', 'docB'); + $graph->addImport('docC', 'docA'); + $graph->addImport('docD', 'docC'); + + $arrayResult = $graph->propagateDirty(['docB']); + $iteratorResult = iterator_to_array($graph->propagateDirtyIterator(['docB'])); + + // Both should return the same documents (order may differ) + sort($arrayResult); + sort($iteratorResult); + self::assertSame($arrayResult, $iteratorResult); + } + + public function testGetStatsOnEmptyGraph(): void + { + $graph = new DependencyGraph(); + $stats = $graph->getStats(); + + self::assertSame(0, $stats['documents']); + self::assertSame(0, $stats['edges']); + self::assertSame(0.0, $stats['avgImportsPerDoc']); + } + + public function 
testFromArrayHandlesIntegerKeys(): void + { + // JSON decode turns numeric string keys like "123" into integer keys + // This simulates what happens when you json_decode a graph with numeric document paths + $data = [ + 'imports' => [123 => ['doc456', 'doc789']], + 'dependents' => ['doc456' => ['123'], 'doc789' => ['123']], + ]; + + $graph = DependencyGraph::fromArray($data); + + // Should work correctly with integer-to-string key conversion + self::assertSame(['doc456', 'doc789'], $graph->getImports('123')); + self::assertSame(['123'], $graph->getDependents('doc456')); + } + + public function testFromArrayThrowsOnExcessiveImportsPerDocument(): void + { + $this->expectException(InvalidArgumentException::class); + $this->expectExceptionMessage('exceed maximum'); + + // Create array with more than MAX_IMPORTS_PER_DOCUMENT (1000) for one doc + $targets = []; + for ($i = 0; $i < 1001; $i++) { + $targets[] = 'target' . $i; + } + + DependencyGraph::fromArray(['imports' => ['docA' => $targets]]); + } + + public function testFromArrayThrowsOnExcessiveDependentsPerDocument(): void + { + $this->expectException(InvalidArgumentException::class); + $this->expectExceptionMessage('exceed maximum'); + + // Create array with more than MAX_IMPORTS_PER_DOCUMENT (1000) dependents + $sources = []; + for ($i = 0; $i < 1001; $i++) { + $sources[] = 'source' . $i; + } + + DependencyGraph::fromArray(['dependents' => ['docB' => $sources]]); + } + + public function testToArraySerializesNumericKeysAsStrings(): void + { + $graph = new DependencyGraph(); + $graph->addImport('123', '456'); + + $array = $graph->toArray(); + + // Keys should be strings, not integers + $imports = $array['imports']; + $dependents = $array['dependents']; + + self::assertArrayHasKey('123', $imports); + self::assertContains('456', $imports['123'] ?? []); + self::assertArrayHasKey('456', $dependents); + self::assertContains('123', $dependents['456'] ?? 
[]); + } + + public function testAddImportReturnsTrueOnSuccess(): void + { + $graph = new DependencyGraph(); + + self::assertTrue($graph->addImport('docA', 'docB')); + self::assertSame(['docB'], $graph->getImports('docA')); + } + + public function testAddImportReturnsTrueForSelfReference(): void + { + $graph = new DependencyGraph(); + + // Self-references are silently ignored, not an error + self::assertTrue($graph->addImport('docA', 'docA')); + self::assertSame([], $graph->getImports('docA')); + } + + public function testAddImportReturnsTrueForDuplicateEdge(): void + { + $graph = new DependencyGraph(); + $graph->addImport('docA', 'docB'); + + // Adding same edge again should succeed (idempotent) + self::assertTrue($graph->addImport('docA', 'docB')); + } + + public function testValidateLimitsPassesForValidGraph(): void + { + $graph = new DependencyGraph(); + $graph->addImport('docA', 'docB'); + $graph->addImport('docA', 'docC'); + + // Should not throw + $graph->validateLimits(); + self::assertTrue(true); // Reached here without exception + } + + public function testValidateLimitsThrowsOnExcessiveDocuments(): void + { + $this->expectException(InvalidArgumentException::class); + $this->expectExceptionMessage('exceed maximum'); + + // Create a graph that exceeds MAX_DOCUMENTS via fromArray (which allows direct setting) + $imports = []; + for ($i = 0; $i < 100_001; $i++) { + $imports['doc' . 
$i] = []; + } + + // Use reflection to directly set imports to bypass addImport limits + $graph = new DependencyGraph(); + $reflection = new ReflectionClass($graph); + $property = $reflection->getProperty('imports'); + $property->setValue($graph, $imports); + + $graph->validateLimits(); + } + + public function testValidateLimitsThrowsOnExcessiveImportsPerDocument(): void + { + $this->expectException(InvalidArgumentException::class); + $this->expectExceptionMessage('exceed maximum'); + + // Create a graph with too many imports for one document + $toMap = []; + for ($i = 0; $i < 1001; $i++) { + $toMap['target' . $i] = true; + } + + $graph = new DependencyGraph(); + $reflection = new ReflectionClass($graph); + $property = $reflection->getProperty('imports'); + $property->setValue($graph, ['docA' => $toMap]); + + $graph->validateLimits(); + } + + public function testMergeAndValidateLimits(): void + { + $graph1 = new DependencyGraph(); + $graph1->addImport('docA', 'docB'); + + $graph2 = new DependencyGraph(); + $graph2->addImport('docC', 'docD'); + + $graph1->merge($graph2); + $graph1->validateLimits(); // Should not throw + + self::assertContains('docB', $graph1->getImports('docA')); + self::assertContains('docD', $graph1->getImports('docC')); + } + + public function testFromArrayThrowsOnExcessiveDocumentCount(): void + { + $this->expectException(InvalidArgumentException::class); + $this->expectExceptionMessage('exceed maximum'); + + // Create array with more than MAX_DOCUMENTS (100000) imports entries + $imports = []; + for ($i = 0; $i < 100_001; $i++) { + $imports['doc' . 
$i] = []; + } + + DependencyGraph::fromArray(['imports' => $imports]); + } + + public function testValidateLimitsThrowsOnExcessiveDependentsDocumentCount(): void + { + $this->expectException(InvalidArgumentException::class); + $this->expectExceptionMessage('dependents exceed maximum'); + + // Create a graph with excessive dependents via reflection + $dependents = []; + for ($i = 0; $i < 100_001; $i++) { + $dependents['doc' . $i] = []; + } + + $graph = new DependencyGraph(); + $reflection = new ReflectionClass($graph); + $property = $reflection->getProperty('dependents'); + $property->setValue($graph, $dependents); + + $graph->validateLimits(); + } + + public function testValidateLimitsThrowsOnExcessiveDependentsPerDocument(): void + { + $this->expectException(InvalidArgumentException::class); + $this->expectExceptionMessage('dependents for'); + + // Create a graph with too many dependents for one document + $fromMap = []; + for ($i = 0; $i < 1001; $i++) { + $fromMap['source' . $i] = true; + } + + $graph = new DependencyGraph(); + $reflection = new ReflectionClass($graph); + $property = $reflection->getProperty('dependents'); + $property->setValue($graph, ['docTarget' => $fromMap]); + + $graph->validateLimits(); + } + + public function testValidateLimitsThrowsOnExcessiveTotalEdges(): void + { + $this->expectException(InvalidArgumentException::class); + $this->expectExceptionMessage('total edges'); + + // Create a graph with excessive total edges via reflection + // We can't easily create 2M+ edges, so we'll set edgeCount directly + $graph = new DependencyGraph(); + $reflection = new ReflectionClass($graph); + $edgeProperty = $reflection->getProperty('edgeCount'); + $edgeProperty->setValue($graph, 2_000_001); // Just over MAX_TOTAL_EDGES + + $graph->validateLimits(); + } + + public function testFromArrayThrowsOnExcessiveTotalEdges(): void + { + $this->expectException(InvalidArgumentException::class); + $this->expectExceptionMessage('total edges'); + + // Create a 
graph structure with many small edge lists that sum to >2M + // This is more realistic than 2M+ edges in one document + // We'll create 2001 documents with 1000 edges each = 2,001,000 edges + $imports = []; + for ($doc = 0; $doc < 2001; $doc++) { + $targets = []; + for ($target = 0; $target < 1000; $target++) { + $targets[] = 'target_' . $doc . '_' . $target; + } + + $imports['doc' . $doc] = $targets; + } + + DependencyGraph::fromArray(['imports' => $imports]); + } + + public function testAddImportReturnsFalseWhenTotalEdgeLimitReached(): void + { + $graph = new DependencyGraph(); + $reflection = new ReflectionClass($graph); + $edgeProperty = $reflection->getProperty('edgeCount'); + $edgeProperty->setValue($graph, 2_000_000); // At MAX_TOTAL_EDGES + + // Should return false when limit is reached + self::assertFalse($graph->addImport('docNew', 'docTarget')); + } + + /** + * Stress test: verify graph operations work correctly near MAX_DOCUMENTS limit. + * + * This test creates a graph with 1000 documents (scaled down from 100k for + * test performance) and verifies that all operations work correctly at scale. + * + * @group stress + */ + public function testStressTestNearMaxDocumentsLimit(): void + { + $graph = new DependencyGraph(); + $docCount = 1000; // Scaled down from MAX_DOCUMENTS (100k) for test speed + + // Create a chain of dependencies: doc0 <- doc1 <- doc2 <- ... <- doc999 + // This creates a worst-case propagation scenario (linear chain) + for ($i = 1; $i < $docCount; $i++) { + $result = $graph->addImport('doc' . $i, 'doc' . ($i - 1)); + self::assertTrue($result, 'Failed to add import at index ' . 
$i); + } + + // Verify graph stats + $stats = $graph->getStats(); + self::assertSame($docCount, $stats['documents']); + self::assertSame($docCount - 1, $stats['edges']); + + // Verify propagation from root finds all dependents + $dirty = $graph->propagateDirty(['doc0']); + self::assertCount($docCount, $dirty, 'Propagation should find all documents in chain'); + + // Verify propagation from middle finds downstream only + $midpoint = (int) ($docCount / 2); + $dirty = $graph->propagateDirty(['doc' . $midpoint]); + self::assertCount($docCount - $midpoint, $dirty, 'Propagation should find downstream documents'); + + // Verify serialization round-trip preserves graph + $serialized = $graph->toArray(); + $restored = DependencyGraph::fromArray($serialized); + self::assertSame($stats, $restored->getStats()); + + // Verify removal works correctly + $graph->removeDocument('doc' . $midpoint); + $stats = $graph->getStats(); + self::assertSame($docCount - 1, $stats['documents']); + } + + /** + * Stress test: verify graph handles fan-out pattern (one doc referenced by many). + * + * @group stress + */ + public function testStressTestFanOutPattern(): void + { + $graph = new DependencyGraph(); + $fanOutCount = 500; // Number of documents importing from a single source + + // Create fan-out: many documents import from 'shared' + for ($i = 0; $i < $fanOutCount; $i++) { + $result = $graph->addImport('consumer' . 
$i, 'shared'); + self::assertTrue($result); + } + + // Verify all consumers are dependents of 'shared' + $dependents = $graph->getDependents('shared'); + self::assertCount($fanOutCount, $dependents); + + // Verify propagation from 'shared' finds all consumers + $dirty = $graph->propagateDirty(['shared']); + self::assertCount($fanOutCount + 1, $dirty); // +1 for 'shared' itself + + // Verify iterator version produces same results + $dirtyIterator = iterator_to_array($graph->propagateDirtyIterator(['shared'])); + sort($dirty); + sort($dirtyIterator); + self::assertSame($dirty, $dirtyIterator); + } +} diff --git a/packages/guides/tests/unit/Build/IncrementalBuild/DirtyPropagatorTest.php b/packages/guides/tests/unit/Build/IncrementalBuild/DirtyPropagatorTest.php new file mode 100644 index 000000000..ae2def0c2 --- /dev/null +++ b/packages/guides/tests/unit/Build/IncrementalBuild/DirtyPropagatorTest.php @@ -0,0 +1,260 @@ +propagator = new DirtyPropagator(); + } + + public function testPropagateWithNoChanges(): void + { + $changes = new ChangeDetectionResult( + dirty: [], + clean: ['doc1', 'doc2'], + new: [], + deleted: [], + ); + + $graph = new DependencyGraph(); + $result = $this->propagator->propagate($changes, $graph, [], []); + + self::assertSame([], $result->documentsToRender); + self::assertSame(['doc1', 'doc2'], $result->documentsToSkip); + } + + public function testPropagateWithDirtyDocument(): void + { + $changes = new ChangeDetectionResult( + dirty: ['doc1'], + clean: ['doc2'], + new: [], + deleted: [], + ); + + $graph = new DependencyGraph(); + $result = $this->propagator->propagate($changes, $graph, [], []); + + self::assertContains('doc1', $result->documentsToRender); + self::assertSame(['doc2'], $result->documentsToSkip); + } + + public function testPropagateWithNewDocument(): void + { + $changes = new ChangeDetectionResult( + dirty: [], + clean: ['doc1'], + new: ['doc2'], + deleted: [], + ); + + $graph = new DependencyGraph(); + $result = 
$this->propagator->propagate($changes, $graph, [], []); + + self::assertContains('doc2', $result->documentsToRender); + self::assertSame(['doc1'], $result->documentsToSkip); + } + + public function testPropagateWithDeletedDocument(): void + { + $changes = new ChangeDetectionResult( + dirty: [], + clean: ['doc2'], + new: [], + deleted: ['doc1'], + ); + + // doc2 imports from doc1 (deleted) + $graph = new DependencyGraph(); + $graph->addImport('doc2', 'doc1'); + + $result = $this->propagator->propagate($changes, $graph, [], []); + + // doc2 should be dirty because its dependency was deleted + self::assertContains('doc2', $result->documentsToRender); + self::assertContains('doc1', $result->propagatedFrom); + } + + public function testPropagateWithExportChange(): void + { + $changes = new ChangeDetectionResult( + dirty: ['doc1'], + clean: ['doc2'], + new: [], + deleted: [], + ); + + // doc2 imports from doc1 + $graph = new DependencyGraph(); + $graph->addImport('doc2', 'doc1'); + + // Exports changed for doc1 + $oldExports = [ + 'doc1' => $this->createExports('anchor1'), + 'doc2' => $this->createExports('anchor2'), + ]; + $newExports = [ + 'doc1' => $this->createExports('anchor1-changed'), // Different anchors + ]; + + $result = $this->propagator->propagate($changes, $graph, $oldExports, $newExports); + + // Both docs should be rendered because doc1's exports changed + self::assertContains('doc1', $result->documentsToRender); + self::assertContains('doc2', $result->documentsToRender); + self::assertContains('doc1', $result->propagatedFrom); + } + + public function testPropagateWithUnchangedExports(): void + { + $changes = new ChangeDetectionResult( + dirty: ['doc1'], + clean: ['doc2'], + new: [], + deleted: [], + ); + + // doc2 imports from doc1 + $graph = new DependencyGraph(); + $graph->addImport('doc2', 'doc1'); + + // Same exports for doc1 + $exports = $this->createExports('anchor1'); + $oldExports = ['doc1' => $exports, 'doc2' => $this->createExports('anchor2')]; 
+ $newExports = ['doc1' => $exports]; // Same object = same exports hash + + $result = $this->propagator->propagate($changes, $graph, $oldExports, $newExports); + + // Only doc1 should render, doc2's dependency exports unchanged + self::assertContains('doc1', $result->documentsToRender); + self::assertSame(['doc2'], $result->documentsToSkip); + } + + public function testPropagateHandlesCycles(): void + { + $changes = new ChangeDetectionResult( + dirty: ['doc1'], + clean: [], + new: [], + deleted: [], + ); + + // Circular dependency: doc1 -> doc2 -> doc3 -> doc1 + $graph = new DependencyGraph(); + $graph->addImport('doc1', 'doc2'); + $graph->addImport('doc2', 'doc3'); + $graph->addImport('doc3', 'doc1'); + + // All have changed exports to trigger full propagation + $result = $this->propagator->propagate($changes, $graph, [], []); + + // Should not infinite loop and all should be marked for rendering + self::assertContains('doc1', $result->documentsToRender); + // doc2 and doc3 will be rendered because they depend on doc1 transitively + } + + public function testPropagateTransitive(): void + { + $changes = new ChangeDetectionResult( + dirty: ['doc1'], + clean: ['doc2', 'doc3'], + new: [], + deleted: [], + ); + + // Chain: doc3 imports doc2, doc2 imports doc1 + $graph = new DependencyGraph(); + $graph->addImport('doc2', 'doc1'); + $graph->addImport('doc3', 'doc2'); + + // doc1 exports changed, should propagate to doc2, then to doc3 + $oldExports = [ + 'doc1' => $this->createExports('a'), + 'doc2' => $this->createExports('b'), + 'doc3' => $this->createExports('c'), + ]; + $newExports = [ + 'doc1' => $this->createExports('a-changed'), + 'doc2' => $this->createExports('b-changed'), + ]; + + $result = $this->propagator->propagate($changes, $graph, $oldExports, $newExports); + + self::assertContains('doc1', $result->documentsToRender); + self::assertContains('doc2', $result->documentsToRender); + self::assertContains('doc3', $result->documentsToRender); + } + + public 
function testPropagateSimple(): void + { + $graph = new DependencyGraph(); + $graph->addImport('docA', 'docB'); + $graph->addImport('docC', 'docA'); + + $result = $this->propagator->propagateSimple(['docB'], $graph); + + self::assertContains('docB', $result); + self::assertContains('docA', $result); // Depends on docB + self::assertContains('docC', $result); // Depends on docA + } + + public function testPropagateSimpleWithNoDependents(): void + { + $graph = new DependencyGraph(); + $graph->addImport('docA', 'docB'); + + // docB has no dependents except docA + $result = $this->propagator->propagateSimple(['docC'], $graph); + + // Only docC is dirty, nothing depends on it + self::assertSame(['docC'], $result); + } + + public function testSavingsRatioCalculation(): void + { + $changes = new ChangeDetectionResult( + dirty: ['doc1'], + clean: ['doc2', 'doc3', 'doc4'], + new: [], + deleted: [], + ); + + $graph = new DependencyGraph(); + $result = $this->propagator->propagate($changes, $graph, [], []); + + // 1 to render, 3 to skip = 75% savings + self::assertEqualsWithDelta(0.75, $result->getSavingsRatio(), 0.001); + } + + private function createExports(string $anchor): DocumentExports + { + return new DocumentExports( + documentPath: 'test', + contentHash: md5($anchor), + exportsHash: md5($anchor . 
'-exports'), + anchors: [$anchor => 'Title'], + sectionTitles: [], + citations: [], + lastModified: 0, + ); + } +} diff --git a/packages/guides/tests/unit/Build/IncrementalBuild/DocumentExportsTest.php b/packages/guides/tests/unit/Build/IncrementalBuild/DocumentExportsTest.php new file mode 100644 index 000000000..76b751d72 --- /dev/null +++ b/packages/guides/tests/unit/Build/IncrementalBuild/DocumentExportsTest.php @@ -0,0 +1,328 @@ +createExports('hash1', 'exports-hash'); + $exports2 = $this->createExports('hash2', 'exports-hash'); + + self::assertFalse($exports1->hasExportsChanged($exports2)); + } + + public function testHasExportsChangedReturnsTrueForDifferentHash(): void + { + $exports1 = $this->createExports('hash1', 'exports-hash-1'); + $exports2 = $this->createExports('hash1', 'exports-hash-2'); + + self::assertTrue($exports1->hasExportsChanged($exports2)); + } + + public function testHasContentChangedReturnsFalseForSameHash(): void + { + $exports1 = $this->createExports('content-hash', 'exports1'); + $exports2 = $this->createExports('content-hash', 'exports2'); + + self::assertFalse($exports1->hasContentChanged($exports2)); + } + + public function testHasContentChangedReturnsTrueForDifferentHash(): void + { + $exports1 = $this->createExports('content-hash-1', 'exports'); + $exports2 = $this->createExports('content-hash-2', 'exports'); + + self::assertTrue($exports1->hasContentChanged($exports2)); + } + + public function testHasExportsChangedWithEmptyHashes(): void + { + $exports1 = $this->createExports('hash', ''); + $exports2 = $this->createExports('hash', ''); + + self::assertFalse($exports1->hasExportsChanged($exports2)); + } + + public function testGetAnchorNames(): void + { + $exports = new DocumentExports( + documentPath: 'test.rst', + contentHash: 'hash', + exportsHash: 'exports', + anchors: ['anchor1' => 'Title 1', 'anchor2' => 'Title 2'], + sectionTitles: [], + citations: [], + lastModified: 0, + ); + + self::assertSame(['anchor1', 'anchor2'], 
$exports->getAnchorNames()); + } + + public function testSerializationRoundTrip(): void + { + $original = new DocumentExports( + documentPath: 'path/to/doc.rst', + contentHash: 'abc123abc123abc123abc123abc12345', // 32 chars (xxh128) + exportsHash: 'def456def456def456def456def456def456def456def456def456def456def4', // 64 chars (sha256) + anchors: ['anchor1' => 'Title'], + sectionTitles: ['section1' => 'Section Title'], + citations: ['citation1'], + lastModified: 1_234_567_890, + documentTitle: 'Document Title', + ); + + $array = $original->toArray(); + $restored = DocumentExports::fromArray($array); + + self::assertSame($original->documentPath, $restored->documentPath); + self::assertSame($original->contentHash, $restored->contentHash); + self::assertSame($original->exportsHash, $restored->exportsHash); + self::assertSame($original->anchors, $restored->anchors); + self::assertSame($original->sectionTitles, $restored->sectionTitles); + self::assertSame($original->citations, $restored->citations); + self::assertSame($original->lastModified, $restored->lastModified); + self::assertSame($original->documentTitle, $restored->documentTitle); + } + + public function testFromArrayWithInvalidAnchorsType(): void + { + $this->expectException(InvalidArgumentException::class); + $this->expectExceptionMessage('expected anchors to be array'); + + DocumentExports::fromArray(['anchors' => 'not-an-array']); + } + + public function testFromArrayWithInvalidAnchorsValue(): void + { + $this->expectException(InvalidArgumentException::class); + $this->expectExceptionMessage('expected anchors value to be string'); + + DocumentExports::fromArray(['anchors' => ['key' => 123]]); + } + + public function testFromArrayWithInvalidSectionTitlesType(): void + { + $this->expectException(InvalidArgumentException::class); + $this->expectExceptionMessage('expected sectionTitles to be array'); + + DocumentExports::fromArray(['sectionTitles' => 'not-an-array']); + } + + public function 
testFromArrayWithInvalidSectionTitlesValue(): void + { + $this->expectException(InvalidArgumentException::class); + $this->expectExceptionMessage('expected sectionTitles value to be string'); + + DocumentExports::fromArray(['sectionTitles' => ['key' => null]]); + } + + public function testFromArrayWithInvalidCitationsType(): void + { + $this->expectException(InvalidArgumentException::class); + $this->expectExceptionMessage('expected citations to be array'); + + DocumentExports::fromArray(['citations' => 'not-an-array']); + } + + public function testFromArrayWithInvalidCitationsItem(): void + { + $this->expectException(InvalidArgumentException::class); + $this->expectExceptionMessage('expected citations item to be string'); + + DocumentExports::fromArray(['citations' => [123]]); + } + + public function testFromArrayWithInvalidDocumentPath(): void + { + $this->expectException(InvalidArgumentException::class); + $this->expectExceptionMessage('expected documentPath to be string'); + + DocumentExports::fromArray(['documentPath' => 123]); + } + + public function testFromArrayWithInvalidLastModified(): void + { + $this->expectException(InvalidArgumentException::class); + $this->expectExceptionMessage('expected lastModified to be int between'); + + DocumentExports::fromArray(['lastModified' => 'not-an-int']); + } + + public function testFromArrayWithDefaults(): void + { + $exports = DocumentExports::fromArray([]); + + self::assertSame('', $exports->documentPath); + self::assertSame('', $exports->contentHash); + self::assertSame('', $exports->exportsHash); + self::assertSame([], $exports->anchors); + self::assertSame([], $exports->sectionTitles); + self::assertSame([], $exports->citations); + self::assertSame(0, $exports->lastModified); + self::assertSame('', $exports->documentTitle); + } + + public function testFromArrayWithInvalidContentHashFormat(): void + { + $this->expectException(InvalidArgumentException::class); + $this->expectExceptionMessage('contentHash must be a 
hexadecimal string'); + + DocumentExports::fromArray(['contentHash' => 'not-hex-format!']); + } + + public function testFromArrayWithInvalidExportsHashFormat(): void + { + $this->expectException(InvalidArgumentException::class); + $this->expectExceptionMessage('exportsHash must be a hexadecimal string'); + + DocumentExports::fromArray(['exportsHash' => 'contains spaces and symbols!']); + } + + public function testFromArrayAcceptsValidHexHashes(): void + { + // 32-char hash (xxh128 format) + $xxh128Hash = 'abc123DEF456abc123DEF456abc12345'; + // 64-char hash (sha256 format) + $sha256Hash = '0123456789abcdef0123456789abcdef0123456789abcdef0123456789abcdef'; + + $exports = DocumentExports::fromArray([ + 'contentHash' => $xxh128Hash, + 'exportsHash' => $sha256Hash, + ]); + + self::assertSame($xxh128Hash, $exports->contentHash); + self::assertSame($sha256Hash, $exports->exportsHash); + } + + public function testFromArrayThrowsOnInvalidHashLength(): void + { + $this->expectException(InvalidArgumentException::class); + $this->expectExceptionMessage('must be 32 (xxh128) or 64 (sha256) hex chars'); + + // 16 chars is invalid (neither 32 nor 64) + DocumentExports::fromArray(['contentHash' => '0123456789abcdef']); + } + + public function testFromArrayAcceptsEmptyHashes(): void + { + $exports = DocumentExports::fromArray([ + 'contentHash' => '', + 'exportsHash' => '', + ]); + + self::assertSame('', $exports->contentHash); + self::assertSame('', $exports->exportsHash); + } + + public function testFromArrayThrowsOnNegativeLastModified(): void + { + $this->expectException(InvalidArgumentException::class); + $this->expectExceptionMessage('lastModified to be int between'); + + DocumentExports::fromArray(['lastModified' => -1]); + } + + public function testFromArrayThrowsOnExcessiveLastModified(): void + { + $this->expectException(InvalidArgumentException::class); + $this->expectExceptionMessage('lastModified to be int between'); + + // Year 3001 - exceeds MAX_TIMESTAMP + 
DocumentExports::fromArray(['lastModified' => 32_535_216_000]); + } + + public function testFromArrayThrowsOnExcessiveAnchorsCount(): void + { + $this->expectException(InvalidArgumentException::class); + $this->expectExceptionMessage('exceeds maximum'); + + // Create array with more than MAX_ARRAY_ITEMS (10000) anchors + $anchors = []; + for ($i = 0; $i < 10_001; $i++) { + $anchors['anchor' . $i] = 'Title ' . $i; + } + + DocumentExports::fromArray(['anchors' => $anchors]); + } + + public function testFromArrayThrowsOnExcessiveCitationsCount(): void + { + $this->expectException(InvalidArgumentException::class); + $this->expectExceptionMessage('exceeds maximum'); + + // Create array with more than MAX_ARRAY_ITEMS (10000) citations + $citations = []; + for ($i = 0; $i < 10_001; $i++) { + $citations[] = 'citation' . $i; + } + + DocumentExports::fromArray(['citations' => $citations]); + } + + public function testFromArrayThrowsOnOverlongDocumentPath(): void + { + $this->expectException(InvalidArgumentException::class); + $this->expectExceptionMessage('exceeds maximum length'); + + // Create string longer than MAX_STRING_LENGTH (65536) + $longPath = str_repeat('a', 65_537); + + DocumentExports::fromArray(['documentPath' => $longPath]); + } + + public function testFromArrayThrowsOnControlCharactersInDocumentPath(): void + { + $this->expectException(InvalidArgumentException::class); + $this->expectExceptionMessage('contains invalid control characters'); + + // Null byte in path - could cause filesystem issues + DocumentExports::fromArray(['documentPath' => "docs/test\x00.rst"]); + } + + public function testFromArrayThrowsOnNewlineInDocumentPath(): void + { + $this->expectException(InvalidArgumentException::class); + $this->expectExceptionMessage('contains invalid control characters'); + + // Newline could affect log parsing + DocumentExports::fromArray(['documentPath' => "docs/test\nmalicious"]); + } + + public function testFromArrayAcceptsValidDocumentPath(): void + { + 
// Valid paths with allowed special characters + $exports = DocumentExports::fromArray(['documentPath' => 'docs/sub-dir/my_file.rst']); + + self::assertSame('docs/sub-dir/my_file.rst', $exports->documentPath); + } + + private function createExports(string $contentHash, string $exportsHash): DocumentExports + { + return new DocumentExports( + documentPath: 'test.rst', + contentHash: $contentHash, + exportsHash: $exportsHash, + anchors: [], + sectionTitles: [], + citations: [], + lastModified: 0, + ); + } +} diff --git a/packages/guides/tests/unit/Build/IncrementalBuild/GlobalInvalidationDetectorTest.php b/packages/guides/tests/unit/Build/IncrementalBuild/GlobalInvalidationDetectorTest.php new file mode 100644 index 000000000..b7f63e7d3 --- /dev/null +++ b/packages/guides/tests/unit/Build/IncrementalBuild/GlobalInvalidationDetectorTest.php @@ -0,0 +1,369 @@ +requiresFullRebuild($changes, 'new-hash', 'old-hash'); + + self::assertTrue($result); + } + + public function testRequiresFullRebuildWithSameSettings(): void + { + $detector = new GlobalInvalidationDetector(); + $changes = new ChangeDetectionResult( + dirty: ['doc1'], + clean: [], + new: [], + deleted: [], + ); + + $result = $detector->requiresFullRebuild($changes, 'same-hash', 'same-hash'); + + self::assertFalse($result); + } + + public function testRequiresFullRebuildWithGuidesXml(): void + { + $detector = new GlobalInvalidationDetector(); + $changes = new ChangeDetectionResult( + dirty: ['path/to/guides.xml'], + clean: [], + new: [], + deleted: [], + ); + + self::assertTrue($detector->requiresFullRebuild($changes)); + } + + public function testRequiresFullRebuildWithSettingsCfg(): void + { + $detector = new GlobalInvalidationDetector(); + $changes = new ChangeDetectionResult( + dirty: [], + clean: [], + new: ['Settings.cfg'], + deleted: [], + ); + + self::assertTrue($detector->requiresFullRebuild($changes)); + } + + public function testRequiresFullRebuildWithConfPy(): void + { + $detector = new 
GlobalInvalidationDetector(); + $changes = new ChangeDetectionResult( + dirty: [], + clean: [], + new: [], + deleted: ['conf.py'], + ); + + self::assertTrue($detector->requiresFullRebuild($changes)); + } + + public function testRequiresFullRebuildWithStaticDirectory(): void + { + $detector = new GlobalInvalidationDetector(); + $changes = new ChangeDetectionResult( + dirty: ['_static/custom.css'], + clean: [], + new: [], + deleted: [], + ); + + self::assertTrue($detector->requiresFullRebuild($changes)); + } + + public function testRequiresFullRebuildWithTemplatesDirectory(): void + { + $detector = new GlobalInvalidationDetector(); + $changes = new ChangeDetectionResult( + dirty: [], + clean: [], + new: ['project/_templates/layout.html'], + deleted: [], + ); + + self::assertTrue($detector->requiresFullRebuild($changes)); + } + + public function testRequiresFullRebuildWithObjectsInv(): void + { + $detector = new GlobalInvalidationDetector(); + $changes = new ChangeDetectionResult( + dirty: ['path/to/objects.inv'], + clean: [], + new: [], + deleted: [], + ); + + self::assertTrue($detector->requiresFullRebuild($changes)); + } + + public function testRequiresFullRebuildWithRegularDocChange(): void + { + $detector = new GlobalInvalidationDetector(); + $changes = new ChangeDetectionResult( + dirty: ['docs/index.rst', 'docs/chapter1.rst'], + clean: ['docs/chapter2.rst'], + new: [], + deleted: [], + ); + + self::assertFalse($detector->requiresFullRebuild($changes)); + } + + public function testCustomGlobalPatterns(): void + { + $detector = new GlobalInvalidationDetector([ + 'custom-config.yaml', + 'my-templates/', + ]); + + $changes = new ChangeDetectionResult( + dirty: ['custom-config.yaml'], + clean: [], + new: [], + deleted: [], + ); + + self::assertTrue($detector->requiresFullRebuild($changes)); + + // Default pattern should not trigger + $changes2 = new ChangeDetectionResult( + dirty: ['guides.xml'], + clean: [], + new: [], + deleted: [], + ); + + 
self::assertFalse($detector->requiresFullRebuild($changes2)); + } + + public function testCustomDirectoryPattern(): void + { + $detector = new GlobalInvalidationDetector(['assets/']); + + $changes = new ChangeDetectionResult( + dirty: ['project/assets/logo.png'], + clean: [], + new: [], + deleted: [], + ); + + self::assertTrue($detector->requiresFullRebuild($changes)); + } + + public function testGetGlobalPatterns(): void + { + $patterns = ['custom.xml', 'theme/']; + $detector = new GlobalInvalidationDetector($patterns); + + self::assertSame($patterns, $detector->getGlobalPatterns()); + } + + public function testGetDefaultGlobalPatterns(): void + { + $detector = new GlobalInvalidationDetector(); + $patterns = $detector->getGlobalPatterns(); + + self::assertContains('guides.xml', $patterns); + self::assertContains('Settings.cfg', $patterns); + self::assertContains('conf.py', $patterns); + self::assertContains('_static/', $patterns); + self::assertContains('_templates/', $patterns); + self::assertContains('objects.inv', $patterns); + } + + public function testIsGlobalFileWithExactMatch(): void + { + $detector = new GlobalInvalidationDetector(); + + self::assertTrue($detector->isGlobalFile('guides.xml')); + self::assertTrue($detector->isGlobalFile('/path/to/guides.xml')); + } + + public function testIsGlobalFileWithDirectoryPattern(): void + { + $detector = new GlobalInvalidationDetector(); + + self::assertTrue($detector->isGlobalFile('_static/file.css')); + self::assertTrue($detector->isGlobalFile('project/_static/script.js')); + } + + public function testDirectoryPatternDoesNotMatchPartialNames(): void + { + $detector = new GlobalInvalidationDetector(['foo/']); + + // Should match complete directory name + self::assertTrue($detector->isGlobalFile('foo/bar.txt')); + self::assertTrue($detector->isGlobalFile('path/foo/bar.txt')); + self::assertTrue($detector->isGlobalFile('/absolute/foo/bar.txt')); + + // Should NOT match partial directory names + 
self::assertFalse($detector->isGlobalFile('prefix_foo/bar.txt')); + self::assertFalse($detector->isGlobalFile('path/prefix_foo/bar.txt')); + self::assertFalse($detector->isGlobalFile('foobar/file.txt')); + } + + public function testIsGlobalFileNormalizesBackslashes(): void + { + $detector = new GlobalInvalidationDetector(); + + // Windows-style paths + self::assertTrue($detector->isGlobalFile('project\\_static\\file.css')); + self::assertTrue($detector->isGlobalFile('docs\\guides.xml')); + } + + public function testHasToctreeChangedWithNoChange(): void + { + $detector = new GlobalInvalidationDetector(); + $toctree = [ + 'index' => ['chapter1', 'chapter2'], + 'chapter1' => ['section1', 'section2'], + ]; + + self::assertFalse($detector->hasToctreeChanged($toctree, $toctree)); + } + + public function testHasToctreeChangedWithDifferentCount(): void + { + $detector = new GlobalInvalidationDetector(); + $old = ['index' => ['chapter1']]; + $new = [ + 'index' => ['chapter1'], + 'chapter1' => ['section1'], + ]; + + self::assertTrue($detector->hasToctreeChanged($old, $new)); + } + + public function testHasToctreeChangedWithMissingParent(): void + { + $detector = new GlobalInvalidationDetector(); + $old = [ + 'index' => ['chapter1'], + 'chapter1' => ['section1'], + ]; + $new = [ + 'index' => ['chapter1'], + 'chapter2' => ['section1'], // Different parent + ]; + + self::assertTrue($detector->hasToctreeChanged($old, $new)); + } + + public function testHasToctreeChangedWithDifferentChildren(): void + { + $detector = new GlobalInvalidationDetector(); + $old = ['index' => ['chapter1', 'chapter2']]; + $new = ['index' => ['chapter1', 'chapter3']]; // chapter3 instead of chapter2 + + self::assertTrue($detector->hasToctreeChanged($old, $new)); + } + + public function testHasToctreeChangedIgnoresOrder(): void + { + $detector = new GlobalInvalidationDetector(); + $old = ['index' => ['chapter1', 'chapter2']]; + $new = ['index' => ['chapter2', 'chapter1']]; // Different order but same 
children
+
+        // Currently sorts before comparing, so order doesn't matter
+        self::assertFalse($detector->hasToctreeChanged($old, $new));
+    }
+
+    public function testHasToctreeChangedWithEmptyToctrees(): void
+    {
+        $detector = new GlobalInvalidationDetector();
+
+        self::assertFalse($detector->hasToctreeChanged([], []));
+    }
+
+    public function testRequiresFullRebuildWithNullHashes(): void
+    {
+        $detector = new GlobalInvalidationDetector();
+        $changes = new ChangeDetectionResult(
+            dirty: [],
+            clean: ['doc1'],
+            new: [],
+            deleted: [],
+        );
+
+        // Both null - no comparison needed
+        self::assertFalse($detector->requiresFullRebuild($changes, null, null));
+
+        // One null - no comparison
+        self::assertFalse($detector->requiresFullRebuild($changes, 'hash', null));
+        self::assertFalse($detector->requiresFullRebuild($changes, null, 'hash'));
+    }
+
+    public function testConstructorThrowsOnEmptyPattern(): void
+    {
+        $this->expectException(InvalidArgumentException::class);
+        $this->expectExceptionMessage('Global pattern cannot be empty');
+
+        new GlobalInvalidationDetector(['valid.xml', '']);
+    }
+
+    public function testConstructorThrowsOnTooLongPattern(): void
+    {
+        $this->expectException(InvalidArgumentException::class);
+        $this->expectExceptionMessage('Global pattern exceeds maximum length');
+
+        // Pattern longer than 256 characters
+        $longPattern = str_repeat('a', 257);
+        new GlobalInvalidationDetector([$longPattern]);
+    }
+
+    /** @param array<mixed> $patterns */
+    #[DataProvider('invalidPatternTypesProvider')]
+    public function testConstructorThrowsOnNonStringPattern(array $patterns): void
+    {
+        $this->expectException(InvalidArgumentException::class);
+        $this->expectExceptionMessage('Global pattern must be a string');
+
+        /** @phpstan-ignore argument.type (testing invalid input validation) */
+        new GlobalInvalidationDetector($patterns);
+    }
+
+    // NOTE(review): PHPDoc generics below were garbled in this patch ('array>');
+    // reconstructed as array<string, array<mixed>> -- confirm against the original commit.
+    /** @return array<string, array<mixed>> */
+    public static function invalidPatternTypesProvider(): array
+    {
+        return [
+            'integer' => [[123]],
+            'array' => [[['nested']]],
+            'null' => [[null]],
+            'boolean' => [[true]],
+            'float' => [[3.14]],
+        ];
+    }
+}
diff --git a/packages/guides/tests/unit/Build/IncrementalBuild/IncrementalBuildCacheTest.php b/packages/guides/tests/unit/Build/IncrementalBuild/IncrementalBuildCacheTest.php
new file mode 100644
index 000000000..287244610
--- /dev/null
+++ b/packages/guides/tests/unit/Build/IncrementalBuild/IncrementalBuildCacheTest.php
@@ -0,0 +1,506 @@
+// NOTE(review): the file preamble (<?php, namespace, use statements, class
+// declaration and the setUp() signature) appears truncated in this patch;
+// the next line is the tail of setUp(). Confirm against the original commit.
+tempDir = sys_get_temp_dir() . '/incremental-cache-test-' . uniqid();
+        mkdir($this->tempDir, 0o755, true);
+        $this->versioning = new CacheVersioning();
+    }
+
+    protected function tearDown(): void
+    {
+        $this->recursiveDelete($this->tempDir);
+    }
+
+    // Recursively removes the per-test temp directory; tolerates a missing dir.
+    private function recursiveDelete(string $dir): void
+    {
+        if (!is_dir($dir)) {
+            return;
+        }
+
+        $files = scandir($dir);
+        if ($files === false) {
+            return;
+        }
+
+        foreach ($files as $file) {
+            if ($file === '.' || $file === '..') {
+                continue;
+            }
+
+            $path = $dir . '/' . $file;
+            if (is_dir($path)) {
+                $this->recursiveDelete($path);
+            } else {
+                unlink($path);
+            }
+        }
+
+        rmdir($dir);
+    }
+
+    public function testLoadReturnsFalseWithNoCache(): void
+    {
+        $cache = new IncrementalBuildCache($this->versioning);
+
+        self::assertFalse($cache->load($this->tempDir));
+        self::assertFalse($cache->isLoaded());
+    }
+
+    public function testSaveAndLoadRoundTrip(): void
+    {
+        $cache = new IncrementalBuildCache($this->versioning);
+
+        // Add some data
+        $exports = new DocumentExports(
+            documentPath: 'index',
+            contentHash: 'abc123abc123abc123abc123abc12345',
+            exportsHash: 'def456def456def456def456def456def456def456def456def456def456def4',
+            anchors: ['anchor1' => 'Title'],
+            sectionTitles: ['section1' => 'Section'],
+            citations: ['cite1'],
+            lastModified: 1_234_567_890,
+            documentTitle: 'Index',
+        );
+        $cache->setExports('index', $exports);
+        $cache->setOutputPath('index', '/output/index.html');
+        $cache->getDependencyGraph()->addImport('index', 'chapter1');
+
+        // Save
+
$cache->save($this->tempDir, 'settings-hash');
+
+        // Load into new cache instance
+        $cache2 = new IncrementalBuildCache($this->versioning);
+        self::assertTrue($cache2->load($this->tempDir));
+        self::assertTrue($cache2->isLoaded());
+
+        // Verify data
+        $loadedExports = $cache2->getExports('index');
+        self::assertNotNull($loadedExports);
+        self::assertSame('index', $loadedExports->documentPath);
+        self::assertSame(['anchor1' => 'Title'], $loadedExports->anchors);
+
+        self::assertSame('/output/index.html', $cache2->getOutputPath('index'));
+        self::assertSame(['chapter1'], $cache2->getDependencyGraph()->getImports('index'));
+    }
+
+    public function testShardedExportStorage(): void
+    {
+        $cache = new IncrementalBuildCache($this->versioning);
+
+        // Add multiple exports to test sharding
+        for ($i = 0; $i < 10; $i++) {
+            $cache->setExports('doc' . $i, new DocumentExports(
+                documentPath: 'doc' . $i,
+                contentHash: str_repeat('a', 32),
+                exportsHash: str_repeat('b', 64),
+                anchors: [],
+                sectionTitles: [],
+                citations: [],
+                lastModified: 0,
+            ));
+        }
+
+        $cache->save($this->tempDir);
+
+        // Verify _exports directory exists
+        self::assertDirectoryExists($this->tempDir . '/_exports');
+
+        // Load and verify
+        $cache2 = new IncrementalBuildCache($this->versioning);
+        self::assertTrue($cache2->load($this->tempDir));
+        self::assertCount(10, $cache2->getAllExports());
+    }
+
+    public function testIncrementalSaveOnlyWritesDirtyExports(): void
+    {
+        $cache = new IncrementalBuildCache($this->versioning);
+        $cache->setExports('doc1', $this->createExports('doc1'));
+        $cache->setExports('doc2', $this->createExports('doc2'));
+        $cache->save($this->tempDir);
+
+        // Modify only doc1
+        $cache->setExports('doc1', $this->createExports('doc1-modified'));
+        $cache->save($this->tempDir);
+
+        // Both should still be loadable
+        $cache2 = new IncrementalBuildCache($this->versioning);
+        self::assertTrue($cache2->load($this->tempDir));
+        self::assertNotNull($cache2->getExports('doc1'));
+        self::assertNotNull($cache2->getExports('doc2'));
+    }
+
+    public function testRemoveDocumentDeletesShardFile(): void
+    {
+        $cache = new IncrementalBuildCache($this->versioning);
+        $cache->setExports('doc1', $this->createExports('doc1'));
+        $cache->setOutputPath('doc1', '/out/doc1.html');
+        $cache->save($this->tempDir);
+
+        // Verify file exists
+        self::assertNotNull($cache->getExports('doc1'));
+
+        // Remove document
+        $cache->removeDocument('doc1');
+
+        self::assertNull($cache->getExports('doc1'));
+        self::assertNull($cache->getOutputPath('doc1'));
+    }
+
+    public function testClear(): void
+    {
+        $cache = new IncrementalBuildCache($this->versioning);
+        $cache->setExports('doc1', $this->createExports('doc1'));
+        $cache->setOutputPath('doc1', '/out/doc1.html');
+        $cache->getDependencyGraph()->addImport('doc1', 'doc2');
+        $cache->save($this->tempDir);
+
+        $cache->clear();
+
+        self::assertSame([], $cache->getAllExports());
+        self::assertNull($cache->getOutputPath('doc1'));
+        self::assertSame([], $cache->getDependencyGraph()->getAllDocuments());
+        self::assertFalse($cache->isLoaded());
+    }
+
+    public function testGetStats(): void
+    {
+        $cache = new IncrementalBuildCache($this->versioning);
+        $cache->setExports('doc1', $this->createExports('doc1'));
+        $cache->setExports('doc2', $this->createExports('doc2'));
+        $cache->setOutputPath('doc1', '/out/doc1.html');
+        $cache->getDependencyGraph()->addImport('doc1', 'doc2');
+
+        $stats = $cache->getStats();
+
+        self::assertSame(2, $stats['documents']);
+        self::assertSame(1, $stats['outputs']);
+        self::assertFalse($stats['loaded']);
+        self::assertIsArray($stats['graph']);
+    }
+
+    public function testExtractAndMergeState(): void
+    {
+        $cache1 = new IncrementalBuildCache($this->versioning);
+        $cache1->setExports('doc1', $this->createExports('doc1'));
+        $cache1->setOutputPath('doc1', '/out/doc1.html');
+        $cache1->getDependencyGraph()->addImport('doc1', 'doc2');
+
+        $state = $cache1->extractState();
+
+        // Merge into new cache
+        $cache2 = new IncrementalBuildCache($this->versioning);
+        $cache2->mergeState($state);
+
+        self::assertNotNull($cache2->getExports('doc1'));
+        self::assertSame('/out/doc1.html', $cache2->getOutputPath('doc1'));
+        self::assertSame(['doc2'], $cache2->getDependencyGraph()->getImports('doc1'));
+    }
+
+    public function testMergeStateDoesNotOverwriteExisting(): void
+    {
+        $cache = new IncrementalBuildCache($this->versioning);
+        $cache->setExports('doc1', $this->createExports('original'));
+        $cache->setOutputPath('doc1', '/original/path.html');
+
+        $state = [
+            'exports' => ['doc1' => $this->createExports('new')->toArray()],
+            'outputPaths' => ['doc1' => '/new/path.html'],
+        ];
+
+        $cache->mergeState($state);
+
+        // Original values should be preserved
+        self::assertSame('/original/path.html', $cache->getOutputPath('doc1'));
+    }
+
+    public function testGetSettingsHash(): void
+    {
+        $cache = new IncrementalBuildCache($this->versioning);
+        $cache->save($this->tempDir, 'my-settings-hash');
+
+        $cache2 = new IncrementalBuildCache($this->versioning);
+        $cache2->load($this->tempDir);
+
+        self::assertSame('my-settings-hash', $cache2->getSettingsHash());
+    }
+
+    public function testInputDir(): void
+    {
+        $cache = new IncrementalBuildCache($this->versioning);
+
+        self::assertSame('', $cache->getInputDir());
+
+        $cache->setInputDir('/path/to/docs');
+
+        self::assertSame('/path/to/docs', $cache->getInputDir());
+    }
+
+    public function testGetAllDocPaths(): void
+    {
+        $cache = new IncrementalBuildCache($this->versioning);
+        $cache->setExports('doc1', $this->createExports('doc1'));
+        $cache->setExports('doc2', $this->createExports('doc2'));
+        $cache->setExports('doc3', $this->createExports('doc3'));
+
+        $paths = $cache->getAllDocPaths();
+
+        self::assertCount(3, $paths);
+        self::assertContains('doc1', $paths);
+        self::assertContains('doc2', $paths);
+        self::assertContains('doc3', $paths);
+    }
+
+    public function testLoadInvalidJson(): void
+    {
+        file_put_contents($this->tempDir . '/_build_meta.json', 'not valid json');
+
+        $cache = new IncrementalBuildCache($this->versioning);
+
+        self::assertFalse($cache->load($this->tempDir));
+    }
+
+    public function testLoadInvalidMetadata(): void
+    {
+        // Write valid JSON but with old cache version
+        $data = [
+            'metadata' => ['version' => 999], // Invalid version
+            'dependencies' => [],
+            'outputs' => [],
+        ];
+        file_put_contents(
+            $this->tempDir . '/_build_meta.json',
+            json_encode($data, JSON_THROW_ON_ERROR),
+        );
+
+        $cache = new IncrementalBuildCache($this->versioning);
+
+        self::assertFalse($cache->load($this->tempDir));
+    }
+
+    public function testLoadLegacyMonolithicFormat(): void
+    {
+        // Simulate legacy format with exports in main file
+        $data = [
+            'metadata' => $this->versioning->createMetadata(),
+            'dependencies' => [],
+            'outputs' => [],
+            'exports' => [
+                'doc1' => [
+                    'documentPath' => 'doc1',
+                    'contentHash' => str_repeat('a', 32),
+                    'exportsHash' => str_repeat('b', 64),
+                    'anchors' => [],
+                    'sectionTitles' => [],
+                    'citations' => [],
+                    'lastModified' => 0,
+                ],
+            ],
+        ];
+        file_put_contents(
+            $this->tempDir . '/_build_meta.json',
+            json_encode($data, JSON_THROW_ON_ERROR),
+        );
+
+        $cache = new IncrementalBuildCache($this->versioning);
+        self::assertTrue($cache->load($this->tempDir));
+        self::assertNotNull($cache->getExports('doc1'));
+    }
+
+    public function testSetExportsThrowsWhenLimitExceeded(): void
+    {
+        $this->expectException(InvalidArgumentException::class);
+        $this->expectExceptionMessage('exceed maximum');
+
+        $cache = new IncrementalBuildCache($this->versioning);
+
+        // Use reflection to set exports near limit
+        $reflection = new ReflectionClass($cache);
+        $property = $reflection->getProperty('exports');
+
+        $exports = [];
+        for ($i = 0; $i < 100_000; $i++) {
+            $exports['doc' . $i] = $this->createExports('doc' . $i);
+        }
+
+        $property->setValue($cache, $exports);
+
+        // Try to add one more
+        $cache->setExports('one-more', $this->createExports('one-more'));
+    }
+
+    public function testSetOutputPathThrowsWhenLimitExceeded(): void
+    {
+        $this->expectException(InvalidArgumentException::class);
+        $this->expectExceptionMessage('exceed maximum');
+
+        $cache = new IncrementalBuildCache($this->versioning);
+
+        // Use reflection to set outputPaths near limit
+        $reflection = new ReflectionClass($cache);
+        $property = $reflection->getProperty('outputPaths');
+
+        $paths = [];
+        for ($i = 0; $i < 100_000; $i++) {
+            $paths['doc' . $i] = '/out/doc' . $i . '.html';
+        }
+
+        $property->setValue($cache, $paths);
+
+        // Try to add one more
+        $cache->setOutputPath('one-more', '/out/one-more.html');
+    }
+
+    public function testLoadThrowsOnExcessiveExports(): void
+    {
+        $this->expectException(InvalidArgumentException::class);
+        $this->expectExceptionMessage('exceed maximum');
+
+        // Create legacy format with too many exports
+        $exports = [];
+        for ($i = 0; $i < 100_001; $i++) {
+            $exports['doc' . $i] = [
+                'documentPath' => 'doc' . $i,
+                'contentHash' => '',
+                'exportsHash' => '',
+                'anchors' => [],
+                'sectionTitles' => [],
+                'citations' => [],
+                'lastModified' => 0,
+            ];
+        }
+
+        $data = [
+            'metadata' => $this->versioning->createMetadata(),
+            'dependencies' => [],
+            'outputs' => [],
+            'exports' => $exports,
+        ];
+        file_put_contents(
+            $this->tempDir . '/_build_meta.json',
+            json_encode($data, JSON_THROW_ON_ERROR),
+        );
+
+        $cache = new IncrementalBuildCache($this->versioning);
+        $cache->load($this->tempDir);
+    }
+
+    public function testMergeStateThrowsOnExcessiveExports(): void
+    {
+        $this->expectException(InvalidArgumentException::class);
+        $this->expectExceptionMessage('exceed maximum');
+
+        $cache = new IncrementalBuildCache($this->versioning);
+
+        // Use reflection to fill cache near limit
+        $reflection = new ReflectionClass($cache);
+        $property = $reflection->getProperty('exports');
+
+        $exports = [];
+        for ($i = 0; $i < 100_000; $i++) {
+            $exports['doc' . $i] = $this->createExports('doc' . $i);
+        }
+
+        $property->setValue($cache, $exports);
+
+        // Try to merge more
+        $state = ['exports' => ['new-doc' => $this->createExports('new-doc')->toArray()]];
+        $cache->mergeState($state);
+    }
+
+    public function testLoadIgnoresInvalidShardDirectories(): void
+    {
+        // Create valid export first
+        $cache = new IncrementalBuildCache($this->versioning);
+        $cache->setExports('doc1', $this->createExports('doc1'));
+        $cache->save($this->tempDir);
+
+        // Create invalid shard directory (should be ignored)
+        mkdir($this->tempDir . '/_exports/invalid-name', 0o755);
+        file_put_contents(
+            $this->tempDir . '/_exports/invalid-name/test.json',
+            json_encode([
+                'path' => 'evil-doc',
+                'documentPath' => 'evil-doc',
+                'contentHash' => str_repeat('x', 32),
+                'exportsHash' => str_repeat('y', 64),
+                'anchors' => [],
+                'sectionTitles' => [],
+                'citations' => [],
+                'lastModified' => 0,
+            ], JSON_THROW_ON_ERROR),
+        );
+
+        // Also create a non-hex shard directory
+        mkdir($this->tempDir .
'/_exports/ZZ', 0o755);
+        file_put_contents(
+            $this->tempDir . '/_exports/ZZ/another.json',
+            json_encode([
+                'path' => 'another-evil',
+                'documentPath' => 'another-evil',
+                'contentHash' => str_repeat('x', 32),
+                'exportsHash' => str_repeat('y', 64),
+                'anchors' => [],
+                'sectionTitles' => [],
+                'citations' => [],
+                'lastModified' => 0,
+            ], JSON_THROW_ON_ERROR),
+        );
+
+        // Load and verify invalid shards were ignored
+        $cache2 = new IncrementalBuildCache($this->versioning);
+        $cache2->load($this->tempDir);
+
+        self::assertNull($cache2->getExports('evil-doc'));
+        self::assertNull($cache2->getExports('another-evil'));
+        self::assertNotNull($cache2->getExports('doc1'));
+    }
+
+    // Builds a minimal DocumentExports fixture with valid 32/64-char hash lengths.
+    private function createExports(string $id): DocumentExports
+    {
+        return new DocumentExports(
+            documentPath: $id,
+            contentHash: str_repeat('a', 32),
+            exportsHash: str_repeat('b', 64),
+            anchors: [],
+            sectionTitles: [],
+            citations: [],
+            lastModified: 0,
+        );
+    }
+}
diff --git a/packages/guides/tests/unit/Build/IncrementalBuild/IncrementalBuildStateTest.php b/packages/guides/tests/unit/Build/IncrementalBuild/IncrementalBuildStateTest.php
new file mode 100644
index 000000000..b74cb0773
--- /dev/null
+++ b/packages/guides/tests/unit/Build/IncrementalBuild/IncrementalBuildStateTest.php
@@ -0,0 +1,299 @@
+// NOTE(review): the file preamble and the opening of the first test method
+// appear truncated in this patch; the next line is the tail of that method.
+// Confirm against the original commit.
+getDependencyGraph());
+        self::assertSame([], $state->getDependencyGraph()->getAllDocuments());
+    }
+
+    public function testSetAndGetExports(): void
+    {
+        $state = new IncrementalBuildState();
+        $exports = new DocumentExports(
+            documentPath: 'index',
+            contentHash: 'abc123',
+            exportsHash: 'def456',
+            anchors: ['anchor1' => 'Title 1'],
+            sectionTitles: ['section1' => 'Section 1'],
+            citations: [],
+            lastModified: 1_234_567_890,
+            documentTitle: 'Index Page',
+        );
+
+        $state->setExports('index', $exports);
+
+        self::assertSame($exports, $state->getExports('index'));
+        self::assertNull($state->getExports('nonexistent'));
+    }
+
+    public function testGetAllExports(): void
+    {
+        $state = new IncrementalBuildState();
+        $exports1 = new DocumentExports('doc1', 'hash1', 'exp1', [], [], [], 0, '');
+        $exports2 = new DocumentExports('doc2', 'hash2', 'exp2', [], [], [], 0, '');
+
+        $state->setExports('doc1', $exports1);
+        $state->setExports('doc2', $exports2);
+
+        $allExports = $state->getAllExports();
+
+        self::assertCount(2, $allExports);
+        self::assertSame($exports1, $allExports['doc1']);
+        self::assertSame($exports2, $allExports['doc2']);
+    }
+
+    public function testPreviousExports(): void
+    {
+        $state = new IncrementalBuildState();
+        $previousExports = [
+            'doc1' => new DocumentExports('doc1', 'hash1', 'exp1', [], [], [], 0, ''),
+            'doc2' => new DocumentExports('doc2', 'hash2', 'exp2', [], [], [], 0, ''),
+        ];
+
+        $state->setPreviousExports($previousExports);
+
+        self::assertSame($previousExports, $state->getPreviousExports());
+        self::assertSame($previousExports['doc1'], $state->getPreviousExportsFor('doc1'));
+        self::assertNull($state->getPreviousExportsFor('nonexistent'));
+    }
+
+    public function testInputDir(): void
+    {
+        $state = new IncrementalBuildState();
+
+        self::assertSame('', $state->getInputDir());
+
+        $state->setInputDir('/path/to/docs');
+
+        self::assertSame('/path/to/docs', $state->getInputDir());
+    }
+
+    public function testReset(): void
+    {
+        $state = new IncrementalBuildState();
+
+        // Set up some state
+        $state->setExports('doc1', new DocumentExports('doc1', 'hash1', 'exp1', [], [], [], 0, ''));
+        $state->getDependencyGraph()->addImport('docA', 'docB');
+        $previousExports = [
+            'doc1' => new DocumentExports('doc1', 'old', 'old', [], [], [], 0, ''),
+        ];
+        $state->setPreviousExports($previousExports);
+
+        $state->reset();
+
+        // Exports and graph should be reset
+        self::assertSame([], $state->getAllExports());
+        self::assertSame([], $state->getDependencyGraph()->getAllDocuments());
+
+        // But previous exports should be preserved (they're the reference for change detection)
+        self::assertSame($previousExports, $state->getPreviousExports());
+    }
+
+    public function testSetDependencyGraph(): void
+    {
+        $state = new IncrementalBuildState();
+        $newGraph = new DependencyGraph();
+        $newGraph->addImport('docA', 'docB');
+
+        $state->setDependencyGraph($newGraph);
+
+        self::assertSame($newGraph, $state->getDependencyGraph());
+        self::assertSame(['docB'], $state->getDependencyGraph()->getImports('docA'));
+    }
+
+    public function testSerializationRoundTrip(): void
+    {
+        $state = new IncrementalBuildState();
+
+        // Use valid hash lengths: 32 chars (xxh128) and 64 chars (sha256)
+        $contentHash = 'abc123abc123abc123abc123abc12345';
+        $exportsHash = 'def456def456def456def456def456def456def456def456def456def456def4';
+
+        // Add exports
+        $exports = new DocumentExports(
+            documentPath: 'index',
+            contentHash: $contentHash,
+            exportsHash: $exportsHash,
+            anchors: ['anchor1' => 'Title 1'],
+            sectionTitles: ['section1' => 'Section 1'],
+            citations: ['citation1'],
+            lastModified: 1_234_567_890,
+            documentTitle: 'Index Page',
+        );
+        $state->setExports('index', $exports);
+
+        // Add dependencies
+        $state->getDependencyGraph()->addImport('docA', 'docB');
+        $state->getDependencyGraph()->addImport('docA', 'index');
+
+        // Serialize and restore
+        $array = $state->toArray();
+        $restored = IncrementalBuildState::fromArray($array);
+
+        // Verify exports
+        $restoredExports = $restored->getExports('index');
+        self::assertNotNull($restoredExports);
+        self::assertSame('index', $restoredExports->documentPath);
+        self::assertSame($contentHash, $restoredExports->contentHash);
+        self::assertSame($exportsHash, $restoredExports->exportsHash);
+        self::assertSame(['anchor1' => 'Title 1'], $restoredExports->anchors);
+        self::assertSame(['section1' => 'Section 1'], $restoredExports->sectionTitles);
+        self::assertSame(['citation1'], $restoredExports->citations);
+        self::assertSame(1_234_567_890, $restoredExports->lastModified);
+        self::assertSame('Index Page', $restoredExports->documentTitle);
+
+        // Verify dependency graph
+        self::assertContains('docB', $restored->getDependencyGraph()->getImports('docA'));
+        self::assertContains('index', $restored->getDependencyGraph()->getImports('docA'));
+    }
+
+    public function testFromArrayWithEmptyData(): void
+    {
+        $state = IncrementalBuildState::fromArray([]);
+
+        self::assertSame([], $state->getAllExports());
+        self::assertSame([], $state->getDependencyGraph()->getAllDocuments());
+    }
+
+    public function testSetExportsAllowsUpdatingExistingDocument(): void
+    {
+        $state = new IncrementalBuildState();
+        $exports1 = new DocumentExports('doc1', 'hash1', 'exp1', [], [], [], 0, '');
+        $exports2 = new DocumentExports('doc1', 'hash2', 'exp2', [], [], [], 0, '');
+
+        $state->setExports('doc1', $exports1);
+        $state->setExports('doc1', $exports2); // Update same document
+
+        // Should use the latest exports
+        self::assertSame($exports2, $state->getExports('doc1'));
+        self::assertCount(1, $state->getAllExports());
+    }
+
+    public function testSetExportsThrowsWhenLimitExceeded(): void
+    {
+        $this->expectException(InvalidArgumentException::class);
+        $this->expectExceptionMessage('exceed maximum');
+
+        $state = new IncrementalBuildState();
+
+        // Use reflection to directly set exports near the limit
+        $reflection = new ReflectionClass($state);
+        $property = $reflection->getProperty('exports');
+
+        $exports = [];
+        for ($i = 0; $i < 100_000; $i++) {
+            $exports['doc' . $i] = new DocumentExports('doc' . $i, 'hash', 'exp', [], [], [], 0, '');
+        }
+
+        $property->setValue($state, $exports);
+
+        // Now try to add one more - should throw
+        $state->setExports('one-more-doc', new DocumentExports('one-more-doc', 'hash', 'exp', [], [], [], 0, ''));
+    }
+
+    public function testSetPreviousExportsThrowsWhenLimitExceeded(): void
+    {
+        $this->expectException(InvalidArgumentException::class);
+        $this->expectExceptionMessage('exceed maximum');
+
+        $state = new IncrementalBuildState();
+
+        // Create array with more than MAX_EXPORTS (100000)
+        $exports = [];
+        for ($i = 0; $i < 100_001; $i++) {
+            $exports['doc' . $i] = new DocumentExports('doc' . $i, 'hash', 'exp', [], [], [], 0, '');
+        }
+
+        $state->setPreviousExports($exports);
+    }
+
+    public function testSetHashAlgorithmAcceptsValidAlgorithms(): void
+    {
+        $state = new IncrementalBuildState();
+
+        $state->setHashAlgorithm('sha256');
+        self::assertSame('sha256', $state->getHashAlgorithm());
+
+        $state->setHashAlgorithm('xxh128');
+        self::assertSame('xxh128', $state->getHashAlgorithm());
+    }
+
+    public function testSetHashAlgorithmThrowsOnInvalidAlgorithm(): void
+    {
+        $this->expectException(InvalidArgumentException::class);
+        $this->expectExceptionMessage('invalid hash algorithm "md5"');
+
+        $state = new IncrementalBuildState();
+        $state->setHashAlgorithm('md5');
+    }
+
+    public function testFromArrayThrowsOnInvalidHashAlgorithm(): void
+    {
+        $this->expectException(InvalidArgumentException::class);
+        $this->expectExceptionMessage('invalid hash algorithm "crc32"');
+
+        IncrementalBuildState::fromArray(['hashAlgorithm' => 'crc32']);
+    }
+
+    public function testFromArrayAcceptsEmptyHashAlgorithmForLegacyState(): void
+    {
+        $state = IncrementalBuildState::fromArray(['hashAlgorithm' => '']);
+
+        self::assertSame('', $state->getHashAlgorithm());
+    }
+
+    public function testIsHashAlgorithmCompatibleWithMatchingAlgorithm(): void
+    {
+        $state = new IncrementalBuildState();
+        $state->setHashAlgorithm('sha256');
+
+
self::assertTrue($state->isHashAlgorithmCompatible('sha256'));
+        self::assertFalse($state->isHashAlgorithmCompatible('xxh128'));
+    }
+
+    public function testIsHashAlgorithmCompatibleLegacyStateAssumeSha256(): void
+    {
+        // Legacy state has empty hashAlgorithm
+        $state = new IncrementalBuildState();
+        // Don't call setHashAlgorithm - this simulates legacy state
+
+        // Legacy state should be compatible with sha256 (the old fallback)
+        self::assertTrue($state->isHashAlgorithmCompatible('sha256'));
+        // But NOT compatible with xxh128 (to force rebuild when xxh128 becomes available)
+        self::assertFalse($state->isHashAlgorithmCompatible('xxh128'));
+    }
+
+    public function testHashAlgorithmSerializationRoundTrip(): void
+    {
+        $state = new IncrementalBuildState();
+        $state->setHashAlgorithm('xxh128');
+
+        $array = $state->toArray();
+        $restored = IncrementalBuildState::fromArray($array);
+
+        self::assertSame('xxh128', $restored->getHashAlgorithm());
+        self::assertTrue($restored->isHashAlgorithmCompatible('xxh128'));
+    }
+}
diff --git a/packages/guides/tests/unit/Build/IncrementalBuild/PropagationResultTest.php b/packages/guides/tests/unit/Build/IncrementalBuild/PropagationResultTest.php
new file mode 100644
index 000000000..25f18a656
--- /dev/null
+++ b/packages/guides/tests/unit/Build/IncrementalBuild/PropagationResultTest.php
@@ -0,0 +1,227 @@
+// NOTE(review): the file preamble and the opening of the first test method
+// appear truncated in this patch; the next line is the tail of that method.
+// Confirm against the original commit.
+needsRendering('doc1'));
+        self::assertTrue($result->needsRendering('doc2'));
+    }
+
+    public function testNeedsRenderingReturnsFalse(): void
+    {
+        $result = new PropagationResult(
+            documentsToRender: ['doc1'],
+            documentsToSkip: ['doc2', 'doc3'],
+        );
+
+        self::assertFalse($result->needsRendering('doc2'));
+        self::assertFalse($result->needsRendering('doc3'));
+        self::assertFalse($result->needsRendering('nonexistent'));
+    }
+
+    public function testGetRenderCount(): void
+    {
+        $result = new PropagationResult(
+            documentsToRender: ['doc1', 'doc2', 'doc3'],
+            documentsToSkip: ['doc4'],
+        );
+
+        self::assertSame(3, $result->getRenderCount());
+    }
+
+    public function testGetSkipCount(): void
+    {
+        $result = new PropagationResult(
+            documentsToRender: ['doc1'],
+            documentsToSkip: ['doc2', 'doc3', 'doc4', 'doc5'],
+        );
+
+        self::assertSame(4, $result->getSkipCount());
+    }
+
+    public function testGetSavingsRatioWithMixedDocuments(): void
+    {
+        // 1 to render, 3 to skip = 75% savings
+        $result = new PropagationResult(
+            documentsToRender: ['doc1'],
+            documentsToSkip: ['doc2', 'doc3', 'doc4'],
+        );
+
+        self::assertEqualsWithDelta(0.75, $result->getSavingsRatio(), 0.001);
+    }
+
+    public function testGetSavingsRatioAllRender(): void
+    {
+        $result = new PropagationResult(
+            documentsToRender: ['doc1', 'doc2'],
+            documentsToSkip: [],
+        );
+
+        self::assertSame(0.0, $result->getSavingsRatio());
+    }
+
+    public function testGetSavingsRatioAllSkip(): void
+    {
+        $result = new PropagationResult(
+            documentsToRender: [],
+            documentsToSkip: ['doc1', 'doc2'],
+        );
+
+        self::assertSame(1.0, $result->getSavingsRatio());
+    }
+
+    public function testGetSavingsRatioEmpty(): void
+    {
+        $result = new PropagationResult(
+            documentsToRender: [],
+            documentsToSkip: [],
+        );
+
+        self::assertSame(0.0, $result->getSavingsRatio());
+    }
+
+    public function testPropagatedFrom(): void
+    {
+        $result = new PropagationResult(
+            documentsToRender: ['doc1', 'doc2'],
+            documentsToSkip: [],
+            propagatedFrom: ['source1', 'source2'],
+        );
+
+        self::assertSame(['source1', 'source2'], $result->propagatedFrom);
+    }
+
+    public function testToArray(): void
+    {
+        $result = new PropagationResult(
+            documentsToRender: ['doc1', 'doc2'],
+            documentsToSkip: ['doc3'],
+            propagatedFrom: ['source1'],
+        );
+
+        $array = $result->toArray();
+
+        self::assertSame(['doc1', 'doc2'], $array['documentsToRender']);
+        self::assertSame(['doc3'], $array['documentsToSkip']);
+        self::assertSame(['source1'], $array['propagatedFrom']);
+    }
+
+    public function testFromArray(): void
+    {
+        $data = [
+            'documentsToRender' => ['doc1', 'doc2'],
+            'documentsToSkip' => ['doc3'],
+            'propagatedFrom' => ['source1'],
+        ];
+
+        $result = PropagationResult::fromArray($data);
+
+        self::assertSame(['doc1', 'doc2'], $result->documentsToRender);
+        self::assertSame(['doc3'], $result->documentsToSkip);
+        self::assertSame(['source1'], $result->propagatedFrom);
+    }
+
+    public function testFromArrayWithDefaults(): void
+    {
+        $result = PropagationResult::fromArray([]);
+
+        self::assertSame([], $result->documentsToRender);
+        self::assertSame([], $result->documentsToSkip);
+        self::assertSame([], $result->propagatedFrom);
+    }
+
+    public function testSerializationRoundTrip(): void
+    {
+        $original = new PropagationResult(
+            documentsToRender: ['a', 'b', 'c'],
+            documentsToSkip: ['d', 'e'],
+            propagatedFrom: ['a'],
+        );
+
+        $restored = PropagationResult::fromArray($original->toArray());
+
+        self::assertSame($original->documentsToRender, $restored->documentsToRender);
+        self::assertSame($original->documentsToSkip, $restored->documentsToSkip);
+        self::assertSame($original->propagatedFrom, $restored->propagatedFrom);
+    }
+
+    public function testFromArrayThrowsOnInvalidDocumentsToRenderType(): void
+    {
+        $this->expectException(InvalidArgumentException::class);
+        $this->expectExceptionMessage('expected documentsToRender to be array');
+
+        PropagationResult::fromArray(['documentsToRender' => 'not-array']);
+    }
+
+    public function testFromArrayThrowsOnInvalidDocumentsToSkipType(): void
+    {
+        $this->expectException(InvalidArgumentException::class);
+        $this->expectExceptionMessage('expected documentsToSkip to be array');
+
+        PropagationResult::fromArray(['documentsToSkip' => 123]);
+    }
+
+    public function testFromArrayThrowsOnInvalidPropagatedFromType(): void
+    {
+        $this->expectException(InvalidArgumentException::class);
+        $this->expectExceptionMessage('expected propagatedFrom to be array');
+
+        PropagationResult::fromArray(['propagatedFrom' => null]);
+    }
+
+    public function testFromArrayThrowsOnNonStringItem(): void
+    {
+
$this->expectException(InvalidArgumentException::class);
+        $this->expectExceptionMessage('expected documentsToRender item to be string');
+
+        PropagationResult::fromArray(['documentsToRender' => [123, 'valid']]);
+    }
+
+    public function testFromArrayThrowsOnExcessiveDocumentsToRender(): void
+    {
+        $this->expectException(InvalidArgumentException::class);
+        $this->expectExceptionMessage('exceed maximum');
+
+        $docs = [];
+        for ($i = 0; $i < 100_001; $i++) {
+            $docs[] = 'doc' . $i;
+        }
+
+        PropagationResult::fromArray(['documentsToRender' => $docs]);
+    }
+
+    public function testFromArrayThrowsOnExcessiveDocumentsToSkip(): void
+    {
+        $this->expectException(InvalidArgumentException::class);
+        $this->expectExceptionMessage('exceed maximum');
+
+        $docs = [];
+        for ($i = 0; $i < 100_001; $i++) {
+            $docs[] = 'doc' . $i;
+        }
+
+        PropagationResult::fromArray(['documentsToSkip' => $docs]);
+    }
+}
diff --git a/packages/guides/tests/unit/Compiler/Passes/DependencyGraphPassTest.php b/packages/guides/tests/unit/Compiler/Passes/DependencyGraphPassTest.php
new file mode 100644
index 000000000..e8c80ff8b
--- /dev/null
+++ b/packages/guides/tests/unit/Compiler/Passes/DependencyGraphPassTest.php
@@ -0,0 +1,216 @@
+// NOTE(review): the file preamble (<?php, namespace, use statements, class
+// declaration, properties, and the setUp() signature) appears truncated in
+// this patch; the next line is the tail of setUp(). Confirm against the
+// original commit.
+buildState = new IncrementalBuildState();
+        $this->pass = new DependencyGraphPass($this->buildState);
+    }
+
+    public function testGetPriorityIsLowerThanExportsCollector(): void
+    {
+        // Should run after ExportsCollectorPass (priority 10)
+        self::assertSame(9, $this->pass->getPriority());
+    }
+
+    public function testTracksDocReferences(): void
+    {
+        $projectNode = new ProjectNode();
+
+        // Register target document using proper API
+        $titleNode = new TitleNode(InlineCompoundNode::getPlainTextInlineNode('Other Document'), 1, 'other-document');
+        $docEntry = new DocumentEntryNode('other-doc', $titleNode);
+        $projectNode->addDocumentEntry($docEntry);
+
+        // Create document with :doc: reference
+        // DocReferenceNode($targetDocument, $children = [], $interlinkDomain = '')
+        $docRef = new DocReferenceNode('other-doc', []);
+        $document = new DocumentNode('hash1', 'main-doc');
+        $document->addChildNode(new ParagraphNode([new InlineCompoundNode([$docRef])]));
+
+        $context = new CompilerContext($projectNode);
+        $this->pass->run([$document], $context);
+
+        $graph = $this->buildState->getDependencyGraph();
+        self::assertContains('other-doc', $graph->getImports('main-doc'));
+    }
+
+    public function testTracksRefReferences(): void
+    {
+        $projectNode = new ProjectNode();
+
+        // Register an internal target using proper API
+        $target = new InternalTarget('target-doc', 'my-anchor', 'My Anchor Title');
+        $projectNode->addLinkTarget('my-anchor', $target);
+
+        // Create document with :ref: reference
+        // ReferenceNode($targetReference, $children = [], $interlinkDomain = '', $linkType = ..., $prefix = '')
+        $refNode = new ReferenceNode('my-anchor', []);
+        $document = new DocumentNode('hash1', 'referencing-doc');
+        $document->addChildNode(new ParagraphNode([new InlineCompoundNode([$refNode])]));
+
+        $context = new CompilerContext($projectNode);
+        $this->pass->run([$document], $context);
+
+        $graph = $this->buildState->getDependencyGraph();
+        self::assertContains('target-doc', $graph->getImports('referencing-doc'));
+    }
+
+    public function testIgnoresSelfReferences(): void
+    {
+        $projectNode = new ProjectNode();
+        $titleNode = new TitleNode(InlineCompoundNode::getPlainTextInlineNode('Same Document'), 1, 'same-document');
+        $docEntry = new DocumentEntryNode('same-doc', $titleNode);
+        $projectNode->addDocumentEntry($docEntry);
+
+        $docRef = new DocReferenceNode('same-doc', []);
+        $document = new DocumentNode('hash1', 'same-doc');
+        $document->addChildNode(new ParagraphNode([new InlineCompoundNode([$docRef])]));
+
+        $context = new CompilerContext($projectNode);
+        $this->pass->run([$document], $context);
+
+        $graph = $this->buildState->getDependencyGraph();
+        // Should not import itself
+        self::assertNotContains('same-doc', $graph->getImports('same-doc'));
+    }
+
+    public function testIgnoresInterlinkReferences(): void
+    {
+        $projectNode = new ProjectNode();
+
+        // Create interlink reference (to external project)
+        // DocReferenceNode($targetDocument, $children = [], $interlinkDomain = '')
+        $docRef = new DocReferenceNode('external-doc', [], 'other-project');
+        $document = new DocumentNode('hash1', 'local-doc');
+        $document->addChildNode(new ParagraphNode([new InlineCompoundNode([$docRef])]));
+
+        $context = new CompilerContext($projectNode);
+        $this->pass->run([$document], $context);
+
+        $graph = $this->buildState->getDependencyGraph();
+        // Interlink references should be ignored
+        self::assertSame([], $graph->getImports('local-doc'));
+    }
+
+    public function testClearsOldImportsBeforeRecomputing(): void
+    {
+        $projectNode = new ProjectNode();
+        $titleNode = new TitleNode(InlineCompoundNode::getPlainTextInlineNode('Target'), 1, 'target');
+        $docEntry = new DocumentEntryNode('target', $titleNode);
+        $projectNode->addDocumentEntry($docEntry);
+
+        // First run: document references 'target'
+        $docRef = new DocReferenceNode('target', []);
+        $document = new DocumentNode('hash1', 'source');
+        $document->addChildNode(new ParagraphNode([new InlineCompoundNode([$docRef])]));
+
+        $context = new CompilerContext($projectNode);
+        $this->pass->run([$document], $context);
+
+        // Manually add another import to simulate previous state
+        $this->buildState->getDependencyGraph()->addImport('source', 'old-target');
+
+        // Second run: document no longer references anything
+        $document2 = new DocumentNode('hash2', 'source');
+        $this->pass->run([$document2], $context);
+
+        $graph = $this->buildState->getDependencyGraph();
+        // Old import should be cleared
+        self::assertNotContains('old-target', $graph->getImports('source'));
+    }
+
+    public function testReturnsDocumentsUnchanged(): void
+    {
+        $doc1 = new DocumentNode('h1', 'doc1');
+        $doc2 = new DocumentNode('h2', 'doc2');
+
+        $context = new CompilerContext(new ProjectNode());
+        $result = $this->pass->run([$doc1, $doc2], $context);
+
+        self::assertSame([$doc1, $doc2], $result);
+    }
+
+    public function testProcessesMultipleDocuments(): void
+    {
+        $projectNode = new ProjectNode();
+        $titleNode = new TitleNode(InlineCompoundNode::getPlainTextInlineNode('Shared'), 1, 'shared');
+        $docEntry = new DocumentEntryNode('shared', $titleNode);
+        $projectNode->addDocumentEntry($docEntry);
+
+        // Both documents reference 'shared'
+        $ref1 = new DocReferenceNode('shared', []);
+        $doc1 = new DocumentNode('h1', 'doc1');
+        $doc1->addChildNode(new ParagraphNode([new InlineCompoundNode([$ref1])]));
+
+        $ref2 = new DocReferenceNode('shared', []);
+        $doc2 = new DocumentNode('h2', 'doc2');
+        $doc2->addChildNode(new ParagraphNode([new InlineCompoundNode([$ref2])]));
+
+        $context = new CompilerContext($projectNode);
+        $this->pass->run([$doc1, $doc2], $context);
+
+        $graph = $this->buildState->getDependencyGraph();
+        self::assertContains('shared', $graph->getImports('doc1'));
+        self::assertContains('shared', $graph->getImports('doc2'));
+        // 'shared' should have both as dependents
+        $dependents = $graph->getDependents('shared');
+        self::assertContains('doc1', $dependents);
+        self::assertContains('doc2', $dependents);
+    }
+
+    public function testHandlesNestedReferences(): void
+    {
+        $projectNode = new ProjectNode();
+        $titleNode = new TitleNode(InlineCompoundNode::getPlainTextInlineNode('Nested Target'), 1, 'nested-target');
+        $docEntry = new DocumentEntryNode('nested-target', $titleNode);
+        $projectNode->addDocumentEntry($docEntry);
+
+        // Create nested structure: document > section > paragraph > reference
+        $ref = new DocReferenceNode('nested-target', []);
+        $section = new SectionNode(
+            new TitleNode(InlineCompoundNode::getPlainTextInlineNode('Section'), 1, 'section'),
+        );
+        $section->addChildNode(new ParagraphNode([new InlineCompoundNode([$ref])]));
+
+        $document = new DocumentNode('hash1', 'parent-doc');
+        $document->addChildNode($section);
+
+        $context = new
CompilerContext($projectNode); + $this->pass->run([$document], $context); + + $graph = $this->buildState->getDependencyGraph(); + self::assertContains('nested-target', $graph->getImports('parent-doc')); + } +} diff --git a/packages/guides/tests/unit/Compiler/Passes/ExportsCollectorPassTest.php b/packages/guides/tests/unit/Compiler/Passes/ExportsCollectorPassTest.php new file mode 100644 index 000000000..11bf20e2f --- /dev/null +++ b/packages/guides/tests/unit/Compiler/Passes/ExportsCollectorPassTest.php @@ -0,0 +1,367 @@ +buildState = new IncrementalBuildState(); + $this->hasher = new ContentHasher(); + $this->pass = new ExportsCollectorPass($this->buildState, $this->hasher); + } + + public function testGetPriorityIsLow(): void + { + // Priority should be low to run after all other passes + self::assertSame(10, $this->pass->getPriority()); + } + + public function testCollectsSectionTitles(): void + { + $document = new DocumentNode('hash1', 'docs/getting-started'); + $document->addChildNode( + new SectionNode( + new TitleNode(InlineCompoundNode::getPlainTextInlineNode('Getting Started'), 1, 'getting-started'), + ), + ); + $document->addChildNode( + new SectionNode( + new TitleNode(InlineCompoundNode::getPlainTextInlineNode('Installation'), 2, 'installation'), + ), + ); + + $context = new CompilerContext(new ProjectNode()); + $this->pass->run([$document], $context); + + $exports = $this->buildState->getExports('docs/getting-started'); + self::assertNotNull($exports); + self::assertArrayHasKey('getting-started', $exports->sectionTitles); + self::assertArrayHasKey('installation', $exports->sectionTitles); + self::assertSame('Getting Started', $exports->sectionTitles['getting-started']); + self::assertSame('Installation', $exports->sectionTitles['installation']); + } + + public function testCollectsCitations(): void + { + $document = new DocumentNode('hash1', 'docs/references'); + // CitationNode(array $value, string $name) + $document->addChildNode(new CitationNode([], 
'RFC7231')); + $document->addChildNode(new CitationNode([], 'ECMA-262')); + + $context = new CompilerContext(new ProjectNode()); + $this->pass->run([$document], $context); + + $exports = $this->buildState->getExports('docs/references'); + self::assertNotNull($exports); + self::assertContains('RFC7231', $exports->citations); + self::assertContains('ECMA-262', $exports->citations); + } + + public function testCollectsDuplicateCitations(): void + { + // Documents can have multiple citations with the same name (e.g., multiple + // references to the same source). The collector captures all occurrences. + $document = new DocumentNode('hash1', 'docs/with-duplicates'); + $document->addChildNode(new CitationNode([], 'RFC7231')); + $document->addChildNode(new CitationNode([], 'RFC7231')); // Duplicate + $document->addChildNode(new CitationNode([], 'ECMA-262')); + + $context = new CompilerContext(new ProjectNode()); + $this->pass->run([$document], $context); + + $exports = $this->buildState->getExports('docs/with-duplicates'); + self::assertNotNull($exports); + // Duplicates are preserved (not deduplicated) + self::assertCount(3, $exports->citations); + self::assertSame(['RFC7231', 'RFC7231', 'ECMA-262'], $exports->citations); + } + + public function testCollectsDocumentTitle(): void + { + $document = new DocumentNode('hash1', 'docs/index'); + $document->addChildNode( + new SectionNode( + new TitleNode(InlineCompoundNode::getPlainTextInlineNode('Main Documentation'), 1, 'main-documentation'), + ), + ); + + $context = new CompilerContext(new ProjectNode()); + $this->pass->run([$document], $context); + + $exports = $this->buildState->getExports('docs/index'); + self::assertNotNull($exports); + self::assertSame('Main Documentation', $exports->documentTitle); + } + + public function testComputesExportsHash(): void + { + $doc1 = new DocumentNode('hash1', 'doc1'); + $doc1->addChildNode( + new SectionNode( + new TitleNode(InlineCompoundNode::getPlainTextInlineNode('Title A'), 1, 
'title-a'), + ), + ); + + $doc2 = new DocumentNode('hash2', 'doc2'); + $doc2->addChildNode( + new SectionNode( + new TitleNode(InlineCompoundNode::getPlainTextInlineNode('Title B'), 1, 'title-b'), + ), + ); + + $context = new CompilerContext(new ProjectNode()); + $this->pass->run([$doc1, $doc2], $context); + + $exports1 = $this->buildState->getExports('doc1'); + $exports2 = $this->buildState->getExports('doc2'); + + self::assertNotNull($exports1); + self::assertNotNull($exports2); + // Different exports should have different hashes + self::assertNotSame($exports1->exportsHash, $exports2->exportsHash); + } + + public function testHashesSourceFileWhenInputDirSet(): void + { + $tempDir = sys_get_temp_dir() . '/exports-test-' . uniqid(); + mkdir($tempDir, 0755, true); + $sourceFile = $tempDir . '/index.rst'; + file_put_contents($sourceFile, 'Test content for hashing'); + + try { + $this->buildState->setInputDir($tempDir); + + $document = new DocumentNode('hash1', 'index'); + $context = new CompilerContext(new ProjectNode()); + $this->pass->run([$document], $context); + + $exports = $this->buildState->getExports('index'); + self::assertNotNull($exports); + self::assertNotEmpty($exports->contentHash); + self::assertGreaterThan(0, $exports->lastModified); + } finally { + unlink($sourceFile); + rmdir($tempDir); + } + } + + public function testReturnsDocumentsUnchanged(): void + { + $document = new DocumentNode('hash1', 'docs/test'); + + $context = new CompilerContext(new ProjectNode()); + $result = $this->pass->run([$document], $context); + + self::assertSame([$document], $result); + } + + public function testProcessesMultipleDocuments(): void + { + $doc1 = new DocumentNode('h1', 'doc1'); + $doc2 = new DocumentNode('h2', 'doc2'); + $doc3 = new DocumentNode('h3', 'doc3'); + + $context = new CompilerContext(new ProjectNode()); + $this->pass->run([$doc1, $doc2, $doc3], $context); + + self::assertNotNull($this->buildState->getExports('doc1')); + 
self::assertNotNull($this->buildState->getExports('doc2')); + self::assertNotNull($this->buildState->getExports('doc3')); + } + + public function testHandlesEmptyDocumentList(): void + { + $context = new CompilerContext(new ProjectNode()); + $result = $this->pass->run([], $context); + + self::assertSame([], $result); + self::assertSame([], $this->buildState->getAllExports()); + } + + public function testFallsBackToZeroLastModifiedWhenNoSourceFile(): void + { + // When no source file exists (no inputDir set), the pass falls back to + // hashing the document structure and uses current time. However, if + // filemtime() were to fail on an existing file, it would use 0. + // This test verifies the fallback behavior produces valid exports. + $document = new DocumentNode('hash1', 'docs/no-source-file'); + + // Don't set inputDir - simulates fallback path + $context = new CompilerContext(new ProjectNode()); + $this->pass->run([$document], $context); + + $exports = $this->buildState->getExports('docs/no-source-file'); + self::assertNotNull($exports); + // lastModified will be current time (>0) when fallback to document hash + self::assertGreaterThan(0, $exports->lastModified); + self::assertNotEmpty($exports->contentHash); + } + + public function testPreventsPathTraversalAttack(): void + { + $tempDir = sys_get_temp_dir() . '/exports-traversal-test-' . uniqid(); + mkdir($tempDir . '/docs', 0755, true); + + // Create a file inside the input directory + $insideFile = $tempDir . '/docs/legit.rst'; + file_put_contents($insideFile, 'Legitimate content'); + + // Create a file OUTSIDE the input directory (sibling) + $outsideFile = $tempDir . '/secret.txt'; + file_put_contents($outsideFile, 'SECRET DATA - should not be accessible'); + + try { + // Set input dir to the 'docs' subdirectory + $this->buildState->setInputDir($tempDir . 
'/docs'); + + // Try to access file via path traversal + $document = new DocumentNode('hash1', '../secret'); + $context = new CompilerContext(new ProjectNode()); + $this->pass->run([$document], $context); + + $exports = $this->buildState->getExports('../secret'); + self::assertNotNull($exports); + + // The hash should be computed from document serialization (fallback), + // NOT from the actual secret file content + $secretFileHash = $this->hasher->hashFile($outsideFile); + self::assertNotSame($secretFileHash, $exports->contentHash); + } finally { + if (file_exists($insideFile)) { + unlink($insideFile); + } + + if (file_exists($outsideFile)) { + unlink($outsideFile); + } + + rmdir($tempDir . '/docs'); + rmdir($tempDir); + } + } + + public function testPreventsPathTraversalWithSimilarDirectoryNames(): void + { + $tempDir = sys_get_temp_dir() . '/exports-prefix-test-' . uniqid(); + mkdir($tempDir . '/docs', 0755, true); + mkdir($tempDir . '/docs-internal', 0755, true); + + // Create file in the sibling directory with similar prefix + $siblingFile = $tempDir . '/docs-internal/secret.rst'; + file_put_contents($siblingFile, 'Internal secret content'); + + try { + // Set input dir to 'docs' + $this->buildState->setInputDir($tempDir . '/docs'); + + // Try to access file in docs-internal via traversal + $document = new DocumentNode('hash1', '../docs-internal/secret'); + $context = new CompilerContext(new ProjectNode()); + $this->pass->run([$document], $context); + + $exports = $this->buildState->getExports('../docs-internal/secret'); + self::assertNotNull($exports); + + // Should NOT have accessed the sibling directory file + $siblingHash = $this->hasher->hashFile($siblingFile); + self::assertNotSame($siblingHash, $exports->contentHash); + } finally { + if (file_exists($siblingFile)) { + unlink($siblingFile); + } + + rmdir($tempDir . '/docs-internal'); + rmdir($tempDir . 
'/docs'); + rmdir($tempDir); + } + } + + public function testPreventsSymlinkPathTraversalAttack(): void + { + $tempDir = sys_get_temp_dir() . '/exports-symlink-test-' . uniqid(); + mkdir($tempDir . '/docs', 0755, true); + + // Create a secret file OUTSIDE the input directory + $secretFile = $tempDir . '/secret.txt'; + file_put_contents($secretFile, 'SECRET DATA via symlink - should not be accessible'); + + // Create a symlink INSIDE the docs directory pointing to the secret file + $symlinkPath = $tempDir . '/docs/linked.rst'; + + // Skip test if symlinks not supported + if (!@symlink($secretFile, $symlinkPath)) { + self::markTestSkipped('Symlinks not supported on this system'); + } + + try { + // Verify symlink was created + self::assertTrue(is_link($symlinkPath)); + + // Set input dir to 'docs' + $this->buildState->setInputDir($tempDir . '/docs'); + + // Try to access the symlink + $document = new DocumentNode('hash1', 'linked'); + $context = new CompilerContext(new ProjectNode()); + $this->pass->run([$document], $context); + + $exports = $this->buildState->getExports('linked'); + self::assertNotNull($exports); + + // The realpath() check in findSourceFile() resolves symlinks, so + // the symlink target (/tmp/.../secret.txt) is outside the input dir + // and should be rejected. The content hash should be from document + // serialization (fallback), NOT from the secret file. + $secretFileHash = $this->hasher->hashFile($secretFile); + self::assertNotSame($secretFileHash, $exports->contentHash); + } finally { + if (is_link($symlinkPath)) { + unlink($symlinkPath); + } + + if (file_exists($secretFile)) { + unlink($secretFile); + } + + rmdir($tempDir . 
'/docs'); + rmdir($tempDir); + } + } +} diff --git a/packages/guides/tests/unit/Compiler/Passes/NodeTraversalTraitTest.php b/packages/guides/tests/unit/Compiler/Passes/NodeTraversalTraitTest.php new file mode 100644 index 000000000..6d20662a4 --- /dev/null +++ b/packages/guides/tests/unit/Compiler/Passes/NodeTraversalTraitTest.php @@ -0,0 +1,148 @@ +visitCount = 0; + } + + public function testTraversesShallowStructure(): void + { + $nodes = [ + $this->createMock(Node::class), + $this->createMock(Node::class), + ]; + + $this->traverseNodes($nodes, function (Node $node): void { + $this->visitCount++; + }); + + self::assertSame(2, $this->visitCount); + } + + public function testTraversesNestedStructure(): void + { + $child = $this->createMock(Node::class); + $parent = $this->createCompoundNodeMock([$child]); + $root = $this->createCompoundNodeMock([$parent]); + + $this->traverseNodes([$root], function (Node $node): void { + $this->visitCount++; + }); + + self::assertSame(3, $this->visitCount); + } + + public function testStopsAtMaxDepth(): void + { + // Create a deeply nested structure (deeper than MAX_TRAVERSAL_DEPTH of 100) + $deepestNode = $this->createMock(Node::class); + $current = $deepestNode; + + // Create 105 levels of nesting + for ($i = 0; $i < 105; $i++) { + $current = $this->createCompoundNodeMock([$current]); + } + + $this->traverseNodes([$current], function (Node $node): void { + $this->visitCount++; + }); + + // Should stop at depth 100, so we should visit at most 101 nodes + // (depth 0 through depth 100 inclusive) + self::assertLessThanOrEqual(101, $this->visitCount); + // But should visit more than just a few (sanity check) + self::assertGreaterThan(50, $this->visitCount); + } + + public function testHandlesNodesWithoutGetChildrenMethod(): void + { + $nodeWithoutChildren = $this->createMock(Node::class); + // This mock doesn't have getChildren method + + $this->traverseNodes([$nodeWithoutChildren], function (Node $node): void { + 
$this->visitCount++; + }); + + self::assertSame(1, $this->visitCount); + } + + public function testExactlyDepth100IsVisited(): void + { + // Create exactly 100 levels of nesting (depth 0 to 99) + // The deepest node is at depth 99, so all 100 nodes should be visited + $deepestNode = $this->createMock(Node::class); + $current = $deepestNode; + + // Create 99 more levels (total 100 nodes) + for ($i = 0; $i < 99; $i++) { + $current = $this->createCompoundNodeMock([$current]); + } + + $this->traverseNodes([$current], function (Node $node): void { + $this->visitCount++; + }); + + // All 100 nodes should be visited (depth 0 through 99) + self::assertSame(100, $this->visitCount); + } + + public function testDepth101IsNotVisited(): void + { + // Create 102 levels of nesting (depth 0 to 101) + // Nodes at depth 101 should NOT be visited due to the limit check at depth > 100 + $deepestNode = $this->createMock(Node::class); + $current = $deepestNode; + + // Create 101 more levels (total 102 nodes) + for ($i = 0; $i < 101; $i++) { + $current = $this->createCompoundNodeMock([$current]); + } + + $this->traverseNodes([$current], function (Node $node): void { + $this->visitCount++; + }); + + // Should visit exactly 101 nodes (depth 0 through 100) + // The node at depth 101 should be skipped + self::assertSame(101, $this->visitCount); + } + + /** + * @param Node[] $children + * + * @return CompoundNode + */ + private function createCompoundNodeMock(array $children): CompoundNode + { + $mock = $this->createMock(CompoundNode::class); + $mock->method('getChildren')->willReturn($children); + + return $mock; + } +}