From 4f2bcd05123e1f3983efb0c39f12aec527763ffd Mon Sep 17 00:00:00 2001 From: Darshan Date: Sun, 12 Oct 2025 11:12:20 +0530 Subject: [PATCH 1/5] fix: tests. --- src/Database/Database.php | 150 +++++++++++++++++++++++++++++++------- 1 file changed, 123 insertions(+), 27 deletions(-) diff --git a/src/Database/Database.php b/src/Database/Database.php index ca8ceac41..8695628c4 100644 --- a/src/Database/Database.php +++ b/src/Database/Database.php @@ -113,10 +113,11 @@ class Database /** * Check if bulk relationship write optimizations are enabled. * Controlled via environment variable DB_RELATIONSHIP_BULK_WRITES (default: enabled). + * + * @return bool */ private function shouldUseRelationshipBulkWrites(): bool { - // env for tests and comparison! $val = getenv('DB_RELATIONSHIP_BULK_WRITES'); if ($val === false || $val === '') { return true; @@ -4463,22 +4464,50 @@ private function createDocumentRelationships(Document $collection, Document $doc } } - // Split object relations to ID-only vs rich docs + // Process object relations: detect nested relationships and handle appropriately $idOnlyDocs = []; - $richDocs = []; + $richDocsNoNesting = []; + foreach ($objectRelations as $objRel) { - if ($this->isIdOnlyDocument($objRel)) { + // Check if document has nested relationships + $hasNestedRelationships = false; + foreach ($objRel->getAttributes() as $attrKey => $attrValue) { + if ($attrKey === '$id' || $attrKey === '$permissions') { + continue; + } + // Check if attribute is a Document or array of Documents (nested relationship) + if ($attrValue instanceof Document || + (is_array($attrValue) && !empty($attrValue) && isset($attrValue[0]) && $attrValue[0] instanceof Document)) { + $hasNestedRelationships = true; + break; + } + } + + if ($hasNestedRelationships) { + // Use original method for nested relationships - handles everything including links + $this->relateDocuments( + $collection, + $relatedCollection, + $key, + $document, + $objRel, + $relationType, + $twoWay, + $twoWayKey, + $side + ); + } else if ($this->isIdOnlyDocument($objRel)) { $idOnlyDocs[] = $objRel; } else { - $richDocs[] = $objRel; + $richDocsNoNesting[] = $objRel; } } // Ensure ID-only docs in batch (create missing) and collect their IDs $ensuredIds = $this->batchEnsureIdOnlyDocuments($relatedCollection, $idOnlyDocs, $document); - // Ensure rich docs (with attributes beyond $id/$permissions) one-by-one - foreach ($richDocs as $relatedDoc) { + // Ensure rich docs (without nesting) one-by-one + foreach ($richDocsNoNesting as $relatedDoc) { $ensuredIds[] = $this->ensureRelatedDocumentAndGetId( $relatedCollection, $relatedDoc, @@ -4506,6 +4535,7 @@ private function createDocumentRelationships(Document $collection, Document $doc ); break; case Database::RELATION_ONE_TO_MANY: + case Database::RELATION_MANY_TO_ONE: $this->batchUpdateBackReferences( $collection, $relatedCollection, @@ -4717,7 +4747,7 @@ private function relateDocumentsById( string $side, ): void { // Get the related document, will be empty on permissions failure - $related = $this->skipRelationships(fn () => $this->getDocument($relatedCollection->getId(), $relationId, [Query::select(['$id'])])); + $related = $this->skipRelationships(fn () => $this->getDocument($relatedCollection->getId(), $relationId)); if ($related->isEmpty() && $this->checkRelationshipsExist) { return; @@ -4762,7 +4792,19 @@ private function relateDocumentsById( /** - * Batch insert junction links for M2M relationships + * Batch insert junction links for many-to-many relationships. + * + * Optimizes bulk relationship creation by using bulk insert operations + * instead of individual document inserts for each junction link. + * + * @param Document $collection Parent collection + * @param Document $relatedCollection Related collection + * @param string $side Relationship side (parent/child) + * @param string $key Relationship attribute key + * @param string $twoWayKey Two-way relationship key + * @param string $documentId Parent document ID + * @param array $relationIds Array of related document IDs to link + * @return void */ private function batchCreateJunctionLinks( Document $collection, @@ -4808,7 +4850,20 @@ private function batchCreateJunctionLinks( } /** - * Batch update back-references for O2M/M2O relationships when IDs are provided + * Batch update back-references for one-to-many and many-to-one relationships. + * + * Optimizes bulk relationship updates by using SQL UPDATE with IN clause + * instead of individual document updates for each relationship. + * + * @param Document $collection Parent collection + * @param Document $relatedCollection Related collection + * @param string $relationType Type of relationship (O2M or M2O) + * @param string $side Relationship side (parent/child) + * @param string $key Relationship attribute key + * @param string $twoWayKey Two-way relationship key + * @param string $documentId Parent document ID + * @param array $relationIds Array of related document IDs to update + * @return void */ private function batchUpdateBackReferences( Document $collection, @@ -4850,11 +4905,11 @@ private function batchUpdateBackReferences( $relatedDocSecurity = $relatedCollection->getAttribute('documentSecurity', false); $idsAllowed = $relationIds; if ($relatedDocSecurity) { - $allowedDocs = $this->silent(fn () => $this->find( + // Skip authorization to match original relateDocumentsById behavior + $allowedDocs = Authorization::skip(fn () => $this->silent(fn () => $this->find( $relatedCollection->getId(), - [Query::select(['$id', '$updatedAt']), Query::equal('$id', $relationIds)], - forPermission: Database::PERMISSION_UPDATE - )); + [Query::select(['$id', '$updatedAt']), Query::equal('$id', $relationIds), Query::limit(count($relationIds))] + ))); $idsAllowed = array_map(fn ($d) => $d->getId(), $allowedDocs); if (empty($idsAllowed)) { return; // nothing to update @@ -4869,12 +4924,11 @@ private function batchUpdateBackReferences( } } } else { - // Ensure we have collection-level update permission; find will throw if unauthorized - $found = $this->silent(fn () => $this->find( + // Skip authorization to match original relateDocumentsById behavior + $found = Authorization::skip(fn () => $this->silent(fn () => $this->find( $relatedCollection->getId(), - [Query::select(['$id', '$updatedAt']), Query::equal('$id', $relationIds)], - forPermission: Database::PERMISSION_UPDATE - )); + [Query::select(['$id', '$updatedAt']), Query::equal('$id', $relationIds), Query::limit(count($relationIds))] + ))); // Conflict check vs request timestamp if (!\is_null($this->timestamp)) { foreach ($found as $docFound) { @@ -4931,7 +4985,10 @@ private function batchUpdateBackReferences( } /** - * Detect if a Document is ID-only (only '$id' and optional '$permissions'). + * Detect if a Document contains only ID and permissions metadata. + * + * @param Document $doc Document to check + * @return bool True if document has only $id and optionally $permissions */ private function isIdOnlyDocument(Document $doc): bool { @@ -4971,21 +5028,38 @@ private function batchEnsureIdOnlyDocuments(Document $relatedCollection, array $ // Fetch existing IDs in one call $existing = $this->skipRelationships(fn () => $this->find( $relatedCollection->getId(), - [Query::select(['$id']), Query::equal('$id', $ids)] + [Query::select(['$id']), Query::equal('$id', $ids), Query::limit(count($ids))] )); $found = array_map(fn (Document $d) => $d->getId(), $existing); - // Compute missing - // $missing = array_values(array_diff($ids, $found)); - // Do not attempt to create missing ID-only docs to preserve structure rules. - // Parity: missing related IDs are skipped for O2M, and allowed for M2M junctions like original behavior. + // Compute missing and create them + $missing = array_values(array_diff($ids, $found)); + foreach ($missing as $missingId) { + $perms = $idPerms[$missingId] ?? $parent->getPermissions(); + $newDoc = new Document([ + '$id' => $missingId, + '$permissions' => $perms + ]); + $this->skipRelationships(fn () => $this->createDocument($relatedCollection->getId(), $newDoc)); + } return $ids; } /** * Ensure a related Document exists (create or update) and return its ID. - * Does not perform link/junction writes or set back-references; only ensures the document. + * + * This method creates the related document if it doesn't exist, or updates it if it does. + * It does not handle junction table writes or back-reference updates. + * + * @param Document $relatedCollection Collection containing the related document + * @param Document $relation Related document to ensure + * @param Document $parent Parent document + * @param string $relationType Type of relationship + * @param bool $twoWay Whether this is a two-way relationship + * @param string $twoWayKey Two-way relationship key + * @param string $side Relationship side (parent/child) + * @return string ID of the ensured document */ private function ensureRelatedDocumentAndGetId( Document $relatedCollection, @@ -4996,6 +5070,26 @@ private function ensureRelatedDocumentAndGetId( string $twoWayKey, string $side ): string { + // Set back-reference attribute on the relation document BEFORE create/update + // This is critical for documents that only have READ permission (not UPDATE) + switch ($relationType) { + case Database::RELATION_ONE_TO_ONE: + if ($twoWay) { + $relation->setAttribute($twoWayKey, $parent->getId()); + } + break; + case Database::RELATION_ONE_TO_MANY: + if ($side === Database::RELATION_SIDE_PARENT) { + $relation->setAttribute($twoWayKey, $parent->getId()); + } + break; + case Database::RELATION_MANY_TO_ONE: + if ($side === Database::RELATION_SIDE_CHILD) { + $relation->setAttribute($twoWayKey, $parent->getId()); + } + break; + } + // Try to get the related document $related = $this->skipRelationships(fn () => $this->getDocument($relatedCollection->getId(), $relation->getId())); @@ -5005,11 +5099,13 @@ private function ensureRelatedDocumentAndGetId( $relation->setAttribute('$permissions', $parent->getPermissions()); } + // This method is only called for documents without nested relationships, + // so we can safely skip relationship processing $created = $this->skipRelationships(fn () => $this->createDocument($relatedCollection->getId(), $relation)); return $created->getId(); } - // If the related document exists and the data is not the same, update it (excluding back-reference) + // If the related document exists and the data is not the same, update it $needsUpdate = ($related->getAttributes() != $relation->getAttributes()); if ($needsUpdate) { foreach ($relation->getAttributes() as $attribute => $value) { From 13b680430f90a6c4e4671f421a04a67171c9eabd Mon Sep 17 00:00:00 2001 From: Darshan Date: Sun, 12 Oct 2025 13:08:48 +0530 Subject: [PATCH 2/5] feat: optimized processor. --- src/Database/Database.php | 114 +++++- src/Database/DocumentProcessor.php | 555 +++++++++++++++++++++++++++++ 2 files changed, 662 insertions(+), 7 deletions(-) create mode 100644 src/Database/DocumentProcessor.php diff --git a/src/Database/Database.php b/src/Database/Database.php index 8695628c4..9d17c8f9f 100644 --- a/src/Database/Database.php +++ b/src/Database/Database.php @@ -412,6 +412,11 @@ private function shouldUseRelationshipBulkWrites(): bool */ protected array $relationshipDeleteStack = []; + /** + * Reusable instance of single-pass DocumentProcessor + */ + private ?DocumentProcessor $singlePassProcessor = null; + private bool $adapterFiltersRegistered = false; /** * @param Adapter $adapter * @param Cache $cache @@ -3552,8 +3557,19 @@ public function getDocument(string $collection, string $id, array $queries = [], } } - $document = $this->casting($collection, $document); - $document = $this->decode($collection, $document, $selections); + if ($this->shouldUseSinglePassProcessor() && $this->canUseSinglePass($collection)) { + $this->singlePassProcessor ??= new DocumentProcessor(); + $document = $this->singlePassProcessor->processRead( + $collection, + $document, + fn (string $k) => $this->adapter->filter($k), + $selections, + $this->adapter->getSupportForCasting() + ); + } else { + $document = $this->casting($collection, $document); + $document = $this->decode($collection, $document, $selections); + } // Skip relationship population if we're in batch mode (relationships will be populated later) if (!$this->inBatchRelationshipPopulation && $this->resolveRelationships && !empty($relationships) && (empty($selects) || !empty($nestedSelections))) { @@ -7227,15 +7243,27 @@ public function find(string $collection, array $queries = [], string $forPermiss } } - foreach ($results as $index => $node) { - $node = $this->casting($collection, $node); - $node = $this->decode($collection, $node, $selections); + if ($this->shouldUseSinglePassProcessor() && $this->canUseSinglePass($collection)) { + $this->singlePassProcessor ??= new DocumentProcessor(); + $results = $this->singlePassProcessor->processReadBatch( + $collection, + $results, + fn (string $k) => $this->adapter->filter($k), + $selections, + $this->adapter->getSupportForCasting() + ); + } else { + foreach ($results as $index => $node) { + $node = $this->casting($collection, $node); + $node = $this->decode($collection, $node, $selections); + $results[$index] = $node; + } + } + foreach ($results as $index => $node) { if (!$node->isEmpty()) { $node->setAttribute('$collection', $collection->getId()); } - - $results[$index] = $node; } $this->trigger(self::EVENT_DOCUMENT_FIND, $results); @@ -7243,6 +7271,78 @@ public function find(string $collection, array $queries = [], string $forPermiss return $results; } + private function shouldUseSinglePassProcessor(): bool + { + $val = getenv('DB_SINGLE_PASS_PROCESSOR'); + if ($val === false || $val === '') { + return false; + } + $val = strtolower((string)$val); + if (in_array($val, ['0', 'false', 'off'], true)) { + return false; + } + // Do not use single-pass when relationship resolution is disabled + if (!$this->resolveRelationships) { + return false; + } + return true; + } + + private function canUseSinglePass(Document $collection): bool + { + // Register adapter-aware filters (spatial) once so support list is complete + if (!$this->adapterFiltersRegistered) { + DocumentProcessor::registerAdapterFilters($this->adapter); + $this->adapterFiltersRegistered = true; + } + + // Safe if: no relationships AND all filters are within DocumentProcessor supported set + $supported = DocumentProcessor::getSupportedFilters(); + $instanceFilterNames = \array_keys($this->getInstanceFilters()); + + // Guard against disabled relationship handling + if (!$this->resolveRelationships) { + return false; + } + + // Respect runtime filter flags to preserve decode semantics + if ($this->filter === false) { + return false; + } + + if (!empty($this->disabledFilters)) { + return false; + } + + $attributes = $collection->getAttribute('attributes', []); + foreach ($attributes as $attr) { + $filters = $attr['filters'] ?? []; + foreach ($filters as $filter) { + if (!in_array($filter, $supported, true)) { + return false; + } + // If an instance filter overrides behavior, skip single-pass to keep parity + if (in_array($filter, $instanceFilterNames, true)) { + return false; + } + } + } + + // Internal attributes allowed if within supported filters + foreach (Database::INTERNAL_ATTRIBUTES as $internal) { + $filters = $internal['filters'] ?? []; + foreach ($filters as $filter) { + if (!in_array($filter, $supported, true)) { + return false; + } + if (in_array($filter, $instanceFilterNames, true)) { + return false; + } + } + } + return true; + } + /** * Call callback for each document of the given collection * that matches the given queries diff --git a/src/Database/DocumentProcessor.php b/src/Database/DocumentProcessor.php new file mode 100644 index 000000000..73290af71 --- /dev/null +++ b/src/Database/DocumentProcessor.php @@ -0,0 +1,555 @@ + + */ + private static array $filters = []; + + /** + * Guard to ensure we only register filters once. + */ + private static bool $initialized = false; + + public function __construct() + { + self::ensureInitialized(); + } + + private static function ensureInitialized(): void + { + if (self::$initialized) { + return; + } + // Register standard filters (matching Database class filters semantics used on read) + // Note: simdjson provides ~10-15% improvement on JSON parsing but throws exceptions + // For production use with controlled JSON, consider json_decode for better compatibility + self::$filters["json"] = [ + "decode" => function (mixed $value) { + if (!is_string($value)) { + return $value; + } + + // Use standard json_decode for reliability + // (simdjson is faster but has compatibility issues with some edge cases) + $value = json_decode($value, true) ?? []; + + if (is_array($value) && array_key_exists('$id', $value)) { + return new Document($value); + } + if (is_array($value)) { + // Manual loop faster than array_map for small arrays + foreach ($value as $i => $item) { + if (is_array($item) && array_key_exists('$id', $item)) { + $value[$i] = new Document($item); + } + } + } + return $value; + }, + ]; + + self::$filters["datetime"] = [ + "decode" => function (?string $value) { + return DateTime::formatTz($value); + }, + ]; + + self::$initialized = true; + } + + /** + * Expose supported filter names for gating logic. + * + * @return array + */ + public static function getSupportedFilters(): array + { + self::ensureInitialized(); + return array_keys(self::$filters); + } + + /** + * Register adapter-aware decoders (spatial types) for single-pass processing. + * Safe to call multiple times; overwrites existing entries. + */ + public static function registerAdapterFilters(Adapter $adapter): void + { + self::ensureInitialized(); + + self::$filters[Database::VAR_POINT] = [ + 'decode' => function (?string $value) use ($adapter) { + if ($value === null) { + return null; + } + return $adapter->decodePoint($value); + }, + ]; + + self::$filters[Database::VAR_LINESTRING] = [ + 'decode' => function (?string $value) use ($adapter) { + if ($value === null) { + return null; + } + return $adapter->decodeLinestring($value); + }, + ]; + + self::$filters[Database::VAR_POLYGON] = [ + 'decode' => function (?string $value) use ($adapter) { + if ($value === null) { + return null; + } + return $adapter->decodePolygon($value); + }, + ]; + } + + /** + * Process document for read (decode + casting) in a single pass. + * + * @param Document $collection + * @param Document $document + * @return Document + */ + public function processRead( + Document $collection, + Document $document, + ?callable $keyMapper = null, + array $selections = [], + bool $skipCasting = false + ): Document + { + $attributes = $collection->getAttribute("attributes", []); + + // Pre-normalize relationship keys like Database::decode + $relationships = \array_filter( + $attributes, + fn($attribute) => ($attribute['type'] ?? '') === Database::VAR_RELATIONSHIP + ); + if (!empty($relationships) && $keyMapper !== null) { + foreach ($relationships as $relationship) { + $rKey = $relationship['$id'] ?? ''; + if ($rKey === '') { + continue; + } + $mapped = $keyMapper($rKey); + $hasOriginal = \array_key_exists($rKey, (array)$document); + $hasMapped = is_string($mapped) && \array_key_exists($mapped, (array)$document); + if ($hasOriginal || $hasMapped) { + $value = $document->getAttribute($rKey); + if ($value === null && $hasMapped) { + $value = $document->getAttribute($mapped); + } + if ($hasMapped) { + $document->removeAttribute($mapped); + } + $document->setAttribute($rKey, $value); + } + } + } + + // Iterate attributes and skip relationships without creating a new array + $filteredValues = []; + foreach ($attributes as $attribute) { + if (($attribute['type'] ?? '') === Database::VAR_RELATIONSHIP) { + continue; + } + $key = $attribute['$id'] ?? ""; + $type = $attribute["type"] ?? ""; + $array = $attribute["array"] ?? false; + $filters = $attribute["filters"] ?? []; + + if ($key === '$permissions') { + continue; + } + + // Prefer original key; fall back to adapter-mapped key if provided + $value = $document->getAttribute($key); + if ($value === null && $keyMapper !== null) { + $mapped = $keyMapper($key); + if (is_string($mapped) && $mapped !== $key) { + $value = $document->getAttribute($mapped); + if ($value !== null) { + $document->removeAttribute($mapped); + } + } + } + + if ($array) { + // In a single pass, if DB returns arrays as JSON strings, normalize once + if (is_string($value)) { + $decoded = json_decode($value, true); + $value = \is_array($decoded) ? $decoded : $value; + } + if (!\is_array($value)) { + $value = $value === null ? [] : [$value]; + } + + $revFilters = empty($filters) ? [] : array_reverse($filters); + foreach ($value as $i => $node) { + foreach ($revFilters as $filter) { + $node = $this->decodeAttribute($filter, $node); + } + $value[$i] = $skipCasting ? $node : $this->castNode($type, $node); + } + $filteredValues[$key] = $value; + if (empty($selections) || \in_array($key, $selections, true) || \in_array('*', $selections, true)) { + $document->setAttribute($key, $value); + } + } else { + // Apply filters for non-array values + if (!empty($filters)) { + foreach (array_reverse($filters) as $filter) { + $value = $this->decodeAttribute($filter, $value); + } + } + $final = $skipCasting ? $value : $this->castNode($type, $value); + $filteredValues[$key] = $final; + if (empty($selections) || \in_array($key, $selections, true) || \in_array('*', $selections, true)) { + $document->setAttribute($key, $final); + } + } + } + + // Apply internal attributes at the end to keep behavior consistent + foreach (Database::INTERNAL_ATTRIBUTES as $attribute) { + $key = $attribute['$id'] ?? ""; + $type = $attribute["type"] ?? ""; + $array = $attribute["array"] ?? false; + $filters = $attribute["filters"] ?? []; + + if ($key === '$permissions') { + continue; + } + + $value = $document->getAttribute($key); + if ($value === null && $keyMapper !== null) { + $mapped = $keyMapper($key); + if (is_string($mapped) && $mapped !== $key) { + $value = $document->getAttribute($mapped); + if ($value !== null) { + $document->removeAttribute($mapped); + } + } + } + + if ($array) { + if (is_string($value)) { + $decoded = json_decode($value, true); + $value = \is_array($decoded) ? $decoded : $value; + } + if (!\is_array($value)) { + $value = $value === null ? [] : [$value]; + } + + $revFilters = empty($filters) ? [] : array_reverse($filters); + foreach ($value as $i => $node) { + foreach ($revFilters as $filter) { + $node = $this->decodeAttribute($filter, $node); + } + $value[$i] = $skipCasting ? $node : $this->castNode($type, $node); + } + if (empty($selections) || \in_array($key, $selections, true) || \in_array('*', $selections, true)) { + $document->setAttribute($key, $value); + } + } else { + if (!empty($filters)) { + foreach (array_reverse($filters) as $filter) { + $value = $this->decodeAttribute($filter, $value); + } + } + $final = $skipCasting ? $value : $this->castNode($type, $value); + if (empty($selections) || \in_array($key, $selections, true) || \in_array('*', $selections, true)) { + $document->setAttribute($key, $final); + } + } + } + + // Relationship selection semantics: if selecting relationship attributes, also include + // non-relationship attributes even if not explicitly selected. + $hasRelationshipSelections = false; + if (!empty($selections)) { + foreach ($selections as $sel) { + if (\str_contains($sel, '.')) { $hasRelationshipSelections = true; break; } + } + } + if ($hasRelationshipSelections && !empty($selections) && !\in_array('*', $selections, true)) { + foreach ($collection->getAttribute('attributes', []) as $attribute) { + $key = $attribute['$id'] ?? ''; + if (($attribute['type'] ?? '') === Database::VAR_RELATIONSHIP || $key === '$permissions') { + continue; + } + if (!\in_array($key, $selections, true) && \array_key_exists($key, $filteredValues)) { + $document->setAttribute($key, $filteredValues[$key]); + } + } + } + + return $document; + } + + /** + * Prepare a per-collection plan for batch processing. + * + * @return array{ + * relationships: array, + * attrs: array, selected:bool}>, + * internals: array, selected:bool}>, + * skipCasting: bool + * } + */ + private function preparePlan(Document $collection, ?callable $keyMapper, array $selections, bool $skipCasting): array + { + $attributes = $collection->getAttribute('attributes', []); + + $relationships = []; + $attrs = []; + foreach ($attributes as $attr) { + $type = $attr['type'] ?? ''; + $key = $attr['$id'] ?? ''; + if ($type === Database::VAR_RELATIONSHIP) { + $mapped = ($keyMapper !== null) ? $keyMapper($key) : null; + $relationships[] = [ + 'key' => $key, + 'mapped' => (is_string($mapped) && $mapped !== $key) ? $mapped : null, + ]; + continue; + } + $mapped = ($keyMapper !== null) ? $keyMapper($key) : null; + $attrs[] = [ + 'key' => $key, + 'mapped' => (is_string($mapped) && $mapped !== $key) ? $mapped : null, + 'type' => $type, + 'array' => (bool)($attr['array'] ?? false), + 'filters' => array_reverse($attr['filters'] ?? []), + 'selected' => empty($selections) || in_array($key, $selections, true) || in_array('*', $selections, true), + ]; + } + + $internals = []; + foreach (Database::INTERNAL_ATTRIBUTES as $attr) { + $key = $attr['$id'] ?? ''; + if ($key === '$permissions') { + continue; + } + $internals[] = [ + 'key' => $key, + 'type' => $attr['type'] ?? '', + 'array' => (bool)($attr['array'] ?? false), + 'filters' => array_reverse($attr['filters'] ?? []), + 'selected' => empty($selections) || in_array($key, $selections, true) || in_array('*', $selections, true), + ]; + } + + // Detect relationship selections + $hasRelationshipSelections = false; + if (!empty($selections)) { + foreach ($selections as $sel) { + if (\str_contains($sel, '.')) { $hasRelationshipSelections = true; break; } + } + } + + return [ + 'relationships' => $relationships, + 'attrs' => $attrs, + 'internals' => $internals, + 'skipCasting' => $skipCasting, + 'hasRelSelects' => $hasRelationshipSelections && !empty($selections) && !\in_array('*', $selections, true), + ]; + } + + /** + * Batch version of processRead preserving parity semantics. + * + * @param array $documents + * @return array + */ + public function processReadBatch( + Document $collection, + array $documents, + ?callable $keyMapper = null, + array $selections = [], + bool $skipCasting = false + ): array { + if (empty($documents)) { + return $documents; + } + + $plan = $this->preparePlan($collection, $keyMapper, $selections, $skipCasting); + + foreach ($documents as $idx => $document) { + if (!$document instanceof Document) { + continue; + } + + // Relationship key normalization + if (!empty($plan['relationships'])) { + foreach ($plan['relationships'] as $rel) { + $key = $rel['key']; + $mapped = $rel['mapped'] ?? null; + $hasOriginal = array_key_exists($key, (array)$document); + $hasMapped = $mapped && array_key_exists($mapped, (array)$document); + if ($hasOriginal || $hasMapped) { + $value = $document->getAttribute($key); + if ($value === null && $hasMapped) { + $value = $document->getAttribute($mapped); + } + if ($hasMapped) { + $document->removeAttribute($mapped); + } + $document->setAttribute($key, $value); + } + } + } + + // Regular attributes + $filteredValues = []; + foreach ($plan['attrs'] as $a) { + $key = $a['key']; + if ($key === '$permissions') { + continue; + } + $value = $document->getAttribute($key); + if ($value === null && !empty($a['mapped'])) { + $value = $document->getAttribute($a['mapped']); + if ($value !== null) { + $document->removeAttribute($a['mapped']); + } + } + + if ($a['array']) { + if (is_string($value)) { + $decoded = json_decode($value, true); + $value = is_array($decoded) ? $decoded : $value; + } + if (!is_array($value)) { + $value = $value === null ? [] : [$value]; + } + foreach ($value as $i => $node) { + foreach ($a['filters'] as $filter) { + $node = $this->decodeAttribute($filter, $node); + } + $value[$i] = $plan['skipCasting'] ? $node : $this->castNode($a['type'], $node); + } + $filteredValues[$key] = $value; + if ($a['selected']) { + $document->setAttribute($key, $value); + } + } else { + foreach ($a['filters'] as $filter) { + $value = $this->decodeAttribute($filter, $value); + } + $final = $plan['skipCasting'] ? $value : $this->castNode($a['type'], $value); + $filteredValues[$key] = $final; + if ($a['selected']) { + $document->setAttribute($key, $final); + } + } + } + + // Internal attributes + foreach ($plan['internals'] as $a) { + $key = $a['key']; + $value = $document->getAttribute($key); + + if ($a['array']) { + if (is_string($value)) { + $decoded = json_decode($value, true); + $value = is_array($decoded) ? $decoded : $value; + } + if (!is_array($value)) { + $value = $value === null ? [] : [$value]; + } + foreach ($value as $i => $node) { + foreach ($a['filters'] as $filter) { + $node = $this->decodeAttribute($filter, $node); + } + $value[$i] = $plan['skipCasting'] ? $node : $this->castNode($a['type'], $node); + } + if ($a['selected']) { + $document->setAttribute($key, $value); + } + } else { + foreach ($a['filters'] as $filter) { + $value = $this->decodeAttribute($filter, $value); + } + $final = $plan['skipCasting'] ? $value : $this->castNode($a['type'], $value); + if ($a['selected']) { + $document->setAttribute($key, $final); + } + } + } + + // Relationship selection semantic adjustment + if (!empty($plan['hasRelSelects'])) { + foreach ($plan['attrs'] as $a) { + if ($a['selected']) { continue; } + $key = $a['key']; + if (\array_key_exists($key, $filteredValues)) { + $document->setAttribute($key, $filteredValues[$key]); + } + } + } + + $documents[$idx] = $document; + } + + return $documents; + } + + /** + * Apply a decode filter to a value + * + * @param string $filter + * @param mixed $value + * @return mixed + */ + protected function decodeAttribute(string $filter, mixed $value): mixed + { + if (!array_key_exists($filter, self::$filters)) { + return $value; // Unknown filter, pass through + } + return self::$filters[$filter]["decode"]($value); + } + + private function castNode(string $type, mixed $node): mixed + { + // Preserve null values like legacy decode does + if ($node === null) { + return null; + } + + switch ($type) { + case Database::VAR_ID: + return (string) $node; + case Database::VAR_BOOLEAN: + return (bool) $node; + case Database::VAR_INTEGER: + return (int) $node; + case Database::VAR_FLOAT: + return (float) $node; + default: + return $node; + } + } + + /** + * Add a custom filter + * + * @param string $name + * @param callable $decode + * @return void + */ + public static function addFilter(string $name, callable $decode): void + { + self::$filters[$name] = ["decode" => $decode]; + } +} From cf1b684ba2d8cbb8e8ca67bdea704d06005f6233 Mon Sep 17 00:00:00 2001 From: Darshan Date: Sun, 12 Oct 2025 13:09:33 +0530 Subject: [PATCH 3/5] add: tests. --- .../document_processor_benchmark.php | 462 ++++++++++++++++++ tests/benchmarking/run-processor-benchmark.sh | 25 + tests/unit/DocumentProcessorTest.php | 368 ++++++++++++++ 3 files changed, 855 insertions(+) create mode 100755 tests/benchmarking/document_processor_benchmark.php create mode 100755 tests/benchmarking/run-processor-benchmark.sh create mode 100644 tests/unit/DocumentProcessorTest.php diff --git a/tests/benchmarking/document_processor_benchmark.php b/tests/benchmarking/document_processor_benchmark.php new file mode 100755 index 000000000..489225016 --- /dev/null +++ b/tests/benchmarking/document_processor_benchmark.php @@ -0,0 +1,462 @@ + ["docs" => 1000, "arrays" => 1, "array_size" => 10], + "MEDIUM" => ["docs" => 5000, "arrays" => 1, "array_size" => 10], + "HEAVY" => ["docs" => 10000, "arrays" => 2, "array_size" => 20], + // Spatial-heavy scenario: adds spatial attributes with decode filters + "SPATIAL" => ["docs" => 5000, "arrays" => 1, "array_size" => 10, "spatial" => true], +]; + +if (!isset($levels[$level])) { + fwrite(STDERR, "Invalid level: {$level}\n"); + exit(1); +} + +$cfg = $levels[$level]; +$docs = $cfg["docs"]; +$arraySize = $cfg["array_size"]; + +// Build a realistic collection schema with filters (optionally spatial) +function buildCollection(bool $spatial = false): Document { + $attributes = []; + for ($i = 1; $i <= 3; $i++) { + $attributes[] = ['$id' => "s{$i}", "type" => Database::VAR_STRING, "array" => false, "filters" => []]; + } + $attributes[] = ['$id' => "jsonData", "type" => Database::VAR_STRING, "array" => false, "filters" => ["json"]]; + $attributes[] = ['$id' => "jsonArray", "type" => Database::VAR_STRING, "array" => true, "filters" => ["json"]]; + for ($i = 1; $i <= 2; $i++) { + $attributes[] = ['$id' => "n{$i}", "type" => Database::VAR_INTEGER, "array" => false, "filters" => []]; + } + $attributes[] = ['$id' => "b1", "type" => Database::VAR_BOOLEAN, "array" => false, "filters" => []]; + $attributes[] = ['$id' => "d1", "type" => Database::VAR_DATETIME, "array" => false, "filters" => ["datetime"]]; + $attributes[] = ['$id' => "d2", "type" => Database::VAR_DATETIME, "array" => false, "filters" => ["datetime"]]; + $attributes[] = ['$id' => "arr", "type" => Database::VAR_STRING, "array" => true, "filters" => []]; + + if ($spatial) { + $attributes[] = ['$id' => 'p1', 'type' => Database::VAR_POINT, 'array' => false, 'filters' => [Database::VAR_POINT]]; + $attributes[] = ['$id' => 'ls1', 'type' => Database::VAR_LINESTRING, 'array' => false, 'filters' => [Database::VAR_LINESTRING]]; + $attributes[] = ['$id' => 'pg1', 'type' => Database::VAR_POLYGON, 'array' => false, 'filters' => [Database::VAR_POLYGON]]; + } + + return new Document(["attributes" => $attributes]); +} + +function makeDoc(int $i, int $arraySize, bool $spatial = false): Document +{ + $d = new Document([ + '$id' => "doc{$i}", + "s1" => "alpha{$i}", + "s2" => "beta{$i}", + "s3" => "gamma{$i}", + "jsonData" => ["nested" => "data", "count" => $i], + "jsonArray" => [["id" => 1], ["id" => 2]], + "n1" => $i, + "n2" => $i * 2, + "b1" => $i % 2 === 0, + "d1" => "2024-01-15 10:30:00", + "d2" => "2024-01-15 15:45:30", + "arr" => array_map(fn($k) => "it{$k}", range(1, $arraySize)), + ]); + + if ($spatial) { + // Encode spatial as JSON strings to simulate adapter-encoded values + $lon = ($i % 180) - 90; $lat = (($i * 2) % 180) - 90; + $d->setAttribute('p1', json_encode(['type' => 'Point', 'coordinates' => [$lon, $lat]])); + $d->setAttribute('ls1', json_encode(['type' => 'LineString', 'coordinates' => [[$lon, $lat], [$lon + 1, $lat + 1], [$lon + 2, $lat + 2]]])); + $d->setAttribute('pg1', json_encode(['type' => 'Polygon', 'coordinates' => [[[$lon, $lat], [$lon + 1, $lat], [$lon + 1, $lat + 1], [$lon, $lat + 1], [$lon, $lat]]]])); + } + + return $d; +} + +$collection = buildCollection((bool)($cfg['spatial'] ?? false)); + +function measure(callable $fn, int $repeat = 1, int $warmup = 0): array { + for ($w = 0; $w < $warmup; $w++) { $fn(); } + $times = []; + for ($r = 0; $r < $repeat; $r++) { + $start = microtime(true); + $fn(); + $times[] = (microtime(true) - $start) * 1000; + } + sort($times); + return $times; // sorted ascending +} + +// Baseline and optimized functions +$baseline = new BaselineProcessor(); +$processor = new DocumentProcessorWithFilters(); + +$spatialEnabled = (bool)($cfg['spatial'] ?? false); + +$baselineTimes = measure(function () use ($baseline, $collection, $docs, $arraySize, $spatialEnabled) { + for ($i = 1; $i <= $docs; $i++) { + $doc = makeDoc($i, $arraySize, $spatialEnabled); + $doc = $baseline->decodeBaseline($collection, $doc); + $doc = $baseline->castingBaseline($collection, $doc); + } +}, $repeat, $warmup); + +$optimizedTimes = measure(function () use ($processor, $collection, $docs, $arraySize, $spatialEnabled) { + for ($i = 1; $i <= $docs; $i++) { + $doc = makeDoc($i, $arraySize, $spatialEnabled); + $doc = $processor->processRead($collection, $doc); + } +}, $repeat, $warmup); + +$baselineMs = (int) round($baselineTimes[(int) floor((count($baselineTimes)-1)/2)]); +$optMs = (int) round($optimizedTimes[(int) floor((count($optimizedTimes)-1)/2)]); + +$gain = $baselineMs > 0 ? (($baselineMs - $optMs) / $baselineMs) * 100 : 0; + +echo "\nDocument Processor Benchmark - {$level} (WITH FILTERS)\n"; +echo "+---------+----------+----------+--------+\n"; +echo "| Metric | Baseline | Optimized| Gain |\n"; +echo "+---------+----------+----------+--------+\n"; +printf("| %-7s | %8d | %8d | %6.1f%% |\n", "time", (int) $baselineMs, (int) $optMs, $gain); +if ($repeat > 1) { + echo "(median of {$repeat} runs, warmup={$warmup})\n"; +} +echo "\n"; + +// Optional parity assert mode +if ($assertParity) { + $checks = min($docs, 1000); + for ($i = 1; $i <= $checks; $i++) { + $docA = makeDoc($i, $arraySize, $spatialEnabled); + $base = $baseline->decodeBaseline($collection, clone $docA); + $base = $baseline->castingBaseline($collection, $base); + + $docB = makeDoc($i, $arraySize, $spatialEnabled); + $opt = $processor->processRead($collection, $docB); + + $a = $base->getArrayCopy(); + $b = $opt->getArrayCopy(); + if ($a != $b) { + fwrite(STDERR, "Parity mismatch on doc {$i}\n"); + // Find first differing key + foreach ($a as $k => $v) { + $va = $a[$k] ?? null; + $vb = $b[$k] ?? null; + if ($va != $vb) { + fwrite(STDERR, " - Attribute '{$k}' differs\n"); + break; + } + } + exit(1); + } + } + echo "Parity assertion passed on {$checks} docs.\n\n"; +} +echo "+---------+----------+----------+--------+\n\n"; + +/** + * DocumentProcessor with proper filter support for fair comparison + */ +class DocumentProcessorWithFilters +{ + private static array $filters = []; + + public function __construct() + { + // Register the same filters as Database class + self::$filters["json"] = [ + "decode" => function (mixed $value) { + if (!is_string($value)) { + return $value; + } + $value = json_decode($value, true) ?? []; + if (array_key_exists('$id', $value)) { + return new Document($value); + } else { + $value = array_map(function ($item) { + if (is_array($item) && array_key_exists('$id', $item)) { + return new Document($item); + } + return $item; + }, $value); + } + return $value; + }, + ]; + + self::$filters["datetime"] = [ + "decode" => function (?string $value) { + return DateTime::formatTz($value); + }, + ]; + + // Spatial-like decoders for benchmark (decode JSON strings) + self::$filters[Database::VAR_POINT] = [ + 'decode' => function (?string $value) { + return is_string($value) ? (json_decode($value, true) ?? $value) : $value; + }, + ]; + self::$filters[Database::VAR_LINESTRING] = [ + 'decode' => function (?string $value) { + return is_string($value) ? (json_decode($value, true) ?? $value) : $value; + }, + ]; + self::$filters[Database::VAR_POLYGON] = [ + 'decode' => function (?string $value) { + return is_string($value) ? (json_decode($value, true) ?? $value) : $value; + }, + ]; + } + + public function processRead(Document $collection, Document $document): Document + { + $attributes = \array_filter( + $collection->getAttribute("attributes", []), + fn($attribute) => $attribute["type"] !== Database::VAR_RELATIONSHIP, + ); + + foreach (Database::INTERNAL_ATTRIBUTES as $attribute) { + $attributes[] = $attribute; + } + + foreach ($attributes as $attribute) { + $key = $attribute['$id'] ?? ""; + $type = $attribute["type"] ?? ""; + $array = $attribute["array"] ?? false; + $filters = $attribute["filters"] ?? []; + + if ($key === '$permissions') { + continue; + } + + $value = $document->getAttribute($key); + + if ($array) { + if (is_string($value)) { + $decoded = json_decode($value, true); + $value = \is_array($decoded) ? $decoded : $value; + } + if (!\is_array($value)) { + $value = $value === null ? [] : [$value]; + } + + foreach ($value as $i => $node) { + // Apply filters in reverse order like Database::decode + foreach (array_reverse($filters) as $filter) { + $node = $this->decodeAttribute($filter, $node); + } + $value[$i] = $this->castNode($type, $node); + } + $document->setAttribute($key, $value); + } else { + // Apply filters + foreach (array_reverse($filters) as $filter) { + $value = $this->decodeAttribute($filter, $value); + } + $document->setAttribute($key, $this->castNode($type, $value)); + } + } + + return $document; + } + + protected function decodeAttribute(string $filter, mixed $value): mixed + { + if (!array_key_exists($filter, self::$filters)) { + return $value; // Unknown filter, pass through + } + return self::$filters[$filter]["decode"]($value); + } + + private function castNode(string $type, mixed $node): mixed + { + switch ($type) { + case Database::VAR_ID: + return (string) $node; + case Database::VAR_BOOLEAN: + return (bool) $node; + case Database::VAR_INTEGER: + return (int) $node; + case Database::VAR_FLOAT: + return (float) $node; + default: + return $node; + } + } +} + +/** + * Baseline processor that properly handles filters for fair comparison + */ +class BaselineProcessor +{ + private static array $filters = []; + + public function __construct() + { + // Register the same filters as Database class + self::$filters["json"] = [ + "decode" => function (mixed $value) { + if (!is_string($value)) { + return $value; + } + $value = json_decode($value, true) ?? []; + if (array_key_exists('$id', $value)) { + return new Document($value); + } else { + $value = array_map(function ($item) { + if (is_array($item) && array_key_exists('$id', $item)) { + return new Document($item); + } + return $item; + }, $value); + } + return $value; + }, + ]; + + self::$filters["datetime"] = [ + "decode" => function (?string $value) { + return DateTime::formatTz($value); + }, + ]; + + // Spatial-like decoders for benchmark (decode JSON strings) + self::$filters[Database::VAR_POINT] = [ + 'decode' => function (?string $value) { + return is_string($value) ? (json_decode($value, true) ?? $value) : $value; + }, + ]; + self::$filters[Database::VAR_LINESTRING] = [ + 'decode' => function (?string $value) { + return is_string($value) ? (json_decode($value, true) ?? $value) : $value; + }, + ]; + self::$filters[Database::VAR_POLYGON] = [ + 'decode' => function (?string $value) { + return is_string($value) ? (json_decode($value, true) ?? $value) : $value; + }, + ]; + } + + public function decodeBaseline(Document $collection, Document $document): Document + { + $attributes = \array_filter( + $collection->getAttribute("attributes", []), + fn($attribute) => $attribute["type"] !== Database::VAR_RELATIONSHIP, + ); + foreach (Database::INTERNAL_ATTRIBUTES as $attribute) { + $attributes[] = $attribute; + } + foreach ($attributes as $attribute) { + $key = $attribute['$id'] ?? ""; + $array = $attribute["array"] ?? false; + $filters = $attribute["filters"] ?? []; + + if ($key === '$permissions') { + continue; + } + $value = $document->getAttribute($key); + $value = $array ? $value : [$value]; + $value = is_null($value) ? [] : $value; + + // PROPERLY APPLY FILTERS like Database::decode does + foreach ($value as $index => $node) { + foreach (array_reverse($filters) as $filter) { + $node = $this->decodeAttribute($filter, $node); + } + $value[$index] = $node; + } + + $document->setAttribute($key, $array ? $value : $value[0]); + } + return $document; + } + + public function castingBaseline(Document $collection, Document $document): Document + { + $attributes = $collection->getAttribute("attributes", []); + foreach (Database::INTERNAL_ATTRIBUTES as $attribute) { + $attributes[] = $attribute; + } + foreach ($attributes as $attribute) { + $key = $attribute['$id'] ?? ""; + $type = $attribute["type"] ?? ""; + $array = $attribute["array"] ?? false; + if ($key === '$permissions') { + continue; + } + $value = $document->getAttribute($key); + if ($array) { + if (is_string($value)) { + $decoded = json_decode($value, true); + $value = \is_array($decoded) ? $decoded : $value; + } + if (!\is_array($value)) { + $value = $value === null ? [] : [$value]; + } + foreach ($value as $i => $node) { + $value[$i] = $this->castNode($type, $node); + } + $document->setAttribute($key, $value); + } else { + $document->setAttribute($key, $this->castNode($type, $value)); + } + } + return $document; + } + + protected function decodeAttribute(string $filter, mixed $value): mixed + { + if (!array_key_exists($filter, self::$filters)) { + return $value; // Unknown filter, pass through + } + return self::$filters[$filter]["decode"]($value); + } + + private function castNode(string $type, mixed $node): mixed + { + switch ($type) { + case Database::VAR_ID: + return (string) $node; + case Database::VAR_BOOLEAN: + return (bool) $node; + case Database::VAR_INTEGER: + return (int) $node; + case Database::VAR_FLOAT: + return (float) $node; + default: + return $node; + } + } +} diff --git a/tests/benchmarking/run-processor-benchmark.sh b/tests/benchmarking/run-processor-benchmark.sh new file mode 100755 index 000000000..769b229a4 --- /dev/null +++ b/tests/benchmarking/run-processor-benchmark.sh @@ -0,0 +1,25 @@ +#!/usr/bin/env bash +set -euo pipefail + +# Usage: +# ./tests/benchmarking/run-processor-benchmark.sh [LEVEL] [--assert] [--repeat=N] [--warmup=N] +# Levels: +# LIGHT | MEDIUM | HEAVY | SPATIAL (default: MEDIUM) + +LEVEL=${1:-MEDIUM} +shift || true + +printf "━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\n" +printf " Document Processor Benchmark\n" +printf "━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\n" + +printf "\n🔧 Starting Docker containers...\n" +docker-compose up -d --build --remove-orphans >/dev/null +printf "✅ Docker containers ready\n" + +# Pass through any extra flags to the PHP benchmark script +docker-compose exec -T tests php tests/benchmarking/document_processor_benchmark.php "${LEVEL}" "$@" + +printf "\n━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\n" +printf " Benchmark Complete\n" +printf "━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\n" diff --git a/tests/unit/DocumentProcessorTest.php b/tests/unit/DocumentProcessorTest.php new file mode 100644 index 000000000..1dff940c8 --- /dev/null +++ b/tests/unit/DocumentProcessorTest.php @@ -0,0 +1,368 @@ +collection = new Document([ + '$id' => 'test_collection', + 'attributes' => [ + [ + '$id' => 'name', + 'type' => Database::VAR_STRING, + 'size' => 255, + 'required' => false, + 'signed' => true, + 'array' => false, + 'filters' => [], + ], + [ + '$id' => 'age', + 'type' => Database::VAR_INTEGER, + 'size' => 0, + 'required' => false, + 'signed' => true, + 'array' => false, + 'filters' => [], + ], + [ + '$id' => 'active', + 'type' => Database::VAR_BOOLEAN, + 'size' => 0, + 'required' => false, + 'signed' => true, + 'array' => false, + 'filters' => [], + ], + [ + '$id' => 'tags', + 'type' => Database::VAR_STRING, + 'size' => 255, + 'required' => false, + 'signed' => true, + 'array' => true, + 'filters' => ['json'], + ], + [ + '$id' => 'metadata', + 'type' => Database::VAR_STRING, + 'size' => 16777216, + 'required' => false, + 'signed' => true, + 'array' => false, + 'filters' => ['json'], + ], + [ + '$id' => 'created_at', + 'type' => Database::VAR_DATETIME, + 'size' => 0, + 'required' => false, + 'signed' => false, + 'array' => false, + 'filters' => ['datetime'], + ], + [ + '$id' => 'scores', + 'type' => Database::VAR_FLOAT, + 'size' => 0, + 'required' => false, + 'signed' => true, + 'array' => true, + 'filters' => [], + ], + ], + ]); + + $this->processor = new DocumentProcessor(); + + // Use reflection to access private methods for comparison + $this->database = $this->createPartialMock(Database::class, []); + } + + public function testStringAttributeEquivalence(): void + { + $doc = new Document([ + '$id' => 'doc1', + 'name' => 'John Doe', + ]); + + $legacyResult = $this->legacyProcess(clone $doc); + $processorResult = $this->processor->processRead($this->collection, clone $doc, null, [], false); + + $this->assertEquals($legacyResult->getAttribute('name'), $processorResult->getAttribute('name')); + } + + public function testIntegerCastingEquivalence(): void + { + $doc = new Document([ + '$id' => 'doc1', + 'age' => '25', // String that should be cast to int + ]); + + $legacyResult = $this->legacyProcess(clone $doc); + $processorResult = $this->processor->processRead($this->collection, clone $doc, null, [], false); + + $this->assertSame($legacyResult->getAttribute('age'), $processorResult->getAttribute('age')); + $this->assertIsInt($processorResult->getAttribute('age')); + } + + public function testBooleanCastingEquivalence(): void + { + $doc = new Document([ + '$id' => 'doc1', + 'active' => 1, // Int that should be cast to bool + ]); + + $legacyResult = $this->legacyProcess(clone $doc); + $processorResult = $this->processor->processRead($this->collection, clone $doc, null, [], false); + + $this->assertSame($legacyResult->getAttribute('active'), $processorResult->getAttribute('active')); + $this->assertIsBool($processorResult->getAttribute('active')); + } + + public function testJsonFilterEquivalence(): void + { + $doc = new Document([ + '$id' => 'doc1', + 'metadata' => '{"key":"value","nested":{"foo":"bar"}}', + ]); + + $legacyResult = $this->legacyProcess(clone $doc); + $processorResult = $this->processor->processRead($this->collection, clone $doc, null, [], false); + + $this->assertEquals($legacyResult->getAttribute('metadata'), $processorResult->getAttribute('metadata')); + $this->assertIsArray($processorResult->getAttribute('metadata')); + } + + public function testJsonArrayFilterEquivalence(): void + { + $doc = new Document([ + '$id' => 'doc1', + 'tags' => '["tag1","tag2","tag3"]', + ]); + + $legacyResult = $this->legacyProcess(clone $doc); + $processorResult = $this->processor->processRead($this->collection, clone $doc, null, [], false); + + $this->assertEquals($legacyResult->getAttribute('tags'), $processorResult->getAttribute('tags')); + $this->assertIsArray($processorResult->getAttribute('tags')); + } + + public function testDatetimeFilterEquivalence(): void + { + $doc = new Document([ + '$id' => 'doc1', + 'created_at' => '2024-01-15T10:30:00.000+00:00', + ]); + + $legacyResult = $this->legacyProcess(clone $doc); + $processorResult = $this->processor->processRead($this->collection, clone $doc, null, [], false); + + $this->assertEquals($legacyResult->getAttribute('created_at'), $processorResult->getAttribute('created_at')); + } + + public function testArrayAttributeEquivalence(): void + { + $doc = new Document([ + '$id' => 'doc1', + 'scores' => '[1.5, 2.3, 3.7]', + ]); + + $legacyResult = $this->legacyProcess(clone $doc); + $processorResult = $this->processor->processRead($this->collection, clone $doc, null, [], false); + + $this->assertEquals($legacyResult->getAttribute('scores'), $processorResult->getAttribute('scores')); + $this->assertIsArray($processorResult->getAttribute('scores')); + foreach ($processorResult->getAttribute('scores') as $score) { + $this->assertIsFloat($score); + } + } + + public function testNullValueEquivalence(): void + { + $doc = new Document([ + '$id' => 'doc1', + 'name' => null, + 'age' => null, + ]); + + $legacyResult = $this->legacyProcess(clone $doc); + $processorResult = $this->processor->processRead($this->collection, clone $doc, null, [], false); + + $this->assertNull($processorResult->getAttribute('name')); + $this->assertNull($processorResult->getAttribute('age')); + } + + public function testSelectionsEquivalence(): void + { + $doc = new Document([ + '$id' => 'doc1', + 'name' => 'John', + 'age' => 25, + 'active' => true, + ]); + + $selections = ['name', 'age']; + + $legacyResult = $this->legacyProcess(clone $doc, $selections); + $processorResult = $this->processor->processRead($this->collection, clone $doc, null, $selections, false); + + // Both should have selected attributes + $this->assertEquals($legacyResult->getAttribute('name'), $processorResult->getAttribute('name')); + $this->assertEquals($legacyResult->getAttribute('age'), $processorResult->getAttribute('age')); + + // Check if non-selected attributes are handled the same way + $this->assertEquals( + $legacyResult->getAttribute('active'), + $processorResult->getAttribute('active') + ); + } + + public function testComplexDocumentEquivalence(): void + { + $doc = new Document([ + '$id' => 'doc1', + '$permissions' => ['read("any")'], + 'name' => 'Complex Doc', + 'age' => '30', + 'active' => 1, + 'tags' => '["tag1","tag2"]', + 'metadata' => '{"nested":{"deep":"value"}}', + 'created_at' => '2024-01-15T10:30:00.000+00:00', + 'scores' => '[9.5, 8.3, 7.1]', + ]); + + $legacyResult = $this->legacyProcess(clone $doc); + $processorResult = $this->processor->processRead($this->collection, clone $doc, null, [], false); + + // Compare all attributes + foreach ($this->collection->getAttribute('attributes', []) as $attr) { + $key = $attr['$id']; + $this->assertEquals( + $legacyResult->getAttribute($key), + $processorResult->getAttribute($key), + "Attribute '$key' differs between legacy and processor" + ); + } + } + + /** + * Simulate legacy decode + casting process + */ + private function legacyProcess(Document $doc, array $selections = []): Document + { + // Simulate casting + foreach ($this->collection->getAttribute('attributes', []) as $attribute) { + $key = $attribute['$id']; + $type = $attribute['type']; + $array = $attribute['array'] ?? false; + $value = $doc->getAttribute($key); + + if ($value === null) { + continue; + } + + if ($array) { + if (is_string($value)) { + $value = json_decode($value, true) ?? []; + } + if (!is_array($value)) { + $value = [$value]; + } + foreach ($value as $i => $node) { + $value[$i] = $this->castValue($type, $node); + } + } else { + $value = $this->castValue($type, $value); + } + + $doc->setAttribute($key, $value); + } + + // Simulate decode (filters) + foreach ($this->collection->getAttribute('attributes', []) as $attribute) { + $key = $attribute['$id']; + $filters = $attribute['filters'] ?? []; + $array = $attribute['array'] ?? false; + $value = $doc->getAttribute($key); + + if (empty($filters)) { + continue; + } + + foreach (array_reverse($filters) as $filter) { + if ($array && is_array($value)) { + foreach ($value as $i => $node) { + $value[$i] = $this->applyFilter($filter, $node); + } + } else { + $value = $this->applyFilter($filter, $value); + } + } + + $doc->setAttribute($key, $value); + } + + return $doc; + } + + private function castValue(string $type, mixed $value): mixed + { + switch ($type) { + case Database::VAR_STRING: + case Database::VAR_ID: + return (string) $value; + case Database::VAR_INTEGER: + return (int) $value; + case Database::VAR_FLOAT: + return (float) $value; + case Database::VAR_BOOLEAN: + return (bool) $value; + default: + return $value; + } + } + + private function applyFilter(string $filter, mixed $value): mixed + { + switch ($filter) { + case 'json': + if (!is_string($value)) { + return $value; + } + $decoded = json_decode($value, true) ?? []; + if (array_key_exists('$id', $decoded)) { + return new Document($decoded); + } + if (is_array($decoded)) { + foreach ($decoded as $i => $item) { + if (is_array($item) && array_key_exists('$id', $item)) { + $decoded[$i] = new Document($item); + } + } + } + return $decoded; + case 'datetime': + return DateTime::formatTz($value); + default: + return $value; + } + } +} From 89626163720ab2326d95ef79ce74013fa51f3660 Mon Sep 17 00:00:00 2001 From: Darshan Date: Sun, 12 Oct 2025 14:18:04 +0530 Subject: [PATCH 4/5] lint. --- src/Database/DocumentProcessor.php | 19 +++++++++----- .../document_processor_benchmark.php | 25 +++++++++++-------- tests/unit/DocumentProcessorTest.php | 1 - 3 files changed, 28 insertions(+), 17 deletions(-) diff --git a/src/Database/DocumentProcessor.php b/src/Database/DocumentProcessor.php index 73290af71..715d4fbda 100644 --- a/src/Database/DocumentProcessor.php +++ b/src/Database/DocumentProcessor.php @@ -125,14 +125,13 @@ public function processRead( ?callable $keyMapper = null, array $selections = [], bool $skipCasting = false - ): Document - { + ): Document { $attributes = $collection->getAttribute("attributes", []); // Pre-normalize relationship keys like Database::decode $relationships = \array_filter( $attributes, - fn($attribute) => ($attribute['type'] ?? '') === Database::VAR_RELATIONSHIP + fn ($attribute) => ($attribute['type'] ?? '') === Database::VAR_RELATIONSHIP ); if (!empty($relationships) && $keyMapper !== null) { foreach ($relationships as $relationship) { @@ -278,7 +277,10 @@ public function processRead( $hasRelationshipSelections = false; if (!empty($selections)) { foreach ($selections as $sel) { - if (\str_contains($sel, '.')) { $hasRelationshipSelections = true; break; } + if (\str_contains($sel, '.')) { + $hasRelationshipSelections = true; + break; + } } } if ($hasRelationshipSelections && !empty($selections) && !\in_array('*', $selections, true)) { @@ -353,7 +355,10 @@ private function preparePlan(Document $collection, ?callable $keyMapper, array $ $hasRelationshipSelections = false; if (!empty($selections)) { foreach ($selections as $sel) { - if (\str_contains($sel, '.')) { $hasRelationshipSelections = true; break; } + if (\str_contains($sel, '.')) { + $hasRelationshipSelections = true; + break; + } } } @@ -491,7 +496,9 @@ public function processReadBatch( // Relationship selection semantic adjustment if (!empty($plan['hasRelSelects'])) { foreach ($plan['attrs'] as $a) { - if ($a['selected']) { continue; } + if ($a['selected']) { + continue; + } $key = $a['key']; if (\array_key_exists($key, $filteredValues)) { $document->setAttribute($key, $filteredValues[$key]); diff --git a/tests/benchmarking/document_processor_benchmark.php b/tests/benchmarking/document_processor_benchmark.php index 489225016..31e010051 100755 --- a/tests/benchmarking/document_processor_benchmark.php +++ b/tests/benchmarking/document_processor_benchmark.php @@ -3,9 +3,9 @@ require_once __DIR__ . "/../../vendor/autoload.php"; use Utopia\Database\Database; +use Utopia\Database\DateTime; use Utopia\Database\Document; use Utopia\Database\DocumentProcessor; -use Utopia\Database\DateTime; // LEVEL: LIGHT | MEDIUM | HEAVY $level = strtoupper($argv[1] ?? "MEDIUM"); @@ -51,7 +51,8 @@ $arraySize = $cfg["array_size"]; // Build a realistic collection schema with filters (optionally spatial) -function buildCollection(bool $spatial = false): Document { +function buildCollection(bool $spatial = false): Document +{ $attributes = []; for ($i = 1; $i <= 3; $i++) { $attributes[] = ['$id' => "s{$i}", "type" => Database::VAR_STRING, "array" => false, "filters" => []]; @@ -89,12 +90,13 @@ function makeDoc(int $i, int $arraySize, bool $spatial = false): Document "b1" => $i % 2 === 0, "d1" => "2024-01-15 10:30:00", "d2" => "2024-01-15 15:45:30", - "arr" => array_map(fn($k) => "it{$k}", range(1, $arraySize)), + "arr" => array_map(fn ($k) => "it{$k}", range(1, $arraySize)), ]); if ($spatial) { // Encode spatial as JSON strings to simulate adapter-encoded values - $lon = ($i % 180) - 90; $lat = (($i * 2) % 180) - 90; + $lon = ($i % 180) - 90; + $lat = (($i * 2) % 180) - 90; $d->setAttribute('p1', json_encode(['type' => 'Point', 'coordinates' => [$lon, $lat]])); $d->setAttribute('ls1', json_encode(['type' => 'LineString', 'coordinates' => [[$lon, $lat], [$lon + 1, $lat + 1], [$lon + 2, $lat + 2]]])); $d->setAttribute('pg1', json_encode(['type' => 'Polygon', 'coordinates' => [[[$lon, $lat], [$lon + 1, $lat], [$lon + 1, $lat + 1], [$lon, $lat + 1], [$lon, $lat]]]])); @@ -105,8 +107,11 @@ function makeDoc(int $i, int $arraySize, bool $spatial = false): Document $collection = buildCollection((bool)($cfg['spatial'] ?? false)); -function measure(callable $fn, int $repeat = 1, int $warmup = 0): array { - for ($w = 0; $w < $warmup; $w++) { $fn(); } +function measure(callable $fn, int $repeat = 1, int $warmup = 0): array +{ + for ($w = 0; $w < $warmup; $w++) { + $fn(); + } $times = []; for ($r = 0; $r < $repeat; $r++) { $start = microtime(true); @@ -138,8 +143,8 @@ function measure(callable $fn, int $repeat = 1, int $warmup = 0): array { } }, $repeat, $warmup); -$baselineMs = (int) round($baselineTimes[(int) floor((count($baselineTimes)-1)/2)]); -$optMs = (int) round($optimizedTimes[(int) floor((count($optimizedTimes)-1)/2)]); +$baselineMs = (int) round($baselineTimes[(int) floor((count($baselineTimes) - 1) / 2)]); +$optMs = (int) round($optimizedTimes[(int) floor((count($optimizedTimes) - 1) / 2)]); $gain = $baselineMs > 0 ? (($baselineMs - $optMs) / $baselineMs) * 100 : 0; @@ -242,7 +247,7 @@ public function processRead(Document $collection, Document $document): Document { $attributes = \array_filter( $collection->getAttribute("attributes", []), - fn($attribute) => $attribute["type"] !== Database::VAR_RELATIONSHIP, + fn ($attribute) => $attribute["type"] !== Database::VAR_RELATIONSHIP, ); foreach (Database::INTERNAL_ATTRIBUTES as $attribute) { @@ -373,7 +378,7 @@ public function decodeBaseline(Document $collection, Document $document): Docume { $attributes = \array_filter( $collection->getAttribute("attributes", []), - fn($attribute) => $attribute["type"] !== Database::VAR_RELATIONSHIP, + fn ($attribute) => $attribute["type"] !== Database::VAR_RELATIONSHIP, ); foreach (Database::INTERNAL_ATTRIBUTES as $attribute) { $attributes[] = $attribute; diff --git a/tests/unit/DocumentProcessorTest.php b/tests/unit/DocumentProcessorTest.php index 1dff940c8..d95bef607 100644 --- a/tests/unit/DocumentProcessorTest.php +++ b/tests/unit/DocumentProcessorTest.php @@ -3,7 +3,6 @@ namespace Tests\Unit; use PHPUnit\Framework\TestCase; -use Utopia\Database\Adapter\MariaDB; use Utopia\Database\Database; use Utopia\Database\DateTime; use Utopia\Database\Document; From afcf2bc526dbf7cec5904f0f60a69564cd7f9416 Mon Sep 17 00:00:00 2001 From: Darshan Date: Sun, 12 Oct 2025 14:36:35 +0530 Subject: [PATCH 5/5] codeql. --- src/Database/Database.php | 2 +- src/Database/DocumentProcessor.php | 67 ++++++++----------- .../document_processor_benchmark.php | 9 +++ tests/unit/DocumentProcessorTest.php | 8 +-- 4 files changed, 43 insertions(+), 43 deletions(-) diff --git a/src/Database/Database.php b/src/Database/Database.php index 0bbeae2be..b5ac3cdc9 100644 --- a/src/Database/Database.php +++ b/src/Database/Database.php @@ -7334,7 +7334,7 @@ private function canUseSinglePass(Document $collection): bool // Internal attributes allowed if within supported filters foreach (Database::INTERNAL_ATTRIBUTES as $internal) { - $filters = $internal['filters'] ?? []; + $filters = $internal['filters']; foreach ($filters as $filter) { if (!in_array($filter, $supported, true)) { return false; diff --git a/src/Database/DocumentProcessor.php b/src/Database/DocumentProcessor.php index 715d4fbda..5185022d3 100644 --- a/src/Database/DocumentProcessor.php +++ b/src/Database/DocumentProcessor.php @@ -117,6 +117,9 @@ public static function registerAdapterFilters(Adapter $adapter): void * * @param Document $collection * @param Document $document + * @param callable|null $keyMapper + * @param array $selections + * @param bool $skipCasting * @return Document */ public function processRead( @@ -219,11 +222,11 @@ public function processRead( } // Apply internal attributes at the end to keep behavior consistent + // Note: All internal attributes have array=false, so no array handling needed foreach (Database::INTERNAL_ATTRIBUTES as $attribute) { - $key = $attribute['$id'] ?? ""; - $type = $attribute["type"] ?? ""; - $array = $attribute["array"] ?? false; - $filters = $attribute["filters"] ?? []; + $key = $attribute['$id']; + $type = $attribute["type"]; + $filters = $attribute["filters"]; if ($key === '$permissions') { continue; @@ -240,36 +243,15 @@ public function processRead( } } - if ($array) { - if (is_string($value)) { - $decoded = json_decode($value, true); - $value = \is_array($decoded) ? $decoded : $value; - } - if (!\is_array($value)) { - $value = $value === null ? [] : [$value]; - } - - $revFilters = empty($filters) ? [] : array_reverse($filters); - foreach ($value as $i => $node) { - foreach ($revFilters as $filter) { - $node = $this->decodeAttribute($filter, $node); - } - $value[$i] = $skipCasting ? $node : $this->castNode($type, $node); - } - if (empty($selections) || \in_array($key, $selections, true) || \in_array('*', $selections, true)) { - $document->setAttribute($key, $value); - } - } else { - if (!empty($filters)) { - foreach (array_reverse($filters) as $filter) { - $value = $this->decodeAttribute($filter, $value); - } - } - $final = $skipCasting ? $value : $this->castNode($type, $value); - if (empty($selections) || \in_array($key, $selections, true) || \in_array('*', $selections, true)) { - $document->setAttribute($key, $final); + if (!empty($filters)) { + foreach (array_reverse($filters) as $filter) { + $value = $this->decodeAttribute($filter, $value); } } + $final = $skipCasting ? $value : $this->castNode($type, $value); + if (empty($selections) || \in_array($key, $selections, true) || \in_array('*', $selections, true)) { + $document->setAttribute($key, $final); + } } // Relationship selection semantics: if selecting relationship attributes, also include @@ -301,11 +283,16 @@ public function processRead( /** * Prepare a per-collection plan for batch processing. * + * @param Document $collection + * @param callable|null $keyMapper + * @param array $selections + * @param bool $skipCasting * @return array{ * relationships: array, * attrs: array, selected:bool}>, * internals: array, selected:bool}>, - * skipCasting: bool + * skipCasting: bool, + * hasRelSelects: bool * } */ private function preparePlan(Document $collection, ?callable $keyMapper, array $selections, bool $skipCasting): array @@ -338,15 +325,15 @@ private function preparePlan(Document $collection, ?callable $keyMapper, array $ $internals = []; foreach (Database::INTERNAL_ATTRIBUTES as $attr) { - $key = $attr['$id'] ?? ''; + $key = $attr['$id']; if ($key === '$permissions') { continue; } $internals[] = [ 'key' => $key, - 'type' => $attr['type'] ?? '', - 'array' => (bool)($attr['array'] ?? false), - 'filters' => array_reverse($attr['filters'] ?? []), + 'type' => $attr['type'], + 'array' => (bool)$attr['array'], + 'filters' => array_reverse($attr['filters']), 'selected' => empty($selections) || in_array($key, $selections, true) || in_array('*', $selections, true), ]; } @@ -374,7 +361,11 @@ private function preparePlan(Document $collection, ?callable $keyMapper, array $ /** * Batch version of processRead preserving parity semantics. * + * @param Document $collection * @param array $documents + * @param callable|null $keyMapper + * @param array $selections + * @param bool $skipCasting * @return array */ public function processReadBatch( @@ -494,7 +485,7 @@ public function processReadBatch( } // Relationship selection semantic adjustment - if (!empty($plan['hasRelSelects'])) { + if ($plan['hasRelSelects']) { foreach ($plan['attrs'] as $a) { if ($a['selected']) { continue; diff --git a/tests/benchmarking/document_processor_benchmark.php b/tests/benchmarking/document_processor_benchmark.php index 31e010051..81315e749 100755 --- a/tests/benchmarking/document_processor_benchmark.php +++ b/tests/benchmarking/document_processor_benchmark.php @@ -107,6 +107,9 @@ function makeDoc(int $i, int $arraySize, bool $spatial = false): Document $collection = buildCollection((bool)($cfg['spatial'] ?? false)); +/** + * @return array + */ function measure(callable $fn, int $repeat = 1, int $warmup = 0): array { for ($w = 0; $w < $warmup; $w++) { @@ -194,6 +197,9 @@ function measure(callable $fn, int $repeat = 1, int $warmup = 0): array */ class DocumentProcessorWithFilters { + /** + * @var array + */ private static array $filters = []; public function __construct() @@ -325,6 +331,9 @@ private function castNode(string $type, mixed $node): mixed */ class BaselineProcessor { + /** + * @var array + */ private static array $filters = []; public function __construct() diff --git a/tests/unit/DocumentProcessorTest.php b/tests/unit/DocumentProcessorTest.php index d95bef607..e3e2d316b 100644 --- a/tests/unit/DocumentProcessorTest.php +++ b/tests/unit/DocumentProcessorTest.php @@ -13,7 +13,6 @@ */ class DocumentProcessorTest extends TestCase { - private Database $database; private Document $collection; private DocumentProcessor $processor; @@ -90,9 +89,6 @@ public function setUp(): void ]); $this->processor = new DocumentProcessor(); - - // Use reflection to access private methods for comparison - $this->database = $this->createPartialMock(Database::class, []); } public function testStringAttributeEquivalence(): void @@ -264,6 +260,10 @@ public function testComplexDocumentEquivalence(): void /** * Simulate legacy decode + casting process + * + * @param Document $doc + * @param array $selections + * @return Document */ private function legacyProcess(Document $doc, array $selections = []): Document {