diff --git a/api/app/models/admin.py b/api/app/models/admin.py index 9dd37bc23..d884817b7 100644 --- a/api/app/models/admin.py +++ b/api/app/models/admin.py @@ -76,25 +76,6 @@ class BackupRequest(BaseModel): format: str = Field("archive", description="Export format: 'archive' (tar.gz with documents, default), 'json' (graph only), or 'gexf' (Gephi visualization)") -class BackupIntegrityAssessment(BaseModel): - """Backup integrity assessment results""" - external_dependencies_count: int = 0 - warnings_count: int = 0 - issues_count: int = 0 - has_external_deps: bool = False - details: Dict[str, Any] = {} - - -class BackupResponse(BaseModel): - """Backup operation response""" - success: bool - backup_file: str - file_size_mb: float - statistics: Dict[str, int] - integrity_assessment: Optional[BackupIntegrityAssessment] = None - message: str - - class ListBackupsResponse(BaseModel): """List available backups""" backups: List[Dict[str, Any]] @@ -102,27 +83,8 @@ class ListBackupsResponse(BaseModel): count: int -# ========== Restore Models ========== - -class RestoreRequest(BaseModel): - """Request to restore a backup (requires authentication)""" - username: str = Field(..., description="Username for authentication") - password: str = Field(..., description="Password for authentication") - backup_file: str = Field(..., description="Path to backup file") - overwrite: bool = Field(False, description="Overwrite existing data") - handle_external_deps: str = Field( - "prune", - description="How to handle external dependencies: 'prune', 'stitch', or 'defer'" - ) - - -class RestoreResponse(BaseModel): - """Restore operation response""" - success: bool - restored_counts: Dict[str, int] - warnings: List[str] = [] - message: str - external_deps_handled: Optional[str] = None +# Restore models removed in ADR-102 P6: the /restore route uses Form params +# (mode, epoch), not request/response models. See routes/admin.py + restore_worker. 
# ========== Reset Models ========== diff --git a/api/app/routes/admin.py b/api/app/routes/admin.py index 8fad80e81..dab0b3026 100644 --- a/api/app/routes/admin.py +++ b/api/app/routes/admin.py @@ -27,11 +27,9 @@ from ..models.admin import ( SystemStatusResponse, BackupRequest, - BackupResponse, ListBackupsResponse, - RestoreRequest, - RestoreResponse, # ResetRequest, ResetResponse removed - reset moved to initialize-platform.sh option 0 + # BackupResponse / RestoreRequest / RestoreResponse removed in ADR-102 P6 (dead) ) from ..dependencies.auth import CurrentUser, require_permission from ..services.admin_service import AdminService diff --git a/api/app/services/admin_service.py b/api/app/services/admin_service.py index e99ed572b..f915ea184 100644 --- a/api/app/services/admin_service.py +++ b/api/app/services/admin_service.py @@ -6,7 +6,6 @@ """ import asyncio -import subprocess import json import os import logging @@ -25,10 +24,7 @@ DatabaseStats, PythonEnvironment, ConfigurationStatus, - BackupResponse, - BackupIntegrityAssessment, ListBackupsResponse, - RestoreResponse, ResetResponse, SchemaValidation, ) @@ -108,140 +104,6 @@ async def list_backups(self) -> ListBackupsResponse: count=len(backups), ) - async def create_backup( - self, - backup_type: str, - ontology_name: Optional[str] = None, - output_filename: Optional[str] = None - ) -> BackupResponse: - """Create a backup (full or ontology-specific). - - DEAD (ADR-102 P6): no caller — the live backup route uses - create_backup_stream / stream_backup_archive, not this subprocess. The - spawned ``src.admin.backup`` module path is also stale. Scheduled for - removal in P6; do not wire to new code. 
- """ - # Build command - cmd = [ - str(self.project_root / "venv" / "bin" / "python"), - "-m", - "src.admin.backup", - ] - - if backup_type == "full": - cmd.append("--auto-full") - elif backup_type == "ontology": - if not ontology_name: - raise ValueError("ontology_name required for ontology backup") - cmd.extend(["--ontology", ontology_name]) - else: - raise ValueError(f"Invalid backup_type: {backup_type}") - - if output_filename: - cmd.extend(["--output", output_filename]) - - # Execute backup - proc = await asyncio.create_subprocess_exec( - *cmd, - cwd=self.project_root, - stdout=asyncio.subprocess.PIPE, - stderr=asyncio.subprocess.PIPE, - ) - - stdout, stderr = await proc.communicate() - - if proc.returncode != 0: - raise RuntimeError(f"Backup failed: {stderr.decode()}") - - # Parse output to find backup file - output = stdout.decode() - backup_file = await self._extract_backup_file_from_output(output, output_filename) - - # Get file info - backup_path = Path(backup_file) - file_size_mb = backup_path.stat().st_size / (1024 * 1024) - - # Load backup to get statistics (single kg-backup/2 model: counts come from - # the bulk record streams, not a top-level statistics field). - with open(backup_path, 'r') as f: - backup_data = json.load(f) - - from ...lib.serialization import KgBackupV2Reader - _counts = KgBackupV2Reader(backup_data).counts() - statistics = { - k: _counts[k] for k in ("concepts", "sources", "instances", "relationships", "vocabulary") - } - - # TODO: Add integrity assessment - integrity = None - - return BackupResponse( - success=True, - backup_file=str(backup_path), - file_size_mb=file_size_mb, - statistics=statistics, - integrity_assessment=integrity, - message=f"Backup created successfully: {backup_path.name}", - ) - - async def restore_backup( - self, - backup_file: str, - overwrite: bool = False, - handle_external_deps: str = "prune" - ) -> RestoreResponse: - """Restore a backup. 
- - DEAD (ADR-102 P6): no caller — the live restore route enqueues - run_restore_worker (which uses the kg-backup/2 mode machinery), not this - subprocess. The spawned ``src.admin.restore`` path is stale and the - overwrite/handle_external_deps args were removed from the real flow in P4. - Scheduled for removal in P6; do not wire to new code. - """ - # Validate backup file exists - backup_path = Path(backup_file) - if not backup_path.exists(): - raise FileNotFoundError(f"Backup file not found: {backup_file}") - - # Build command - cmd = [ - str(self.project_root / "venv" / "bin" / "python"), - "-m", - "src.admin.restore", - "--file", - str(backup_path), - ] - - if overwrite: - cmd.append("--overwrite") - - # TODO: Add external deps handling once the restore script supports it - - # Execute restore - proc = await asyncio.create_subprocess_exec( - *cmd, - cwd=self.project_root, - stdout=asyncio.subprocess.PIPE, - stderr=asyncio.subprocess.PIPE, - stdin=asyncio.subprocess.PIPE, # For confirmations - ) - - # Send confirmations (auto-accept for now) - stdout, stderr = await proc.communicate(input=b"y\n") - - if proc.returncode != 0: - raise RuntimeError(f"Restore failed: {stderr.decode()}") - - # Parse output for results - output = stdout.decode() - - return RestoreResponse( - success=True, - restored_counts={}, # TODO: Parse from output - message="Restore completed successfully", - external_deps_handled=handle_external_deps, - ) - async def reset_database( self, clear_logs: bool = True, @@ -415,23 +277,3 @@ async def _check_api_keys(self) -> tuple[bool, bool]: return anthropic, openai - async def _extract_backup_file_from_output( - self, - output: str, - custom_filename: Optional[str] - ) -> str: - """Extract backup filename from command output""" - # If custom filename was provided, use it - if custom_filename: - return str(self.backup_dir / custom_filename) - - # Otherwise, find latest backup file - backup_files = sorted( - self.backup_dir.glob("*.json"), - 
key=lambda f: f.stat().st_mtime, - reverse=True - ) - if backup_files: - return str(backup_files[0]) - - raise RuntimeError("Could not determine backup file path") diff --git a/api/lib/integrity.py b/api/lib/integrity.py index 60732db65..277964f19 100644 --- a/api/lib/integrity.py +++ b/api/lib/integrity.py @@ -1,12 +1,14 @@ """ -Data integrity validation and assessment +Database integrity validation and repair. -Analyzes backup completeness and database integrity to detect: -- Cross-ontology dependencies +Runtime checks on the live graph (post-restore and on demand) to detect and +repair: - Dangling relationship references - Orphaned concepts - Missing embeddings -- Torn ontological fabric from partial restores + +(The v1 BackupAssessment static-analysis class was removed in ADR-102 P6 — backup +inspection now goes through KgBackupV2Reader / backup_integrity.check_backup_data.) """ from typing import Dict, Any, List, Set, Optional @@ -20,177 +22,6 @@ from .console import Console, Colors -class BackupAssessment: - """Assess backup completeness and dependencies. - - DEAD (ADR-102 P6): reads the removed v1 ``data["data"]`` shape and has no live - caller — backup/restore/stitch now use KgBackupV2Reader.external_concept_ids() / - check_backup_data(). Scheduled for deletion in P6 (the runtime DatabaseIntegrity - prune/repair below stays until its logic is ported). Do not wire to new code. 
- """ - - @staticmethod - def analyze_backup(backup_data: Dict[str, Any]) -> Dict[str, Any]: - """ - Analyze a backup file for completeness and external dependencies - - Args: - backup_data: Parsed backup JSON - - Returns: - Assessment report with warnings and recommendations - """ - report = { - "backup_type": backup_data.get("type"), - "ontology": backup_data.get("ontology"), - "statistics": backup_data.get("statistics", {}), - "issues": [], - "warnings": [], - "external_dependencies": { - "concepts": set(), - "sources": set() - }, - "integrity_checks": {} - } - - data = backup_data.get("data", {}) - - # Build internal concept and source IDs - internal_concept_ids = {c["concept_id"] for c in data.get("concepts", [])} - internal_source_ids = {s["source_id"] for s in data.get("sources", [])} - - # Check for external concept references in relationships - external_concepts = set() - for rel in data.get("relationships", []): - from_id = rel.get("from") - to_id = rel.get("to") - - if from_id not in internal_concept_ids: - external_concepts.add(from_id) - if to_id not in internal_concept_ids: - external_concepts.add(to_id) - - if external_concepts: - report["warnings"].append( - f"Found {len(external_concepts)} relationships pointing to external concepts " - f"not included in this backup" - ) - report["external_dependencies"]["concepts"] = list(external_concepts) - - # Check for missing embeddings - concepts_without_embeddings = [ - c["concept_id"] for c in data.get("concepts", []) - if not c.get("embedding") or len(c.get("embedding", [])) == 0 - ] - if concepts_without_embeddings: - report["issues"].append( - f"{len(concepts_without_embeddings)} concepts missing embeddings" - ) - report["integrity_checks"]["missing_embeddings"] = concepts_without_embeddings - - # Check for instances referencing external sources - external_sources = set() - for instance in data.get("instances", []): - source_id = instance.get("source_id") - if source_id not in internal_source_ids: - 
external_sources.add(source_id) - - if external_sources: - report["issues"].append( - f"{len(external_sources)} instances reference sources not in this backup" - ) - report["external_dependencies"]["sources"] = list(external_sources) - - # Check for orphaned concepts (no instances/sources) - concepts_with_instances = {inst["concept_id"] for inst in data.get("instances", [])} - orphaned_concepts = internal_concept_ids - concepts_with_instances - - if orphaned_concepts: - report["warnings"].append( - f"{len(orphaned_concepts)} concepts have no instances/evidence in this backup" - ) - - # Relationship integrity - total_relationships = len(data.get("relationships", [])) - internal_relationships = sum( - 1 for rel in data.get("relationships", []) - if rel["from"] in internal_concept_ids and rel["to"] in internal_concept_ids - ) - external_relationships = total_relationships - internal_relationships - - report["integrity_checks"]["relationships"] = { - "total": total_relationships, - "internal": internal_relationships, - "external": external_relationships, - "external_percentage": (external_relationships / total_relationships * 100) - if total_relationships > 0 else 0 - } - - if external_relationships > 0: - report["warnings"].append( - f"{external_relationships}/{total_relationships} " - f"({report['integrity_checks']['relationships']['external_percentage']:.1f}%) " - f"relationships point to external concepts" - ) - - return report - - @staticmethod - def print_assessment(report: Dict[str, Any]): - """Print assessment report to console""" - Console.section("Backup Assessment") - - # Basic info - Console.key_value("Backup Type", report["backup_type"]) - if report["ontology"]: - Console.key_value("Ontology", report["ontology"]) - - # Statistics - Console.info("\nContents:") - stats = report["statistics"] - for key, value in stats.items(): - Console.key_value(f" {key.title()}", str(value)) - - # Relationship integrity - rel_check = 
report["integrity_checks"].get("relationships", {}) - if rel_check: - Console.info("\nRelationship Integrity:") - Console.key_value(" Internal", f"{rel_check['internal']}/{rel_check['total']}", - Colors.BOLD, Colors.OKGREEN) - if rel_check["external"] > 0: - Console.key_value(" External", f"{rel_check['external']}/{rel_check['total']}", - Colors.BOLD, Colors.WARNING) - Console.key_value(" External %", f"{rel_check['external_percentage']:.1f}%", - Colors.BOLD, Colors.WARNING) - - # Issues - if report["issues"]: - Console.warning("\n⚠ Issues Found:") - for issue in report["issues"]: - print(f" • {issue}") - - # Warnings - if report["warnings"]: - Console.warning("\nWarnings:") - for warning in report["warnings"]: - print(f" • {warning}") - - # External dependencies - ext_deps = report["external_dependencies"] - if ext_deps["concepts"] or ext_deps["sources"]: - Console.warning("\nExternal Dependencies:") - if ext_deps["concepts"]: - print(f" • {len(ext_deps['concepts'])} external concepts referenced") - if ext_deps["sources"]: - print(f" • {len(ext_deps['sources'])} external sources referenced") - - Console.warning("\n⚠ Restoring this backup may create dangling references!") - Console.info(" Consider one of these strategies:") - print(" 1. Restore into database that already has these dependencies") - print(" 2. Use --prune-external to skip external relationships") - print(" 3. Backup dependent ontologies together") - - class DatabaseIntegrity: """Validate database integrity after restore""" diff --git a/cli/src/types/index.ts b/cli/src/types/index.ts index 6e99a107e..d5ff8d0e7 100644 --- a/cli/src/types/index.ts +++ b/cli/src/types/index.ts @@ -730,7 +730,7 @@ export interface RestoreResponse { restored_counts: Record<string, number>; warnings: string[]; message: string; - external_deps_handled?: string; + // external_deps_handled removed in ADR-102 P6 (dead field; restore uses --mode/--epoch) } // ========== RBAC Types (ADR-028) ==========