diff --git a/api/Dockerfile b/api/Dockerfile index 605eca238..f97a9a441 100644 --- a/api/Dockerfile +++ b/api/Dockerfile @@ -89,6 +89,11 @@ ENV PATH="/opt/venv/bin:$PATH" # Copy application code COPY api/ /app/api/ COPY schema/ /app/schema/ +# Offline backup-object oracle: the single source of truth for kg-backup/2 validation. +# Shipped so the API can load it by path for POST /admin/backup/verify. Stdlib-only / +# standalone — no api-package coupling. Copy just the one file (not the dir) to avoid +# pulling in __pycache__ and unrelated lint scripts. +COPY scripts/development/lint/lint_backup.py /app/scripts/development/lint/lint_backup.py # Set Python path for imports (api.app.lib.*) ENV PYTHONPATH=/app diff --git a/api/Dockerfile.rocm-host b/api/Dockerfile.rocm-host index 34ab3890e..f5c69649f 100644 --- a/api/Dockerfile.rocm-host +++ b/api/Dockerfile.rocm-host @@ -39,6 +39,11 @@ RUN grep -v "^torch" requirements.txt | grep -v "^torchvision" > requirements-no # Copy application code COPY api/ /app/api/ COPY schema/ /app/schema/ +# Offline backup-object oracle: the single source of truth for kg-backup/2 validation. +# Shipped so the API can load it by path for POST /admin/backup/verify. Stdlib-only / +# standalone — no api-package coupling. Copy just the one file (not the dir) to avoid +# pulling in __pycache__ and unrelated lint scripts. +COPY scripts/development/lint/lint_backup.py /app/scripts/development/lint/lint_backup.py # Set Python path for imports ENV PYTHONPATH=/app diff --git a/api/app/lib/backup_oracle.py b/api/app/lib/backup_oracle.py new file mode 100644 index 000000000..67349a27f --- /dev/null +++ b/api/app/lib/backup_oracle.py @@ -0,0 +1,75 @@ +""" +Adapter to the offline backup-object oracle (ADR-102). 
+ +The single source of truth for kg-backup/2 *spec* validation is the standalone +``scripts/development/lint/lint_backup.py`` — a stdlib-only oracle with no +api-package dependency, so it doubles as a CI/test gate and loads standalone by +path (ADR-102 Track D / P6c). This module loads that oracle by path and exposes +a thin :func:`validate_backup_object`, so the API (``POST /admin/backup/verify``) +runs the *same* checks server-side — no reimplementation, no cross-language drift. + +The oracle file is shipped into the API image (see ``api/Dockerfile``: +``COPY scripts/development/lint/lint_backup.py``) and is present at the repo root in dev. +""" + +from __future__ import annotations + +import importlib.util +from pathlib import Path +from typing import Any, Dict + +# Candidate locations for the standalone oracle, in priority order: resolved +# relative to this file (api/app/lib -> repo root), the container path, then cwd. +_ORACLE_PATH_CANDIDATES = [ + Path(__file__).resolve().parents[3] / "scripts" / "development" / "lint" / "lint_backup.py", + Path("/app/scripts/development/lint/lint_backup.py"), + Path.cwd() / "scripts" / "development" / "lint" / "lint_backup.py", +] + +_oracle = None # lazily-loaded module, cached after first load + + +def _load_oracle(): + """Load (once) the standalone ``lint_backup`` module by file path. + + Mirrors the importlib-by-path loading the pytest suites use, so the API runs + the exact same oracle. Raises FileNotFoundError if the file is absent (e.g. + an API image built without the ``COPY scripts/development/lint/lint_backup.py`` line). 
+ """ + global _oracle + if _oracle is not None: + return _oracle + for path in _ORACLE_PATH_CANDIDATES: + if path.is_file(): + spec = importlib.util.spec_from_file_location("kg_backup_oracle", str(path)) + module = importlib.util.module_from_spec(spec) + spec.loader.exec_module(module) + _oracle = module + return _oracle + raise FileNotFoundError( + "offline backup oracle (lint_backup.py) not found; looked in: " + + ", ".join(str(p) for p in _ORACLE_PATH_CANDIDATES) + ) + + +def validate_backup_object(obj: Dict[str, Any]) -> Dict[str, Any]: + """Run the offline oracle on a parsed kg-backup/2 object; return a JSON report. + + Returns ``{ok, format_version, errors, warnings, notices, issues}`` where + ``issues`` is the full ordered list of ``{severity, code, message, location}`` + and the severity buckets are convenience filters of that list. + """ + oracle = _load_oracle() + result = oracle.validate_backup(obj) + issues = [ + {"severity": i.severity, "code": i.code, "message": i.message, "location": i.location} + for i in result.issues + ] + return { + "ok": result.ok, + "format_version": result.format_version, + "errors": [i for i in issues if i["severity"] == "ERROR"], + "warnings": [i for i in issues if i["severity"] == "WARNING"], + "notices": [i for i in issues if i["severity"] == "NOTICE"], + "issues": issues, + } diff --git a/api/app/routes/admin.py b/api/app/routes/admin.py index 40f2573e1..0dd453848 100644 --- a/api/app/routes/admin.py +++ b/api/app/routes/admin.py @@ -13,6 +13,7 @@ """ import re +import json import uuid import shutil import tempfile @@ -39,7 +40,8 @@ from ..services.job_queue import get_job_queue from ..lib.backup_streaming import create_backup_stream from ..lib.backup_archive import stream_backup_archive, extract_backup_archive, cleanup_extracted_archive -from ..lib.backup_integrity import check_backup_integrity +from ..lib.backup_integrity import check_backup_integrity, check_backup_data +from ..lib.backup_oracle import 
validate_backup_object from ..lib.age_client import AGEClient from ..lib.encrypted_keys import EncryptedKeyStore from pydantic import BaseModel @@ -423,6 +425,103 @@ async def restore_backup( ) +@router.post("/backup/verify") +async def verify_backup( + current_user: CurrentUser, + _: None = Depends(require_permission("backups", "read")), + file: UploadFile = File(..., description="Backup file (.tar.gz archive or .json)") +): + """ + Validate a kg-backup/2 backup object WITHOUT restoring it (ADR-102). + + Runs the **offline oracle** — the single source of truth for kg-backup/2 spec + validation (``scripts/development/lint/lint_backup.py``) — server-side against + the uploaded backup, so the CLI/web do not reimplement validation (no + cross-language drift). Returns its structured report: ``errors`` / ``warnings`` + / ``notices``, each with a stable ``code`` and JSON-path ``location``, plus + best-effort record-count ``statistics`` (de-interned view). + + Read-only: no graph access, nothing queued, no mutation. Accepts the same two + containers as ``/restore`` — ``.tar.gz`` (manifest.json extracted) or ``.json``. + + **Authorization:** Requires ``backups:read`` (admin-gated by default; grant + ``backups:read`` to another role to delegate verification). 
+ """ + filename = file.filename or "" + is_archive = filename.endswith('.tar.gz') + is_json = filename.endswith('.json') + if not is_archive and not is_json: + raise HTTPException( + status_code=status.HTTP_400_BAD_REQUEST, + detail="Backup file must be .tar.gz archive or .json format" + ) + + temp_file_id = uuid.uuid4() + archive_temp_dir = None + archive_path = ( + Path(tempfile.gettempdir()) / f"verify_{temp_file_id}.tar.gz" if is_archive else None + ) + temp_path = Path(tempfile.gettempdir()) / f"verify_{temp_file_id}.json" + + try: + if is_archive: + with open(archive_path, "wb") as temp_file: + shutil.copyfileobj(file.file, temp_file) + archive_temp_dir, manifest_path = extract_backup_archive(str(archive_path)) + temp_path = Path(manifest_path) + archive_path.unlink() + else: + with open(temp_path, "wb") as temp_file: + shutil.copyfileobj(file.file, temp_file) + + with open(temp_path, "r", encoding="utf-8") as f: + obj = json.load(f) + + # Single source of truth: run the offline oracle server-side. + report = validate_backup_object(obj) + + # Best-effort record-count statistics (de-interned view). Skipped if the + # object is too malformed for the reader — the oracle already reported why. 
+ try: + integrity = check_backup_data(obj) + report["statistics"] = integrity.statistics or {} + report["external_deps"] = getattr(integrity, "external_deps", 0) + except Exception: + report["statistics"] = {} + report["external_deps"] = 0 + + report["filename"] = filename + logger.info( + f"Verified backup {filename!r}: ok={report['ok']} " + f"errors={len(report['errors'])} warnings={len(report['warnings'])}" + ) + return report + + except json.JSONDecodeError as e: + raise HTTPException( + status_code=status.HTTP_400_BAD_REQUEST, + detail=f"Backup file is not valid JSON: {e}" + ) + except HTTPException: + raise + except Exception as e: + logger.error(f"Backup verify failed: {str(e)}") + raise HTTPException( + status_code=status.HTTP_500_INTERNAL_SERVER_ERROR, + detail=f"Backup verify failed: {str(e)}" + ) + finally: + # Clean up everything we may have created — including the saved .tar.gz when + # extraction throws before its in-try unlink (review: avoid a temp leak on + # exactly the malformed-archive case verify exists to catch). + if archive_path is not None and archive_path.exists(): + archive_path.unlink() + if temp_path.exists(): + temp_path.unlink() + if archive_temp_dir: + cleanup_extracted_archive(archive_temp_dir) + + # ========== Database Reset REMOVED - Too Dangerous for API ========== # # Database reset has been moved to initialize-platform.sh option 0 for security: diff --git a/cli/src/api/client.ts b/cli/src/api/client.ts index 7f1b7b8df..7443a9444 100644 --- a/cli/src/api/client.ts +++ b/cli/src/api/client.ts @@ -1334,6 +1334,45 @@ export class KnowledgeGraphClient { return response.data; } + /** + * Verify a kg-backup/2 backup file WITHOUT restoring it (ADR-102). + * + * Uploads the file to POST /admin/backup/verify, which runs the offline oracle + * (the single source of truth — scripts/development/lint/lint_backup.py) server- + * side and returns its structured report. No validation logic lives in the CLI. 
+ */ + async verifyBackup( + backupFilePath: string, + onUploadProgress?: (uploaded: number, total: number, percent: number) => void + ): Promise<{ + ok: boolean; + format_version: string | null; + errors: Array<{ severity: string; code: string; message: string; location: string }>; + warnings: Array<{ severity: string; code: string; message: string; location: string }>; + notices: Array<{ severity: string; code: string; message: string; location: string }>; + issues: Array<{ severity: string; code: string; message: string; location: string }>; + statistics?: Record<string, number>; + external_deps?: number; + filename?: string; + }> { + const form = new FormData(); + form.append('file', fs.createReadStream(backupFilePath)); + + const response = await this.client.post('/admin/backup/verify', form, { + headers: form.getHeaders(), + onUploadProgress: (progressEvent) => { + if (onUploadProgress && progressEvent.total) { + const uploaded = progressEvent.loaded; + const total = progressEvent.total; + const percent = Math.round((uploaded / total) * 100); + onUploadProgress(uploaded, total, percent); + } + } + }); + + return response.data; + } + /** * Get job scheduler status and statistics (ADR-014) */ diff --git a/cli/src/cli/admin/backup.ts b/cli/src/cli/admin/backup.ts index 3f6af7040..d2d9b90d6 100644 --- a/cli/src/cli/admin/backup.ts +++ b/cli/src/cli/admin/backup.ts @@ -392,3 +392,125 @@ export function createRestoreCommand(): Command { } }); } + +export function createVerifyBackupCommand(): Command { + return new Command('verify-backup') + .description('Validate a backup file without restoring it (runs the server-side oracle)') + .argument('[file]', 'Path to a backup .tar.gz or .json (omit to pick from the backup directory)') + .option('--file <filename>', 'Backup filename from the configured backup directory') + .action(async (fileArg: string | undefined, options: any) => { + try { + const client = createClientFromEnv(); + const config = getConfig(); + const backupDir = 
config.getBackupDir(); + + console.log('\n' + separator()); + console.log(colors.ui.title('🔎 Verify Backup')); + console.log(separator()); + + // Resolve the backup file: positional arg → --file (from dir) → interactive. + let backupFilePath: string; + if (fileArg) { + backupFilePath = fileArg; + } else if (options.file) { + backupFilePath = path.join(backupDir, options.file); + } else { + if (!fs.existsSync(backupDir)) { + console.error(colors.status.error('\n✗ No backups available - directory does not exist')); + console.log(colors.status.dim(`Directory: ${backupDir}\n`)); + process.exit(1); + } + const backups = fs.readdirSync(backupDir) + .filter(f => f.endsWith('.tar.gz') || f.endsWith('.json')) + .map(filename => { + const filepath = path.join(backupDir, filename); + return { filename, path: filepath, size_mb: fs.statSync(filepath).size / (1024 * 1024) }; + }) + .sort((a, b) => b.size_mb - a.size_mb); + if (backups.length === 0) { + console.error(colors.status.error('\n✗ No backups available')); + console.log(colors.status.dim(`Directory: ${backupDir}\n`)); + process.exit(1); + } + console.log('\n' + colors.ui.key('Available Backups:')); + backups.slice(0, 10).forEach((b, i) => { + console.log(` ${i + 1}. 
${b.filename} (${b.size_mb.toFixed(2)} MB)`); + }); + const choice = await prompt('\nSelect backup [1-10] or enter filename: '); + if (/^\d+$/.test(choice)) { + const index = parseInt(choice) - 1; + if (index < 0 || index >= backups.length) { + console.error(colors.status.error('✗ Invalid selection')); + process.exit(1); + } + backupFilePath = backups[index].path; + } else { + backupFilePath = path.join(backupDir, choice); + } + } + + if (!fs.existsSync(backupFilePath)) { + console.error(colors.status.error(`\n✗ Backup file not found: ${backupFilePath}\n`)); + process.exit(1); + } + + const ora = require('ora'); + const spinner = ora('Uploading & validating...').start(); + let report; + try { + report = await client.verifyBackup(backupFilePath, (uploaded, total, percent) => { + const u = (uploaded / (1024 * 1024)).toFixed(2); + const t = (total / (1024 * 1024)).toFixed(2); + spinner.text = `Uploading & validating... ${percent}% (${u}/${t} MB)`; + }); + } catch (uploadError) { + spinner.fail('Verification request failed'); + throw uploadError; + } + spinner.stop(); + + // Report + console.log(''); + if (report.format_version) { + console.log(` ${colors.ui.key('Format:')} ${report.format_version}`); + } + const stats = report.statistics || {}; + if (Object.keys(stats).length > 0) { + const parts = ['concepts', 'sources', 'instances', 'relationships', 'vocabulary'] + .filter(k => stats[k] !== undefined) + .map(k => `${stats[k]} ${k}`); + console.log(` ${colors.ui.key('Contents:')} ${parts.join(', ')}`); + } + if (report.external_deps) { + console.log(` ${colors.ui.key('External deps:')} ${report.external_deps}`); + } + + const printIssue = (i: any, colorFn: (s: string) => string) => { + const loc = i.location ? 
` at ${i.location}` : ''; + console.log(' ' + colorFn(`[${i.severity}] ${i.code}: ${i.message}${loc}`)); + }; + if (report.errors.length || report.warnings.length || report.notices.length) { + console.log(''); + } + report.errors.forEach(i => printIssue(i, colors.status.error)); + report.warnings.forEach(i => printIssue(i, colors.status.warning)); + report.notices.forEach(i => printIssue(i, colors.status.dim)); + + console.log('\n' + separator()); + if (report.ok) { + console.log(colors.status.success( + `✓ Valid backup (${report.warnings.length} warning(s), ${report.notices.length} notice(s))`)); + console.log(separator() + '\n'); + } else { + console.log(colors.status.error( + `✗ Invalid backup — ${report.errors.length} error(s), ${report.warnings.length} warning(s)`)); + console.log(separator() + '\n'); + process.exit(1); + } + } catch (error: any) { + console.error(colors.status.error('✗ Verification failed')); + console.error(colors.status.error(error.response?.data?.detail || error.message)); + process.exit(1); + } + }); +} diff --git a/cli/src/cli/admin/index.ts b/cli/src/cli/admin/index.ts index 1a2190b83..9656867f5 100644 --- a/cli/src/cli/admin/index.ts +++ b/cli/src/cli/admin/index.ts @@ -12,7 +12,7 @@ import { createEmbeddingCommand, createExtractionCommand, createVisionCommand, c // Import split command modules import { createStatusCommand } from './status'; -import { createBackupCommand, createListBackupsCommand, createRestoreCommand } from './backup'; +import { createBackupCommand, createListBackupsCommand, createRestoreCommand, createVerifyBackupCommand } from './backup'; import { createSchedulerCommand } from './scheduler'; import { createWorkersCommand } from './workers'; @@ -21,6 +21,7 @@ const statusCommand = createStatusCommand(); const backupCommand = createBackupCommand(); const listBackupsCommand = createListBackupsCommand(); const restoreCommand = createRestoreCommand(); +const verifyBackupCommand = createVerifyBackupCommand(); const 
schedulerCommand = createSchedulerCommand(); const workersCommand = createWorkersCommand(); @@ -36,6 +37,7 @@ export const adminCommand = setCommandHelp( .addCommand(backupCommand) .addCommand(listBackupsCommand) .addCommand(restoreCommand) + .addCommand(verifyBackupCommand) .addCommand(schedulerCommand) .addCommand(workersCommand); @@ -65,4 +67,4 @@ adminCommand.addCommand(visionCommand); adminCommand.addCommand(keysCommand); // Configure colored help for all admin commands -[statusCommand, backupCommand, listBackupsCommand, restoreCommand, schedulerCommand, workersCommand].forEach(configureColoredHelp); +[statusCommand, backupCommand, listBackupsCommand, restoreCommand, verifyBackupCommand, schedulerCommand, workersCommand].forEach(configureColoredHelp); diff --git a/docs/reference/cli/README.md b/docs/reference/cli/README.md index c25562e0d..892f69fc1 100644 --- a/docs/reference/cli/README.md +++ b/docs/reference/cli/README.md @@ -3,7 +3,7 @@ > **Auto-Generated Documentation** > > Generated from CLI source code. 
-> Last updated: 2026-06-01 +> Last updated: 2026-06-02 --- @@ -2264,6 +2264,7 @@ kg admin [options] - `backup` - Create database backup (ADR-036) - full system or per-ontology, in restorable JSON or Gephi GEXF format - `list-backups` - List available backup files from configured directory - `restore` - Restore a database backup (uses OAuth authentication) +- `verify-backup` - Validate a backup file without restoring it (runs the server-side oracle) - `scheduler` - Job scheduler management (ADR-014 job queue) - monitor worker status, cleanup stale jobs - `workers` - Worker lane management (ADR-100) - monitor slot utilization, queue depth, active jobs - `user` - User management commands (admin only) @@ -2330,6 +2331,25 @@ kg restore [options] | `--epoch ` | Epoch reconciliation: "simple" (default; one restore event) or "faithful" (replay the backup's history; clone-only — requires --mode idempotent into an empty target) | `"simple"` | | `--confirm` | Confirm restore operation (required for non-interactive use) | `false` | +### verify-backup + +Validate a backup file without restoring it (runs the server-side oracle) + +**Usage:** +```bash +kg verify-backup [file] +``` + +**Arguments:** + +- `[file]` - Path to a backup .tar.gz or .json (omit to pick from the backup directory) + +**Options:** + +| Option | Description | Default | +|--------|-------------|---------| +| `--file <filename>` | Backup filename from the configured backup directory | - | + ### scheduler Job scheduler management (ADR-014 job queue) - monitor worker status, cleanup stale jobs diff --git a/docs/reference/cli/commands/admin.md b/docs/reference/cli/commands/admin.md index f0b66385a..88420eca1 100644 --- a/docs/reference/cli/commands/admin.md +++ b/docs/reference/cli/commands/admin.md @@ -17,6 +17,7 @@ kg admin [options] - `backup` - Create database backup (ADR-036) - full system or per-ontology, in restorable JSON or Gephi GEXF format - `list-backups` - List available backup files from configured directory - 
`restore` - Restore a database backup (uses OAuth authentication) +- `verify-backup` - Validate a backup file without restoring it (runs the server-side oracle) - `scheduler` - Job scheduler management (ADR-014 job queue) - monitor worker status, cleanup stale jobs - `workers` - Worker lane management (ADR-100) - monitor slot utilization, queue depth, active jobs - `user` - User management commands (admin only) @@ -83,6 +84,25 @@ kg restore [options] | `--epoch ` | Epoch reconciliation: "simple" (default; one restore event) or "faithful" (replay the backup's history; clone-only — requires --mode idempotent into an empty target) | `"simple"` | | `--confirm` | Confirm restore operation (required for non-interactive use) | `false` | +### verify-backup + +Validate a backup file without restoring it (runs the server-side oracle) + +**Usage:** +```bash +kg verify-backup [file] +``` + +**Arguments:** + +- `[file]` - Path to a backup .tar.gz or .json (omit to pick from the backup directory) + +**Options:** + +| Option | Description | Default | +|--------|-------------|---------| +| `--file <filename>` | Backup filename from the configured backup directory | - | + ### scheduler Job scheduler management (ADR-014 job queue) - monitor worker status, cleanup stale jobs diff --git a/tests/api/test_backup_verify.py b/tests/api/test_backup_verify.py new file mode 100644 index 000000000..d2c1dc100 --- /dev/null +++ b/tests/api/test_backup_verify.py @@ -0,0 +1,110 @@ +""" +Backup verify endpoint tests (ADR-102). + +Endpoint: POST /admin/backup/verify — runs the offline oracle +(scripts/development/lint/lint_backup.py) server-side against an uploaded +kg-backup/2 object and returns its structured report. Read-only, no restore. +""" + +import json +import pytest + + +def _valid_backup(concept_embedding=None): + """A minimal kg-backup/2 object that passes the offline oracle. 
+ + The single embedding profile declares @3; pass a 3-vector to stay valid or a + wrong-length vector to trip the dimension check (E_CONCEPT_EMBEDDING_DIM). + """ + concept = {"concept_id": "c1", "label": "A"} + if concept_embedding is not None: + concept["embedding"] = concept_embedding + return { + "header": { + "format_version": "kg-backup/2", + "source": {"platform": "kg", "version": "1.7.3"}, + "exported_at": "2026-06-01T00:00:00Z", + "schema_version": 76, + "embedding_profiles": [{"identity": "test:embed@3"}], + "default_embedding_profile": 0, + "relationship_vocabulary": [{"relationship_type": "IMPLIES"}], + "epoch_kinds": [{"kind": "ingestion"}], + "actors": ["system"], + "content_types": ["text/plain"], + "ontologies": [{"name": "Corpus", "default_embedding_profile": 0}], + }, + "bulk": { + "concepts": [concept], + "sources": [{"source_id": "s1", "content_type": 0}], + "instances": [{"instance_id": "i1", "source_id": "s1"}], + "evidence": [{"concept_id": "c1", "instance_id": "i1"}], + "relationships": [ + {"from": "c1", "to": "c1", "type": 0, "properties": {"learned_id": "s1"}} + ], + "vocabulary": [], + }, + } + + +def _upload(api_client, headers, obj, filename="backup.json"): + return api_client.post( + "/admin/backup/verify", + files={"file": (filename, json.dumps(obj).encode("utf-8"), "application/json")}, + headers=headers, + ) + + +@pytest.mark.api +def test_verify_valid_backup_ok(api_client, mock_oauth_validation, auth_headers_admin, bypass_permission_check): + """A well-formed kg-backup/2 object verifies ok=True with no errors.""" + resp = _upload(api_client, auth_headers_admin, _valid_backup(concept_embedding=[0.1, 0.2, 0.3])) + assert resp.status_code == 200 + body = resp.json() + assert body["ok"] is True + assert body["format_version"] == "kg-backup/2" + assert body["errors"] == [] + # statistics are best-effort (de-interned view) — present and counting the concept + assert body.get("statistics", {}).get("concepts") == 1 + + +@pytest.mark.api 
+def test_verify_surfaces_dimension_mismatch(api_client, mock_oauth_validation, auth_headers_admin, bypass_permission_check): + """A concept whose vector length != its profile @dims is flagged (not restorable).""" + resp = _upload(api_client, auth_headers_admin, _valid_backup(concept_embedding=[0.1, 0.2])) # 2 != @3 + assert resp.status_code == 200 + body = resp.json() + assert body["ok"] is False + codes = {e["code"] for e in body["errors"]} + assert "E_CONCEPT_EMBEDDING_DIM" in codes + + +@pytest.mark.api +def test_verify_rejects_bad_extension(api_client, mock_oauth_validation, auth_headers_admin, bypass_permission_check): + """Only .tar.gz / .json are accepted.""" + resp = api_client.post( + "/admin/backup/verify", + files={"file": ("backup.txt", b"not a backup", "text/plain")}, + headers=auth_headers_admin, + ) + assert resp.status_code == 400 + + +@pytest.mark.api +def test_verify_rejects_invalid_json(api_client, mock_oauth_validation, auth_headers_admin, bypass_permission_check): + """A .json file that isn't valid JSON returns 400.""" + resp = api_client.post( + "/admin/backup/verify", + files={"file": ("backup.json", b"{not json", "application/json")}, + headers=auth_headers_admin, + ) + assert resp.status_code == 400 + + +@pytest.mark.api +def test_verify_refuses_legacy_format(api_client, mock_oauth_validation, auth_headers_admin, bypass_permission_check): + """A lower-major (legacy) object is refused by the oracle (single-path, no upcast).""" + resp = _upload(api_client, auth_headers_admin, {"header": {"format_version": "kg-backup/1"}, "bulk": {}}) + assert resp.status_code == 200 + body = resp.json() + assert body["ok"] is False + assert any(e["code"] == "E_LOWER_MAJOR" for e in body["errors"])