openshield-org · Vishnu2707 · Jun 5, 2026 · May 29, 2026 · May 29, 2026 · May 30, 2026
diff --git a/api/models/finding.py b/api/models/finding.py
@@ -136,7 +136,8 @@ def create_tables(self) -> None:
                     started_at      TIMESTAMPTZ NOT NULL,
                     completed_at    TIMESTAMPTZ,
                     total_findings  INTEGER DEFAULT 0,
-                    score           INTEGER DEFAULT NULL
+                    score           INTEGER DEFAULT NULL,
+                    cve_enrichment_status TEXT DEFAULT 'PENDING'
                 );
             """)
             cur.execute("""
@@ -203,6 +204,10 @@ def run_migrations(self) -> None:
                         ADD COLUMN IF NOT EXISTS cvss_score        FLOAT   DEFAULT NULL,
                         ADD COLUMN IF NOT EXISTS exploit_available BOOLEAN DEFAULT FALSE
                 """)
+                cur.execute("""
+                    ALTER TABLE scans
+                        ADD COLUMN IF NOT EXISTS cve_enrichment_status TEXT DEFAULT 'PENDING'
+                """)
             conn.commit()
             logger.info("CVE migrations applied successfully")
         except Exception as e:
@@ -219,8 +224,8 @@ def save_scan(self, scan_result: Dict[str, Any]) -> None:
         with conn.cursor() as cur:
             cur.execute(
                 """
-                INSERT INTO scans (scan_id, subscription_id, started_at, completed_at, total_findings, score)
-                VALUES (%s, %s, %s, %s, %s, %s)
+                INSERT INTO scans (scan_id, subscription_id, started_at, completed_at, total_findings, score, cve_enrichment_status)
+                VALUES (%s, %s, %s, %s, %s, %s, %s)
                 ON CONFLICT (scan_id) DO NOTHING
                 """,
                 (
@@ -230,6 +235,7 @@ def save_scan(self, scan_result: Dict[str, Any]) -> None:
                     scan_result["completed_at"],
                     scan_result["total_findings"],
                     scan_result.get("score"),
+                    scan_result.get("cve_enrichment_status", "PENDING"),
                 ),
             )
             for f in scan_result.get("findings", []):
@@ -345,6 +351,17 @@ def update_cve_fields(self, findings: List[Dict[str, Any]]) -> None:
                 )
         conn.commit()
 
+    def update_scan_enrichment_status(self, scan_id: str, status: str) -> None:
+        """Persist ``status`` as the CVE enrichment status of scan ``scan_id``."""
+        # Values travel as execute() parameters, separate from the SQL text.
+        statement = "UPDATE scans SET cve_enrichment_status = %s WHERE scan_id = %s"
+        bound_values = (status, scan_id)
+        connection = self._get_conn()
+        with connection.cursor() as cursor:
+            cursor.execute(statement, bound_values)
+        connection.commit()
+        logger.info("Updated scan %s enrichment status to %s", scan_id, status)
+
     def get_scans(self) -> List[Dict[str, Any]]:
         """Return all scan records ordered by most recent first."""
         conn = self._get_conn()
@@ -387,21 +404,25 @@ def get_cve_summary(self) -> Dict[str, Any]:
         conn = self._get_conn()
         with conn.cursor() as cur:
             cur.execute("""
-                SELECT
-                    COUNT(*) as total_findings,
-                    COUNT(CASE WHEN exploit_available = TRUE THEN 1 END) as exploit_count,
-                    MAX(cvss_score) as max_cvss_score,
-                    AVG(cvss_score) as avg_cvss_score,
-                    COUNT(CASE WHEN cvss_score >= 9.0 THEN 1 END) as critical_cve_count
-                FROM findings
-                WHERE scan_id = (
+                SELECT 
+                    s.cve_enrichment_status,
+                    COUNT(f.*) as total_findings,
+                    COUNT(CASE WHEN f.exploit_available = TRUE THEN 1 END) as exploit_count,
+                    MAX(f.cvss_score) as max_cvss_score,
+                    AVG(f.cvss_score) as avg_cvss_score,
+                    COUNT(CASE WHEN f.cvss_score >= 9.0 THEN 1 END) as critical_cve_count
+                FROM scans s
+                LEFT JOIN findings f ON s.scan_id = f.scan_id
+                WHERE s.scan_id = (
                     SELECT scan_id FROM scans WHERE total_findings > 0 ORDER BY started_at DESC LIMIT 1
                 )
+                GROUP BY s.cve_enrichment_status
             """)
             row = cur.fetchone()
 
         if not row:
             return {
+                "status": "UNKNOWN",
                 "total_findings": 0,
                 "exploit_count": 0,
                 "max_cvss_score": None,
@@ -410,11 +431,12 @@ def get_cve_summary(self) -> Dict[str, Any]:
             }
 
         return {
-            "total_findings": row[0],
-            "exploit_count": row[1],
-            "max_cvss_score": row[2],
-            "avg_cvss_score": round(row[3], 2) if row[3] is not None else None,
-            "critical_cve_count": row[4],
+            "status": row[0],
+            "total_findings": row[1],
+            "exploit_count": row[2],
+            "max_cvss_score": row[3],
+            "avg_cvss_score": round(row[4], 2) if row[4] is not None else None,
+            "critical_cve_count": row[5],
         }
 
     def get_compliance_score(self, framework: str) -> Dict[str, Any]:

diff --git a/api/routes/findings.py b/api/routes/findings.py
@@ -6,7 +6,6 @@
 from flask import Blueprint, g, jsonify, request
 
 from api.models.finding import DatabaseManager
-from scanner.cve_correlator import enrich_findings
 
 _PLAYBOOKS_DIR = Path(__file__).parent.parent.parent / "playbooks" / "cli"
 
@@ -39,16 +38,6 @@ def list_findings():
         }
         db = _get_db()
         findings = db.get_findings(filters)
-        legacy_findings = [
-            f
-            for f in findings
-            if f.get("cve_references") is None
-            and f.get("cvss_score") is None
-            and f.get("exploit_available") is None
-        ]
-        if legacy_findings:
-            enrich_findings(legacy_findings)
-            db.update_cve_fields(legacy_findings)
         return jsonify({"count": len(findings), "findings": findings})
     except Exception as exc:
         logger.error("Failed to list findings: %s", exc)

diff --git a/api/routes/scans.py b/api/routes/scans.py
@@ -5,6 +5,7 @@
 from flask import Blueprint, g, jsonify, request
 
 from api.models.finding import DatabaseManager
+from scanner.cve_correlator import enrich_findings
 
 scans_bp = Blueprint("scans", __name__)
 logger = logging.getLogger(__name__)
@@ -79,4 +80,50 @@ def trigger_scan():
 
     except Exception as exc:
         logger.error("Critical error in trigger_scan route: %s", exc, exc_info=True)
-        return jsonify({"error": "Critical route failure", "detail": str(exc)}), 500
+        return jsonify({"error": "Critical route failure", "detail": str(exc)}), 500
+
+
+@scans_bp.post("/api/scans/<scan_id>/enrich")
+def enrich_scan(scan_id):
+    """Run CVE enrichment for an existing scan and record its status.
+
+    Returns 404 for an unknown scan or one with no findings, 200/202 with the
+    current ``status`` when already COMPLETED/ENRICHING, and 500 on failure.
+    """
+    try:
+        db = _get_db()
+        # Check current status to avoid redundant NVD calls
+        scans = db.get_scans()
+        current_scan = next((s for s in scans if str(s["scan_id"]) == scan_id), None)
+        if not current_scan:
+            return jsonify({"error": "Scan not found"}), 404
+
+        # Short-circuit replies echo the status so every success body has the same shape.
+        status = current_scan.get("cve_enrichment_status")
+        if status == "COMPLETED":
+            return jsonify({"message": "Scan already enriched", "scan_id": scan_id, "status": status}), 200
+        if status == "ENRICHING":
+            return jsonify({"message": "Enrichment already in progress", "scan_id": scan_id, "status": status}), 202
+
+        findings = db.get_findings({"scan_id": scan_id})
+        if not findings:
+            return jsonify({"error": "No findings found for this scan"}), 404
+
+        logger.info("Enriching %d findings for scan %s", len(findings), scan_id)
+        db.update_scan_enrichment_status(scan_id, "ENRICHING")
+        try:
+            enriched = enrich_findings(findings)
+            db.update_cve_fields(enriched)
+            db.update_scan_enrichment_status(scan_id, "COMPLETED")
+        except Exception as exc:
+            logger.error("Enrichment failed for scan %s: %s", scan_id, exc, exc_info=True)
+            db.update_scan_enrichment_status(scan_id, "FAILED")
+            return jsonify({"error": "Enrichment failed", "detail": str(exc)}), 500
+        return jsonify({
+            "scan_id": scan_id,
+            "status": "COMPLETED",
+            "enriched_count": len(enriched)
+        })
+
+    except Exception as exc:
+        logger.error("Failed to enrich scan %s: %s", scan_id, exc, exc_info=True)
+        return jsonify({"error": "Internal server error", "detail": str(exc)}), 500
diff --git a/docs/cve_correlation_feature.md b/docs/cve_correlation_feature.md
@@ -18,20 +18,22 @@ The CVE Correlation feature integrates the MITRE National Vulnerability Database
 
 | File | Change | Why |
 |---|---|---|
-| scanner/engine.py | Enrichment-at-Source. Integrated enrich_findings directly into the scan lifecycle. | Performance: By enriching during the scan, CVE data is saved once to the database. The frontend does not have to wait for an NVD API call when loading the dashboard. |
-| api/models/finding.py | Updated Finding dataclass and added run_migrations and get_cve_summary. | Persistence: Adds cve_references, cvss_score, and exploit_available columns to PostgreSQL. get_cve_summary provides stats for dashboard widgets. |
+| scanner/engine.py | Decoupled Scan. Removed synchronous enrichment from the scan lifecycle. | Performance: Azure scans now return immediately without waiting for NVD rate limits (7s per resource type). |
+| api/routes/scans.py | New Endpoint. Added `POST /api/scans/<scan_id>/enrich`. | Flexibility: CVE enrichment can now be triggered on-demand or by a background job after the scan completes. |
+| api/models/finding.py | Updated Scan model and added enrichment status tracking. | Persistence: Adds `cve_enrichment_status` to track `PENDING`, `ENRICHING`, `COMPLETED`, or `FAILED` states. |
 | api/app.py | Added db.run_migrations call at startup. | Auto-Deployment: Ensures the database schema is updated automatically on any environment where the app is launched. |
-| api/routes/score.py | Added GET /api/score/cve-summary endpoint. | Dashboard UI: Provides the frontend with high-level data like Total Known Exploits in a single lightweight request. |
-| api/routes/findings.py | Returns findings from the database and enriches only legacy rows missing CVE fields. | Performance: Avoids extra NVD calls on every request while still backfilling older records. |
+| api/routes/score.py | Added GET /api/score/cve-summary endpoint. | Dashboard UI: Provides the frontend with high-level data like Total Known Exploits and enrichment status. |
+| api/routes/findings.py | Returns findings from the database without JIT enrichment. | Performance: Ensures predictable and fast API responses for findings. |
 
 ## Frontend Integration Design
 
-To ensure the frontend dashboard works perfectly, the architecture uses an Enrichment-at-Source model:
+To ensure the frontend dashboard works perfectly, the architecture uses a Decoupled Enrichment model:
 
-1. Zero-Latency Dashboard Loads: The scan engine pre-enriches findings. When the frontend calls the API, it receives static data from the database. Legacy rows missing CVE fields are enriched on-demand only once.
-2. Dashboard-Ready Summary Endpoint: The /api/score/cve-summary endpoint allows the frontend to fetch high-level statistics (Total Findings, Exploit Count, Max CVSS) in one call instead of processing thousands of records locally.
-3. Actionable Risk (CISA KEV): The exploit_available flag uses the CISA Known Exploited Vulnerabilities catalogue, allowing the dashboard to highlight high-priority risks that are being exploited in the wild.
-4. Persistent Historical State: Enrichment happens at the time of scan, meaning the dashboard shows the CVE status as it existed on that day. This ensures accurate compliance and historical reporting.
+1. Fast Dashboard Loads: The scan engine completes rapidly. The dashboard can check the enrichment status of the latest scan.
+2. Manual/Job Enrichment: A "Trigger Enrichment" button or a background task calls `POST /api/scans/<scan_id>/enrich` to populate CVE data.
+3. Dashboard-Ready Summary Endpoint: The /api/score/cve-summary endpoint includes the `status` field, allowing the UI to show a "Scan Enriched" badge or a "Pending" spinner.
+4. Actionable Risk (CISA KEV): The exploit_available flag uses the CISA Known Exploited Vulnerabilities catalogue, allowing the dashboard to highlight high-priority risks that are being exploited in the wild.
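+5. Persistent Historical State: CVE data reflects the state of the vulnerability sources at the moment `POST /api/scans/<scan_id>/enrich` ran, not at scan time. The result is persisted, and a scan whose status is `COMPLETED` is not re-enriched by later calls.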
 
 ## Security and Compliance Audit
 
@@ -55,32 +57,25 @@ Response shape (abridged):
          "rule_id": "AZ-STOR-003",
          "severity": "HIGH",
          "resource_id": "/subscriptions/...",
-         "cve_references": [
-            {
-               "cve_id": "CVE-2023-12345",
-               "cvss_score": 9.8,
-               "cvss_severity": "CRITICAL",
-               "exploit_available": true,
-               "nvd_url": "https://nvd.nist.gov/vuln/detail/CVE-2023-12345"
-            }
-         ],
-         "cvss_score": 9.8,
-         "exploit_available": true
+         "cve_references": [],
+         "cvss_score": null,
+         "exploit_available": false
       }
    ]
 }
 ```
 
 Notes:
 1. Results are ordered by detected_at descending and capped at 1000.
-2. CVE fields are always present. Legacy rows are backfilled on request.
+2. CVE fields are present but empty if enrichment has not been triggered.
 
 ### GET /api/score/cve-summary
 
 Response shape:
 
 ```json
 {
+   "status": "COMPLETED",
    "total_findings": 74,
    "exploit_count": 5,
    "max_cvss_score": 9.8,

diff --git a/scanner/engine.py b/scanner/engine.py
@@ -3,13 +3,11 @@
 import importlib.util
 import logging
 import uuid
-import json
 from datetime import datetime, timezone
 from pathlib import Path
 from typing import Any, Dict, List
 
 from scanner.azure_client import AzureClient
-from scanner.cve_correlator import enrich_findings
 
 logger = logging.getLogger(__name__)
 
@@ -129,9 +127,6 @@ def run_scan(self) -> Dict[str, Any]:
             except Exception as exc:
                 logger.error("Rule %s raised an exception: %s", rule_id, exc, exc_info=True)
 
-        logger.info("Enriching %d findings with CVE data...", len(findings))
-        findings = enrich_findings(findings)
-
         completed_at = datetime.now(timezone.utc).isoformat()
 
         severity_weights = {"HIGH": 10, "MEDIUM": 5, "LOW": 2}
@@ -142,6 +137,7 @@ def run_scan(self) -> Dict[str, Any]:
             "scan_id": scan_id,
             "subscription_id": self.subscription_id,
             "status": "completed",
+            "cve_enrichment_status": "PENDING",
             "started_at": started_at,
             "completed_at": completed_at,
             "total_findings": len(findings),

diff --git a/tests/smoke_test.py b/tests/smoke_test.py
@@ -319,16 +319,47 @@ def skip(name, reason):
     skip("TC-27 GET /api/findings/<id>/playbook returns 200", "No findings in DB — seed the database first.")
     skip("TC-28 GET /api/findings/<id>/playbook returns playbook keys", "No findings in DB — seed the database first.")
 
+# ── TC-33 to TC-35: CVE Enrichment endpoints ──────────────────────────────
+print("\n=== CVE Enrichment Endpoints ===")
+_scan_status, _scan_body = request("GET", "/api/scans")
+_scan_id = (
+    _scan_body[0].get("scan_id")
+    if _scan_status == 200 and isinstance(_scan_body, list) and _scan_body
+    else None
+)
+if _scan_id is not None:
+    test(
+        f"TC-33 POST /api/scans/{_scan_id}/enrich returns 200",
+        "POST", f"/api/scans/{_scan_id}/enrich",
+        lambda s, b: s == 200, body={},
+    )
+    test(
+        f"TC-34 POST /api/scans/{_scan_id}/enrich returns status COMPLETED",
+        "POST", f"/api/scans/{_scan_id}/enrich",
+        # If TC-33 succeeded the scan is COMPLETED; the repeat call may reply "Scan already enriched".
+        lambda s, b: b.get("status") == "COMPLETED" or b.get("message") == "Scan already enriched",
+        body={},
+    )
+else:
+    skip("TC-33 POST /api/scans/<id>/enrich returns 200", "No scans in DB — trigger a scan first.")
+    skip("TC-34 POST /api/scans/<id>/enrich returns status COMPLETED", "No scans in DB — trigger a scan first.")
+
+test(
+    "TC-35 GET /api/score/cve-summary returns status field",
+    "GET", "/api/score/cve-summary",
+    lambda s, b: "status" in b,
+)
+
 # ── TC-29 to TC-32: General edge cases ────────────────────────────────────
 print("\n=== Edge Cases ===")
 test(
-    "TC-29 GET /nonexistent returns 404",
+    "TC-36 GET /nonexistent returns 404",
     "GET", "/nonexistent-endpoint-xyz",
     lambda s, b: s == 404,
     auth=True,
 )
 test(
-    "TC-30 POST /api/scans/trigger with empty body returns 400 or starts scan",
+    "TC-37 POST /api/scans/trigger with empty body returns 400 or starts scan",
     "POST", "/api/scans/trigger",
     # 400 = missing subscription_id (expected when no AZURE_SUBSCRIPTION_ID env var)
     # 200/201/202 = scan started (AZURE_SUBSCRIPTION_ID configured on server)
@@ -338,12 +369,12 @@ def skip(name, reason):
     body={},
 )
 test(
-    "TC-31 GET /api/findings?limit=0 does not crash",
+    "TC-38 GET /api/findings?limit=0 does not crash",
     "GET", "/api/findings?limit=0",
     lambda s, b: s in (200, 400),
 )
 test(
-    "TC-32 Response Content-Type is JSON",
+    "TC-39 Response Content-Type is JSON",
     "GET", "/api/findings",
     lambda s, b: isinstance(b, dict),
 )