Skip to content
54 changes: 38 additions & 16 deletions api/models/finding.py
Original file line number Diff line number Diff line change
Expand Up @@ -136,7 +136,8 @@ def create_tables(self) -> None:
started_at TIMESTAMPTZ NOT NULL,
completed_at TIMESTAMPTZ,
total_findings INTEGER DEFAULT 0,
score INTEGER DEFAULT NULL
score INTEGER DEFAULT NULL,
cve_enrichment_status TEXT DEFAULT 'PENDING'
);
""")
cur.execute("""
Expand Down Expand Up @@ -203,6 +204,10 @@ def run_migrations(self) -> None:
ADD COLUMN IF NOT EXISTS cvss_score FLOAT DEFAULT NULL,
ADD COLUMN IF NOT EXISTS exploit_available BOOLEAN DEFAULT FALSE
""")
cur.execute("""
ALTER TABLE scans
ADD COLUMN IF NOT EXISTS cve_enrichment_status TEXT DEFAULT 'PENDING'
""")
conn.commit()
logger.info("CVE migrations applied successfully")
except Exception as e:
Expand All @@ -219,8 +224,8 @@ def save_scan(self, scan_result: Dict[str, Any]) -> None:
with conn.cursor() as cur:
cur.execute(
"""
INSERT INTO scans (scan_id, subscription_id, started_at, completed_at, total_findings, score)
VALUES (%s, %s, %s, %s, %s, %s)
INSERT INTO scans (scan_id, subscription_id, started_at, completed_at, total_findings, score, cve_enrichment_status)
VALUES (%s, %s, %s, %s, %s, %s, %s)
ON CONFLICT (scan_id) DO NOTHING
""",
(
Expand All @@ -230,6 +235,7 @@ def save_scan(self, scan_result: Dict[str, Any]) -> None:
scan_result["completed_at"],
scan_result["total_findings"],
scan_result.get("score"),
scan_result.get("cve_enrichment_status", "PENDING"),
),
)
for f in scan_result.get("findings", []):
Expand Down Expand Up @@ -345,6 +351,17 @@ def update_cve_fields(self, findings: List[Dict[str, Any]]) -> None:
)
conn.commit()

def update_scan_enrichment_status(self, scan_id: str, status: str) -> None:
"""Update the CVE enrichment status for a specific scan."""
conn = self._get_conn()
with conn.cursor() as cur:
cur.execute(
"UPDATE scans SET cve_enrichment_status = %s WHERE scan_id = %s",
(status, scan_id),
)
conn.commit()
logger.info("Updated scan %s enrichment status to %s", scan_id, status)

def get_scans(self) -> List[Dict[str, Any]]:
"""Return all scan records ordered by most recent first."""
conn = self._get_conn()
Expand Down Expand Up @@ -387,21 +404,25 @@ def get_cve_summary(self) -> Dict[str, Any]:
conn = self._get_conn()
with conn.cursor() as cur:
cur.execute("""
SELECT
COUNT(*) as total_findings,
COUNT(CASE WHEN exploit_available = TRUE THEN 1 END) as exploit_count,
MAX(cvss_score) as max_cvss_score,
AVG(cvss_score) as avg_cvss_score,
COUNT(CASE WHEN cvss_score >= 9.0 THEN 1 END) as critical_cve_count
FROM findings
WHERE scan_id = (
SELECT
s.cve_enrichment_status,
COUNT(f.*) as total_findings,
COUNT(CASE WHEN f.exploit_available = TRUE THEN 1 END) as exploit_count,
MAX(f.cvss_score) as max_cvss_score,
AVG(f.cvss_score) as avg_cvss_score,
COUNT(CASE WHEN f.cvss_score >= 9.0 THEN 1 END) as critical_cve_count
FROM scans s
LEFT JOIN findings f ON s.scan_id = f.scan_id
WHERE s.scan_id = (
SELECT scan_id FROM scans WHERE total_findings > 0 ORDER BY started_at DESC LIMIT 1
)
GROUP BY s.cve_enrichment_status
""")
row = cur.fetchone()

if not row:
return {
"status": "UNKNOWN",
"total_findings": 0,
"exploit_count": 0,
"max_cvss_score": None,
Expand All @@ -410,11 +431,12 @@ def get_cve_summary(self) -> Dict[str, Any]:
}

return {
"total_findings": row[0],
"exploit_count": row[1],
"max_cvss_score": row[2],
"avg_cvss_score": round(row[3], 2) if row[3] is not None else None,
"critical_cve_count": row[4],
"status": row[0],
"total_findings": row[1],
"exploit_count": row[2],
"max_cvss_score": row[3],
"avg_cvss_score": round(row[4], 2) if row[4] is not None else None,
"critical_cve_count": row[5],
}

def get_compliance_score(self, framework: str) -> Dict[str, Any]:
Expand Down
11 changes: 0 additions & 11 deletions api/routes/findings.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,6 @@
from flask import Blueprint, g, jsonify, request

from api.models.finding import DatabaseManager
from scanner.cve_correlator import enrich_findings

_PLAYBOOKS_DIR = Path(__file__).parent.parent.parent / "playbooks" / "cli"

Expand Down Expand Up @@ -39,16 +38,6 @@ def list_findings():
}
db = _get_db()
findings = db.get_findings(filters)
legacy_findings = [
f
for f in findings
if f.get("cve_references") is None
and f.get("cvss_score") is None
and f.get("exploit_available") is None
]
if legacy_findings:
enrich_findings(legacy_findings)
db.update_cve_fields(legacy_findings)
return jsonify({"count": len(findings), "findings": findings})
except Exception as exc:
logger.error("Failed to list findings: %s", exc)
Expand Down
49 changes: 48 additions & 1 deletion api/routes/scans.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
from flask import Blueprint, g, jsonify, request

from api.models.finding import DatabaseManager
from scanner.cve_correlator import enrich_findings

scans_bp = Blueprint("scans", __name__)
logger = logging.getLogger(__name__)
Expand Down Expand Up @@ -79,4 +80,50 @@ def trigger_scan():

except Exception as exc:
logger.error("Critical error in trigger_scan route: %s", exc, exc_info=True)
return jsonify({"error": "Critical route failure", "detail": str(exc)}), 500
return jsonify({"error": "Critical route failure", "detail": str(exc)}), 500


@scans_bp.post("/api/scans/<scan_id>/enrich")
def enrich_scan(scan_id):
"""Trigger CVE enrichment for an existing scan."""
try:
db = _get_db()

# Check current status to avoid redundant NVD calls
scans = db.get_scans()
current_scan = next((s for s in scans if str(s["scan_id"]) == scan_id), None)

if not current_scan:
return jsonify({"error": "Scan not found"}), 404

status = current_scan.get("cve_enrichment_status")
if status == "COMPLETED":
return jsonify({"message": "Scan already enriched", "scan_id": scan_id}), 200
if status == "ENRICHING":
return jsonify({"message": "Enrichment already in progress", "scan_id": scan_id}), 202

findings = db.get_findings({"scan_id": scan_id})
if not findings:
return jsonify({"error": "No findings found for this scan"}), 404

logger.info("Enriching %d findings for scan %s", len(findings), scan_id)
db.update_scan_enrichment_status(scan_id, "ENRICHING")

try:
enriched = enrich_findings(findings)
db.update_cve_fields(enriched)
db.update_scan_enrichment_status(scan_id, "COMPLETED")
except Exception as exc:
logger.error("Enrichment failed for scan %s: %s", scan_id, exc)
db.update_scan_enrichment_status(scan_id, "FAILED")
return jsonify({"error": "Enrichment failed", "detail": str(exc)}), 500

return jsonify({
"scan_id": scan_id,
"status": "COMPLETED",
"enriched_count": len(enriched)
})

except Exception as exc:
logger.error("Failed to enrich scan %s: %s", scan_id, exc)
return jsonify({"error": "Internal server error", "detail": str(exc)}), 500
37 changes: 16 additions & 21 deletions docs/cve_correlation_feature.md
Original file line number Diff line number Diff line change
Expand Up @@ -18,20 +18,22 @@ The CVE Correlation feature integrates the MITRE National Vulnerability Database

| File | Change | Why |
|---|---|---|
| scanner/engine.py | Enrichment-at-Source. Integrated enrich_findings directly into the scan lifecycle. | Performance: By enriching during the scan, CVE data is saved once to the database. The frontend does not have to wait for an NVD API call when loading the dashboard. |
| api/models/finding.py | Updated Finding dataclass and added run_migrations and get_cve_summary. | Persistence: Adds cve_references, cvss_score, and exploit_available columns to PostgreSQL. get_cve_summary provides stats for dashboard widgets. |
| scanner/engine.py | Decoupled Scan. Removed synchronous enrichment from the scan lifecycle. | Performance: Azure scans now return immediately without waiting for NVD rate limits (7s per resource type). |
| api/routes/scans.py | New Endpoint. Added `POST /api/scans/<scan_id>/enrich`. | Flexibility: CVE enrichment can now be triggered on-demand or by a background job after the scan completes. |
| api/models/finding.py | Updated Scan model and added enrichment status tracking. | Persistence: Adds `cve_enrichment_status` to track `PENDING`, `COMPLETED`, or `FAILED` states. |
| api/app.py | Added db.run_migrations call at startup. | Auto-Deployment: Ensures the database schema is updated automatically on any environment where the app is launched. |
| api/routes/score.py | Added GET /api/score/cve-summary endpoint. | Dashboard UI: Provides the frontend with high-level data like Total Known Exploits in a single lightweight request. |
| api/routes/findings.py | Returns findings from the database and enriches only legacy rows missing CVE fields. | Performance: Avoids extra NVD calls on every request while still backfilling older records. |
| api/routes/score.py | Added GET /api/score/cve-summary endpoint. | Dashboard UI: Provides the frontend with high-level data like Total Known Exploits and enrichment status. |
| api/routes/findings.py | Returns findings from the database without JIT enrichment. | Performance: Ensures predictable and fast API responses for findings. |

## Frontend Integration Design

To ensure the frontend dashboard works perfectly, the architecture uses an Enrichment-at-Source model:
To ensure the frontend dashboard works perfectly, the architecture uses a Decoupled Enrichment model:

1. Zero-Latency Dashboard Loads: The scan engine pre-enriches findings. When the frontend calls the API, it receives static data from the database. Legacy rows missing CVE fields are enriched on-demand only once.
2. Dashboard-Ready Summary Endpoint: The /api/score/cve-summary endpoint allows the frontend to fetch high-level statistics (Total Findings, Exploit Count, Max CVSS) in one call instead of processing thousands of records locally.
3. Actionable Risk (CISA KEV): The exploit_available flag uses the CISA Known Exploited Vulnerabilities catalogue, allowing the dashboard to highlight high-priority risks that are being exploited in the wild.
4. Persistent Historical State: Enrichment happens at the time of scan, meaning the dashboard shows the CVE status as it existed on that day. This ensures accurate compliance and historical reporting.
1. Fast Dashboard Loads: The scan engine completes rapidly. The dashboard can check the enrichment status of the latest scan.
2. Manual/Job Enrichment: A "Trigger Enrichment" button or a background task calls `POST /api/scans/<scan_id>/enrich` to populate CVE data.
3. Dashboard-Ready Summary Endpoint: The /api/score/cve-summary endpoint includes the `status` field, allowing the UI to show a "Scan Enriched" badge or a "Pending" spinner.
4. Actionable Risk (CISA KEV): The exploit_available flag uses the CISA Known Exploited Vulnerabilities catalogue, allowing the dashboard to highlight high-priority risks that are being exploited in the wild.
5. Persistent Historical State: Enrichment happens at the time of the enrichment call, and the result is persisted.

## Security and Compliance Audit

Expand All @@ -55,32 +57,25 @@ Response shape (abridged):
"rule_id": "AZ-STOR-003",
"severity": "HIGH",
"resource_id": "/subscriptions/...",
"cve_references": [
{
"cve_id": "CVE-2023-12345",
"cvss_score": 9.8,
"cvss_severity": "CRITICAL",
"exploit_available": true,
"nvd_url": "https://nvd.nist.gov/vuln/detail/CVE-2023-12345"
}
],
"cvss_score": 9.8,
"exploit_available": true
"cve_references": [],
"cvss_score": null,
"exploit_available": false
}
]
}
```

Notes:
1. Results are ordered by detected_at descending and capped at 1000.
2. CVE fields are always present. Legacy rows are backfilled on request.
2. CVE fields are present but empty if enrichment has not been triggered.

### GET /api/score/cve-summary

Response shape:

```json
{
"status": "COMPLETED",
"total_findings": 74,
"exploit_count": 5,
"max_cvss_score": 9.8,
Expand Down
6 changes: 1 addition & 5 deletions scanner/engine.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,13 +3,11 @@
import importlib.util
import logging
import uuid
import json
from datetime import datetime, timezone
from pathlib import Path
from typing import Any, Dict, List

from scanner.azure_client import AzureClient
from scanner.cve_correlator import enrich_findings

logger = logging.getLogger(__name__)

Expand Down Expand Up @@ -129,9 +127,6 @@ def run_scan(self) -> Dict[str, Any]:
except Exception as exc:
logger.error("Rule %s raised an exception: %s", rule_id, exc, exc_info=True)

logger.info("Enriching %d findings with CVE data...", len(findings))
findings = enrich_findings(findings)

completed_at = datetime.now(timezone.utc).isoformat()

severity_weights = {"HIGH": 10, "MEDIUM": 5, "LOW": 2}
Expand All @@ -142,6 +137,7 @@ def run_scan(self) -> Dict[str, Any]:
"scan_id": scan_id,
"subscription_id": self.subscription_id,
"status": "completed",
"cve_enrichment_status": "PENDING",
"started_at": started_at,
"completed_at": completed_at,
"total_findings": len(findings),
Expand Down
39 changes: 35 additions & 4 deletions tests/smoke_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -319,16 +319,47 @@ def skip(name, reason):
skip("TC-27 GET /api/findings/<id>/playbook returns 200", "No findings in DB — seed the database first.")
skip("TC-28 GET /api/findings/<id>/playbook returns playbook keys", "No findings in DB — seed the database first.")

# ── TC-33 to TC-35: CVE Enrichment endpoints ──────────────────────────────
print("\n=== CVE Enrichment Endpoints ===")
_scan_status, _scan_body = request("GET", "/api/scans")
_scan_id = (
_scan_body[0].get("scan_id")
if _scan_status == 200 and isinstance(_scan_body, list) and _scan_body
else None
)
if _scan_id is not None:
test(
f"TC-33 POST /api/scans/{_scan_id}/enrich returns 200",
"POST", f"/api/scans/{_scan_id}/enrich",
lambda s, b: s == 200,
body={},
)
test(
f"TC-34 POST /api/scans/{_scan_id}/enrich returns status COMPLETED",
"POST", f"/api/scans/{_scan_id}/enrich",
lambda s, b: b.get("status") == "COMPLETED",
body={},
)
else:
skip("TC-33 POST /api/scans/<id>/enrich returns 200", "No scans in DB — trigger a scan first.")
skip("TC-34 POST /api/scans/<id>/enrich returns status COMPLETED", "No scans in DB — trigger a scan first.")

test(
"TC-35 GET /api/score/cve-summary returns status field",
"GET", "/api/score/cve-summary",
lambda s, b: "status" in b,
)

# ── TC-29 to TC-32: General edge cases ────────────────────────────────────
print("\n=== Edge Cases ===")
test(
"TC-29 GET /nonexistent returns 404",
"TC-36 GET /nonexistent returns 404",
"GET", "/nonexistent-endpoint-xyz",
lambda s, b: s == 404,
auth=True,
)
test(
"TC-30 POST /api/scans/trigger with empty body returns 400 or starts scan",
"TC-37 POST /api/scans/trigger with empty body returns 400 or starts scan",
"POST", "/api/scans/trigger",
# 400 = missing subscription_id (expected when no AZURE_SUBSCRIPTION_ID env var)
# 200/201/202 = scan started (AZURE_SUBSCRIPTION_ID configured on server)
Expand All @@ -338,12 +369,12 @@ def skip(name, reason):
body={},
)
test(
"TC-31 GET /api/findings?limit=0 does not crash",
"TC-38 GET /api/findings?limit=0 does not crash",
"GET", "/api/findings?limit=0",
lambda s, b: s in (200, 400),
)
test(
"TC-32 Response Content-Type is JSON",
"TC-39 Response Content-Type is JSON",
"GET", "/api/findings",
lambda s, b: isinstance(b, dict),
)
Expand Down
Loading