diff --git a/CIVIC_INTELLIGENCE.md b/CIVIC_INTELLIGENCE.md index b916291e..c68f77b1 100644 --- a/CIVIC_INTELLIGENCE.md +++ b/CIVIC_INTELLIGENCE.md @@ -21,6 +21,7 @@ Every day at 00:00 UTC, the system performs the following steps: * Volume > 5 * Increase > 50% compared to yesterday. * **Geographic Clustering:** Uses DBSCAN (Density-Based Spatial Clustering of Applications with Noise) to find clusters of issues (e.g., multiple reports of the same pothole). +* **Top Emerging Concern:** The system prioritizes the category with the highest percentage increase (spike) over raw volume. If no spikes are detected, the category with the highest volume is selected. ### 2. Adaptive Weight Optimization The system learns from manual interventions: @@ -40,7 +41,8 @@ A daily score (0-100) reflecting the city's civic health. * **Base Score:** 70 * **Bonus:** +2.0 per resolved issue. * **Penalty:** -0.5 per new issue. -* **Output:** Includes "Top Emerging Concern" and "Highest Severity Region". +* **Delta Calculation:** Compares the current score with the previous day's score to show improvement or decline (e.g., `+3.1`). When no previous day's score is available, the delta is reported as `0.0`. +* **Output:** Includes "Top Emerging Concern", "Highest Severity Region", and the daily score change. ## Data Storage & Auditability @@ -51,7 +53,7 @@ A daily score (0-100) reflecting the city's civic health. ### Daily Snapshots * Stored in `backend/data/dailySnapshots/YYYY-MM-DD.json`. * Contains: - * `civic_index`: The calculated score and metrics. + * `civic_index`: The calculated score, score delta, and metrics. * `trends`: Keywords, distribution, clusters, and detected spikes. * `weight_changes`: A detailed audit log of what weights were changed, the old value, the new value, and the reason. * `model_weights`: A copy of the full weight configuration at the time of the snapshot for full reproducibility. 
diff --git a/backend/civic_intelligence.py b/backend/civic_intelligence.py index 468bef57..dbf82537 100644 --- a/backend/civic_intelligence.py +++ b/backend/civic_intelligence.py @@ -63,17 +63,29 @@ def run_daily_cycle(self): current_dist = trends.get('category_distribution', {}) spikes = [] + max_spike_increase = 0.0 + top_spike_category = None + for category, count in current_dist.items(): prev_count = previous_dist.get(category, 0) - # Spike definition: > 50% increase AND significant volume (> 5) - if prev_count > 0 and count > 5: + increase = 0.0 + + if prev_count > 0: increase = (count - prev_count) / prev_count - if increase > 0.5: + # Spike definition: > 50% increase AND significant volume (> 5) + if count > 5 and increase > 0.5: spikes.append(category) elif prev_count == 0 and count > 5: + increase = float('inf') # Infinite increase spikes.append(category) # New surge + # Track the highest spike for "Emerging Concern" + if increase > max_spike_increase: + max_spike_increase = increase + top_spike_category = category + trends['spikes'] = spikes + trends['top_spike_category'] = top_spike_category # 3. Adaptive Weight Optimization (Severity) # Find manual severity upgrades in the last 24h @@ -149,7 +161,7 @@ def run_daily_cycle(self): }) # 5. Civic Intelligence Index - index_data = self._calculate_index(db, issues_24h, trends) + index_data = self._calculate_index(db, issues_24h, trends, previous_snapshot) # 6. Snapshot snapshot = { @@ -176,7 +188,7 @@ def run_daily_cycle(self): finally: db.close() - def _calculate_index(self, db: Session, issues_24h: List[Issue], trends: Dict[str, Any]) -> Dict[str, Any]: + def _calculate_index(self, db: Session, issues_24h: List[Issue], trends: Dict[str, Any], previous_snapshot: Dict[str, Any]) -> Dict[str, Any]: """ Generates a daily 'Civic Intelligence Index' score. 
""" @@ -200,12 +212,26 @@ def _calculate_index(self, db: Session, issues_24h: List[Issue], trends: Dict[st # Clamp 0-100 score = max(0.0, min(100.0, score)) + score = round(score, 1) + + # Calculate Delta + previous_index_data = previous_snapshot.get('civic_index', {}) + previous_score = previous_index_data.get('score') + + score_delta = 0.0 + if previous_score is not None: + score_delta = round(score - previous_score, 1) # Top emerging concern - top_cat = "None" - category_dist = trends.get('category_distribution', {}) - if category_dist: - top_cat = max(category_dist, key=category_dist.get) + # Prioritize identified spike, otherwise top volume + top_cat = trends.get('top_spike_category') + + if not top_cat: + category_dist = trends.get('category_distribution', {}) + if category_dist: + top_cat = max(category_dist, key=category_dist.get) + else: + top_cat = "None" # Highest severity region (from clusters) highest_severity_region = "None" @@ -218,7 +244,8 @@ def _calculate_index(self, db: Session, issues_24h: List[Issue], trends: Dict[st highest_severity_region = f"Lat {top_cluster['latitude']:.4f}, Lon {top_cluster['longitude']:.4f}" return { - "score": round(score, 1), + "score": score, + "score_delta": score_delta, "new_issues_count": total_new, "resolved_issues_count": resolved_count, "top_emerging_concern": top_cat, diff --git a/backend/requirements-render.txt b/backend/requirements-render.txt index f4111435..9777c87c 100644 --- a/backend/requirements-render.txt +++ b/backend/requirements-render.txt @@ -19,5 +19,5 @@ SpeechRecognition pydub googletrans==4.0.2 langdetect -numpy +# scikit-learn includes numpy, scipy, joblib dependencies scikit-learn diff --git a/backend/tests/test_civic_intelligence_delta.py b/backend/tests/test_civic_intelligence_delta.py new file mode 100644 index 00000000..33e51fc4 --- /dev/null +++ b/backend/tests/test_civic_intelligence_delta.py @@ -0,0 +1,164 @@ +import pytest +import json +import os +from unittest.mock import MagicMock, 
patch, mock_open +from datetime import datetime, timedelta, timezone + +from backend.models import Issue, EscalationAudit, Grievance +from backend.civic_intelligence import CivicIntelligenceEngine + +@patch('backend.civic_intelligence.SessionLocal') +@patch('backend.civic_intelligence.trend_analyzer') +@patch('backend.civic_intelligence.adaptive_weights') +@patch('builtins.open', new_callable=mock_open) +@patch('json.dump') +@patch('os.listdir') +def test_civic_intelligence_index_delta(mock_listdir, mock_json_dump, mock_file_open, mock_weights, mock_trend_analyzer, mock_db_session): + engine = CivicIntelligenceEngine() + + # Mock DB + mock_session = MagicMock() + mock_db_session.return_value = mock_session + + # 1. Simulate Previous Snapshot with Score 70.0 + previous_snapshot_content = json.dumps({ + "civic_index": {"score": 70.0}, + "trends": {"category_distribution": {"Fire": 2, "Water": 5}} + }) + + mock_listdir.return_value = ['2023-01-01.json'] + + # Mock open to return previous snapshot content when reading + read_mock = mock_open(read_data=previous_snapshot_content) + write_mock = mock_open() + + def open_side_effect(file, mode='r', *args, **kwargs): + if 'r' in mode: + return read_mock(file, mode, *args, **kwargs) + return write_mock(file, mode, *args, **kwargs) + + mock_file_open.side_effect = open_side_effect + + # 2. 
Simulate Current Data for Higher Score + # Mock Query Results + mock_query_issues = MagicMock() # For Issues + mock_query_audits = MagicMock() # For EscalationAudit + mock_query_grievances = MagicMock() # For Grievances + + def query_side_effect(model): + if model == Issue: + return mock_query_issues + elif model == EscalationAudit: + return mock_query_audits + elif model == Grievance: + return mock_query_grievances + return MagicMock() + + mock_session.query.side_effect = query_side_effect + + # issues_24h query (new issues) + # The code calls: db.query(Issue).filter(Issue.created_at >= last_24h).all() + # And: db.query(Issue).filter(Issue.resolved_at >= last_24h).count() + + # We need to distinguish between the two filter calls or just return something compatible + # Let's make the first call return a list, and the second a count + + # Configure mock_query_issues to handle chained calls + # .filter().all() -> returns [Issue, Issue] + # .filter().count() -> returns 5 + + mock_query_issues.filter.return_value.all.return_value = [Issue(id=1), Issue(id=2)] + mock_query_issues.filter.return_value.count.return_value = 5 + + # Mock Escalation Audits (Empty list to avoid iteration error) + mock_query_audits.filter.return_value.all.return_value = [] + + # Setup Trend Analyzer to return a spike + mock_trend_analyzer.analyze.return_value = { + "top_keywords": [], + "category_distribution": {"Fire": 10}, # Spiked from 2 + "clusters": [] + } + + # Mock adaptive weights radius + mock_weights.get_duplicate_search_radius.return_value = 50.0 + + # Run + engine.run_daily_cycle() + + # Verify Snapshot Content + # Ensure json.dump was called + assert mock_json_dump.called + args, _ = mock_json_dump.call_args + snapshot = args[0] + + index_data = snapshot['civic_index'] + + # Check Score + # Base 70 + 10 - 1 = 79.0 + assert index_data['score'] == 79.0 + + # Check Delta + # 79.0 - 70.0 = 9.0 + assert index_data['score_delta'] == 9.0 + + # Check Emerging Concern + # Fire increased from 
2 to 10 (>50% and >5 items) -> Should be top spike + assert index_data['top_emerging_concern'] == "Fire" + +@patch('backend.civic_intelligence.SessionLocal') +@patch('backend.civic_intelligence.trend_analyzer') +@patch('backend.civic_intelligence.adaptive_weights') +@patch('builtins.open', new_callable=mock_open) +@patch('json.dump') +@patch('os.listdir') +def test_civic_intelligence_no_previous_snapshot(mock_listdir, mock_json_dump, mock_file_open, mock_weights, mock_trend_analyzer, mock_db_session): + engine = CivicIntelligenceEngine() + mock_session = MagicMock() + mock_db_session.return_value = mock_session + + # Simulate NO previous snapshot + mock_listdir.return_value = [] + + # Write mock only + write_mock = mock_open() + mock_file_open.side_effect = lambda f, m='r', *a, **k: write_mock(f, m, *a, **k) + + # Mock Query Results + mock_query_issues = MagicMock() # For Issues + mock_query_audits = MagicMock() # For EscalationAudit + + def query_side_effect(model): + if model == Issue: + return mock_query_issues + elif model == EscalationAudit: + return mock_query_audits + return MagicMock() + + mock_session.query.side_effect = query_side_effect + + # Data: 10 resolved (+20), 0 new (0) => 90.0 + mock_query_issues.filter.return_value.all.return_value = [] # 0 new issues + mock_query_issues.filter.return_value.count.return_value = 10 # 10 resolved + + mock_query_audits.filter.return_value.all.return_value = [] + + mock_trend_analyzer.analyze.return_value = { + "category_distribution": {"Water": 10} + } + + # Mock adaptive weights radius (return int/float) + mock_weights.get_duplicate_search_radius.return_value = 50.0 + + engine.run_daily_cycle() + + assert mock_json_dump.called + args, _ = mock_json_dump.call_args + snapshot = args[0] + index_data = snapshot['civic_index'] + + assert index_data['score'] == 90.0 + assert index_data['score_delta'] == 0.0 # No previous snapshot, so delta 0 + + # With no previous snapshot the prior count for "Water" is presumably 0; since 10 > 5 it is flagged as a new surge (infinite increase) 
and becomes the top spike, so the max-volume fallback is likely not what is exercised here (TODO confirm) + assert index_data['top_emerging_concern'] == "Water" diff --git a/render-build.sh b/render-build.sh index 342c4cb5..763774b7 100755 --- a/render-build.sh +++ b/render-build.sh @@ -5,9 +5,9 @@ set -o errexit echo "Installing Python dependencies..." if [ -f "backend/requirements-render.txt" ]; then echo "Using requirements-render.txt for lightweight deployment..." - pip install -r backend/requirements-render.txt + pip install --no-cache-dir -r backend/requirements-render.txt else - pip install -r backend/requirements.txt + pip install --no-cache-dir -r backend/requirements.txt fi echo "Building Frontend..." diff --git a/render.yaml b/render.yaml index 593ec813..45bf510d 100644 --- a/render.yaml +++ b/render.yaml @@ -3,7 +3,7 @@ services: - type: web name: vishwaguru-backend runtime: python - buildCommand: "pip install -r backend/requirements-render.txt" + buildCommand: "./render-build.sh" startCommand: "python start-backend.py" envVars: - key: PYTHON_VERSION