diff --git a/backend/cache.py b/backend/cache.py index 3f2bebf2..0e372a52 100644 --- a/backend/cache.py +++ b/backend/cache.py @@ -156,3 +156,6 @@ def invalidate(self): recent_issues_cache = ThreadSafeCache(ttl=300, max_size=20) # 5 minutes TTL, max 20 entries nearby_issues_cache = ThreadSafeCache(ttl=60, max_size=100) # 1 minute TTL, max 100 entries user_upload_cache = ThreadSafeCache(ttl=3600, max_size=1000) # 1 hour TTL for upload limits + +# Blockchain optimization: cache the most recent integrity hash to avoid redundant DB queries during chaining +blockchain_last_hash_cache = ThreadSafeCache(ttl=3600, max_size=1) diff --git a/backend/init_db.py b/backend/init_db.py index 8021447a..0c308a8b 100644 --- a/backend/init_db.py +++ b/backend/init_db.py @@ -70,7 +70,7 @@ def index_exists(table, index_name): logger.info("Added integrity_hash column to issues") if not column_exists("issues", "previous_integrity_hash"): - conn.execute(text("ALTER TABLE issues ADD COLUMN previous_integrity_hash VARCHAR")) + conn.execute(text("ALTER TABLE issues ADD COLUMN previous_integrity_hash VARCHAR(255)")) logger.info("Added previous_integrity_hash column to issues") # Indexes (using IF NOT EXISTS syntax where supported or check first) @@ -95,6 +95,9 @@ def index_exists(table, index_name): if not index_exists("issues", "ix_issues_user_email"): conn.execute(text("CREATE INDEX IF NOT EXISTS ix_issues_user_email ON issues (user_email)")) + if not index_exists("issues", "ix_issues_previous_integrity_hash"): + conn.execute(text("CREATE INDEX IF NOT EXISTS ix_issues_previous_integrity_hash ON issues (previous_integrity_hash)")) + # Voice and Language Support Columns (Issue #291) if not column_exists("issues", "submission_type"): conn.execute(text("ALTER TABLE issues ADD COLUMN submission_type VARCHAR DEFAULT 'text'")) diff --git a/backend/models.py b/backend/models.py index 8b9020de..f8d85007 100644 --- a/backend/models.py +++ b/backend/models.py @@ -149,6 +149,7 @@ class Issue(Base): location = Column(String, nullable=True) action_plan = Column(JSON, nullable=True) integrity_hash = Column(String, nullable=True) # Blockchain integrity seal + previous_integrity_hash = Column(String, nullable=True, index=True) # Link to previous block for faster verification # Voice and Language Support (Issue #291) submission_type = Column(String, default="text") # 'text', 'voice' diff --git a/backend/routers/issues.py b/backend/routers/issues.py index 9cc304ee..460fc1af 100644 --- a/backend/routers/issues.py +++ b/backend/routers/issues.py @@ -30,7 +30,7 @@ send_status_notification ) from backend.spatial_utils import get_bounding_box, find_nearby_issues -from backend.cache import recent_issues_cache, nearby_issues_cache +from backend.cache import recent_issues_cache, nearby_issues_cache, blockchain_last_hash_cache from backend.hf_api_service import verify_resolution_vqa from backend.dependencies import get_http_client from backend.rag_service import rag_service @@ -172,13 +172,18 @@ async def create_issue( # Save to DB only if no nearby issues found or deduplication failed if deduplication_info is None or not deduplication_info.has_nearby_issues: # Blockchain feature: calculate integrity hash for the report - # Optimization: Fetch only the last hash to maintain the chain with minimal overhead - prev_issue = await run_in_threadpool( - lambda: db.query(Issue.integrity_hash).order_by(Issue.id.desc()).first() - ) - prev_hash = prev_issue[0] if prev_issue and prev_issue[0] else "" + # Optimization: Use cache for the last hash to maintain the chain with near-zero overhead + prev_hash = blockchain_last_hash_cache.get("last_integrity_hash") + + if prev_hash is None: + # Cache miss: Fetch only the last hash from DB + prev_issue = await run_in_threadpool( + lambda: db.query(Issue.integrity_hash).order_by(Issue.id.desc()).first() + ) + prev_hash = prev_issue[0] if prev_issue and prev_issue[0] else "" + blockchain_last_hash_cache.set(data=prev_hash, key="last_integrity_hash") -# Simple but effective SHA-256 chaining + # Simple but effective SHA-256 chaining hash_content = f"{description}|{category}|{prev_hash}" integrity_hash = hashlib.sha256(hash_content.encode()).hexdigest() @@ -199,11 +204,15 @@ async def create_issue( longitude=longitude, location=location, action_plan=initial_action_plan, - integrity_hash=integrity_hash + integrity_hash=integrity_hash, + previous_integrity_hash=prev_hash ) # Offload blocking DB operations to threadpool await run_in_threadpool(save_issue_db, db, new_issue) + + # Update last hash cache + blockchain_last_hash_cache.set(data=integrity_hash, key="last_integrity_hash") else: # Don't create new issue, just return deduplication info new_issue = None @@ -620,24 +629,28 @@ def get_user_issues( async def verify_blockchain_integrity(issue_id: int, db: Session = Depends(get_db)): """ Verify the cryptographic integrity of a report using the blockchain-style chaining. - Optimized: Uses column projection to fetch only needed data. + Optimized: Uses stored previous hash to eliminate redundant lookup for the parent record. """ # Fetch current issue data + # Optimization: Include previous_integrity_hash to verify the chain in one step current_issue = await run_in_threadpool( lambda: db.query( - Issue.id, Issue.description, Issue.category, Issue.integrity_hash + Issue.id, Issue.description, Issue.category, Issue.integrity_hash, Issue.previous_integrity_hash ).filter(Issue.id == issue_id).first() ) if not current_issue: raise HTTPException(status_code=404, detail="Issue not found") - # Fetch previous issue's integrity hash to verify the chain - prev_issue_hash = await run_in_threadpool( - lambda: db.query(Issue.integrity_hash).filter(Issue.id < issue_id).order_by(Issue.id.desc()).first() - ) - - prev_hash = prev_issue_hash[0] if prev_issue_hash and prev_issue_hash[0] else "" + # Use stored previous hash if available, otherwise fall back to DB lookup (for legacy records) + if current_issue.previous_integrity_hash is not None: + prev_hash = current_issue.previous_integrity_hash + else: + # Fallback for legacy records without previous_integrity_hash column populated + prev_issue_hash = await run_in_threadpool( + lambda: db.query(Issue.integrity_hash).filter(Issue.id < issue_id).order_by(Issue.id.desc()).first() + ) + prev_hash = prev_issue_hash[0] if prev_issue_hash and prev_issue_hash[0] else "" # Recompute hash based on current data and previous hash # Chaining logic: hash(description|category|prev_hash) diff --git a/tests/test_blockchain.py b/tests/test_blockchain.py index 341ecf49..3dca5583 100644 --- a/tests/test_blockchain.py +++ b/tests/test_blockchain.py @@ -29,7 +29,8 @@ def test_blockchain_verification_success(client, db_session): issue1 = Issue( description="First issue", category="Road", - integrity_hash=hash1 + integrity_hash=hash1, + previous_integrity_hash="" ) db_session.add(issue1) db_session.commit() @@ -42,7 +43,8 @@ def test_blockchain_verification_success(client, db_session): issue2 = Issue( description="Second issue", category="Garbage", - integrity_hash=hash2 + integrity_hash=hash2, + previous_integrity_hash=hash1 ) db_session.add(issue2) db_session.commit() @@ -62,6 +64,41 @@ def test_blockchain_verification_success(client, db_session): assert data["is_valid"] == True assert data["current_hash"] == hash2 +def test_blockchain_previous_hash_stored(client, db_session): + # Create first issue + hash1 = hashlib.sha256(b"First").hexdigest() + issue1 = Issue(description="First", category="Road", integrity_hash=hash1) + db_session.add(issue1) + db_session.commit() + + # Create second issue via API (to test logic in create_issue) + # Mocking background tasks to avoid errors + from unittest.mock import patch + with patch("backend.routers.issues.process_action_plan_background"), \ + patch("backend.routers.issues.create_grievance_from_issue_background"): + response = client.post( + "/api/issues", + data={ + "description": "Second issue description", + "category": "Garbage", + "latitude": 10.0, + "longitude": 20.0 + } + ) + + assert response.status_code == 201 + issue_id = response.json()["id"] + + # Verify issue in DB has previous_integrity_hash + issue2 = db_session.query(Issue).filter(Issue.id == issue_id).first() + assert issue2.previous_integrity_hash == hash1 + + # Verify blockchain-verify endpoint uses the stored hash + # (Implicitly tested if it returns is_valid=True and we know we didn't mock the internal DB query in the router) + response = client.get(f"/api/issues/{issue_id}/blockchain-verify") + assert response.status_code == 200 + assert response.json()["is_valid"] == True + def test_blockchain_verification_failure(client, db_session): # Create issue with tampered hash issue = Issue(