diff --git a/backend/cache.py b/backend/cache.py index 3f2bebf2..d8f93d6d 100644 --- a/backend/cache.py +++ b/backend/cache.py @@ -156,3 +156,4 @@ def invalidate(self): recent_issues_cache = ThreadSafeCache(ttl=300, max_size=20) # 5 minutes TTL, max 20 entries nearby_issues_cache = ThreadSafeCache(ttl=60, max_size=100) # 1 minute TTL, max 100 entries user_upload_cache = ThreadSafeCache(ttl=3600, max_size=1000) # 1 hour TTL for upload limits +blockchain_last_hash_cache = ThreadSafeCache(ttl=86400, max_size=1) # 24 hour TTL, only stores the absolute latest diff --git a/backend/init_db.py b/backend/init_db.py index 8021447a..ed44dc04 100644 --- a/backend/init_db.py +++ b/backend/init_db.py @@ -66,11 +66,11 @@ def index_exists(table, index_name): logger.info("Added action_plan column to issues") if not column_exists("issues", "integrity_hash"): - conn.execute(text("ALTER TABLE issues ADD COLUMN integrity_hash VARCHAR")) + conn.execute(text("ALTER TABLE issues ADD COLUMN integrity_hash VARCHAR(255)")) logger.info("Added integrity_hash column to issues") if not column_exists("issues", "previous_integrity_hash"): - conn.execute(text("ALTER TABLE issues ADD COLUMN previous_integrity_hash VARCHAR")) + conn.execute(text("ALTER TABLE issues ADD COLUMN previous_integrity_hash VARCHAR(255)")) logger.info("Added previous_integrity_hash column to issues") # Indexes (using IF NOT EXISTS syntax where supported or check first) @@ -95,6 +95,12 @@ def index_exists(table, index_name): if not index_exists("issues", "ix_issues_user_email"): conn.execute(text("CREATE INDEX IF NOT EXISTS ix_issues_user_email ON issues (user_email)")) + if not index_exists("issues", "ix_issues_integrity_hash"): + conn.execute(text("CREATE INDEX IF NOT EXISTS ix_issues_integrity_hash ON issues (integrity_hash)")) + + if not index_exists("issues", "ix_issues_previous_integrity_hash"): + conn.execute(text("CREATE INDEX IF NOT EXISTS ix_issues_previous_integrity_hash ON issues (previous_integrity_hash)")) + # Voice and Language Support Columns (Issue #291) if not column_exists("issues", "submission_type"): conn.execute(text("ALTER TABLE issues ADD COLUMN submission_type VARCHAR DEFAULT 'text'")) diff --git a/backend/models.py b/backend/models.py index 8b9020de..53364208 100644 --- a/backend/models.py +++ b/backend/models.py @@ -148,7 +148,8 @@ class Issue(Base): longitude = Column(Float, nullable=True, index=True) location = Column(String, nullable=True) action_plan = Column(JSON, nullable=True) - integrity_hash = Column(String, nullable=True) # Blockchain integrity seal + integrity_hash = Column(String(255), index=True, nullable=True) # Blockchain integrity seal + previous_integrity_hash = Column(String(255), index=True, nullable=True) # Linked list chaining # Voice and Language Support (Issue #291) submission_type = Column(String, default="text") # 'text', 'voice' diff --git a/backend/routers/issues.py b/backend/routers/issues.py index 9cc304ee..5c3ad6cb 100644 --- a/backend/routers/issues.py +++ b/backend/routers/issues.py @@ -30,7 +30,7 @@ send_status_notification ) from backend.spatial_utils import get_bounding_box, find_nearby_issues -from backend.cache import recent_issues_cache, nearby_issues_cache +from backend.cache import recent_issues_cache, nearby_issues_cache, blockchain_last_hash_cache from backend.hf_api_service import verify_resolution_vqa from backend.dependencies import get_http_client from backend.rag_service import rag_service @@ -172,16 +172,24 @@ async def create_issue( # Save to DB only if no nearby issues found or deduplication failed if deduplication_info is None or not deduplication_info.has_nearby_issues: # Blockchain feature: calculate integrity hash for the report - # Optimization: Fetch only the last hash to maintain the chain with minimal overhead - prev_issue = await run_in_threadpool( - lambda: db.query(Issue.integrity_hash).order_by(Issue.id.desc()).first() - ) - prev_hash = prev_issue[0] if prev_issue and prev_issue[0] else "" + # Performance Boost: Use cached last hash to avoid database lookup on every submission + prev_hash = blockchain_last_hash_cache.get("latest_hash") + + if prev_hash is None: + # Cache miss: fetch from DB and populate cache + prev_issue = await run_in_threadpool( + lambda: db.query(Issue.integrity_hash).order_by(Issue.id.desc()).first() + ) + prev_hash = prev_issue[0] if prev_issue and prev_issue[0] else "" + blockchain_last_hash_cache.set(data=prev_hash, key="latest_hash") -# Simple but effective SHA-256 chaining + # Simple but effective SHA-256 chaining hash_content = f"{description}|{category}|{prev_hash}" integrity_hash = hashlib.sha256(hash_content.encode()).hexdigest() + # Update cache for next submission + blockchain_last_hash_cache.set(data=integrity_hash, key="latest_hash") + # RAG Retrieval (New) relevant_rule = rag_service.retrieve(description) initial_action_plan = None @@ -199,7 +207,8 @@ async def create_issue( longitude=longitude, location=location, action_plan=initial_action_plan, - integrity_hash=integrity_hash + integrity_hash=integrity_hash, + previous_integrity_hash=prev_hash ) # Offload blocking DB operations to threadpool @@ -620,24 +629,32 @@ def get_user_issues( async def verify_blockchain_integrity(issue_id: int, db: Session = Depends(get_db)): """ Verify the cryptographic integrity of a report using the blockchain-style chaining. - Optimized: Uses column projection to fetch only needed data. + Performance Boost: Uses pre-stored previous_integrity_hash to eliminate extra database lookup. """ - # Fetch current issue data + # Fetch current issue data including its chain link current_issue = await run_in_threadpool( lambda: db.query( - Issue.id, Issue.description, Issue.category, Issue.integrity_hash + Issue.id, + Issue.description, + Issue.category, + Issue.integrity_hash, + Issue.previous_integrity_hash ).filter(Issue.id == issue_id).first() ) if not current_issue: raise HTTPException(status_code=404, detail="Issue not found") - # Fetch previous issue's integrity hash to verify the chain - prev_issue_hash = await run_in_threadpool( - lambda: db.query(Issue.integrity_hash).filter(Issue.id < issue_id).order_by(Issue.id.desc()).first() - ) - - prev_hash = prev_issue_hash[0] if prev_issue_hash and prev_issue_hash[0] else "" + # Use the stored previous hash from the record + # Regression Fix: Fallback to DB lookup if previous_integrity_hash is None (for old records) + if current_issue.previous_integrity_hash is not None: + prev_hash = current_issue.previous_integrity_hash + else: + # Fetch previous issue's integrity hash from DB for legacy records + prev_issue_hash = await run_in_threadpool( + lambda: db.query(Issue.integrity_hash).filter(Issue.id < issue_id).order_by(Issue.id.desc()).first() + ) + prev_hash = prev_issue_hash[0] if prev_issue_hash and prev_issue_hash[0] else "" # Recompute hash based on current data and previous hash # Chaining logic: hash(description|category|prev_hash) diff --git a/tests/test_blockchain.py b/tests/test_blockchain.py index 341ecf49..4c921f02 100644 --- a/tests/test_blockchain.py +++ b/tests/test_blockchain.py @@ -42,7 +42,8 @@ def test_blockchain_verification_success(client, db_session): issue2 = Issue( description="Second issue", category="Garbage", - integrity_hash=hash2 + integrity_hash=hash2, + previous_integrity_hash=hash1 ) db_session.add(issue2) db_session.commit()