Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions backend/cache.py
Original file line number Diff line number Diff line change
Expand Up @@ -156,3 +156,4 @@ def invalidate(self):
recent_issues_cache = ThreadSafeCache(ttl=300, max_size=20) # 5 minutes TTL, max 20 entries
nearby_issues_cache = ThreadSafeCache(ttl=60, max_size=100) # 1 minute TTL, max 100 entries
user_upload_cache = ThreadSafeCache(ttl=3600, max_size=1000) # 1 hour TTL for upload limits
blockchain_last_hash_cache = ThreadSafeCache(ttl=86400, max_size=1) # 24 hour TTL, only stores the absolute latest
10 changes: 8 additions & 2 deletions backend/init_db.py
Original file line number Diff line number Diff line change
Expand Up @@ -66,11 +66,11 @@ def index_exists(table, index_name):
logger.info("Added action_plan column to issues")

if not column_exists("issues", "integrity_hash"):
conn.execute(text("ALTER TABLE issues ADD COLUMN integrity_hash VARCHAR"))
conn.execute(text("ALTER TABLE issues ADD COLUMN integrity_hash VARCHAR(255)"))
logger.info("Added integrity_hash column to issues")

if not column_exists("issues", "previous_integrity_hash"):
conn.execute(text("ALTER TABLE issues ADD COLUMN previous_integrity_hash VARCHAR"))
conn.execute(text("ALTER TABLE issues ADD COLUMN previous_integrity_hash VARCHAR(255)"))
logger.info("Added previous_integrity_hash column to issues")

# Indexes (using IF NOT EXISTS syntax where supported or check first)
Expand All @@ -95,6 +95,12 @@ def index_exists(table, index_name):
if not index_exists("issues", "ix_issues_user_email"):
conn.execute(text("CREATE INDEX IF NOT EXISTS ix_issues_user_email ON issues (user_email)"))

if not index_exists("issues", "ix_issues_integrity_hash"):
conn.execute(text("CREATE INDEX IF NOT EXISTS ix_issues_integrity_hash ON issues (integrity_hash)"))

if not index_exists("issues", "ix_issues_previous_integrity_hash"):
conn.execute(text("CREATE INDEX IF NOT EXISTS ix_issues_previous_integrity_hash ON issues (previous_integrity_hash)"))

# Voice and Language Support Columns (Issue #291)
if not column_exists("issues", "submission_type"):
conn.execute(text("ALTER TABLE issues ADD COLUMN submission_type VARCHAR DEFAULT 'text'"))
Expand Down
3 changes: 2 additions & 1 deletion backend/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -148,7 +148,8 @@ class Issue(Base):
longitude = Column(Float, nullable=True, index=True)
location = Column(String, nullable=True)
action_plan = Column(JSON, nullable=True)
integrity_hash = Column(String, nullable=True) # Blockchain integrity seal
integrity_hash = Column(String(255), index=True, nullable=True) # Blockchain integrity seal
previous_integrity_hash = Column(String(255), index=True, nullable=True) # Linked list chaining

# Voice and Language Support (Issue #291)
submission_type = Column(String, default="text") # 'text', 'voice'
Expand Down
51 changes: 34 additions & 17 deletions backend/routers/issues.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@
send_status_notification
)
from backend.spatial_utils import get_bounding_box, find_nearby_issues
from backend.cache import recent_issues_cache, nearby_issues_cache
from backend.cache import recent_issues_cache, nearby_issues_cache, blockchain_last_hash_cache
from backend.hf_api_service import verify_resolution_vqa
from backend.dependencies import get_http_client
from backend.rag_service import rag_service
Expand Down Expand Up @@ -172,16 +172,24 @@ async def create_issue(
# Save to DB only if no nearby issues found or deduplication failed
if deduplication_info is None or not deduplication_info.has_nearby_issues:
# Blockchain feature: calculate integrity hash for the report
# Optimization: Fetch only the last hash to maintain the chain with minimal overhead
prev_issue = await run_in_threadpool(
lambda: db.query(Issue.integrity_hash).order_by(Issue.id.desc()).first()
)
prev_hash = prev_issue[0] if prev_issue and prev_issue[0] else ""
# Performance Boost: Use cached last hash to avoid database lookup on every submission
prev_hash = blockchain_last_hash_cache.get("latest_hash")

if prev_hash is None:
# Cache miss: fetch from DB and populate cache
prev_issue = await run_in_threadpool(
lambda: db.query(Issue.integrity_hash).order_by(Issue.id.desc()).first()
)
prev_hash = prev_issue[0] if prev_issue and prev_issue[0] else ""
blockchain_last_hash_cache.set(data=prev_hash, key="latest_hash")
Comment on lines +175 to +184
Copy link

Copilot AI Mar 1, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This cache-based approach is only process-local and not transactionally tied to the DB. If the API is run with multiple workers/processes or scaled horizontally, different instances can compute prev_hash from stale cache state and create forks in the hash chain. Consider using a DB-backed single-row “latest hash” (updated atomically in the same transaction as the Issue insert) or a distributed cache/lock (e.g., Redis) to keep chaining consistent across instances.

Copilot uses AI. Check for mistakes.

# Simple but effective SHA-256 chaining
# Simple but effective SHA-256 chaining
hash_content = f"{description}|{category}|{prev_hash}"
integrity_hash = hashlib.sha256(hash_content.encode()).hexdigest()

# Update cache for next submission
blockchain_last_hash_cache.set(data=integrity_hash, key="latest_hash")
Copy link
Contributor

@cubic-dev-ai cubic-dev-ai bot Mar 1, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

P1: Cache is updated with the new integrity_hash before the DB save succeeds. If save_issue_db fails (exception path), the error handler never rolls back the cache entry. This leaves a phantom hash in the cache that will poison the chain for the next issue submission.

Move the cache update to after the successful DB save—i.e., after save_issue_db returns without exception.

Prompt for AI agents
Check if this issue is valid — if so, understand the root cause and fix it. At backend/routers/issues.py, line 191:

<comment>Cache is updated with the new `integrity_hash` before the DB save succeeds. If `save_issue_db` fails (exception path), the error handler never rolls back the cache entry. This leaves a phantom hash in the cache that will poison the chain for the next issue submission.

Move the cache update to after the successful DB save—i.e., after `save_issue_db` returns without exception.</comment>

<file context>
@@ -172,16 +172,24 @@ async def create_issue(
             integrity_hash = hashlib.sha256(hash_content.encode()).hexdigest()
 
+            # Update cache for next submission
+            blockchain_last_hash_cache.set(data=integrity_hash, key="latest_hash")
+
             # RAG Retrieval (New)
</file context>
Fix with Cubic

Comment on lines +190 to +191
Copy link

Copilot AI Mar 1, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The PR introduces new behavior (using and mutating blockchain_last_hash_cache during /api/issues creation), but there are no automated tests covering cache correctness—especially that the cache is not advanced on DB failures and that a cache miss repopulates correctly from the DB. Adding a pytest that exercises issue creation with a forced DB error (and asserts cache state) would prevent regressions.

Copilot uses AI. Check for mistakes.
Comment on lines +175 to +191
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ Potential issue | 🟠 Major

🧩 Analysis chain

🏁 Script executed:

# First, let's find the blockchain_last_hash_cache implementation
rg "blockchain_last_hash_cache\s*=" --type py -A 5 -B 2

Repository: RohanExploit/VishwaGuru

Length of output: 418


🏁 Script executed:

# Check the imports and setup
head -50 backend/routers/issues.py

Repository: RohanExploit/VishwaGuru

Length of output: 2131


🏁 Script executed:

# Examine the full context around lines 175-191 and 210-211
sed -n '170,215p' backend/routers/issues.py

Repository: RohanExploit/VishwaGuru

Length of output: 2140


🏁 Script executed:

# Look at the verification function around lines 632-657
sed -n '625,665p' backend/routers/issues.py

Repository: RohanExploit/VishwaGuru

Length of output: 1866


🏁 Script executed:

# Search for any locking mechanisms or thread safety measures
rg "Lock|RLock|Semaphore|asyncio.lock" backend/routers/issues.py

Repository: RohanExploit/VishwaGuru

Length of output: 49


🏁 Script executed:

# Check if there's synchronization elsewhere in the codebase
rg "blockchain_last_hash_cache" --type py -B 3 -A 3

Repository: RohanExploit/VishwaGuru

Length of output: 2738


🏁 Script executed:

# Find ThreadSafeCache class definition
rg "class ThreadSafeCache" --type py -A 30

Repository: RohanExploit/VishwaGuru

Length of output: 1866


🏁 Script executed:

# Also check the cache.py file for any synchronization mechanisms
cat backend/cache.py

Repository: RohanExploit/VishwaGuru

Length of output: 5517


Race condition: Concurrent issue creations may use the same prev_hash from cache.

Two concurrent requests can both read the same cached prev_hash before either updates the cache. Since the sequence of get→compute→set is not atomic, both issues will end up with the same previous_integrity_hash value in the database.

This is mitigated somewhat because the blockchain verification function recomputes each hash from the record's own database-stored previous_integrity_hash, so each individual record still verifies. However, it breaks the intended blockchain property that each issue has a unique predecessor. In addition, the cache synchronization relies on threading.RLock(), a thread-level lock that may not be effective for coordinating coroutines in an async context (FastAPI/asyncio).

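If strict blockchain-style chain semantics are required, consider reading the previous hash, computing the new hash, and inserting the issue atomically within a single database transaction, and updating the cache only after that transaction has succeeded, rather than reading from the cache, computing the hash, and advancing the cache before the insert is known to have been saved.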

🤖 Prompt for AI Agents
Verify each finding against the current code and only fix it if needed.

In `@backend/routers/issues.py` around lines 175 - 191, The cached prev_hash
read/update (blockchain_last_hash_cache.get/set) around prev_hash/integrity_hash
is racy: concurrent requests can read the same prev_hash before either writes,
producing duplicate previous_integrity_hash values; fix by moving the prev_hash
determination and issue insert into an atomic DB operation (e.g., run a
transaction that SELECTs the latest Issue.integrity_hash (or id) FOR UPDATE,
uses it as prev_hash, and then INSERTs the new Issue with the computed
integrity_hash and previous_integrity_hash) instead of computing first and then
setting the cache, and only after the transactional insert succeeds update
blockchain_last_hash_cache with the new integrity_hash; reference
functions/variables: blockchain_last_hash_cache, prev_hash, integrity_hash,
Issue, previous_integrity_hash, and run_in_threadpool.


Comment on lines 188 to +192
Copy link

Copilot AI Mar 1, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

blockchain_last_hash_cache is updated with the newly computed integrity_hash before the issue is persisted. If save_issue_db (or the DB commit) fails, the cache will still advance and subsequent submissions can chain to a hash that never existed in the database. Update the cache only after the DB write has succeeded (and consider invalidating/resetting it on exceptions).

Copilot uses AI. Check for mistakes.
# RAG Retrieval (New)
relevant_rule = rag_service.retrieve(description)
initial_action_plan = None
Expand All @@ -199,7 +207,8 @@ async def create_issue(
longitude=longitude,
location=location,
action_plan=initial_action_plan,
integrity_hash=integrity_hash
integrity_hash=integrity_hash,
previous_integrity_hash=prev_hash
)

# Offload blocking DB operations to threadpool
Expand Down Expand Up @@ -620,24 +629,32 @@ def get_user_issues(
async def verify_blockchain_integrity(issue_id: int, db: Session = Depends(get_db)):
"""
Verify the cryptographic integrity of a report using the blockchain-style chaining.
Optimized: Uses column projection to fetch only needed data.
Performance Boost: Uses pre-stored previous_integrity_hash to eliminate extra database lookup.
"""
# Fetch current issue data
# Fetch current issue data including its chain link
current_issue = await run_in_threadpool(
lambda: db.query(
Issue.id, Issue.description, Issue.category, Issue.integrity_hash
Issue.id,
Issue.description,
Issue.category,
Issue.integrity_hash,
Issue.previous_integrity_hash
).filter(Issue.id == issue_id).first()
)

if not current_issue:
raise HTTPException(status_code=404, detail="Issue not found")

# Fetch previous issue's integrity hash to verify the chain
prev_issue_hash = await run_in_threadpool(
lambda: db.query(Issue.integrity_hash).filter(Issue.id < issue_id).order_by(Issue.id.desc()).first()
)

prev_hash = prev_issue_hash[0] if prev_issue_hash and prev_issue_hash[0] else ""
# Use the stored previous hash from the record
# Regression Fix: Fallback to DB lookup if previous_integrity_hash is None (for old records)
if current_issue.previous_integrity_hash is not None:
prev_hash = current_issue.previous_integrity_hash
else:
# Fetch previous issue's integrity hash from DB for legacy records
prev_issue_hash = await run_in_threadpool(
lambda: db.query(Issue.integrity_hash).filter(Issue.id < issue_id).order_by(Issue.id.desc()).first()
)
prev_hash = prev_issue_hash[0] if prev_issue_hash and prev_issue_hash[0] else ""

# Recompute hash based on current data and previous hash
# Chaining logic: hash(description|category|prev_hash)
Expand Down
3 changes: 2 additions & 1 deletion tests/test_blockchain.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,8 @@ def test_blockchain_verification_success(client, db_session):
issue2 = Issue(
description="Second issue",
category="Garbage",
integrity_hash=hash2
integrity_hash=hash2,
previous_integrity_hash=hash1
)
db_session.add(issue2)
db_session.commit()
Expand Down