Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions backend/cache.py
Original file line number Diff line number Diff line change
Expand Up @@ -156,3 +156,6 @@ def invalidate(self):
recent_issues_cache = ThreadSafeCache(ttl=300, max_size=20) # 5 minutes TTL, max 20 entries
nearby_issues_cache = ThreadSafeCache(ttl=60, max_size=100) # 1 minute TTL, max 100 entries
user_upload_cache = ThreadSafeCache(ttl=3600, max_size=1000) # 1 hour TTL for upload limits

# Blockchain optimization: cache the most recent integrity hash to avoid redundant DB queries during chaining
blockchain_last_hash_cache = ThreadSafeCache(ttl=3600, max_size=1)
5 changes: 4 additions & 1 deletion backend/init_db.py
Original file line number Diff line number Diff line change
Expand Up @@ -70,7 +70,7 @@ def index_exists(table, index_name):
logger.info("Added integrity_hash column to issues")

if not column_exists("issues", "previous_integrity_hash"):
conn.execute(text("ALTER TABLE issues ADD COLUMN previous_integrity_hash VARCHAR"))
conn.execute(text("ALTER TABLE issues ADD COLUMN previous_integrity_hash VARCHAR(255)"))
logger.info("Added previous_integrity_hash column to issues")

# Indexes (using IF NOT EXISTS syntax where supported or check first)
Expand All @@ -95,6 +95,9 @@ def index_exists(table, index_name):
if not index_exists("issues", "ix_issues_user_email"):
conn.execute(text("CREATE INDEX IF NOT EXISTS ix_issues_user_email ON issues (user_email)"))

if not index_exists("issues", "ix_issues_previous_integrity_hash"):
conn.execute(text("CREATE INDEX IF NOT EXISTS ix_issues_previous_integrity_hash ON issues (previous_integrity_hash)"))

# Voice and Language Support Columns (Issue #291)
if not column_exists("issues", "submission_type"):
conn.execute(text("ALTER TABLE issues ADD COLUMN submission_type VARCHAR DEFAULT 'text'"))
Expand Down
1 change: 1 addition & 0 deletions backend/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -149,6 +149,7 @@ class Issue(Base):
location = Column(String, nullable=True)
action_plan = Column(JSON, nullable=True)
integrity_hash = Column(String, nullable=True) # Blockchain integrity seal
previous_integrity_hash = Column(String, nullable=True, index=True) # Link to previous block for faster verification

# Voice and Language Support (Issue #291)
submission_type = Column(String, default="text") # 'text', 'voice'
Expand Down
45 changes: 29 additions & 16 deletions backend/routers/issues.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@
send_status_notification
)
from backend.spatial_utils import get_bounding_box, find_nearby_issues
from backend.cache import recent_issues_cache, nearby_issues_cache
from backend.cache import recent_issues_cache, nearby_issues_cache, blockchain_last_hash_cache
from backend.hf_api_service import verify_resolution_vqa
from backend.dependencies import get_http_client
from backend.rag_service import rag_service
Expand Down Expand Up @@ -172,13 +172,18 @@ async def create_issue(
# Save to DB only if no nearby issues found or deduplication failed
if deduplication_info is None or not deduplication_info.has_nearby_issues:
# Blockchain feature: calculate integrity hash for the report
# Optimization: Fetch only the last hash to maintain the chain with minimal overhead
prev_issue = await run_in_threadpool(
lambda: db.query(Issue.integrity_hash).order_by(Issue.id.desc()).first()
)
prev_hash = prev_issue[0] if prev_issue and prev_issue[0] else ""
# Optimization: Use cache for the last hash to maintain the chain with near-zero overhead
prev_hash = blockchain_last_hash_cache.get("last_integrity_hash")
Copy link
Contributor

@cubic-dev-ai cubic-dev-ai bot Mar 4, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

P1: Using a process-local last-hash cache to build previous_integrity_hash can create incorrect parent links (forked chain) across workers/processes.

Prompt for AI agents
Check if this issue is valid — if so, understand the root cause and fix it. At backend/routers/issues.py, line 176:

<comment>Using a process-local last-hash cache to build `previous_integrity_hash` can create incorrect parent links (forked chain) across workers/processes.</comment>

<file context>
@@ -172,13 +172,18 @@ async def create_issue(
-            )
-            prev_hash = prev_issue[0] if prev_issue and prev_issue[0] else ""
+            # Optimization: Use cache for the last hash to maintain the chain with near-zero overhead
+            prev_hash = blockchain_last_hash_cache.get("last_integrity_hash")
+
+            if prev_hash is None:
</file context>
Fix with Cubic


if prev_hash is None:
# Cache miss: Fetch only the last hash from DB
prev_issue = await run_in_threadpool(
lambda: db.query(Issue.integrity_hash).order_by(Issue.id.desc()).first()
Comment on lines +179 to +181
Copy link

Copilot AI Mar 4, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

create_issue now relies on blockchain_last_hash_cache for prev_hash, but the DB fallback query on cache miss selects the most recent row regardless of whether Issue.integrity_hash is NULL. Since there are other code paths that insert issues without integrity_hash (e.g., voice/telegram), prev_hash can become nondeterministic: cache hit continues the chain from the last hashed issue, while a cache miss after such inserts resets the chain to "". Update the cache-miss query to fetch the latest non-null integrity_hash (and consider aligning any other issue-creation paths with this rule) so cache hits and misses compute the same parent hash.

Suggested change
# Cache miss: Fetch only the last hash from DB
prev_issue = await run_in_threadpool(
lambda: db.query(Issue.integrity_hash).order_by(Issue.id.desc()).first()
# Cache miss: Fetch only the last *non-null* hash from DB
prev_issue = await run_in_threadpool(
lambda: db.query(Issue.integrity_hash)
.filter(Issue.integrity_hash.isnot(None))
.order_by(Issue.id.desc())
.first()

Copilot uses AI. Check for mistakes.
)
prev_hash = prev_issue[0] if prev_issue and prev_issue[0] else ""
blockchain_last_hash_cache.set(data=prev_hash, key="last_integrity_hash")

Comment on lines +176 to 185
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ Potential issue | 🔴 Critical

Chain fork risk under concurrent issue creation (TOCTOU on prev_hash).

Line 176 reads cached parent hash and Line 215 updates cache only after persistence. Concurrent requests can compute different new blocks from the same parent, breaking linear chain guarantees.

🔧 Suggested fix (serialize chain head assignment in app process)
+import asyncio
+
+# Serialize blockchain head updates inside this worker process
+_blockchain_chain_lock = asyncio.Lock()

 ...
-            prev_hash = blockchain_last_hash_cache.get("last_integrity_hash")
-
-            if prev_hash is None:
-                prev_issue = await run_in_threadpool(
-                    lambda: db.query(Issue.integrity_hash).order_by(Issue.id.desc()).first()
-                )
-                prev_hash = prev_issue[0] if prev_issue and prev_issue[0] else ""
-                blockchain_last_hash_cache.set(data=prev_hash, key="last_integrity_hash")
-
-            hash_content = f"{description}|{category}|{prev_hash}"
-            integrity_hash = hashlib.sha256(hash_content.encode()).hexdigest()
+            async with _blockchain_chain_lock:
+                prev_hash = blockchain_last_hash_cache.get("last_integrity_hash")
+                if prev_hash is None:
+                    prev_issue = await run_in_threadpool(
+                        lambda: db.query(Issue.integrity_hash).order_by(Issue.id.desc()).first()
+                    )
+                    prev_hash = prev_issue[0] if prev_issue and prev_issue[0] else ""
+                    blockchain_last_hash_cache.set(data=prev_hash, key="last_integrity_hash")
+
+                hash_content = f"{description}|{category}|{prev_hash}"
+                integrity_hash = hashlib.sha256(hash_content.encode()).hexdigest()

             new_issue = Issue(
                 ...
                 integrity_hash=integrity_hash,
                 previous_integrity_hash=prev_hash
             )

-            await run_in_threadpool(save_issue_db, db, new_issue)
-
-            blockchain_last_hash_cache.set(data=integrity_hash, key="last_integrity_hash")
+                await run_in_threadpool(save_issue_db, db, new_issue)
+                blockchain_last_hash_cache.set(data=integrity_hash, key="last_integrity_hash")

For multi-worker deployments, use a DB-backed lock/chain-head row so serialization is global, not per process.

Also applies to: 214-215

🤖 Prompt for AI Agents
Verify each finding against the current code and only fix it if needed.

In `@backend/routers/issues.py` around lines 176 - 185, The current code reads
prev_hash from blockchain_last_hash_cache and then computes/creates a new block
without global serialization, causing TOCTOU forks; replace this
local-cache-only pattern by acquiring a DB-backed chain-head lock/row when
generating a new issue: within a DB transaction select the chain-head row (or a
dedicated single-row table) FOR UPDATE to read the authoritative parent hash
(instead of the cache), compute and persist the new Issue (using
Issue.integrity_hash logic), update the chain-head row to point to the new hash,
commit, and only then update blockchain_last_hash_cache with the committed
value; ensure the code paths referencing prev_hash, blockchain_last_hash_cache,
and the run_in_threadpool/Issue.integrity_hash query use this transactional lock
to serialize head assignment across processes.

# Simple but effective SHA-256 chaining
# Simple but effective SHA-256 chaining
hash_content = f"{description}|{category}|{prev_hash}"
integrity_hash = hashlib.sha256(hash_content.encode()).hexdigest()

Expand All @@ -199,11 +204,15 @@ async def create_issue(
longitude=longitude,
location=location,
action_plan=initial_action_plan,
integrity_hash=integrity_hash
integrity_hash=integrity_hash,
previous_integrity_hash=prev_hash
)

# Offload blocking DB operations to threadpool
await run_in_threadpool(save_issue_db, db, new_issue)

# Update last hash cache
blockchain_last_hash_cache.set(data=integrity_hash, key="last_integrity_hash")
else:
# Don't create new issue, just return deduplication info
new_issue = None
Expand Down Expand Up @@ -620,24 +629,28 @@ def get_user_issues(
async def verify_blockchain_integrity(issue_id: int, db: Session = Depends(get_db)):
"""
Verify the cryptographic integrity of a report using the blockchain-style chaining.
Optimized: Uses column projection to fetch only needed data.
Optimized: Uses stored previous hash to eliminate redundant lookup for the parent record.
"""
# Fetch current issue data
# Optimization: Include previous_integrity_hash to verify the chain in one step
current_issue = await run_in_threadpool(
lambda: db.query(
Issue.id, Issue.description, Issue.category, Issue.integrity_hash
Issue.id, Issue.description, Issue.category, Issue.integrity_hash, Issue.previous_integrity_hash
).filter(Issue.id == issue_id).first()
)

if not current_issue:
raise HTTPException(status_code=404, detail="Issue not found")

# Fetch previous issue's integrity hash to verify the chain
prev_issue_hash = await run_in_threadpool(
lambda: db.query(Issue.integrity_hash).filter(Issue.id < issue_id).order_by(Issue.id.desc()).first()
)

prev_hash = prev_issue_hash[0] if prev_issue_hash and prev_issue_hash[0] else ""
# Use stored previous hash if available, otherwise fall back to DB lookup (for legacy records)
if current_issue.previous_integrity_hash is not None:
prev_hash = current_issue.previous_integrity_hash
else:
# Fallback for legacy records without previous_integrity_hash column populated
prev_issue_hash = await run_in_threadpool(
lambda: db.query(Issue.integrity_hash).filter(Issue.id < issue_id).order_by(Issue.id.desc()).first()
)
prev_hash = prev_issue_hash[0] if prev_issue_hash and prev_issue_hash[0] else ""
Comment on lines +646 to +653
Copy link
Contributor

@cubic-dev-ai cubic-dev-ai bot Mar 4, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

P2: Integrity verification is weakened by trusting previous_integrity_hash directly instead of validating against the canonical previous issue hash.

Prompt for AI agents
Check if this issue is valid — if so, understand the root cause and fix it. At backend/routers/issues.py, line 646:

<comment>Integrity verification is weakened by trusting `previous_integrity_hash` directly instead of validating against the canonical previous issue hash.</comment>

<file context>
@@ -620,24 +629,28 @@ def get_user_issues(
-
-    prev_hash = prev_issue_hash[0] if prev_issue_hash and prev_issue_hash[0] else ""
+    # Use stored previous hash if available, otherwise fall back to DB lookup (for legacy records)
+    if current_issue.previous_integrity_hash is not None:
+        prev_hash = current_issue.previous_integrity_hash
+    else:
</file context>
Suggested change
if current_issue.previous_integrity_hash is not None:
prev_hash = current_issue.previous_integrity_hash
else:
# Fallback for legacy records without previous_integrity_hash column populated
prev_issue_hash = await run_in_threadpool(
lambda: db.query(Issue.integrity_hash).filter(Issue.id < issue_id).order_by(Issue.id.desc()).first()
)
prev_hash = prev_issue_hash[0] if prev_issue_hash and prev_issue_hash[0] else ""
prev_issue_hash = await run_in_threadpool(
lambda: db.query(Issue.integrity_hash).filter(Issue.id < issue_id).order_by(Issue.id.desc()).first()
)
prev_hash = prev_issue_hash[0] if prev_issue_hash and prev_issue_hash[0] else ""
Fix with Cubic


# Recompute hash based on current data and previous hash
# Chaining logic: hash(description|category|prev_hash)
Expand Down
41 changes: 39 additions & 2 deletions tests/test_blockchain.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,8 @@ def test_blockchain_verification_success(client, db_session):
issue1 = Issue(
description="First issue",
category="Road",
integrity_hash=hash1
integrity_hash=hash1,
previous_integrity_hash=""
)
db_session.add(issue1)
db_session.commit()
Expand All @@ -42,7 +43,8 @@ def test_blockchain_verification_success(client, db_session):
issue2 = Issue(
description="Second issue",
category="Garbage",
integrity_hash=hash2
integrity_hash=hash2,
previous_integrity_hash=hash1
)
db_session.add(issue2)
db_session.commit()
Expand All @@ -62,6 +64,41 @@ def test_blockchain_verification_success(client, db_session):
assert data["is_valid"] == True
assert data["current_hash"] == hash2

def test_blockchain_previous_hash_stored(client, db_session):
# Create first issue
hash1 = hashlib.sha256(b"First").hexdigest()
issue1 = Issue(description="First", category="Road", integrity_hash=hash1)
db_session.add(issue1)
db_session.commit()

# Create second issue via API (to test logic in create_issue)
# Mocking background tasks to avoid errors
from unittest.mock import patch
with patch("backend.routers.issues.process_action_plan_background"), \
patch("backend.routers.issues.create_grievance_from_issue_background"):
response = client.post(
"/api/issues",
data={
"description": "Second issue description",
"category": "Garbage",
"latitude": 10.0,
"longitude": 20.0
}
)

assert response.status_code == 201
issue_id = response.json()["id"]

# Verify issue in DB has previous_integrity_hash
issue2 = db_session.query(Issue).filter(Issue.id == issue_id).first()
assert issue2.previous_integrity_hash == hash1

Comment on lines +67 to +95
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ Potential issue | 🟠 Major

Reset blockchain cache in test setup to avoid order-dependent failures.

This test assumes cache state is empty, but blockchain_last_hash_cache is module-global and can leak state from other tests, making previous_integrity_hash assertions flaky.

✅ Suggested test hardening
 from backend.models import Issue
+from backend.cache import blockchain_last_hash_cache
 ...
 def test_blockchain_previous_hash_stored(client, db_session):
+    blockchain_last_hash_cache.invalidate("last_integrity_hash")
+
     # Create first issue
     hash1 = hashlib.sha256(b"First").hexdigest()
🤖 Prompt for AI Agents
Verify each finding against the current code and only fix it if needed.

In `@tests/test_blockchain.py` around lines 67 - 95, The test
test_blockchain_previous_hash_stored is flaky because the module-global
blockchain_last_hash_cache can retain state across tests; before creating issues
(or in test setup) clear or reset blockchain_last_hash_cache from the blockchain
module so the cache is empty (e.g., import the symbol blockchain_last_hash_cache
and reinitialize/clear it) to ensure previous_integrity_hash is computed
deterministically for the test.

Comment on lines +67 to +95
Copy link

Copilot AI Mar 4, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This test can become order-dependent because blockchain_last_hash_cache is a module-level singleton and is not cleared between tests. If any other test calls /api/issues before this one, the cache may contain a stale hash and create_issue will skip the DB lookup, causing issue2.previous_integrity_hash to differ from hash1. Clear blockchain_last_hash_cache (e.g., in the db_session/client fixture setup or at the start of this test) to make the assertion deterministic.

Copilot uses AI. Check for mistakes.
# Verify blockchain-verify endpoint uses the stored hash
# (Implicitly tested if it returns is_valid=True and we know we didn't mock the internal DB query in the router)
response = client.get(f"/api/issues/{issue_id}/blockchain-verify")
assert response.status_code == 200
assert response.json()["is_valid"] == True
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ Potential issue | 🟡 Minor

Use direct truth assertion instead of == True.

Prefer a direct truthy check on Line 100 for clarity and lint compliance.

✏️ Suggested fix
-    assert response.json()["is_valid"] == True
+    assert response.json()["is_valid"]
📝 Committable suggestion

‼️ IMPORTANT
Carefully review the code before committing. Ensure that it accurately replaces the highlighted code, contains no missing lines, and has no issues with indentation. Thoroughly test & benchmark the code to ensure it meets the requirements.

Suggested change
assert response.json()["is_valid"] == True
assert response.json()["is_valid"]
🧰 Tools
🪛 Ruff (0.15.2)

[error] 100-100: Avoid equality comparisons to True; use response.json()["is_valid"]: for truth checks

Replace with response.json()["is_valid"]

(E712)

🤖 Prompt for AI Agents
Verify each finding against the current code and only fix it if needed.

In `@tests/test_blockchain.py` at line 100, Replace the equality comparison
asserting a boolean on the response with a direct truth assertion: change the
assertion that checks response.json()["is_valid"] == True to assert
response.json()["is_valid"] so the test uses a direct truthy check on the
response value (refer to the assertion that inspects response.json()["is_valid"]
in tests/test_blockchain.py).


def test_blockchain_verification_failure(client, db_session):
# Create issue with tampered hash
issue = Issue(
Expand Down