From f50435e34d585a1d8eb41b44240ca85b37cc2b81 Mon Sep 17 00:00:00 2001 From: sergerad Date: Mon, 2 Mar 2026 11:19:05 +1300 Subject: [PATCH 01/73] add block_proof column and DB queries for storing block proofs --- .../db/migrations/2025062000000_setup/up.sql | 1 + crates/store/src/db/mod.rs | 38 ++++++++- .../src/db/models/queries/block_headers.rs | 85 ++++++++++++++++++- crates/store/src/db/schema.rs | 1 + 4 files changed, 123 insertions(+), 2 deletions(-) diff --git a/crates/store/src/db/migrations/2025062000000_setup/up.sql b/crates/store/src/db/migrations/2025062000000_setup/up.sql index 1f0e151ab1..b607ec74d8 100644 --- a/crates/store/src/db/migrations/2025062000000_setup/up.sql +++ b/crates/store/src/db/migrations/2025062000000_setup/up.sql @@ -3,6 +3,7 @@ CREATE TABLE block_headers ( block_header BLOB NOT NULL, signature BLOB NOT NULL, commitment BLOB NOT NULL, + block_proof BLOB, -- NULL means the block has not yet been proven PRIMARY KEY (block_num), CONSTRAINT block_header_block_num_is_u32 CHECK (block_num BETWEEN 0 AND 0xFFFFFFFF) diff --git a/crates/store/src/db/mod.rs b/crates/store/src/db/mod.rs index 74aa8ce3bd..63faf8d1f5 100644 --- a/crates/store/src/db/mod.rs +++ b/crates/store/src/db/mod.rs @@ -10,7 +10,7 @@ use miden_node_utils::tracing::OpenTelemetrySpanExt; use miden_protocol::Word; use miden_protocol::account::{AccountHeader, AccountId, AccountStorageHeader}; use miden_protocol::asset::{Asset, AssetVaultKey}; -use miden_protocol::block::{BlockHeader, BlockNoteIndex, BlockNumber, SignedBlock}; +use miden_protocol::block::{BlockHeader, BlockNoteIndex, BlockNumber, BlockProof, SignedBlock}; use miden_protocol::crypto::merkle::SparseMerklePath; use miden_protocol::note::{ NoteDetails, @@ -564,6 +564,42 @@ impl Db { .await } + /// Stores a [`BlockProof`] for a previously committed block. + /// + /// Updates the `block_proof` column for the given block number. + #[instrument(target = COMPONENT, skip_all, err)] + pub async fn insert_block_proof( + &self, + block_num: BlockNumber, + block_proof: &BlockProof, + ) -> Result<()> { + let block_proof = block_proof.clone(); + self.transact("insert block proof", move |conn| { + models::queries::insert_block_proof(conn, block_num, &block_proof) + }) + .await?; + Ok(()) + } + + /// Returns block numbers for all blocks that have not yet been proven, ordered ascending. + #[instrument(level = "debug", target = COMPONENT, skip_all, ret(level = "debug"), err)] + pub async fn select_unproven_blocks(&self) -> Result> { + self.transact("select unproven blocks", |conn| { + models::queries::select_unproven_blocks(conn) + }) + .await + } + + /// Returns the [`BlockProof`] for a given block number, if the block exists and has been + /// proven. + #[instrument(level = "debug", target = COMPONENT, skip_all, ret(level = "debug"), err)] + pub async fn select_block_proof(&self, block_num: BlockNumber) -> Result> { + self.transact("select block proof", move |conn| { + models::queries::select_block_proof(conn, block_num) + }) + .await + } + /// Selects storage map values for syncing storage maps for a specific account ID. 
/// /// The returned values are the latest known values up to `block_range.end()`, and no values diff --git a/crates/store/src/db/models/queries/block_headers.rs b/crates/store/src/db/models/queries/block_headers.rs index bfcd34ee7a..2527072bd7 100644 --- a/crates/store/src/db/models/queries/block_headers.rs +++ b/crates/store/src/db/models/queries/block_headers.rs @@ -14,7 +14,7 @@ use diesel::{ use miden_crypto::Word; use miden_crypto::dsa::ecdsa_k256_keccak::Signature; use miden_node_utils::limiter::{QueryParamBlockLimit, QueryParamLimiter}; -use miden_protocol::block::{BlockHeader, BlockNumber}; +use miden_protocol::block::{BlockHeader, BlockNumber, BlockProof}; use miden_protocol::utils::{Deserializable, Serializable}; use super::DatabaseError; @@ -173,6 +173,8 @@ pub struct BlockHeaderRawRow { pub block_header: Vec, pub signature: Vec, pub commitment: Vec, + #[expect(dead_code)] + pub block_proof: Option>, } impl TryInto for BlockHeaderRawRow { type Error = DatabaseError; @@ -243,3 +245,84 @@ pub(crate) fn insert_block_header( .execute(conn)?; Ok(count) } + +/// Store a [`BlockProof`] for a committed block. +/// +/// Updates the `block_proof` column for the row with the given `block_num`. +/// +/// # Returns +/// +/// The number of affected rows (expected: 1). +#[tracing::instrument( + target = COMPONENT, + skip_all, + fields(block_num = %block_num), + err, +)] +pub(crate) fn insert_block_proof( + conn: &mut SqliteConnection, + block_num: BlockNumber, + block_proof: &BlockProof, +) -> Result { + let count = diesel::update( + schema::block_headers::table + .filter(schema::block_headers::block_num.eq(block_num.to_raw_sql())), + ) + .set(schema::block_headers::block_proof.eq(block_proof.to_bytes())) + .execute(conn)?; + Ok(count) +} + +/// Select all block numbers that have not yet been proven, ordered ascending. +/// +/// # Raw SQL +/// +/// ```sql +/// SELECT block_num +/// FROM block_headers +/// WHERE block_proof IS NULL +/// ORDER BY block_num ASC +/// ``` +pub(crate) fn select_unproven_blocks( + conn: &mut SqliteConnection, +) -> Result, DatabaseError> { + let block_nums: Vec = + SelectDsl::select(schema::block_headers::table, schema::block_headers::block_num) + .filter(schema::block_headers::block_proof.is_null()) + .order(schema::block_headers::block_num.asc()) + .load(conn)?; + block_nums + .into_iter() + .map(BlockNumber::from_raw_sql) + .collect::, _>>() + .map_err(Into::into) +} + +/// Select the [`BlockProof`] for a given block number, if it exists. +/// +/// # Returns +/// +/// `None` if the block does not exist or has not been proven yet. +/// +/// # Raw SQL +/// +/// ```sql +/// SELECT block_proof +/// FROM block_headers +/// WHERE block_num = ?1 +/// ``` +pub(crate) fn select_block_proof( + conn: &mut SqliteConnection, + block_num: BlockNumber, +) -> Result, DatabaseError> { + let proof_bytes: Option>> = + SelectDsl::select(schema::block_headers::table, schema::block_headers::block_proof) + .filter(schema::block_headers::block_num.eq(block_num.to_raw_sql())) + .get_result(conn) + .optional()?; + // Flatten: None (row not found) or Some(None) (proof is NULL) => None. + match proof_bytes.flatten() { + Some(bytes) => Ok(Some(BlockProof::read_from_bytes(&bytes[..])?)), + None => Ok(None), + } +} diff --git a/crates/store/src/db/schema.rs b/crates/store/src/db/schema.rs index f93afc16e8..8244385410 100644 --- a/crates/store/src/db/schema.rs +++ b/crates/store/src/db/schema.rs @@ -49,6 +49,7 @@ diesel::table! 
{ block_header -> Binary, signature -> Binary, commitment -> Binary, + block_proof -> Nullable, } } From 9101df83379450649afe47fe3b97c02728201f9a Mon Sep 17 00:00:00 2001 From: sergerad Date: Mon, 2 Mar 2026 11:43:44 +1300 Subject: [PATCH 02/73] decouple proving from apply_block and persist proving inputs --- .../db/migrations/2025062000000_setup/up.sql | 3 +- crates/store/src/db/mod.rs | 15 ++++++ .../src/db/models/queries/block_headers.rs | 51 ++++++++++++++----- crates/store/src/db/models/queries/mod.rs | 3 +- crates/store/src/db/schema.rs | 1 + crates/store/src/db/tests.rs | 12 +++-- crates/store/src/server/block_producer.rs | 47 ++++++++--------- crates/store/src/state/apply_block.rs | 13 +++-- 8 files changed, 96 insertions(+), 49 deletions(-) diff --git a/crates/store/src/db/migrations/2025062000000_setup/up.sql b/crates/store/src/db/migrations/2025062000000_setup/up.sql index b607ec74d8..e557ac7c80 100644 --- a/crates/store/src/db/migrations/2025062000000_setup/up.sql +++ b/crates/store/src/db/migrations/2025062000000_setup/up.sql @@ -3,7 +3,8 @@ CREATE TABLE block_headers ( block_header BLOB NOT NULL, signature BLOB NOT NULL, commitment BLOB NOT NULL, - block_proof BLOB, -- NULL means the block has not yet been proven + block_proof BLOB, -- NULL means the block has not yet been proven + proving_inputs BLOB, -- Serialized BlockProofRequest needed for deferred proving PRIMARY KEY (block_num), CONSTRAINT block_header_block_num_is_u32 CHECK (block_num BETWEEN 0 AND 0xFFFFFFFF) diff --git a/crates/store/src/db/mod.rs b/crates/store/src/db/mod.rs index 63faf8d1f5..71d6143b5b 100644 --- a/crates/store/src/db/mod.rs +++ b/crates/store/src/db/mod.rs @@ -258,6 +258,7 @@ impl Db { &[], genesis.body().updated_accounts(), genesis.body().transactions(), + None, // Genesis block has no proving inputs. ) }) .context("failed to insert genesis block")?; @@ -537,6 +538,7 @@ impl Db { acquire_done: oneshot::Receiver<()>, signed_block: SignedBlock, notes: Vec<(NoteRecord, Option)>, + proving_inputs: Option>, ) -> Result<()> { self.transact("apply block", move |conn| -> Result<()> { models::queries::apply_block( @@ -547,6 +549,7 @@ impl Db { signed_block.body().created_nullifiers(), signed_block.body().updated_accounts(), signed_block.body().transactions(), + proving_inputs, )?; // XXX FIXME TODO free floating mutex MUST NOT exist @@ -590,6 +593,18 @@ impl Db { .await } + /// Returns the serialized proving inputs for a given block number, if stored. + #[instrument(level = "debug", target = COMPONENT, skip_all, ret(level = "debug"), err)] + pub async fn select_block_proving_inputs( + &self, + block_num: BlockNumber, + ) -> Result>> { + self.transact("select block proving inputs", move |conn| { + models::queries::select_block_proving_inputs(conn, block_num) + }) + .await + } + /// Returns the [`BlockProof`] for a given block number, if the block exists and has been /// proven. 
#[instrument(level = "debug", target = COMPONENT, skip_all, ret(level = "debug"), err)] diff --git a/crates/store/src/db/models/queries/block_headers.rs b/crates/store/src/db/models/queries/block_headers.rs index 2527072bd7..d7a0e0c666 100644 --- a/crates/store/src/db/models/queries/block_headers.rs +++ b/crates/store/src/db/models/queries/block_headers.rs @@ -175,6 +175,8 @@ pub struct BlockHeaderRawRow { pub commitment: Vec, #[expect(dead_code)] pub block_proof: Option>, + #[expect(dead_code)] + pub proving_inputs: Option>, } impl TryInto for BlockHeaderRawRow { type Error = DatabaseError; @@ -207,16 +209,7 @@ pub struct BlockHeaderInsert { pub block_header: Vec, pub signature: Vec, pub commitment: Vec, -} -impl From<(&BlockHeader, &Signature)> for BlockHeaderInsert { - fn from((header, signature): (&BlockHeader, &Signature)) -> Self { - Self { - block_num: header.block_num().to_raw_sql(), - block_header: header.to_bytes(), - signature: signature.to_bytes(), - commitment: BlockHeaderCommitment::new(header).to_raw_sql(), - } - } + pub proving_inputs: Option>, } /// Insert a [`BlockHeader`] to the DB using the given [`SqliteConnection`]. @@ -238,14 +231,44 @@ pub(crate) fn insert_block_header( conn: &mut SqliteConnection, block_header: &BlockHeader, signature: &Signature, + proving_inputs: Option>, ) -> Result { - let block_header = BlockHeaderInsert::from((block_header, signature)); - let count = diesel::insert_into(schema::block_headers::table) - .values(&[block_header]) - .execute(conn)?; + let row = BlockHeaderInsert { + block_num: block_header.block_num().to_raw_sql(), + block_header: block_header.to_bytes(), + signature: signature.to_bytes(), + commitment: BlockHeaderCommitment::new(block_header).to_raw_sql(), + proving_inputs, + }; + let count = diesel::insert_into(schema::block_headers::table).values(&[row]).execute(conn)?; Ok(count) } +/// Select the serialized proving inputs for a given block number. +/// +/// # Returns +/// +/// `None` if the block does not exist or has no proving inputs stored. +/// +/// # Raw SQL +/// +/// ```sql +/// SELECT proving_inputs +/// FROM block_headers +/// WHERE block_num = ?1 +/// ``` +pub(crate) fn select_block_proving_inputs( + conn: &mut SqliteConnection, + block_num: BlockNumber, +) -> Result>, DatabaseError> { + let inputs: Option>> = + SelectDsl::select(schema::block_headers::table, schema::block_headers::proving_inputs) + .filter(schema::block_headers::block_num.eq(block_num.to_raw_sql())) + .get_result(conn) + .optional()?; + Ok(inputs.flatten()) +} + /// Store a [`BlockProof`] for a committed block. /// /// Updates the `block_proof` column for the row with the given `block_num`. diff --git a/crates/store/src/db/models/queries/mod.rs b/crates/store/src/db/models/queries/mod.rs index 35c38c5ad2..913f56a88d 100644 --- a/crates/store/src/db/models/queries/mod.rs +++ b/crates/store/src/db/models/queries/mod.rs @@ -59,10 +59,11 @@ pub(crate) fn apply_block( nullifiers: &[Nullifier], accounts: &[BlockAccountUpdate], transactions: &OrderedTransactionHeaders, + proving_inputs: Option>, ) -> Result { let mut count = 0; // Note: ordering here is important as the relevant tables have FK dependencies. 
- count += insert_block_header(conn, block_header, signature)?; + count += insert_block_header(conn, block_header, signature, proving_inputs)?; count += upsert_accounts(conn, accounts, block_header.block_num())?; count += insert_scripts(conn, notes.iter().map(|(note, _)| note))?; count += insert_notes(conn, notes)?; diff --git a/crates/store/src/db/schema.rs b/crates/store/src/db/schema.rs index 8244385410..7890516f33 100644 --- a/crates/store/src/db/schema.rs +++ b/crates/store/src/db/schema.rs @@ -50,6 +50,7 @@ diesel::table! { signature -> Binary, commitment -> Binary, block_proof -> Nullable, + proving_inputs -> Nullable, } } diff --git a/crates/store/src/db/tests.rs b/crates/store/src/db/tests.rs index 7bc633f95a..fda6f4d584 100644 --- a/crates/store/src/db/tests.rs +++ b/crates/store/src/db/tests.rs @@ -101,8 +101,10 @@ fn create_block(conn: &mut SqliteConnection, block_num: BlockNumber) { ); let dummy_signature = SecretKey::new().sign(block_header.commitment()); - conn.transaction(|conn| queries::insert_block_header(conn, &block_header, &dummy_signature)) - .unwrap(); + conn.transaction(|conn| { + queries::insert_block_header(conn, &block_header, &dummy_signature, None) + }) + .unwrap(); } #[test] @@ -738,7 +740,7 @@ fn db_block_header() { // test insertion let dummy_signature = SecretKey::new().sign(block_header.commitment()); - queries::insert_block_header(conn, &block_header, &dummy_signature).unwrap(); + queries::insert_block_header(conn, &block_header, &dummy_signature, None).unwrap(); // test fetch unknown block header let block_number = 1; @@ -770,7 +772,7 @@ fn db_block_header() { ); let dummy_signature = SecretKey::new().sign(block_header2.commitment()); - queries::insert_block_header(conn, &block_header2, &dummy_signature).unwrap(); + queries::insert_block_header(conn, &block_header2, &dummy_signature, None).unwrap(); let res = queries::select_block_header_by_block_num(conn, None).unwrap(); assert_eq!(res.unwrap(), block_header2); @@ -1804,7 +1806,7 @@ fn db_roundtrip_block_header() { // Insert let dummy_signature = SecretKey::new().sign(block_header.commitment()); - queries::insert_block_header(&mut conn, &block_header, &dummy_signature).unwrap(); + queries::insert_block_header(&mut conn, &block_header, &dummy_signature, None).unwrap(); // Retrieve let retrieved = diff --git a/crates/store/src/server/block_producer.rs b/crates/store/src/server/block_producer.rs index 25f6b05f60..d3872ae7a3 100644 --- a/crates/store/src/server/block_producer.rs +++ b/crates/store/src/server/block_producer.rs @@ -1,7 +1,7 @@ use std::convert::Infallible; -use futures::TryFutureExt; use miden_crypto::dsa::ecdsa_k256_keccak::Signature; +use miden_node_proto::domain::proof_request::BlockProofRequest; use miden_node_proto::errors::MissingFieldHelper; use miden_node_proto::generated::store::block_producer_server; use miden_node_proto::generated::{self as proto}; @@ -11,7 +11,7 @@ use miden_node_utils::tracing::OpenTelemetrySpanExt; use miden_protocol::Word; use miden_protocol::batch::OrderedBatches; use miden_protocol::block::{BlockBody, BlockHeader, BlockNumber, SignedBlock}; -use miden_protocol::utils::Deserializable; +use miden_protocol::utils::{Deserializable, Serializable}; use tonic::{Request, Response, Status}; use tracing::Instrument; @@ -89,41 +89,38 @@ impl block_producer_server::BlockProducer for StoreApi { span.set_attribute("block.output_notes.count", body.output_notes().count()); span.set_attribute("block.nullifiers.count", body.created_nullifiers().len()); - // We perform the 
apply/prove block work in a separate task. This prevents the caller + // Serialize proving inputs so they can be persisted alongside the block for + // deferred proving. + let proving_inputs = BlockProofRequest { + tx_batches: ordered_batches, + block_header: header.clone(), + block_inputs, + } + .to_bytes(); + + // We perform the apply block work in a separate task. This prevents the caller // cancelling the request and thereby cancelling the task at an arbitrary point of // execution. // // Normally this shouldn't be a problem, however our apply_block isn't quite ACID compliant // so things get a bit messy. This is more a temporary hack-around to minimize this risk. let this = self.clone(); - // TODO(sergerad): Use block proof. - let _block_proof = tokio::spawn( + tokio::spawn( async move { // SAFETY: The header, body, and signature are assumed to // correspond to each other because they are provided by the Block // Producer. - let signed_block = SignedBlock::new_unchecked(header.clone(), body, signature); // TODO(sergerad): Use `SignedBlock::new()` when available. + let signed_block = SignedBlock::new_unchecked(header, body, signature); // TODO(sergerad): Use `SignedBlock::new()` when available. // Note: This is an internal endpoint, so its safe to expose the full error // report. - this.state - .apply_block(signed_block) - .inspect_err(|err| { - span.set_error(err); - }) - .map_err(|err| { - let code = match err { - ApplyBlockError::InvalidBlockError(_) => tonic::Code::InvalidArgument, - _ => tonic::Code::Internal, - }; - Status::new(code, err.as_report()) - }) - .and_then(|_| { - this.block_prover - .prove(ordered_batches, block_inputs, &header) - .map_err(|err| Status::new(tonic::Code::Internal, err.as_report())) - }) - .await - .map(Response::new) + this.state.apply_block(signed_block, Some(proving_inputs)).await.map_err(|err| { + span.set_error(&err); + let code = match err { + ApplyBlockError::InvalidBlockError(_) => tonic::Code::InvalidArgument, + _ => tonic::Code::Internal, + }; + Status::new(code, err.as_report()) + }) } .in_current_span(), ) diff --git a/crates/store/src/state/apply_block.rs b/crates/store/src/state/apply_block.rs index 145432c97d..40d106b74d 100644 --- a/crates/store/src/state/apply_block.rs +++ b/crates/store/src/state/apply_block.rs @@ -41,7 +41,11 @@ impl State { // TODO: This span is logged in a root span, we should connect it to the parent span. #[expect(clippy::too_many_lines)] #[instrument(target = COMPONENT, skip_all, err)] - pub async fn apply_block(&self, signed_block: SignedBlock) -> Result<(), ApplyBlockError> { + pub async fn apply_block( + &self, + signed_block: SignedBlock, + proving_inputs: Option>, + ) -> Result<(), ApplyBlockError> { let _lock = self.writer.try_lock().map_err(|_| ApplyBlockError::ConcurrentWrite)?; let header = signed_block.header(); @@ -227,8 +231,11 @@ impl State { // spawned. let db = Arc::clone(&self.db); let db_update_task = tokio::spawn( - async move { db.apply_block(allow_acquire, acquire_done, signed_block, notes).await } - .in_current_span(), + async move { + db.apply_block(allow_acquire, acquire_done, signed_block, notes, proving_inputs) + .await + } + .in_current_span(), ); // Wait for the message from the DB update task, that we ready to commit the DB transaction. 
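
The patch above persists the serialized proving inputs next to each committed block so that proof generation can happen after `apply_block` returns; the next patch adds the background scheduler that drains those inputs. Below is a minimal, self-contained sketch of that store-now/prove-later idea using hypothetical stand-in types (`MockStore`, `BlockNum`, `prove_bytes`) — not the node's real `Db`, `BlockNumber`, or `BlockProver` APIs — just to make the intended data flow concrete.

```rust
use std::collections::BTreeMap;

// Hypothetical stand-ins: the store keeps serialized proving inputs next to
// each block and fills in the proof later.
type BlockNum = u32;

#[derive(Default)]
struct MockStore {
    proving_inputs: BTreeMap<BlockNum, Vec<u8>>, // persisted at apply_block time
    proofs: BTreeMap<BlockNum, Vec<u8>>,         // filled in by a background prover
}

impl MockStore {
    // Called at apply_block time: commit the block together with its proving inputs.
    fn apply_block(&mut self, block_num: BlockNum, inputs: Vec<u8>) {
        self.proving_inputs.insert(block_num, inputs);
    }

    // Called by the deferred prover: committed blocks that still lack a proof,
    // in ascending block order.
    fn unproven_blocks(&self) -> Vec<BlockNum> {
        self.proving_inputs
            .keys()
            .copied()
            .filter(|n| !self.proofs.contains_key(n))
            .collect()
    }
}

// Placeholder for the (possibly remote) block prover.
fn prove_bytes(inputs: &[u8]) -> Vec<u8> {
    inputs.to_vec() // stand-in "proof"
}

fn main() {
    let mut store = MockStore::default();
    store.apply_block(1, b"block 1 proving inputs".to_vec());
    store.apply_block(2, b"block 2 proving inputs".to_vec());

    // Deferred proving pass: prove and persist in ascending order, so a proof
    // is only stored once all ancestor proofs are stored (the ancestor rule
    // the scheduler in the following patch enforces via ordered completion).
    for block_num in store.unproven_blocks() {
        let proof = prove_bytes(&store.proving_inputs[&block_num]);
        store.proofs.insert(block_num, proof);
    }

    assert_eq!(store.unproven_blocks(), Vec::<BlockNum>::new());
}
```
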
From 0ef989a251c7c71f3071d6f119bf4f0c5c8c0b6f Mon Sep 17 00:00:00 2001 From: sergerad Date: Mon, 2 Mar 2026 12:39:50 +1300 Subject: [PATCH 03/73] add concurrent proof scheduler with FuturesOrdered for FIFO completion --- crates/store/src/server/api.rs | 5 +- crates/store/src/server/block_producer.rs | 22 +- crates/store/src/server/mod.rs | 12 +- crates/store/src/server/proof_scheduler.rs | 306 +++++++++++++++++++++ crates/store/src/state/mod.rs | 2 +- 5 files changed, 333 insertions(+), 14 deletions(-) create mode 100644 crates/store/src/server/proof_scheduler.rs diff --git a/crates/store/src/server/api.rs b/crates/store/src/server/api.rs index 56bfcafb49..cbe5d7acc1 100644 --- a/crates/store/src/server/api.rs +++ b/crates/store/src/server/api.rs @@ -12,9 +12,10 @@ use miden_protocol::note::Nullifier; use tonic::{Request, Response, Status}; use tracing::{info, instrument}; +use crate::COMPONENT; use crate::errors::GetBlockInputsError; +use crate::server::proof_scheduler::ProofSchedulerHandle; use crate::state::State; -use crate::{BlockProver, COMPONENT}; // STORE API // ================================================================================================ @@ -22,7 +23,7 @@ use crate::{BlockProver, COMPONENT}; #[derive(Clone)] pub struct StoreApi { pub(super) state: Arc, - pub(super) block_prover: Arc, + pub(super) proof_scheduler: ProofSchedulerHandle, } impl StoreApi { diff --git a/crates/store/src/server/block_producer.rs b/crates/store/src/server/block_producer.rs index d3872ae7a3..4cdc2c7a21 100644 --- a/crates/store/src/server/block_producer.rs +++ b/crates/store/src/server/block_producer.rs @@ -113,14 +113,20 @@ impl block_producer_server::BlockProducer for StoreApi { let signed_block = SignedBlock::new_unchecked(header, body, signature); // TODO(sergerad): Use `SignedBlock::new()` when available. // Note: This is an internal endpoint, so its safe to expose the full error // report. - this.state.apply_block(signed_block, Some(proving_inputs)).await.map_err(|err| { - span.set_error(&err); - let code = match err { - ApplyBlockError::InvalidBlockError(_) => tonic::Code::InvalidArgument, - _ => tonic::Code::Internal, - }; - Status::new(code, err.as_report()) - }) + this.state + .apply_block(signed_block, Some(proving_inputs)) + .await + .inspect(|_| { + this.proof_scheduler.notify_block_committed(); + }) + .map_err(|err| { + span.set_error(&err); + let code = match err { + ApplyBlockError::InvalidBlockError(_) => tonic::Code::InvalidArgument, + _ => tonic::Code::Internal, + }; + Status::new(code, err.as_report()) + }) } .in_current_span(), ) diff --git a/crates/store/src/server/mod.rs b/crates/store/src/server/mod.rs index 8c828f1166..636d084f50 100644 --- a/crates/store/src/server/mod.rs +++ b/crates/store/src/server/mod.rs @@ -30,6 +30,7 @@ mod api; mod block_producer; pub mod block_prover_client; mod ntx_builder; +pub mod proof_scheduler; mod rpc_api; /// The store server. @@ -112,18 +113,23 @@ impl Store { Arc::new(BlockProver::local()) }; + // Spawn the proof scheduler as a background task. It will immediately pick up any + // unproven blocks from previous runs and begin proving them. 
+ let proof_scheduler_handle = + proof_scheduler::spawn(Arc::clone(&state.db), Arc::clone(&block_prover)); + let rpc_service = store::rpc_server::RpcServer::new(api::StoreApi { state: Arc::clone(&state), - block_prover: Arc::clone(&block_prover), + proof_scheduler: proof_scheduler_handle.clone(), }); let ntx_builder_service = store::ntx_builder_server::NtxBuilderServer::new(api::StoreApi { state: Arc::clone(&state), - block_prover: Arc::clone(&block_prover), + proof_scheduler: proof_scheduler_handle.clone(), }); let block_producer_service = store::block_producer_server::BlockProducerServer::new(api::StoreApi { state: Arc::clone(&state), - block_prover: Arc::clone(&block_prover), + proof_scheduler: proof_scheduler_handle, }); let reflection_service = tonic_reflection::server::Builder::configure() .register_file_descriptor_set(store_rpc_api_descriptor()) diff --git a/crates/store/src/server/proof_scheduler.rs b/crates/store/src/server/proof_scheduler.rs new file mode 100644 index 0000000000..5afac249c0 --- /dev/null +++ b/crates/store/src/server/proof_scheduler.rs @@ -0,0 +1,306 @@ +//! Background task that drives deferred block proving. +//! +//! The [`ProofScheduler`] is spawned as an internal Store task. It: +//! +//! 1. On startup, queries the DB for all unproven blocks (handles restart recovery). +//! 2. Listens on a [`tokio::sync::Notify`] for newly committed blocks. +//! 3. Proves blocks concurrently, but resolves completions in FIFO order via [`FuturesOrdered`]. +//! This ensures the ancestor rule: a block's proof is only persisted after all ancestor proofs +//! have been persisted. +//! 4. Each proving future includes retry logic with exponential backoff and an overall timeout. +//! 5. On fatal errors (e.g. deserialization failures, timeout exhaustion), the future resolves with +//! an error. The scheduler logs it and continues — the block will be retried on the next +//! iteration. + +use std::collections::HashSet; +use std::sync::Arc; +use std::time::Duration; + +use futures::StreamExt; +use futures::stream::FuturesOrdered; +use miden_node_proto::domain::proof_request::BlockProofRequest; +use miden_protocol::block::{BlockNumber, BlockProof}; +use miden_protocol::utils::{Deserializable, Serializable}; +use tokio::sync::Notify; +use tracing::{error, info, instrument, warn}; + +use crate::COMPONENT; +use crate::db::Db; +use crate::server::block_prover_client::BlockProver; + +// CONSTANTS +// ================================================================================================ + +/// Initial retry delay on proving failure. +const INITIAL_RETRY_DELAY: Duration = Duration::from_secs(1); + +/// Maximum retry delay (caps the exponential backoff). +const MAX_RETRY_DELAY: Duration = Duration::from_secs(60); + +/// Overall timeout for proving a single block (including all retries). +const BLOCK_PROVE_TIMEOUT: Duration = Duration::from_secs(120); + +// PROOF SCHEDULER +// ================================================================================================ + +/// Handle returned when spawning the proof scheduler, used to notify it of new blocks. +#[derive(Clone)] +pub struct ProofSchedulerHandle { + notify: Arc, +} + +impl ProofSchedulerHandle { + /// Notify the scheduler that a new block has been committed and may need proving. + pub fn notify_block_committed(&self) { + self.notify.notify_one(); + } +} + +/// Spawns the proof scheduler as a background tokio task. 
+/// +/// Returns a [`ProofSchedulerHandle`] that should be used to notify the scheduler when new +/// blocks are committed. +pub fn spawn(db: Arc, block_prover: Arc) -> ProofSchedulerHandle { + let notify = Arc::new(Notify::new()); + let handle = ProofSchedulerHandle { notify: Arc::clone(¬ify) }; + + tokio::spawn(run(db, block_prover, notify)); + + handle +} + +/// Main loop of the proof scheduler. +/// +/// Uses [`FuturesOrdered`] to run proving concurrently while resolving completions in block +/// order. This provides natural backpressure and ensures proofs are persisted sequentially. +#[instrument(target = COMPONENT, name = "proof_scheduler", skip_all)] +async fn run(db: Arc, block_prover: Arc, notify: Arc) { + info!(target: COMPONENT, "Proof scheduler started"); + + loop { + // Query all unproven blocks. This handles both startup recovery and new blocks. + let unproven_blocks = match db.select_unproven_blocks().await { + Ok(blocks) => blocks, + Err(err) => { + error!(target: COMPONENT, %err, "Failed to query unproven blocks, retrying"); + tokio::time::sleep(INITIAL_RETRY_DELAY).await; + continue; + }, + }; + + if unproven_blocks.is_empty() { + // No work to do — wait for a notification that a new block was committed. + notify.notified().await; + continue; + } + + // Submit all unproven blocks into a FuturesOrdered. Each future runs the full + // prove-with-retries pipeline concurrently, but completions are polled in submission + // (i.e. block) order. + let mut proving_futures = FuturesOrdered::new(); + for block_num in &unproven_blocks { + let db = Arc::clone(&db); + let block_prover = Arc::clone(&block_prover); + let block_num = *block_num; + proving_futures.push_back(async move { + let result = tokio::time::timeout( + BLOCK_PROVE_TIMEOUT, + prove_block(&db, &block_prover, block_num), + ) + .await; + + match result { + Ok(proof) => (block_num, proof), + Err(elapsed) => { + error!( + target: COMPONENT, + %block_num, + "Block proving timed out after {:?}", + elapsed, + ); + (block_num, Err(ProveBlockError::Timeout)) + }, + } + }); + } + + // Drain results in order. Track which blocks we've already dispatched to avoid + // re-queuing them before this batch completes. + let mut inflight: HashSet = unproven_blocks.iter().copied().collect(); + + while let Some((block_num, result)) = proving_futures.next().await { + inflight.remove(&block_num); + + match result { + Ok(proof) => { + // Persist the proof. On failure, log and move on — it will be retried. + match db.insert_block_proof(block_num, &proof).await { + Ok(()) => { + info!( + target: COMPONENT, + %block_num, + proof_size = proof.to_bytes().len(), + "Block proof persisted" + ); + }, + Err(err) => { + error!( + target: COMPONENT, + %block_num, + %err, + "Failed to persist block proof" + ); + }, + } + }, + Err(err) => { + error!( + target: COMPONENT, + %block_num, + ?err, + "Block proving failed, will retry next iteration" + ); + }, + } + } + } +} + +// PROVE BLOCK +// ================================================================================================ + +/// Errors that can occur during block proving. +#[derive(Debug)] +enum ProveBlockError { + /// The proving inputs were not found in the database. + MissingProvingInputs, + /// The proving inputs could not be deserialized. + DeserializationFailed, + /// The overall proving timeout was exceeded. + Timeout, +} + +/// Proves a single block, retrying with exponential backoff on transient failures. 
+/// +/// Returns the proof on success, or a fatal error if proving cannot succeed (missing or +/// corrupt proving inputs). +/// +/// This function is designed to be run as a future inside [`FuturesOrdered`]. Transient +/// errors (DB reads, prover failures) are retried internally. Only fatal errors are returned. +async fn prove_block( + db: &Db, + block_prover: &BlockProver, + block_num: BlockNumber, +) -> Result { + // Load and deserialize proving inputs (with retries for transient DB errors). + let request = load_proving_inputs(db, block_num).await?; + + // Prove the block (with retries for transient prover errors). + prove_with_retries(block_prover, block_num, request).await +} + +/// Loads and deserializes proving inputs from the DB, retrying on transient DB errors. +async fn load_proving_inputs( + db: &Db, + block_num: BlockNumber, +) -> Result { + let mut retry_delay = INITIAL_RETRY_DELAY; + + loop { + match db.select_block_proving_inputs(block_num).await { + Ok(Some(bytes)) => { + return BlockProofRequest::read_from_bytes(&bytes[..]).map_err(|err| { + error!( + target: COMPONENT, + %block_num, + %err, + "Failed to deserialize proving inputs" + ); + ProveBlockError::DeserializationFailed + }); + }, + Ok(None) => { + error!( + target: COMPONENT, + %block_num, + "No proving inputs found for unproven block" + ); + return Err(ProveBlockError::MissingProvingInputs); + }, + Err(err) => { + warn!( + target: COMPONENT, + %block_num, + %err, + ?retry_delay, + "Failed to load proving inputs, retrying" + ); + tokio::time::sleep(retry_delay).await; + retry_delay = (retry_delay * 2).min(MAX_RETRY_DELAY); + }, + } + } +} + +/// Calls the block prover, retrying with exponential backoff on failure. +async fn prove_with_retries( + block_prover: &BlockProver, + block_num: BlockNumber, + request: BlockProofRequest, +) -> Result { + let mut retry_delay = INITIAL_RETRY_DELAY; + + // The proving inputs must be re-usable across retries. Since `BlockProver::prove` takes + // ownership, we serialize once and re-deserialize on each retry attempt. + let request_bytes = request.to_bytes(); + + // First attempt uses the already-deserialized request. + match block_prover + .prove(request.tx_batches, request.block_inputs, &request.block_header) + .await + { + Ok(proof) => return Ok(proof), + Err(err) => { + warn!( + target: COMPONENT, + %block_num, + %err, + ?retry_delay, + "Block proving failed, retrying" + ); + tokio::time::sleep(retry_delay).await; + retry_delay = (retry_delay * 2).min(MAX_RETRY_DELAY); + }, + } + + // Subsequent retries re-deserialize from bytes. 
+ loop { + let request = BlockProofRequest::read_from_bytes(&request_bytes[..]).map_err(|err| { + error!( + target: COMPONENT, + %block_num, + %err, + "Failed to re-deserialize proving inputs during retry" + ); + ProveBlockError::DeserializationFailed + })?; + + match block_prover + .prove(request.tx_batches, request.block_inputs, &request.block_header) + .await + { + Ok(proof) => return Ok(proof), + Err(err) => { + warn!( + target: COMPONENT, + %block_num, + %err, + ?retry_delay, + "Block proving failed, retrying" + ); + tokio::time::sleep(retry_delay).await; + retry_delay = (retry_delay * 2).min(MAX_RETRY_DELAY); + }, + } + } +} diff --git a/crates/store/src/state/mod.rs b/crates/store/src/state/mod.rs index 40f6f29e60..e4f16afdd4 100644 --- a/crates/store/src/state/mod.rs +++ b/crates/store/src/state/mod.rs @@ -104,7 +104,7 @@ impl InnerState { pub struct State { /// The database which stores block headers, nullifiers, notes, and the latest states of /// accounts. - db: Arc, + pub(crate) db: Arc, /// The block store which stores full block contents for all blocks. block_store: Arc, From 34f0cdc806c534bad4a7c6d38eed7a8d69c276a0 Mon Sep 17 00:00:00 2001 From: sergerad Date: Mon, 2 Mar 2026 12:53:09 +1300 Subject: [PATCH 04/73] Simplify prove fns --- crates/store/src/server/proof_scheduler.rs | 129 +++++++++++---------- 1 file changed, 69 insertions(+), 60 deletions(-) diff --git a/crates/store/src/server/proof_scheduler.rs b/crates/store/src/server/proof_scheduler.rs index 5afac249c0..5351d0e541 100644 --- a/crates/store/src/server/proof_scheduler.rs +++ b/crates/store/src/server/proof_scheduler.rs @@ -12,7 +12,6 @@ //! an error. The scheduler logs it and continues — the block will be retried on the next //! iteration. -use std::collections::HashSet; use std::sync::Arc; use std::time::Duration; @@ -88,84 +87,94 @@ async fn run(db: Arc, block_prover: Arc, notify: Arc) { }, }; + // Wait for notify if there are no unproven blocks. if unproven_blocks.is_empty() { - // No work to do — wait for a notification that a new block was committed. notify.notified().await; continue; } - // Submit all unproven blocks into a FuturesOrdered. Each future runs the full - // prove-with-retries pipeline concurrently, but completions are polled in submission - // (i.e. block) order. - let mut proving_futures = FuturesOrdered::new(); - for block_num in &unproven_blocks { - let db = Arc::clone(&db); - let block_prover = Arc::clone(&block_prover); - let block_num = *block_num; - proving_futures.push_back(async move { - let result = tokio::time::timeout( - BLOCK_PROVE_TIMEOUT, - prove_block(&db, &block_prover, block_num), - ) - .await; - - match result { - Ok(proof) => (block_num, proof), - Err(elapsed) => { - error!( - target: COMPONENT, - %block_num, - "Block proving timed out after {:?}", - elapsed, - ); - (block_num, Err(ProveBlockError::Timeout)) - }, - } - }); - } - - // Drain results in order. Track which blocks we've already dispatched to avoid - // re-queuing them before this batch completes. - let mut inflight: HashSet = unproven_blocks.iter().copied().collect(); - + // Construct proving jobs and drain results in order. + // On any failure we break immediately — dropping remaining futures cancels them. + // The outer loop will re-query unproven blocks and restart the sequence, ensuring + // we never persist a proof while an ancestor block is still unproven. 
+ let mut proving_futures = order_proving_jobs(&db, &block_prover, &unproven_blocks); while let Some((block_num, result)) = proving_futures.next().await { - inflight.remove(&block_num); - match result { - Ok(proof) => { - // Persist the proof. On failure, log and move on — it will be retried. - match db.insert_block_proof(block_num, &proof).await { - Ok(()) => { - info!( - target: COMPONENT, - %block_num, - proof_size = proof.to_bytes().len(), - "Block proof persisted" - ); - }, - Err(err) => { - error!( - target: COMPONENT, - %block_num, - %err, - "Failed to persist block proof" - ); - }, - } - }, + Ok(proof) => persist_proof(&db, block_num, &proof).await, Err(err) => { error!( target: COMPONENT, %block_num, ?err, - "Block proving failed, will retry next iteration" + "Block proving failed, abandoning batch and retrying next iteration" ); + break; }, } } } } +/// Submits all unproven blocks into a [`FuturesOrdered`]. Each future runs the full +/// prove-with-retries pipeline concurrently, but completions are polled in submission +/// (i.e. block) order. +fn order_proving_jobs( + db: &Arc, + block_prover: &Arc, + unproven_blocks: &[BlockNumber], +) -> FuturesOrdered< + impl std::future::Future)>, +> { + let mut futures = FuturesOrdered::new(); + for &block_num in unproven_blocks { + let db = Arc::clone(db); + let block_prover = Arc::clone(block_prover); + futures.push_back(async move { + let result = tokio::time::timeout( + BLOCK_PROVE_TIMEOUT, + prove_block(&db, &block_prover, block_num), + ) + .await; + + match result { + Ok(proof) => (block_num, proof), + Err(elapsed) => { + error!( + target: COMPONENT, + %block_num, + "Block proving timed out after {:?}", + elapsed, + ); + (block_num, Err(ProveBlockError::Timeout)) + }, + } + }); + } + futures +} + +/// Persists a proven block proof to the DB. Logs on success or failure. +async fn persist_proof(db: &Db, block_num: BlockNumber, proof: &BlockProof) { + match db.insert_block_proof(block_num, proof).await { + Ok(()) => { + info!( + target: COMPONENT, + %block_num, + proof_size = proof.to_bytes().len(), + "Block proof persisted" + ); + }, + Err(err) => { + error!( + target: COMPONENT, + %block_num, + %err, + "Failed to persist block proof" + ); + }, + } +} + // PROVE BLOCK // ================================================================================================ From 7a5022a408f88eb83c43972ebca4f0ee6aeb3fff Mon Sep 17 00:00:00 2001 From: sergerad Date: Mon, 2 Mar 2026 13:12:22 +1300 Subject: [PATCH 05/73] Simplify retry fn --- crates/store/src/server/proof_scheduler.rs | 36 +--------------------- 1 file changed, 1 insertion(+), 35 deletions(-) diff --git a/crates/store/src/server/proof_scheduler.rs b/crates/store/src/server/proof_scheduler.rs index 5351d0e541..19084cf9a6 100644 --- a/crates/store/src/server/proof_scheduler.rs +++ b/crates/store/src/server/proof_scheduler.rs @@ -259,43 +259,9 @@ async fn prove_with_retries( ) -> Result { let mut retry_delay = INITIAL_RETRY_DELAY; - // The proving inputs must be re-usable across retries. Since `BlockProver::prove` takes - // ownership, we serialize once and re-deserialize on each retry attempt. - let request_bytes = request.to_bytes(); - - // First attempt uses the already-deserialized request. 
- match block_prover - .prove(request.tx_batches, request.block_inputs, &request.block_header) - .await - { - Ok(proof) => return Ok(proof), - Err(err) => { - warn!( - target: COMPONENT, - %block_num, - %err, - ?retry_delay, - "Block proving failed, retrying" - ); - tokio::time::sleep(retry_delay).await; - retry_delay = (retry_delay * 2).min(MAX_RETRY_DELAY); - }, - } - - // Subsequent retries re-deserialize from bytes. loop { - let request = BlockProofRequest::read_from_bytes(&request_bytes[..]).map_err(|err| { - error!( - target: COMPONENT, - %block_num, - %err, - "Failed to re-deserialize proving inputs during retry" - ); - ProveBlockError::DeserializationFailed - })?; - match block_prover - .prove(request.tx_batches, request.block_inputs, &request.block_header) + .prove(request.tx_batches.clone(), request.block_inputs.clone(), &request.block_header) .await { Ok(proof) => return Ok(proof), From 5062cd00583451226c891da481105a3ba5c32930 Mon Sep 17 00:00:00 2001 From: sergerad Date: Mon, 2 Mar 2026 13:34:27 +1300 Subject: [PATCH 06/73] Fix data dir issue --- bin/node/src/commands/mod.rs | 10 ++-------- .../src/db/migrations/2025062000000_setup/up.sql | 12 ++++++------ crates/store/src/db/models/queries/block_headers.rs | 4 ++++ crates/store/src/server/proof_scheduler.rs | 2 +- 4 files changed, 13 insertions(+), 15 deletions(-) diff --git a/bin/node/src/commands/mod.rs b/bin/node/src/commands/mod.rs index b7ef3c3c54..dcd384e98e 100644 --- a/bin/node/src/commands/mod.rs +++ b/bin/node/src/commands/mod.rs @@ -1,6 +1,6 @@ use std::net::SocketAddr; use std::num::NonZeroUsize; -use std::path::{Path, PathBuf}; +use std::path::Path; use std::time::Duration; use anyhow::Context; @@ -174,12 +174,6 @@ pub struct NtxBuilderConfig { default_value_t = DEFAULT_NTX_SCRIPT_CACHE_SIZE )] pub script_cache_size: NonZeroUsize, - - /// Directory for the ntx-builder's persistent database. - /// - /// If not set, defaults to the node's data directory. - #[arg(long = "ntx-builder.data-directory", value_name = "DIR")] - pub data_directory: Option, } impl NtxBuilderConfig { @@ -194,7 +188,7 @@ impl NtxBuilderConfig { validator_url: Url, node_data_directory: &Path, ) -> miden_node_ntx_builder::NtxBuilderConfig { - let data_dir = self.data_directory.unwrap_or_else(|| node_data_directory.to_path_buf()); + let data_dir = node_data_directory.to_path_buf(); let database_filepath = data_dir.join("ntx-builder.sqlite3"); miden_node_ntx_builder::NtxBuilderConfig::new( diff --git a/crates/store/src/db/migrations/2025062000000_setup/up.sql b/crates/store/src/db/migrations/2025062000000_setup/up.sql index e557ac7c80..72da591136 100644 --- a/crates/store/src/db/migrations/2025062000000_setup/up.sql +++ b/crates/store/src/db/migrations/2025062000000_setup/up.sql @@ -1,10 +1,10 @@ CREATE TABLE block_headers ( - block_num INTEGER NOT NULL, - block_header BLOB NOT NULL, - signature BLOB NOT NULL, - commitment BLOB NOT NULL, - block_proof BLOB, -- NULL means the block has not yet been proven - proving_inputs BLOB, -- Serialized BlockProofRequest needed for deferred proving + block_num INTEGER NOT NULL, + block_header BLOB NOT NULL, + signature BLOB NOT NULL, + commitment BLOB NOT NULL, + proving_inputs BLOB, -- Serialized BlockProofRequest needed for deferred proving. NULL for genesis block. 
+ block_proof BLOB, -- NULL means the block has not yet been proven PRIMARY KEY (block_num), CONSTRAINT block_header_block_num_is_u32 CHECK (block_num BETWEEN 0 AND 0xFFFFFFFF) diff --git a/crates/store/src/db/models/queries/block_headers.rs b/crates/store/src/db/models/queries/block_headers.rs index d7a0e0c666..db64936b04 100644 --- a/crates/store/src/db/models/queries/block_headers.rs +++ b/crates/store/src/db/models/queries/block_headers.rs @@ -298,12 +298,15 @@ pub(crate) fn insert_block_proof( /// Select all block numbers that have not yet been proven, ordered ascending. /// +/// The genesis block (block 0) is excluded because it is never proven. +/// /// # Raw SQL /// /// ```sql /// SELECT block_num /// FROM block_headers /// WHERE block_proof IS NULL +/// AND block_num > 0 /// ORDER BY block_num ASC /// ``` pub(crate) fn select_unproven_blocks( @@ -312,6 +315,7 @@ pub(crate) fn select_unproven_blocks( let block_nums: Vec = SelectDsl::select(schema::block_headers::table, schema::block_headers::block_num) .filter(schema::block_headers::block_proof.is_null()) + .filter(schema::block_headers::block_num.gt(0i64)) .order(schema::block_headers::block_num.asc()) .load(conn)?; block_nums diff --git a/crates/store/src/server/proof_scheduler.rs b/crates/store/src/server/proof_scheduler.rs index 19084cf9a6..f977e7b789 100644 --- a/crates/store/src/server/proof_scheduler.rs +++ b/crates/store/src/server/proof_scheduler.rs @@ -37,7 +37,7 @@ const INITIAL_RETRY_DELAY: Duration = Duration::from_secs(1); const MAX_RETRY_DELAY: Duration = Duration::from_secs(60); /// Overall timeout for proving a single block (including all retries). -const BLOCK_PROVE_TIMEOUT: Duration = Duration::from_secs(120); +const BLOCK_PROVE_TIMEOUT: Duration = Duration::from_mins(2); // PROOF SCHEDULER // ================================================================================================ From ecc067e2ab47083559e37199f8f3ec26cb8642ae Mon Sep 17 00:00:00 2001 From: sergerad Date: Mon, 2 Mar 2026 13:51:57 +1300 Subject: [PATCH 07/73] add finality parameter to SyncChainMmr endpoint --- bin/stress-test/src/store/mod.rs | 1 + crates/rpc/src/tests.rs | 1 + crates/store/src/db/mod.rs | 10 ++++++++ .../src/db/models/queries/block_headers.rs | 25 +++++++++++++++++++ crates/store/src/errors.rs | 5 ++++ crates/store/src/server/rpc_api.rs | 19 +++++++++++--- proto/proto/rpc.proto | 18 +++++++++++-- 7 files changed, 74 insertions(+), 5 deletions(-) diff --git a/bin/stress-test/src/store/mod.rs b/bin/stress-test/src/store/mod.rs index 314a5e95d0..e1e930b59f 100644 --- a/bin/stress-test/src/store/mod.rs +++ b/bin/stress-test/src/store/mod.rs @@ -469,6 +469,7 @@ async fn sync_chain_mmr( ) -> SyncChainMmrRun { let sync_request = proto::rpc::SyncChainMmrRequest { block_range: Some(proto::rpc::BlockRange { block_from, block_to: Some(block_to) }), + finality: proto::rpc::Finality::Committed.into(), }; let start = Instant::now(); diff --git a/crates/rpc/src/tests.rs b/crates/rpc/src/tests.rs index 172f2266ad..64bdb628f7 100644 --- a/crates/rpc/src/tests.rs +++ b/crates/rpc/src/tests.rs @@ -546,6 +546,7 @@ async fn sync_chain_mmr_returns_delta() { let request = proto::rpc::SyncChainMmrRequest { block_range: Some(proto::rpc::BlockRange { block_from: 0, block_to: None }), + finality: proto::rpc::Finality::Committed.into(), }; let response = rpc_client.sync_chain_mmr(request).await.expect("sync_chain_mmr should succeed"); let response = response.into_inner(); diff --git a/crates/store/src/db/mod.rs b/crates/store/src/db/mod.rs index 
71d6143b5b..a6a9b6aa6e 100644 --- a/crates/store/src/db/mod.rs +++ b/crates/store/src/db/mod.rs @@ -605,6 +605,16 @@ impl Db { .await } + /// Returns the highest block number that has been proven, or `None` if no blocks have been + /// proven yet. + #[instrument(level = "debug", target = COMPONENT, skip_all, ret(level = "debug"), err)] + pub async fn select_latest_proven_block_num(&self) -> Result> { + self.transact("select latest proven block num", |conn| { + models::queries::select_latest_proven_block_num(conn) + }) + .await + } + /// Returns the [`BlockProof`] for a given block number, if the block exists and has been /// proven. #[instrument(level = "debug", target = COMPONENT, skip_all, ret(level = "debug"), err)] diff --git a/crates/store/src/db/models/queries/block_headers.rs b/crates/store/src/db/models/queries/block_headers.rs index db64936b04..0995ce6de8 100644 --- a/crates/store/src/db/models/queries/block_headers.rs +++ b/crates/store/src/db/models/queries/block_headers.rs @@ -325,6 +325,31 @@ pub(crate) fn select_unproven_blocks( .map_err(Into::into) } +/// Select the highest block number that has been proven. +/// +/// Returns `None` if no blocks have been proven yet (genesis is never proven). +/// +/// # Raw SQL +/// +/// ```sql +/// SELECT MAX(block_num) +/// FROM block_headers +/// WHERE block_proof IS NOT NULL +/// ``` +pub(crate) fn select_latest_proven_block_num( + conn: &mut SqliteConnection, +) -> Result, DatabaseError> { + use diesel::dsl::max; + + let block_num: Option = SelectDsl::select( + schema::block_headers::table.filter(schema::block_headers::block_proof.is_not_null()), + max(schema::block_headers::block_num), + ) + .get_result(conn)?; + + block_num.map(BlockNumber::from_raw_sql).transpose().map_err(Into::into) +} + /// Select the [`BlockProof`] for a given block number, if it exists. /// /// # Returns diff --git a/crates/store/src/errors.rs b/crates/store/src/errors.rs index 397c173866..32bb67ddc4 100644 --- a/crates/store/src/errors.rs +++ b/crates/store/src/errors.rs @@ -254,6 +254,11 @@ pub enum SyncChainMmrError { }, #[error("malformed block number")] DeserializationFailed(#[source] ConversionError), + #[error("no proven blocks available")] + NoProvenBlocks, + #[error("database error")] + #[grpc(internal)] + DatabaseError(#[from] DatabaseError), } impl From for StateSyncError { diff --git a/crates/store/src/server/rpc_api.rs b/crates/store/src/server/rpc_api.rs index bb3098fffa..a0399f291f 100644 --- a/crates/store/src/server/rpc_api.rs +++ b/crates/store/src/server/rpc_api.rs @@ -166,12 +166,25 @@ impl rpc_server::Rpc for StoreApi { .ok_or_else(|| proto::rpc::SyncChainMmrRequest::missing_field(stringify!(block_range))) .map_err(SyncChainMmrError::DeserializationFailed)?; + // Determine the effective tip based on the requested finality level. + let effective_tip = match request.finality() { + proto::rpc::Finality::Committed => chain_tip, + proto::rpc::Finality::Proven => self + .state + .db + .select_latest_proven_block_num() + .await + .map_err(SyncChainMmrError::DatabaseError)? 
+ .ok_or(SyncChainMmrError::NoProvenBlocks)?, + }; + let block_from = BlockNumber::from(block_range.block_from); - if block_from > chain_tip { - Err(SyncChainMmrError::FutureBlock { chain_tip, block_from })?; + if block_from > effective_tip { + Err(SyncChainMmrError::FutureBlock { chain_tip: effective_tip, block_from })?; } - let block_to = block_range.block_to.map_or(chain_tip, BlockNumber::from).min(chain_tip); + let block_to = + block_range.block_to.map_or(effective_tip, BlockNumber::from).min(effective_tip); if block_from > block_to { Err(SyncChainMmrError::InvalidBlockRange(InvalidBlockRange::StartGreaterThanEnd { diff --git a/proto/proto/rpc.proto b/proto/proto/rpc.proto index 1a218539ee..03090de3f4 100644 --- a/proto/proto/rpc.proto +++ b/proto/proto/rpc.proto @@ -483,14 +483,28 @@ message SyncNotesResponse { // SYNC CHAIN MMR // ================================================================================================ +// The finality level for chain data queries. +enum Finality { + // Return data up to the latest committed block (default). + FINALITY_COMMITTED = 0; + // Return data only up to the latest proven block. + FINALITY_PROVEN = 1; +} + // Chain MMR synchronization request. message SyncChainMmrRequest { // Block range from which to synchronize the chain MMR. // // The response will contain MMR delta starting after `block_range.block_from` up to - // `block_range.block_to` or the chain tip (whichever is lower). Set `block_from` to the last - // block already present in the caller's MMR so the delta begins at the next block. + // `block_range.block_to` or the effective tip (whichever is lower). Set `block_from` to the + // last block already present in the caller's MMR so the delta begins at the next block. BlockRange block_range = 1; + + // The finality level to use when clamping the upper bound of the block range. + // + // When set to FINALITY_COMMITTED (default), the upper bound is clamped to the chain tip. + // When set to FINALITY_PROVEN, the upper bound is clamped to the latest proven block. + Finality finality = 2; } // Represents the result of syncing chain MMR. From f116fa39e73aa08d5e1a017eecdb3bd6f7e537bb Mon Sep 17 00:00:00 2001 From: sergerad Date: Mon, 2 Mar 2026 14:01:27 +1300 Subject: [PATCH 08/73] RM flake --- bin/remote-prover/src/server/tests.rs | 38 --------------------------- 1 file changed, 38 deletions(-) diff --git a/bin/remote-prover/src/server/tests.rs b/bin/remote-prover/src/server/tests.rs index 46bea96e7a..546353fd85 100644 --- a/bin/remote-prover/src/server/tests.rs +++ b/bin/remote-prover/src/server/tests.rs @@ -199,44 +199,6 @@ async fn legacy_behaviour_with_capacity_1() { server.abort(); } -/// Test that multiple requests can be queued and capacity is respected. -/// -/// Create a server with a capacity of two and submit three requests. Ensure -/// that two succeed and one fails with a resource exhaustion error. 
-#[tokio::test(flavor = "multi_thread")] -async fn capacity_is_respected() { - let (server, port) = Server::with_arbitrary_port(ProofKind::Transaction) - .with_capacity(2) - .spawn() - .await - .expect("server should spawn"); - - let request = ProofRequest::from_tx(&ProofRequest::mock_tx().await); - let mut client_a = Client::connect(port).await; - let mut client_b = client_a.clone(); - let mut client_c = client_a.clone(); - - let a = client_a.submit_request(request.clone()); - let b = client_b.submit_request(request.clone()); - let c = client_c.submit_request(request); - - let (first, second, third) = tokio::join!(a, b, c); - - // We cannot know which got served and which got rejected. - // We can only assert that two succeeded and one failed. - let mut expected = [true, true, false]; - let mut result = [first.is_ok(), second.is_ok(), third.is_ok()]; - expected.sort_unstable(); - result.sort_unstable(); - assert_eq!(expected, result); - - assert_matches!(first.err().or(second.err()).or(third.err()), Some(err) => { - assert_eq!(err.code(), tonic::Code::ResourceExhausted); - }); - - server.abort(); -} - /// Ensures that the server request timeout is adhered to. /// /// We cannot actually enforce this for a request that has already being proven as the proof From 866b3241ba393dc0577d58994943b7f499fc0309 Mon Sep 17 00:00:00 2001 From: sergerad Date: Mon, 2 Mar 2026 14:05:48 +1300 Subject: [PATCH 09/73] Fix lint --- bin/remote-prover/src/server/tests.rs | 1 - crates/rpc/src/server/api.rs | 12 +++++---- crates/store/src/db/mod.rs | 32 +++++++++++++---------- crates/store/src/db/models/queries/mod.rs | 31 +++++++++++++--------- 4 files changed, 43 insertions(+), 33 deletions(-) diff --git a/bin/remote-prover/src/server/tests.rs b/bin/remote-prover/src/server/tests.rs index 546353fd85..e5d24c0fdd 100644 --- a/bin/remote-prover/src/server/tests.rs +++ b/bin/remote-prover/src/server/tests.rs @@ -3,7 +3,6 @@ use std::num::NonZeroUsize; use std::sync::Arc; use std::time::Duration; -use assert_matches::assert_matches; use miden_protocol::MIN_PROOF_SECURITY_LEVEL; use miden_protocol::asset::{Asset, FungibleAsset}; use miden_protocol::batch::{ProposedBatch, ProvenBatch}; diff --git a/crates/rpc/src/server/api.rs b/crates/rpc/src/server/api.rs index a0ec88859a..1f887175d6 100644 --- a/crates/rpc/src/server/api.rs +++ b/crates/rpc/src/server/api.rs @@ -534,11 +534,13 @@ fn endpoint_limits(params: &[(&str, usize)]) -> proto::rpc::EndpointLimits { /// Cached RPC query parameter limits. static RPC_LIMITS: LazyLock = LazyLock::new(|| { - use QueryParamAccountIdLimit as AccountId; - use QueryParamNoteIdLimit as NoteId; - use QueryParamNoteTagLimit as NoteTag; - use QueryParamNullifierLimit as Nullifier; - use QueryParamStorageMapKeyTotalLimit as StorageMapKeyTotal; + use { + QueryParamAccountIdLimit as AccountId, + QueryParamNoteIdLimit as NoteId, + QueryParamNoteTagLimit as NoteTag, + QueryParamNullifierLimit as Nullifier, + QueryParamStorageMapKeyTotalLimit as StorageMapKeyTotal, + }; proto::rpc::RpcLimits { endpoints: std::collections::HashMap::from([ diff --git a/crates/store/src/db/mod.rs b/crates/store/src/db/mod.rs index a6a9b6aa6e..0c2ac4e0cb 100644 --- a/crates/store/src/db/mod.rs +++ b/crates/store/src/db/mod.rs @@ -252,13 +252,15 @@ impl Db { conn.transaction(move |conn| { models::queries::apply_block( conn, - genesis.header(), - genesis.signature(), - &[], - &[], - genesis.body().updated_accounts(), - genesis.body().transactions(), - None, // Genesis block has no proving inputs. 
+ models::queries::ApplyBlockData { + block_header: genesis.header(), + signature: genesis.signature(), + notes: &[], + nullifiers: &[], + accounts: genesis.body().updated_accounts(), + transactions: genesis.body().transactions(), + proving_inputs: None, // Genesis block has no proving inputs. + }, ) }) .context("failed to insert genesis block")?; @@ -543,13 +545,15 @@ impl Db { self.transact("apply block", move |conn| -> Result<()> { models::queries::apply_block( conn, - signed_block.header(), - signed_block.signature(), - ¬es, - signed_block.body().created_nullifiers(), - signed_block.body().updated_accounts(), - signed_block.body().transactions(), - proving_inputs, + models::queries::ApplyBlockData { + block_header: signed_block.header(), + signature: signed_block.signature(), + notes: ¬es, + nullifiers: signed_block.body().created_nullifiers(), + accounts: signed_block.body().updated_accounts(), + transactions: signed_block.body().transactions(), + proving_inputs, + }, )?; // XXX FIXME TODO free floating mutex MUST NOT exist diff --git a/crates/store/src/db/models/queries/mod.rs b/crates/store/src/db/models/queries/mod.rs index 913f56a88d..91fb95eea3 100644 --- a/crates/store/src/db/models/queries/mod.rs +++ b/crates/store/src/db/models/queries/mod.rs @@ -46,6 +46,17 @@ pub(crate) use nullifiers::*; mod notes; pub(crate) use notes::*; +/// All data needed to apply a new block to the database. +pub(crate) struct ApplyBlockData<'a> { + pub block_header: &'a BlockHeader, + pub signature: &'a Signature, + pub notes: &'a [(NoteRecord, Option)], + pub nullifiers: &'a [Nullifier], + pub accounts: &'a [BlockAccountUpdate], + pub transactions: &'a OrderedTransactionHeaders, + pub proving_inputs: Option>, +} + /// Apply a new block to the state /// /// # Returns @@ -53,21 +64,15 @@ pub(crate) use notes::*; /// Number of records inserted and/or updated. pub(crate) fn apply_block( conn: &mut SqliteConnection, - block_header: &BlockHeader, - signature: &Signature, - notes: &[(NoteRecord, Option)], - nullifiers: &[Nullifier], - accounts: &[BlockAccountUpdate], - transactions: &OrderedTransactionHeaders, - proving_inputs: Option>, + data: ApplyBlockData<'_>, ) -> Result { let mut count = 0; // Note: ordering here is important as the relevant tables have FK dependencies. 
- count += insert_block_header(conn, block_header, signature, proving_inputs)?; - count += upsert_accounts(conn, accounts, block_header.block_num())?; - count += insert_scripts(conn, notes.iter().map(|(note, _)| note))?; - count += insert_notes(conn, notes)?; - count += insert_transactions(conn, block_header.block_num(), transactions)?; - count += insert_nullifiers_for_block(conn, nullifiers, block_header.block_num())?; + count += insert_block_header(conn, data.block_header, data.signature, data.proving_inputs)?; + count += upsert_accounts(conn, data.accounts, data.block_header.block_num())?; + count += insert_scripts(conn, data.notes.iter().map(|(note, _)| note))?; + count += insert_notes(conn, data.notes)?; + count += insert_transactions(conn, data.block_header.block_num(), data.transactions)?; + count += insert_nullifiers_for_block(conn, data.nullifiers, data.block_header.block_num())?; Ok(count) } From 1f2919bea4aac7121f93c5bdefd59537f2e36bc2 Mon Sep 17 00:00:00 2001 From: sergerad Date: Mon, 2 Mar 2026 15:44:18 +1300 Subject: [PATCH 10/73] Undo fmt --- crates/rpc/src/server/api.rs | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) diff --git a/crates/rpc/src/server/api.rs b/crates/rpc/src/server/api.rs index 1f887175d6..a0ec88859a 100644 --- a/crates/rpc/src/server/api.rs +++ b/crates/rpc/src/server/api.rs @@ -534,13 +534,11 @@ fn endpoint_limits(params: &[(&str, usize)]) -> proto::rpc::EndpointLimits { /// Cached RPC query parameter limits. static RPC_LIMITS: LazyLock = LazyLock::new(|| { - use { - QueryParamAccountIdLimit as AccountId, - QueryParamNoteIdLimit as NoteId, - QueryParamNoteTagLimit as NoteTag, - QueryParamNullifierLimit as Nullifier, - QueryParamStorageMapKeyTotalLimit as StorageMapKeyTotal, - }; + use QueryParamAccountIdLimit as AccountId; + use QueryParamNoteIdLimit as NoteId; + use QueryParamNoteTagLimit as NoteTag; + use QueryParamNullifierLimit as Nullifier; + use QueryParamStorageMapKeyTotalLimit as StorageMapKeyTotal; proto::rpc::RpcLimits { endpoints: std::collections::HashMap::from([ From 14e16f4e3ba27a9d8629aa96e93cb60a9a0c05c5 Mon Sep 17 00:00:00 2001 From: sergerad Date: Mon, 2 Mar 2026 15:55:23 +1300 Subject: [PATCH 11/73] Wrap up signed block todo --- crates/store/src/server/block_producer.rs | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/crates/store/src/server/block_producer.rs b/crates/store/src/server/block_producer.rs index 4cdc2c7a21..580109be2d 100644 --- a/crates/store/src/server/block_producer.rs +++ b/crates/store/src/server/block_producer.rs @@ -107,10 +107,8 @@ impl block_producer_server::BlockProducer for StoreApi { let this = self.clone(); tokio::spawn( async move { - // SAFETY: The header, body, and signature are assumed to - // correspond to each other because they are provided by the Block - // Producer. - let signed_block = SignedBlock::new_unchecked(header, body, signature); // TODO(sergerad): Use `SignedBlock::new()` when available. + let signed_block = SignedBlock::new(header, body, signature) + .map_err(|err| Status::new(tonic::Code::Internal, err.as_report()))?; // Note: This is an internal endpoint, so its safe to expose the full error // report. 
this.state From 1b21962145efdfdf31e0c61cd0fa2e497a7a6cfc Mon Sep 17 00:00:00 2001 From: sergerad Date: Mon, 2 Mar 2026 16:00:12 +1300 Subject: [PATCH 12/73] Pass blockproofrequest down --- crates/store/src/db/mod.rs | 6 +++--- crates/store/src/server/block_producer.rs | 7 +++---- crates/store/src/state/apply_block.rs | 3 ++- 3 files changed, 8 insertions(+), 8 deletions(-) diff --git a/crates/store/src/db/mod.rs b/crates/store/src/db/mod.rs index 0c2ac4e0cb..988402febd 100644 --- a/crates/store/src/db/mod.rs +++ b/crates/store/src/db/mod.rs @@ -5,7 +5,7 @@ use std::path::PathBuf; use anyhow::Context; use diesel::{Connection, QueryableByName, RunQueryDsl, SqliteConnection}; use miden_node_proto::domain::account::AccountInfo; -use miden_node_proto::generated as proto; +use miden_node_proto::{BlockProofRequest, generated as proto}; use miden_node_utils::tracing::OpenTelemetrySpanExt; use miden_protocol::Word; use miden_protocol::account::{AccountHeader, AccountId, AccountStorageHeader}; @@ -540,7 +540,7 @@ impl Db { acquire_done: oneshot::Receiver<()>, signed_block: SignedBlock, notes: Vec<(NoteRecord, Option)>, - proving_inputs: Option>, + proving_inputs: Option, ) -> Result<()> { self.transact("apply block", move |conn| -> Result<()> { models::queries::apply_block( @@ -552,7 +552,7 @@ impl Db { nullifiers: signed_block.body().created_nullifiers(), accounts: signed_block.body().updated_accounts(), transactions: signed_block.body().transactions(), - proving_inputs, + proving_inputs: proving_inputs.map(|request| request.to_bytes()), }, )?; diff --git a/crates/store/src/server/block_producer.rs b/crates/store/src/server/block_producer.rs index 580109be2d..823b3f368a 100644 --- a/crates/store/src/server/block_producer.rs +++ b/crates/store/src/server/block_producer.rs @@ -89,14 +89,13 @@ impl block_producer_server::BlockProducer for StoreApi { span.set_attribute("block.output_notes.count", body.output_notes().count()); span.set_attribute("block.nullifiers.count", body.created_nullifiers().len()); - // Serialize proving inputs so they can be persisted alongside the block for - // deferred proving. + // Construct block proof request to be stored alongside the block for deferred block + // proving. let proving_inputs = BlockProofRequest { tx_batches: ordered_batches, block_header: header.clone(), block_inputs, - } - .to_bytes(); + }; // We perform the apply block work in a separate task. 
This prevents the caller // cancelling the request and thereby cancelling the task at an arbitrary point of diff --git a/crates/store/src/state/apply_block.rs b/crates/store/src/state/apply_block.rs index 40d106b74d..6f901d5bbf 100644 --- a/crates/store/src/state/apply_block.rs +++ b/crates/store/src/state/apply_block.rs @@ -1,5 +1,6 @@ use std::sync::Arc; +use miden_node_proto::BlockProofRequest; use miden_node_utils::ErrorReport; use miden_protocol::account::delta::AccountUpdateDetails; use miden_protocol::block::SignedBlock; @@ -44,7 +45,7 @@ impl State { pub async fn apply_block( &self, signed_block: SignedBlock, - proving_inputs: Option>, + proving_inputs: Option, ) -> Result<(), ApplyBlockError> { let _lock = self.writer.try_lock().map_err(|_| ApplyBlockError::ConcurrentWrite)?; From 671630a90b289eb7e4b6171026a438a78256acf6 Mon Sep 17 00:00:00 2001 From: sergerad Date: Mon, 2 Mar 2026 16:11:27 +1300 Subject: [PATCH 13/73] Lint --- crates/store/src/server/block_producer.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/crates/store/src/server/block_producer.rs b/crates/store/src/server/block_producer.rs index 823b3f368a..98f04ec7f9 100644 --- a/crates/store/src/server/block_producer.rs +++ b/crates/store/src/server/block_producer.rs @@ -11,7 +11,7 @@ use miden_node_utils::tracing::OpenTelemetrySpanExt; use miden_protocol::Word; use miden_protocol::batch::OrderedBatches; use miden_protocol::block::{BlockBody, BlockHeader, BlockNumber, SignedBlock}; -use miden_protocol::utils::{Deserializable, Serializable}; +use miden_protocol::utils::Deserializable; use tonic::{Request, Response, Status}; use tracing::Instrument; From b6f3d3c85a813c0e4021ee94b0a555ffbe0f7e47 Mon Sep 17 00:00:00 2001 From: sergerad Date: Mon, 2 Mar 2026 16:35:06 +1300 Subject: [PATCH 14/73] Fix stress tests --- bin/stress-test/src/seeding/mod.rs | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/bin/stress-test/src/seeding/mod.rs b/bin/stress-test/src/seeding/mod.rs index 70cbf04fda..19d5e2ac55 100644 --- a/bin/stress-test/src/seeding/mod.rs +++ b/bin/stress-test/src/seeding/mod.rs @@ -91,7 +91,7 @@ pub async fn seed_store( let faucet = create_faucet(); let fee_params = FeeParameters::new(faucet.id(), 0).unwrap(); let signer = EcdsaSecretKey::new(); - let genesis_state = GenesisState::new(vec![faucet.clone()], fee_params, 1, 1, signer); + let genesis_state = GenesisState::new(vec![faucet.clone()], fee_params, 1, 1, signer.clone()); Store::bootstrap(genesis_state.clone(), &data_directory) .await .expect("store should bootstrap"); @@ -113,6 +113,7 @@ pub async fn seed_store( &store_client, data_directory, accounts_filepath, + &signer, ) .await; @@ -124,6 +125,7 @@ pub async fn seed_store( /// /// The first transaction in each batch sends assets from the faucet to 255 accounts. /// The rest of the transactions consume the notes created by the faucet in the previous block. +#[expect(clippy::too_many_arguments)] async fn generate_blocks( num_accounts: usize, public_accounts_percentage: u8, @@ -132,6 +134,7 @@ async fn generate_blocks( store_client: &StoreClient, data_directory: DataDirectory, accounts_filepath: PathBuf, + signer: &EcdsaSecretKey, ) -> SeedingMetrics { // Each block is composed of [`BATCHES_PER_BLOCK`] batches, and each batch is composed of // [`TRANSACTIONS_PER_BATCH`] txs. 
The first note of the block is always a send assets tx @@ -210,7 +213,8 @@ async fn generate_blocks( let block_inputs = get_block_inputs(store_client, &batches, &mut metrics).await; // update blocks - prev_block_header = apply_block(batches, block_inputs, store_client, &mut metrics).await; + prev_block_header = + apply_block(batches, block_inputs, store_client, &mut metrics, signer).await; if current_anchor_header.block_epoch() != prev_block_header.block_epoch() { current_anchor_header = prev_block_header.clone(); } @@ -245,11 +249,12 @@ async fn apply_block( block_inputs: BlockInputs, store_client: &StoreClient, metrics: &mut SeedingMetrics, + signer: &EcdsaSecretKey, ) -> BlockHeader { let proposed_block = ProposedBlock::new(block_inputs, batches).unwrap(); let (header, body) = proposed_block.clone().into_header_and_body().unwrap(); let block_size: usize = header.to_bytes().len() + body.to_bytes().len(); - let signature = EcdsaSecretKey::new().sign(header.commitment()); + let signature = signer.sign(header.commitment()); // SAFETY: The header, body, and signature are known to correspond to each other. let signed_block = SignedBlock::new_unchecked(header, body, signature); let ordered_batches = proposed_block.batches().clone(); From 68565852e5a6f1c81f450fb4f70ba0f0e5e538c1 Mon Sep 17 00:00:00 2001 From: sergerad Date: Mon, 2 Mar 2026 16:55:39 +1300 Subject: [PATCH 15/73] Changelog --- CHANGELOG.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index fe3acc00c1..c943006fe6 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -11,6 +11,8 @@ - Introduce `SyncChainMmr` RPC endpoint to sync chain MMR deltas within specified block ranges ([#1591](https://github.com/0xMiden/miden-node/issues/1591)). - Fixed `TransactionHeader` serialization for row insertion on database & fixed transaction cursor on retrievals ([#1701](https://github.com/0xMiden/miden-node/issues/1701)). - Added KMS signing support in validator ([#1677](https://github.com/0xMiden/miden-node/pull/1677)). +- Restructured block proving to be asynchronous and added finality field for `SyncChainMmr` requests ([#1725](https://github.com/0xMiden/miden-node/pull/1725)). 
+ ### Changes From 3b0ba1074ae3780105a58fd6d74b9dc158dd5a33 Mon Sep 17 00:00:00 2001 From: sergerad Date: Tue, 3 Mar 2026 09:44:50 +1300 Subject: [PATCH 16/73] Fix proving inputs --- crates/store/src/db/mod.rs | 2 +- crates/store/src/db/models/queries/block_headers.rs | 5 +++-- crates/store/src/db/models/queries/mod.rs | 3 ++- 3 files changed, 6 insertions(+), 4 deletions(-) diff --git a/crates/store/src/db/mod.rs b/crates/store/src/db/mod.rs index 988402febd..931134307f 100644 --- a/crates/store/src/db/mod.rs +++ b/crates/store/src/db/mod.rs @@ -552,7 +552,7 @@ impl Db { nullifiers: signed_block.body().created_nullifiers(), accounts: signed_block.body().updated_accounts(), transactions: signed_block.body().transactions(), - proving_inputs: proving_inputs.map(|request| request.to_bytes()), + proving_inputs, }, )?; diff --git a/crates/store/src/db/models/queries/block_headers.rs b/crates/store/src/db/models/queries/block_headers.rs index 0995ce6de8..0066f8e3b6 100644 --- a/crates/store/src/db/models/queries/block_headers.rs +++ b/crates/store/src/db/models/queries/block_headers.rs @@ -13,6 +13,7 @@ use diesel::{ }; use miden_crypto::Word; use miden_crypto::dsa::ecdsa_k256_keccak::Signature; +use miden_node_proto::BlockProofRequest; use miden_node_utils::limiter::{QueryParamBlockLimit, QueryParamLimiter}; use miden_protocol::block::{BlockHeader, BlockNumber, BlockProof}; use miden_protocol::utils::{Deserializable, Serializable}; @@ -231,14 +232,14 @@ pub(crate) fn insert_block_header( conn: &mut SqliteConnection, block_header: &BlockHeader, signature: &Signature, - proving_inputs: Option>, + proving_inputs: Option, ) -> Result { let row = BlockHeaderInsert { block_num: block_header.block_num().to_raw_sql(), block_header: block_header.to_bytes(), signature: signature.to_bytes(), commitment: BlockHeaderCommitment::new(block_header).to_raw_sql(), - proving_inputs, + proving_inputs: proving_inputs.map(|inputs| inputs.to_bytes()), }; let count = diesel::insert_into(schema::block_headers::table).values(&[row]).execute(conn)?; Ok(count) diff --git a/crates/store/src/db/models/queries/mod.rs b/crates/store/src/db/models/queries/mod.rs index 91fb95eea3..bca225d800 100644 --- a/crates/store/src/db/models/queries/mod.rs +++ b/crates/store/src/db/models/queries/mod.rs @@ -27,6 +27,7 @@ use diesel::SqliteConnection; use miden_crypto::dsa::ecdsa_k256_keccak::Signature; +use miden_node_proto::BlockProofRequest; use miden_protocol::block::{BlockAccountUpdate, BlockHeader}; use miden_protocol::note::Nullifier; use miden_protocol::transaction::OrderedTransactionHeaders; @@ -54,7 +55,7 @@ pub(crate) struct ApplyBlockData<'a> { pub nullifiers: &'a [Nullifier], pub accounts: &'a [BlockAccountUpdate], pub transactions: &'a OrderedTransactionHeaders, - pub proving_inputs: Option>, + pub proving_inputs: Option, } /// Apply a new block to the state From 98c0aa6fce9d1db278527af2e113d5356a79f9da Mon Sep 17 00:00:00 2001 From: sergerad Date: Tue, 3 Mar 2026 09:48:24 +1300 Subject: [PATCH 17/73] Handle docstring --- crates/store/src/server/api.rs | 1 + 1 file changed, 1 insertion(+) diff --git a/crates/store/src/server/api.rs b/crates/store/src/server/api.rs index cbe5d7acc1..d61f1b020e 100644 --- a/crates/store/src/server/api.rs +++ b/crates/store/src/server/api.rs @@ -23,6 +23,7 @@ use crate::state::State; #[derive(Clone)] pub struct StoreApi { pub(super) state: Arc, + /// Handle used to notify proof scheduler of newly committed blocks. 
pub(super) proof_scheduler: ProofSchedulerHandle, } From 1189d4675c42227943e9ae0025362ac27cc0c8a5 Mon Sep 17 00:00:00 2001 From: sergerad Date: Tue, 3 Mar 2026 09:49:27 +1300 Subject: [PATCH 18/73] Update genesis comment --- crates/store/src/db/mod.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/crates/store/src/db/mod.rs b/crates/store/src/db/mod.rs index 931134307f..6c8c471ce1 100644 --- a/crates/store/src/db/mod.rs +++ b/crates/store/src/db/mod.rs @@ -259,7 +259,7 @@ impl Db { nullifiers: &[], accounts: genesis.body().updated_accounts(), transactions: genesis.body().transactions(), - proving_inputs: None, // Genesis block has no proving inputs. + proving_inputs: None, // Genesis block is never proven. }, ) }) From a2b595209d42bb5a598d45b05c06ba1990ddccd0 Mon Sep 17 00:00:00 2001 From: sergerad Date: Tue, 3 Mar 2026 10:03:43 +1300 Subject: [PATCH 19/73] RM arc clone --- crates/store/src/server/mod.rs | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/crates/store/src/server/mod.rs b/crates/store/src/server/mod.rs index 636d084f50..5af0a418f9 100644 --- a/crates/store/src/server/mod.rs +++ b/crates/store/src/server/mod.rs @@ -115,8 +115,7 @@ impl Store { // Spawn the proof scheduler as a background task. It will immediately pick up any // unproven blocks from previous runs and begin proving them. - let proof_scheduler_handle = - proof_scheduler::spawn(Arc::clone(&state.db), Arc::clone(&block_prover)); + let proof_scheduler_handle = proof_scheduler::spawn(state.db.clone(), block_prover); let rpc_service = store::rpc_server::RpcServer::new(api::StoreApi { state: Arc::clone(&state), From 8a6585167ab6cb50fe083ed27f36fc17ed4425f4 Mon Sep 17 00:00:00 2001 From: sergerad Date: Tue, 3 Mar 2026 10:11:21 +1300 Subject: [PATCH 20/73] load_proving_inputs comments --- crates/store/src/server/proof_scheduler.rs | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/crates/store/src/server/proof_scheduler.rs b/crates/store/src/server/proof_scheduler.rs index f977e7b789..8e696c0f89 100644 --- a/crates/store/src/server/proof_scheduler.rs +++ b/crates/store/src/server/proof_scheduler.rs @@ -216,7 +216,10 @@ async fn load_proving_inputs( let mut retry_delay = INITIAL_RETRY_DELAY; loop { + // Load proving inputs from the DB. match db.select_block_proving_inputs(block_num).await { + // Inputs found. All committed blocks should have inputs available apart from the + // genesis block. Ok(Some(bytes)) => { return BlockProofRequest::read_from_bytes(&bytes[..]).map_err(|err| { error!( @@ -228,6 +231,8 @@ async fn load_proving_inputs( ProveBlockError::DeserializationFailed }); }, + // Inputs not found. This should never happen for committed blocks. The genesis block + // does not have inputs but it should not be queried by this function. Ok(None) => { error!( target: COMPONENT, @@ -236,6 +241,7 @@ async fn load_proving_inputs( ); return Err(ProveBlockError::MissingProvingInputs); }, + // Failed to retrieve proving inputs. Treat as retryable error. 
Err(err) => { warn!( target: COMPONENT, From d355dc563ea5306f02858a7ccf9f44fcb13ef45c Mon Sep 17 00:00:00 2001 From: sergerad Date: Tue, 3 Mar 2026 10:14:28 +1300 Subject: [PATCH 21/73] Comments --- crates/store/src/server/proof_scheduler.rs | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/crates/store/src/server/proof_scheduler.rs b/crates/store/src/server/proof_scheduler.rs index 8e696c0f89..5315919826 100644 --- a/crates/store/src/server/proof_scheduler.rs +++ b/crates/store/src/server/proof_scheduler.rs @@ -127,15 +127,18 @@ fn order_proving_jobs( > { let mut futures = FuturesOrdered::new(); for &block_num in unproven_blocks { + // Clone the resources for each future. let db = Arc::clone(db); let block_prover = Arc::clone(block_prover); - futures.push_back(async move { + // Define the future. + let fut = async move { + // Prove block with timeout. let result = tokio::time::timeout( BLOCK_PROVE_TIMEOUT, prove_block(&db, &block_prover, block_num), ) .await; - + // Handle proving result. match result { Ok(proof) => (block_num, proof), Err(elapsed) => { @@ -148,13 +151,15 @@ fn order_proving_jobs( (block_num, Err(ProveBlockError::Timeout)) }, } - }); + }; + futures.push_back(fut); } futures } /// Persists a proven block proof to the DB. Logs on success or failure. async fn persist_proof(db: &Db, block_num: BlockNumber, proof: &BlockProof) { + // Persist the block proof to the database. match db.insert_block_proof(block_num, proof).await { Ok(()) => { info!( From 769d2bfbb95a10b3048233b2745cbfa871791a3a Mon Sep 17 00:00:00 2001 From: sergerad Date: Tue, 3 Mar 2026 11:40:53 +1300 Subject: [PATCH 22/73] refactor errors and retries --- crates/store/src/errors.rs | 15 ++ crates/store/src/server/mod.rs | 11 +- crates/store/src/server/proof_scheduler.rs | 245 +++++++++------------ 3 files changed, 125 insertions(+), 146 deletions(-) diff --git a/crates/store/src/errors.rs b/crates/store/src/errors.rs index 32bb67ddc4..5d809d5f16 100644 --- a/crates/store/src/errors.rs +++ b/crates/store/src/errors.rs @@ -30,6 +30,21 @@ use tonic::Status; use crate::db::models::conv::DatabaseTypeConversionError; use crate::inner_forest::{InnerForestError, WitnessError}; +// PROOF SCHEDULER ERRORS +// ================================================================================================= + +#[derive(Debug, Error)] +pub enum ProofSchedulerError { + #[error("no proving inputs found for block {0}")] + MissingProvingInputs(BlockNumber), + #[error("failed to deserialize proving inputs for block")] + DeserializationFailed(#[source] DeserializationError), + #[error("failed to persist block proof for block")] + PersistFailed(#[source] DatabaseError), + #[error("invalid remote prover endpoint: {0}")] + InvalidProverEndpoint(String), +} + // DATABASE ERRORS // ================================================================================================= diff --git a/crates/store/src/server/mod.rs b/crates/store/src/server/mod.rs index 5af0a418f9..07486bccb2 100644 --- a/crates/store/src/server/mod.rs +++ b/crates/store/src/server/mod.rs @@ -90,6 +90,7 @@ impl Store { /// Serves the store APIs (rpc, ntx-builder, block-producer) and DB maintenance background task. /// /// Note: this blocks until the server dies. 
+ #[expect(clippy::too_many_lines)] pub async fn serve(self) -> anyhow::Result<()> { let rpc_address = self.rpc_listener.local_addr()?; let ntx_builder_address = self.ntx_builder_listener.local_addr()?; @@ -115,7 +116,8 @@ impl Store { // Spawn the proof scheduler as a background task. It will immediately pick up any // unproven blocks from previous runs and begin proving them. - let proof_scheduler_handle = proof_scheduler::spawn(state.db.clone(), block_prover); + let (proof_scheduler_handle, proof_scheduler_task) = + proof_scheduler::spawn(state.db.clone(), block_prover); let rpc_service = store::rpc_server::RpcServer::new(api::StoreApi { state: Arc::clone(&state), @@ -205,6 +207,13 @@ impl Store { result = service => result, Some(err) = termination_signal.recv() => { Err(anyhow::anyhow!("received termination signal").context(err)) + }, + result = proof_scheduler_task => { + match result { + Ok(Ok(())) => Err(anyhow::anyhow!("proof scheduler exited unexpectedly")), + Ok(Err(err)) => Err(anyhow::anyhow!("proof scheduler fatal error").context(err)), + Err(join_err) => Err(anyhow::anyhow!("proof scheduler panicked").context(join_err)), + } } } } diff --git a/crates/store/src/server/proof_scheduler.rs b/crates/store/src/server/proof_scheduler.rs index 5315919826..324d4621b4 100644 --- a/crates/store/src/server/proof_scheduler.rs +++ b/crates/store/src/server/proof_scheduler.rs @@ -7,10 +7,10 @@ //! 3. Proves blocks concurrently, but resolves completions in FIFO order via [`FuturesOrdered`]. //! This ensures the ancestor rule: a block's proof is only persisted after all ancestor proofs //! have been persisted. -//! 4. Each proving future includes retry logic with exponential backoff and an overall timeout. -//! 5. On fatal errors (e.g. deserialization failures, timeout exhaustion), the future resolves with -//! an error. The scheduler logs it and continues — the block will be retried on the next -//! iteration. +//! 4. On transient errors (DB reads, prover failures, timeouts), the scheduler abandons the current +//! batch, re-queries unproven blocks, and retries from scratch. +//! 5. On fatal errors (e.g. deserialization failures, missing proving inputs), the scheduler +//! returns the error to the caller for node shutdown. use std::sync::Arc; use std::time::Duration; @@ -19,24 +19,21 @@ use futures::StreamExt; use futures::stream::FuturesOrdered; use miden_node_proto::domain::proof_request::BlockProofRequest; use miden_protocol::block::{BlockNumber, BlockProof}; -use miden_protocol::utils::{Deserializable, Serializable}; +use miden_protocol::utils::Deserializable; +use miden_remote_prover_client::RemoteProverClientError; use tokio::sync::Notify; -use tracing::{error, info, instrument, warn}; +use tokio::task::JoinHandle; +use tracing::{error, info, instrument}; use crate::COMPONENT; use crate::db::Db; -use crate::server::block_prover_client::BlockProver; +use crate::errors::{DatabaseError, ProofSchedulerError}; +use crate::server::block_prover_client::{BlockProver, StoreProverError}; // CONSTANTS // ================================================================================================ -/// Initial retry delay on proving failure. -const INITIAL_RETRY_DELAY: Duration = Duration::from_secs(1); - -/// Maximum retry delay (caps the exponential backoff). -const MAX_RETRY_DELAY: Duration = Duration::from_secs(60); - -/// Overall timeout for proving a single block (including all retries). +/// Overall timeout for proving a single block. 
const BLOCK_PROVE_TIMEOUT: Duration = Duration::from_mins(2); // PROOF SCHEDULER @@ -50,6 +47,7 @@ pub struct ProofSchedulerHandle { impl ProofSchedulerHandle { /// Notify the scheduler that a new block has been committed and may need proving. + #[instrument(target = COMPONENT, name = "proof_scheduler.notify", skip_all)] pub fn notify_block_committed(&self) { self.notify.notify_one(); } @@ -58,22 +56,32 @@ impl ProofSchedulerHandle { /// Spawns the proof scheduler as a background tokio task. /// /// Returns a [`ProofSchedulerHandle`] that should be used to notify the scheduler when new -/// blocks are committed. -pub fn spawn(db: Arc, block_prover: Arc) -> ProofSchedulerHandle { +/// blocks are committed, and a [`JoinHandle`] that resolves when the scheduler encounters a +/// fatal error or completes unexpectedly. +pub fn spawn( + db: Arc, + block_prover: Arc, +) -> (ProofSchedulerHandle, JoinHandle>) { let notify = Arc::new(Notify::new()); let handle = ProofSchedulerHandle { notify: Arc::clone(¬ify) }; - tokio::spawn(run(db, block_prover, notify)); + let join_handle = tokio::spawn(run(db, block_prover, notify)); - handle + (handle, join_handle) } /// Main loop of the proof scheduler. /// /// Uses [`FuturesOrdered`] to run proving concurrently while resolving completions in block /// order. This provides natural backpressure and ensures proofs are persisted sequentially. -#[instrument(target = COMPONENT, name = "proof_scheduler", skip_all)] -async fn run(db: Arc, block_prover: Arc, notify: Arc) { +/// +/// Returns `Err` on irrecoverable errors (missing/corrupt proving inputs, DB write failures). +/// Transient errors are retried internally. +async fn run( + db: Arc, + block_prover: Arc, + notify: Arc, +) -> Result<(), ProofSchedulerError> { info!(target: COMPONENT, "Proof scheduler started"); loop { @@ -82,7 +90,7 @@ async fn run(db: Arc, block_prover: Arc, notify: Arc) { Ok(blocks) => blocks, Err(err) => { error!(target: COMPONENT, %err, "Failed to query unproven blocks, retrying"); - tokio::time::sleep(INITIAL_RETRY_DELAY).await; + tokio::time::sleep(Duration::from_secs(1)).await; continue; }, }; @@ -100,16 +108,29 @@ async fn run(db: Arc, block_prover: Arc, notify: Arc) { let mut proving_futures = order_proving_jobs(&db, &block_prover, &unproven_blocks); while let Some((block_num, result)) = proving_futures.next().await { match result { - Ok(proof) => persist_proof(&db, block_num, &proof).await, - Err(err) => { + Ok(proof) => { + db.insert_block_proof(block_num, &proof) + .await + .map_err(ProofSchedulerError::PersistFailed)?; + }, + Err(ProveBlockError::Fatal(err)) => return Err(err), + Err(ProveBlockError::Transient(err)) => { error!( target: COMPONENT, %block_num, - ?err, + %err, "Block proving failed, abandoning batch and retrying next iteration" ); break; }, + Err(ProveBlockError::Timeout) => { + error!( + target: COMPONENT, + %block_num, + "Block proving timed out, abandoning batch and retrying next iteration" + ); + break; + }, } } } @@ -141,15 +162,7 @@ fn order_proving_jobs( // Handle proving result. match result { Ok(proof) => (block_num, proof), - Err(elapsed) => { - error!( - target: COMPONENT, - %block_num, - "Block proving timed out after {:?}", - elapsed, - ); - (block_num, Err(ProveBlockError::Timeout)) - }, + Err(_elapsed) => (block_num, Err(ProveBlockError::Timeout)), } }; futures.push_back(fut); @@ -157,136 +170,78 @@ fn order_proving_jobs( futures } -/// Persists a proven block proof to the DB. Logs on success or failure. 
-async fn persist_proof(db: &Db, block_num: BlockNumber, proof: &BlockProof) { - // Persist the block proof to the database. - match db.insert_block_proof(block_num, proof).await { - Ok(()) => { - info!( - target: COMPONENT, - %block_num, - proof_size = proof.to_bytes().len(), - "Block proof persisted" - ); - }, - Err(err) => { - error!( - target: COMPONENT, - %block_num, - %err, - "Failed to persist block proof" - ); - }, - } -} - // PROVE BLOCK // ================================================================================================ -/// Errors that can occur during block proving. -#[derive(Debug)] -enum ProveBlockError { - /// The proving inputs were not found in the database. - MissingProvingInputs, - /// The proving inputs could not be deserialized. - DeserializationFailed, - /// The overall proving timeout was exceeded. - Timeout, -} - -/// Proves a single block, retrying with exponential backoff on transient failures. -/// -/// Returns the proof on success, or a fatal error if proving cannot succeed (missing or -/// corrupt proving inputs). +/// Proves a single block. /// -/// This function is designed to be run as a future inside [`FuturesOrdered`]. Transient -/// errors (DB reads, prover failures) are retried internally. Only fatal errors are returned. +/// Loads proving inputs from the DB, deserializes them, and invokes the block prover. +/// blocks. +#[instrument(target = COMPONENT, name = "proof_scheduler.prove_block", skip_all, fields(%block_num))] async fn prove_block( db: &Db, block_prover: &BlockProver, block_num: BlockNumber, ) -> Result { - // Load and deserialize proving inputs (with retries for transient DB errors). - let request = load_proving_inputs(db, block_num).await?; + // Load proving inputs from the DB. + let bytes = match db.select_block_proving_inputs(block_num).await { + Ok(Some(bytes)) => bytes, + // Inputs not found. This should never happen for committed blocks. The genesis block + // does not have inputs but it should not be queried by this function. + Ok(None) => { + return Err(ProveBlockError::Fatal(ProofSchedulerError::MissingProvingInputs( + block_num, + ))); + }, + // Failed to retrieve proving inputs. + Err(err) => return Err(err.into()), + }; + + // Deserialize proving inputs. + let request = BlockProofRequest::read_from_bytes(&bytes[..]) + .map_err(|err| ProveBlockError::Fatal(ProofSchedulerError::DeserializationFailed(err)))?; - // Prove the block (with retries for transient prover errors). - prove_with_retries(block_prover, block_num, request).await + // Prove the block. + let proof = block_prover + .prove(request.tx_batches, request.block_inputs, &request.block_header) + .await + .map_err(ProveBlockError::from)?; + + Ok(proof) } -/// Loads and deserializes proving inputs from the DB, retrying on transient DB errors. -async fn load_proving_inputs( - db: &Db, - block_num: BlockNumber, -) -> Result { - let mut retry_delay = INITIAL_RETRY_DELAY; +// PROVE BLOCK ERROR +// ================================================================================================ - loop { - // Load proving inputs from the DB. - match db.select_block_proving_inputs(block_num).await { - // Inputs found. All committed blocks should have inputs available apart from the - // genesis block. - Ok(Some(bytes)) => { - return BlockProofRequest::read_from_bytes(&bytes[..]).map_err(|err| { - error!( - target: COMPONENT, - %block_num, - %err, - "Failed to deserialize proving inputs" - ); - ProveBlockError::DeserializationFailed - }); - }, - // Inputs not found. 
This should never happen for committed blocks. The genesis block - // does not have inputs but it should not be queried by this function. - Ok(None) => { - error!( - target: COMPONENT, - %block_num, - "No proving inputs found for unproven block" - ); - return Err(ProveBlockError::MissingProvingInputs); - }, - // Failed to retrieve proving inputs. Treat as retryable error. - Err(err) => { - warn!( - target: COMPONENT, - %block_num, - %err, - ?retry_delay, - "Failed to load proving inputs, retrying" - ); - tokio::time::sleep(retry_delay).await; - retry_delay = (retry_delay * 2).min(MAX_RETRY_DELAY); +/// Errors that can occur during block proving. +#[derive(Debug)] +enum ProveBlockError { + /// An irrecoverable error that should cause node shutdown. + Fatal(ProofSchedulerError), + /// A transient error (DB read, prover failure). The outer loop will retry. + Transient(Box), + /// The overall proving timeout was exceeded. Retriable on next iteration. + Timeout, +} + +impl From for ProveBlockError { + fn from(err: DatabaseError) -> Self { + match err { + DatabaseError::DeserializationError(err) => { + Self::Fatal(ProofSchedulerError::DeserializationFailed(err)) }, + _ => Self::Transient(err.into()), } } } -/// Calls the block prover, retrying with exponential backoff on failure. -async fn prove_with_retries( - block_prover: &BlockProver, - block_num: BlockNumber, - request: BlockProofRequest, -) -> Result { - let mut retry_delay = INITIAL_RETRY_DELAY; - - loop { - match block_prover - .prove(request.tx_batches.clone(), request.block_inputs.clone(), &request.block_header) - .await - { - Ok(proof) => return Ok(proof), - Err(err) => { - warn!( - target: COMPONENT, - %block_num, - %err, - ?retry_delay, - "Block proving failed, retrying" - ); - tokio::time::sleep(retry_delay).await; - retry_delay = (retry_delay * 2).min(MAX_RETRY_DELAY); - }, +impl From for ProveBlockError { + fn from(err: StoreProverError) -> Self { + match err { + StoreProverError::RemoteProvingFailed(RemoteProverClientError::InvalidEndpoint( + uri, + )) => Self::Fatal(ProofSchedulerError::InvalidProverEndpoint(uri)), + _ => Self::Transient(err.into()), } } } From 549d808430a761f912634e5c086cddf222beba21 Mon Sep 17 00:00:00 2001 From: sergerad Date: Tue, 3 Mar 2026 12:41:40 +1300 Subject: [PATCH 23/73] Tidy up future results --- crates/store/src/server/proof_scheduler.rs | 18 ++++++++---------- 1 file changed, 8 insertions(+), 10 deletions(-) diff --git a/crates/store/src/server/proof_scheduler.rs b/crates/store/src/server/proof_scheduler.rs index 324d4621b4..4d106507cc 100644 --- a/crates/store/src/server/proof_scheduler.rs +++ b/crates/store/src/server/proof_scheduler.rs @@ -106,9 +106,9 @@ async fn run( // The outer loop will re-query unproven blocks and restart the sequence, ensuring // we never persist a proof while an ancestor block is still unproven. 
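The ordering guarantee leaned on here — `FuturesOrdered` yields results in push order even when a later future finishes first — can be seen in a minimal sketch. This is illustrative only and assumes just the `tokio` and `futures` crates; the `u32` "block numbers" are stand-ins, not the scheduler's own types:

```rust
use std::time::Duration;

use futures::StreamExt;
use futures::future::BoxFuture;
use futures::stream::FuturesOrdered;

#[tokio::main]
async fn main() {
    let mut jobs: FuturesOrdered<BoxFuture<'static, u32>> = FuturesOrdered::new();

    // "Block 1" is pushed first but finishes last.
    jobs.push_back(Box::pin(async {
        tokio::time::sleep(Duration::from_millis(50)).await;
        1u32
    }));
    // "Block 2" finishes immediately.
    jobs.push_back(Box::pin(async { 2u32 }));

    // Results come back in push order, not completion order, so block 1 is
    // observed (and could be persisted) before block 2.
    assert_eq!(jobs.next().await, Some(1));
    assert_eq!(jobs.next().await, Some(2));
    assert_eq!(jobs.next().await, None);
}
```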
let mut proving_futures = order_proving_jobs(&db, &block_prover, &unproven_blocks); - while let Some((block_num, result)) = proving_futures.next().await { - match result { - Ok(proof) => { + while let Some(timeout_result) = proving_futures.next().await { + match timeout_result { + Ok((block_num, proof)) => { db.insert_block_proof(block_num, &proof) .await .map_err(ProofSchedulerError::PersistFailed)?; @@ -117,7 +117,6 @@ async fn run( Err(ProveBlockError::Transient(err)) => { error!( target: COMPONENT, - %block_num, %err, "Block proving failed, abandoning batch and retrying next iteration" ); @@ -126,7 +125,6 @@ async fn run( Err(ProveBlockError::Timeout) => { error!( target: COMPONENT, - %block_num, "Block proving timed out, abandoning batch and retrying next iteration" ); break; @@ -144,7 +142,7 @@ fn order_proving_jobs( block_prover: &Arc, unproven_blocks: &[BlockNumber], ) -> FuturesOrdered< - impl std::future::Future)>, + impl std::future::Future>, > { let mut futures = FuturesOrdered::new(); for &block_num in unproven_blocks { @@ -154,15 +152,15 @@ fn order_proving_jobs( // Define the future. let fut = async move { // Prove block with timeout. - let result = tokio::time::timeout( + let timeout_result = tokio::time::timeout( BLOCK_PROVE_TIMEOUT, prove_block(&db, &block_prover, block_num), ) .await; // Handle proving result. - match result { - Ok(proof) => (block_num, proof), - Err(_elapsed) => (block_num, Err(ProveBlockError::Timeout)), + match timeout_result { + Ok(proof_result) => proof_result.map(|proof| (block_num, proof)), + Err(_elapsed) => Err(ProveBlockError::Timeout), } }; futures.push_back(fut); From dab79e4f7d94fb331f7fd49a7f48cf1aac47772e Mon Sep 17 00:00:00 2001 From: sergerad Date: Tue, 3 Mar 2026 12:45:24 +1300 Subject: [PATCH 24/73] Comments --- crates/store/src/server/proof_scheduler.rs | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/crates/store/src/server/proof_scheduler.rs b/crates/store/src/server/proof_scheduler.rs index 4d106507cc..4fdf56645c 100644 --- a/crates/store/src/server/proof_scheduler.rs +++ b/crates/store/src/server/proof_scheduler.rs @@ -108,12 +108,17 @@ async fn run( let mut proving_futures = order_proving_jobs(&db, &block_prover, &unproven_blocks); while let Some(timeout_result) = proving_futures.next().await { match timeout_result { + // Store successful proofs. Ok((block_num, proof)) => { db.insert_block_proof(block_num, &proof) .await .map_err(ProofSchedulerError::PersistFailed)?; }, + + // Abort on fatal errors. Err(ProveBlockError::Fatal(err)) => return Err(err), + + // Log transient errors and restart proof scheduler loop. 
Err(ProveBlockError::Transient(err)) => { error!( target: COMPONENT, From c08b657ddfb7231f697ccefd91aab212e3d7f376 Mon Sep 17 00:00:00 2001 From: sergerad Date: Wed, 4 Mar 2026 09:09:10 +1300 Subject: [PATCH 25/73] Fix compile --- bin/remote-prover/src/server/tests.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bin/remote-prover/src/server/tests.rs b/bin/remote-prover/src/server/tests.rs index bff09caad9..e6abb72ac1 100644 --- a/bin/remote-prover/src/server/tests.rs +++ b/bin/remote-prover/src/server/tests.rs @@ -238,7 +238,7 @@ async fn capacity_is_respected() { result.sort_unstable(); assert_eq!(expected, result); - assert_matches!(first.err().or(second.err()).or(third.err()), Some(err) => { + assert_matches::assert_matches!(first.err().or(second.err()).or(third.err()), Some(err) => { assert_eq!(err.code(), tonic::Code::ResourceExhausted); }); From 258bafc18882e40f54404ac61bd067b95c7e819e Mon Sep 17 00:00:00 2001 From: sergerad Date: Wed, 4 Mar 2026 09:11:03 +1300 Subject: [PATCH 26/73] Rm dead code fields --- crates/store/src/db/models/queries/block_headers.rs | 4 ---- 1 file changed, 4 deletions(-) diff --git a/crates/store/src/db/models/queries/block_headers.rs b/crates/store/src/db/models/queries/block_headers.rs index 0066f8e3b6..d46c8914d4 100644 --- a/crates/store/src/db/models/queries/block_headers.rs +++ b/crates/store/src/db/models/queries/block_headers.rs @@ -174,10 +174,6 @@ pub struct BlockHeaderRawRow { pub block_header: Vec, pub signature: Vec, pub commitment: Vec, - #[expect(dead_code)] - pub block_proof: Option>, - #[expect(dead_code)] - pub proving_inputs: Option>, } impl TryInto for BlockHeaderRawRow { type Error = DatabaseError; From bd8dad735b89b3ff1e2382c0b5c97929749e53b5 Mon Sep 17 00:00:00 2001 From: sergerad Date: Wed, 4 Mar 2026 09:18:34 +1300 Subject: [PATCH 27/73] impl conv::SqlTypeConv for BlockProof --- crates/store/src/db/mod.rs | 4 ++-- crates/store/src/db/models/conv.rs | 14 +++++++++++++- .../store/src/db/models/queries/block_headers.rs | 6 +++--- crates/store/src/server/proof_scheduler.rs | 2 +- 4 files changed, 19 insertions(+), 7 deletions(-) diff --git a/crates/store/src/db/mod.rs b/crates/store/src/db/mod.rs index e8a09a7a9b..b05363b07e 100644 --- a/crates/store/src/db/mod.rs +++ b/crates/store/src/db/mod.rs @@ -596,11 +596,11 @@ impl Db { pub async fn insert_block_proof( &self, block_num: BlockNumber, - block_proof: &BlockProof, + block_proof: BlockProof, ) -> Result<()> { let block_proof = block_proof.clone(); self.transact("insert block proof", move |conn| { - models::queries::insert_block_proof(conn, block_num, &block_proof) + models::queries::insert_block_proof(conn, block_num, block_proof) }) .await?; Ok(()) diff --git a/crates/store/src/db/models/conv.rs b/crates/store/src/db/models/conv.rs index 2176ea0d46..b2cf2d6f8f 100644 --- a/crates/store/src/db/models/conv.rs +++ b/crates/store/src/db/models/conv.rs @@ -36,7 +36,7 @@ use miden_crypto::Word; use miden_crypto::utils::Deserializable; use miden_protocol::Felt; use miden_protocol::account::{StorageSlotName, StorageSlotType}; -use miden_protocol::block::{BlockHeader, BlockNumber}; +use miden_protocol::block::{BlockHeader, BlockNumber, BlockProof}; use miden_protocol::note::NoteTag; use crate::db::models::queries::{BlockHeaderCommitment, NetworkAccountType}; @@ -95,6 +95,18 @@ impl SqlTypeConvert for BlockHeader { } } +impl SqlTypeConvert for BlockProof { + type Raw = Vec; + + fn from_raw_sql(raw: Self::Raw) -> Result { + 
::read_from_bytes(raw.as_slice()).map_err(Self::map_err) + } + + fn to_raw_sql(self) -> Self::Raw { + miden_crypto::utils::Serializable::to_bytes(&self) + } +} + impl SqlTypeConvert for NetworkAccountType { type Raw = i32; diff --git a/crates/store/src/db/models/queries/block_headers.rs b/crates/store/src/db/models/queries/block_headers.rs index d46c8914d4..59aec13c41 100644 --- a/crates/store/src/db/models/queries/block_headers.rs +++ b/crates/store/src/db/models/queries/block_headers.rs @@ -282,13 +282,13 @@ pub(crate) fn select_block_proving_inputs( pub(crate) fn insert_block_proof( conn: &mut SqliteConnection, block_num: BlockNumber, - block_proof: &BlockProof, + block_proof: BlockProof, ) -> Result { let count = diesel::update( schema::block_headers::table .filter(schema::block_headers::block_num.eq(block_num.to_raw_sql())), ) - .set(schema::block_headers::block_proof.eq(block_proof.to_bytes())) + .set(schema::block_headers::block_proof.eq(block_proof.to_raw_sql())) .execute(conn)?; Ok(count) } @@ -371,7 +371,7 @@ pub(crate) fn select_block_proof( .optional()?; // Flatten: None (row not found) or Some(None) (proof is NULL) => None. match proof_bytes.flatten() { - Some(bytes) => Ok(Some(BlockProof::read_from_bytes(&bytes[..])?)), + Some(bytes) => Ok(Some(BlockProof::from_raw_sql(bytes)?)), None => Ok(None), } } diff --git a/crates/store/src/server/proof_scheduler.rs b/crates/store/src/server/proof_scheduler.rs index 4fdf56645c..d46fd21baf 100644 --- a/crates/store/src/server/proof_scheduler.rs +++ b/crates/store/src/server/proof_scheduler.rs @@ -110,7 +110,7 @@ async fn run( match timeout_result { // Store successful proofs. Ok((block_num, proof)) => { - db.insert_block_proof(block_num, &proof) + db.insert_block_proof(block_num, proof) .await .map_err(ProofSchedulerError::PersistFailed)?; }, From 2b4419298e85c8f40ef3da4207d0a2aa757eee8e Mon Sep 17 00:00:00 2001 From: sergerad Date: Wed, 4 Mar 2026 09:27:06 +1300 Subject: [PATCH 28/73] Add index update query --- .../db/migrations/2025062000000_setup/up.sql | 3 +++ .../src/db/models/queries/block_headers.rs | 17 +++++++++-------- 2 files changed, 12 insertions(+), 8 deletions(-) diff --git a/crates/store/src/db/migrations/2025062000000_setup/up.sql b/crates/store/src/db/migrations/2025062000000_setup/up.sql index 72da591136..f17bd6939a 100644 --- a/crates/store/src/db/migrations/2025062000000_setup/up.sql +++ b/crates/store/src/db/migrations/2025062000000_setup/up.sql @@ -10,6 +10,9 @@ CREATE TABLE block_headers ( CONSTRAINT block_header_block_num_is_u32 CHECK (block_num BETWEEN 0 AND 0xFFFFFFFF) ); +CREATE INDEX block_headers_to_be_proven ON block_headers(block_proof ASC) WHERE block_proof IS NULL; +CREATE INDEX block_headers_proven_desc ON block_headers(block_num DESC) WHERE block_proof IS NOT NULL; + CREATE TABLE account_codes ( code_commitment BLOB NOT NULL, code BLOB NOT NULL, diff --git a/crates/store/src/db/models/queries/block_headers.rs b/crates/store/src/db/models/queries/block_headers.rs index 59aec13c41..6804e33080 100644 --- a/crates/store/src/db/models/queries/block_headers.rs +++ b/crates/store/src/db/models/queries/block_headers.rs @@ -329,20 +329,21 @@ pub(crate) fn select_unproven_blocks( /// # Raw SQL /// /// ```sql -/// SELECT MAX(block_num) +/// SELECT block_num /// FROM block_headers /// WHERE block_proof IS NOT NULL +/// ORDER BY block_num DESC +/// LIMIT 1 /// ``` pub(crate) fn select_latest_proven_block_num( conn: &mut SqliteConnection, ) -> Result, DatabaseError> { - use diesel::dsl::max; - - let block_num: 
Option = SelectDsl::select( - schema::block_headers::table.filter(schema::block_headers::block_proof.is_not_null()), - max(schema::block_headers::block_num), - ) - .get_result(conn)?; + let block_num: Option = + SelectDsl::select(schema::block_headers::table, schema::block_headers::block_num) + .filter(schema::block_headers::block_proof.is_not_null()) + .order(schema::block_headers::block_num.desc()) + .first(conn) + .optional()?; block_num.map(BlockNumber::from_raw_sql).transpose().map_err(Into::into) } From 92878d8286311777d900ef4372d8cc8b1fda3288 Mon Sep 17 00:00:00 2001 From: sergerad Date: Wed, 4 Mar 2026 09:27:31 +1300 Subject: [PATCH 29/73] Bump timeout --- crates/store/src/server/proof_scheduler.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/crates/store/src/server/proof_scheduler.rs b/crates/store/src/server/proof_scheduler.rs index d46fd21baf..b20050108d 100644 --- a/crates/store/src/server/proof_scheduler.rs +++ b/crates/store/src/server/proof_scheduler.rs @@ -34,7 +34,7 @@ use crate::server::block_prover_client::{BlockProver, StoreProverError}; // ================================================================================================ /// Overall timeout for proving a single block. -const BLOCK_PROVE_TIMEOUT: Duration = Duration::from_mins(2); +const BLOCK_PROVE_TIMEOUT: Duration = Duration::from_mins(4); // PROOF SCHEDULER // ================================================================================================ From 7d5ed4cc7e3f4098e64e7132e21612d01efa3136 Mon Sep 17 00:00:00 2001 From: sergerad Date: Wed, 4 Mar 2026 09:51:18 +1300 Subject: [PATCH 30/73] Update notify --- crates/store/src/server/proof_scheduler.rs | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/crates/store/src/server/proof_scheduler.rs b/crates/store/src/server/proof_scheduler.rs index b20050108d..67e153bfcd 100644 --- a/crates/store/src/server/proof_scheduler.rs +++ b/crates/store/src/server/proof_scheduler.rs @@ -85,6 +85,11 @@ async fn run( info!(target: COMPONENT, "Proof scheduler started"); loop { + // Capture the notify permit before retrieving unproven blocks from the database. + // This ensures that a notify fired between the database query and the wait on the permit + // will be captured; meaning we don't block unnecessarily until the next notify. + let notified = notify.notified(); + // Query all unproven blocks. This handles both startup recovery and new blocks. let unproven_blocks = match db.select_unproven_blocks().await { Ok(blocks) => blocks, @@ -97,7 +102,7 @@ async fn run( // Wait for notify if there are no unproven blocks. if unproven_blocks.is_empty() { - notify.notified().await; + notified.await; continue; } From 2c5a7d798565b6f2eaf505dedd379e1e3ad9d08c Mon Sep 17 00:00:00 2001 From: sergerad Date: Wed, 4 Mar 2026 09:58:03 +1300 Subject: [PATCH 31/73] Specify proving block batch size --- crates/store/src/db/mod.rs | 6 +++--- crates/store/src/db/models/queries/block_headers.rs | 6 +++++- crates/store/src/server/proof_scheduler.rs | 5 ++++- 3 files changed, 12 insertions(+), 5 deletions(-) diff --git a/crates/store/src/db/mod.rs b/crates/store/src/db/mod.rs index b05363b07e..0ed04c8257 100644 --- a/crates/store/src/db/mod.rs +++ b/crates/store/src/db/mod.rs @@ -608,9 +608,9 @@ impl Db { /// Returns block numbers for all blocks that have not yet been proven, ordered ascending. 
#[instrument(level = "debug", target = COMPONENT, skip_all, ret(level = "debug"), err)] - pub async fn select_unproven_blocks(&self) -> Result> { - self.transact("select unproven blocks", |conn| { - models::queries::select_unproven_blocks(conn) + pub async fn select_unproven_blocks(&self, limit: i64) -> Result> { + self.transact("select unproven blocks", move |conn| { + models::queries::select_unproven_blocks(conn, limit) }) .await } diff --git a/crates/store/src/db/models/queries/block_headers.rs b/crates/store/src/db/models/queries/block_headers.rs index 6804e33080..7f56b7a8a4 100644 --- a/crates/store/src/db/models/queries/block_headers.rs +++ b/crates/store/src/db/models/queries/block_headers.rs @@ -293,9 +293,10 @@ pub(crate) fn insert_block_proof( Ok(count) } -/// Select all block numbers that have not yet been proven, ordered ascending. +/// Select block numbers that have not yet been proven, ordered ascending. /// /// The genesis block (block 0) is excluded because it is never proven. +/// Results are limited to at most `limit` rows. /// /// # Raw SQL /// @@ -305,15 +306,18 @@ pub(crate) fn insert_block_proof( /// WHERE block_proof IS NULL /// AND block_num > 0 /// ORDER BY block_num ASC +/// LIMIT ? /// ``` pub(crate) fn select_unproven_blocks( conn: &mut SqliteConnection, + limit: i64, ) -> Result, DatabaseError> { let block_nums: Vec = SelectDsl::select(schema::block_headers::table, schema::block_headers::block_num) .filter(schema::block_headers::block_proof.is_null()) .filter(schema::block_headers::block_num.gt(0i64)) .order(schema::block_headers::block_num.asc()) + .limit(limit) .load(conn)?; block_nums .into_iter() diff --git a/crates/store/src/server/proof_scheduler.rs b/crates/store/src/server/proof_scheduler.rs index 67e153bfcd..d05f8843cb 100644 --- a/crates/store/src/server/proof_scheduler.rs +++ b/crates/store/src/server/proof_scheduler.rs @@ -36,6 +36,9 @@ use crate::server::block_prover_client::{BlockProver, StoreProverError}; /// Overall timeout for proving a single block. const BLOCK_PROVE_TIMEOUT: Duration = Duration::from_mins(4); +/// Maximum number of unproven blocks to process in a single batch. +const MAX_PROVING_BATCH_SIZE: i64 = 16; + // PROOF SCHEDULER // ================================================================================================ @@ -91,7 +94,7 @@ async fn run( let notified = notify.notified(); // Query all unproven blocks. This handles both startup recovery and new blocks. - let unproven_blocks = match db.select_unproven_blocks().await { + let unproven_blocks = match db.select_unproven_blocks(MAX_PROVING_BATCH_SIZE).await { Ok(blocks) => blocks, Err(err) => { error!(target: COMPONENT, %err, "Failed to query unproven blocks, retrying"); From 8b803f76df85458ed495fa956d36be5ee42f18bf Mon Sep 17 00:00:00 2001 From: sergerad Date: Wed, 4 Mar 2026 09:58:47 +1300 Subject: [PATCH 32/73] static lifetime --- crates/store/src/server/proof_scheduler.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/crates/store/src/server/proof_scheduler.rs b/crates/store/src/server/proof_scheduler.rs index d05f8843cb..cad678963c 100644 --- a/crates/store/src/server/proof_scheduler.rs +++ b/crates/store/src/server/proof_scheduler.rs @@ -230,7 +230,7 @@ enum ProveBlockError { /// An irrecoverable error that should cause node shutdown. Fatal(ProofSchedulerError), /// A transient error (DB read, prover failure). The outer loop will retry. - Transient(Box), + Transient(Box), /// The overall proving timeout was exceeded. Retriable on next iteration. 
Timeout, } From 2ca641f29c77eedbcf2f6bf4b5e5fa622e8a55c3 Mon Sep 17 00:00:00 2001 From: sergerad Date: Wed, 4 Mar 2026 09:59:33 +1300 Subject: [PATCH 33/73] backticks --- proto/proto/rpc.proto | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/proto/proto/rpc.proto b/proto/proto/rpc.proto index 03090de3f4..01583531e4 100644 --- a/proto/proto/rpc.proto +++ b/proto/proto/rpc.proto @@ -502,8 +502,8 @@ message SyncChainMmrRequest { // The finality level to use when clamping the upper bound of the block range. // - // When set to FINALITY_COMMITTED (default), the upper bound is clamped to the chain tip. - // When set to FINALITY_PROVEN, the upper bound is clamped to the latest proven block. + // When set to `FINALITY_COMMITTED` (default), the upper bound is clamped to the chain tip. + // When set to `FINALITY_PROVEN`, the upper bound is clamped to the latest proven block. Finality finality = 2; } From 6e526509552996265112c5fa19707441a8c64363 Mon Sep 17 00:00:00 2001 From: sergerad Date: Wed, 4 Mar 2026 10:07:22 +1300 Subject: [PATCH 34/73] replace match --- crates/store/src/server/proof_scheduler.rs | 21 +++++++++------------ 1 file changed, 9 insertions(+), 12 deletions(-) diff --git a/crates/store/src/server/proof_scheduler.rs b/crates/store/src/server/proof_scheduler.rs index cad678963c..d6077c8f04 100644 --- a/crates/store/src/server/proof_scheduler.rs +++ b/crates/store/src/server/proof_scheduler.rs @@ -195,18 +195,15 @@ async fn prove_block( block_num: BlockNumber, ) -> Result { // Load proving inputs from the DB. - let bytes = match db.select_block_proving_inputs(block_num).await { - Ok(Some(bytes)) => bytes, - // Inputs not found. This should never happen for committed blocks. The genesis block - // does not have inputs but it should not be queried by this function. - Ok(None) => { - return Err(ProveBlockError::Fatal(ProofSchedulerError::MissingProvingInputs( - block_num, - ))); - }, - // Failed to retrieve proving inputs. - Err(err) => return Err(err.into()), - }; + // All committed blocks should have inputs apart from the genesis block, which should + // never be queried by this function. + let bytes = db + .select_block_proving_inputs(block_num) + .await + .map_err(ProveBlockError::from)? + .ok_or_else(|| { + ProveBlockError::Fatal(ProofSchedulerError::MissingProvingInputs(block_num)) + })?; // Deserialize proving inputs. 
let request = BlockProofRequest::read_from_bytes(&bytes[..]) From 295f9d84214a9542c5d146485c2d8768b24ac21a Mon Sep 17 00:00:00 2001 From: sergerad Date: Wed, 4 Mar 2026 11:09:45 +1300 Subject: [PATCH 35/73] Store proofs to file --- crates/store/src/blocks.rs | 52 +++++++++++++++++++ .../db/migrations/2025062000000_setup/up.sql | 6 +-- crates/store/src/db/mod.rs | 27 +++------- crates/store/src/db/models/conv.rs | 14 +---- .../src/db/models/queries/block_headers.rs | 48 ++++------------- crates/store/src/db/schema.rs | 2 +- crates/store/src/errors.rs | 6 ++- crates/store/src/server/mod.rs | 2 +- crates/store/src/server/proof_scheduler.rs | 17 ++++-- crates/store/src/state/mod.rs | 5 ++ 10 files changed, 94 insertions(+), 85 deletions(-) diff --git a/crates/store/src/blocks.rs b/crates/store/src/blocks.rs index e771332ba9..ef8bf3526b 100644 --- a/crates/store/src/blocks.rs +++ b/crates/store/src/blocks.rs @@ -107,6 +107,41 @@ impl BlockStore { fs_err::write(block_path, data) } + // PROOF STORAGE + // -------------------------------------------------------------------------------------------- + + #[expect(dead_code)] + pub async fn load_proof( + &self, + block_num: BlockNumber, + ) -> Result>, std::io::Error> { + match tokio::fs::read(self.proof_path(block_num)).await { + Ok(data) => Ok(Some(data)), + Err(err) if err.kind() == std::io::ErrorKind::NotFound => Ok(None), + Err(err) => Err(err), + } + } + + #[instrument( + target = COMPONENT, + name = "store.block_store.save_proof", + skip(self, data), + err, + fields(proof_size = data.len()) + )] + pub async fn save_proof( + &self, + block_num: BlockNumber, + data: &[u8], + ) -> Result<(), std::io::Error> { + let (epoch_path, proof_path) = self.epoch_proof_path(block_num)?; + if !epoch_path.exists() { + tokio::fs::create_dir_all(epoch_path).await?; + } + + tokio::fs::write(proof_path, data).await + } + // HELPER FUNCTIONS // -------------------------------------------------------------------------------------------- @@ -117,6 +152,13 @@ impl BlockStore { epoch_dir.join(format!("block_{block_num:08x}.dat")) } + fn proof_path(&self, block_num: BlockNumber) -> PathBuf { + let block_num = block_num.as_u32(); + let epoch = block_num >> 16; + let epoch_dir = self.store_dir.join(format!("{epoch:04x}")); + epoch_dir.join(format!("proof_{block_num:08x}.dat")) + } + fn epoch_block_path( &self, block_num: BlockNumber, @@ -127,6 +169,16 @@ impl BlockStore { Ok((epoch_path.to_path_buf(), block_path)) } + fn epoch_proof_path( + &self, + block_num: BlockNumber, + ) -> Result<(PathBuf, PathBuf), std::io::Error> { + let proof_path = self.proof_path(block_num); + let epoch_path = proof_path.parent().ok_or(std::io::Error::from(ErrorKind::NotFound))?; + + Ok((epoch_path.to_path_buf(), proof_path)) + } + pub fn display(&self) -> std::path::Display<'_> { self.store_dir.display() } diff --git a/crates/store/src/db/migrations/2025062000000_setup/up.sql b/crates/store/src/db/migrations/2025062000000_setup/up.sql index f17bd6939a..ef971ee462 100644 --- a/crates/store/src/db/migrations/2025062000000_setup/up.sql +++ b/crates/store/src/db/migrations/2025062000000_setup/up.sql @@ -4,14 +4,14 @@ CREATE TABLE block_headers ( signature BLOB NOT NULL, commitment BLOB NOT NULL, proving_inputs BLOB, -- Serialized BlockProofRequest needed for deferred proving. NULL for genesis block. 
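For reference, the epoch sharding used by the new proof-file helpers above keeps at most 2^16 proof files per directory (`block_num >> 16` selects the directory). A minimal sketch of the resulting layout, assuming an arbitrary `/data/blocks` root; `proof_path` here is a free-function mirror written for illustration, not the `BlockStore` method itself:

```rust
use std::path::{Path, PathBuf};

// Mirrors the sharding in the `proof_path` helper added in this patch.
fn proof_path(store_dir: &Path, block_num: u32) -> PathBuf {
    let epoch = block_num >> 16;
    store_dir.join(format!("{epoch:04x}")).join(format!("proof_{block_num:08x}.dat"))
}

fn main() {
    let dir = PathBuf::from("/data/blocks");
    // Block 70_000 (0x11170) falls into epoch 0x0001, so at most 65_536 proof
    // files ever share one directory.
    assert_eq!(
        proof_path(&dir, 70_000),
        PathBuf::from("/data/blocks/0001/proof_00011170.dat")
    );
}
```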
- block_proof BLOB, -- NULL means the block has not yet been proven + is_proven BOOLEAN NOT NULL DEFAULT 0, -- Whether the block has been proven PRIMARY KEY (block_num), CONSTRAINT block_header_block_num_is_u32 CHECK (block_num BETWEEN 0 AND 0xFFFFFFFF) ); -CREATE INDEX block_headers_to_be_proven ON block_headers(block_proof ASC) WHERE block_proof IS NULL; -CREATE INDEX block_headers_proven_desc ON block_headers(block_num DESC) WHERE block_proof IS NOT NULL; +CREATE INDEX block_headers_to_be_proven ON block_headers(block_num ASC) WHERE is_proven = 0; +CREATE INDEX block_headers_proven_desc ON block_headers(block_num DESC) WHERE is_proven = 1; CREATE TABLE account_codes ( code_commitment BLOB NOT NULL, diff --git a/crates/store/src/db/mod.rs b/crates/store/src/db/mod.rs index 0ed04c8257..61929a8a7e 100644 --- a/crates/store/src/db/mod.rs +++ b/crates/store/src/db/mod.rs @@ -10,7 +10,7 @@ use miden_node_utils::tracing::OpenTelemetrySpanExt; use miden_protocol::Word; use miden_protocol::account::{AccountHeader, AccountId, AccountStorageHeader}; use miden_protocol::asset::{Asset, AssetVaultKey}; -use miden_protocol::block::{BlockHeader, BlockNoteIndex, BlockNumber, BlockProof, SignedBlock}; +use miden_protocol::block::{BlockHeader, BlockNoteIndex, BlockNumber, SignedBlock}; use miden_protocol::crypto::merkle::SparseMerklePath; use miden_protocol::note::{ NoteDetails, @@ -589,18 +589,13 @@ impl Db { .await } - /// Stores a [`BlockProof`] for a previously committed block. + /// Marks a previously committed block as proven. /// - /// Updates the `block_proof` column for the given block number. + /// Sets the `is_proven` flag to `true` for the given block number. #[instrument(target = COMPONENT, skip_all, err)] - pub async fn insert_block_proof( - &self, - block_num: BlockNumber, - block_proof: BlockProof, - ) -> Result<()> { - let block_proof = block_proof.clone(); - self.transact("insert block proof", move |conn| { - models::queries::insert_block_proof(conn, block_num, block_proof) + pub async fn mark_block_proven(&self, block_num: BlockNumber) -> Result<()> { + self.transact("mark block proven", move |conn| { + models::queries::mark_block_proven(conn, block_num) }) .await?; Ok(()) @@ -637,16 +632,6 @@ impl Db { .await } - /// Returns the [`BlockProof`] for a given block number, if the block exists and has been - /// proven. - #[instrument(level = "debug", target = COMPONENT, skip_all, ret(level = "debug"), err)] - pub async fn select_block_proof(&self, block_num: BlockNumber) -> Result> { - self.transact("select block proof", move |conn| { - models::queries::select_block_proof(conn, block_num) - }) - .await - } - /// Selects storage map values for syncing storage maps for a specific account ID. 
/// /// The returned values are the latest known values up to `block_range.end()`, and no values diff --git a/crates/store/src/db/models/conv.rs b/crates/store/src/db/models/conv.rs index b2cf2d6f8f..2176ea0d46 100644 --- a/crates/store/src/db/models/conv.rs +++ b/crates/store/src/db/models/conv.rs @@ -36,7 +36,7 @@ use miden_crypto::Word; use miden_crypto::utils::Deserializable; use miden_protocol::Felt; use miden_protocol::account::{StorageSlotName, StorageSlotType}; -use miden_protocol::block::{BlockHeader, BlockNumber, BlockProof}; +use miden_protocol::block::{BlockHeader, BlockNumber}; use miden_protocol::note::NoteTag; use crate::db::models::queries::{BlockHeaderCommitment, NetworkAccountType}; @@ -95,18 +95,6 @@ impl SqlTypeConvert for BlockHeader { } } -impl SqlTypeConvert for BlockProof { - type Raw = Vec; - - fn from_raw_sql(raw: Self::Raw) -> Result { - ::read_from_bytes(raw.as_slice()).map_err(Self::map_err) - } - - fn to_raw_sql(self) -> Self::Raw { - miden_crypto::utils::Serializable::to_bytes(&self) - } -} - impl SqlTypeConvert for NetworkAccountType { type Raw = i32; diff --git a/crates/store/src/db/models/queries/block_headers.rs b/crates/store/src/db/models/queries/block_headers.rs index 7f56b7a8a4..bebbbc7e2d 100644 --- a/crates/store/src/db/models/queries/block_headers.rs +++ b/crates/store/src/db/models/queries/block_headers.rs @@ -15,7 +15,7 @@ use miden_crypto::Word; use miden_crypto::dsa::ecdsa_k256_keccak::Signature; use miden_node_proto::BlockProofRequest; use miden_node_utils::limiter::{QueryParamBlockLimit, QueryParamLimiter}; -use miden_protocol::block::{BlockHeader, BlockNumber, BlockProof}; +use miden_protocol::block::{BlockHeader, BlockNumber}; use miden_protocol::utils::{Deserializable, Serializable}; use super::DatabaseError; @@ -266,9 +266,9 @@ pub(crate) fn select_block_proving_inputs( Ok(inputs.flatten()) } -/// Store a [`BlockProof`] for a committed block. +/// Mark a committed block as proven. /// -/// Updates the `block_proof` column for the row with the given `block_num`. +/// Sets the `is_proven` flag to `true` for the row with the given `block_num`. /// /// # Returns /// @@ -279,16 +279,15 @@ pub(crate) fn select_block_proving_inputs( fields(block_num = %block_num), err, )] -pub(crate) fn insert_block_proof( +pub(crate) fn mark_block_proven( conn: &mut SqliteConnection, block_num: BlockNumber, - block_proof: BlockProof, ) -> Result { let count = diesel::update( schema::block_headers::table .filter(schema::block_headers::block_num.eq(block_num.to_raw_sql())), ) - .set(schema::block_headers::block_proof.eq(block_proof.to_raw_sql())) + .set(schema::block_headers::is_proven.eq(true)) .execute(conn)?; Ok(count) } @@ -303,7 +302,7 @@ pub(crate) fn insert_block_proof( /// ```sql /// SELECT block_num /// FROM block_headers -/// WHERE block_proof IS NULL +/// WHERE is_proven = 0 /// AND block_num > 0 /// ORDER BY block_num ASC /// LIMIT ? 
@@ -314,7 +313,7 @@ pub(crate) fn select_unproven_blocks( ) -> Result, DatabaseError> { let block_nums: Vec = SelectDsl::select(schema::block_headers::table, schema::block_headers::block_num) - .filter(schema::block_headers::block_proof.is_null()) + .filter(schema::block_headers::is_proven.eq(false)) .filter(schema::block_headers::block_num.gt(0i64)) .order(schema::block_headers::block_num.asc()) .limit(limit) @@ -335,7 +334,7 @@ pub(crate) fn select_unproven_blocks( /// ```sql /// SELECT block_num /// FROM block_headers -/// WHERE block_proof IS NOT NULL +/// WHERE is_proven = 1 /// ORDER BY block_num DESC /// LIMIT 1 /// ``` @@ -344,39 +343,10 @@ pub(crate) fn select_latest_proven_block_num( ) -> Result, DatabaseError> { let block_num: Option = SelectDsl::select(schema::block_headers::table, schema::block_headers::block_num) - .filter(schema::block_headers::block_proof.is_not_null()) + .filter(schema::block_headers::is_proven.eq(true)) .order(schema::block_headers::block_num.desc()) .first(conn) .optional()?; block_num.map(BlockNumber::from_raw_sql).transpose().map_err(Into::into) } - -/// Select the [`BlockProof`] for a given block number, if it exists. -/// -/// # Returns -/// -/// `None` if the block does not exist or has not been proven yet. -/// -/// # Raw SQL -/// -/// ```sql -/// SELECT block_proof -/// FROM block_headers -/// WHERE block_num = ?1 -/// ``` -pub(crate) fn select_block_proof( - conn: &mut SqliteConnection, - block_num: BlockNumber, -) -> Result, DatabaseError> { - let proof_bytes: Option>> = - SelectDsl::select(schema::block_headers::table, schema::block_headers::block_proof) - .filter(schema::block_headers::block_num.eq(block_num.to_raw_sql())) - .get_result(conn) - .optional()?; - // Flatten: None (row not found) or Some(None) (proof is NULL) => None. - match proof_bytes.flatten() { - Some(bytes) => Ok(Some(BlockProof::from_raw_sql(bytes)?)), - None => Ok(None), - } -} diff --git a/crates/store/src/db/schema.rs b/crates/store/src/db/schema.rs index 7890516f33..dc8ebfb3f1 100644 --- a/crates/store/src/db/schema.rs +++ b/crates/store/src/db/schema.rs @@ -49,7 +49,7 @@ diesel::table! { block_header -> Binary, signature -> Binary, commitment -> Binary, - block_proof -> Nullable, + is_proven -> Bool, proving_inputs -> Nullable, } } diff --git a/crates/store/src/errors.rs b/crates/store/src/errors.rs index 5d809d5f16..ac8302780a 100644 --- a/crates/store/src/errors.rs +++ b/crates/store/src/errors.rs @@ -39,8 +39,10 @@ pub enum ProofSchedulerError { MissingProvingInputs(BlockNumber), #[error("failed to deserialize proving inputs for block")] DeserializationFailed(#[source] DeserializationError), - #[error("failed to persist block proof for block")] - PersistFailed(#[source] DatabaseError), + #[error("failed to write block proof to file")] + PersistProofFailed(#[source] std::io::Error), + #[error("failed to mark block as proven in database")] + MarkBlockProvenFailed(#[source] DatabaseError), #[error("invalid remote prover endpoint: {0}")] InvalidProverEndpoint(String), } diff --git a/crates/store/src/server/mod.rs b/crates/store/src/server/mod.rs index 07486bccb2..94012ff6e4 100644 --- a/crates/store/src/server/mod.rs +++ b/crates/store/src/server/mod.rs @@ -117,7 +117,7 @@ impl Store { // Spawn the proof scheduler as a background task. It will immediately pick up any // unproven blocks from previous runs and begin proving them. 
let (proof_scheduler_handle, proof_scheduler_task) = - proof_scheduler::spawn(state.db.clone(), block_prover); + proof_scheduler::spawn(state.db.clone(), block_prover, state.block_store()); let rpc_service = store::rpc_server::RpcServer::new(api::StoreApi { state: Arc::clone(&state), diff --git a/crates/store/src/server/proof_scheduler.rs b/crates/store/src/server/proof_scheduler.rs index d6077c8f04..e8cdb821ec 100644 --- a/crates/store/src/server/proof_scheduler.rs +++ b/crates/store/src/server/proof_scheduler.rs @@ -19,13 +19,14 @@ use futures::StreamExt; use futures::stream::FuturesOrdered; use miden_node_proto::domain::proof_request::BlockProofRequest; use miden_protocol::block::{BlockNumber, BlockProof}; -use miden_protocol::utils::Deserializable; +use miden_protocol::utils::{Deserializable, Serializable}; use miden_remote_prover_client::RemoteProverClientError; use tokio::sync::Notify; use tokio::task::JoinHandle; use tracing::{error, info, instrument}; use crate::COMPONENT; +use crate::blocks::BlockStore; use crate::db::Db; use crate::errors::{DatabaseError, ProofSchedulerError}; use crate::server::block_prover_client::{BlockProver, StoreProverError}; @@ -64,11 +65,12 @@ impl ProofSchedulerHandle { pub fn spawn( db: Arc, block_prover: Arc, + block_store: Arc, ) -> (ProofSchedulerHandle, JoinHandle>) { let notify = Arc::new(Notify::new()); let handle = ProofSchedulerHandle { notify: Arc::clone(¬ify) }; - let join_handle = tokio::spawn(run(db, block_prover, notify)); + let join_handle = tokio::spawn(run(db, block_prover, block_store, notify)); (handle, join_handle) } @@ -83,6 +85,7 @@ pub fn spawn( async fn run( db: Arc, block_prover: Arc, + block_store: Arc, notify: Arc, ) -> Result<(), ProofSchedulerError> { info!(target: COMPONENT, "Proof scheduler started"); @@ -116,11 +119,15 @@ async fn run( let mut proving_futures = order_proving_jobs(&db, &block_prover, &unproven_blocks); while let Some(timeout_result) = proving_futures.next().await { match timeout_result { - // Store successful proofs. + // Save proof to file, then mark as proven in DB. Ok((block_num, proof)) => { - db.insert_block_proof(block_num, proof) + block_store + .save_proof(block_num, &proof.to_bytes()) .await - .map_err(ProofSchedulerError::PersistFailed)?; + .map_err(ProofSchedulerError::PersistProofFailed)?; + db.mark_block_proven(block_num) + .await + .map_err(ProofSchedulerError::MarkBlockProvenFailed)?; }, // Abort on fatal errors. diff --git a/crates/store/src/state/mod.rs b/crates/store/src/state/mod.rs index e4f16afdd4..8c9a13dc67 100644 --- a/crates/store/src/state/mod.rs +++ b/crates/store/src/state/mod.rs @@ -776,6 +776,11 @@ impl State { self.block_store.load_block(block_num).await.map_err(Into::into) } + /// Returns the block store. + pub(crate) fn block_store(&self) -> Arc { + self.block_store.clone() + } + /// Returns the latest block number. 
pub async fn latest_block_num(&self) -> BlockNumber { self.inner.read().await.latest_block_num() From 5bfba051b537863760b3693eea7fbca41f14d3fe Mon Sep 17 00:00:00 2001 From: sergerad Date: Wed, 4 Mar 2026 11:18:56 +1300 Subject: [PATCH 36/73] update select proving inputs return value --- crates/store/src/db/mod.rs | 6 +++--- crates/store/src/db/models/queries/block_headers.rs | 10 +++++++--- crates/store/src/server/proof_scheduler.rs | 9 ++------- 3 files changed, 12 insertions(+), 13 deletions(-) diff --git a/crates/store/src/db/mod.rs b/crates/store/src/db/mod.rs index 61929a8a7e..36928020a3 100644 --- a/crates/store/src/db/mod.rs +++ b/crates/store/src/db/mod.rs @@ -610,12 +610,12 @@ impl Db { .await } - /// Returns the serialized proving inputs for a given block number, if stored. - #[instrument(level = "debug", target = COMPONENT, skip_all, ret(level = "debug"), err)] + /// Returns the proving inputs for a given block number, if stored. + #[instrument(level = "debug", target = COMPONENT, skip_all, err)] pub async fn select_block_proving_inputs( &self, block_num: BlockNumber, - ) -> Result>> { + ) -> Result> { self.transact("select block proving inputs", move |conn| { models::queries::select_block_proving_inputs(conn, block_num) }) diff --git a/crates/store/src/db/models/queries/block_headers.rs b/crates/store/src/db/models/queries/block_headers.rs index bebbbc7e2d..c85d17fc7c 100644 --- a/crates/store/src/db/models/queries/block_headers.rs +++ b/crates/store/src/db/models/queries/block_headers.rs @@ -241,7 +241,7 @@ pub(crate) fn insert_block_header( Ok(count) } -/// Select the serialized proving inputs for a given block number. +/// Select the proving inputs for a given block number. /// /// # Returns /// @@ -257,13 +257,17 @@ pub(crate) fn insert_block_header( pub(crate) fn select_block_proving_inputs( conn: &mut SqliteConnection, block_num: BlockNumber, -) -> Result>, DatabaseError> { +) -> Result, DatabaseError> { let inputs: Option>> = SelectDsl::select(schema::block_headers::table, schema::block_headers::proving_inputs) .filter(schema::block_headers::block_num.eq(block_num.to_raw_sql())) .get_result(conn) .optional()?; - Ok(inputs.flatten()) + inputs + .flatten() + .map(|bytes| BlockProofRequest::read_from_bytes(&bytes)) + .transpose() + .map_err(Into::into) } /// Mark a committed block as proven. diff --git a/crates/store/src/server/proof_scheduler.rs b/crates/store/src/server/proof_scheduler.rs index e8cdb821ec..a7a25b49c1 100644 --- a/crates/store/src/server/proof_scheduler.rs +++ b/crates/store/src/server/proof_scheduler.rs @@ -17,9 +17,8 @@ use std::time::Duration; use futures::StreamExt; use futures::stream::FuturesOrdered; -use miden_node_proto::domain::proof_request::BlockProofRequest; use miden_protocol::block::{BlockNumber, BlockProof}; -use miden_protocol::utils::{Deserializable, Serializable}; +use miden_protocol::utils::Serializable; use miden_remote_prover_client::RemoteProverClientError; use tokio::sync::Notify; use tokio::task::JoinHandle; @@ -204,7 +203,7 @@ async fn prove_block( // Load proving inputs from the DB. // All committed blocks should have inputs apart from the genesis block, which should // never be queried by this function. - let bytes = db + let request = db .select_block_proving_inputs(block_num) .await .map_err(ProveBlockError::from)? @@ -212,10 +211,6 @@ async fn prove_block( ProveBlockError::Fatal(ProofSchedulerError::MissingProvingInputs(block_num)) })?; - // Deserialize proving inputs. 
- let request = BlockProofRequest::read_from_bytes(&bytes[..]) - .map_err(|err| ProveBlockError::Fatal(ProofSchedulerError::DeserializationFailed(err)))?; - // Prove the block. let proof = block_prover .prove(request.tx_batches, request.block_inputs, &request.block_header) From 37cb51696dc4c5e1064f072b51240670ac40d869 Mon Sep 17 00:00:00 2001 From: sergerad Date: Wed, 4 Mar 2026 11:25:31 +1300 Subject: [PATCH 37/73] RM pub crate --- crates/store/src/server/mod.rs | 2 +- crates/store/src/server/rpc_api.rs | 2 +- crates/store/src/state/mod.rs | 17 +++++++++++------ 3 files changed, 13 insertions(+), 8 deletions(-) diff --git a/crates/store/src/server/mod.rs b/crates/store/src/server/mod.rs index 94012ff6e4..54e6713136 100644 --- a/crates/store/src/server/mod.rs +++ b/crates/store/src/server/mod.rs @@ -117,7 +117,7 @@ impl Store { // Spawn the proof scheduler as a background task. It will immediately pick up any // unproven blocks from previous runs and begin proving them. let (proof_scheduler_handle, proof_scheduler_task) = - proof_scheduler::spawn(state.db.clone(), block_prover, state.block_store()); + proof_scheduler::spawn(state.db().clone(), block_prover, state.block_store()); let rpc_service = store::rpc_server::RpcServer::new(api::StoreApi { state: Arc::clone(&state), diff --git a/crates/store/src/server/rpc_api.rs b/crates/store/src/server/rpc_api.rs index a0399f291f..c51bf98c74 100644 --- a/crates/store/src/server/rpc_api.rs +++ b/crates/store/src/server/rpc_api.rs @@ -171,7 +171,7 @@ impl rpc_server::Rpc for StoreApi { proto::rpc::Finality::Committed => chain_tip, proto::rpc::Finality::Proven => self .state - .db + .db() .select_latest_proven_block_num() .await .map_err(SyncChainMmrError::DatabaseError)? diff --git a/crates/store/src/state/mod.rs b/crates/store/src/state/mod.rs index 8c9a13dc67..de4a2257d0 100644 --- a/crates/store/src/state/mod.rs +++ b/crates/store/src/state/mod.rs @@ -104,7 +104,7 @@ impl InnerState { pub struct State { /// The database which stores block headers, nullifiers, notes, and the latest states of /// accounts. - pub(crate) db: Arc, + db: Arc, /// The block store which stores full block contents for all blocks. block_store: Arc, @@ -182,6 +182,16 @@ impl State { }) } + /// Returns the database. + pub(crate) fn db(&self) -> Arc { + self.db.clone() + } + + /// Returns the block store. + pub(crate) fn block_store(&self) -> Arc { + self.block_store.clone() + } + // STATE ACCESSORS // -------------------------------------------------------------------------------------------- @@ -776,11 +786,6 @@ impl State { self.block_store.load_block(block_num).await.map_err(Into::into) } - /// Returns the block store. - pub(crate) fn block_store(&self) -> Arc { - self.block_store.clone() - } - /// Returns the latest block number. pub async fn latest_block_num(&self) -> BlockNumber { self.inner.read().await.latest_block_num() From ec89137d0ccfe19be338698f4eea9a25e67b3e3f Mon Sep 17 00:00:00 2001 From: sergerad Date: Wed, 4 Mar 2026 11:27:55 +1300 Subject: [PATCH 38/73] Fix changelog --- CHANGELOG.md | 10 +--------- 1 file changed, 1 insertion(+), 9 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 429647867d..63b725dfeb 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -4,15 +4,6 @@ ### Enhancements -- [BREAKING] Move block proving from Blocker Producer to the Store ([#1579](https://github.com/0xMiden/miden-node/pull/1579)). 
-- [BREAKING] Updated miden-base dependencies to use `next` branch; renamed `NoteInputs` to `NoteStorage`, `.inputs()` to `.storage()`, and database `inputs` column to `storage` ([#1595](https://github.com/0xMiden/miden-node/pull/1595)). -- Validator now persists validated transactions ([#1614](https://github.com/0xMiden/miden-node/pull/1614)). -- [BREAKING] Remove `SynState` and introduce `SyncChainMmr` ([#1591](https://github.com/0xMiden/miden-node/issues/1591)). -- Introduce `SyncChainMmr` RPC endpoint to sync chain MMR deltas within specified block ranges ([#1591](https://github.com/0xMiden/miden-node/issues/1591)). -- Fixed `TransactionHeader` serialization for row insertion on database & fixed transaction cursor on retrievals ([#1701](https://github.com/0xMiden/miden-node/issues/1701)). -- Added KMS signing support in validator ([#1677](https://github.com/0xMiden/miden-node/pull/1677)). -- Restructured block proving to be asynchronous and added finality field for `SyncChainMmr` requests ([#1725](https://github.com/0xMiden/miden-node/pull/1725)). - - [BREAKING] Move block proving from Blocker Producer to the Store ([#1579](https://github.com/0xMiden/node/pull/1579)). - [BREAKING] Updated miden-base dependencies to use `next` branch; renamed `NoteInputs` to `NoteStorage`, `.inputs()` to `.storage()`, and database `inputs` column to `storage` ([#1595](https://github.com/0xMiden/node/pull/1595)). - Validator now persists validated transactions ([#1614](https://github.com/0xMiden/node/pull/1614)). @@ -20,6 +11,7 @@ - Introduce `SyncChainMmr` RPC endpoint to sync chain MMR deltas within specified block ranges ([#1591](https://github.com/0xMiden/node/issues/1591)). - Fixed `TransactionHeader` serialization for row insertion on database & fixed transaction cursor on retrievals ([#1701](https://github.com/0xMiden/node/issues/1701)). - Added KMS signing support in validator ([#1677](https://github.com/0xMiden/node/pull/1677)). +- Restructured block proving to be asynchronous and added finality field for `SyncChainMmr` requests ([#1725](https://github.com/0xMiden/miden-node/pull/1725)). ### Changes From bc0bd549b06c3a92ba8971ba9b6a76132bb49147 Mon Sep 17 00:00:00 2001 From: sergerad Date: Thu, 5 Mar 2026 10:21:15 +1300 Subject: [PATCH 39/73] Changelog --- CHANGELOG.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 63b725dfeb..6d5fc501db 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -11,7 +11,7 @@ - Introduce `SyncChainMmr` RPC endpoint to sync chain MMR deltas within specified block ranges ([#1591](https://github.com/0xMiden/node/issues/1591)). - Fixed `TransactionHeader` serialization for row insertion on database & fixed transaction cursor on retrievals ([#1701](https://github.com/0xMiden/node/issues/1701)). - Added KMS signing support in validator ([#1677](https://github.com/0xMiden/node/pull/1677)). -- Restructured block proving to be asynchronous and added finality field for `SyncChainMmr` requests ([#1725](https://github.com/0xMiden/miden-node/pull/1725)). +- Added finality field for `SyncChainMmr` requests ([#1725](https://github.com/0xMiden/miden-node/pull/1725)). 
### Changes From c8a76d46d80d010bc488670dd0a4bff90e7d283b Mon Sep 17 00:00:00 2001 From: sergerad Date: Thu, 5 Mar 2026 10:23:33 +1300 Subject: [PATCH 40/73] finality unspecified --- crates/store/src/server/rpc_api.rs | 2 +- proto/proto/rpc.proto | 5 +++-- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/crates/store/src/server/rpc_api.rs b/crates/store/src/server/rpc_api.rs index c51bf98c74..cf73ed07a1 100644 --- a/crates/store/src/server/rpc_api.rs +++ b/crates/store/src/server/rpc_api.rs @@ -168,7 +168,7 @@ impl rpc_server::Rpc for StoreApi { // Determine the effective tip based on the requested finality level. let effective_tip = match request.finality() { - proto::rpc::Finality::Committed => chain_tip, + proto::rpc::Finality::Unspecified | proto::rpc::Finality::Committed => chain_tip, proto::rpc::Finality::Proven => self .state .db() diff --git a/proto/proto/rpc.proto b/proto/proto/rpc.proto index 01583531e4..d1e01155a7 100644 --- a/proto/proto/rpc.proto +++ b/proto/proto/rpc.proto @@ -485,10 +485,11 @@ message SyncNotesResponse { // The finality level for chain data queries. enum Finality { + FINALITY_UNSPECIFIED = 0; // Return data up to the latest committed block (default). - FINALITY_COMMITTED = 0; + FINALITY_COMMITTED = 1; // Return data only up to the latest proven block. - FINALITY_PROVEN = 1; + FINALITY_PROVEN = 2; } // Chain MMR synchronization request. From 506ea8020cc7d6f9e5b8139c53b10dc06eff3ca7 Mon Sep 17 00:00:00 2001 From: sergerad Date: Thu, 5 Mar 2026 10:24:09 +1300 Subject: [PATCH 41/73] unspecified comment --- proto/proto/rpc.proto | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/proto/proto/rpc.proto b/proto/proto/rpc.proto index d1e01155a7..850813eb98 100644 --- a/proto/proto/rpc.proto +++ b/proto/proto/rpc.proto @@ -503,7 +503,7 @@ message SyncChainMmrRequest { // The finality level to use when clamping the upper bound of the block range. // - // When set to `FINALITY_COMMITTED` (default), the upper bound is clamped to the chain tip. + // When set to `FINALITY_UNSPECIFIED` or `FINALITY_COMMITTED`, the upper bound is clamped to the chain tip. // When set to `FINALITY_PROVEN`, the upper bound is clamped to the latest proven block. Finality finality = 2; } From 97b320bfea29ebe3b003ff2725e9d0e405137ca0 Mon Sep 17 00:00:00 2001 From: sergerad Date: Thu, 5 Mar 2026 10:24:36 +1300 Subject: [PATCH 42/73] More comments --- proto/proto/rpc.proto | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/proto/proto/rpc.proto b/proto/proto/rpc.proto index 850813eb98..b181162d5a 100644 --- a/proto/proto/rpc.proto +++ b/proto/proto/rpc.proto @@ -485,8 +485,9 @@ message SyncNotesResponse { // The finality level for chain data queries. enum Finality { + // Return data up to the latest committed block. FINALITY_UNSPECIFIED = 0; - // Return data up to the latest committed block (default). + // Return data up to the latest committed block. FINALITY_COMMITTED = 1; // Return data only up to the latest proven block. 
FINALITY_PROVEN = 2; From 820261d4c223cd204aa3a4a1228f23c51c4320f4 Mon Sep 17 00:00:00 2001 From: sergerad Date: Thu, 5 Mar 2026 10:26:32 +1300 Subject: [PATCH 43/73] source not from --- crates/store/src/errors.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/crates/store/src/errors.rs b/crates/store/src/errors.rs index ac8302780a..6aa805af35 100644 --- a/crates/store/src/errors.rs +++ b/crates/store/src/errors.rs @@ -275,7 +275,7 @@ pub enum SyncChainMmrError { NoProvenBlocks, #[error("database error")] #[grpc(internal)] - DatabaseError(#[from] DatabaseError), + DatabaseError(#[source] DatabaseError), } impl From for StateSyncError { From b3f0238a2f94e1e1e2346a0a274704df871619be Mon Sep 17 00:00:00 2001 From: sergerad Date: Thu, 5 Mar 2026 10:27:35 +1300 Subject: [PATCH 44/73] arc clone --- crates/store/src/state/mod.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/crates/store/src/state/mod.rs b/crates/store/src/state/mod.rs index de4a2257d0..89a5d9b1d0 100644 --- a/crates/store/src/state/mod.rs +++ b/crates/store/src/state/mod.rs @@ -184,12 +184,12 @@ impl State { /// Returns the database. pub(crate) fn db(&self) -> Arc { - self.db.clone() + Arc::clone(&self.db) } /// Returns the block store. pub(crate) fn block_store(&self) -> Arc { - self.block_store.clone() + Arc::clone(&self.block_store) } // STATE ACCESSORS From 228d38b48e73d1b767b2643190a82f19e880ed60 Mon Sep 17 00:00:00 2001 From: sergerad Date: Thu, 5 Mar 2026 10:30:22 +1300 Subject: [PATCH 45/73] rename proof scheduler handle --- crates/store/src/server/api.rs | 4 ++-- crates/store/src/server/mod.rs | 8 ++++---- crates/store/src/server/proof_scheduler.rs | 10 +++++----- 3 files changed, 11 insertions(+), 11 deletions(-) diff --git a/crates/store/src/server/api.rs b/crates/store/src/server/api.rs index d61f1b020e..523e8ec861 100644 --- a/crates/store/src/server/api.rs +++ b/crates/store/src/server/api.rs @@ -14,7 +14,7 @@ use tracing::{info, instrument}; use crate::COMPONENT; use crate::errors::GetBlockInputsError; -use crate::server::proof_scheduler::ProofSchedulerHandle; +use crate::server::proof_scheduler::ProofSchedulerNotifier; use crate::state::State; // STORE API @@ -24,7 +24,7 @@ use crate::state::State; pub struct StoreApi { pub(super) state: Arc, /// Handle used to notify proof scheduler of newly committed blocks. - pub(super) proof_scheduler: ProofSchedulerHandle, + pub(super) proof_scheduler: ProofSchedulerNotifier, } impl StoreApi { diff --git a/crates/store/src/server/mod.rs b/crates/store/src/server/mod.rs index 54e6713136..aeda721251 100644 --- a/crates/store/src/server/mod.rs +++ b/crates/store/src/server/mod.rs @@ -116,21 +116,21 @@ impl Store { // Spawn the proof scheduler as a background task. It will immediately pick up any // unproven blocks from previous runs and begin proving them. 
- let (proof_scheduler_handle, proof_scheduler_task) = + let (proof_scheduler_notifier, proof_scheduler_task) = proof_scheduler::spawn(state.db().clone(), block_prover, state.block_store()); let rpc_service = store::rpc_server::RpcServer::new(api::StoreApi { state: Arc::clone(&state), - proof_scheduler: proof_scheduler_handle.clone(), + proof_scheduler: proof_scheduler_notifier.clone(), }); let ntx_builder_service = store::ntx_builder_server::NtxBuilderServer::new(api::StoreApi { state: Arc::clone(&state), - proof_scheduler: proof_scheduler_handle.clone(), + proof_scheduler: proof_scheduler_notifier.clone(), }); let block_producer_service = store::block_producer_server::BlockProducerServer::new(api::StoreApi { state: Arc::clone(&state), - proof_scheduler: proof_scheduler_handle, + proof_scheduler: proof_scheduler_notifier, }); let reflection_service = tonic_reflection::server::Builder::configure() .register_file_descriptor_set(store_rpc_api_descriptor()) diff --git a/crates/store/src/server/proof_scheduler.rs b/crates/store/src/server/proof_scheduler.rs index a7a25b49c1..0add26381d 100644 --- a/crates/store/src/server/proof_scheduler.rs +++ b/crates/store/src/server/proof_scheduler.rs @@ -44,11 +44,11 @@ const MAX_PROVING_BATCH_SIZE: i64 = 16; /// Handle returned when spawning the proof scheduler, used to notify it of new blocks. #[derive(Clone)] -pub struct ProofSchedulerHandle { +pub struct ProofSchedulerNotifier { notify: Arc, } -impl ProofSchedulerHandle { +impl ProofSchedulerNotifier { /// Notify the scheduler that a new block has been committed and may need proving. #[instrument(target = COMPONENT, name = "proof_scheduler.notify", skip_all)] pub fn notify_block_committed(&self) { @@ -65,13 +65,13 @@ pub fn spawn( db: Arc, block_prover: Arc, block_store: Arc, -) -> (ProofSchedulerHandle, JoinHandle>) { +) -> (ProofSchedulerNotifier, JoinHandle>) { let notify = Arc::new(Notify::new()); - let handle = ProofSchedulerHandle { notify: Arc::clone(¬ify) }; + let notifier = ProofSchedulerNotifier { notify: Arc::clone(¬ify) }; let join_handle = tokio::spawn(run(db, block_prover, block_store, notify)); - (handle, join_handle) + (notifier, join_handle) } /// Main loop of the proof scheduler. 
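The `ProofSchedulerNotifier` above still drives the scheduler through a `tokio::sync::Notify`, and the ordering that makes this race-free is easy to miss: the `notified()` future must be created before the unproven-blocks query, so a notification fired while the query runs is retained as a permit rather than lost. A minimal, self-contained sketch of that pattern, using placeholder names that are not taken from the crate:

```rust
use std::sync::Arc;
use tokio::sync::Notify;

/// Placeholder for a query such as `select_unproven_blocks`.
async fn pending_work() -> Vec<u32> {
    Vec::new()
}

async fn scheduler_loop(notify: Arc<Notify>) {
    loop {
        // Register interest *before* querying. `Notify` stores a permit from a
        // `notify_one()` that arrives after this point, so a block committed
        // while the query is in flight is never missed.
        let notified = notify.notified();

        let work = pending_work().await;
        if work.is_empty() {
            // Consumes an already-stored permit immediately, or waits for the next one.
            notified.await;
            continue;
        }

        for block_num in work {
            // ... prove `block_num` ...
            let _ = block_num;
        }
    }
}
```

The following patch replaces this mechanism with a `watch` channel carrying the committed chain tip, which conveys not only that a block was committed but also which one.
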
From d272347a323ba120961954b7c5d220d10aac4739 Mon Sep 17 00:00:00 2001 From: sergerad Date: Thu, 5 Mar 2026 12:16:32 +1300 Subject: [PATCH 46/73] refactor proof concurrency --- crates/store/src/errors.rs | 2 + crates/store/src/server/api.rs | 6 +- crates/store/src/server/block_producer.rs | 7 +- crates/store/src/server/mod.rs | 26 +- crates/store/src/server/proof_scheduler.rs | 351 ++++++++++++--------- 5 files changed, 235 insertions(+), 157 deletions(-) diff --git a/crates/store/src/errors.rs b/crates/store/src/errors.rs index 6aa805af35..bae39790be 100644 --- a/crates/store/src/errors.rs +++ b/crates/store/src/errors.rs @@ -45,6 +45,8 @@ pub enum ProofSchedulerError { MarkBlockProvenFailed(#[source] DatabaseError), #[error("invalid remote prover endpoint: {0}")] InvalidProverEndpoint(String), + #[error("proof scheduler task panicked: {0}")] + TaskPanicked(String), } // DATABASE ERRORS diff --git a/crates/store/src/server/api.rs b/crates/store/src/server/api.rs index 523e8ec861..3391c90338 100644 --- a/crates/store/src/server/api.rs +++ b/crates/store/src/server/api.rs @@ -9,12 +9,12 @@ use miden_protocol::account::AccountId; use miden_protocol::batch::OrderedBatches; use miden_protocol::block::{BlockInputs, BlockNumber}; use miden_protocol::note::Nullifier; +use tokio::sync::watch; use tonic::{Request, Response, Status}; use tracing::{info, instrument}; use crate::COMPONENT; use crate::errors::GetBlockInputsError; -use crate::server::proof_scheduler::ProofSchedulerNotifier; use crate::state::State; // STORE API @@ -23,8 +23,8 @@ use crate::state::State; #[derive(Clone)] pub struct StoreApi { pub(super) state: Arc, - /// Handle used to notify proof scheduler of newly committed blocks. - pub(super) proof_scheduler: ProofSchedulerNotifier, + /// Sender used to notify the proof scheduler of the latest committed block number. 
+ pub(super) chain_tip_sender: watch::Sender, } impl StoreApi { diff --git a/crates/store/src/server/block_producer.rs b/crates/store/src/server/block_producer.rs index 98f04ec7f9..b32b03c29f 100644 --- a/crates/store/src/server/block_producer.rs +++ b/crates/store/src/server/block_producer.rs @@ -13,7 +13,7 @@ use miden_protocol::batch::OrderedBatches; use miden_protocol::block::{BlockBody, BlockHeader, BlockNumber, SignedBlock}; use miden_protocol::utils::Deserializable; use tonic::{Request, Response, Status}; -use tracing::Instrument; +use tracing::{Instrument, error}; use crate::errors::ApplyBlockError; use crate::server::api::{ @@ -106,6 +106,7 @@ impl block_producer_server::BlockProducer for StoreApi { let this = self.clone(); tokio::spawn( async move { + let block_num = header.block_num(); let signed_block = SignedBlock::new(header, body, signature) .map_err(|err| Status::new(tonic::Code::Internal, err.as_report()))?; // Note: This is an internal endpoint, so its safe to expose the full error @@ -114,7 +115,9 @@ impl block_producer_server::BlockProducer for StoreApi { .apply_block(signed_block, Some(proving_inputs)) .await .inspect(|_| { - this.proof_scheduler.notify_block_committed(); + if let Err(err) = this.chain_tip_sender.send(block_num) { + error!("Failed to send chain tip: {:?}", err); + } }) .map_err(|err| { span.set_error(&err); diff --git a/crates/store/src/server/mod.rs b/crates/store/src/server/mod.rs index aeda721251..3c9ad15a4b 100644 --- a/crates/store/src/server/mod.rs +++ b/crates/store/src/server/mod.rs @@ -114,23 +114,39 @@ impl Store { Arc::new(BlockProver::local()) }; + // Initialize the chain tip watch channel and read the latest proven block from the DB. + let chain_tip = state.latest_block_num().await; + let (chain_tip_sender, chain_tip_rx) = tokio::sync::watch::channel(chain_tip); + + let latest_proven_block = state + .db() + .select_latest_proven_block_num() + .await + .context("failed to read latest proven block number")? + .unwrap_or(miden_protocol::block::BlockNumber::GENESIS); + // Spawn the proof scheduler as a background task. It will immediately pick up any // unproven blocks from previous runs and begin proving them. - let (proof_scheduler_notifier, proof_scheduler_task) = - proof_scheduler::spawn(state.db().clone(), block_prover, state.block_store()); + let proof_scheduler_task = proof_scheduler::spawn( + state.db().clone(), + block_prover, + state.block_store(), + chain_tip_rx, + latest_proven_block, + ); let rpc_service = store::rpc_server::RpcServer::new(api::StoreApi { state: Arc::clone(&state), - proof_scheduler: proof_scheduler_notifier.clone(), + chain_tip_sender: chain_tip_sender.clone(), }); let ntx_builder_service = store::ntx_builder_server::NtxBuilderServer::new(api::StoreApi { state: Arc::clone(&state), - proof_scheduler: proof_scheduler_notifier.clone(), + chain_tip_sender: chain_tip_sender.clone(), }); let block_producer_service = store::block_producer_server::BlockProducerServer::new(api::StoreApi { state: Arc::clone(&state), - proof_scheduler: proof_scheduler_notifier, + chain_tip_sender, }); let reflection_service = tonic_reflection::server::Builder::configure() .register_file_descriptor_set(store_rpc_api_descriptor()) diff --git a/crates/store/src/server/proof_scheduler.rs b/crates/store/src/server/proof_scheduler.rs index 0add26381d..87e744111d 100644 --- a/crates/store/src/server/proof_scheduler.rs +++ b/crates/store/src/server/proof_scheduler.rs @@ -1,27 +1,26 @@ //! Background task that drives deferred block proving. //! -//! 
The [`ProofScheduler`] is spawned as an internal Store task. It: +//! The [`proof_scheduler`] is spawned as an internal Store task. It: //! -//! 1. On startup, queries the DB for all unproven blocks (handles restart recovery). -//! 2. Listens on a [`tokio::sync::Notify`] for newly committed blocks. -//! 3. Proves blocks concurrently, but resolves completions in FIFO order via [`FuturesOrdered`]. -//! This ensures the ancestor rule: a block's proof is only persisted after all ancestor proofs -//! have been persisted. -//! 4. On transient errors (DB reads, prover failures, timeouts), the scheduler abandons the current -//! batch, re-queries unproven blocks, and retries from scratch. +//! 1. Tracks `chain_tip` via a [`watch::Receiver`] and `latest_proven_block` locally. +//! 2. Maintains up to [`MAX_CONCURRENT_PROOFS`] in-flight proving jobs via a [`JoinSet`]. +//! 3. Marks blocks as proven in the database **sequentially** — a block is only marked after all +//! its ancestors have been marked. Completed proofs that arrive out-of-order are buffered +//! locally until the sequential gap is filled. +//! 4. On transient errors (DB reads, prover failures, timeouts), the failed block is re-queued for +//! proving while other in-flight jobs continue uninterrupted. //! 5. On fatal errors (e.g. deserialization failures, missing proving inputs), the scheduler //! returns the error to the caller for node shutdown. +use std::collections::BTreeSet; use std::sync::Arc; use std::time::Duration; -use futures::StreamExt; -use futures::stream::FuturesOrdered; use miden_protocol::block::{BlockNumber, BlockProof}; use miden_protocol::utils::Serializable; use miden_remote_prover_client::RemoteProverClientError; -use tokio::sync::Notify; -use tokio::task::JoinHandle; +use tokio::sync::watch; +use tokio::task::{JoinHandle, JoinSet}; use tracing::{error, info, instrument}; use crate::COMPONENT; @@ -36,48 +35,33 @@ use crate::server::block_prover_client::{BlockProver, StoreProverError}; /// Overall timeout for proving a single block. const BLOCK_PROVE_TIMEOUT: Duration = Duration::from_mins(4); -/// Maximum number of unproven blocks to process in a single batch. -const MAX_PROVING_BATCH_SIZE: i64 = 16; +/// Maximum number of blocks being proven concurrently. +const MAX_CONCURRENT_PROOFS: usize = 8; // PROOF SCHEDULER // ================================================================================================ -/// Handle returned when spawning the proof scheduler, used to notify it of new blocks. -#[derive(Clone)] -pub struct ProofSchedulerNotifier { - notify: Arc, -} - -impl ProofSchedulerNotifier { - /// Notify the scheduler that a new block has been committed and may need proving. - #[instrument(target = COMPONENT, name = "proof_scheduler.notify", skip_all)] - pub fn notify_block_committed(&self) { - self.notify.notify_one(); - } -} - /// Spawns the proof scheduler as a background tokio task. /// -/// Returns a [`ProofSchedulerHandle`] that should be used to notify the scheduler when new -/// blocks are committed, and a [`JoinHandle`] that resolves when the scheduler encounters a -/// fatal error or completes unexpectedly. +/// The scheduler uses `chain_tip_rx` to learn about newly committed blocks and +/// `latest_proven_block` as the starting point for sequential proof tracking. +/// +/// Returns a [`JoinHandle`] that resolves when the scheduler encounters a fatal error or +/// completes unexpectedly. 
pub fn spawn( db: Arc, block_prover: Arc, block_store: Arc, -) -> (ProofSchedulerNotifier, JoinHandle>) { - let notify = Arc::new(Notify::new()); - let notifier = ProofSchedulerNotifier { notify: Arc::clone(¬ify) }; - - let join_handle = tokio::spawn(run(db, block_prover, block_store, notify)); - - (notifier, join_handle) + chain_tip_rx: watch::Receiver, + latest_proven_block: BlockNumber, +) -> JoinHandle> { + tokio::spawn(run(db, block_prover, block_store, chain_tip_rx, latest_proven_block)) } /// Main loop of the proof scheduler. /// -/// Uses [`FuturesOrdered`] to run proving concurrently while resolving completions in block -/// order. This provides natural backpressure and ensures proofs are persisted sequentially. +/// Maintains a pool of concurrent proving jobs via [`JoinSet`], fills them up to +/// [`MAX_CONCURRENT_PROOFS`], and drains completed results in block-number order. /// /// Returns `Err` on irrecoverable errors (missing/corrupt proving inputs, DB write failures). /// Transient errors are retried internally. @@ -85,137 +69,212 @@ async fn run( db: Arc, block_prover: Arc, block_store: Arc, - notify: Arc, + mut chain_tip_rx: watch::Receiver, + latest_proven_block: BlockNumber, ) -> Result<(), ProofSchedulerError> { - info!(target: COMPONENT, "Proof scheduler started"); + info!(target: COMPONENT, %latest_proven_block, "Proof scheduler started"); + + // The latest block that has been sequentially marked as proven in the DB. + let mut latest_proven = latest_proven_block; + // The current chain tip as observed from the watch channel. + let mut chain_tip = *chain_tip_rx.borrow_and_update(); + // Completed proof results waiting for sequential drain. + let mut results: BTreeSet = BTreeSet::new(); + // In-flight proving tasks. + let mut join_set: JoinSet> = JoinSet::new(); + // Block numbers currently being proven. + // Used to avoid double-scheduling a block that failed and needs retry. + let mut in_flight: BTreeSet = BTreeSet::new(); + // Blocks that need to be (re-)scheduled for proving. + let mut pending: BTreeSet = BTreeSet::new(); + + // Seed the pending set with all blocks that need proving. + for block_num in block_range(latest_proven.child(), chain_tip) { + pending.insert(block_num); + } loop { - // Capture the notify permit before retrieving unproven blocks from the database. - // This ensures that a notify fired between the database query and the wait on the permit - // will be captured; meaning we don't block unnecessarily until the next notify. - let notified = notify.notified(); - - // Query all unproven blocks. This handles both startup recovery and new blocks. - let unproven_blocks = match db.select_unproven_blocks(MAX_PROVING_BATCH_SIZE).await { - Ok(blocks) => blocks, - Err(err) => { - error!(target: COMPONENT, %err, "Failed to query unproven blocks, retrying"); - tokio::time::sleep(Duration::from_secs(1)).await; - continue; - }, - }; + // Fill the job pool up to capacity from the pending set. + while in_flight.len() < MAX_CONCURRENT_PROOFS { + let Some(&block_num) = pending.first() else { + break; + }; + pending.remove(&block_num); + in_flight.insert(block_num); - // Wait for notify if there are no unproven blocks. - if unproven_blocks.is_empty() { - notified.await; - continue; + let db = Arc::clone(&db); + let block_prover = Arc::clone(&block_prover); + let block_store = Arc::clone(&block_store); + join_set.spawn(async move { + prove_and_save(&db, &block_prover, &block_store, block_num).await + }); } - // Construct proving jobs and drain results in order. 
- // On any failure we break immediately — dropping remaining futures cancels them. - // The outer loop will re-query unproven blocks and restart the sequence, ensuring - // we never persist a proof while an ancestor block is still unproven. - let mut proving_futures = order_proving_jobs(&db, &block_prover, &unproven_blocks); - while let Some(timeout_result) = proving_futures.next().await { - match timeout_result { - // Save proof to file, then mark as proven in DB. - Ok((block_num, proof)) => { - block_store - .save_proof(block_num, &proof.to_bytes()) - .await - .map_err(ProofSchedulerError::PersistProofFailed)?; - db.mark_block_proven(block_num) - .await - .map_err(ProofSchedulerError::MarkBlockProvenFailed)?; - }, - - // Abort on fatal errors. - Err(ProveBlockError::Fatal(err)) => return Err(err), - - // Log transient errors and restart proof scheduler loop. - Err(ProveBlockError::Transient(err)) => { - error!( - target: COMPONENT, - %err, - "Block proving failed, abandoning batch and retrying next iteration" - ); - break; - }, - Err(ProveBlockError::Timeout) => { - error!( - target: COMPONENT, - "Block proving timed out, abandoning batch and retrying next iteration" - ); - break; - }, + // If there's nothing in flight and nothing pending, wait for new blocks. + if in_flight.is_empty() && pending.is_empty() { + if chain_tip_rx.changed().await.is_err() { + info!(target: COMPONENT, "Chain tip channel closed, proof scheduler exiting"); + return Ok(()); } + enqueue_new_blocks(&chain_tip_rx, &mut chain_tip, &mut pending); + continue; + } + + // Wait for either a job to complete or the chain tip to advance. + tokio::select! { + // Proving task completed. + Some(join_result) = join_set.join_next() => { + match join_result { + // Proof successful. + Ok(Ok(block_num)) => { + info!(target: COMPONENT, %block_num, "Block proof completed"); + in_flight.remove(&block_num); + results.insert(block_num); + }, + + // Transient errors, requeue. + Ok(Err(ProveBlockError::Transient(block_num, err))) => { + error!( + target: COMPONENT, + %block_num, %err, + "Block proving failed (transient), re-queuing" + ); + in_flight.remove(&block_num); + pending.insert(block_num); + }, + Ok(Err(ProveBlockError::Timeout(block_num))) => { + error!( + target: COMPONENT, + %block_num, + "Block proving timed out, re-queuing" + ); + in_flight.remove(&block_num); + pending.insert(block_num); + }, + + // Irrecoverable errors. + Ok(Err(ProveBlockError::Fatal(err))) => return Err(err), + Err(join_err) => { + // Task panicked or was cancelled — treat as fatal. + return Err(ProofSchedulerError::TaskPanicked(join_err.to_string())); + }, + } + }, + + // New chain tip received. + result = chain_tip_rx.changed() => { + if result.is_err() { + info!(target: COMPONENT, "Chain tip channel closed, proof scheduler exiting"); + return Ok(()); + } + enqueue_new_blocks(&chain_tip_rx, &mut chain_tip, &mut pending); + }, } + + // Drain completed proofs sequentially. + drain_sequential_results(&db, &mut results, &mut latest_proven).await?; } } -/// Submits all unproven blocks into a [`FuturesOrdered`]. Each future runs the full -/// prove-with-retries pipeline concurrently, but completions are polled in submission -/// (i.e. block) order. -fn order_proving_jobs( - db: &Arc, - block_prover: &Arc, - unproven_blocks: &[BlockNumber], -) -> FuturesOrdered< - impl std::future::Future>, -> { - let mut futures = FuturesOrdered::new(); - for &block_num in unproven_blocks { - // Clone the resources for each future. 
- let db = Arc::clone(db); - let block_prover = Arc::clone(block_prover); - // Define the future. - let fut = async move { - // Prove block with timeout. - let timeout_result = tokio::time::timeout( - BLOCK_PROVE_TIMEOUT, - prove_block(&db, &block_prover, block_num), - ) - .await; - // Handle proving result. - match timeout_result { - Ok(proof_result) => proof_result.map(|proof| (block_num, proof)), - Err(_elapsed) => Err(ProveBlockError::Timeout), - } - }; - futures.push_back(fut); +/// Reads and sets the latest chain tip from the watch channel and adds any new block numbers to the +/// pending set. +fn enqueue_new_blocks( + chain_tip_rx: &watch::Receiver, + chain_tip: &mut BlockNumber, + pending: &mut BTreeSet, +) { + let new_chain_tip = *chain_tip_rx.borrow(); + for block_num in block_range(chain_tip.child(), new_chain_tip) { + pending.insert(block_num); + } + *chain_tip = new_chain_tip; +} + +/// Returns an iterator over block numbers from `start` to `end` inclusive. +/// +/// Returns an empty iterator if `start > end`. +fn block_range(start: BlockNumber, end: BlockNumber) -> impl Iterator { + let start = start.as_u32(); + let end = end.as_u32(); + (start..=end).map(BlockNumber::from) +} + +/// Drains completed proofs from the results in sequential block-number order, +/// marking each as proven in the database. +/// +/// Does nothing if the next expected block in sequence has not been proven. +async fn drain_sequential_results( + db: &Db, + results: &mut BTreeSet, + latest_proven: &mut BlockNumber, +) -> Result<(), ProofSchedulerError> { + loop { + let next = latest_proven.child(); + if !results.remove(&next) { + break; + } + db.mark_block_proven(next) + .await + .map_err(ProofSchedulerError::MarkBlockProvenFailed)?; + info!(target: COMPONENT, block_num = %next, "Block marked as proven"); + *latest_proven = next; } - futures + Ok(()) } // PROVE BLOCK // ================================================================================================ -/// Proves a single block. +/// Proves a single block, saves the proof to the block store, and returns the block number. /// -/// Loads proving inputs from the DB, deserializes them, and invokes the block prover. -/// blocks. -#[instrument(target = COMPONENT, name = "proof_scheduler.prove_block", skip_all, fields(%block_num))] +/// This function encapsulates the full lifecycle of a single proof job: loading inputs from the +/// DB, invoking the prover (with a timeout), and persisting the proof to disk. +/// +/// The caller is responsible for marking the block as proven in the DB. +#[instrument(target = COMPONENT, name = "proof_scheduler.prove_and_save", skip_all, fields(%block_num))] +async fn prove_and_save( + db: &Db, + block_prover: &BlockProver, + block_store: &BlockStore, + block_num: BlockNumber, +) -> Result { + // Prove block with timeout. + let proof = + match tokio::time::timeout(BLOCK_PROVE_TIMEOUT, prove_block(db, block_prover, block_num)) + .await + { + Ok(Ok(proof)) => proof, + Ok(Err(err)) => return Err(err), + Err(_elapsed) => return Err(ProveBlockError::Timeout(block_num)), + }; + + // Save proof to the block store. + block_store + .save_proof(block_num, &proof.to_bytes()) + .await + .map_err(|e| ProveBlockError::Fatal(ProofSchedulerError::PersistProofFailed(e)))?; + + Ok(block_num) +} + +/// Proves a single block by loading inputs from the DB and invoking the block prover. async fn prove_block( db: &Db, block_prover: &BlockProver, block_num: BlockNumber, ) -> Result { - // Load proving inputs from the DB. 
- // All committed blocks should have inputs apart from the genesis block, which should - // never be queried by this function. let request = db .select_block_proving_inputs(block_num) .await - .map_err(ProveBlockError::from)? + .map_err(|err| ProveBlockError::from_db_error(block_num, err))? .ok_or_else(|| { ProveBlockError::Fatal(ProofSchedulerError::MissingProvingInputs(block_num)) })?; - // Prove the block. let proof = block_prover .prove(request.tx_batches, request.block_inputs, &request.block_header) .await - .map_err(ProveBlockError::from)?; + .map_err(|err| ProveBlockError::from_prover_error(block_num, err))?; Ok(proof) } @@ -229,29 +288,27 @@ enum ProveBlockError { /// An irrecoverable error that should cause node shutdown. Fatal(ProofSchedulerError), /// A transient error (DB read, prover failure). The outer loop will retry. - Transient(Box), + Transient(BlockNumber, Box), /// The overall proving timeout was exceeded. Retriable on next iteration. - Timeout, + Timeout(BlockNumber), } -impl From for ProveBlockError { - fn from(err: DatabaseError) -> Self { +impl ProveBlockError { + fn from_db_error(block_num: BlockNumber, err: DatabaseError) -> Self { match err { DatabaseError::DeserializationError(err) => { Self::Fatal(ProofSchedulerError::DeserializationFailed(err)) }, - _ => Self::Transient(err.into()), + _ => Self::Transient(block_num, err.into()), } } -} -impl From for ProveBlockError { - fn from(err: StoreProverError) -> Self { + fn from_prover_error(block_num: BlockNumber, err: StoreProverError) -> Self { match err { StoreProverError::RemoteProvingFailed(RemoteProverClientError::InvalidEndpoint( uri, )) => Self::Fatal(ProofSchedulerError::InvalidProverEndpoint(uri)), - _ => Self::Transient(err.into()), + _ => Self::Transient(block_num, err.into()), } } } From 9b2f3c817d38e612ac0c5193cbf3b21da0b09335 Mon Sep 17 00:00:00 2001 From: sergerad Date: Thu, 5 Mar 2026 12:26:53 +1300 Subject: [PATCH 47/73] lint --- Cargo.lock | 1 - crates/store/Cargo.toml | 1 - 2 files changed, 2 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 6bc2f415f3..638399583c 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -3106,7 +3106,6 @@ dependencies = [ "diesel", "diesel_migrations", "fs-err", - "futures", "hex", "indexmap", "libsqlite3-sys", diff --git a/crates/store/Cargo.toml b/crates/store/Cargo.toml index d0642a8193..61b1875ba7 100644 --- a/crates/store/Cargo.toml +++ b/crates/store/Cargo.toml @@ -21,7 +21,6 @@ deadpool-diesel = { features = ["sqlite"], version = "0.6" } diesel = { features = ["numeric", "sqlite"], version = "2.3" } diesel_migrations = { features = ["sqlite"], version = "2.3" } fs-err = { workspace = true } -futures = { workspace = true } hex = { version = "0.4" } indexmap = { workspace = true } libsqlite3-sys = { workspace = true } From da2243806cf3eb296863054adaa53cc36dcece05 Mon Sep 17 00:00:00 2001 From: sergerad Date: Thu, 5 Mar 2026 12:41:36 +1300 Subject: [PATCH 48/73] RM unused query and index --- .../db/migrations/2025062000000_setup/up.sql | 1 - crates/store/src/db/mod.rs | 9 ----- .../src/db/models/queries/block_headers.rs | 33 ------------------- 3 files changed, 43 deletions(-) diff --git a/crates/store/src/db/migrations/2025062000000_setup/up.sql b/crates/store/src/db/migrations/2025062000000_setup/up.sql index ef971ee462..a52c44eaed 100644 --- a/crates/store/src/db/migrations/2025062000000_setup/up.sql +++ b/crates/store/src/db/migrations/2025062000000_setup/up.sql @@ -10,7 +10,6 @@ CREATE TABLE block_headers ( CONSTRAINT block_header_block_num_is_u32 CHECK 
(block_num BETWEEN 0 AND 0xFFFFFFFF) ); -CREATE INDEX block_headers_to_be_proven ON block_headers(block_num ASC) WHERE is_proven = 0; CREATE INDEX block_headers_proven_desc ON block_headers(block_num DESC) WHERE is_proven = 1; CREATE TABLE account_codes ( diff --git a/crates/store/src/db/mod.rs b/crates/store/src/db/mod.rs index 36928020a3..c3215d6717 100644 --- a/crates/store/src/db/mod.rs +++ b/crates/store/src/db/mod.rs @@ -601,15 +601,6 @@ impl Db { Ok(()) } - /// Returns block numbers for all blocks that have not yet been proven, ordered ascending. - #[instrument(level = "debug", target = COMPONENT, skip_all, ret(level = "debug"), err)] - pub async fn select_unproven_blocks(&self, limit: i64) -> Result> { - self.transact("select unproven blocks", move |conn| { - models::queries::select_unproven_blocks(conn, limit) - }) - .await - } - /// Returns the proving inputs for a given block number, if stored. #[instrument(level = "debug", target = COMPONENT, skip_all, err)] pub async fn select_block_proving_inputs( diff --git a/crates/store/src/db/models/queries/block_headers.rs b/crates/store/src/db/models/queries/block_headers.rs index c85d17fc7c..812787e44a 100644 --- a/crates/store/src/db/models/queries/block_headers.rs +++ b/crates/store/src/db/models/queries/block_headers.rs @@ -296,39 +296,6 @@ pub(crate) fn mark_block_proven( Ok(count) } -/// Select block numbers that have not yet been proven, ordered ascending. -/// -/// The genesis block (block 0) is excluded because it is never proven. -/// Results are limited to at most `limit` rows. -/// -/// # Raw SQL -/// -/// ```sql -/// SELECT block_num -/// FROM block_headers -/// WHERE is_proven = 0 -/// AND block_num > 0 -/// ORDER BY block_num ASC -/// LIMIT ? -/// ``` -pub(crate) fn select_unproven_blocks( - conn: &mut SqliteConnection, - limit: i64, -) -> Result, DatabaseError> { - let block_nums: Vec = - SelectDsl::select(schema::block_headers::table, schema::block_headers::block_num) - .filter(schema::block_headers::is_proven.eq(false)) - .filter(schema::block_headers::block_num.gt(0i64)) - .order(schema::block_headers::block_num.asc()) - .limit(limit) - .load(conn)?; - block_nums - .into_iter() - .map(BlockNumber::from_raw_sql) - .collect::, _>>() - .map_err(Into::into) -} - /// Select the highest block number that has been proven. /// /// Returns `None` if no blocks have been proven yet (genesis is never proven). 
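The latest-proven lookup kept by this patch serves two consumers: the store reads it at startup to seed the proof scheduler (falling back to the genesis block when nothing has been proven yet), and the RPC layer uses it to clamp `SyncChainMmr` requests that ask for `FINALITY_PROVEN`. A self-contained sketch of that clamping, using placeholder enums rather than the crate's own `Finality` and error types:

```rust
use miden_protocol::block::BlockNumber;

/// Placeholder mirroring the proto `Finality` enum.
enum Finality {
    Unspecified,
    Committed,
    Proven,
}

/// Placeholder error for the case where a proven upper bound is requested
/// before any block has been proven.
enum ClampError {
    NoProvenBlocks,
}

/// Clamp the upper bound of a chain-data query to the requested finality level.
/// `latest_proven` is the result of `select_latest_proven_block_num`.
fn effective_tip(
    finality: Finality,
    chain_tip: BlockNumber,
    latest_proven: Option<BlockNumber>,
) -> Result<BlockNumber, ClampError> {
    match finality {
        // Unspecified behaves like committed: serve data up to the chain tip.
        Finality::Unspecified | Finality::Committed => Ok(chain_tip),
        // Proven: serve data only up to the latest proven block, if any.
        Finality::Proven => latest_proven.ok_or(ClampError::NoProvenBlocks),
    }
}
```
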
From 0e3145f7e1b4b858ce40536e6351553f4993eae0 Mon Sep 17 00:00:00 2001 From: sergerad Date: Mon, 9 Mar 2026 11:45:31 +1300 Subject: [PATCH 49/73] Move retry logic to prove_and_save --- crates/store/src/errors.rs | 4 - crates/store/src/server/proof_scheduler.rs | 91 ++++++++++------------ 2 files changed, 39 insertions(+), 56 deletions(-) diff --git a/crates/store/src/errors.rs b/crates/store/src/errors.rs index bae39790be..2be2eae2fd 100644 --- a/crates/store/src/errors.rs +++ b/crates/store/src/errors.rs @@ -39,14 +39,10 @@ pub enum ProofSchedulerError { MissingProvingInputs(BlockNumber), #[error("failed to deserialize proving inputs for block")] DeserializationFailed(#[source] DeserializationError), - #[error("failed to write block proof to file")] - PersistProofFailed(#[source] std::io::Error), #[error("failed to mark block as proven in database")] MarkBlockProvenFailed(#[source] DatabaseError), #[error("invalid remote prover endpoint: {0}")] InvalidProverEndpoint(String), - #[error("proof scheduler task panicked: {0}")] - TaskPanicked(String), } // DATABASE ERRORS diff --git a/crates/store/src/server/proof_scheduler.rs b/crates/store/src/server/proof_scheduler.rs index 87e744111d..360c2c7930 100644 --- a/crates/store/src/server/proof_scheduler.rs +++ b/crates/store/src/server/proof_scheduler.rs @@ -54,7 +54,7 @@ pub fn spawn( block_store: Arc, chain_tip_rx: watch::Receiver, latest_proven_block: BlockNumber, -) -> JoinHandle> { +) -> JoinHandle> { tokio::spawn(run(db, block_prover, block_store, chain_tip_rx, latest_proven_block)) } @@ -71,7 +71,7 @@ async fn run( block_store: Arc, mut chain_tip_rx: watch::Receiver, latest_proven_block: BlockNumber, -) -> Result<(), ProofSchedulerError> { +) -> anyhow::Result<()> { info!(target: COMPONENT, %latest_proven_block, "Proof scheduler started"); // The latest block that has been sequentially marked as proven in the DB. @@ -81,11 +81,11 @@ async fn run( // Completed proof results waiting for sequential drain. let mut results: BTreeSet = BTreeSet::new(); // In-flight proving tasks. - let mut join_set: JoinSet> = JoinSet::new(); + let mut join_set: JoinSet> = JoinSet::new(); // Block numbers currently being proven. // Used to avoid double-scheduling a block that failed and needs retry. let mut in_flight: BTreeSet = BTreeSet::new(); - // Blocks that need to be (re-)scheduled for proving. + // Blocks that have been committed and need to be scheduled for proving. let mut pending: BTreeSet = BTreeSet::new(); // Seed the pending set with all blocks that need proving. @@ -125,38 +125,14 @@ async fn run( // Proving task completed. Some(join_result) = join_set.join_next() => { match join_result { - // Proof successful. Ok(Ok(block_num)) => { info!(target: COMPONENT, %block_num, "Block proof completed"); in_flight.remove(&block_num); results.insert(block_num); }, - - // Transient errors, requeue. - Ok(Err(ProveBlockError::Transient(block_num, err))) => { - error!( - target: COMPONENT, - %block_num, %err, - "Block proving failed (transient), re-queuing" - ); - in_flight.remove(&block_num); - pending.insert(block_num); - }, - Ok(Err(ProveBlockError::Timeout(block_num))) => { - error!( - target: COMPONENT, - %block_num, - "Block proving timed out, re-queuing" - ); - in_flight.remove(&block_num); - pending.insert(block_num); - }, - - // Irrecoverable errors. - Ok(Err(ProveBlockError::Fatal(err))) => return Err(err), + Ok(Err(err)) => return Err(err), Err(join_err) => { - // Task panicked or was cancelled — treat as fatal. 
- return Err(ProofSchedulerError::TaskPanicked(join_err.to_string())); + anyhow::bail!("Proof task panicked: {join_err}") }, } }, @@ -237,24 +213,37 @@ async fn prove_and_save( block_prover: &BlockProver, block_store: &BlockStore, block_num: BlockNumber, -) -> Result { - // Prove block with timeout. - let proof = - match tokio::time::timeout(BLOCK_PROVE_TIMEOUT, prove_block(db, block_prover, block_num)) - .await +) -> anyhow::Result { + let mut attempts = 0u32; + loop { + attempts += 1; + if attempts > 10 { + anyhow::bail!("Bailed after max attempts") + } + // Prove block with timeout. + let proof = match tokio::time::timeout( + BLOCK_PROVE_TIMEOUT, + prove_block(db, block_prover, block_num), + ) + .await { Ok(Ok(proof)) => proof, - Ok(Err(err)) => return Err(err), - Err(_elapsed) => return Err(ProveBlockError::Timeout(block_num)), + Ok(Err(ProveBlockError::Fatal(err))) => anyhow::bail!("Fatal error: {err}"), + Ok(Err(ProveBlockError::Transient(err))) => { + error!("Transient error proving block {block_num}: {err}"); + continue; + }, + Err(_elapsed) => { + error!("Timed out proving block {block_num}"); + continue; + }, }; - // Save proof to the block store. - block_store - .save_proof(block_num, &proof.to_bytes()) - .await - .map_err(|e| ProveBlockError::Fatal(ProofSchedulerError::PersistProofFailed(e)))?; + // Save proof to the block store. + block_store.save_proof(block_num, &proof.to_bytes()).await?; - Ok(block_num) + return Ok(block_num); + } } /// Proves a single block by loading inputs from the DB and invoking the block prover. @@ -266,7 +255,7 @@ async fn prove_block( let request = db .select_block_proving_inputs(block_num) .await - .map_err(|err| ProveBlockError::from_db_error(block_num, err))? + .map_err(ProveBlockError::from_db_error)? .ok_or_else(|| { ProveBlockError::Fatal(ProofSchedulerError::MissingProvingInputs(block_num)) })?; @@ -274,7 +263,7 @@ async fn prove_block( let proof = block_prover .prove(request.tx_batches, request.block_inputs, &request.block_header) .await - .map_err(|err| ProveBlockError::from_prover_error(block_num, err))?; + .map_err(ProveBlockError::from_prover_error)?; Ok(proof) } @@ -288,27 +277,25 @@ enum ProveBlockError { /// An irrecoverable error that should cause node shutdown. Fatal(ProofSchedulerError), /// A transient error (DB read, prover failure). The outer loop will retry. - Transient(BlockNumber, Box), - /// The overall proving timeout was exceeded. Retriable on next iteration. 
- Timeout(BlockNumber), + Transient(Box), } impl ProveBlockError { - fn from_db_error(block_num: BlockNumber, err: DatabaseError) -> Self { + fn from_db_error(err: DatabaseError) -> Self { match err { DatabaseError::DeserializationError(err) => { Self::Fatal(ProofSchedulerError::DeserializationFailed(err)) }, - _ => Self::Transient(block_num, err.into()), + _ => Self::Transient(err.into()), } } - fn from_prover_error(block_num: BlockNumber, err: StoreProverError) -> Self { + fn from_prover_error(err: StoreProverError) -> Self { match err { StoreProverError::RemoteProvingFailed(RemoteProverClientError::InvalidEndpoint( uri, )) => Self::Fatal(ProofSchedulerError::InvalidProverEndpoint(uri)), - _ => Self::Transient(block_num, err.into()), + _ => Self::Transient(err.into()), } } } From 75cca6e873ad41be2d448f380c84c61031c296ba Mon Sep 17 00:00:00 2001 From: sergerad Date: Mon, 9 Mar 2026 11:53:42 +1300 Subject: [PATCH 50/73] PendingJoinSet --- crates/store/src/server/proof_scheduler.rs | 39 ++++++++++++++-------- 1 file changed, 26 insertions(+), 13 deletions(-) diff --git a/crates/store/src/server/proof_scheduler.rs b/crates/store/src/server/proof_scheduler.rs index 360c2c7930..9a20071bac 100644 --- a/crates/store/src/server/proof_scheduler.rs +++ b/crates/store/src/server/proof_scheduler.rs @@ -20,7 +20,7 @@ use miden_protocol::block::{BlockNumber, BlockProof}; use miden_protocol::utils::Serializable; use miden_remote_prover_client::RemoteProverClientError; use tokio::sync::watch; -use tokio::task::{JoinHandle, JoinSet}; +use tokio::task::{JoinError, JoinHandle, JoinSet}; use tracing::{error, info, instrument}; use crate::COMPONENT; @@ -38,6 +38,29 @@ const BLOCK_PROVE_TIMEOUT: Duration = Duration::from_mins(4); /// Maximum number of blocks being proven concurrently. const MAX_CONCURRENT_PROOFS: usize = 8; +/// A wrapper around [`JoinSet`] whose `join_next` returns [`std::future::pending`] when empty +/// instead of `None`, making it safe to use directly in `tokio::select!` without a special case. +struct PendingJoinSet(JoinSet); + +impl PendingJoinSet { + fn new() -> Self { + Self(JoinSet::new()) + } + + fn spawn(&mut self, task: impl std::future::Future + Send + 'static) { + self.0.spawn(task); + } + + /// Returns the result of the next completed task, or pends forever if the set is empty. + async fn join_next(&mut self) -> Result { + if self.0.is_empty() { + std::future::pending().await + } else { + self.0.join_next().await.expect("join set is not empty") + } + } +} + // PROOF SCHEDULER // ================================================================================================ @@ -81,7 +104,7 @@ async fn run( // Completed proof results waiting for sequential drain. let mut results: BTreeSet = BTreeSet::new(); // In-flight proving tasks. - let mut join_set: JoinSet> = JoinSet::new(); + let mut join_set: PendingJoinSet> = PendingJoinSet::new(); // Block numbers currently being proven. // Used to avoid double-scheduling a block that failed and needs retry. let mut in_flight: BTreeSet = BTreeSet::new(); @@ -110,20 +133,10 @@ async fn run( }); } - // If there's nothing in flight and nothing pending, wait for new blocks. - if in_flight.is_empty() && pending.is_empty() { - if chain_tip_rx.changed().await.is_err() { - info!(target: COMPONENT, "Chain tip channel closed, proof scheduler exiting"); - return Ok(()); - } - enqueue_new_blocks(&chain_tip_rx, &mut chain_tip, &mut pending); - continue; - } - // Wait for either a job to complete or the chain tip to advance. tokio::select! 
{ // Proving task completed. - Some(join_result) = join_set.join_next() => { + join_result = join_set.join_next() => { match join_result { Ok(Ok(block_num)) => { info!(target: COMPONENT, %block_num, "Block proof completed"); From 20c1720f6fc23e99f7b183e06af5a4a30a89d466 Mon Sep 17 00:00:00 2001 From: sergerad Date: Mon, 9 Mar 2026 12:51:26 +1300 Subject: [PATCH 51/73] Simplify scheduling logic --- crates/store/src/server/proof_scheduler.rs | 97 ++++++---------------- 1 file changed, 25 insertions(+), 72 deletions(-) diff --git a/crates/store/src/server/proof_scheduler.rs b/crates/store/src/server/proof_scheduler.rs index 9a20071bac..3a15edc1aa 100644 --- a/crates/store/src/server/proof_scheduler.rs +++ b/crates/store/src/server/proof_scheduler.rs @@ -5,10 +5,9 @@ //! 1. Tracks `chain_tip` via a [`watch::Receiver`] and `latest_proven_block` locally. //! 2. Maintains up to [`MAX_CONCURRENT_PROOFS`] in-flight proving jobs via a [`JoinSet`]. //! 3. Marks blocks as proven in the database **sequentially** — a block is only marked after all -//! its ancestors have been marked. Completed proofs that arrive out-of-order are buffered -//! locally until the sequential gap is filled. -//! 4. On transient errors (DB reads, prover failures, timeouts), the failed block is re-queued for -//! proving while other in-flight jobs continue uninterrupted. +//! its ancestors have been marked. +//! 4. On transient errors (DB reads, prover failures, timeouts), the failed block is retried +//! internally within its proving task. //! 5. On fatal errors (e.g. deserialization failures, missing proving inputs), the scheduler //! returns the error to the caller for node shutdown. @@ -98,39 +97,31 @@ async fn run( info!(target: COMPONENT, %latest_proven_block, "Proof scheduler started"); // The latest block that has been sequentially marked as proven in the DB. - let mut latest_proven = latest_proven_block; + let mut latest_complete = latest_proven_block; // The current chain tip as observed from the watch channel. let mut chain_tip = *chain_tip_rx.borrow_and_update(); - // Completed proof results waiting for sequential drain. - let mut results: BTreeSet = BTreeSet::new(); // In-flight proving tasks. let mut join_set: PendingJoinSet> = PendingJoinSet::new(); // Block numbers currently being proven. - // Used to avoid double-scheduling a block that failed and needs retry. let mut in_flight: BTreeSet = BTreeSet::new(); - // Blocks that have been committed and need to be scheduled for proving. - let mut pending: BTreeSet = BTreeSet::new(); - - // Seed the pending set with all blocks that need proving. - for block_num in block_range(latest_proven.child(), chain_tip) { - pending.insert(block_num); - } + // The next block number to schedule for proving. + let mut next_to_schedule = latest_complete.child(); loop { - // Fill the job pool up to capacity from the pending set. - while in_flight.len() < MAX_CONCURRENT_PROOFS { - let Some(&block_num) = pending.first() else { - break; - }; - pending.remove(&block_num); - in_flight.insert(block_num); + // Fill the job pool up to capacity from the next unscheduled blocks. 
+ while in_flight.len() < MAX_CONCURRENT_PROOFS + && next_to_schedule.as_u32() <= chain_tip.as_u32() + { + let scheduled = next_to_schedule; + in_flight.insert(scheduled); let db = Arc::clone(&db); let block_prover = Arc::clone(&block_prover); let block_store = Arc::clone(&block_store); join_set.spawn(async move { - prove_and_save(&db, &block_prover, &block_store, block_num).await + prove_and_save(&db, &block_prover, &block_store, scheduled).await }); + next_to_schedule = scheduled.child(); } // Wait for either a job to complete or the chain tip to advance. @@ -141,7 +132,6 @@ async fn run( Ok(Ok(block_num)) => { info!(target: COMPONENT, %block_num, "Block proof completed"); in_flight.remove(&block_num); - results.insert(block_num); }, Ok(Err(err)) => return Err(err), Err(join_err) => { @@ -156,59 +146,22 @@ async fn run( info!(target: COMPONENT, "Chain tip channel closed, proof scheduler exiting"); return Ok(()); } - enqueue_new_blocks(&chain_tip_rx, &mut chain_tip, &mut pending); + chain_tip = *chain_tip_rx.borrow(); }, } - // Drain completed proofs sequentially. - drain_sequential_results(&db, &mut results, &mut latest_proven).await?; - } -} - -/// Reads and sets the latest chain tip from the watch channel and adds any new block numbers to the -/// pending set. -fn enqueue_new_blocks( - chain_tip_rx: &watch::Receiver, - chain_tip: &mut BlockNumber, - pending: &mut BTreeSet, -) { - let new_chain_tip = *chain_tip_rx.borrow(); - for block_num in block_range(chain_tip.child(), new_chain_tip) { - pending.insert(block_num); - } - *chain_tip = new_chain_tip; -} - -/// Returns an iterator over block numbers from `start` to `end` inclusive. -/// -/// Returns an empty iterator if `start > end`. -fn block_range(start: BlockNumber, end: BlockNumber) -> impl Iterator { - let start = start.as_u32(); - let end = end.as_u32(); - (start..=end).map(BlockNumber::from) -} - -/// Drains completed proofs from the results in sequential block-number order, -/// marking each as proven in the database. -/// -/// Does nothing if the next expected block in sequence has not been proven. -async fn drain_sequential_results( - db: &Db, - results: &mut BTreeSet, - latest_proven: &mut BlockNumber, -) -> Result<(), ProofSchedulerError> { - loop { - let next = latest_proven.child(); - if !results.remove(&next) { - break; + // Mark completed proofs as proven sequentially. + // Find the lowest in-flight block. + let lowest_in_flight = in_flight.first().map_or(next_to_schedule, |&first| first); + // Mark all sequentially proven blocks as completed. 
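        // Worked example with illustrative numbers (not taken from a real run): with
        // `latest_complete = 4` and blocks 5..=12 scheduled, suppose 5, 6 and 8 have completed
        // while 7 is still in flight. Then `lowest_in_flight` is 7 and the loop below marks
        // only 5 and 6 as proven. Block 8's proof is already persisted by its task; it is
        // marked once 7 completes and the lowest in-flight block moves past it.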
+ while latest_complete.child().as_u32() < lowest_in_flight.as_u32() { + latest_complete = latest_complete.child(); + db.mark_block_proven(latest_complete) + .await + .map_err(ProofSchedulerError::MarkBlockProvenFailed)?; + info!(target: COMPONENT, block_num = %latest_complete, "Block marked as proven"); } - db.mark_block_proven(next) - .await - .map_err(ProofSchedulerError::MarkBlockProvenFailed)?; - info!(target: COMPONENT, block_num = %next, "Block marked as proven"); - *latest_proven = next; } - Ok(()) } // PROVE BLOCK From 3bbb943fea081464ecedc621ee55a9898677bf4a Mon Sep 17 00:00:00 2001 From: sergerad Date: Mon, 9 Mar 2026 12:53:04 +1300 Subject: [PATCH 52/73] Rename var --- crates/store/src/server/proof_scheduler.rs | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/crates/store/src/server/proof_scheduler.rs b/crates/store/src/server/proof_scheduler.rs index 3a15edc1aa..47ed16bac6 100644 --- a/crates/store/src/server/proof_scheduler.rs +++ b/crates/store/src/server/proof_scheduler.rs @@ -103,17 +103,17 @@ async fn run( // In-flight proving tasks. let mut join_set: PendingJoinSet> = PendingJoinSet::new(); // Block numbers currently being proven. - let mut in_flight: BTreeSet = BTreeSet::new(); + let mut inflight: BTreeSet = BTreeSet::new(); // The next block number to schedule for proving. let mut next_to_schedule = latest_complete.child(); loop { // Fill the job pool up to capacity from the next unscheduled blocks. - while in_flight.len() < MAX_CONCURRENT_PROOFS + while inflight.len() < MAX_CONCURRENT_PROOFS && next_to_schedule.as_u32() <= chain_tip.as_u32() { let scheduled = next_to_schedule; - in_flight.insert(scheduled); + inflight.insert(scheduled); let db = Arc::clone(&db); let block_prover = Arc::clone(&block_prover); @@ -131,7 +131,7 @@ async fn run( match join_result { Ok(Ok(block_num)) => { info!(target: COMPONENT, %block_num, "Block proof completed"); - in_flight.remove(&block_num); + inflight.remove(&block_num); }, Ok(Err(err)) => return Err(err), Err(join_err) => { @@ -152,7 +152,7 @@ async fn run( // Mark completed proofs as proven sequentially. // Find the lowest in-flight block. - let lowest_in_flight = in_flight.first().map_or(next_to_schedule, |&first| first); + let lowest_in_flight = inflight.first().map_or(next_to_schedule, |&first| first); // Mark all sequentially proven blocks as completed. 
while latest_complete.child().as_u32() < lowest_in_flight.as_u32() { latest_complete = latest_complete.child(); From 433085b8b3e1efb95284b50f47f22f04241ba739 Mon Sep 17 00:00:00 2001 From: sergerad Date: Mon, 9 Mar 2026 13:05:52 +1300 Subject: [PATCH 53/73] instrument field name --- crates/store/src/db/models/queries/block_headers.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/crates/store/src/db/models/queries/block_headers.rs b/crates/store/src/db/models/queries/block_headers.rs index 812787e44a..5251c34b23 100644 --- a/crates/store/src/db/models/queries/block_headers.rs +++ b/crates/store/src/db/models/queries/block_headers.rs @@ -280,7 +280,7 @@ pub(crate) fn select_block_proving_inputs( #[tracing::instrument( target = COMPONENT, skip_all, - fields(block_num = %block_num), + fields(block.number = %block_num), err, )] pub(crate) fn mark_block_proven( From aa4f32a10dfbdc0e5bb49b687902e9c5915752ac Mon Sep 17 00:00:00 2001 From: sergerad Date: Tue, 10 Mar 2026 08:05:48 +1300 Subject: [PATCH 54/73] parameterize max concurrent proofs --- bin/node/src/commands/bundled.rs | 16 ++++++++++++++-- bin/node/src/commands/store.rs | 14 +++++++++++++- bin/stress-test/src/seeding/mod.rs | 1 + crates/block-producer/src/server/tests.rs | 3 ++- crates/rpc/src/tests.rs | 4 +++- crates/store/src/lib.rs | 1 + crates/store/src/server/mod.rs | 3 +++ crates/store/src/server/proof_scheduler.rs | 21 +++++++++++++++------ 8 files changed, 52 insertions(+), 11 deletions(-) diff --git a/bin/node/src/commands/bundled.rs b/bin/node/src/commands/bundled.rs index 9ca1872074..a31a5930a3 100644 --- a/bin/node/src/commands/bundled.rs +++ b/bin/node/src/commands/bundled.rs @@ -5,7 +5,7 @@ use std::time::Duration; use anyhow::Context; use miden_node_block_producer::BlockProducer; use miden_node_rpc::Rpc; -use miden_node_store::Store; +use miden_node_store::{DEFAULT_MAX_CONCURRENT_PROOFS, Store}; use miden_node_utils::grpc::UrlExt; use miden_node_validator::{Validator, ValidatorSigner}; use miden_protocol::crypto::dsa::ecdsa_k256_keccak::SecretKey; @@ -94,6 +94,14 @@ pub enum BundledCommand { value_name = "DURATION" )] grpc_timeout: Duration, + + /// Maximum number of concurrent block proofs to be scheduled. + #[arg( + long = "max-concurrent-proofs", + default_value_t = DEFAULT_MAX_CONCURRENT_PROOFS, + value_name = "NUM" + )] + max_concurrent_proofs: usize, }, } @@ -126,6 +134,7 @@ impl BundledCommand { validator, enable_otel: _, grpc_timeout, + max_concurrent_proofs, } => { Self::start( rpc_url, @@ -135,13 +144,14 @@ impl BundledCommand { ntx_builder, validator, grpc_timeout, + max_concurrent_proofs, ) .await }, } } - #[expect(clippy::too_many_lines)] + #[expect(clippy::too_many_lines, clippy::too_many_arguments)] async fn start( rpc_url: Url, block_prover_url: Option, @@ -150,6 +160,7 @@ impl BundledCommand { ntx_builder: NtxBuilderConfig, validator: BundledValidatorConfig, grpc_timeout: Duration, + max_concurrent_proofs: usize, ) -> anyhow::Result<()> { // Start listening on all gRPC urls so that inter-component connections can be created // before each component is fully started up. 
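// Minimal, self-contained sketch of the flag pattern this patch relies on, assuming clap 4
// with the `derive` feature enabled. The binary and struct names below are illustrative and
// are not part of the node's real CLI; the point is that `default_value_t` takes a typed
// constant, so the default shows up in `--help` and the flag may be omitted entirely.
use clap::Parser;

const DEFAULT_MAX_CONCURRENT_PROOFS: usize = 8;

#[derive(Parser)]
struct DemoArgs {
    /// Maximum number of concurrent block proofs to be scheduled.
    #[arg(
        long = "max-concurrent-proofs",
        default_value_t = DEFAULT_MAX_CONCURRENT_PROOFS,
        value_name = "NUM"
    )]
    max_concurrent_proofs: usize,
}

fn main() {
    // For example: `demo --max-concurrent-proofs 16`; with no flag the default of 8 is used.
    let args = DemoArgs::parse();
    println!("max concurrent proofs: {}", args.max_concurrent_proofs);
}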
@@ -207,6 +218,7 @@ impl BundledCommand { data_directory: data_directory_clone, block_prover_url, grpc_timeout, + max_concurrent_proofs, } .serve() .await diff --git a/bin/node/src/commands/store.rs b/bin/node/src/commands/store.rs index 14b266147e..af2c3c02d3 100644 --- a/bin/node/src/commands/store.rs +++ b/bin/node/src/commands/store.rs @@ -2,8 +2,8 @@ use std::path::{Path, PathBuf}; use std::time::Duration; use anyhow::Context; -use miden_node_store::Store; use miden_node_store::genesis::config::{AccountFileWithName, GenesisConfig}; +use miden_node_store::{DEFAULT_MAX_CONCURRENT_PROOFS, Store}; use miden_node_utils::grpc::UrlExt; use miden_node_utils::signer::BlockSigner; use miden_node_validator::ValidatorSigner; @@ -90,6 +90,14 @@ pub enum StoreCommand { value_name = "DURATION" )] grpc_timeout: Duration, + + /// Maximum number of concurrent block proofs to be scheduled. + #[arg( + long = "max-concurrent-proofs", + default_value_t = DEFAULT_MAX_CONCURRENT_PROOFS, + value_name = "NUM" + )] + max_concurrent_proofs: usize, }, } @@ -119,6 +127,7 @@ impl StoreCommand { data_directory, enable_otel: _, grpc_timeout, + max_concurrent_proofs, } => { Self::start( rpc_url, @@ -127,6 +136,7 @@ impl StoreCommand { block_prover_url, data_directory, grpc_timeout, + max_concurrent_proofs, ) .await }, @@ -148,6 +158,7 @@ impl StoreCommand { block_prover_url: Option, data_directory: PathBuf, grpc_timeout: Duration, + max_concurrent_proofs: usize, ) -> anyhow::Result<()> { let rpc_listener = rpc_url .to_socket() @@ -177,6 +188,7 @@ impl StoreCommand { block_producer_listener, data_directory, grpc_timeout, + max_concurrent_proofs, } .serve() .await diff --git a/bin/stress-test/src/seeding/mod.rs b/bin/stress-test/src/seeding/mod.rs index 855aa13e5f..35770a049a 100644 --- a/bin/stress-test/src/seeding/mod.rs +++ b/bin/stress-test/src/seeding/mod.rs @@ -556,6 +556,7 @@ pub async fn start_store( block_producer_listener, data_directory: dir, grpc_timeout: Duration::from_secs(30), + max_concurrent_proofs: miden_node_store::DEFAULT_MAX_CONCURRENT_PROOFS, } .serve() .await diff --git a/crates/block-producer/src/server/tests.rs b/crates/block-producer/src/server/tests.rs index 63aa983db2..a0805d5b34 100644 --- a/crates/block-producer/src/server/tests.rs +++ b/crates/block-producer/src/server/tests.rs @@ -2,7 +2,7 @@ use std::num::NonZeroUsize; use std::time::Duration; use miden_node_proto::generated::block_producer::api_client as block_producer_client; -use miden_node_store::{GenesisState, Store}; +use miden_node_store::{DEFAULT_MAX_CONCURRENT_PROOFS, GenesisState, Store}; use miden_node_utils::fee::test_fee_params; use miden_node_validator::{Validator, ValidatorSigner}; use miden_protocol::testing::random_secret_key::random_secret_key; @@ -155,6 +155,7 @@ async fn start_store( block_prover_url: None, data_directory: dir, grpc_timeout: Duration::from_secs(30), + max_concurrent_proofs: DEFAULT_MAX_CONCURRENT_PROOFS, } .serve() .await diff --git a/crates/rpc/src/tests.rs b/crates/rpc/src/tests.rs index 2459cde7ed..3210a216ca 100644 --- a/crates/rpc/src/tests.rs +++ b/crates/rpc/src/tests.rs @@ -6,8 +6,8 @@ use http::{HeaderMap, HeaderValue}; use miden_node_proto::clients::{Builder, RpcClient}; use miden_node_proto::generated::rpc::api_client::ApiClient as ProtoClient; use miden_node_proto::generated::{self as proto}; -use miden_node_store::Store; use miden_node_store::genesis::config::GenesisConfig; +use miden_node_store::{DEFAULT_MAX_CONCURRENT_PROOFS, Store}; use miden_node_utils::fee::test_fee; use 
miden_node_utils::limiter::{ QueryParamAccountIdLimit, @@ -430,6 +430,7 @@ async fn start_store(store_listener: TcpListener) -> (Runtime, TempDir, Word, So block_producer_listener, data_directory: dir, grpc_timeout: Duration::from_secs(30), + max_concurrent_proofs: DEFAULT_MAX_CONCURRENT_PROOFS, } .serve() .await @@ -472,6 +473,7 @@ async fn restart_store(store_addr: SocketAddr, data_directory: &std::path::Path) block_producer_listener, data_directory: dir, grpc_timeout: Duration::from_secs(10), + max_concurrent_proofs: DEFAULT_MAX_CONCURRENT_PROOFS, } .serve() .await diff --git a/crates/store/src/lib.rs b/crates/store/src/lib.rs index 519f8504b9..8cbdbe21e9 100644 --- a/crates/store/src/lib.rs +++ b/crates/store/src/lib.rs @@ -15,6 +15,7 @@ pub use db::models::conv::SqlTypeConvert; pub use errors::DatabaseError; pub use genesis::GenesisState; pub use server::block_prover_client::BlockProver; +pub use server::proof_scheduler::DEFAULT_MAX_CONCURRENT_PROOFS; pub use server::{DataDirectory, Store}; // CONSTANTS diff --git a/crates/store/src/server/mod.rs b/crates/store/src/server/mod.rs index 3c9ad15a4b..1d9a1d76c2 100644 --- a/crates/store/src/server/mod.rs +++ b/crates/store/src/server/mod.rs @@ -45,6 +45,8 @@ pub struct Store { /// /// If the handler takes longer than this duration, the server cancels the call. pub grpc_timeout: Duration, + /// Maximum number of blocks being proven concurrently by the proof scheduler. + pub max_concurrent_proofs: usize, } impl Store { @@ -133,6 +135,7 @@ impl Store { state.block_store(), chain_tip_rx, latest_proven_block, + self.max_concurrent_proofs, ); let rpc_service = store::rpc_server::RpcServer::new(api::StoreApi { diff --git a/crates/store/src/server/proof_scheduler.rs b/crates/store/src/server/proof_scheduler.rs index 47ed16bac6..f1e28efb13 100644 --- a/crates/store/src/server/proof_scheduler.rs +++ b/crates/store/src/server/proof_scheduler.rs @@ -3,7 +3,7 @@ //! The [`proof_scheduler`] is spawned as an internal Store task. It: //! //! 1. Tracks `chain_tip` via a [`watch::Receiver`] and `latest_proven_block` locally. -//! 2. Maintains up to [`MAX_CONCURRENT_PROOFS`] in-flight proving jobs via a [`JoinSet`]. +//! 2. Maintains up to `max_concurrent_proofs` in-flight proving jobs via a [`JoinSet`]. //! 3. Marks blocks as proven in the database **sequentially** — a block is only marked after all //! its ancestors have been marked. //! 4. On transient errors (DB reads, prover failures, timeouts), the failed block is retried @@ -34,8 +34,8 @@ use crate::server::block_prover_client::{BlockProver, StoreProverError}; /// Overall timeout for proving a single block. const BLOCK_PROVE_TIMEOUT: Duration = Duration::from_mins(4); -/// Maximum number of blocks being proven concurrently. -const MAX_CONCURRENT_PROOFS: usize = 8; +/// Default maximum number of blocks being proven concurrently. +pub const DEFAULT_MAX_CONCURRENT_PROOFS: usize = 8; /// A wrapper around [`JoinSet`] whose `join_next` returns [`std::future::pending`] when empty /// instead of `None`, making it safe to use directly in `tokio::select!` without a special case. @@ -76,14 +76,22 @@ pub fn spawn( block_store: Arc, chain_tip_rx: watch::Receiver, latest_proven_block: BlockNumber, + max_concurrent_proofs: usize, ) -> JoinHandle> { - tokio::spawn(run(db, block_prover, block_store, chain_tip_rx, latest_proven_block)) + tokio::spawn(run( + db, + block_prover, + block_store, + chain_tip_rx, + latest_proven_block, + max_concurrent_proofs, + )) } /// Main loop of the proof scheduler. 
/// /// Maintains a pool of concurrent proving jobs via [`JoinSet`], fills them up to -/// [`MAX_CONCURRENT_PROOFS`], and drains completed results in block-number order. +/// `max_concurrent_proofs`, and drains completed results in block-number order. /// /// Returns `Err` on irrecoverable errors (missing/corrupt proving inputs, DB write failures). /// Transient errors are retried internally. @@ -93,6 +101,7 @@ async fn run( block_store: Arc, mut chain_tip_rx: watch::Receiver, latest_proven_block: BlockNumber, + max_concurrent_proofs: usize, ) -> anyhow::Result<()> { info!(target: COMPONENT, %latest_proven_block, "Proof scheduler started"); @@ -109,7 +118,7 @@ async fn run( loop { // Fill the job pool up to capacity from the next unscheduled blocks. - while inflight.len() < MAX_CONCURRENT_PROOFS + while inflight.len() < max_concurrent_proofs && next_to_schedule.as_u32() <= chain_tip.as_u32() { let scheduled = next_to_schedule; From 8a70c04c8151a90481dea574ae701a79413416d8 Mon Sep 17 00:00:00 2001 From: sergerad Date: Tue, 10 Mar 2026 13:42:55 +1300 Subject: [PATCH 55/73] joinset type specific --- crates/store/src/server/proof_scheduler.rs | 30 +++++++++++++--------- 1 file changed, 18 insertions(+), 12 deletions(-) diff --git a/crates/store/src/server/proof_scheduler.rs b/crates/store/src/server/proof_scheduler.rs index f1e28efb13..4be4caf0e7 100644 --- a/crates/store/src/server/proof_scheduler.rs +++ b/crates/store/src/server/proof_scheduler.rs @@ -39,19 +39,30 @@ pub const DEFAULT_MAX_CONCURRENT_PROOFS: usize = 8; /// A wrapper around [`JoinSet`] whose `join_next` returns [`std::future::pending`] when empty /// instead of `None`, making it safe to use directly in `tokio::select!` without a special case. -struct PendingJoinSet(JoinSet); +struct ProofTaskJoinSet(JoinSet>); -impl PendingJoinSet { +impl ProofTaskJoinSet { fn new() -> Self { Self(JoinSet::new()) } - fn spawn(&mut self, task: impl std::future::Future + Send + 'static) { - self.0.spawn(task); + fn spawn( + &mut self, + db: &Arc, + block_prover: &Arc, + block_store: &Arc, + block_num: BlockNumber, + ) { + let db = Arc::clone(db); + let block_prover = Arc::clone(block_prover); + let block_store = Arc::clone(block_store); + self.0.spawn( + async move { prove_and_save(&db, &block_prover, &block_store, block_num).await }, + ); } /// Returns the result of the next completed task, or pends forever if the set is empty. - async fn join_next(&mut self) -> Result { + async fn join_next(&mut self) -> Result, JoinError> { if self.0.is_empty() { std::future::pending().await } else { @@ -110,7 +121,7 @@ async fn run( // The current chain tip as observed from the watch channel. let mut chain_tip = *chain_tip_rx.borrow_and_update(); // In-flight proving tasks. - let mut join_set: PendingJoinSet> = PendingJoinSet::new(); + let mut join_set = ProofTaskJoinSet::new(); // Block numbers currently being proven. let mut inflight: BTreeSet = BTreeSet::new(); // The next block number to schedule for proving. 
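// Self-contained sketch of the spawn-helper pattern introduced here, using illustrative types
// (`Deps`, `DemoTaskSet`) rather than the node's real ones: the wrapper borrows the shared
// handles, clones the `Arc`s itself, and moves the clones into the spawned task, so the
// scheduler loop no longer needs a block of `Arc::clone` calls at every call site.
use std::sync::Arc;
use tokio::task::JoinSet;

struct Deps {
    label: String,
}

struct DemoTaskSet(JoinSet<anyhow::Result<u32>>);

impl DemoTaskSet {
    fn spawn(&mut self, deps: &Arc<Deps>, job_id: u32) {
        // Clone here so the task owns its handle independently of the caller's borrow.
        let deps = Arc::clone(deps);
        self.0.spawn(async move {
            println!("running job {job_id} against {}", deps.label);
            Ok(job_id)
        });
    }
}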
@@ -124,12 +135,7 @@ async fn run( let scheduled = next_to_schedule; inflight.insert(scheduled); - let db = Arc::clone(&db); - let block_prover = Arc::clone(&block_prover); - let block_store = Arc::clone(&block_store); - join_set.spawn(async move { - prove_and_save(&db, &block_prover, &block_store, scheduled).await - }); + join_set.spawn(&db, &block_prover, &block_store, scheduled); next_to_schedule = scheduled.child(); } From 4238cf1fbeaca286ed189e1718f1e6346c34368c Mon Sep 17 00:00:00 2001 From: sergerad Date: Tue, 10 Mar 2026 13:50:34 +1300 Subject: [PATCH 56/73] flatten error --- crates/store/src/server/proof_scheduler.rs | 27 +++++++++++----------- 1 file changed, 13 insertions(+), 14 deletions(-) diff --git a/crates/store/src/server/proof_scheduler.rs b/crates/store/src/server/proof_scheduler.rs index 4be4caf0e7..914da417d7 100644 --- a/crates/store/src/server/proof_scheduler.rs +++ b/crates/store/src/server/proof_scheduler.rs @@ -15,11 +15,12 @@ use std::collections::BTreeSet; use std::sync::Arc; use std::time::Duration; +use anyhow::Context; use miden_protocol::block::{BlockNumber, BlockProof}; use miden_protocol::utils::Serializable; use miden_remote_prover_client::RemoteProverClientError; use tokio::sync::watch; -use tokio::task::{JoinError, JoinHandle, JoinSet}; +use tokio::task::{JoinHandle, JoinSet}; use tracing::{error, info, instrument}; use crate::COMPONENT; @@ -62,11 +63,16 @@ impl ProofTaskJoinSet { } /// Returns the result of the next completed task, or pends forever if the set is empty. - async fn join_next(&mut self) -> Result, JoinError> { + async fn join_next(&mut self) -> anyhow::Result { if self.0.is_empty() { std::future::pending().await } else { - self.0.join_next().await.expect("join set is not empty") + self.0 + .join_next() + .await + .expect("join set is not empty") + .context("proving task panicked") + .flatten() } } } @@ -142,17 +148,10 @@ async fn run( // Wait for either a job to complete or the chain tip to advance. tokio::select! { // Proving task completed. - join_result = join_set.join_next() => { - match join_result { - Ok(Ok(block_num)) => { - info!(target: COMPONENT, %block_num, "Block proof completed"); - inflight.remove(&block_num); - }, - Ok(Err(err)) => return Err(err), - Err(join_err) => { - anyhow::bail!("Proof task panicked: {join_err}") - }, - } + result = join_set.join_next() => { + let block_num = result?; + info!(target: COMPONENT, %block_num, "Block proof completed"); + inflight.remove(&block_num); }, // New chain tip received. From 272095011304ad1f5298d09c3ae0ab3fb61649a7 Mon Sep 17 00:00:00 2001 From: sergerad Date: Tue, 10 Mar 2026 13:54:38 +1300 Subject: [PATCH 57/73] unwrap or --- crates/store/src/server/proof_scheduler.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/crates/store/src/server/proof_scheduler.rs b/crates/store/src/server/proof_scheduler.rs index 914da417d7..d3d6815fb3 100644 --- a/crates/store/src/server/proof_scheduler.rs +++ b/crates/store/src/server/proof_scheduler.rs @@ -166,7 +166,7 @@ async fn run( // Mark completed proofs as proven sequentially. // Find the lowest in-flight block. - let lowest_in_flight = inflight.first().map_or(next_to_schedule, |&first| first); + let lowest_in_flight = inflight.first().copied().unwrap_or(next_to_schedule); // Mark all sequentially proven blocks as completed. 
while latest_complete.child().as_u32() < lowest_in_flight.as_u32() { latest_complete = latest_complete.child(); From e0d9b55e7013dc7a03441e8db8e28cab1623561a Mon Sep 17 00:00:00 2001 From: sergerad Date: Tue, 10 Mar 2026 13:56:48 +1300 Subject: [PATCH 58/73] retry loop --- crates/store/src/server/proof_scheduler.rs | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/crates/store/src/server/proof_scheduler.rs b/crates/store/src/server/proof_scheduler.rs index d3d6815fb3..2865120f2e 100644 --- a/crates/store/src/server/proof_scheduler.rs +++ b/crates/store/src/server/proof_scheduler.rs @@ -194,12 +194,8 @@ async fn prove_and_save( block_store: &BlockStore, block_num: BlockNumber, ) -> anyhow::Result { - let mut attempts = 0u32; - loop { - attempts += 1; - if attempts > 10 { - anyhow::bail!("Bailed after max attempts") - } + const MAX_RETRIES: u32 = 10; + for _ in 0..MAX_RETRIES { // Prove block with timeout. let proof = match tokio::time::timeout( BLOCK_PROVE_TIMEOUT, @@ -224,6 +220,7 @@ async fn prove_and_save( return Ok(block_num); } + anyhow::bail!("maximum retries ({MAX_RETRIES}) exceeded"); } /// Proves a single block by loading inputs from the DB and invoking the block prover. From 4e71764a004197f7442395d81dc2dab8b8ca713b Mon Sep 17 00:00:00 2001 From: sergerad Date: Tue, 10 Mar 2026 14:22:51 +1300 Subject: [PATCH 59/73] instrument refactor --- crates/store/src/server/proof_scheduler.rs | 56 ++++++++++++---------- 1 file changed, 32 insertions(+), 24 deletions(-) diff --git a/crates/store/src/server/proof_scheduler.rs b/crates/store/src/server/proof_scheduler.rs index 2865120f2e..63c48250a6 100644 --- a/crates/store/src/server/proof_scheduler.rs +++ b/crates/store/src/server/proof_scheduler.rs @@ -19,9 +19,10 @@ use anyhow::Context; use miden_protocol::block::{BlockNumber, BlockProof}; use miden_protocol::utils::Serializable; use miden_remote_prover_client::RemoteProverClientError; +use thiserror::Error; use tokio::sync::watch; use tokio::task::{JoinHandle, JoinSet}; -use tracing::{error, info, instrument}; +use tracing::{info, instrument}; use crate::COMPONENT; use crate::blocks::BlockStore; @@ -187,7 +188,7 @@ async fn run( /// DB, invoking the prover (with a timeout), and persisting the proof to disk. /// /// The caller is responsible for marking the block as proven in the DB. -#[instrument(target = COMPONENT, name = "proof_scheduler.prove_and_save", skip_all, fields(%block_num))] +#[instrument(target = COMPONENT, name = "prove_block", skip_all, fields(%block_num), err)] async fn prove_and_save( db: &Db, block_prover: &BlockProver, @@ -195,35 +196,29 @@ async fn prove_and_save( block_num: BlockNumber, ) -> anyhow::Result { const MAX_RETRIES: u32 = 10; + for _ in 0..MAX_RETRIES { - // Prove block with timeout. 
- let proof = match tokio::time::timeout( - BLOCK_PROVE_TIMEOUT, - prove_block(db, block_prover, block_num), - ) - .await + match tokio::time::timeout(BLOCK_PROVE_TIMEOUT, prove_block(db, block_prover, block_num)) + .await { - Ok(Ok(proof)) => proof, - Ok(Err(ProveBlockError::Fatal(err))) => anyhow::bail!("Fatal error: {err}"), - Ok(Err(ProveBlockError::Transient(err))) => { - error!("Transient error proving block {block_num}: {err}"); - continue; + Ok(Ok(proof)) => { + save_block(block_store, block_num, &proof).await?; + return Ok(block_num); }, - Err(_elapsed) => { - error!("Timed out proving block {block_num}"); - continue; + Ok(Err(ProveBlockError::Fatal(err))) => anyhow::bail!("Fatal error: {err}"), + Ok(Err(ProveBlockError::Transient(_))) | Err(_) => { + // Errors are logged via the span. }, - }; - - // Save proof to the block store. - block_store.save_proof(block_num, &proof.to_bytes()).await?; - - return Ok(block_num); + } } + anyhow::bail!("maximum retries ({MAX_RETRIES}) exceeded"); } /// Proves a single block by loading inputs from the DB and invoking the block prover. +/// +/// Records `block_commitment` on `parent_span` once the block header is available. +#[instrument(target = COMPONENT, name = "prove_block.prove", skip_all, fields(%block_num), err)] async fn prove_block( db: &Db, block_prover: &BlockProver, @@ -245,15 +240,28 @@ async fn prove_block( Ok(proof) } +/// Saves a block proof to the block store. +#[instrument(target = COMPONENT, name = "prove_block.save", skip_all, fields(%block_num), err)] +async fn save_block( + block_store: &BlockStore, + block_num: BlockNumber, + proof: &BlockProof, +) -> anyhow::Result<()> { + block_store.save_proof(block_num, &proof.to_bytes()).await?; + Ok(()) +} + // PROVE BLOCK ERROR // ================================================================================================ /// Errors that can occur during block proving. -#[derive(Debug)] +#[derive(Debug, Error)] enum ProveBlockError { /// An irrecoverable error that should cause node shutdown. - Fatal(ProofSchedulerError), + #[error("fatal error")] + Fatal(#[source] ProofSchedulerError), /// A transient error (DB read, prover failure). The outer loop will retry. + #[error("transient error: {0}")] Transient(Box), } From 1cb05467e3c64c1804c0d2a47224b5a99ceb5a5d Mon Sep 17 00:00:00 2001 From: sergerad Date: Tue, 10 Mar 2026 20:10:00 +1300 Subject: [PATCH 60/73] Fix info block num fields --- crates/store/src/server/proof_scheduler.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/crates/store/src/server/proof_scheduler.rs b/crates/store/src/server/proof_scheduler.rs index 63c48250a6..b94e608baa 100644 --- a/crates/store/src/server/proof_scheduler.rs +++ b/crates/store/src/server/proof_scheduler.rs @@ -151,7 +151,7 @@ async fn run( // Proving task completed. 
result = join_set.join_next() => { let block_num = result?; - info!(target: COMPONENT, %block_num, "Block proof completed"); + info!(target=COMPONENT, block.number=%block_num, "Block proof completed"); inflight.remove(&block_num); }, @@ -174,7 +174,7 @@ async fn run( db.mark_block_proven(latest_complete) .await .map_err(ProofSchedulerError::MarkBlockProvenFailed)?; - info!(target: COMPONENT, block_num = %latest_complete, "Block marked as proven"); + info!(target=COMPONENT, block.number=%latest_complete, "Block marked as proven"); } } } From 3e1e3a9e3570ffa99ccc009afd2f923f11e20737 Mon Sep 17 00:00:00 2001 From: sergerad Date: Thu, 12 Mar 2026 19:30:52 +1300 Subject: [PATCH 61/73] rm is_proven and wipe proving_inputs on proven --- .../src/db/migrations/2025062000000_setup/up.sql | 5 ++--- crates/store/src/db/mod.rs | 2 +- crates/store/src/db/models/queries/block_headers.rs | 12 ++++++------ crates/store/src/db/schema.rs | 1 - 4 files changed, 9 insertions(+), 11 deletions(-) diff --git a/crates/store/src/db/migrations/2025062000000_setup/up.sql b/crates/store/src/db/migrations/2025062000000_setup/up.sql index 67f982663d..a17ef5ecb0 100644 --- a/crates/store/src/db/migrations/2025062000000_setup/up.sql +++ b/crates/store/src/db/migrations/2025062000000_setup/up.sql @@ -3,14 +3,13 @@ CREATE TABLE block_headers ( block_header BLOB NOT NULL, signature BLOB NOT NULL, commitment BLOB NOT NULL, - proving_inputs BLOB, -- Serialized BlockProofRequest needed for deferred proving. NULL for genesis block. - is_proven BOOLEAN NOT NULL DEFAULT 0, -- Whether the block has been proven + proving_inputs BLOB, -- Serialized BlockProofRequest needed for deferred proving. NULL if it has been proven or never proven (genesis block). PRIMARY KEY (block_num), CONSTRAINT block_header_block_num_is_u32 CHECK (block_num BETWEEN 0 AND 0xFFFFFFFF) ); -CREATE INDEX block_headers_proven_desc ON block_headers(block_num DESC) WHERE is_proven = 1; +CREATE INDEX block_headers_proven_desc ON block_headers(block_num DESC) WHERE proving_inputs IS NULL; CREATE TABLE account_codes ( code_commitment BLOB NOT NULL, diff --git a/crates/store/src/db/mod.rs b/crates/store/src/db/mod.rs index 136b38cd04..8436c66133 100644 --- a/crates/store/src/db/mod.rs +++ b/crates/store/src/db/mod.rs @@ -600,7 +600,7 @@ impl Db { /// Marks a previously committed block as proven. /// - /// Sets the `is_proven` flag to `true` for the given block number. + /// Clears the `proving_inputs` for the given block number. #[instrument(target = COMPONENT, skip_all, err)] pub async fn mark_block_proven(&self, block_num: BlockNumber) -> Result<()> { self.transact("mark block proven", move |conn| { diff --git a/crates/store/src/db/models/queries/block_headers.rs b/crates/store/src/db/models/queries/block_headers.rs index 5251c34b23..39803ba785 100644 --- a/crates/store/src/db/models/queries/block_headers.rs +++ b/crates/store/src/db/models/queries/block_headers.rs @@ -270,9 +270,9 @@ pub(crate) fn select_block_proving_inputs( .map_err(Into::into) } -/// Mark a committed block as proven. +/// Mark a committed block as proven by clearing its proving inputs. /// -/// Sets the `is_proven` flag to `true` for the row with the given `block_num`. +/// Sets `proving_inputs` to `NULL` for the row with the given `block_num`. 
/// /// # Returns /// @@ -291,21 +291,21 @@ pub(crate) fn mark_block_proven( schema::block_headers::table .filter(schema::block_headers::block_num.eq(block_num.to_raw_sql())), ) - .set(schema::block_headers::is_proven.eq(true)) + .set(schema::block_headers::proving_inputs.eq(None::>)) .execute(conn)?; Ok(count) } /// Select the highest block number that has been proven. /// -/// Returns `None` if no blocks have been proven yet (genesis is never proven). +/// A block is considered proven when its `proving_inputs` are `NULL`. /// /// # Raw SQL /// /// ```sql /// SELECT block_num /// FROM block_headers -/// WHERE is_proven = 1 +/// WHERE proving_inputs IS NULL /// ORDER BY block_num DESC /// LIMIT 1 /// ``` @@ -314,7 +314,7 @@ pub(crate) fn select_latest_proven_block_num( ) -> Result, DatabaseError> { let block_num: Option = SelectDsl::select(schema::block_headers::table, schema::block_headers::block_num) - .filter(schema::block_headers::is_proven.eq(true)) + .filter(schema::block_headers::proving_inputs.is_null()) .order(schema::block_headers::block_num.desc()) .first(conn) .optional()?; diff --git a/crates/store/src/db/schema.rs b/crates/store/src/db/schema.rs index 68c75bc814..4272cdaa18 100644 --- a/crates/store/src/db/schema.rs +++ b/crates/store/src/db/schema.rs @@ -49,7 +49,6 @@ diesel::table! { block_header -> Binary, signature -> Binary, commitment -> Binary, - is_proven -> Bool, proving_inputs -> Nullable, } } From 1b0712d8c0394543c118b57eabf2cee4a5ee5bfc Mon Sep 17 00:00:00 2001 From: sergerad Date: Fri, 13 Mar 2026 10:05:51 +1300 Subject: [PATCH 62/73] nonzerousize --- bin/node/src/commands/bundled.rs | 5 +++-- bin/node/src/commands/store.rs | 5 +++-- crates/block-producer/src/server/tests.rs | 2 -- crates/rpc/src/tests.rs | 1 - crates/store/src/server/mod.rs | 3 ++- crates/store/src/server/proof_scheduler.rs | 9 +++++---- 6 files changed, 13 insertions(+), 12 deletions(-) diff --git a/bin/node/src/commands/bundled.rs b/bin/node/src/commands/bundled.rs index 4c1a746166..d2ad1689f8 100644 --- a/bin/node/src/commands/bundled.rs +++ b/bin/node/src/commands/bundled.rs @@ -1,4 +1,5 @@ use std::collections::HashMap; +use std::num::NonZeroUsize; use std::path::PathBuf; use anyhow::Context; @@ -88,7 +89,7 @@ pub enum BundledCommand { default_value_t = DEFAULT_MAX_CONCURRENT_PROOFS, value_name = "NUM" )] - max_concurrent_proofs: usize, + max_concurrent_proofs: NonZeroUsize, #[command(flatten)] grpc_options: GrpcOptionsExternal, @@ -160,7 +161,7 @@ impl BundledCommand { ntx_builder: NtxBuilderConfig, validator: BundledValidatorConfig, grpc_options: GrpcOptionsExternal, - max_concurrent_proofs: usize, + max_concurrent_proofs: NonZeroUsize, storage_options: StorageOptions, ) -> anyhow::Result<()> { // Start listening on all gRPC urls so that inter-component connections can be created diff --git a/bin/node/src/commands/store.rs b/bin/node/src/commands/store.rs index ed79472141..edc5fe2521 100644 --- a/bin/node/src/commands/store.rs +++ b/bin/node/src/commands/store.rs @@ -1,3 +1,4 @@ +use std::num::NonZeroUsize; use std::path::{Path, PathBuf}; use anyhow::Context; @@ -71,7 +72,7 @@ pub enum StoreCommand { default_value_t = DEFAULT_MAX_CONCURRENT_PROOFS, value_name = "NUM" )] - max_concurrent_proofs: usize, + max_concurrent_proofs: NonZeroUsize, #[command(flatten)] grpc_options: GrpcOptionsInternal, @@ -130,7 +131,7 @@ impl StoreCommand { block_prover_url: Option, data_directory: PathBuf, grpc_options: GrpcOptionsInternal, - max_concurrent_proofs: usize, + max_concurrent_proofs: NonZeroUsize, 
storage_options: StorageOptions, ) -> anyhow::Result<()> { let rpc_listener = rpc_url diff --git a/crates/block-producer/src/server/tests.rs b/crates/block-producer/src/server/tests.rs index b71b230520..dedf74b84a 100644 --- a/crates/block-producer/src/server/tests.rs +++ b/crates/block-producer/src/server/tests.rs @@ -3,8 +3,6 @@ use std::time::Duration; use miden_node_proto::generated::block_producer::api_client as block_producer_client; use miden_node_store::{DEFAULT_MAX_CONCURRENT_PROOFS, GenesisState, Store}; -use miden_node_utils::clap::GrpcOptionsInternal; -use miden_node_store::{GenesisState, Store}; use miden_node_utils::clap::{GrpcOptionsInternal, StorageOptions}; use miden_node_utils::fee::test_fee_params; use miden_node_validator::{Validator, ValidatorSigner}; diff --git a/crates/rpc/src/tests.rs b/crates/rpc/src/tests.rs index bf0e182261..5f190dfdf7 100644 --- a/crates/rpc/src/tests.rs +++ b/crates/rpc/src/tests.rs @@ -9,7 +9,6 @@ use miden_node_proto::generated::rpc::api_client::ApiClient as ProtoClient; use miden_node_proto::generated::{self as proto}; use miden_node_store::genesis::config::GenesisConfig; use miden_node_store::{DEFAULT_MAX_CONCURRENT_PROOFS, Store}; -use miden_node_utils::clap::{GrpcOptionsExternal, GrpcOptionsInternal}; use miden_node_utils::clap::{GrpcOptionsExternal, GrpcOptionsInternal, StorageOptions}; use miden_node_utils::fee::test_fee; use miden_node_utils::limiter::{ diff --git a/crates/store/src/server/mod.rs b/crates/store/src/server/mod.rs index 318409e86e..4633b84cdc 100644 --- a/crates/store/src/server/mod.rs +++ b/crates/store/src/server/mod.rs @@ -1,3 +1,4 @@ +use std::num::NonZeroUsize; use std::ops::Not; use std::path::{Path, PathBuf}; use std::sync::Arc; @@ -43,7 +44,7 @@ pub struct Store { pub block_prover_url: Option, pub data_directory: PathBuf, /// Maximum number of blocks being proven concurrently by the proof scheduler. - pub max_concurrent_proofs: usize, + pub max_concurrent_proofs: NonZeroUsize, pub storage_options: StorageOptions, pub grpc_options: GrpcOptionsInternal, } diff --git a/crates/store/src/server/proof_scheduler.rs b/crates/store/src/server/proof_scheduler.rs index b94e608baa..6e013c6f24 100644 --- a/crates/store/src/server/proof_scheduler.rs +++ b/crates/store/src/server/proof_scheduler.rs @@ -12,6 +12,7 @@ //! returns the error to the caller for node shutdown. use std::collections::BTreeSet; +use std::num::NonZeroUsize; use std::sync::Arc; use std::time::Duration; @@ -37,7 +38,7 @@ use crate::server::block_prover_client::{BlockProver, StoreProverError}; const BLOCK_PROVE_TIMEOUT: Duration = Duration::from_mins(4); /// Default maximum number of blocks being proven concurrently. -pub const DEFAULT_MAX_CONCURRENT_PROOFS: usize = 8; +pub const DEFAULT_MAX_CONCURRENT_PROOFS: NonZeroUsize = NonZeroUsize::new(8).unwrap(); /// A wrapper around [`JoinSet`] whose `join_next` returns [`std::future::pending`] when empty /// instead of `None`, making it safe to use directly in `tokio::select!` without a special case. 
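// Minimal, self-contained sketch (assuming the tokio 1.x `JoinSet`, `watch` and `select!`
// APIs; `DemoJoinSet` and `demo_loop` are illustrative names) of why a pending-when-empty
// `join_next` matters: a bare `JoinSet::join_next()` resolves immediately with `None` when
// the set is empty, which would turn the select loop below into a busy-wait. Parking that
// branch with `std::future::pending()` lets the loop sleep until either a task finishes or
// the watch channel changes.
use tokio::sync::watch;
use tokio::task::{JoinError, JoinSet};

struct DemoJoinSet(JoinSet<u32>);

impl DemoJoinSet {
    async fn join_next(&mut self) -> Result<u32, JoinError> {
        if self.0.is_empty() {
            // Never resolves; the other select! arms stay responsive.
            std::future::pending().await
        } else {
            self.0.join_next().await.expect("set is non-empty")
        }
    }
}

async fn demo_loop(mut jobs: DemoJoinSet, mut tip_rx: watch::Receiver<u32>) {
    loop {
        tokio::select! {
            done = jobs.join_next() => match done {
                Ok(block) => println!("block {block} proven"),
                Err(err) => eprintln!("proving task failed: {err}"),
            },
            changed = tip_rx.changed() => if changed.is_err() { return },
        }
    }
}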
@@ -94,7 +95,7 @@ pub fn spawn( block_store: Arc, chain_tip_rx: watch::Receiver, latest_proven_block: BlockNumber, - max_concurrent_proofs: usize, + max_concurrent_proofs: NonZeroUsize, ) -> JoinHandle> { tokio::spawn(run( db, @@ -119,7 +120,7 @@ async fn run( block_store: Arc, mut chain_tip_rx: watch::Receiver, latest_proven_block: BlockNumber, - max_concurrent_proofs: usize, + max_concurrent_proofs: NonZeroUsize, ) -> anyhow::Result<()> { info!(target: COMPONENT, %latest_proven_block, "Proof scheduler started"); @@ -136,7 +137,7 @@ async fn run( loop { // Fill the job pool up to capacity from the next unscheduled blocks. - while inflight.len() < max_concurrent_proofs + while inflight.len() < max_concurrent_proofs.into() && next_to_schedule.as_u32() <= chain_tip.as_u32() { let scheduled = next_to_schedule; From 8292df73dc9d5d23b3678361ac5b9cfc52346d0b Mon Sep 17 00:00:00 2001 From: sergerad Date: Fri, 13 Mar 2026 10:08:49 +1300 Subject: [PATCH 63/73] Update select block num comment --- crates/store/src/db/mod.rs | 2 ++ crates/store/src/db/models/queries/block_headers.rs | 3 ++- 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/crates/store/src/db/mod.rs b/crates/store/src/db/mod.rs index 8436c66133..474b26b487 100644 --- a/crates/store/src/db/mod.rs +++ b/crates/store/src/db/mod.rs @@ -624,6 +624,8 @@ impl Db { /// Returns the highest block number that has been proven, or `None` if no blocks have been /// proven yet. + /// + /// This includes the genesis block, which is not technically proven, but treated as such. #[instrument(level = "debug", target = COMPONENT, skip_all, ret(level = "debug"), err)] pub async fn select_latest_proven_block_num(&self) -> Result> { self.transact("select latest proven block num", |conn| { diff --git a/crates/store/src/db/models/queries/block_headers.rs b/crates/store/src/db/models/queries/block_headers.rs index 39803ba785..a51b4e6bfe 100644 --- a/crates/store/src/db/models/queries/block_headers.rs +++ b/crates/store/src/db/models/queries/block_headers.rs @@ -298,7 +298,8 @@ pub(crate) fn mark_block_proven( /// Select the highest block number that has been proven. /// -/// A block is considered proven when its `proving_inputs` are `NULL`. +/// A block is considered proven when its `proving_inputs` are `NULL`. This includes the genesis +/// block, which is not technically proven, but treated as such. 
/// /// # Raw SQL /// From d8bb9945b6d6a8ec5772cca658bbff0bd6204cc6 Mon Sep 17 00:00:00 2001 From: sergerad Date: Fri, 13 Mar 2026 10:10:52 +1300 Subject: [PATCH 64/73] anyhow context --- crates/store/src/server/mod.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/crates/store/src/server/mod.rs b/crates/store/src/server/mod.rs index 4633b84cdc..e7e4f8270f 100644 --- a/crates/store/src/server/mod.rs +++ b/crates/store/src/server/mod.rs @@ -223,8 +223,8 @@ impl Store { result = proof_scheduler_task => { match result { Ok(Ok(())) => Err(anyhow::anyhow!("proof scheduler exited unexpectedly")), - Ok(Err(err)) => Err(anyhow::anyhow!("proof scheduler fatal error").context(err)), - Err(join_err) => Err(anyhow::anyhow!("proof scheduler panicked").context(join_err)), + Ok(Err(err)) => Err(err.context("proof scheduler fatal error")), + Err(join_err) => Err(join_err).context("proof scheduler panicked"), } } } From 3044cea245d920f437bd1496cde623d7f786a01c Mon Sep 17 00:00:00 2001 From: sergerad Date: Fri, 13 Mar 2026 10:13:15 +1300 Subject: [PATCH 65/73] another anyhow context --- crates/store/src/server/proof_scheduler.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/crates/store/src/server/proof_scheduler.rs b/crates/store/src/server/proof_scheduler.rs index 6e013c6f24..d9bec6a3a5 100644 --- a/crates/store/src/server/proof_scheduler.rs +++ b/crates/store/src/server/proof_scheduler.rs @@ -206,7 +206,7 @@ async fn prove_and_save( save_block(block_store, block_num, &proof).await?; return Ok(block_num); }, - Ok(Err(ProveBlockError::Fatal(err))) => anyhow::bail!("Fatal error: {err}"), + Ok(Err(ProveBlockError::Fatal(err))) => Err(err).context("fatal error: {err}")?, Ok(Err(ProveBlockError::Transient(_))) | Err(_) => { // Errors are logged via the span. }, From b8aa91ccbcf3e433aa347aa94a78388878da9d75 Mon Sep 17 00:00:00 2001 From: sergerad Date: Fri, 13 Mar 2026 10:15:24 +1300 Subject: [PATCH 66/73] Fix fields --- crates/store/src/server/proof_scheduler.rs | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/crates/store/src/server/proof_scheduler.rs b/crates/store/src/server/proof_scheduler.rs index d9bec6a3a5..fd707c020e 100644 --- a/crates/store/src/server/proof_scheduler.rs +++ b/crates/store/src/server/proof_scheduler.rs @@ -189,7 +189,7 @@ async fn run( /// DB, invoking the prover (with a timeout), and persisting the proof to disk. /// /// The caller is responsible for marking the block as proven in the DB. -#[instrument(target = COMPONENT, name = "prove_block", skip_all, fields(%block_num), err)] +#[instrument(target = COMPONENT, name = "prove_block", skip_all, fields(block.number=block_num.as_u32()), err)] async fn prove_and_save( db: &Db, block_prover: &BlockProver, @@ -219,7 +219,7 @@ async fn prove_and_save( /// Proves a single block by loading inputs from the DB and invoking the block prover. /// /// Records `block_commitment` on `parent_span` once the block header is available. -#[instrument(target = COMPONENT, name = "prove_block.prove", skip_all, fields(%block_num), err)] +#[instrument(target = COMPONENT, name = "prove_block.prove", skip_all, fields(block.number=block_num.as_u32()), err)] async fn prove_block( db: &Db, block_prover: &BlockProver, @@ -242,7 +242,7 @@ async fn prove_block( } /// Saves a block proof to the block store. 
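// A hedged note on the field syntax being changed just below, based on the tracing crate's
// documented semantics rather than on this patch: `fields(%block_num)` records a span field
// literally named `block_num`, rendered through `Display`, whereas
// `fields(block.number = block_num.as_u32())` records an explicitly named, dotted field
// carrying a plain integer, so the field name stays stable if the local variable is renamed
// and collectors can treat the value numerically.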
-#[instrument(target = COMPONENT, name = "prove_block.save", skip_all, fields(%block_num), err)] +#[instrument(target = COMPONENT, name = "prove_block.save", skip_all, fields(block.number=block_num.as_u32()), err)] async fn save_block( block_store: &BlockStore, block_num: BlockNumber, From 905a298a8dcf4f905e340e0d37e4b55a559ad972 Mon Sep 17 00:00:00 2001 From: sergerad Date: Fri, 13 Mar 2026 10:23:01 +1300 Subject: [PATCH 67/73] log loop errs --- crates/store/src/server/proof_scheduler.rs | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/crates/store/src/server/proof_scheduler.rs b/crates/store/src/server/proof_scheduler.rs index fd707c020e..213e9aeeb0 100644 --- a/crates/store/src/server/proof_scheduler.rs +++ b/crates/store/src/server/proof_scheduler.rs @@ -23,7 +23,7 @@ use miden_remote_prover_client::RemoteProverClientError; use thiserror::Error; use tokio::sync::watch; use tokio::task::{JoinHandle, JoinSet}; -use tracing::{info, instrument}; +use tracing::{error, info, instrument}; use crate::COMPONENT; use crate::blocks::BlockStore; @@ -206,9 +206,12 @@ async fn prove_and_save( save_block(block_store, block_num, &proof).await?; return Ok(block_num); }, - Ok(Err(ProveBlockError::Fatal(err))) => Err(err).context("fatal error: {err}")?, - Ok(Err(ProveBlockError::Transient(_))) | Err(_) => { - // Errors are logged via the span. + Ok(Err(ProveBlockError::Fatal(err))) => Err(err).context("fatal error")?, + Ok(Err(ProveBlockError::Transient(err))) => { + error!(target: COMPONENT, %block_num, err = ?err, "transient error proving block, retrying"); + }, + Err(elapsed) => { + error!(target: COMPONENT, %block_num, %elapsed, "block proving timed out, retrying"); }, } } From ca54ff213eed858ec950882428179c2f3c325d26 Mon Sep 17 00:00:00 2001 From: sergerad Date: Fri, 13 Mar 2026 10:27:32 +1300 Subject: [PATCH 68/73] assert_matches --- bin/remote-prover/src/server/tests.rs | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/bin/remote-prover/src/server/tests.rs b/bin/remote-prover/src/server/tests.rs index e6abb72ac1..f1d526b16d 100644 --- a/bin/remote-prover/src/server/tests.rs +++ b/bin/remote-prover/src/server/tests.rs @@ -3,6 +3,7 @@ use std::num::NonZeroUsize; use std::sync::Arc; use std::time::Duration; +use assert_matches::assert_matches; use miden_protocol::MIN_PROOF_SECURITY_LEVEL; use miden_protocol::account::auth::AuthScheme; use miden_protocol::asset::{Asset, FungibleAsset}; @@ -238,7 +239,7 @@ async fn capacity_is_respected() { result.sort_unstable(); assert_eq!(expected, result); - assert_matches::assert_matches!(first.err().or(second.err()).or(third.err()), Some(err) => { + assert_matches!(first.err().or(second.err()).or(third.err()), Some(err) => { assert_eq!(err.code(), tonic::Code::ResourceExhausted); }); From a60538389f9645c8e55722dbe07095963753af7d Mon Sep 17 00:00:00 2001 From: sergerad Date: Fri, 13 Mar 2026 10:31:42 +1300 Subject: [PATCH 69/73] std io err --- crates/store/src/blocks.rs | 38 +++++++------------------------------- 1 file changed, 7 insertions(+), 31 deletions(-) diff --git a/crates/store/src/blocks.rs b/crates/store/src/blocks.rs index ef8bf3526b..aac4279e0d 100644 --- a/crates/store/src/blocks.rs +++ b/crates/store/src/blocks.rs @@ -63,10 +63,7 @@ impl BlockStore { Ok(Self { store_dir }) } - pub async fn load_block( - &self, - block_num: BlockNumber, - ) -> Result>, std::io::Error> { + pub async fn load_block(&self, block_num: BlockNumber) -> std::io::Result>> { match tokio::fs::read(self.block_path(block_num)).await 
{ Ok(data) => Ok(Some(data)), Err(err) if err.kind() == std::io::ErrorKind::NotFound => Ok(None), @@ -81,11 +78,7 @@ impl BlockStore { err, fields(block_size = data.len()) )] - pub async fn save_block( - &self, - block_num: BlockNumber, - data: &[u8], - ) -> Result<(), std::io::Error> { + pub async fn save_block(&self, block_num: BlockNumber, data: &[u8]) -> std::io::Result<()> { let (epoch_path, block_path) = self.epoch_block_path(block_num)?; if !epoch_path.exists() { tokio::fs::create_dir_all(epoch_path).await?; @@ -94,11 +87,7 @@ impl BlockStore { tokio::fs::write(block_path, data).await } - pub fn save_block_blocking( - &self, - block_num: BlockNumber, - data: &[u8], - ) -> Result<(), std::io::Error> { + pub fn save_block_blocking(&self, block_num: BlockNumber, data: &[u8]) -> std::io::Result<()> { let (epoch_path, block_path) = self.epoch_block_path(block_num)?; if !epoch_path.exists() { fs_err::create_dir_all(epoch_path)?; @@ -111,10 +100,7 @@ impl BlockStore { // -------------------------------------------------------------------------------------------- #[expect(dead_code)] - pub async fn load_proof( - &self, - block_num: BlockNumber, - ) -> Result>, std::io::Error> { + pub async fn load_proof(&self, block_num: BlockNumber) -> std::io::Result>> { match tokio::fs::read(self.proof_path(block_num)).await { Ok(data) => Ok(Some(data)), Err(err) if err.kind() == std::io::ErrorKind::NotFound => Ok(None), @@ -129,11 +115,7 @@ impl BlockStore { err, fields(proof_size = data.len()) )] - pub async fn save_proof( - &self, - block_num: BlockNumber, - data: &[u8], - ) -> Result<(), std::io::Error> { + pub async fn save_proof(&self, block_num: BlockNumber, data: &[u8]) -> std::io::Result<()> { let (epoch_path, proof_path) = self.epoch_proof_path(block_num)?; if !epoch_path.exists() { tokio::fs::create_dir_all(epoch_path).await?; @@ -159,20 +141,14 @@ impl BlockStore { epoch_dir.join(format!("proof_{block_num:08x}.dat")) } - fn epoch_block_path( - &self, - block_num: BlockNumber, - ) -> Result<(PathBuf, PathBuf), std::io::Error> { + fn epoch_block_path(&self, block_num: BlockNumber) -> std::io::Result<(PathBuf, PathBuf)> { let block_path = self.block_path(block_num); let epoch_path = block_path.parent().ok_or(std::io::Error::from(ErrorKind::NotFound))?; Ok((epoch_path.to_path_buf(), block_path)) } - fn epoch_proof_path( - &self, - block_num: BlockNumber, - ) -> Result<(PathBuf, PathBuf), std::io::Error> { + fn epoch_proof_path(&self, block_num: BlockNumber) -> std::io::Result<(PathBuf, PathBuf)> { let proof_path = self.proof_path(block_num); let epoch_path = proof_path.parent().ok_or(std::io::Error::from(ErrorKind::NotFound))?; From 7c8fba70bcd70f53c5afca30168203418e7a7544 Mon Sep 17 00:00:00 2001 From: sergerad Date: Fri, 13 Mar 2026 12:05:55 +1300 Subject: [PATCH 70/73] Simplify schedule logic --- crates/store/src/db/mod.rs | 13 ++++ .../src/db/models/queries/block_headers.rs | 34 ++++++++ crates/store/src/errors.rs | 2 - crates/store/src/server/mod.rs | 10 +-- crates/store/src/server/proof_scheduler.rs | 78 ++++++++----------- 5 files changed, 79 insertions(+), 58 deletions(-) diff --git a/crates/store/src/db/mod.rs b/crates/store/src/db/mod.rs index 474b26b487..49caa6f400 100644 --- a/crates/store/src/db/mod.rs +++ b/crates/store/src/db/mod.rs @@ -622,6 +622,19 @@ impl Db { .await } + /// Returns unproven block numbers greater than `after`, in ascending order, up to `limit`. 
+ #[instrument(level = "debug", target = COMPONENT, skip_all, err)] + pub async fn select_unproven_blocks( + &self, + after: BlockNumber, + limit: usize, + ) -> Result> { + self.transact("select unproven blocks", move |conn| { + models::queries::select_unproven_blocks(conn, after, limit) + }) + .await + } + /// Returns the highest block number that has been proven, or `None` if no blocks have been /// proven yet. /// diff --git a/crates/store/src/db/models/queries/block_headers.rs b/crates/store/src/db/models/queries/block_headers.rs index a51b4e6bfe..db75d04b3a 100644 --- a/crates/store/src/db/models/queries/block_headers.rs +++ b/crates/store/src/db/models/queries/block_headers.rs @@ -296,6 +296,40 @@ pub(crate) fn mark_block_proven( Ok(count) } +/// Select unproven block numbers greater than `after`, in ascending order, up to `limit`. +/// +/// A block is unproven when its `proving_inputs` are non-NULL. +/// +/// # Raw SQL +/// +/// ```sql +/// SELECT block_num +/// FROM block_headers +/// WHERE proving_inputs IS NOT NULL +/// AND block_num > ? +/// ORDER BY block_num ASC +/// LIMIT ? +/// ``` +pub(crate) fn select_unproven_blocks( + conn: &mut SqliteConnection, + after: BlockNumber, + limit: usize, +) -> Result, DatabaseError> { + let block_nums: Vec = + SelectDsl::select(schema::block_headers::table, schema::block_headers::block_num) + .filter(schema::block_headers::proving_inputs.is_not_null()) + .filter(schema::block_headers::block_num.gt(after.to_raw_sql())) + .order(schema::block_headers::block_num.asc()) + .limit(i64::try_from(limit).expect("unproven block number limit should fit in i64")) + .load(conn)?; + + block_nums + .into_iter() + .map(BlockNumber::from_raw_sql) + .collect::, _>>() + .map_err(Into::into) +} + /// Select the highest block number that has been proven. /// /// A block is considered proven when its `proving_inputs` are `NULL`. This includes the genesis diff --git a/crates/store/src/errors.rs b/crates/store/src/errors.rs index 0392654d37..0ed39de5c1 100644 --- a/crates/store/src/errors.rs +++ b/crates/store/src/errors.rs @@ -39,8 +39,6 @@ pub enum ProofSchedulerError { MissingProvingInputs(BlockNumber), #[error("failed to deserialize proving inputs for block")] DeserializationFailed(#[source] DeserializationError), - #[error("failed to mark block as proven in database")] - MarkBlockProvenFailed(#[source] DatabaseError), #[error("invalid remote prover endpoint: {0}")] InvalidProverEndpoint(String), } diff --git a/crates/store/src/server/mod.rs b/crates/store/src/server/mod.rs index e7e4f8270f..148154fdf0 100644 --- a/crates/store/src/server/mod.rs +++ b/crates/store/src/server/mod.rs @@ -108,17 +108,10 @@ impl Store { Arc::new(BlockProver::local()) }; - // Initialize the chain tip watch channel and read the latest proven block from the DB. + // Initialize the chain tip watch channel. let chain_tip = state.latest_block_num().await; let (chain_tip_sender, chain_tip_rx) = tokio::sync::watch::channel(chain_tip); - let latest_proven_block = state - .db() - .select_latest_proven_block_num() - .await - .context("failed to read latest proven block number")? - .unwrap_or(miden_protocol::block::BlockNumber::GENESIS); - // Spawn the proof scheduler as a background task. It will immediately pick up any // unproven blocks from previous runs and begin proving them. 
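        // For example (illustrative numbers): if the node previously stopped with blocks up
        // to 100 committed but only blocks up to 95 marked as proven, the scheduler's first
        // DB query returns 96, 97, ... (up to its concurrency limit) and proving resumes
        // without any extra bookkeeping here.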
         let proof_scheduler_task = proof_scheduler::spawn(
@@ -126,7 +119,6 @@ impl Store {
             block_prover,
             state.block_store(),
             chain_tip_rx,
-            latest_proven_block,
             self.max_concurrent_proofs,
         );
diff --git a/crates/store/src/server/proof_scheduler.rs b/crates/store/src/server/proof_scheduler.rs
index 213e9aeeb0..1b80cbfbd5 100644
--- a/crates/store/src/server/proof_scheduler.rs
+++ b/crates/store/src/server/proof_scheduler.rs
@@ -11,7 +11,6 @@
 //! 5. On fatal errors (e.g. deserialization failures, missing proving inputs), the scheduler
 //!    returns the error to the caller for node shutdown.
-use std::collections::BTreeSet;
 use std::num::NonZeroUsize;
 use std::sync::Arc;
 use std::time::Duration;
@@ -84,8 +83,8 @@ impl ProofTaskJoinSet {
 /// Spawns the proof scheduler as a background tokio task.
 ///
-/// The scheduler uses `chain_tip_rx` to learn about newly committed blocks and
-/// `latest_proven_block` as the starting point for sequential proof tracking.
+/// The scheduler uses `chain_tip_rx` to learn about newly committed blocks and queries the DB
+/// for unproven blocks to prove.
 ///
 /// Returns a [`JoinHandle`] that resolves when the scheduler encounters a fatal error or
 /// completes unexpectedly.
@@ -94,23 +93,18 @@ pub fn spawn(
     db: Arc<Db>,
     block_prover: Arc<BlockProver>,
     block_store: Arc<BlockStore>,
     chain_tip_rx: watch::Receiver<BlockNumber>,
-    latest_proven_block: BlockNumber,
     max_concurrent_proofs: NonZeroUsize,
 ) -> JoinHandle<anyhow::Result<()>> {
-    tokio::spawn(run(
-        db,
-        block_prover,
-        block_store,
-        chain_tip_rx,
-        latest_proven_block,
-        max_concurrent_proofs,
-    ))
+    tokio::spawn(run(db, block_prover, block_store, chain_tip_rx, max_concurrent_proofs))
 }
 /// Main loop of the proof scheduler.
 ///
 /// Maintains a pool of concurrent proving jobs via [`JoinSet`], fills them up to
-/// `max_concurrent_proofs`, and drains completed results in block-number order.
+/// `max_concurrent_proofs`, and drains completed results.
+///
+/// Unproven blocks are discovered by querying the database each iteration, so the scheduler is
+/// stateless with respect to which blocks need proving.
 ///
 /// Returns `Err` on irrecoverable errors (missing/corrupt proving inputs, DB write failures).
 /// Transient errors are retried internally.
@@ -119,32 +113,32 @@ async fn run(
     block_prover: Arc<BlockProver>,
     block_store: Arc<BlockStore>,
     mut chain_tip_rx: watch::Receiver<BlockNumber>,
-    latest_proven_block: BlockNumber,
     max_concurrent_proofs: NonZeroUsize,
 ) -> anyhow::Result<()> {
-    info!(target: COMPONENT, %latest_proven_block, "Proof scheduler started");
+    info!(target: COMPONENT, "Proof scheduler started");
 
-    // The latest block that has been sequentially marked as proven in the DB.
-    let mut latest_complete = latest_proven_block;
-    // The current chain tip as observed from the watch channel.
-    let mut chain_tip = *chain_tip_rx.borrow_and_update();
     // In-flight proving tasks.
     let mut join_set = ProofTaskJoinSet::new();
-    // Block numbers currently being proven.
-    let mut inflight: BTreeSet<BlockNumber> = BTreeSet::new();
-    // The next block number to schedule for proving.
-    let mut next_to_schedule = latest_complete.child();
+    // Number of blocks currently being proven.
+    let mut inflight_count: usize = 0;
+    // Highest block number that is inflight or has been proven. Used to avoid re-querying
+    // blocks we've already scheduled.
+    let mut highest_scheduled = BlockNumber::GENESIS;
     loop {
-        // Fill the job pool up to capacity from the next unscheduled blocks.
-        while inflight.len() < max_concurrent_proofs.into()
-            && next_to_schedule.as_u32() <= chain_tip.as_u32()
-        {
-            let scheduled = next_to_schedule;
-            inflight.insert(scheduled);
+        // Query the DB for unproven blocks beyond what we've already scheduled.
+        let capacity = max_concurrent_proofs.get() - inflight_count;
+        if capacity > 0 {
+            let unproven = db.select_unproven_blocks(highest_scheduled, capacity).await?;
 
-            join_set.spawn(&db, &block_prover, &block_store, scheduled);
-            next_to_schedule = scheduled.child();
+            inflight_count += unproven.len();
+            if let Some(&last) = unproven.last() {
+                highest_scheduled = last;
+            }
+
+            for block_num in unproven {
+                join_set.spawn(&db, &block_prover, &block_store, block_num);
+            }
         }
         // Wait for either a job to complete or the chain tip to advance.
@@ -152,31 +146,21 @@ async fn run(
             // Proving task completed.
             result = join_set.join_next() => {
                 let block_num = result?;
-                info!(target=COMPONENT, block.number=%block_num, "Block proof completed");
-                inflight.remove(&block_num);
+                inflight_count -= 1;
+
+                db.mark_block_proven(block_num).await?;
+
+                info!(target=COMPONENT, block.number=%block_num, "Block proven");
             },
-            // New chain tip received.
+            // New chain tip received — re-query for unproven blocks on next iteration.
             result = chain_tip_rx.changed() => {
                 if result.is_err() {
                     info!(target: COMPONENT, "Chain tip channel closed, proof scheduler exiting");
                     return Ok(());
                 }
-                chain_tip = *chain_tip_rx.borrow();
             },
         }
-
-        // Mark completed proofs as proven sequentially.
-        // Find the lowest in-flight block.
-        let lowest_in_flight = inflight.first().copied().unwrap_or(next_to_schedule);
-        // Mark all sequentially proven blocks as completed.
-        while latest_complete.child().as_u32() < lowest_in_flight.as_u32() {
-            latest_complete = latest_complete.child();
-            db.mark_block_proven(latest_complete)
-                .await
-                .map_err(ProofSchedulerError::MarkBlockProvenFailed)?;
-            info!(target=COMPONENT, block.number=%latest_complete, "Block marked as proven");
-        }
     }
 }

From 1dd667abc1013d7e6e9bb2f8d45a002c9dde624e Mon Sep 17 00:00:00 2001
From: sergerad
Date: Fri, 13 Mar 2026 12:08:31 +1300
Subject: [PATCH 71/73] mark proven in prove_and_save

---
 crates/store/src/server/proof_scheduler.rs | 15 +++++----------
 1 file changed, 5 insertions(+), 10 deletions(-)

diff --git a/crates/store/src/server/proof_scheduler.rs b/crates/store/src/server/proof_scheduler.rs
index 1b80cbfbd5..b71081af62 100644
--- a/crates/store/src/server/proof_scheduler.rs
+++ b/crates/store/src/server/proof_scheduler.rs
@@ -103,8 +103,7 @@ pub fn spawn(
 /// Maintains a pool of concurrent proving jobs via [`JoinSet`], fills them up to
 /// `max_concurrent_proofs`, and drains completed results.
 ///
-/// Unproven blocks are discovered by querying the database each iteration, so the scheduler is
-/// stateless with respect to which blocks need proving.
+/// Unproven blocks are discovered by querying the database each iteration.
 ///
 /// Returns `Err` on irrecoverable errors (missing/corrupt proving inputs, DB write failures).
 /// Transient errors are retried internally.
@@ -145,12 +144,8 @@ async fn run(
         tokio::select! {
             // Proving task completed.
             result = join_set.join_next() => {
-                let block_num = result?;
                 inflight_count -= 1;
-
-                db.mark_block_proven(block_num).await?;
-
-                info!(target=COMPONENT, block.number=%block_num, "Block proven");
+                info!(target=COMPONENT, block.number=%result?, "Block proven");
             },
             // New chain tip received — re-query for unproven blocks on next iteration.
@@ -170,9 +165,8 @@ async fn run(
 /// Proves a single block, saves the proof to the block store, and returns the block number.
 ///
 /// This function encapsulates the full lifecycle of a single proof job: loading inputs from the
-/// DB, invoking the prover (with a timeout), and persisting the proof to disk.
-///
-/// The caller is responsible for marking the block as proven in the DB.
+/// DB, invoking the prover (with a timeout), persisting the proof to disk, and marking the
+/// block as proven in the DB.
 #[instrument(target = COMPONENT, name = "prove_block", skip_all, fields(block.number=block_num.as_u32()), err)]
 async fn prove_and_save(
     db: &Db,
@@ -188,6 +182,7 @@ async fn prove_and_save(
     {
         Ok(Ok(proof)) => {
             save_block(block_store, block_num, &proof).await?;
+            db.mark_block_proven(block_num).await?;
             return Ok(block_num);
         },
         Ok(Err(ProveBlockError::Fatal(err))) => Err(err).context("fatal error")?,

From e53986140e80e59fc14c234ee51e5203f16c0453 Mon Sep 17 00:00:00 2001
From: sergerad
Date: Tue, 17 Mar 2026 11:04:40 +1300
Subject: [PATCH 72/73] saturating sub

---
 crates/store/src/server/proof_scheduler.rs | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/crates/store/src/server/proof_scheduler.rs b/crates/store/src/server/proof_scheduler.rs
index b71081af62..242d9723ad 100644
--- a/crates/store/src/server/proof_scheduler.rs
+++ b/crates/store/src/server/proof_scheduler.rs
@@ -144,7 +144,7 @@ async fn run(
         tokio::select! {
             // Proving task completed.
             result = join_set.join_next() => {
-                inflight_count -= 1;
+                inflight_count = inflight_count.saturating_sub(1);
                 info!(target=COMPONENT, block.number=%result?, "Block proven");
             },
             // New chain tip received — re-query for unproven blocks on next iteration.

From 346bf159e51268159ceecdd3997e80efa04843fe Mon Sep 17 00:00:00 2001
From: sergerad
Date: Tue, 17 Mar 2026 16:56:24 +1300
Subject: [PATCH 73/73] rm too many lines

---
 crates/store/src/server/mod.rs | 1 -
 1 file changed, 1 deletion(-)

diff --git a/crates/store/src/server/mod.rs b/crates/store/src/server/mod.rs
index 9de4bd3c25..8638818762 100644
--- a/crates/store/src/server/mod.rs
+++ b/crates/store/src/server/mod.rs
@@ -84,7 +84,6 @@ impl Store {
     /// Serves the store APIs (rpc, ntx-builder, block-producer) and DB maintenance background task.
     ///
     /// Note: this blocks until the server dies.
-    #[expect(clippy::too_many_lines)]
     pub async fn serve(self) -> anyhow::Result<()> {
         let rpc_address = self.rpc_listener.local_addr()?;
         let ntx_builder_address = self.ntx_builder_listener.local_addr()?;
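Taken together, PATCH 70-73 reduce the scheduler's run loop to a refill-and-drain pattern around tokio's `JoinSet`: top the set up to `max_concurrent_proofs` from a DB query, then handle one completion at a time. The sketch below is a self-contained illustration of that pattern only, not node code; the `MAX` constant, the `VecDeque` work queue, and the sleep are stand-ins for `max_concurrent_proofs`, the `select_unproven_blocks` query, and the actual prove-and-persist step (it assumes tokio with the `macros`, `rt-multi-thread`, and `time` features).

```rust
use std::collections::VecDeque;
use std::time::Duration;

use tokio::task::JoinSet;

// Stand-in for `max_concurrent_proofs`.
const MAX: usize = 4;

#[tokio::main]
async fn main() {
    // Stand-in work queue; in the scheduler this comes from the unproven-blocks DB query.
    let mut pending: VecDeque<u32> = (1..=10).collect();
    let mut join_set: JoinSet<u32> = JoinSet::new();

    loop {
        // Refill: keep at most MAX jobs in flight.
        while join_set.len() < MAX {
            let Some(block) = pending.pop_front() else { break };
            join_set.spawn(async move {
                // Stand-in for proving the block and persisting its proof.
                tokio::time::sleep(Duration::from_millis(10)).await;
                block
            });
        }

        // Drain: handle one completion, then loop back to refill.
        match join_set.join_next().await {
            Some(Ok(block)) => println!("proved block {block}"),
            Some(Err(err)) => panic!("proving task panicked: {err}"),
            None => break, // Nothing in flight and nothing left to schedule.
        }
    }
}
```

Capping concurrency on the `JoinSet` itself means every completed proof immediately frees a slot, so the real loop can simply ask the database again for more unproven blocks instead of tracking a sequential proving frontier in memory.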