diff --git a/CMakeLists.txt b/CMakeLists.txt index 81269f5..7c372c1 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -193,6 +193,10 @@ if(LANTERN_BUILD_TESTS) ) add_test(NAME lantern_client_gossip COMMAND lantern_client_gossip_test) + add_executable(lantern_client_state_staleness_test tests/unit/test_client_state_staleness.c) + target_link_libraries(lantern_client_state_staleness_test PRIVATE lantern) + add_test(NAME lantern_client_state_staleness COMMAND lantern_client_state_staleness_test) + add_executable(lantern_genesis_anchor_test tests/unit/test_genesis_anchor.c) target_link_libraries(lantern_genesis_anchor_test PRIVATE lantern) add_test(NAME lantern_genesis_anchor COMMAND lantern_genesis_anchor_test) @@ -409,6 +413,7 @@ if(LANTERN_BUILD_TESTS) set(_lantern_ctest_targets lantern_client_vote lantern_client_pending + lantern_client_state_staleness lantern_genesis_anchor lantern_genesis_bootstrap lantern_validator_selection diff --git a/src/core/client.c b/src/core/client.c index dc9d4b3..2ed19af 100644 --- a/src/core/client.c +++ b/src/core/client.c @@ -80,6 +80,44 @@ static const size_t CHECKPOINT_SYNC_MAX_RESPONSE_BYTES = + (LANTERN_HISTORICAL_ROOTS_LIMIT * 32u) + (LANTERN_JUSTIFICATION_VALIDATORS_LIMIT / 8u); static const size_t LANTERN_ATTESTATION_COMMITTEE_COUNT = 1u; +static const uint64_t LANTERN_CHECKPOINT_SYNC_STALE_PERSISTED_STATE_SLOT_THRESHOLD = 2u * 32u; + +bool lantern_client_persisted_state_is_stale_for_checkpoint_sync( + const LanternState *persisted_state, + uint64_t genesis_time, + uint32_t slot_duration_seconds, + uint64_t now_seconds, + uint64_t *out_expected_current_slot, + uint64_t *out_gap) { + uint64_t expected_current_slot = 0u; + uint64_t gap = 0u; + + if (out_expected_current_slot) { + *out_expected_current_slot = 0u; + } + if (out_gap) { + *out_gap = 0u; + } + if (!persisted_state || slot_duration_seconds == 0u) { + return false; + } + + if (now_seconds > genesis_time) { + expected_current_slot = (now_seconds - genesis_time) / 
(uint64_t)slot_duration_seconds; + } + if (expected_current_slot > persisted_state->slot) { + gap = expected_current_slot - persisted_state->slot; + } + + if (out_expected_current_slot) { + *out_expected_current_slot = expected_current_slot; + } + if (out_gap) { + *out_gap = gap; + } + + return gap > LANTERN_CHECKPOINT_SYNC_STALE_PERSISTED_STATE_SLOT_THRESHOLD; +} static void sync_aggregated_payload_pools_after_time_advance( struct lantern_client *client, @@ -2400,35 +2438,77 @@ static lantern_client_error client_load_or_build_state( const struct lantern_client_options *options, bool *loaded_from_storage) { + const bool checkpoint_sync_configured = + options + && options->checkpoint_sync_url + && options->checkpoint_sync_url[0] != '\0'; + const struct lantern_log_metadata meta = {.validator = client ? client->node_id : NULL}; bool from_storage = false; + bool should_attempt_checkpoint_sync = false; int storage_state_rc = lantern_storage_load_state(client->data_dir, &client->state); if (storage_state_rc == 0) { client->has_state = true; from_storage = true; - if (options - && options->checkpoint_sync_url - && options->checkpoint_sync_url[0] != '\0') - { - lantern_log_info( - "checkpoint_sync", - &(const struct lantern_log_metadata){.validator = client->node_id}, - "using persisted state; skipping checkpoint fetch"); + if (checkpoint_sync_configured) + { + struct lantern_consensus_runtime_config runtime_config; + lantern_consensus_runtime_config_init(&runtime_config); + + uint64_t expected_current_slot = 0u; + uint64_t gap = 0u; + time_t now_time = time(NULL); + if (now_time != (time_t)-1 + && lantern_client_persisted_state_is_stale_for_checkpoint_sync( + &client->state, + client->genesis.chain_config.genesis_time, + runtime_config.seconds_per_slot, + (uint64_t)now_time, + &expected_current_slot, + &gap)) + { + lantern_log_info( + "checkpoint_sync", + &meta, + "persisted state stale slot=%" PRIu64 + " expected_current_slot=%" PRIu64 + " gap=%" PRIu64 + " threshold=%" 
PRIu64 + "; discarding state and using checkpoint sync", + client->state.slot, + expected_current_slot, + gap, + LANTERN_CHECKPOINT_SYNC_STALE_PERSISTED_STATE_SLOT_THRESHOLD); + lantern_state_reset(&client->state); + client->has_state = false; + from_storage = false; + should_attempt_checkpoint_sync = true; + } + else + { + lantern_log_info( + "checkpoint_sync", + &meta, + "using persisted state; skipping checkpoint fetch"); + } } } else if (storage_state_rc < 0) { lantern_log_error( "storage", - &(const struct lantern_log_metadata){.validator = client->node_id}, + &meta, "failed to load persisted state"); return LANTERN_CLIENT_ERR_STORAGE; } else { - if (options - && options->checkpoint_sync_url - && options->checkpoint_sync_url[0] != '\0') + should_attempt_checkpoint_sync = checkpoint_sync_configured; + } + + if (!client->has_state) + { + if (should_attempt_checkpoint_sync) { lantern_client_error checkpoint_rc = client_load_state_from_checkpoint( client, @@ -2451,7 +2531,7 @@ static lantern_client_error client_load_or_build_state( { lantern_log_error( "storage", - &(const struct lantern_log_metadata){.validator = client->node_id}, + &meta, "failed to load persisted votes"); return LANTERN_CLIENT_ERR_STORAGE; } diff --git a/src/core/client_internal.h b/src/core/client_internal.h index 0c12755..80b1a8c 100644 --- a/src/core/client_internal.h +++ b/src/core/client_internal.h @@ -186,6 +186,31 @@ void string_list_remove(struct lantern_string_list *list, const char *value); */ const char *connection_reason_text(int reason); +/** + * Decide whether a persisted state is too stale to trust when checkpoint sync + * is available. + * + * Computes the expected wall-clock slot from genesis time and slot duration, + * then compares it against the persisted state's slot. 
+ * + * @param persisted_state Persisted state loaded from storage + * @param genesis_time Chain genesis Unix time in seconds + * @param slot_duration_seconds Slot duration in seconds + * @param now_seconds Current Unix time in seconds + * @param out_expected_current_slot Optional output for expected slot + * @param out_gap Optional output for slot gap + * + * @return true when the persisted state should be discarded in favor of + * checkpoint sync + */ +bool lantern_client_persisted_state_is_stale_for_checkpoint_sync( + const LanternState *persisted_state, + uint64_t genesis_time, + uint32_t slot_duration_seconds, + uint64_t now_seconds, + uint64_t *out_expected_current_slot, + uint64_t *out_gap); + /** * Cache an individual gossip signature keyed by validator and attestation root. * diff --git a/tests/unit/test_client_state_staleness.c b/tests/unit/test_client_state_staleness.c new file mode 100644 index 0000000..2a644bd --- /dev/null +++ b/tests/unit/test_client_state_staleness.c @@ -0,0 +1,115 @@ +#include <inttypes.h> +#include <stdbool.h> +#include <stdint.h> +#include <stdio.h> + +#include "lantern/consensus/state.h" + +#include "../../src/core/client_internal.h" + +static int expect_case( + const char *label, + uint64_t persisted_slot, + uint64_t genesis_time, + uint32_t slot_duration_seconds, + uint64_t now_seconds, + bool expected_stale, + uint64_t expected_current_slot, + uint64_t expected_gap) +{ + LanternState state; + lantern_state_init(&state); + state.slot = persisted_slot; + + uint64_t current_slot = UINT64_C(9999); + uint64_t gap = UINT64_C(9999); + bool stale = lantern_client_persisted_state_is_stale_for_checkpoint_sync( + &state, + genesis_time, + slot_duration_seconds, + now_seconds, + &current_slot, + &gap); + + lantern_state_reset(&state); + + if (stale != expected_stale) + { + fprintf( + stderr, + "%s stale mismatch got=%d expected=%d\n", + label, + stale ? 1 : 0, + expected_stale ? 
1 : 0); + return 1; + } + if (current_slot != expected_current_slot) + { + fprintf( + stderr, + "%s current slot mismatch got=%" PRIu64 " expected=%" PRIu64 "\n", + label, + current_slot, + expected_current_slot); + return 1; + } + if (gap != expected_gap) + { + fprintf( + stderr, + "%s gap mismatch got=%" PRIu64 " expected=%" PRIu64 "\n", + label, + gap, + expected_gap); + return 1; + } + + return 0; +} + +int main(void) +{ + if (expect_case( + "fresh_enough_boundary", + 10u, + 1000u, + 4u, + 1000u + ((10u + 64u) * 4u), + false, + 74u, + 64u) + != 0) + { + return 1; + } + + if (expect_case( + "stale_beyond_boundary", + 10u, + 1000u, + 4u, + 1000u + ((10u + 65u) * 4u), + true, + 75u, + 65u) + != 0) + { + return 1; + } + + if (expect_case( + "before_genesis", + 0u, + 5000u, + 4u, + 4990u, + false, + 0u, + 0u) + != 0) + { + return 1; + } + + return 0; +}