-
Notifications
You must be signed in to change notification settings - Fork 17
SDSTOR-22424: redo destroy pg #435
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -449,6 +449,10 @@ void ReplicationStateMachine::write_snapshot_obj(std::shared_ptr< homestore::sna | |
| set_snapshot_context(context); // Update the snapshot context in case apply_snapshot is not called | ||
| auto hs_pg = home_object_->get_hs_pg(m_snp_rcv_handler->get_context_pg_id()); | ||
| hs_pg->pg_state_.clear_state(PGStateMask::BASELINE_RESYNC); | ||
| // we only reset this if destroying pg happens in BR case. for other cases (on_destroy and _exit_pg), | ||
| // since this replica will leave the PG and no later logs will be received, no need to reset this. | ||
| reset_no_space_left_error_info(); | ||
| repl_dev()->reset_latch_lsn(); | ||
| return; | ||
| } | ||
|
|
||
|
|
@@ -499,7 +503,7 @@ void ReplicationStateMachine::write_snapshot_obj(std::shared_ptr< homestore::sna | |
| if (home_object_->pg_exists(pg_data->pg_id())) { | ||
| LOGI("pg already exists, clean pg resources before snapshot, pg={} {}", pg_data->pg_id(), log_suffix); | ||
| // Need to pause state machine before destroying the PG, if fail, let raft retry. | ||
|
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. This code comment is out of date; also, we don't have a branch that returns false as of now.
Collaborator
Author
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Let's remove this out-of-date comment after addressing the other comments for this PR. |
||
| if (!home_object_->pg_destroy(pg_data->pg_id(), true /* pause state machine */)) { | ||
| if (!home_object_->pg_destroy(pg_data->pg_id())) { | ||
| LOGE("failed to destroy existing pg, let raft retry, pg={} {}", pg_data->pg_id(), log_suffix); | ||
| return; | ||
| } | ||
|
|
@@ -1030,7 +1034,19 @@ void ReplicationStateMachine::on_log_replay_done(const homestore::group_id_t& gr | |
| const auto pg_id = pg_id_opt.value(); | ||
| RELEASE_ASSERT(home_object_->pg_exists(pg_id), "pg={} should exist, but not! fatal error!", pg_id); | ||
|
|
||
| const auto& shards_in_pg = (const_cast< HSHomeObject::HS_PG* >(home_object_->_get_hs_pg_unlocked(pg_id)))->shards_; | ||
| const auto hs_pg = (const_cast< HSHomeObject::HS_PG* >(home_object_->get_hs_pg(pg_id))); | ||
| RELEASE_ASSERT(hs_pg, "Failed to get pg={} when log replay done", pg_id); | ||
| if (hs_pg->pg_sb_->state == PGState::DESTROYED) { | ||
| // if we reach here, it means we have a repl_dev (log replay, and thus on_log_replay_done, only happens when a | ||
| // repl_dev exists), but the state of the related pg is destroyed. this can only happen when a crash | ||
| // happens after the pg is destroyed but before pg_super_blk is destroyed in the baseline resync case. | ||
|
|
||
| // we need to do nothing here, since the first snapshot message(obj_id.shard_seq_num == 0) will be received | ||
| // again and pg_destroy will be called again when handling the first snapshot message. | ||
| return; | ||
| } | ||
|
|
||
| const auto& shards_in_pg = hs_pg->shards_; | ||
| auto chunk_selector = home_object_->chunk_selector(); | ||
|
|
||
| for (const auto& shard_iter : shards_in_pg) { | ||
|
|
||
Uh oh!
There was an error while loading. Please reload this page.