From 28dd61351ee4676f7cd2dae15919eb80f413a673 Mon Sep 17 00:00:00 2001 From: Hooper Date: Wed, 24 Jun 2026 13:29:17 +0800 Subject: [PATCH] Enhance unit tests with timeout handling for blob creation and synchronization, and jenkins. --- .jenkins/Jenkinsfile | 5 ++++- src/lib/homestore_backend/CMakeLists.txt | 1 + src/lib/homestore_backend/tests/homeobj_fixture.hpp | 6 +++++- src/lib/homestore_backend/tests/hs_repl_test_helper.hpp | 5 ++++- 4 files changed, 14 insertions(+), 3 deletions(-) diff --git a/.jenkins/Jenkinsfile b/.jenkins/Jenkinsfile index eedf3408e..041b3da5d 100644 --- a/.jenkins/Jenkinsfile +++ b/.jenkins/Jenkinsfile @@ -65,6 +65,9 @@ pipeline { conan create ${BUILD_MISSING} -s:h build_type=RelWithDebInfo -o sisl/*:malloc_impl=tcmalloc ${CONAN_FLAGS} . ; \ " } + post { + failure { script { sleep(time: 1, unit: 'HOURS') } } + } } stage("Deploy") { @@ -82,7 +85,7 @@ pipeline { expression { (env.BRANCH_NAME == "${TARGET_BRANCH}") } expression { (!"${upstream_triggered}") || ("${upstream_triggered}" == "") } } } - + stages { stage('StorageManager') { steps { diff --git a/src/lib/homestore_backend/CMakeLists.txt b/src/lib/homestore_backend/CMakeLists.txt index 14903ad5d..6106ec7b8 100644 --- a/src/lib/homestore_backend/CMakeLists.txt +++ b/src/lib/homestore_backend/CMakeLists.txt @@ -138,6 +138,7 @@ add_test(NAME HomestoreResyncTestWithLeaderRestart --override_config homestore_config.consensus.max_grpc_message_size=138412032 --override_config homestore_config.consensus.replace_member_sync_check_interval_ms=1000 --override_config homestore_config.consensus.laggy_threshold=2000 + --log_mods homeobject:trace,replication:trace --gtest_filter=HomeObjectFixture.RestartLeader*) #add_test(NAME HomestoreReplaceMemberRollbackTest # COMMAND homestore_test_dynamic -csv error --executor immediate --config_path ./ diff --git a/src/lib/homestore_backend/tests/homeobj_fixture.hpp b/src/lib/homestore_backend/tests/homeobj_fixture.hpp index 499968ab3..e8b276bf0 100644 --- 
a/src/lib/homestore_backend/tests/homeobj_fixture.hpp +++ b/src/lib/homestore_backend/tests/homeobj_fixture.hpp @@ -49,7 +49,7 @@ class HomeObjectFixture : public ::testing::Test { HSHomeObject::_hs_chunk_size = SISL_OPTIONS["chunk_size"].as< uint64_t >() * Mi; _obj_inst = std::dynamic_pointer_cast< HSHomeObject >(g_helper->build_new_homeobject()); - + // Used to export metrics, it should be called after init_homeobject if (SISL_OPTIONS["enable_http"].as< bool >()) { g_helper->app->start_http_server(); } if (!g_helper->is_current_testcase_restarted()) { @@ -785,12 +785,16 @@ class HomeObjectFixture : public ::testing::Test { // wait for the last blob to be created locally, which means all the blob before this blob are created void wait_for_blob(shard_id_t shard_id, blob_id_t blob_id) { + static constexpr auto k_blob_timeout = std::chrono::minutes(10); + auto deadline = std::chrono::steady_clock::now() + k_blob_timeout; while (true) { if (blob_exist(shard_id, blob_id)) { LOGINFO("shard {} blob {} is created locally, which means all the blob before {} are created", shard_id, blob_id, blob_id); return; } + RELEASE_ASSERT(std::chrono::steady_clock::now() < deadline, + "wait_for_blob timed out after 10min: shard={} blob={}", shard_id, blob_id); std::this_thread::sleep_for(1s); } } diff --git a/src/lib/homestore_backend/tests/hs_repl_test_helper.hpp b/src/lib/homestore_backend/tests/hs_repl_test_helper.hpp index 79941bcc8..37a0a7c3b 100644 --- a/src/lib/homestore_backend/tests/hs_repl_test_helper.hpp +++ b/src/lib/homestore_backend/tests/hs_repl_test_helper.hpp @@ -52,6 +52,7 @@ class HSReplTestHelper { protected: struct IPCData { void sync(uint64_t sync_point, uint32_t max_count) { + static constexpr auto k_sync_timeout = std::chrono::minutes(10); std::unique_lock lg(mtx_); LOGINFO("=== Syncing: replica={}(total {}), sync_point_num={} ===", homeobject_replica_count_, max_count, sync_point); @@ -63,7 +64,9 @@ class HSReplTestHelper { auxiliary_uint64_id_ = UINT64_MAX; 
cv_.notify_all(); } else { - cv_.wait(lg, [this, sync_point]() { return sync_point_num_ == sync_point; }); + bool ok = + cv_.wait_for(lg, k_sync_timeout, [this, sync_point]() { return sync_point_num_ == sync_point; }); + RELEASE_ASSERT(ok, "sync timed out after 10min at sync_point={}, a replica likely crashed", sync_point); } }