From be5918527d42f72097c528823ee580d2030da451 Mon Sep 17 00:00:00 2001 From: Xiaoxi Chen Date: Tue, 24 Feb 2026 15:02:28 +0800 Subject: [PATCH 1/2] Fix superblock IO error handling and add footer validation This change improves superblock error handling and integrity checking: - Fail fast on IO errors during superblock read instead of treating them as fresh/unformatted disks - Add footer superblock validation for HDD devices to detect corruption - Check header and footer write errors independently to prevent silent failures - Add comprehensive unit tests covering all error scenarios Changes: - read_first_block(): Now throws std::system_error on IO errors instead of returning garbage data - write_super_block(): Separately validates header and footer writes with independent error checking - sanity_check(): New method that validates footer consistency on HDD devices by comparing header and footer superblocks using full memcmp - Added 6 unit tests covering IO errors, corruption detection, and footer validation scenarios Use release assert instead of exceptions for superblock IO errors Per review feedback, replace exception throws with HS_REL_ASSERT for all superblock IO errors to ensure immediate crash on failure: - read_first_block(): Use HS_REL_ASSERT instead of throwing std::system_error - sanity_check(): Use HS_REL_ASSERT for header/footer read errors and mismatch - Update tests from ASSERT_THROW to ASSERT_DEATH to verify crash behavior --- src/lib/device/physical_dev.cpp | 57 +++++++- src/lib/device/physical_dev.hpp | 1 + src/tests/test_pdev.cpp | 221 ++++++++++++++++++++++++++++++++ 3 files changed, 273 insertions(+), 6 deletions(-) diff --git a/src/lib/device/physical_dev.cpp b/src/lib/device/physical_dev.cpp index ba52ba2f2..b151c308c 100644 --- a/src/lib/device/physical_dev.cpp +++ b/src/lib/device/physical_dev.cpp @@ -66,8 +66,11 @@ first_block PhysicalDev::read_first_block(const std::string& devname, int oflags first_block ret; auto buf = 
hs_utils::iobuf_alloc(first_block::s_io_fb_size, sisl::buftag::superblk, 512); - iodev->drive_interface()->sync_read(iodev.get(), r_cast< char* >(buf), first_block::s_io_fb_size, - hs_super_blk::first_block_offset()); + auto err = iodev->drive_interface()->sync_read(iodev.get(), r_cast< char* >(buf), first_block::s_io_fb_size, + hs_super_blk::first_block_offset()); + + HS_REL_ASSERT(!err, "IO error reading first block from device={}, error={}, homestore will go down", devname, + err.message()); ret = *(r_cast< first_block* >(buf)); hs_utils::iobuf_free(buf, sisl::buftag::superblk); @@ -114,20 +117,25 @@ PhysicalDev::PhysicalDev(const dev_info& dinfo, int oflags, const pdev_info_head m_streams.emplace_back(i); } m_super_blk_in_footer = m_pdev_info.mirror_super_block; + + // Validate footer superblock consistency if mirroring is enabled + sanity_check(); } PhysicalDev::~PhysicalDev() { close_device(); } void PhysicalDev::write_super_block(uint8_t const* buf, uint32_t sb_size, uint64_t offset) { auto err_c = m_drive_iface->sync_write(m_iodev.get(), c_charptr_cast(buf), sb_size, offset); + HS_REL_ASSERT(!err_c, "Super block write to header failed on dev={} at size={} offset={}, homestore will go down", + m_devname, sb_size, offset); if (m_super_blk_in_footer) { auto t_offset = data_end_offset() + offset; - err_c = m_drive_iface->sync_write(m_iodev.get(), c_charptr_cast(buf), sb_size, t_offset); + auto footer_err_c = m_drive_iface->sync_write(m_iodev.get(), c_charptr_cast(buf), sb_size, t_offset); + HS_REL_ASSERT(!footer_err_c, + "Super block write to footer failed on dev={} at size={} offset={}, homestore will go down", + m_devname, sb_size, t_offset); } - - HS_REL_ASSERT(!err_c, "Super block write failed on dev={} at size={} offset={}, homestore will go down", m_devname, - sb_size, offset); } std::error_code PhysicalDev::read_super_block(uint8_t* buf, uint32_t sb_size, uint64_t offset) { @@ -136,6 +144,43 @@ std::error_code PhysicalDev::read_super_block(uint8_t* buf, 
uint32_t sb_size, ui void PhysicalDev::close_device() { close_and_uncache_dev(m_devname, m_iodev); } +void PhysicalDev::sanity_check() { + // Only validate footer if mirroring is enabled (HDD devices) + if (!m_super_blk_in_footer) { return; } + + HS_LOG(INFO, device, "Validating footer superblock consistency on device={}", m_devname); + + // Read header first block + auto header_buf = hs_utils::iobuf_alloc(first_block::s_io_fb_size, sisl::buftag::superblk, + m_pdev_info.dev_attr.align_size); + auto header_err = read_super_block(header_buf, first_block::s_io_fb_size, hs_super_blk::first_block_offset()); + HS_REL_ASSERT(!header_err, + "IO error reading header first block during sanity check on device={}, error={}, homestore will go down", + m_devname, header_err.message()); + + // Read footer first block using the same offset calculation as write_super_block() + auto footer_offset = data_end_offset() + hs_super_blk::first_block_offset(); + auto footer_buf = hs_utils::iobuf_alloc(first_block::s_io_fb_size, sisl::buftag::superblk, + m_pdev_info.dev_attr.align_size); + auto footer_err = read_super_block(footer_buf, first_block::s_io_fb_size, footer_offset); + HS_REL_ASSERT( + !footer_err, + "IO error reading footer first block during sanity check on device={}, offset={}, error={}, homestore will go down", + m_devname, footer_offset, footer_err.message()); + + // Compare header and footer + auto header_blk = r_cast< first_block* >(header_buf); + auto footer_blk = r_cast< first_block* >(footer_buf); + HS_REL_ASSERT(std::memcmp(header_blk, footer_blk, first_block::s_atomic_fb_size) == 0, + "Footer first block mismatch with header on device={}, header=[{}], footer=[{}], this indicates " + "corruption, homestore will go down", + m_devname, header_blk->to_string(), footer_blk->to_string()); + + hs_utils::iobuf_free(header_buf, sisl::buftag::superblk); + hs_utils::iobuf_free(footer_buf, sisl::buftag::superblk); + HS_LOG(INFO, device, "Footer superblock validated successfully 
on device={}", m_devname); +} + folly::Future< std::error_code > PhysicalDev::async_write(const char* data, uint32_t size, uint64_t offset, bool part_of_batch) { auto const start_time = get_current_time(); diff --git a/src/lib/device/physical_dev.hpp b/src/lib/device/physical_dev.hpp index f68ae14e6..0709e27d5 100644 --- a/src/lib/device/physical_dev.hpp +++ b/src/lib/device/physical_dev.hpp @@ -154,6 +154,7 @@ class PhysicalDev { std::error_code read_super_block(uint8_t* buf, uint32_t sb_size, uint64_t offset); void write_super_block(uint8_t const* buf, uint32_t sb_size, uint64_t offset); + void sanity_check(); void close_device(); //////////////////////////// Chunk Creation/Load related methods ///////////////////////////////////////// diff --git a/src/tests/test_pdev.cpp b/src/tests/test_pdev.cpp index 4447c500b..e8003a445 100644 --- a/src/tests/test_pdev.cpp +++ b/src/tests/test_pdev.cpp @@ -262,6 +262,227 @@ TEST_F(PDevTest, RandomChunkOpsWithRestart) { num_removed, available_size); } +// Test fixture for superblock error handling tests +class SuperblockErrorTest : public ::testing::Test { +protected: + std::string m_test_file; + uint64_t m_dev_size{100 * 1024 * 1024}; // 100MB + + void SetUp() override { + m_test_file = "/tmp/test_superblock_error"; + init_file(m_test_file, m_dev_size); + + auto const is_spdk = SISL_OPTIONS["spdk"].as< bool >(); + ioenvironment.with_iomgr(iomgr::iomgr_params{.num_threads = 1, .is_spdk = is_spdk}); + } + + void TearDown() override { + iomanager.stop(); + if (std::filesystem::exists(m_test_file)) { + std::filesystem::remove(m_test_file); + } + } + + // Helper to corrupt a file at specific offset + void corrupt_file_at_offset(uint64_t offset, uint64_t size) { + std::fstream file(m_test_file, std::ios::binary | std::ios::in | std::ios::out); + ASSERT_TRUE(file.is_open()); + file.seekp(offset); + std::vector garbage(size, 0xAA); // Fill with garbage + file.write(reinterpret_cast(garbage.data()), size); + file.close(); + } + + // 
Helper to truncate file to simulate IO errors + void truncate_file(uint64_t new_size) { + std::filesystem::resize_file(m_test_file, new_size); + } +}; + +TEST_F(SuperblockErrorTest, ReadFirstBlockIOError) { + LOGINFO("Test: read_first_block should crash on IO error"); + + // Truncate the file to be too small to contain first block + truncate_file(512); // Less than first_block::s_io_fb_size (4096) + + // Attempt to read first block should crash with HS_REL_ASSERT + ASSERT_DEATH({ + PhysicalDev::read_first_block(m_test_file, O_RDWR); + }, "IO error reading first block"); +} + +TEST_F(SuperblockErrorTest, ReadFirstBlockCorruptedData) { + LOGINFO("Test: read_first_block should return invalid first_block on corrupted data"); + + // Fill the first block area with garbage + corrupt_file_at_offset(0, 4096); + + // Reading should succeed but return invalid first_block + ASSERT_NO_THROW({ + auto fblk = PhysicalDev::read_first_block(m_test_file, O_RDWR); + ASSERT_FALSE(fblk.is_valid()) << "Corrupted first block should be invalid"; + LOGINFO("Successfully read corrupted first block, is_valid={}", fblk.is_valid()); + }); +} + +TEST_F(SuperblockErrorTest, FooterValidationHDDDevice) { + LOGINFO("Test: Footer validation should detect header/footer mismatch on HDD"); + + // First, create a properly formatted device + std::vector dev_infos; + dev_infos.emplace_back(std::filesystem::canonical(m_test_file).string(), HSDevType::Data); + + auto dmgr = std::make_unique( + dev_infos, [](const vdev_info&, bool) -> shared { return nullptr; }); + + ASSERT_TRUE(dmgr->is_first_time_boot()); + dmgr->format_devices(); + dmgr->commit_formatting(); + + // Get the pdev to check if it has footer mirroring + auto pdevs = dmgr->get_pdevs_by_dev_type(HSDevType::Data); + ASSERT_GT(pdevs.size(), 0); + auto pdev = pdevs[0]; + + // For HDD devices (with footer mirroring), test footer validation + if (pdev->atomic_page_size() > 0) { + LOGINFO("Device has footer mirroring enabled, testing footer corruption 
detection"); + + dmgr.reset(); + iomanager.stop(); + + // Calculate footer offset: data_end_offset = devsize - data_offset + // Footer first block is at: data_end_offset + first_block_offset (0) + auto data_offset = hs_super_blk::first_block_offset() + + hs_super_blk::total_size(dev_infos[0]); + auto footer_offset = m_dev_size - data_offset; + + LOGINFO("Corrupting footer at offset={}", footer_offset); + corrupt_file_at_offset(footer_offset, 512); + + // Restart should crash because footer doesn't match header + ioenvironment.with_iomgr(iomgr::iomgr_params{.num_threads = 1, .is_spdk = false}); + ASSERT_DEATH({ + auto dmgr2 = std::make_unique( + dev_infos, [](const vdev_info&, bool) -> shared { return nullptr; }); + dmgr2->load_devices(); + }, "Footer first block mismatch"); + } else { + LOGINFO("Device does not have footer mirroring, skipping footer corruption test"); + } +} + +TEST_F(SuperblockErrorTest, FooterIOError) { + LOGINFO("Test: Footer read IO error should be caught during sanity_check"); + + // First, create a properly formatted device + std::vector dev_infos; + dev_infos.emplace_back(std::filesystem::canonical(m_test_file).string(), HSDevType::Data); + + auto dmgr = std::make_unique( + dev_infos, [](const vdev_info&, bool) -> shared { return nullptr; }); + + ASSERT_TRUE(dmgr->is_first_time_boot()); + dmgr->format_devices(); + dmgr->commit_formatting(); + + auto pdevs = dmgr->get_pdevs_by_dev_type(HSDevType::Data); + ASSERT_GT(pdevs.size(), 0); + auto pdev = pdevs[0]; + + // For HDD devices, test footer IO error + if (pdev->atomic_page_size() > 0) { + LOGINFO("Device has footer mirroring enabled, testing footer IO error"); + + dmgr.reset(); + iomanager.stop(); + + // Truncate file to cut off the footer area + auto data_offset = hs_super_blk::first_block_offset() + + hs_super_blk::total_size(dev_infos[0]); + auto truncate_size = data_offset + 1024; // Cut off before footer + + LOGINFO("Truncating file to size={} to cause footer IO error", truncate_size); 
+ truncate_file(truncate_size); + + // Restart should crash because footer cannot be read + ioenvironment.with_iomgr(iomgr::iomgr_params{.num_threads = 1, .is_spdk = false}); + ASSERT_DEATH({ + auto dmgr2 = std::make_unique( + dev_infos, [](const vdev_info&, bool) -> shared { return nullptr; }); + dmgr2->load_devices(); + }, "IO error reading footer first block"); + } else { + LOGINFO("Device does not have footer mirroring, skipping footer IO error test"); + } +} + +TEST_F(SuperblockErrorTest, NonHDDDeviceSkipsFooterValidation) { + LOGINFO("Test: Non-HDD devices should skip footer validation"); + + // Create device as Fast type (SSD), which typically doesn't have footer mirroring + std::vector dev_infos; + dev_infos.emplace_back(std::filesystem::canonical(m_test_file).string(), HSDevType::Fast); + + auto dmgr = std::make_unique( + dev_infos, [](const vdev_info&, bool) -> shared { return nullptr; }); + + ASSERT_TRUE(dmgr->is_first_time_boot()); + dmgr->format_devices(); + dmgr->commit_formatting(); + + auto pdevs = dmgr->get_pdevs_by_dev_type(HSDevType::Fast); + ASSERT_GT(pdevs.size(), 0); + + // Should restart successfully even if we corrupt the footer area + dmgr.reset(); + iomanager.stop(); + + // Corrupt what would be the footer area + auto data_offset = hs_super_blk::first_block_offset() + + hs_super_blk::total_size(dev_infos[0]); + auto footer_offset = m_dev_size - data_offset; + corrupt_file_at_offset(footer_offset, 4096); + + // Should succeed because SSD doesn't validate footer + ioenvironment.with_iomgr(iomgr::iomgr_params{.num_threads = 1, .is_spdk = false}); + ASSERT_NO_THROW({ + auto dmgr2 = std::make_unique( + dev_infos, [](const vdev_info&, bool) -> shared { return nullptr; }); + dmgr2->load_devices(); + LOGINFO("Successfully loaded device without footer validation"); + }); +} + +TEST_F(SuperblockErrorTest, ValidFooterMatchesHeader) { + LOGINFO("Test: Valid footer should match header on HDD device"); + + std::vector dev_infos; + 
dev_infos.emplace_back(std::filesystem::canonical(m_test_file).string(), HSDevType::Data); + + auto dmgr = std::make_unique( + dev_infos, [](const vdev_info&, bool) -> shared { return nullptr; }); + + ASSERT_TRUE(dmgr->is_first_time_boot()); + dmgr->format_devices(); + dmgr->commit_formatting(); + + auto pdevs = dmgr->get_pdevs_by_dev_type(HSDevType::Data); + ASSERT_GT(pdevs.size(), 0); + + // Restart should succeed with matching header and footer + dmgr.reset(); + iomanager.stop(); + + ioenvironment.with_iomgr(iomgr::iomgr_params{.num_threads = 1, .is_spdk = false}); + ASSERT_NO_THROW({ + auto dmgr2 = std::make_unique( + dev_infos, [](const vdev_info&, bool) -> shared { return nullptr; }); + dmgr2->load_devices(); + LOGINFO("Successfully validated matching header and footer"); + }); +} + int main(int argc, char* argv[]) { SISL_OPTIONS_LOAD(argc, argv, logging, test_pdev, iomgr); ::testing::InitGoogleTest(&argc, argv); From e28c0c0edf6adbc5af61234bda739bc276cb96b9 Mon Sep 17 00:00:00 2001 From: Xiaoxi Chen Date: Fri, 5 Jun 2026 18:22:16 +0800 Subject: [PATCH 2/2] Fix sanity_check to handle first boot and distinguish header corruption MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit On first boot (format_devices path), all devices have invalid headers on disk when PhysicalDev is constructed — before write_super_block is called. The original code blindly memcmp'd header vs footer on garbage data, causing a crash on HDD devices where mirror_super_block=1. In load_devices, a device with an invalid header goes to pdevs_to_format and then format_single_device. At that point m_first_blk_hdr (and thus m_pdev_info.system_uuid) is already populated from other valid devices in the cluster. So if the footer on disk is valid and its system_uuid matches, it means the footer survived but the header was corrupted — not a fresh device. 
If the footer uuid does not match (or footer is invalid), it is leftover/garbage data and safe to treat as first boot. New logic: - Header valid: compare header and footer as before - Header invalid, footer valid + uuid matches cluster: header corruption, assert - Otherwise: first boot / leftover data, skip validation --- conanfile.py | 2 +- src/lib/device/physical_dev.cpp | 36 +++--- src/lib/device/physical_dev.hpp | 1 + src/tests/test_pdev.cpp | 187 +++++++++++++------------------- 4 files changed, 97 insertions(+), 129 deletions(-) diff --git a/conanfile.py b/conanfile.py index 87a52abba..4ec340dfb 100644 --- a/conanfile.py +++ b/conanfile.py @@ -9,7 +9,7 @@ class HomestoreConan(ConanFile): name = "homestore" - version = "7.5.10" + version = "7.5.11" homepage = "https://github.com/eBay/Homestore" description = "HomeStore Storage Engine" diff --git a/src/lib/device/physical_dev.cpp b/src/lib/device/physical_dev.cpp index b151c308c..8f467d48a 100644 --- a/src/lib/device/physical_dev.cpp +++ b/src/lib/device/physical_dev.cpp @@ -145,40 +145,46 @@ std::error_code PhysicalDev::read_super_block(uint8_t* buf, uint32_t sb_size, ui void PhysicalDev::close_device() { close_and_uncache_dev(m_devname, m_iodev); } void PhysicalDev::sanity_check() { - // Only validate footer if mirroring is enabled (HDD devices) if (!m_super_blk_in_footer) { return; } HS_LOG(INFO, device, "Validating footer superblock consistency on device={}", m_devname); - // Read header first block auto header_buf = hs_utils::iobuf_alloc(first_block::s_io_fb_size, sisl::buftag::superblk, m_pdev_info.dev_attr.align_size); auto header_err = read_super_block(header_buf, first_block::s_io_fb_size, hs_super_blk::first_block_offset()); HS_REL_ASSERT(!header_err, - "IO error reading header first block during sanity check on device={}, error={}, homestore will go down", - m_devname, header_err.message()); + "IO error reading header first block on device={}, error={}, homestore will go down", m_devname, + 
header_err.message()); - // Read footer first block using the same offset calculation as write_super_block() auto footer_offset = data_end_offset() + hs_super_blk::first_block_offset(); auto footer_buf = hs_utils::iobuf_alloc(first_block::s_io_fb_size, sisl::buftag::superblk, m_pdev_info.dev_attr.align_size); auto footer_err = read_super_block(footer_buf, first_block::s_io_fb_size, footer_offset); - HS_REL_ASSERT( - !footer_err, - "IO error reading footer first block during sanity check on device={}, offset={}, error={}, homestore will go down", - m_devname, footer_offset, footer_err.message()); + HS_REL_ASSERT(!footer_err, + "IO error reading footer first block on device={}, offset={}, error={}, homestore will go down", + m_devname, footer_offset, footer_err.message()); - // Compare header and footer auto header_blk = r_cast< first_block* >(header_buf); auto footer_blk = r_cast< first_block* >(footer_buf); - HS_REL_ASSERT(std::memcmp(header_blk, footer_blk, first_block::s_atomic_fb_size) == 0, - "Footer first block mismatch with header on device={}, header=[{}], footer=[{}], this indicates " - "corruption, homestore will go down", - m_devname, header_blk->to_string(), footer_blk->to_string()); + + if (header_blk->is_valid()) { + HS_REL_ASSERT(std::memcmp(header_blk, footer_blk, first_block::s_atomic_fb_size) == 0, + "Footer mismatch with header on device={}, header=[{}], footer=[{}], corruption detected, " + "homestore will go down", + m_devname, header_blk->to_string(), footer_blk->to_string()); + HS_LOG(INFO, device, "Footer superblock validated successfully on device={}", m_devname); + } else if (footer_blk->is_valid() && footer_blk->this_pdev_hdr.system_uuid == m_pdev_info.system_uuid) { + HS_REL_ASSERT(false, + "Header invalid but footer has matching system_uuid on device={}, indicates header superblock " + "corruption, homestore will go down", + m_devname); + } else { + HS_LOG(INFO, device, + "Header invalid and footer has no matching system_uuid on 
device={}, treating as first boot", m_devname); + } hs_utils::iobuf_free(header_buf, sisl::buftag::superblk); hs_utils::iobuf_free(footer_buf, sisl::buftag::superblk); - HS_LOG(INFO, device, "Footer superblock validated successfully on device={}", m_devname); } folly::Future< std::error_code > PhysicalDev::async_write(const char* data, uint32_t size, uint64_t offset, diff --git a/src/lib/device/physical_dev.hpp b/src/lib/device/physical_dev.hpp index 0709e27d5..757a838c6 100644 --- a/src/lib/device/physical_dev.hpp +++ b/src/lib/device/physical_dev.hpp @@ -223,6 +223,7 @@ class PhysicalDev { uint32_t optimal_page_size() const { return m_pdev_info.dev_attr.phys_page_size; } uint32_t align_size() const { return m_pdev_info.dev_attr.align_size; } uint32_t atomic_page_size() const { return m_pdev_info.dev_attr.atomic_phys_page_size; } + bool has_footer_mirror() const { return m_super_blk_in_footer; } uint64_t data_start_offset() const { return m_pdev_info.data_offset; } uint64_t data_end_offset() const { diff --git a/src/tests/test_pdev.cpp b/src/tests/test_pdev.cpp index e8003a445..73854bba2 100644 --- a/src/tests/test_pdev.cpp +++ b/src/tests/test_pdev.cpp @@ -25,8 +25,10 @@ #include #include +#include #include #include +#include #include "device/chunk.h" @@ -277,10 +279,10 @@ class SuperblockErrorTest : public ::testing::Test { } void TearDown() override { + // Reset any drive type emulation so it doesn't bleed into subsequent tests. 
+ iomgr::DriveInterface::emulate_drive_type(m_test_file, iomgr::drive_type::file_on_nvme); iomanager.stop(); - if (std::filesystem::exists(m_test_file)) { - std::filesystem::remove(m_test_file); - } + if (std::filesystem::exists(m_test_file)) { std::filesystem::remove(m_test_file); } } // Helper to corrupt a file at specific offset @@ -300,24 +302,20 @@ class SuperblockErrorTest : public ::testing::Test { }; TEST_F(SuperblockErrorTest, ReadFirstBlockIOError) { - LOGINFO("Test: read_first_block should crash on IO error"); + LOGINFO("Test: read_first_block should crash on IO error (short-read returns ERANGE)"); - // Truncate the file to be too small to contain first block - truncate_file(512); // Less than first_block::s_io_fb_size (4096) + // Truncate to less than s_io_fb_size so pread() returns fewer bytes than requested. + // iomgr's sync_read retries until max_resubmit_cnt, then returns ERANGE. + truncate_file(512); - // Attempt to read first block should crash with HS_REL_ASSERT - ASSERT_DEATH({ - PhysicalDev::read_first_block(m_test_file, O_RDWR); - }, "IO error reading first block"); + ASSERT_DEATH({ PhysicalDev::read_first_block(m_test_file, O_RDWR); }, "IO error reading first block"); } TEST_F(SuperblockErrorTest, ReadFirstBlockCorruptedData) { LOGINFO("Test: read_first_block should return invalid first_block on corrupted data"); - // Fill the first block area with garbage corrupt_file_at_offset(0, 4096); - // Reading should succeed but return invalid first_block ASSERT_NO_THROW({ auto fblk = PhysicalDev::read_first_block(m_test_file, O_RDWR); ASSERT_FALSE(fblk.is_valid()) << "Corrupted first block should be invalid"; @@ -325,130 +323,86 @@ TEST_F(SuperblockErrorTest, ReadFirstBlockCorruptedData) { }); } -TEST_F(SuperblockErrorTest, FooterValidationHDDDevice) { - LOGINFO("Test: Footer validation should detect header/footer mismatch on HDD"); - - // First, create a properly formatted device - std::vector dev_infos; - 
dev_infos.emplace_back(std::filesystem::canonical(m_test_file).string(), HSDevType::Data); - - auto dmgr = std::make_unique( - dev_infos, [](const vdev_info&, bool) -> shared { return nullptr; }); - - ASSERT_TRUE(dmgr->is_first_time_boot()); - dmgr->format_devices(); - dmgr->commit_formatting(); - - // Get the pdev to check if it has footer mirroring - auto pdevs = dmgr->get_pdevs_by_dev_type(HSDevType::Data); - ASSERT_GT(pdevs.size(), 0); - auto pdev = pdevs[0]; - - // For HDD devices (with footer mirroring), test footer validation - if (pdev->atomic_page_size() > 0) { - LOGINFO("Device has footer mirroring enabled, testing footer corruption detection"); - - dmgr.reset(); - iomanager.stop(); - - // Calculate footer offset: data_end_offset = devsize - data_offset - // Footer first block is at: data_end_offset + first_block_offset (0) - auto data_offset = hs_super_blk::first_block_offset() + - hs_super_blk::total_size(dev_infos[0]); - auto footer_offset = m_dev_size - data_offset; - - LOGINFO("Corrupting footer at offset={}", footer_offset); - corrupt_file_at_offset(footer_offset, 512); - - // Restart should crash because footer doesn't match header - ioenvironment.with_iomgr(iomgr::iomgr_params{.num_threads = 1, .is_spdk = false}); - ASSERT_DEATH({ - auto dmgr2 = std::make_unique( - dev_infos, [](const vdev_info&, bool) -> shared { return nullptr; }); - dmgr2->load_devices(); - }, "Footer first block mismatch"); - } else { - LOGINFO("Device does not have footer mirroring, skipping footer corruption test"); - } +// Emulate the test file as an HDD so that DeviceManager sets mirror_super_block=true. +// This exercises the footer write/validate code paths on any filesystem. 
+static void emulate_as_hdd(const std::string& path) { + iomgr::DriveInterface::emulate_drive_type(path, iomgr::drive_type::file_on_hdd); +} +static void emulate_as_nvme(const std::string& path) { + iomgr::DriveInterface::emulate_drive_type(path, iomgr::drive_type::file_on_nvme); } -TEST_F(SuperblockErrorTest, FooterIOError) { - LOGINFO("Test: Footer read IO error should be caught during sanity_check"); +TEST_F(SuperblockErrorTest, FooterValidationHDDDevice) { + LOGINFO("Test: Footer validation should detect header/footer mismatch"); - // First, create a properly formatted device - std::vector dev_infos; - dev_infos.emplace_back(std::filesystem::canonical(m_test_file).string(), HSDevType::Data); + // Force HDD detection so DeviceManager enables mirror_super_block. + emulate_as_hdd(m_test_file); - auto dmgr = std::make_unique( - dev_infos, [](const vdev_info&, bool) -> shared { return nullptr; }); + std::vector< dev_info > dev_infos; + dev_infos.emplace_back(std::filesystem::canonical(m_test_file).string(), HSDevType::Data); + auto dmgr = std::make_unique< DeviceManager >( + dev_infos, [](const vdev_info&, bool) -> shared< VirtualDev > { return nullptr; }); ASSERT_TRUE(dmgr->is_first_time_boot()); dmgr->format_devices(); dmgr->commit_formatting(); auto pdevs = dmgr->get_pdevs_by_dev_type(HSDevType::Data); - ASSERT_GT(pdevs.size(), 0); - auto pdev = pdevs[0]; + ASSERT_GT(pdevs.size(), 0u); + ASSERT_TRUE(pdevs[0]->has_footer_mirror()) << "emulate_as_hdd should have enabled footer mirroring"; - // For HDD devices, test footer IO error - if (pdev->atomic_page_size() > 0) { - LOGINFO("Device has footer mirroring enabled, testing footer IO error"); + // data_end_offset() = devsize - data_start_offset; footer superblock starts there. + // Use the actual aligned data_start_offset from the formatted pdev rather than + // recomputing it (populate_pdev_info rounds up to phys_page_size). 
+ auto footer_offset = pdevs[0]->data_end_offset(); + LOGINFO("Corrupting footer at offset={}", footer_offset); - dmgr.reset(); - iomanager.stop(); - - // Truncate file to cut off the footer area - auto data_offset = hs_super_blk::first_block_offset() + - hs_super_blk::total_size(dev_infos[0]); - auto truncate_size = data_offset + 1024; // Cut off before footer + dmgr.reset(); + iomanager.stop(); - LOGINFO("Truncating file to size={} to cause footer IO error", truncate_size); - truncate_file(truncate_size); + corrupt_file_at_offset(footer_offset, 512); - // Restart should crash because footer cannot be read - ioenvironment.with_iomgr(iomgr::iomgr_params{.num_threads = 1, .is_spdk = false}); - ASSERT_DEATH({ - auto dmgr2 = std::make_unique( - dev_infos, [](const vdev_info&, bool) -> shared { return nullptr; }); + ioenvironment.with_iomgr(iomgr::iomgr_params{.num_threads = 1, .is_spdk = false}); + ASSERT_DEATH( + { + emulate_as_hdd(dev_infos[0].dev_name); + auto dmgr2 = std::make_unique< DeviceManager >( + dev_infos, [](const vdev_info&, bool) -> shared< VirtualDev > { return nullptr; }); dmgr2->load_devices(); - }, "IO error reading footer first block"); - } else { - LOGINFO("Device does not have footer mirroring, skipping footer IO error test"); - } + }, + "Footer mismatch with header"); } TEST_F(SuperblockErrorTest, NonHDDDeviceSkipsFooterValidation) { LOGINFO("Test: Non-HDD devices should skip footer validation"); - // Create device as Fast type (SSD), which typically doesn't have footer mirroring - std::vector dev_infos; - dev_infos.emplace_back(std::filesystem::canonical(m_test_file).string(), HSDevType::Fast); + // Explicitly reset to nvme so HDD emulation from a prior test doesn't leak. 
+ emulate_as_nvme(m_test_file); - auto dmgr = std::make_unique( - dev_infos, [](const vdev_info&, bool) -> shared { return nullptr; }); + std::vector< dev_info > dev_infos; + dev_infos.emplace_back(std::filesystem::canonical(m_test_file).string(), HSDevType::Fast); + auto dmgr = std::make_unique< DeviceManager >( + dev_infos, [](const vdev_info&, bool) -> shared< VirtualDev > { return nullptr; }); ASSERT_TRUE(dmgr->is_first_time_boot()); dmgr->format_devices(); dmgr->commit_formatting(); auto pdevs = dmgr->get_pdevs_by_dev_type(HSDevType::Fast); - ASSERT_GT(pdevs.size(), 0); + ASSERT_GT(pdevs.size(), 0u); + ASSERT_FALSE(pdevs[0]->has_footer_mirror()) << "NVMe/SSD should not have footer mirroring"; - // Should restart successfully even if we corrupt the footer area dmgr.reset(); iomanager.stop(); - // Corrupt what would be the footer area - auto data_offset = hs_super_blk::first_block_offset() + - hs_super_blk::total_size(dev_infos[0]); - auto footer_offset = m_dev_size - data_offset; - corrupt_file_at_offset(footer_offset, 4096); + // Corrupt the area that would be the footer on an HDD; should be ignored. 
+ auto data_offset = hs_super_blk::first_block_offset() + hs_super_blk::total_size(dev_infos[0]); + corrupt_file_at_offset(m_dev_size - data_offset, 4096); - // Should succeed because SSD doesn't validate footer ioenvironment.with_iomgr(iomgr::iomgr_params{.num_threads = 1, .is_spdk = false}); ASSERT_NO_THROW({ - auto dmgr2 = std::make_unique( - dev_infos, [](const vdev_info&, bool) -> shared { return nullptr; }); + auto dmgr2 = std::make_unique< DeviceManager >( + dev_infos, [](const vdev_info&, bool) -> shared< VirtualDev > { return nullptr; }); dmgr2->load_devices(); LOGINFO("Successfully loaded device without footer validation"); }); @@ -457,37 +411,44 @@ TEST_F(SuperblockErrorTest, NonHDDDeviceSkipsFooterValidation) { TEST_F(SuperblockErrorTest, ValidFooterMatchesHeader) { LOGINFO("Test: Valid footer should match header on HDD device"); - std::vector dev_infos; - dev_infos.emplace_back(std::filesystem::canonical(m_test_file).string(), HSDevType::Data); + emulate_as_hdd(m_test_file); - auto dmgr = std::make_unique( - dev_infos, [](const vdev_info&, bool) -> shared { return nullptr; }); + std::vector< dev_info > dev_infos; + dev_infos.emplace_back(std::filesystem::canonical(m_test_file).string(), HSDevType::Data); + auto dmgr = std::make_unique< DeviceManager >( + dev_infos, [](const vdev_info&, bool) -> shared< VirtualDev > { return nullptr; }); ASSERT_TRUE(dmgr->is_first_time_boot()); dmgr->format_devices(); dmgr->commit_formatting(); - auto pdevs = dmgr->get_pdevs_by_dev_type(HSDevType::Data); - ASSERT_GT(pdevs.size(), 0); - - // Restart should succeed with matching header and footer dmgr.reset(); iomanager.stop(); ioenvironment.with_iomgr(iomgr::iomgr_params{.num_threads = 1, .is_spdk = false}); ASSERT_NO_THROW({ - auto dmgr2 = std::make_unique( - dev_infos, [](const vdev_info&, bool) -> shared { return nullptr; }); + emulate_as_hdd(dev_infos[0].dev_name); + auto dmgr2 = std::make_unique< DeviceManager >( + dev_infos, [](const vdev_info&, bool) -> shared< 
VirtualDev > { return nullptr; }); dmgr2->load_devices(); LOGINFO("Successfully validated matching header and footer"); }); } int main(int argc, char* argv[]) { - SISL_OPTIONS_LOAD(argc, argv, logging, test_pdev, iomgr); + // InitGoogleTest must run first so gtest strips --gtest_internal_run_death_test + // from argv before SISL_OPTIONS_LOAD sees it (cxxopts throws on unknown args). ::testing::InitGoogleTest(&argc, argv); + SISL_OPTIONS_LOAD(argc, argv, logging, test_pdev, iomgr); sisl::logging::SetLogger("test_pdev"); spdlog::set_pattern("[%D %T%z] [%^%l%$] [%n] [%t] %v"); + // HS_REL_ASSERT logs via sisl to stdout, whereas gtest death tests capture + // stderr. Attach an ostream sink bound to std::cerr to the logger so that the + // assertion message also reaches stderr and gtest can match it against the + // ASSERT_DEATH regex. + sisl::logging::GetLogger()->sinks().push_back( + std::make_shared< spdlog::sinks::ostream_sink_mt >(std::cerr)); + ::testing::FLAGS_gtest_death_test_style = "threadsafe"; return RUN_ALL_TESTS(); }