From 9c161ab938c099af95c6c2869938e383fd48e68d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marko=20M=C3=A4kel=C3=A4?= Date: Wed, 12 Nov 2025 13:59:06 +0200 Subject: [PATCH 01/33] Cleanup: Merge recv_recovery_read_checkpoint() to srv_start() --- storage/innobase/include/log0recv.h | 5 ----- storage/innobase/log/log0recv.cc | 24 ------------------------ storage/innobase/srv/srv0start.cc | 18 +++++++++++++++--- 3 files changed, 15 insertions(+), 32 deletions(-) diff --git a/storage/innobase/include/log0recv.h b/storage/innobase/include/log0recv.h index 457218656f439..95847965e1d4d 100644 --- a/storage/innobase/include/log0recv.h +++ b/storage/innobase/include/log0recv.h @@ -44,11 +44,6 @@ ATTRIBUTE_COLD MY_ATTRIBUTE((nonnull, warn_unused_result)) @return whether the page was recovered correctly */ bool recv_recover_page(fil_space_t* space, buf_page_t* bpage); -/** Read the latest checkpoint information from log file -and store it in log_sys.next_checkpoint and recv_sys.file_checkpoint -@return error code or DB_SUCCESS */ -dberr_t recv_recovery_read_checkpoint(); - /** Start recovering from a redo log checkpoint. 
of first system tablespace page @return error code or DB_SUCCESS */ diff --git a/storage/innobase/log/log0recv.cc b/storage/innobase/log/log0recv.cc index 9eb286b4d5516..e213231b210e2 100644 --- a/storage/innobase/log/log0recv.cc +++ b/storage/innobase/log/log0recv.cc @@ -4698,30 +4698,6 @@ static dberr_t recv_rename_files() return err; } -dberr_t recv_recovery_read_checkpoint() -{ - ut_ad(srv_operation <= SRV_OPERATION_EXPORT_RESTORED || - srv_operation == SRV_OPERATION_RESTORE || - srv_operation == SRV_OPERATION_RESTORE_EXPORT); - ut_ad(!recv_sys.recovery_on); - ut_d(mysql_mutex_lock(&buf_pool.mutex)); - ut_ad(UT_LIST_GET_LEN(buf_pool.LRU) == 0); - ut_ad(UT_LIST_GET_LEN(buf_pool.unzip_LRU) == 0); - ut_d(mysql_mutex_unlock(&buf_pool.mutex)); - - if (srv_force_recovery >= SRV_FORCE_NO_LOG_REDO) - { - sql_print_information("InnoDB: innodb_force_recovery=6" - " skips redo log apply"); - return DB_SUCCESS; - } - - log_sys.latch.wr_lock(SRW_LOCK_CALL); - dberr_t err= recv_sys.find_checkpoint(); - log_sys.latch.wr_unlock(); - return err; -} - inline void log_t::set_recovered() noexcept { ut_ad(get_flushed_lsn() == get_lsn()); diff --git a/storage/innobase/srv/srv0start.cc b/storage/innobase/srv/srv0start.cc index 318357432cfdf..e92e0ff54e078 100644 --- a/storage/innobase/srv/srv0start.cc +++ b/storage/innobase/srv/srv0start.cc @@ -1458,9 +1458,21 @@ dberr_t srv_start(bool create_new_db) } recv_sys.debug_free(); } else { - err = recv_recovery_read_checkpoint(); - if (err != DB_SUCCESS) { - return srv_init_abort(err); + ut_ad(srv_operation <= SRV_OPERATION_EXPORT_RESTORED + || srv_operation == SRV_OPERATION_RESTORE + || srv_operation == SRV_OPERATION_RESTORE_EXPORT); + ut_ad(!recv_sys.recovery_on); + + if (srv_force_recovery >= SRV_FORCE_NO_LOG_REDO) { + sql_print_information("InnoDB: innodb_force_recovery=6" + " skips redo log apply"); + } else { + log_sys.latch.wr_lock(SRW_LOCK_CALL); + err = recv_sys.find_checkpoint(); + log_sys.latch.wr_unlock(); + if (err != 
DB_SUCCESS) { + return srv_init_abort(err); + } } } From 1d1699eef3dfca20a075de7a282ec92ccf03166e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marko=20M=C3=A4kel=C3=A4?= Date: Fri, 19 Dec 2025 16:15:07 +0200 Subject: [PATCH 02/33] Atomic doublewrite buffer creation buf_dblwr_t::create(): Create the doublewrite buffer in a single atomic mini-transaction. Do not write any log records for initializing any doublewrite buffer pages, in order to avoid recovery failure with innodb_log_archive=ON starting from the very beginning. --- storage/innobase/buf/buf0dblwr.cc | 98 ++++++++++-------------------- storage/innobase/include/mtr0mtr.h | 3 + 2 files changed, 35 insertions(+), 66 deletions(-) diff --git a/storage/innobase/buf/buf0dblwr.cc b/storage/innobase/buf/buf0dblwr.cc index d8b08b4254a87..f0e7eb0f9fc82 100644 --- a/storage/innobase/buf/buf0dblwr.cc +++ b/storage/innobase/buf/buf0dblwr.cc @@ -94,15 +94,17 @@ bool buf_dblwr_t::create() noexcept mtr.start(); dberr_t err; - buf_block_t *trx_sys_block= buf_dblwr_trx_sys_get(&mtr); + buf_block_t *const trx_sys_block= buf_dblwr_trx_sys_get(&mtr); if (!trx_sys_block) { mtr.commit(); return false; } - if (mach_read_from_4(TRX_SYS_DOUBLEWRITE + TRX_SYS_DOUBLEWRITE_MAGIC + - trx_sys_block->page.frame) == + byte *const fseg_header= TRX_SYS_DOUBLEWRITE + TRX_SYS_DOUBLEWRITE_FSEG + + trx_sys_block->page.frame; + + if (mach_read_from_4(fseg_header + FSEG_HEADER_SIZE) == TRX_SYS_DOUBLEWRITE_MAGIC_N) { /* The doublewrite buffer has already been created: just read in @@ -136,21 +138,21 @@ bool buf_dblwr_t::create() noexcept sql_print_information("InnoDB: Doublewrite buffer not found:" " creating new"); - /* FIXME: After this point, the doublewrite buffer creation - is not atomic. The doublewrite buffer should not exist in + /* FIXME: The doublewrite buffer should not exist in the InnoDB system tablespace file in the first place. It could be located in separate optional file(s) in a user-specified location. 
*/ } - byte *fseg_header= TRX_SYS_DOUBLEWRITE + TRX_SYS_DOUBLEWRITE_FSEG + - trx_sys_block->page.frame; + mtr_t init_mtr{nullptr}; + init_mtr.start(); + for (uint32_t prev_page_no= 0, i= 0, extent_size= FSP_EXTENT_SIZE; i < 2 * size + extent_size / 2; i++) { buf_block_t *new_block= fseg_alloc_free_page_general(fseg_header, prev_page_no + 1, FSP_UP, - false, &mtr, &mtr, &err); + false, &mtr, &init_mtr, &err); if (!new_block) { sql_print_error("InnoDB: Cannot create doublewrite buffer: " @@ -165,75 +167,39 @@ bool buf_dblwr_t::create() noexcept return false; } - /* We read the allocated pages to the buffer pool; when they are - written to disk in a flush, the space id and page number fields - are also written to the pages. When we at database startup read - pages from the doublewrite buffer, we know that if the space id - and page number in them are the same as the page position in the - tablespace, then the page has not been written to in - doublewrite. */ - - ut_ad(new_block->page.lock.not_recursive()); const page_id_t id= new_block->page.id(); - /* We only do this in the debug build, to ensure that the check in - buf_flush_init_for_writing() will see a valid page type. The - flushes of new_block are actually unnecessary here. */ - ut_d(mtr.write<2>(*new_block, FIL_PAGE_TYPE + new_block->page.frame, - FIL_PAGE_TYPE_SYS)); + /* Normally, allocated pages will be modified further. However, + the pages of the doublewrite buffer are just dummy storage, not + covered by the write-ahead log. 
*/ + ut_ad(init_mtr.get_savepoint() == 1); + ut_ad(init_mtr.m_memo[0].object == new_block); + ut_ad(init_mtr.m_memo[0].type == MTR_MEMO_PAGE_X_MODIFY); + init_mtr.m_memo[0].type= MTR_MEMO_PAGE_X_FIX; + init_mtr.rollback_to_savepoint(0, 1); + init_mtr.m_log.erase(); if (i == size / 2) - { ut_a(id.page_no() == size); - mtr.write<4>(*trx_sys_block, - TRX_SYS_DOUBLEWRITE + TRX_SYS_DOUBLEWRITE_BLOCK1 + - trx_sys_block->page.frame, id.page_no()); - mtr.write<4>(*trx_sys_block, - TRX_SYS_DOUBLEWRITE + TRX_SYS_DOUBLEWRITE_REPEAT + - TRX_SYS_DOUBLEWRITE_BLOCK1 + trx_sys_block->page.frame, - id.page_no()); - } else if (i == size / 2 + size) - { ut_a(id.page_no() == 2 * size); - mtr.write<4>(*trx_sys_block, - TRX_SYS_DOUBLEWRITE + TRX_SYS_DOUBLEWRITE_BLOCK2 + - trx_sys_block->page.frame, id.page_no()); - mtr.write<4>(*trx_sys_block, - TRX_SYS_DOUBLEWRITE + TRX_SYS_DOUBLEWRITE_REPEAT + - TRX_SYS_DOUBLEWRITE_BLOCK2 + trx_sys_block->page.frame, - id.page_no()); - } else if (i > size / 2) ut_a(id.page_no() == prev_page_no + 1); - - if (((i + 1) & 15) == 0) { - /* rw_locks can only be recursively x-locked 2048 times. (on 32 - bit platforms, (lint) 0 - (X_LOCK_DECR * 2049) is no longer a - negative number, and thus lock_word becomes like a shared lock). - For 4k page size this loop will lock the fseg header too many - times. Since this code is not done while any other threads are - active, restart the MTR occasionally. 
*/ - mtr.commit(); - mtr.start(); - trx_sys_block= buf_dblwr_trx_sys_get(&mtr); - fseg_header= TRX_SYS_DOUBLEWRITE + TRX_SYS_DOUBLEWRITE_FSEG + - trx_sys_block->page.frame; - } - prev_page_no= id.page_no(); } - mtr.write<4>(*trx_sys_block, - TRX_SYS_DOUBLEWRITE + TRX_SYS_DOUBLEWRITE_MAGIC + - trx_sys_block->page.frame, TRX_SYS_DOUBLEWRITE_MAGIC_N); - mtr.write<4>(*trx_sys_block, - TRX_SYS_DOUBLEWRITE + TRX_SYS_DOUBLEWRITE_MAGIC + - TRX_SYS_DOUBLEWRITE_REPEAT + trx_sys_block->page.frame, - TRX_SYS_DOUBLEWRITE_MAGIC_N); - - mtr.write<4>(*trx_sys_block, - TRX_SYS_DOUBLEWRITE + TRX_SYS_DOUBLEWRITE_SPACE_ID_STORED + - trx_sys_block->page.frame, + ut_ad(init_mtr.is_empty()); + byte *const doublewrite= fseg_header + + (TRX_SYS_DOUBLEWRITE_MAGIC - TRX_SYS_DOUBLEWRITE_FSEG); + mtr.write<4>(*trx_sys_block, doublewrite, TRX_SYS_DOUBLEWRITE_MAGIC_N); + static_assert(TRX_SYS_DOUBLEWRITE_BLOCK1==TRX_SYS_DOUBLEWRITE_MAGIC + 4, ""); + mtr.write<4>(*trx_sys_block, doublewrite + 4, size); + static_assert(TRX_SYS_DOUBLEWRITE_BLOCK2==TRX_SYS_DOUBLEWRITE_MAGIC + 8, ""); + mtr.write<4>(*trx_sys_block, doublewrite + 8, size * 2); + static_assert(TRX_SYS_DOUBLEWRITE_REPEAT == 12, ""); + mtr.memcpy(*trx_sys_block, doublewrite + 12, doublewrite, 12); + static_assert(TRX_SYS_DOUBLEWRITE_SPACE_ID_STORED == + 24 + TRX_SYS_DOUBLEWRITE_MAGIC, ""); + mtr.write<4>(*trx_sys_block, doublewrite + 24, TRX_SYS_DOUBLEWRITE_SPACE_ID_STORED_N); mtr.commit(); diff --git a/storage/innobase/include/mtr0mtr.h b/storage/innobase/include/mtr0mtr.h index 82756ee43cf0d..acf2b5512840f 100644 --- a/storage/innobase/include/mtr0mtr.h +++ b/storage/innobase/include/mtr0mtr.h @@ -63,8 +63,11 @@ struct mtr_memo_slot_t void release() const; }; +class buf_dblwr_t; + /** Mini-transaction handle and buffer */ struct mtr_t { + friend buf_dblwr_t; mtr_t(trx_t *trx/*= nullptr*/); ~mtr_t(); From 388b199ea30d7ebce89c402581377326633cd0f7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marko=20M=C3=A4kel=C3=A4?= Date: Wed, 12 Nov 2025 
13:59:15 +0200 Subject: [PATCH 03/33] MDEV-37949: Introduce innodb_log_recovery_start, innodb_log_recovery_target innodb_log_recovery_start: The checkpoint LSN to start recovery from. This will be useful when recovering from an archived log. innodb_log_recovery_target: The requested LSN to end recovery at. This will be useful when recovering data files that were copied as of a time that is before end of the available log. --- .../r/innodb_encrypt_log_corruption.result | 7 +++ .../innodb/r/corrupted_during_recovery.result | 10 +++- .../innodb/r/innodb-wl5522,strict_crc32.rdiff | 25 ++++++++- .../suite/innodb/r/innodb-wl5522.result | 7 +++ .../innodb/r/innodb_force_recovery.result | 7 +++ .../suite/innodb/r/log_corruption.result | 7 +++ mysql-test/suite/innodb/r/rename_table.result | 10 ++++ .../innodb/t/corrupted_during_recovery.test | 34 +++++++++--- mysql-test/suite/innodb/t/innodb-wl5522.test | 18 ++++++- .../suite/innodb/t/innodb_force_recovery.test | 13 +++++ mysql-test/suite/innodb/t/log_corruption.test | 11 +++- mysql-test/suite/innodb/t/rename_table.test | 15 ++++++ .../suite/sys_vars/r/sysvars_innodb.result | 24 +++++++++ storage/innobase/handler/ha_innodb.cc | 12 +++++ storage/innobase/include/log0recv.h | 5 ++ storage/innobase/log/log0recv.cc | 52 +++++++++++++++++-- 16 files changed, 241 insertions(+), 16 deletions(-) diff --git a/mysql-test/suite/encryption/r/innodb_encrypt_log_corruption.result b/mysql-test/suite/encryption/r/innodb_encrypt_log_corruption.result index 3c3e4831d8a0f..b7bdee10daa90 100644 --- a/mysql-test/suite/encryption/r/innodb_encrypt_log_corruption.result +++ b/mysql-test/suite/encryption/r/innodb_encrypt_log_corruption.result @@ -20,6 +20,13 @@ AND support IN ('YES', 'DEFAULT', 'ENABLED'); ENGINE SUPPORT COMMENT TRANSACTIONS XA SAVEPOINTS FOUND 1 /InnoDB: Upgrade after a crash is not supported. 
This redo log was created before MariaDB 10\.2\.2, and we did not find a valid checkpoint/ in mysqld.1.err # empty redo log from before MariaDB 10.2.2 +# restart: --innodb-data-home-dir=MYSQLTEST_VARDIR/tmp/log_corruption --innodb-log-group-home-dir=MYSQLTEST_VARDIR/tmp/log_corruption --innodb-log-recovery-target=12345 +SELECT COUNT(*) FROM INFORMATION_SCHEMA.ENGINES +WHERE engine = 'innodb' +AND support IN ('YES', 'DEFAULT', 'ENABLED'); +COUNT(*) +0 +FOUND 1 /InnoDB: cannot fulfill innodb_log_recovery_target=12345!=/ in mysqld.1.err # restart: --innodb-data-home-dir=MYSQLTEST_VARDIR/tmp/log_corruption --innodb-log-group-home-dir=MYSQLTEST_VARDIR/tmp/log_corruption --innodb-force-recovery=5 --innodb-log-file-size=4m SELECT COUNT(*) FROM INFORMATION_SCHEMA.ENGINES WHERE engine = 'innodb' diff --git a/mysql-test/suite/innodb/r/corrupted_during_recovery.result b/mysql-test/suite/innodb/r/corrupted_during_recovery.result index 593943b4951ea..d063fcb0132cb 100644 --- a/mysql-test/suite/innodb/r/corrupted_during_recovery.result +++ b/mysql-test/suite/innodb/r/corrupted_during_recovery.result @@ -1,14 +1,19 @@ -CREATE TABLE t1(a BIGINT PRIMARY KEY) ENGINE=InnoDB; +CREATE TABLE t1(a BIGINT PRIMARY KEY) ENGINE=InnoDB STATS_PERSISTENT=0; INSERT INTO t1 VALUES(1); +SET GLOBAL innodb_max_purge_lag_wait=0, innodb_log_checkpoint_now=ON; CREATE TABLE t2(a BIGINT PRIMARY KEY) ENGINE=InnoDB; INSERT INTO t1 VALUES(2); SET GLOBAL innodb_flush_log_at_trx_commit=1; INSERT INTO t2 VALUES(1); # Kill the server +SELECT * FROM t2; +Got one of the listed errors +SELECT * FROM t2; +ERROR 42000: Unknown storage engine 'InnoDB' +FOUND 1 /InnoDB: impossible innodb_log_recovery_start=/ in mysqld.1.err # Corrupt the pages SELECT * FROM t1; ERROR 42000: Unknown storage engine 'InnoDB' -FOUND 1 /InnoDB: Page \[page id: space=[1-9][0-9]*, page number=3\] log sequence number 1311768467463790320 is in the future!/ in mysqld.1.err SELECT * FROM t1; a 1 @@ -18,6 +23,7 @@ a CHECK TABLE t2; Table Op 
Msg_type Msg_text test.t2 check status OK +FOUND 1 /InnoDB: Page \[page id: space=[1-9][0-9]*, page number=3\] log sequence number 1311768467463790320 is in the future!/ in mysqld.1.err DROP TABLE t1, t2; CREATE TABLE t1(pk SERIAL) ENGINE=InnoDB; INSERT INTO t1 VALUES (1),(2),(3); diff --git a/mysql-test/suite/innodb/r/innodb-wl5522,strict_crc32.rdiff b/mysql-test/suite/innodb/r/innodb-wl5522,strict_crc32.rdiff index 283bbe96aae97..e4c5128b0377f 100644 --- a/mysql-test/suite/innodb/r/innodb-wl5522,strict_crc32.rdiff +++ b/mysql-test/suite/innodb/r/innodb-wl5522,strict_crc32.rdiff @@ -1,6 +1,27 @@ --- innodb-wl5522.result +++ innodb-wl5522,strict_crc32.result~ -@@ -131,8 +131,7 @@ +@@ -1,9 +1,6 @@ + call mtr.add_suppression("InnoDB: Unable to import tablespace .* because it already exists. Please DISCARD the tablespace before IMPORT\\."); + call mtr.add_suppression("Index for table 't2' is corrupt; try to repair it"); + call mtr.add_suppression("InnoDB: Cannot save statistics for table `test`\\.`t1` because the \\.ibd file is missing"); +-call mtr.add_suppression("InnoDB: cannot fulfill innodb_log_recovery_target=123456<"); +-call mtr.add_suppression("InnoDB: Plugin initialization aborted"); +-call mtr.add_suppression("Plugin 'InnoDB' registration as a STORAGE ENGINE failed\\."); + FLUSH TABLES; + CREATE TABLE t1 + (a INT AUTO_INCREMENT PRIMARY KEY, +@@ -37,10 +34,6 @@ + t1.ibd + t2.frm + t2.ibd +-# restart: --innodb-log-recovery-target=123456 +-FOUND 1 /InnoDB: cannot fulfill innodb_log_recovery_target=123456::max(), 0); + +static MYSQL_SYSVAR_UINT64_T(log_recovery_target, recv_sys.rpo, + PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY, + "recovery point objective (end LSN; 0=unlimited)", + nullptr, nullptr, 0, 0, std::numeric_limits::max(), 0); + static MYSQL_SYSVAR_ULONGLONG(log_file_size, srv_log_file_size, PLUGIN_VAR_RQCMDARG, "Redo log size in bytes.", @@ -19875,6 +19885,8 @@ static struct st_mysql_sys_var* innobase_system_variables[]= { 
MYSQL_SYSVAR(log_file_write_through), MYSQL_SYSVAR(data_file_buffering), MYSQL_SYSVAR(data_file_write_through), + MYSQL_SYSVAR(log_recovery_start), + MYSQL_SYSVAR(log_recovery_target), MYSQL_SYSVAR(log_file_size), MYSQL_SYSVAR(log_write_ahead_size), MYSQL_SYSVAR(log_spin_wait_delay), diff --git a/storage/innobase/include/log0recv.h b/storage/innobase/include/log0recv.h index 95847965e1d4d..42aee0bc2af97 100644 --- a/storage/innobase/include/log0recv.h +++ b/storage/innobase/include/log0recv.h @@ -244,6 +244,11 @@ struct recv_sys_t lsn_t scanned_lsn; /** log sequence number at the end of the FILE_CHECKPOINT record, or 0 */ lsn_t file_checkpoint; + /** recovery start checkpoint */ + lsn_t recovery_start; + /** recovery point objective (a limit for scanned_lsn) */ + lsn_t rpo; + /** the time when progress was last reported */ time_t progress_time; diff --git a/storage/innobase/log/log0recv.cc b/storage/innobase/log/log0recv.cc index e213231b210e2..f75d118ab1bf4 100644 --- a/storage/innobase/log/log0recv.cc +++ b/storage/innobase/log/log0recv.cc @@ -57,6 +57,8 @@ Created 9/20/1997 Heikki Tuuri /** The recovery system */ recv_sys_t recv_sys; +/** 0 or the first LSN that would conflict with innodb_log_recovery_target */ +static lsn_t recv_sys_rpo_exceeded; /** TRUE when recv_init_crash_recovery() has been called. 
*/ bool recv_needed_recovery; #ifdef UNIV_DEBUG @@ -1688,6 +1690,16 @@ static dberr_t recv_log_recover_10_5(lsn_t lsn_offset) return DB_SUCCESS; } +/** @return if the specified innodb_log_recovery_target is being violated */ +static bool recv_sys_invalid_rpo(lsn_t lsn) noexcept +{ + if (!recv_sys.rpo || recv_sys.rpo >= lsn) + return false; + sql_print_error("InnoDB: cannot fulfill innodb_log_recovery_target=%" + PRIu64 "<%" PRIu64, recv_sys.rpo, lsn); + return true; +} + dberr_t recv_sys_t::find_checkpoint() { bool wrong_size= false; @@ -1779,6 +1791,12 @@ dberr_t recv_sys_t::find_checkpoint() log_sys.last_checkpoint_lsn= log_sys.next_checkpoint_lsn; log_sys.set_recovered_lsn(log_sys.next_checkpoint_lsn); lsn= file_checkpoint= log_sys.next_checkpoint_lsn; + if (recv_sys.rpo && recv_sys.rpo != lsn) + { + sql_print_error("InnoDB: cannot fulfill innodb_log_recovery_target=%" + PRIu64 "!=%" PRIu64, recv_sys.rpo, lsn); + return DB_CORRUPTION; + } if (UNIV_LIKELY(lsn != 0)) scanned_lsn= lsn; log_sys.next_checkpoint_no= 0; @@ -1855,6 +1873,8 @@ dberr_t recv_sys_t::find_checkpoint() } if (!log_sys.next_checkpoint_lsn) goto got_no_checkpoint; + if (recv_sys_invalid_rpo(lsn)) + return DB_READ_ONLY; if (!memcmp(creator, "Backup ", 7)) srv_start_after_restore= true; @@ -2427,8 +2447,16 @@ recv_sys_t::parse_mtr_result log_parse_start(source &l, unsigned nonce) return recv_sys_t::PREMATURE_EOF; eom_found: - if (*l != log_sys.get_sequence_bit((l - begin) + recv_sys.lsn)) + const lsn_t end_lsn{(l - begin) + recv_sys.lsn}; + + if (*l != log_sys.get_sequence_bit(end_lsn)) + return recv_sys_t::GOT_EOF; + + if (recv_sys.rpo && recv_sys.rpo < end_lsn) + { + recv_sys_rpo_exceeded= end_lsn; return recv_sys_t::GOT_EOF; + } if (l.is_eof(5 + nonce)) return recv_sys_t::PREMATURE_EOF; @@ -4770,6 +4798,7 @@ dberr_t recv_recovery_from_checkpoint_start() } recv_sys.recovery_on = true; + recv_sys_rpo_exceeded = 0; log_sys.latch.wr_lock(SRW_LOCK_CALL); log_sys.set_capacity(); @@ -4787,9 +4816,20 
@@ dberr_t recv_recovery_from_checkpoint_start() recv_sys_t::parser parser[2]; if (log_sys.is_recoverable()) { + if (recv_sys.recovery_start > log_sys.next_checkpoint_lsn) { + sql_print_error("InnoDB: impossible " + "innodb_log_recovery_start=%" PRIu64 + ">%" PRIu64, + recv_sys.recovery_start, + log_sys.next_checkpoint_lsn); + goto err_exit; + } else { + log_sys.last_checkpoint_lsn = recv_sys.recovery_start + ? recv_sys.recovery_start + : log_sys.next_checkpoint_lsn; + } const bool rewind = recv_sys.lsn - != log_sys.next_checkpoint_lsn; - log_sys.last_checkpoint_lsn = log_sys.next_checkpoint_lsn; + != log_sys.last_checkpoint_lsn; parser[false] = get_parse_mmap(); parser[true] = get_parse_mmap(); recv_scan_log(false, parser); @@ -4797,6 +4837,7 @@ dberr_t recv_recovery_from_checkpoint_start() read_only_recovery: sql_print_warning("InnoDB: innodb_read_only" " prevents crash recovery"); +read_only_reported: err = DB_READ_ONLY; goto func_exit; } @@ -4817,8 +4858,11 @@ dberr_t recv_recovery_from_checkpoint_start() } rescan = recv_scan_log(false, parser); - if (srv_read_only_mode && recv_needed_recovery) { + if (!recv_needed_recovery) { + } else if (srv_read_only_mode) { goto read_only_recovery; + } else if (recv_sys_invalid_rpo(recv_sys_rpo_exceeded)) { + goto read_only_reported; } if ((recv_sys.is_corrupt_log() && !srv_force_recovery) From fa6bc68e951e2eaba0c0add0164ee49ad210216a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marko=20M=C3=A4kel=C3=A4?= Date: Thu, 4 Dec 2025 12:58:10 +0200 Subject: [PATCH 04/33] WIP MDEV-37949: Implement innodb_log_archive, innodb_lsn_archived FIXME: Implement recovery from multiple archived log files. The new setting SET GLOBAL innodb_log_archive=ON will enable log archiving as soon as the current ib_logfile0 is about to wrap around. The status variable innodb_lsn_archived will reflect the LSN since when a complete InnoDB log archive is available. Its initial value will be that of the new parameter innodb_log_archive_start. 
If that variable is 0 (the default), the innodb_lsn_archived will be recovered from the available log files. If innodb_log_archive=OFF, innodb_lsn_archived will be adjusted to the latest checkpoint every time a log checkpoint is executed. If innodb_log_archive=ON, the value should not change. When innodb_log_archive=ON, the setting SET GLOBAL innodb_log_file_size will affect subsequently created log files when the file that is being currently written is running out. log_t::archive_new_write(): Create and allocate a new log file, and write the outstanding data to both the current and the new file. log_t::archive_new_mmap(): Create and memory-map a new log file, and update file_size to resize_target. log_t::set_archive(my_bool): Implement SET GLOBAL innodb_log_archive. An error will be returned if non-archived SET GLOBAL innodb_log_file_size (log file resizing) is in progress. log_t::archive_set_size(): Ensure that resize_target is set for new archived log files. log_t::first_lsn: This should be the start of the current log file, to be consulted in log_t::write_checkpoint() when renaming files. log_write_buf(): Add the parameter max_length, the file wrap limit. mtr_t::finish_writer(): Specialize for innodb_log_archive=ON innodb_log_archive_update(): Assign log_sys.archived_lsn to the latest checkpoint; it should be usable. If the log buffer is about to wrap around, back off and retry. log_t::archived_mmap_switch_prepare(): Create and memory-map a new log file. log_t::append_prepare(): Special case. log_t::archived_mmap_switch_complete(): Switch to the buffer that was created in archived_mmap_switch_prepare(). log_t::get_archive_path(lsn_t): Return a name of an archived log file. log_t::get_next_archive_path(): Return the name of the next archived log. log_t::resize_write_low(): Adjust an assertion for log_t::ARCHIVED_MMAP. 
log_t::archived_mmap_switch_prepare(): Remember the file handle in log_sys.resize_log, so that write_checkpoint() will be able to invoke fchmod() on it. log_t::write_checkpoint(): Mark the old log file read-only, and update log_sys.first_lsn if we are not in the archived_mmap path. mtr_t::finish_writer(): Invoke log_close() only if innodb_log_archive=OFF. --- .../innodb/r/innodb_status_variables.result | 1 + .../innodb/r/log_file_size_online.result | 13 +- mysql-test/suite/innodb/t/innodb-master.opt | 2 - .../suite/innodb/t/log_file_size_online.opt | 1 + .../suite/innodb/t/log_file_size_online.test | 19 +- .../suite/sys_vars/r/sysvars_innodb.result | 26 +- sql/upgrade_conf_file.cc | 1 - storage/innobase/buf/buf0flu.cc | 34 ++- storage/innobase/handler/ha_innodb.cc | 64 ++++- storage/innobase/include/log0log.h | 110 +++++++- storage/innobase/include/mtr0mtr.h | 12 +- storage/innobase/log/log0log.cc | 233 ++++++++++++++-- storage/innobase/log/log0recv.cc | 2 + storage/innobase/mtr/mtr0mtr.cc | 254 +++++++++++++----- storage/innobase/srv/srv0start.cc | 4 + 15 files changed, 669 insertions(+), 107 deletions(-) create mode 100644 mysql-test/suite/innodb/t/log_file_size_online.opt diff --git a/mysql-test/suite/innodb/r/innodb_status_variables.result b/mysql-test/suite/innodb/r/innodb_status_variables.result index 194528c99da5c..9a758a83fa4ae 100644 --- a/mysql-test/suite/innodb/r/innodb_status_variables.result +++ b/mysql-test/suite/innodb/r/innodb_status_variables.result @@ -51,6 +51,7 @@ INNODB_LOG_WRITES INNODB_LSN_CURRENT INNODB_LSN_FLUSHED INNODB_LSN_LAST_CHECKPOINT +INNODB_LSN_ARCHIVED INNODB_MASTER_THREAD_ACTIVE_LOOPS INNODB_MASTER_THREAD_IDLE_LOOPS INNODB_MAX_TRX_ID diff --git a/mysql-test/suite/innodb/r/log_file_size_online.result b/mysql-test/suite/innodb/r/log_file_size_online.result index 8dcd9a47b2f0b..41c4f358cfb8e 100644 --- a/mysql-test/suite/innodb/r/log_file_size_online.result +++ b/mysql-test/suite/innodb/r/log_file_size_online.result @@ -1,3 +1,4 @@ 
+SET GLOBAL innodb_log_archive=OFF; SET GLOBAL innodb_log_file_size=4194304; SHOW VARIABLES LIKE 'innodb_log_file_size'; Variable_name Value @@ -11,7 +12,7 @@ a INT PRIMARY KEY AUTO_INCREMENT, b CHAR(255) NOT NULL) ENGINE=INNODB; INSERT INTO t SELECT NULL, REPEAT('a', 255) FROM seq_1_to_20000; -# restart: --innodb-log-file-size=4194304 +# restart: --innodb-log-file-size=4194304 --skip-innodb-log-archive SELECT COUNT(*) FROM t; COUNT(*) 20000 @@ -28,8 +29,15 @@ Got one of the listed errors connect con1,localhost,root; SET GLOBAL innodb_log_file_size=7340032; connection default; +SET GLOBAL innodb_log_archive=ON; +SET GLOBAL innodb_log_archive=OFF; KILL QUERY @id; connection con1; +SET GLOBAL innodb_log_archive=ON, innodb_log_file_size=10485760; +SELECT @@GLOBAL.innodb_log_file_size!=10485760; +@@GLOBAL.innodb_log_file_size!=10485760 +1 +SET GLOBAL innodb_log_archive=OFF; connection default; SET GLOBAL innodb_log_file_size=5242880; connection con1; @@ -46,6 +54,8 @@ connection con1; disconnect con1; connection default; # restart +SET @save_archive=@@GLOBAL.innodb_log_archive; +SET GLOBAL innodb_log_archive=OFF; SELECT * FROM t WHERE a<10; a b 1 @@ -73,4 +83,5 @@ SHOW VARIABLES LIKE 'innodb_log_file_size'; Variable_name Value innodb_log_file_size 5242880 FOUND 1 /InnoDB: Resized log to 6\.000MiB/ in mysqld.1.err +SET GLOBAL innodb_log_archive=@save_archive; DROP TABLE t; diff --git a/mysql-test/suite/innodb/t/innodb-master.opt b/mysql-test/suite/innodb/t/innodb-master.opt index 2e71d62206dbd..5266978e4f0a7 100644 --- a/mysql-test/suite/innodb/t/innodb-master.opt +++ b/mysql-test/suite/innodb/t/innodb-master.opt @@ -2,5 +2,3 @@ --default-storage-engine=MyISAM --innodb-strict-mode=0 --innodb-file-per-table=0 ---loose-innodb-track-changed-pages ---loose-innodb-log-archive diff --git a/mysql-test/suite/innodb/t/log_file_size_online.opt b/mysql-test/suite/innodb/t/log_file_size_online.opt new file mode 100644 index 0000000000000..1f9a83fbbfbcf --- /dev/null +++ 
b/mysql-test/suite/innodb/t/log_file_size_online.opt @@ -0,0 +1 @@ +--skip-innodb-log-archive diff --git a/mysql-test/suite/innodb/t/log_file_size_online.test b/mysql-test/suite/innodb/t/log_file_size_online.test index 8ea5e662d3b00..ac1f6d37d534e 100644 --- a/mysql-test/suite/innodb/t/log_file_size_online.test +++ b/mysql-test/suite/innodb/t/log_file_size_online.test @@ -4,6 +4,8 @@ let SEARCH_FILE = $MYSQLTEST_VARDIR/log/mysqld.1.err; +SET GLOBAL innodb_log_archive=OFF; + SET GLOBAL innodb_log_file_size=4194304; SHOW VARIABLES LIKE 'innodb_log_file_size'; SELECT global_value FROM information_schema.system_variables @@ -16,7 +18,7 @@ ENGINE=INNODB; INSERT INTO t SELECT NULL, REPEAT('a', 255) FROM seq_1_to_20000; ---let $restart_parameters=--innodb-log-file-size=4194304 +--let $restart_parameters=--innodb-log-file-size=4194304 --skip-innodb-log-archive --source include/restart_mysqld.inc SELECT COUNT(*) FROM t; @@ -41,11 +43,22 @@ let $ID= `SELECT @id := CONNECTION_ID()`; send SET GLOBAL innodb_log_file_size=7340032; --connection default let $ignore= `SELECT @id := $ID`; +--error 0,ER_WRONG_USAGE +SET GLOBAL innodb_log_archive=ON; +--error 0,ER_WRONG_USAGE +SET GLOBAL innodb_log_archive=OFF; + KILL QUERY @id; --connection con1 --error 0,ER_QUERY_INTERRUPTED reap; +# When innodb_log_archive=ON, SET GLOBAL innodb_log_file_size is instantaneous +# but will not reflect the file size. 
+SET GLOBAL innodb_log_archive=ON, innodb_log_file_size=10485760; +SELECT @@GLOBAL.innodb_log_file_size!=10485760; +SET GLOBAL innodb_log_archive=OFF; + --connection default send SET GLOBAL innodb_log_file_size=5242880; @@ -67,6 +80,9 @@ reap; --let $restart_parameters= --source include/restart_mysqld.inc +SET @save_archive=@@GLOBAL.innodb_log_archive; +SET GLOBAL innodb_log_archive=OFF; + SELECT * FROM t WHERE a<10; SELECT COUNT(*),LENGTH(b) FROM t GROUP BY b; @@ -77,5 +93,6 @@ SET GLOBAL innodb_log_file_size=5242880; SHOW VARIABLES LIKE 'innodb_log_file_size'; let SEARCH_PATTERN = InnoDB: Resized log to 6\\.000MiB; --source include/search_pattern_in_file.inc +SET GLOBAL innodb_log_archive=@save_archive; DROP TABLE t; diff --git a/mysql-test/suite/sys_vars/r/sysvars_innodb.result b/mysql-test/suite/sys_vars/r/sysvars_innodb.result index d355c64c60817..941e817e8c5db 100644 --- a/mysql-test/suite/sys_vars/r/sysvars_innodb.result +++ b/mysql-test/suite/sys_vars/r/sysvars_innodb.result @@ -932,6 +932,30 @@ NUMERIC_BLOCK_SIZE 0 ENUM_VALUE_LIST NULL READ_ONLY NO COMMAND_LINE_ARGUMENT REQUIRED +VARIABLE_NAME INNODB_LOG_ARCHIVE +SESSION_VALUE NULL +DEFAULT_VALUE OFF +VARIABLE_SCOPE GLOBAL +VARIABLE_TYPE BOOLEAN +VARIABLE_COMMENT Whether log archiving is desired +NUMERIC_MIN_VALUE NULL +NUMERIC_MAX_VALUE NULL +NUMERIC_BLOCK_SIZE NULL +ENUM_VALUE_LIST OFF,ON +READ_ONLY NO +COMMAND_LINE_ARGUMENT OPTIONAL +VARIABLE_NAME INNODB_LOG_ARCHIVE_START +SESSION_VALUE NULL +DEFAULT_VALUE 0 +VARIABLE_SCOPE GLOBAL +VARIABLE_TYPE BIGINT UNSIGNED +VARIABLE_COMMENT initial value of innodb_lsn_archived; 0=auto-detect +NUMERIC_MIN_VALUE 0 +NUMERIC_MAX_VALUE 18446744073709551615 +NUMERIC_BLOCK_SIZE 0 +ENUM_VALUE_LIST NULL +READ_ONLY YES +COMMAND_LINE_ARGUMENT REQUIRED VARIABLE_NAME INNODB_LOG_BUFFER_SIZE SESSION_VALUE NULL DEFAULT_VALUE 16777216 @@ -973,7 +997,7 @@ SESSION_VALUE NULL DEFAULT_VALUE 100663296 VARIABLE_SCOPE GLOBAL VARIABLE_TYPE BIGINT UNSIGNED -VARIABLE_COMMENT Redo log size in 
bytes. +VARIABLE_COMMENT Desired size of ib_logfile0 in bytes NUMERIC_MIN_VALUE 4194304 NUMERIC_MAX_VALUE 18446744073709551615 NUMERIC_BLOCK_SIZE 4096 diff --git a/sql/upgrade_conf_file.cc b/sql/upgrade_conf_file.cc index 0d7bc6034685c..f1fa9aac6ba3d 100644 --- a/sql/upgrade_conf_file.cc +++ b/sql/upgrade_conf_file.cc @@ -97,7 +97,6 @@ static const char *removed_variables[] = "innodb_locks_unsafe_for_binlog", "innodb_log_arch_dir", "innodb_log_arch_expire_sec", -"innodb_log_archive", "innodb_log_block_size", "innodb_log_checksum_algorithm", "innodb_log_checksums", diff --git a/storage/innobase/buf/buf0flu.cc b/storage/innobase/buf/buf0flu.cc index 0fe60203901b8..978983e190a4b 100644 --- a/storage/innobase/buf/buf0flu.cc +++ b/storage/innobase/buf/buf0flu.cc @@ -1886,6 +1886,37 @@ inline void log_t::write_checkpoint(lsn_t end_lsn) noexcept next_checkpoint_no++; const lsn_t checkpoint_lsn{next_checkpoint_lsn}; last_checkpoint_lsn= checkpoint_lsn; + if (!archive) + archived_lsn= checkpoint_lsn; + else if (resize_log.is_opened()) + { + /* Make the previous archived log file read-only */ +#ifdef _WIN32 + resize_log.close(); + SetFileAttributesA(get_archive_path(first_lsn).c_str(), + FILE_ATTRIBUTE_READONLY | FILE_ATTRIBUTE_ARCHIVE); +#else + struct stat st; + if (!fstat(resize_log.m_file, &st)) + st.st_mode&= 0444; + else + st.st_mode= 0444; + fchmod(resize_log.m_file, st.st_mode); + resize_log.close(); +#endif +#ifdef HAVE_PMEM + if (!is_mmap()) +#endif + { + /* Mimic archived_mmap_switch_complete() */ + ut_ad(current_lsn >= first_lsn + capacity()); + first_lsn+= capacity(); + file_size= resize_target; + } + + ut_ad(current_lsn >= first_lsn); + ut_ad(current_lsn < first_lsn + capacity()); + } DBUG_PRINT("ib_log", ("checkpoint ended at " LSN_PF ", flushed to " LSN_PF, checkpoint_lsn, get_flushed_lsn())); @@ -2184,7 +2215,8 @@ ATTRIBUTE_COLD void buf_flush_ahead(lsn_t lsn, bool furious) noexcept if (recv_recovery_is_on()) recv_sys.apply(true); - 
DBUG_EXECUTE_IF("ib_log_checkpoint_avoid_hard", return;); + DBUG_EXECUTE_IF("ib_log_checkpoint_avoid_hard", + if (!log_sys.archive) return;); Atomic_relaxed &limit= furious ? buf_flush_sync_lsn : buf_flush_async_lsn; diff --git a/storage/innobase/handler/ha_innodb.cc b/storage/innobase/handler/ha_innodb.cc index 72df3a59d492f..2b2d0562dc7ff 100644 --- a/storage/innobase/handler/ha_innodb.cc +++ b/storage/innobase/handler/ha_innodb.cc @@ -950,6 +950,7 @@ static SHOW_VAR innodb_status_variables[]= { {"lsn_flushed", &export_vars.innodb_lsn_flushed, SHOW_ULONGLONG}, {"lsn_last_checkpoint", &export_vars.innodb_lsn_last_checkpoint, SHOW_ULONGLONG}, + {"lsn_archived", &log_sys.archived_lsn, SHOW_ULONGLONG}, {"master_thread_active_loops", &srv_main_active_loops, SHOW_SIZE_T}, {"master_thread_idle_loops", &srv_main_idle_loops, SHOW_SIZE_T}, {"max_trx_id", &export_vars.innodb_max_trx_id, SHOW_ULONGLONG}, @@ -3694,6 +3695,9 @@ compression_algorithm_is_not_loaded(ulong compression_algorithm, myf flags) return 1; } +/** Initial value of innodb_lsn_archived */ +static uint64_t innodb_log_archive_start; + /** Initialize, validate and normalize the InnoDB startup parameters. 
@return failure code @retval 0 on success @@ -3982,6 +3986,8 @@ static int innodb_init_params() skip_buffering_tweak: #endif + log_sys.archived_lsn= innodb_log_archive_start; + if (!tpool::supports_native_aio()) srv_use_native_aio= FALSE; @@ -19441,19 +19447,69 @@ static MYSQL_SYSVAR_BOOL(data_file_write_through, fil_system.write_through, "Whether each write to data files writes through", nullptr, innodb_data_file_write_through_update, FALSE); +inline bool log_t::set_archive(my_bool archive) noexcept +{ + bool fail{false}; + for (;;) + { + latch.wr_lock(SRW_LOCK_CALL); + fail= resize_in_progress(); + if (fail) + break; +#ifdef HAVE_PMEM + if (is_backoff() && is_mmap()) + { + /* Prevent a race condition with append_prepare() */ + latch.wr_unlock(); + continue; + } +#endif + this->archive= archive; + if (archive) + { + archived_lsn= next_checkpoint_lsn; + archive_set_size(); + /* TODO: rename ib_logfile0 to archived file; update header */ + } + mtr_t::finisher_update(); + break; + } + + latch.wr_unlock(); + return fail; +} + +static void innodb_log_archive_update(THD *, st_mysql_sys_var*, + void *, const void *save) noexcept +{ + if (log_sys.set_archive(*static_cast(save))) + my_printf_error(ER_WRONG_USAGE, + "SET GLOBAL innodb_log_file_size is in progress", MYF(0)); +} + +static MYSQL_SYSVAR_BOOL(log_archive, log_sys.archive, + PLUGIN_VAR_OPCMDARG, + "Whether log archiving is desired", + nullptr, innodb_log_archive_update, FALSE); + +static MYSQL_SYSVAR_UINT64_T(log_archive_start, innodb_log_archive_start, + PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY, + "initial value of innodb_lsn_archived; 0=auto-detect", + nullptr, nullptr, 0, 0, std::numeric_limits::max(), 0); + static MYSQL_SYSVAR_UINT64_T(log_recovery_start, recv_sys.recovery_start, PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY, "checkpoint LSN to start recovery from (0=automatic)", - nullptr, nullptr, 0, 0, std::numeric_limits::max(), 0); + nullptr, nullptr, 0, 0, std::numeric_limits::max(), 0); static 
MYSQL_SYSVAR_UINT64_T(log_recovery_target, recv_sys.rpo, PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY, "recovery point objective (end LSN; 0=unlimited)", - nullptr, nullptr, 0, 0, std::numeric_limits::max(), 0); + nullptr, nullptr, 0, 0, std::numeric_limits::max(), 0); static MYSQL_SYSVAR_ULONGLONG(log_file_size, srv_log_file_size, PLUGIN_VAR_RQCMDARG, - "Redo log size in bytes.", + "Desired size of ib_logfile0 in bytes", nullptr, innodb_log_file_size_update, 96 << 20, 4 << 20, std::numeric_limits::max(), 4096); @@ -19885,6 +19941,8 @@ static struct st_mysql_sys_var* innobase_system_variables[]= { MYSQL_SYSVAR(log_file_write_through), MYSQL_SYSVAR(data_file_buffering), MYSQL_SYSVAR(data_file_write_through), + MYSQL_SYSVAR(log_archive), + MYSQL_SYSVAR(log_archive_start), MYSQL_SYSVAR(log_recovery_start), MYSQL_SYSVAR(log_recovery_target), MYSQL_SYSVAR(log_file_size), diff --git a/storage/innobase/include/log0log.h b/storage/innobase/include/log0log.h index e80011a9c4c50..ee0169ba12826 100644 --- a/storage/innobase/include/log0log.h +++ b/storage/innobase/include/log0log.h @@ -247,15 +247,21 @@ struct log_t lsn_t (*writer)() noexcept; /** next checkpoint LSN (protected by latch.wr_lock()) */ lsn_t next_checkpoint_lsn; + /** start of archived log, or 0 (protected by latch.wr_lock()) */ + lsn_t archived_lsn; /** Log file */ log_file_t log; private: - /** Log file being constructed during resizing; protected by latch */ + /** Log file being constructed during resizing, + or the previous archived log file; protected by latch */ log_file_t resize_log; - /** size of resize_log; protected by latch */ + /** size of resize_log, or the requested innodb_log_file_size + of the next file created if archive==TRUE; protected by latch */ lsn_t resize_target; - /** Buffer for writing to resize_log; @see buf */ + /** Buffer for writing to resize_log; @see buf + Also a spare buffer between archived_mmap_switch_prepare() + and archived_mmap_switch_complete() */ byte *resize_buf; /** Buffer
for writing to resize_log; @see flush_buf */ byte *resize_flush_buf; @@ -263,13 +269,15 @@ struct log_t /** log sequence number when log resizing was initiated; 0 if the log is not being resized, 1 if resize_start() is in progress */ std::atomic resize_lsn; - /** the log sequence number at the start of the log file */ + /** the log sequence number at the start of the current log file */ lsn_t first_lsn; public: /** current innodb_log_write_ahead_size */ uint write_size; /** format of the redo log: e.g., FORMAT_10_8 */ uint32_t format; + /** the current value of innodb_log_archive; protected by latch.wr_lock() */ + my_bool archive; /** whether the memory-mapped interface is enabled for the log */ my_bool log_mmap; /** the default value of log_mmap */ @@ -362,14 +370,32 @@ struct log_t { return thd == resize_initiator; } /** Replicate a write to the log. + @tparam mmap whether the memory-mapped interface is enabled @param lsn start LSN @param end end of the mini-transaction @param len length of the mini-transaction @param seq offset of the sequence bit from the end */ + template inline void resize_write(lsn_t lsn, const byte *end, - size_t len, size_t seq) noexcept; + size_t len, size_t seq) noexcept + { + if (UNIV_LIKELY_NULL(resize_buf)) + resize_write_low(lsn, end, len, seq); + } + + /** SET GLOBAL innodb_log_archive */ + inline bool set_archive(my_bool archive) noexcept; private: + /** Replicate a write to the log. + @tparam mmap whether the memory-mapped interface is enabled + @param lsn start LSN + @param end end of the mini-transaction + @param len length of the mini-transaction + @param seq offset of the sequence bit from the end */ + template + ATTRIBUTE_COLD void resize_write_low(lsn_t lsn, const byte *end, + size_t len, size_t seq) noexcept; /** Write resize_buf to resize_log. 
@param b resize_buf or resize_flush_buf @param length the used length of b */ @@ -380,13 +406,6 @@ struct log_t @return whether an error occurred */ static bool resize_rename() noexcept; - /** @return pointer for writing to resize_buf - @retval nullptr if no is_mmap() based resizing is active */ - inline byte *resize_buf_begin(lsn_t lsn) const noexcept; - /** @return end of resize_buf */ - inline const byte *resize_buf_end() const noexcept - { return resize_buf + resize_target; } - /** Initialise the redo log subsystem. */ void create() noexcept; @@ -434,6 +453,13 @@ struct log_t (write_lsn_offset & (WRITE_BACKOFF - 1)); } + /** @return whether a back-off in a log write is in progress */ + bool is_backoff() const noexcept + { + ut_ad(latch_have_wr()); + return write_lsn_offset & WRITE_BACKOFF; + } + lsn_t get_flushed_lsn(std::memory_order order= std::memory_order_acquire) const noexcept { return flushed_to_disk_lsn.load(order); } @@ -455,7 +481,34 @@ struct log_t /** Persist the log. @param lsn desired new value of flushed_to_disk_lsn */ void persist(lsn_t lsn) noexcept; + /** @return the overflow buffer when ARCHIVED_MMAP is wrapping around */ + byte *get_archived_mmap_switch() const noexcept + { + ut_ad(archived_mmap_switch()); + return resize_buf + START_OFFSET; + } #endif + /** @return whether archived_mmap_switch_complete() needs to be called */ + bool archived_mmap_switch() const noexcept + { + ut_ad(latch_have_any()); + return UNIV_UNLIKELY(archive && resize_buf); + } + /** Create a new log file when the current one will fill up. + @param buf log records to append + @param length size of the log records, in bytes + @param offset log file offset */ + ATTRIBUTE_COLD void archive_new_write(const byte *buf, size_t length, + lsn_t offset) noexcept; + + /** Ensure that innodb_log_archive=ON will default to the current + innodb_log_file_size if no size has been specified. 
*/ + void archive_set_size() noexcept + { + ut_ad(!resize_in_progress()); + if (!resize_target) + resize_target= file_size; + } bool check_for_checkpoint() const { @@ -489,13 +542,42 @@ struct log_t @param late whether the WRITE_BACKOFF flag had already been set @param ex whether log_sys.latch is exclusively locked */ ATTRIBUTE_COLD void append_prepare_wait(bool late, bool ex) noexcept; +#ifdef HAVE_PMEM + /** Wait in append_prepare() for buffer to become available + @param late whether the WRITE_BACKOFF flag had already been set + @param ex whether log_sys.latch is exclusively locked */ + ATTRIBUTE_COLD void archived_mmap_switch_prepare(bool late, bool ex) + noexcept; +#endif public: + /** Attempt to finish archived_mmap_switch_prepare(). + @return the current LSN in the new file + @retval 0 if no switch took place */ + ATTRIBUTE_COLD lsn_t archived_mmap_switch_complete() noexcept; + + /** How to write log */ + enum write { + /** normal writing !log_sys.is_mmap() */ + WRITE_NORMAL, + /** circular memory-mapped writing when log_sys.is_mmap() */ + CIRCULAR_MMAP, + /** memory-mapped log for log_sys.archive */ + ARCHIVED_MMAP + }; + + /** Generate an archive log file name. + @param lsn first LSN stored in the file + @return archive log file name */ + ATTRIBUTE_COLD std::string get_archive_path(lsn_t lsn) const; + /** @return the next archive log file name */ + ATTRIBUTE_COLD std::string get_next_archive_path() const; + /** Reserve space in the log buffer for appending data. - @tparam mmap log_sys.is_mmap() + @tparam mode how to write log @param size total length of the data to append(), in bytes @param ex whether log_sys.latch is exclusively locked @return the start LSN and the buffer position for append() */ - template + template std::pair append_prepare(size_t size, bool ex) noexcept; /** Append a string of bytes to the redo log. 
diff --git a/storage/innobase/include/mtr0mtr.h b/storage/innobase/include/mtr0mtr.h index acf2b5512840f..633d7e6cbda4a 100644 --- a/storage/innobase/include/mtr0mtr.h +++ b/storage/innobase/include/mtr0mtr.h @@ -691,13 +691,15 @@ struct mtr_t { ATTRIBUTE_NOINLINE size_t crc32c() noexcept; /** Commit the mini-transaction log. - @tparam pmem log_sys.is_mmap() + @tparam mmap log_sys.is_mmap() @param mtr mini-transaction @param lsns {start_lsn,flush_ahead_lsn} */ - template + template static void commit_log(mtr_t *mtr, std::pair lsns) noexcept; - /** Release log_sys.latch. */ + /** Release log_sys.latch. + @tparam mmap log_sys.is_mmap() */ + template void commit_log_release() noexcept; /** Append the redo log records to the redo log buffer. @@ -705,11 +707,11 @@ struct mtr_t { std::pair do_write() noexcept; /** Append the redo log records to the redo log buffer. - @tparam mmap log_sys.is_mmap() + @tparam how how to write @param mtr mini-transaction @param len number of bytes to write @return {start_lsn,flush_ahead_lsn} */ - template static + template static std::pair finish_writer(mtr_t *mtr, size_t len); /** The applicable variant of commit_log() */ diff --git a/storage/innobase/log/log0log.cc b/storage/innobase/log/log0log.cc index eb5fc3fe721d5..6c6b130c44495 100644 --- a/storage/innobase/log/log0log.cc +++ b/storage/innobase/log/log0log.cc @@ -436,6 +436,8 @@ void log_t::create(lsn_t lsn) noexcept flushed_to_disk_lsn.store(lsn, std::memory_order_relaxed); first_lsn= lsn; write_lsn= lsn; + if (!archived_lsn) + archived_lsn= lsn; last_checkpoint_lsn= 0; @@ -617,6 +619,14 @@ log_t::resize_start_status log_t::resize_start(os_offset_t size, void *thd) status= RESIZE_NO_CHANGE; else if (resize_in_progress()) status= RESIZE_IN_PROGRESS; + else if (archive) + { + status= RESIZE_NO_CHANGE; + /* When the current log becomes full and a new archivable log file + is being created, it will be of this size. 
At that point we will assign + file_size= resize_target, resize_target= 0; */ + resize_target= size; + } else { lsn_t start_lsn; @@ -758,10 +768,13 @@ void log_t::resize_abort(void *thd) noexcept } /** Write an aligned buffer to ib_logfile0. -@param buf buffer to be written -@param length length of data to be written -@param offset log file offset */ -static void log_write_buf(const byte *buf, size_t length, lsn_t offset) +@param max_length the maximum length that can be written to the file +@param buf buffer to be written +@param length length of data to be written +@param offset log file offset */ +static void log_write_buf(lsn_t max_length, + const byte *buf, size_t length, lsn_t offset) + noexcept { ut_ad(write_lock.is_owner()); ut_ad(!recv_no_log_write); @@ -770,21 +783,92 @@ static void log_write_buf(const byte *buf, size_t length, lsn_t offset) ut_ad(!(length & block_size_1)); ut_ad(!(size_t(buf) & block_size_1)); ut_ad(length); + ut_ad(max_length == log_sys.file_size - offset); - const lsn_t maximum_write_length{log_sys.file_size - offset}; - ut_ad(maximum_write_length <= log_sys.file_size - log_sys.START_OFFSET); - - if (UNIV_UNLIKELY(length > maximum_write_length)) + if (UNIV_UNLIKELY(length > max_length)) { - log_sys.log.write(offset, {buf, size_t(maximum_write_length)}); - length-= size_t(maximum_write_length); - buf+= size_t(maximum_write_length); + ut_ad(!log_sys.archive); + log_sys.log.write(offset, {buf, size_t(max_length)}); + length-= size_t(max_length); + buf+= size_t(max_length); ut_ad(log_sys.START_OFFSET + length < offset); offset= log_sys.START_OFFSET; } log_sys.log.write(offset, {buf, length}); } +ATTRIBUTE_COLD std::string log_t::get_archive_path(lsn_t lsn) const +{ + size_t size= strlen(srv_log_group_home_dir) + + sizeof "/ib_0000000000000000.log"; + bool trim= false; + switch (srv_log_group_home_dir[strlen(srv_log_group_home_dir) - 1]) { +#ifdef _WIN32 + case '\\': +#endif + case '/': + trim= true; + size--; + } + + char stack[FN_REFLEN], 
*heap= nullptr; + char *buf= size < sizeof stack + ? stack : (heap= static_cast(malloc(size))); + const int d= + snprintf(buf, size, + trim ? "%sib_" UINT64PFx ".log" : "%s/ib_" UINT64PFx ".log", + srv_log_group_home_dir, lsn); + ut_a(d + 1 == int(size)); + std::string path{buf, size}; + free(heap); + return path; +} + +ATTRIBUTE_COLD std::string log_t::get_next_archive_path() const +{ return get_archive_path(first_lsn + capacity()); } + +ATTRIBUTE_COLD void log_t::archive_new_write(const byte *buf, size_t length, + lsn_t offset) noexcept +{ + ut_ad(latch_have_wr()); + ut_ad(write_lock.is_owner()); + ut_ad(archive); + ut_ad(length >= file_size - offset); + ut_ad(!resize_log.is_opened()); + ut_ad(!resize_buf); + ut_ad(!resize_in_progress()); + ut_ad(resize_target >= 4U << 20); + ut_ad(is_latest()); + + const size_t first{size_t(file_size - offset)}; + log.write(offset, {buf, first}); + length-= first; + buf+= first; + + std::string path{get_next_archive_path()}; + bool success; + pfs_os_file_t file= + os_file_create_func(path.c_str(), OS_FILE_CREATE, OS_LOG_FILE, + false, &success); + ut_ad(success == (file != OS_FILE_CLOSED)); + if (file != OS_FILE_CLOSED) + { + if (os_file_set_size(path.c_str(), file, resize_target)) + { + resize_log= log; + log.m_file= file; + if (length) + log.write(START_OFFSET, {buf, length}); + return; + } + os_file_close(file); + IF_WIN(DeleteFile(path.c_str()), unlink(path.c_str())); + } + sql_print_error("[FATAL] InnoDB: Failed to create %s of %" PRIu64 + " bytes", path.c_str(), resize_target); + abort(); +} + /** Invoke commit_checkpoint_notify_ha() to notify that outstanding log writes have been completed. 
*/ void log_flush_notify(lsn_t flush_lsn); @@ -911,6 +995,112 @@ static size_t log_pad(lsn_t lsn, size_t pad, byte *begin, byte *extra) #endif #ifdef HAVE_PMEM +ATTRIBUTE_COLD +void log_t::archived_mmap_switch_prepare(bool late, bool ex) noexcept +{ + ut_ad(archive); + ut_ad(is_mmap()); + ut_ad(!resize_log.is_opened()); + ut_ad(!resize_buf); + ut_ad(!resize_in_progress()); + ut_ad(resize_target >= 4U << 20); + ut_ad(is_latest()); + + if (UNIV_LIKELY(!ex)) + { + latch.rd_unlock(); + if (!late) + { + /* Wait for all threads to back off. */ + latch.wr_lock(SRW_LOCK_CALL); + goto got_ex; + } + + const auto delay= my_cpu_relax_multiplier / 4 * srv_spin_wait_delay; + const auto rounds= srv_n_spin_wait_rounds; + + for (;;) + { + HMT_low(); + for (auto r= rounds + 1; r--; ) + { + if (write_lsn_offset.load(std::memory_order_relaxed) & WRITE_BACKOFF) + { + for (auto d= delay; d--; ) + MY_RELAX_CPU(); + } + else + { + HMT_medium(); + goto done; + } + } + HMT_medium(); + std::this_thread::sleep_for(std::chrono::microseconds(100)); + } + } + else + { + got_ex: + const uint64_t l= write_lsn_offset.load(std::memory_order_relaxed); + const lsn_t lsn= base_lsn.load(std::memory_order_relaxed) + + (l & (WRITE_BACKOFF - 1)); + waits++; + ut_ad(archive); + ut_ad(!resize_log.is_opened()); + ut_ad(!resize_buf); + ut_ad(!resize_in_progress()); + ut_ad(resize_target >= 4U << 20); + ut_ad(is_latest()); + ut_ad(!resize_log.is_opened()); + + do + { + std::string path{get_next_archive_path()}; + bool success; + os_file_t file= + os_file_create_func(path.c_str(), OS_FILE_CREATE, OS_LOG_FILE, + false, &success); + ut_ad(success == (file != OS_FILE_CLOSED)); + if (file != OS_FILE_CLOSED) + { + if (os_file_set_size(path.c_str(), file, resize_target)) + { + bool is_pmem{false}; + resize_buf= static_cast(::log_mmap(file, is_pmem, + resize_target)); + IF_WIN(os_file_close(file),); + if (resize_buf != MAP_FAILED) + { + /* Will be closed in write_checkpoint() */ + IF_WIN(,resize_log= file); + continue; 
+ } + resize_buf= nullptr; + IF_WIN(,os_file_close(file)); + } + } + + IF_WIN(DeleteFile(path.c_str()), unlink(path.c_str())); + sql_print_error("[FATAL] InnoDB: Failed to create and map %s of %" PRIu64 + " bytes", path.c_str(), resize_target); + abort(); + } + while (false); + + ut_ad(lsn - get_flushed_lsn(std::memory_order_relaxed) < capacity()); + persist(lsn); + /* Above we cleared the WRITE_BACKOFF flag, + which our caller will recheck. */ + if (ex) + return; + latch.wr_unlock(); + } + +done: + latch.rd_lock(SRW_LOCK_CALL); +} + void log_t::persist(lsn_t lsn) noexcept { ut_ad(!is_opened()); @@ -1092,18 +1282,31 @@ lsn_t log_t::write_buf() noexcept ut_ad(base + (write_lsn_offset & (WRITE_TO_BUF - 1)) == lsn); write_to_log++; + DBUG_PRINT("ib_log", ("write " LSN_PF " to " LSN_PF " at " LSN_PF, + write_lsn, lsn, offset)); + + const lsn_t max_length{file_size - offset}; + ut_ad(max_length <= capacity()); + if (UNIV_UNLIKELY(length >= max_length)) + { + if (resizing != RESIZING && archive) + { + archive_new_write(write_buf, length, offset); + if (resizing != RETAIN_LATCH) + latch.wr_unlock(); + goto written; + } + } if (resizing != RETAIN_LATCH) latch.wr_unlock(); - DBUG_PRINT("ib_log", ("write " LSN_PF " to " LSN_PF " at " LSN_PF, - write_lsn, lsn, offset)); - /* Do the write to the log file */ - log_write_buf(write_buf, length, offset); + log_write_buf(max_length, write_buf, length, offset); if (UNIV_LIKELY_NULL(re_write_buf)) resize_write_buf(re_write_buf, length); + written: write_lsn= lsn; if (UNIV_UNLIKELY(srv_shutdown_state > SRV_SHUTDOWN_INITIATED)) diff --git a/storage/innobase/log/log0recv.cc b/storage/innobase/log/log0recv.cc index f75d118ab1bf4..7cb511882cb75 100644 --- a/storage/innobase/log/log0recv.cc +++ b/storage/innobase/log/log0recv.cc @@ -1811,6 +1811,8 @@ dberr_t recv_sys_t::find_checkpoint() const lsn_t first_lsn{mach_read_from_8(buf + LOG_HEADER_START_LSN)}; log_sys.set_first_lsn(first_lsn); + if (!log_sys.archived_lsn) + 
log_sys.archived_lsn= first_lsn; char creator[LOG_HEADER_CREATOR_END - LOG_HEADER_CREATOR + 1]; memcpy(creator, buf + LOG_HEADER_CREATOR, sizeof creator); /* Ensure that the string is NUL-terminated. */ diff --git a/storage/innobase/mtr/mtr0mtr.cc b/storage/innobase/mtr/mtr0mtr.cc index 952850b3f499d..ea76a7b00d65e 100644 --- a/storage/innobase/mtr/mtr0mtr.cc +++ b/storage/innobase/mtr/mtr0mtr.cc @@ -52,12 +52,14 @@ void mtr_t::finisher_update() if (log_sys.is_mmap()) { commit_logger= mtr_t::commit_log; - finisher= mtr_t::finish_writer; + finisher= log_sys.archive + ? mtr_t::finish_writer + : mtr_t::finish_writer; return; } commit_logger= mtr_t::commit_log; #endif - finisher= mtr_t::finish_writer; + finisher= mtr_t::finish_writer; } void mtr_memo_slot_t::release() const @@ -336,7 +338,51 @@ void mtr_t::release() m_memo.clear(); } -ATTRIBUTE_NOINLINE void mtr_t::commit_log_release() noexcept +#ifdef HAVE_PMEM +ATTRIBUTE_COLD lsn_t log_t::archived_mmap_switch_complete() noexcept +{ + ut_ad(latch_have_wr()); + if (!archive || !resize_buf) + return 0; + const lsn_t lsn{get_lsn()}, end_lsn{first_lsn + capacity()}; + if (lsn < end_lsn) + return 0; + persist(lsn); + my_munmap(buf, file_size); + buf= resize_buf; + resize_buf= nullptr; + first_lsn= end_lsn; + file_size= resize_target; + return lsn; +} + +template<> +ATTRIBUTE_NOINLINE void mtr_t::commit_log_release() noexcept +{ + if (m_latch_ex) + { + completed: + const lsn_t lsn{log_sys.archived_mmap_switch_complete()}; + log_sys.latch.wr_unlock(); + m_latch_ex= false; + if (lsn) + buf_flush_ahead(lsn, true); + } + else + { + const bool retry{log_sys.archived_mmap_switch()}; + log_sys.latch.rd_unlock(); + if (retry) + { + log_sys.latch.wr_lock(SRW_LOCK_CALL); + goto completed; + } + } +} +#endif + +template<> +ATTRIBUTE_NOINLINE void mtr_t::commit_log_release() noexcept { if (m_latch_ex) { @@ -395,12 +441,12 @@ void mtr_t::commit_log(mtr_t *mtr, std::pair lsns) noexcept buf_pool.page_cleaner_wakeup(); 
mysql_mutex_unlock(&buf_pool.flush_list_mutex); - mtr->commit_log_release(); + mtr->commit_log_release(); mtr->release(); } else { - mtr->commit_log_release(); + mtr->commit_log_release(); for (auto it= mtr->m_memo.rbegin(); it != mtr->m_memo.rend(); ) { @@ -852,7 +898,9 @@ static time_t log_close_warn_time; making the server crash-unsafe. */ ATTRIBUTE_COLD static void log_overwrite_warning(lsn_t lsn) { - if (log_sys.overwrite_warned) + ut_ad(!log_sys.archive); /* we hope that this is unreachable */ + + if (log_sys.overwrite_warned || log_sys.archive) return; time_t t= time(nullptr); @@ -872,6 +920,38 @@ ATTRIBUTE_COLD static void log_overwrite_warning(lsn_t lsn) ? ". Shutdown is in progress" : ""); } + +#ifdef HAVE_PMEM +template<> +inline std::pair +log_t::append_prepare(size_t size, bool ex) noexcept +{ + ut_ad(ex ? latch_have_wr() : latch_have_rd()); + ut_ad(is_mmap()); + ut_ad(archive); + ut_ad(archived_lsn); + + uint64_t l, lsn; + static_assert(WRITE_TO_BUF == WRITE_BACKOFF << 1, ""); + while (UNIV_UNLIKELY((l= write_lsn_offset.fetch_add(size + WRITE_TO_BUF) & + (WRITE_TO_BUF - 1)) + size + + (lsn= base_lsn.load(std::memory_order_relaxed)) >= + first_lsn + capacity()) && !resize_buf) + { + /* The following is inlined here instead of being part of + archived_mmap_switch_prepare(), in order to increase the + locality of reference and to expedite setting the WRITE_BACKOFF flag. */ + bool late(write_lsn_offset.fetch_or(WRITE_BACKOFF) & WRITE_BACKOFF); + /* Subtract our LSN overshoot.
*/ + write_lsn_offset.fetch_sub(size); + archived_mmap_switch_prepare(late, ex); + } + + lsn+= l; + return {lsn, buf + FIRST_LSN + (lsn - first_lsn)}; +} +#endif + ATTRIBUTE_COLD void log_t::append_prepare_wait(bool late, bool ex) noexcept { if (UNIV_LIKELY(!ex)) @@ -918,6 +998,7 @@ ATTRIBUTE_COLD void log_t::append_prepare_wait(bool late, bool ex) noexcept const bool is_pmem{is_mmap()}; if (is_pmem) { + ut_ad(!archive); ut_ad(lsn - get_flushed_lsn(std::memory_order_relaxed) < capacity() || overwrite_warned); persist(lsn); @@ -942,17 +1023,20 @@ ATTRIBUTE_COLD void log_t::append_prepare_wait(bool late, bool ex) noexcept } /** Reserve space in the log buffer for appending data. -@tparam mmap log_sys.is_mmap() +@tparam mode how to write log @param size total length of the data to append(), in bytes @param ex whether log_sys.latch is exclusively locked @return the start LSN and the buffer position for append() */ -template +template inline std::pair log_t::append_prepare(size_t size, bool ex) noexcept { ut_ad(ex ? latch_have_wr() : latch_have_rd()); - ut_ad(mmap == is_mmap()); - ut_ad(!mmap || buf_size == std::min(capacity(), buf_size_max)); + static_assert(!bool(WRITE_NORMAL), ""); + static_assert(bool(CIRCULAR_MMAP), ""); + static_assert(mode == WRITE_NORMAL || mode == CIRCULAR_MMAP, ""); + ut_ad(bool(mode) == is_mmap()); + ut_ad(!mode || buf_size == std::min(capacity(), buf_size_max)); const size_t buf_size{this->buf_size - size}; uint64_t l; static_assert(WRITE_TO_BUF == WRITE_BACKOFF << 1, ""); @@ -968,14 +1052,13 @@ std::pair log_t::append_prepare(size_t size, bool ex) noexcept append_prepare_wait(late, ex); } - const lsn_t lsn{l + base_lsn.load(std::memory_order_relaxed)}, - end_lsn{lsn + size}; + const lsn_t lsn{l + base_lsn.load(std::memory_order_relaxed)}; - if (UNIV_UNLIKELY(end_lsn >= last_checkpoint_lsn + log_capacity)) + if (UNIV_UNLIKELY(lsn + size >= last_checkpoint_lsn + log_capacity)) set_check_for_checkpoint(true); return {lsn, - buf + size_t(mmap ? 
FIRST_LSN + (lsn - first_lsn) % capacity() : l)}; + buf + size_t(mode ? FIRST_LSN + (lsn - first_lsn) % capacity() : l)}; } /** Finish appending data to the log. @@ -1111,12 +1194,14 @@ std::pair mtr_t::do_write() noexcept return finish_write(len); } -inline void log_t::resize_write(lsn_t lsn, const byte *end, size_t len, - size_t seq) noexcept +template +ATTRIBUTE_COLD +void log_t::resize_write_low(lsn_t lsn, const byte *end, + size_t len, size_t seq) noexcept { ut_ad(latch_have_any()); + ut_ad(resize_buf); - if (UNIV_LIKELY_NULL(resize_buf)) { ut_ad(end >= buf); end-= len; @@ -1208,92 +1293,135 @@ inline void log_t::resize_write(lsn_t lsn, const byte *end, size_t len, inline void log_t::append(byte *&d, const void *s, size_t size) noexcept { ut_ad(log_sys.latch_have_any()); - ut_ad(d + size <= log_sys.buf + - (log_sys.is_mmap() ? log_sys.file_size : log_sys.buf_size)); + ut_ad(log_sys.is_mmap() + ? ((d >= log_sys.buf && d + size <= log_sys.buf + log_sys.file_size) || + (log_sys.archive && + d >= log_sys.resize_buf && + d + size <= log_sys.resize_buf + log_sys.resize_target)) + : (d >= log_sys.buf && d + size <= log_sys.buf + log_sys.buf_size)); memcpy(d, s, size); d+= size; } -template -std::pair mtr_t::finish_writer(mtr_t *mtr, size_t len) +template +std::pair +mtr_t::finish_writer(mtr_t *mtr, size_t len) { ut_ad(log_sys.is_latest()); ut_ad(!recv_no_log_write); ut_ad(mtr->is_logged()); ut_ad(mtr->m_latch_ex ? log_sys.latch_have_wr() : log_sys.latch_have_rd()); ut_ad(len < recv_sys.MTR_SIZE_MAX); + ut_ad(mode == log_t::WRITE_NORMAL || + log_sys.archive == (mode == log_t::ARCHIVED_MMAP)); const size_t size{mtr->m_commit_lsn ? 
5U + 8U : 5U}; std::pair start= - log_sys.append_prepare(len, mtr->m_latch_ex); + log_sys.append_prepare(len, mtr->m_latch_ex); - if (!mmap) - { + if (mode == log_t::WRITE_NORMAL) +#ifdef HAVE_PMEM + write_normal: +#endif for (const mtr_buf_t::block_t &b : mtr->m_log) log_sys.append(start.second, b.begin(), b.used()); - - write_trailer: - *start.second++= log_sys.get_sequence_bit(start.first + len - size); - if (mtr->m_commit_lsn) - { - mach_write_to_8(start.second, mtr->m_commit_lsn); - mtr->m_crc= my_crc32c(mtr->m_crc, start.second, 8); - start.second+= 8; - } - mach_write_to_4(start.second, mtr->m_crc); - start.second+= 4; - } +#ifdef HAVE_PMEM else { - if (UNIV_LIKELY(start.second + len <= &log_sys.buf[log_sys.file_size])) + const size_t file_size= log_sys.file_size; + byte *const buf{log_sys.buf}; + byte *end= &buf[file_size]; + if (UNIV_LIKELY(start.second + len <= end)) + goto write_normal; + byte *const begin= mode == log_t::ARCHIVED_MMAP + ? log_sys.get_archived_mmap_switch() + : buf + log_sys.START_OFFSET; + if (mode == log_t::ARCHIVED_MMAP && UNIV_UNLIKELY(start.second > end)) { - for (const mtr_buf_t::block_t &b : mtr->m_log) - log_sys.append(start.second, b.begin(), b.used()); - goto write_trailer; + /* Our mini-transaction will not span two log files. We are + somewhere between log_t::archived_mmap_switch_prepare() and + log_t::archived_mmap_switch_complete(), and our entire log must + be written to the new file. 
*/ + start.second= begin + (start.second - end); + goto write_normal; } + for (const mtr_buf_t::block_t &b : mtr->m_log) { size_t size{b.used()}; - const size_t size_left(&log_sys.buf[log_sys.file_size] - start.second); + const size_t size_left(end - start.second); const byte *src= b.begin(); if (size > size_left) { ::memcpy(start.second, src, size_left); - start.second= &log_sys.buf[log_sys.START_OFFSET]; + start.second= begin; + if (mode == log_t::ARCHIVED_MMAP) + /* An approximation; the minimum innodb_log_file_size + always exceeds the maximum mtr->get_log_size() */ + end= begin + file_size; src+= size_left; size-= size_left; } ::memcpy(start.second, src, size); start.second+= size; } - const size_t size_left(&log_sys.buf[log_sys.file_size] - start.second); - if (size_left > size) - goto write_trailer; + const size_t size_left(end - start.second); + if (size_left <= size) + { + byte tail[5 + 8]; + tail[0]= log_sys.get_sequence_bit(start.first + len - size); - byte tail[5 + 8]; - tail[0]= log_sys.get_sequence_bit(start.first + len - size); + if (mtr->m_commit_lsn) + { + mach_write_to_8(tail + 1, mtr->m_commit_lsn); + mtr->m_crc= my_crc32c(mtr->m_crc, tail + 1, 8); + mach_write_to_4(tail + 9, mtr->m_crc); + } + else + mach_write_to_4(tail + 1, mtr->m_crc); - if (mtr->m_commit_lsn) - { - mach_write_to_8(tail + 1, mtr->m_commit_lsn); - mtr->m_crc= my_crc32c(mtr->m_crc, tail + 1, 8); - mach_write_to_4(tail + 9, mtr->m_crc); + ::memcpy(start.second, tail, size_left); + ::memcpy(begin, tail + size_left, size - size_left); + start.second= ((size >= size_left) ? begin : end) + (size - size_left); + goto wrote_trailer; } - else - mach_write_to_4(tail + 1, mtr->m_crc); - - ::memcpy(start.second, tail, size_left); - ::memcpy(log_sys.buf + log_sys.START_OFFSET, tail + size_left, - size - size_left); - start.second= log_sys.buf + - ((size >= size_left) ? 
log_sys.START_OFFSET : log_sys.file_size) + - (size - size_left); } +#endif - log_sys.resize_write(start.first, start.second, len, size); + *start.second++= log_sys.get_sequence_bit(start.first + len - size); + + if (mtr->m_commit_lsn) + { + mach_write_to_8(start.second, mtr->m_commit_lsn); + mtr->m_crc= my_crc32c(mtr->m_crc, start.second, 8); + start.second+= 8; + } + mach_write_to_4(start.second, mtr->m_crc); + start.second+= 4; + +#ifdef HAVE_PMEM +wrote_trailer: +#else + static_assert(mode == log_t::WRITE_NORMAL, ""); +#endif mtr->m_commit_lsn= start.first + len; - return {start.first, log_close(mtr->m_commit_lsn)}; + + switch (mode) { + case log_t::ARCHIVED_MMAP: + ut_ad(!log_sys.resize_in_progress()); + return {start.first, (log_sys.get_first_lsn() > log_sys.last_checkpoint_lsn + ? log_sys.get_first_lsn() : 0)}; + case log_t::CIRCULAR_MMAP: + log_sys.resize_write(start.first, start.second, len, size); + return {start.first, log_close(mtr->m_commit_lsn)}; + case log_t::WRITE_NORMAL: + log_sys.resize_write(start.first, start.second, len, size); + } + return {start.first, log_sys.archive + ? (log_sys.get_first_lsn() > log_sys.last_checkpoint_lsn + ? 
log_sys.get_first_lsn() : 0) + : log_close(mtr->m_commit_lsn)}; } bool mtr_t::have_x_latch(const buf_block_t &block) const diff --git a/storage/innobase/srv/srv0start.cc b/storage/innobase/srv/srv0start.cc index e92e0ff54e078..806a4c334693b 100644 --- a/storage/innobase/srv/srv0start.cc +++ b/storage/innobase/srv/srv0start.cc @@ -1611,6 +1611,8 @@ dberr_t srv_start(bool create_new_db) if (log_sys.resize_rename()) { return(srv_init_abort(DB_ERROR)); } + + if (log_sys.archive) log_sys.archive_set_size(); } else { /* Suppress warnings in fil_space_t::create() for files that are being read before dict_boot() has recovered @@ -1732,6 +1734,8 @@ dberr_t srv_start(bool create_new_db) recv_sys.debug_free(); + if (log_sys.archive) log_sys.archive_set_size(); + if (!srv_read_only_mode) { const uint32_t flags = FSP_FLAGS_PAGE_SSIZE(); for (uint32_t id = srv_undo_space_id_start; From 8b09088948e759e1bf6f2643ef11afe71e87dde2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marko=20M=C3=A4kel=C3=A4?= Date: Mon, 8 Dec 2025 18:03:48 +0200 Subject: [PATCH 05/33] Replace some references to ib_logfile0 --- mysql-test/suite/sys_vars/r/sysvars_innodb.result | 8 ++++---- storage/innobase/handler/ha_innodb.cc | 8 ++++---- 2 files changed, 8 insertions(+), 8 deletions(-) diff --git a/mysql-test/suite/sys_vars/r/sysvars_innodb.result b/mysql-test/suite/sys_vars/r/sysvars_innodb.result index 941e817e8c5db..4befc2cbcafa1 100644 --- a/mysql-test/suite/sys_vars/r/sysvars_innodb.result +++ b/mysql-test/suite/sys_vars/r/sysvars_innodb.result @@ -985,7 +985,7 @@ SESSION_VALUE NULL DEFAULT_VALUE ON VARIABLE_SCOPE GLOBAL VARIABLE_TYPE BOOLEAN -VARIABLE_COMMENT Whether ib_logfile0 resides in persistent memory (when supported) or should initially be memory-mapped +VARIABLE_COMMENT Whether the log resides in persistent memory (when supported) or should initially be memory-mapped NUMERIC_MIN_VALUE NULL NUMERIC_MAX_VALUE NULL NUMERIC_BLOCK_SIZE NULL @@ -997,7 +997,7 @@ SESSION_VALUE NULL DEFAULT_VALUE 100663296 
VARIABLE_SCOPE GLOBAL VARIABLE_TYPE BIGINT UNSIGNED -VARIABLE_COMMENT Desired size of ib_logfile0 in bytes +VARIABLE_COMMENT Desired log file size in bytes NUMERIC_MIN_VALUE 4194304 NUMERIC_MAX_VALUE 18446744073709551615 NUMERIC_BLOCK_SIZE 4096 @@ -1009,7 +1009,7 @@ SESSION_VALUE NULL DEFAULT_VALUE OFF VARIABLE_SCOPE GLOBAL VARIABLE_TYPE BOOLEAN -VARIABLE_COMMENT Whether each write to ib_logfile0 is write through +VARIABLE_COMMENT Whether each write to the log is write through NUMERIC_MIN_VALUE NULL NUMERIC_MAX_VALUE NULL NUMERIC_BLOCK_SIZE NULL @@ -1021,7 +1021,7 @@ SESSION_VALUE NULL DEFAULT_VALUE VARIABLE_SCOPE GLOBAL VARIABLE_TYPE VARCHAR -VARIABLE_COMMENT Path to ib_logfile0 +VARIABLE_COMMENT Path to ib_logfile0 or ib_*.log NUMERIC_MIN_VALUE NULL NUMERIC_MAX_VALUE NULL NUMERIC_BLOCK_SIZE NULL diff --git a/storage/innobase/handler/ha_innodb.cc b/storage/innobase/handler/ha_innodb.cc index 2b2d0562dc7ff..185a0555ba9d6 100644 --- a/storage/innobase/handler/ha_innodb.cc +++ b/storage/innobase/handler/ha_innodb.cc @@ -19053,7 +19053,7 @@ static MYSQL_SYSVAR_ENUM(flush_method, innodb_flush_method, static MYSQL_SYSVAR_STR(log_group_home_dir, srv_log_group_home_dir, PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY, - "Path to ib_logfile0", NULL, NULL, NULL); + "Path to ib_logfile0 or ib_*.log", NULL, NULL, NULL); static MYSQL_SYSVAR_DOUBLE(max_dirty_pages_pct, srv_max_buf_pool_modified_pct, PLUGIN_VAR_RQCMDARG, @@ -19417,7 +19417,7 @@ static MYSQL_SYSVAR_UINT(log_buffer_size, log_sys.buf_size, NULL, NULL, 16U << 20, 2U << 20, log_sys.buf_size_max, 4096); static constexpr const char *innodb_log_file_mmap_description= - "Whether ib_logfile0" + "Whether the log" " resides in persistent memory (when supported) or" " should initially be memory-mapped"; static MYSQL_SYSVAR_BOOL(log_file_mmap, log_sys.log_mmap, @@ -19434,7 +19434,7 @@ static MYSQL_SYSVAR_BOOL(log_file_buffering, log_sys.log_buffered, static MYSQL_SYSVAR_BOOL(log_file_write_through, log_sys.log_write_through, 
PLUGIN_VAR_OPCMDARG, - "Whether each write to ib_logfile0 is write through", + "Whether each write to the log is write through", nullptr, innodb_log_file_write_through_update, FALSE); static MYSQL_SYSVAR_BOOL(data_file_buffering, fil_system.buffered, @@ -19509,7 +19509,7 @@ static MYSQL_SYSVAR_UINT64_T(log_recovery_target, recv_sys.rpo, static MYSQL_SYSVAR_ULONGLONG(log_file_size, srv_log_file_size, PLUGIN_VAR_RQCMDARG, - "Desired size of ib_logfile0 in bytes", + "Desired log file size in bytes", nullptr, innodb_log_file_size_update, 96 << 20, 4 << 20, std::numeric_limits::max(), 4096); From fb98e3dc8d0c2bad96dc84dd6050f2c5ace2e733 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marko=20M=C3=A4kel=C3=A4?= Date: Wed, 10 Dec 2025 07:56:17 +0200 Subject: [PATCH 06/33] Rename ib_logfile0 on SET GLOBAL innodb_log_archive log_t::set_archive(my_bool): Implement SET GLOBAL innodb_log_archive. The log file will be renamed. log_t::header_rewrite(my_bool): Rewrite the log file header before or after renaming the log file. The recovery of the last archived log file must tolerate also the ib_logfile0 format. TODO: Throw an error on startup if the innodb_log_archive setting does not match the log file name (and contents). TODO: Implement crash recovery for the archived-format log file. TODO: Use a different file name for encrypted archived log. log_t::get_circular_path(): Get the path name of a circular file. Replaces get_log_file_path(). log_t::get_path(): Get the name of the current log file. log_t::set_archive(): Reset first_lsn on SET innodb_log_archive=ON to be within the current file. recv_sys_t::find_checkpoint(): If the circular ib_logfile0 is missing, determine the oldest archived log file with contiguous LSN. For now, we open the first archived log file. FIXME: Validate innodb_log_archive_start, innodb_log_recovery_start, innodb_log_recovery_target FIXME: In bootstrap, write the header in the correct format. FIXME: Parse the archived log file header in the correct format. 
FIXME: Multi-file recovery log_t::append_archive_name(): Append the archive log file name to a path string. log_checkpoint_low(): Do not prevent a checkpoint at the start of a file. log_t::create(lsn_t): Initialize last_checkpoint_lsn. --- extra/mariabackup/backup_copy.cc | 4 +- extra/mariabackup/xtrabackup.cc | 9 +- storage/innobase/buf/buf0flu.cc | 25 +-- storage/innobase/handler/ha_innodb.cc | 36 +---- storage/innobase/include/log0log.h | 43 ++--- storage/innobase/log/log0log.cc | 223 ++++++++++++++++++++++---- storage/innobase/log/log0recv.cc | 105 +++++++++++- storage/innobase/srv/srv0start.cc | 12 +- 8 files changed, 347 insertions(+), 110 deletions(-) diff --git a/extra/mariabackup/backup_copy.cc b/extra/mariabackup/backup_copy.cc index 22a40e5fb1042..885b493a174ba 100644 --- a/extra/mariabackup/backup_copy.cc +++ b/extra/mariabackup/backup_copy.cc @@ -1772,7 +1772,7 @@ copy_back() if it exists. */ ds_tmp = ds_create(dst_dir, DS_TYPE_LOCAL); - if (!(ret = copy_or_move_file(ds_tmp, LOG_FILE_NAME, LOG_FILE_NAME, + if (!(ret = copy_or_move_file(ds_tmp, "ib_logfile0", "ib_logfile0", dst_dir, 1))) { goto cleanup; } @@ -1869,7 +1869,7 @@ copy_back() } /* skip the redo log (it was already copied) */ - if (!strcmp(filename, LOG_FILE_NAME)) { + if (!strcmp(filename, "ib_logfile0")) { continue; } diff --git a/extra/mariabackup/xtrabackup.cc b/extra/mariabackup/xtrabackup.cc index 18fd030e644b4..33c07910f5536 100644 --- a/extra/mariabackup/xtrabackup.cc +++ b/extra/mariabackup/xtrabackup.cc @@ -2705,7 +2705,7 @@ static bool innodb_init() srv_log_group_home_dir= xtrabackup_target_dir; bool ret; - const std::string ib_logfile0{get_log_file_path()}; + const std::string ib_logfile0{log_sys.get_circular_path()}; os_file_delete_if_exists_func(ib_logfile0.c_str(), nullptr); os_file_t file= os_file_create_func(ib_logfile0.c_str(), OS_FILE_CREATE, @@ -5569,10 +5569,11 @@ static bool xtrabackup_backup_func() /* open the log file */ memset(&stat_info, 0, sizeof(MY_STAT)); - 
dst_log_file = ds_open(backup_datasinks.m_redo, LOG_FILE_NAME, &stat_info); + dst_log_file = + ds_open(backup_datasinks.m_redo, "ib_logfile0", &stat_info); if (dst_log_file == NULL) { - msg("Error: failed to open the target stream for '%s'.", - LOG_FILE_NAME); + msg("Error: failed to open the target stream" + " for 'ib_logfile0'."); goto fail; } diff --git a/storage/innobase/buf/buf0flu.cc b/storage/innobase/buf/buf0flu.cc index 978983e190a4b..7b722b1dee916 100644 --- a/storage/innobase/buf/buf0flu.cc +++ b/storage/innobase/buf/buf0flu.cc @@ -1842,6 +1842,7 @@ inline void log_t::write_checkpoint(lsn_t end_lsn) noexcept { ut_ad(!is_opened()); resizing= resize_lsn.load(std::memory_order_relaxed); + ut_ad(!resizing || !archive); if (resizing > 1 && resizing <= next_checkpoint_lsn) { @@ -1860,6 +1861,7 @@ inline void log_t::write_checkpoint(lsn_t end_lsn) noexcept latch.wr_unlock(); log_write_and_flush_prepare(); resizing= resize_lsn.load(std::memory_order_relaxed); + ut_ad(!resizing || !archive); ut_ad(ut_is_2pow(write_size)); ut_ad(write_size >= 512); ut_ad(write_size <= 4096); @@ -1880,6 +1882,7 @@ inline void log_t::write_checkpoint(lsn_t end_lsn) noexcept ut_ad(checkpoint_pending); checkpoint_pending= false; resizing= resize_lsn.load(std::memory_order_relaxed); + ut_ad(!resizing || !archive); } ut_ad(!checkpoint_pending); @@ -1893,7 +1896,7 @@ inline void log_t::write_checkpoint(lsn_t end_lsn) noexcept /* Make the previous archived log file read-only */ #ifdef _WIN32 resize_log.close(); - SetFileAttributesA(get_archive_path(first_lsn).c_str(), + SetFileAttributesA(get_archive_path().c_str(), FILE_ATTRIBUTE_READONLY | FILE_ATTRIBUTE_ARCHIVE); #else struct stat st; @@ -1933,6 +1936,7 @@ inline void log_t::write_checkpoint(lsn_t end_lsn) noexcept if (resizing > 1 && resizing <= checkpoint_lsn) { + ut_ad(!archive); ut_ad(is_mmap() == !resize_flush_buf); ut_ad(is_mmap() == !resize_log.is_opened()); @@ -1962,7 +1966,7 @@ inline void log_t::write_checkpoint(lsn_t 
end_lsn) noexcept ut_ad(!log.is_opened()); bool success; log.m_file= - os_file_create_func(get_log_file_path().c_str(), OS_FILE_OPEN, + os_file_create_func(get_circular_path().c_str(), OS_FILE_OPEN, OS_LOG_FILE, false, &success); ut_a(success); ut_a(log.is_opened()); @@ -2028,12 +2032,12 @@ static bool log_checkpoint_low(lsn_t oldest_lsn, lsn_t end_lsn) noexcept ut_ad(oldest_lsn <= end_lsn); ut_ad(end_lsn == log_sys.get_lsn()); - if (oldest_lsn == log_sys.last_checkpoint_lsn || - (oldest_lsn == end_lsn && - !log_sys.resize_in_progress() && - oldest_lsn == log_sys.last_checkpoint_lsn + - (log_sys.is_encrypted() - ? SIZE_OF_FILE_CHECKPOINT + 8 : SIZE_OF_FILE_CHECKPOINT))) + if (oldest_lsn == end_lsn && oldest_lsn != log_sys.get_first_lsn() && + (oldest_lsn == log_sys.last_checkpoint_lsn || + (!log_sys.resize_in_progress() && + oldest_lsn == log_sys.last_checkpoint_lsn + + (log_sys.is_encrypted() + ? SIZE_OF_FILE_CHECKPOINT + 8 : SIZE_OF_FILE_CHECKPOINT)))) { /* Do nothing, because nothing was logged (other than a FILE_CHECKPOINT record) since the previous checkpoint. */ @@ -2043,7 +2047,7 @@ static bool log_checkpoint_low(lsn_t oldest_lsn, lsn_t end_lsn) noexcept } ut_ad(!recv_no_log_write); - ut_ad(oldest_lsn > log_sys.last_checkpoint_lsn); + ut_ad(oldest_lsn >= log_sys.last_checkpoint_lsn); /* Repeat the FILE_MODIFY records after the checkpoint, in case some log records between the checkpoint and log_sys.lsn need them. Finally, write a FILE_CHECKPOINT record. 
Redo log apply expects to @@ -2060,7 +2064,8 @@ static bool log_checkpoint_low(lsn_t oldest_lsn, lsn_t end_lsn) noexcept log_sys.latch.wr_unlock(); log_write_up_to(flush_lsn, true); log_sys.latch.wr_lock(SRW_LOCK_CALL); - if (log_sys.last_checkpoint_lsn >= oldest_lsn) + if (log_sys.last_checkpoint_lsn >= oldest_lsn && + log_sys.last_checkpoint_lsn != log_sys.get_first_lsn()) goto do_nothing; ut_ad(log_sys.get_flushed_lsn() >= flush_lsn); diff --git a/storage/innobase/handler/ha_innodb.cc b/storage/innobase/handler/ha_innodb.cc index 185a0555ba9d6..c1fc30cfdf2e4 100644 --- a/storage/innobase/handler/ha_innodb.cc +++ b/storage/innobase/handler/ha_innodb.cc @@ -19447,44 +19447,10 @@ static MYSQL_SYSVAR_BOOL(data_file_write_through, fil_system.write_through, "Whether each write to data files writes through", nullptr, innodb_data_file_write_through_update, FALSE); -inline bool log_t::set_archive(my_bool archive) noexcept -{ - bool fail{false}; - for (;;) - { - latch.wr_lock(SRW_LOCK_CALL); - fail= resize_in_progress(); - if (fail) - break; -#ifdef HAVE_PMEM - if (is_backoff() && is_mmap()) - { - /* Prevent a race condition with append_prepare() */ - latch.wr_unlock(); - continue; - } -#endif - this->archive= archive; - if (archive) - { - archived_lsn= next_checkpoint_lsn; - archive_set_size(); - /* TODO: rename ib_logfile0 to archived file; update header */ - } - mtr_t::finisher_update(); - break; - } - - latch.wr_unlock(); - return fail; -} - static void innodb_log_archive_update(THD *, st_mysql_sys_var*, void *, const void *save) noexcept { - if (log_sys.set_archive(*static_cast(save))) - my_printf_error(ER_WRONG_USAGE, - "SET GLOBAL innodb_log_file_size is in progress", MYF(0)); + log_sys.set_archive(*static_cast(save)); } static MYSQL_SYSVAR_BOOL(log_archive, log_sys.archive, diff --git a/storage/innobase/include/log0log.h b/storage/innobase/include/log0log.h index ee0169ba12826..5f480d80f4d7c 100644 --- a/storage/innobase/include/log0log.h +++ 
b/storage/innobase/include/log0log.h @@ -35,22 +35,6 @@ Created 12/9/1995 Heikki Tuuri using st_::span; -static const char LOG_FILE_NAME_PREFIX[] = "ib_logfile"; -static const char LOG_FILE_NAME[] = "ib_logfile0"; - -/** Composes full path for a redo log file -@param[in] filename name of the redo log file -@return path with log file name*/ -std::string get_log_file_path(const char *filename= LOG_FILE_NAME); - -/** Delete log file. -@param[in] suffix suffix of the file name */ -static inline void delete_log_file(const char* suffix) -{ - auto path = get_log_file_path(LOG_FILE_NAME_PREFIX).append(suffix); - os_file_delete_if_exists_func(path.c_str(), nullptr); -} - struct completion_callback; /** Ensure that the log has been written to the log file up to a given @@ -383,8 +367,9 @@ struct log_t resize_write_low(lsn, end, len, seq); } - /** SET GLOBAL innodb_log_archive */ - inline bool set_archive(my_bool archive) noexcept; + /** SET GLOBAL innodb_log_archive + @param archive the new value of innodb_log_archive */ + void set_archive(my_bool archive) noexcept; private: /** Replicate a write to the log. @@ -432,6 +417,10 @@ struct log_t @param encrypted whether the log is encrypted */ static void header_write(byte *buf, lsn_t lsn, bool encrypted) noexcept; + /** Rewrite the log file header in set_archive() + @param archive the new value of innodb_log_archive */ + void header_rewrite(my_bool archive) noexcept; + /** @return an estimate of get_lsn(), using acquire-release ordering with write_buf() or persist(); an upper bound if said functions have updated only one of the fields, @@ -565,10 +554,28 @@ struct log_t ARCHIVED_MMAP }; + /** Get a name of a circular log file. + @param i log file number (0 to 101) + @return the path name of the log file */ + ATTRIBUTE_COLD static std::string get_circular_path(size_t i= 0); + + /** @return the name of the current log file */ + ATTRIBUTE_COLD std::string get_path() const; + + /** Append the archive log file base name to a string. 
+ @param path directory name and separator + @param lsn first LSN stored in the file + @return path with the base file name appended */ + static ATTRIBUTE_COLD std::string &append_archive_name(std::string &path, + lsn_t lsn); + /** Generate an archive log file name. @param lsn first LSN stored in the file @return archive log file name */ ATTRIBUTE_COLD std::string get_archive_path(lsn_t lsn) const; + /** @return the current archive log file name */ + std::string get_archive_path() const { return get_archive_path(first_lsn); } + /** @return the next archive log file name */ ATTRIBUTE_COLD std::string get_next_archive_path() const; diff --git a/storage/innobase/log/log0log.cc b/storage/innobase/log/log0log.cc index 6c6b130c44495..37fc054ca9d1b 100644 --- a/storage/innobase/log/log0log.cc +++ b/storage/innobase/log/log0log.cc @@ -439,7 +439,7 @@ void log_t::create(lsn_t lsn) noexcept if (!archived_lsn) archived_lsn= lsn; - last_checkpoint_lsn= 0; + last_checkpoint_lsn= lsn; DBUG_PRINT("ib_log", ("write header " LSN_PF, lsn)); @@ -561,10 +561,9 @@ void log_t::set_buffered(bool buffered) noexcept { if (const dberr_t err= log.close()) log_close_failed(err); - std::string path{get_log_file_path()}; log_buffered= buffered; bool success; - log.m_file= os_file_create_func(path.c_str(), + log.m_file= os_file_create_func(get_path().c_str(), OS_FILE_OPEN, OS_LOG_FILE, false, &success); ut_a(log.m_file != OS_FILE_CLOSED); @@ -585,10 +584,9 @@ void log_t::set_write_through(bool write_through) { os_file_close_func(log.m_file); log.m_file= OS_FILE_CLOSED; - std::string path{get_log_file_path()}; log_write_through= write_through; bool success; - log.m_file= os_file_create_func(path.c_str(), + log.m_file= os_file_create_func(get_path().c_str(), OS_FILE_OPEN, OS_LOG_FILE, false, &success); ut_a(log.m_file != OS_FILE_CLOSED); @@ -599,6 +597,161 @@ void log_t::set_write_through(bool write_through) log_resize_release(); } +/** Rewrite the log file header in set_archive() +@param archive 
the new value of innodb_log_archive */ +void log_t::header_rewrite(my_bool archive) noexcept +{ + ut_ad(!resize_buf); + ut_ad(this->archive == !archive); + + /* We will rewrite the log file header while the file + name is not ib_logfile0. That is, the archived log file + recovery will accept both the circular and the archived + format for the last file. */ + + byte* c= checkpoint_buf; + lsn_t end_lsn= first_lsn + 1; // FIXME: store this in log_sys? + ut_ad(end_lsn > first_lsn); + ut_ad(!archive || end_lsn <= first_lsn + ~0U); +#ifdef HAVE_PMEM + if (!c) + { + ut_ad(is_mmap()); + if (!archive) + { + memset_aligned<512>(buf + 512, 0, START_OFFSET - 512); + c= buf + CHECKPOINT_1; + mach_write_to_8(my_assume_aligned<8>(c), next_checkpoint_lsn); + mach_write_to_8(my_assume_aligned<8>(c + 8), end_lsn); + mach_write_to_4(my_assume_aligned<4>(c + 60), my_crc32c(0, c, 60)); + pmem_persist(buf + 512, START_OFFSET - 512); + header_write(buf, first_lsn, is_encrypted()); + memset_aligned<512>(buf + 512, 0, CHECKPOINT_1 - 512); + pmem_persist(buf, CHECKPOINT_1); + } + else + { + mach_write_to_4(buf, uint32_t(end_lsn - first_lsn)); + memset(buf + 4, 0, 60 - 4); + pmem_persist(buf, 64); + memset_aligned<64>(buf + 64, 0, START_OFFSET - 64); + pmem_persist(buf, START_OFFSET); + } + return; + } +#endif + memset_aligned<512>(c, 0, write_size); + + if (!archive) + { + mach_write_to_8(my_assume_aligned<8>(c), next_checkpoint_lsn); + mach_write_to_8(my_assume_aligned<8>(c + 8), end_lsn); + mach_write_to_4(my_assume_aligned<4>(c + 60), my_crc32c(0, c, 60)); + log.write(CHECKPOINT_1, {c, write_size}); + os_file_flush(log.m_file); + memset_aligned<512>(c, 0, write_size); + for (size_t offset= CHECKPOINT_1; (offset+= write_size) < START_OFFSET;) + log.write(offset, {c, write_size}); + header_write(c, first_lsn, is_encrypted()); + if (write_size > 512) + memset_aligned<512>(c + 512, 0, write_size - 512); + log.write(0, {c, write_size}); + os_file_flush(log.m_file); + memset_aligned<512>(c, 0, 
write_size); + for (size_t offset= 0; (offset+= write_size) < CHECKPOINT_1;) + log.write(offset, {c, write_size}); + } + else + { + mach_write_to_4(c, uint32_t(end_lsn - first_lsn)); + log.write(0, {c, write_size}); + os_file_flush(log.m_file); + memset_aligned<512>(c, 0, write_size); + for (size_t offset= 0; (offset+= write_size) < START_OFFSET;) + log.write(offset, {c, write_size}); + } + + os_file_flush(log.m_file); +} + +/** SET GLOBAL innodb_log_archive +@param archive the new value of innodb_log_archive */ +void log_t::set_archive(my_bool archive) noexcept +{ + for (;;) + { + IF_WIN(log_resize_acquire(), latch.wr_lock(SRW_LOCK_CALL)); + if (resize_in_progress()) + { + my_printf_error(ER_WRONG_USAGE, + "SET GLOBAL innodb_log_file_size is in progress", + MYF(0)); + break; + } + if (archive == this->archive) + break; +#ifdef HAVE_PMEM + if (is_backoff() && is_mmap()) + { + /* Prevent a race condition with append_prepare() */ + IF_WIN(log_resize_release(), latch.wr_unlock()); + continue; + } +#endif + ut_ad(!resize_buf); /* FIXME: wait for write_checkpoint() */ + + const lsn_t old_first_lsn{first_lsn}; + if (archive) + first_lsn+= (end_lsn - old_first_lsn) / capacity() * capacity(); + std::string normal_name{get_circular_path()}; + std::string arch_name{get_archive_path()}; + + const char *old_name= normal_name.c_str(); + const char *new_name= arch_name.c_str(); + if (!archive) + { + std::swap(old_name, new_name); + header_rewrite(archive); + } + +#ifdef _WIN32 + /* On Microsoft Windows, there must be no open file handles to a + file that is being renamed. */ + if (const dberr_t err= log.close()) + log_close_failed(err); +#endif + int fail= my_rename(old_name, new_name, MY_SYNC_DIR); +#ifdef _WIN32 + { + bool success; + log.m_file= os_file_create_func(fail ? 
old_name : new_name, + OS_FILE_OPEN, OS_LOG_FILE, + false, &success); + ut_a(log.m_file != OS_FILE_CLOSED); + } +#endif + if (fail) + { + my_error(ER_ERROR_ON_RENAME, MYF(0), old_name, new_name, my_errno); + first_lsn= old_first_lsn; + break; + } + + if (archive) + { + header_rewrite(archive); + + archived_lsn= next_checkpoint_lsn; + archive_set_size(); + } + this->archive= archive; + mtr_t::finisher_update(); + break; + } + + IF_WIN(log_resize_release(), latch.wr_unlock()); +} + /** Start resizing the log and release the exclusive latch. @param size requested new file_size @param thd the current thread identifier @@ -635,7 +788,7 @@ log_t::resize_start_status log_t::resize_start(os_offset_t size, void *thd) ut_ad(!resize_buf); ut_ad(!resize_flush_buf); ut_ad(!resize_initiator); - std::string path{get_log_file_path("ib_logfile101")}; + const std::string path{get_circular_path(101)}; bool success; resize_initiator= thd; resize_lsn.store(1, std::memory_order_relaxed); @@ -759,8 +912,8 @@ void log_t::resize_abort(void *thd) noexcept resize_target= 0; resize_lsn.store(0, std::memory_order_relaxed); resize_initiator= nullptr; - std::string path{get_log_file_path("ib_logfile101")}; - IF_WIN(DeleteFile(path.c_str()), unlink(path.c_str())); + IF_WIN(DeleteFile(get_circular_path(101).c_str()), + unlink(get_circular_path(101).c_str())); writer_update(false); } @@ -797,31 +950,38 @@ static void log_write_buf(lsn_t max_length, log_sys.log.write(offset, {buf, length}); } +ATTRIBUTE_COLD +std::string &log_t::append_archive_name(std::string &path, lsn_t lsn) +{ + path.append("ib_"); + for (int i= 16; i--; lsn<<= 4) + path.push_back("0123456789abcdef"[lsn >> 60]); + path.append(".log"); + return path; +} + ATTRIBUTE_COLD std::string log_t::get_archive_path(lsn_t lsn) const { - size_t size= strlen(srv_log_group_home_dir) + - sizeof "/ib_0000000000000000.log"; - bool trim= false; - switch (srv_log_group_home_dir[strlen(srv_log_group_home_dir) - 1]) { + size_t size= 
strlen(srv_log_group_home_dir); + retry: + switch (srv_log_group_home_dir[size - 1]) { #ifdef _WIN32 case '\\': #endif case '/': - trim= true; + if (size <= 1) + break; size--; + goto retry; } - - char stack[FN_REFLEN], *heap= nullptr; - char *buf= size < sizeof stack - ? stack : (heap= static_cast(malloc(size))); - const int d= - snprintf(buf, size, - trim ? "%sib_" UINT64PFx ".log" : "%s/ib_" UINT64PFx ".log", - srv_log_group_home_dir, lsn); - ut_a(d + 1 == int(size)); - std::string path{buf, size}; - free(heap); - return path; + if (size == 1 && *srv_log_group_home_dir == '.') + size= 0; + std::string path; + path.reserve(size + sizeof "/ib_0000000000000000.log"); + path.assign(srv_log_group_home_dir, size); + if (size) + path.push_back('/'); + return append_archive_name(path, lsn); } ATTRIBUTE_COLD std::string log_t::get_next_archive_path() const @@ -1784,10 +1944,10 @@ void log_t::close() recv_sys.close(); } -std::string get_log_file_path(const char *filename) +ATTRIBUTE_COLD std::string log_t::get_circular_path(size_t i) { - const size_t size= strlen(srv_log_group_home_dir) + /* path separator */ 1 + - strlen(filename) + /* longest suffix */ 3; + ut_ad(i <= 101); + const size_t size= strlen(srv_log_group_home_dir) + sizeof "/ib_logfile101"; std::string path; path.reserve(size); path.assign(srv_log_group_home_dir); @@ -1801,7 +1961,10 @@ std::string get_log_file_path(const char *filename) default: path.push_back('/'); } - path.append(filename); + return path.append("ib_logfile").append(std::to_string(i)); +} - return path; +ATTRIBUTE_COLD std::string log_t::get_path() const +{ + return archive ? 
get_archive_path() : get_circular_path(); } diff --git a/storage/innobase/log/log0recv.cc b/storage/innobase/log/log0recv.cc index 7cb511882cb75..cb41002b0933a 100644 --- a/storage/innobase/log/log0recv.cc +++ b/storage/innobase/log/log0recv.cc @@ -1711,14 +1711,109 @@ dberr_t recv_sys_t::find_checkpoint() if (files.empty()) { file_checkpoint= 0; - std::string path{get_log_file_path()}; + int archive= 0; + retry: + std::string path{log_sys.get_circular_path()}; bool success; - os_file_t file{os_file_create_func(path.c_str(), - OS_FILE_OPEN, + os_file_t file{os_file_create_func(path.c_str(), archive + ? OS_FILE_OPEN + : OS_FILE_OPEN_SILENT, OS_LOG_FILE, srv_read_only_mode, &success)}; - if (file == OS_FILE_CLOSED) + if (file != OS_FILE_CLOSED); + else if (archive) return DB_ERROR; + else + { + archive= -1; +#ifdef _WIN32 + WIN32_FIND_DATAA entry; + HANDLE d= FindFirstFileA(srv_log_group_home_dir, &entry); + if (d == INVALID_HANDLE_VALUE) + goto retry; +#else + DIR *d= opendir(srv_log_group_home_dir); + if (!d) + goto retry; +#endif + std::map logs; +#ifdef _WIN32 + do + { + if (entry.dwFileAttributes & FILE_ATTRIBUTE_DIRECTORY) + continue; + lsn_t lsn; + int n{0}; + const char *fn{entry.cFileName}; + if (1 != sscanf(fn, "ib_%016" PRIx64 ".log%n", &lsn, &n) || fn[n]) + continue; + LARGE_INTEGER filesize; + filesize.LowPart= entry.nFileSizeLow; + filesize.HighPart= entry.nFileSizeHigh; + logs.emplace(lsn, lsn + filesize.QuadPart); + } + while (FindNextFile(d, &entry)); + FindClose(d); +#else + path.reserve(strlen(srv_log_group_home_dir) + + sizeof "/ib_0000000000000000.log"); + while (dirent *e= readdir(d)) + { + lsn_t lsn; + int n{0}; + const char *fn{e->d_name}; + if (1 != sscanf(fn, "ib_%016" PRIx64 ".log%n", &lsn, &n) || fn[n]) + continue; + path.assign(srv_log_group_home_dir); + path.push_back('/'); + path.append(fn); + struct stat st; + if (stat(path.c_str(), &st) || + st.st_size < static_cast + (log_t::START_OFFSET + SIZE_OF_FILE_CHECKPOINT)) + { + 
sql_print_warning("InnoDB: ignoring %s", path.c_str()); + continue; + } + logs.emplace(lsn, lsn + st.st_size); + } + closedir(d); +#endif + + auto min= logs.cbegin(); + const auto end= logs.cend(); + if (min == end) + goto retry; + + for (auto i= min;;) + { + auto prev= i++; + if (i == end) + break; + if (prev->second != i->first) + min= i; + } + + path.assign(srv_log_group_home_dir); + switch (path.back()) { +#ifdef _WIN32 + case '\\': +#endif + case '/': + break; + default: + path.push_back('/'); + } + // TODO: validate innodb_log_archive_start, innodb_log_recovery_start, + // innodb_log_recovery_target + file= os_file_create_func(log_sys.append_archive_name(path, min->first). + c_str(), OS_FILE_OPEN, OS_LOG_FILE, + srv_read_only_mode, &success); + if (file == OS_FILE_CLOSED) + goto retry; + archive= 1; + } + const os_offset_t size{os_file_get_size(file)}; if (!size) { @@ -1742,7 +1837,7 @@ dberr_t recv_sys_t::find_checkpoint() recv_sys.files.emplace_back(file); for (int i= 1; i < 101; i++) { - path= get_log_file_path(LOG_FILE_NAME_PREFIX).append(std::to_string(i)); + path= log_sys.get_circular_path(i); file= os_file_create_func(path.c_str(), OS_FILE_OPEN_SILENT, OS_LOG_FILE, true, &success); diff --git a/storage/innobase/srv/srv0start.cc b/storage/innobase/srv/srv0start.cc index 806a4c334693b..b381c581f2166 100644 --- a/storage/innobase/srv/srv0start.cc +++ b/storage/innobase/srv/srv0start.cc @@ -160,13 +160,12 @@ static PSI_stage_info* srv_stages[] = static void delete_log_files() { for (size_t i= 1; i < 102; i++) - delete_log_file(std::to_string(i).c_str()); + os_file_delete_if_exists_func(log_sys.get_circular_path(i).c_str(), nullptr); } /** Creates log file. 
@param create_new_db whether the database is being initialized @param lsn log sequence number -@param logfile0 name of the log file @return DB_SUCCESS or error code */ static dberr_t create_log_file(bool create_new_db, lsn_t lsn) { @@ -192,7 +191,7 @@ static dberr_t create_log_file(bool create_new_db, lsn_t lsn) log_sys.latch.wr_lock(SRW_LOCK_CALL); log_sys.set_capacity(); - std::string logfile0{get_log_file_path("ib_logfile101")}; + const std::string logfile0{log_sys.get_circular_path(101)}; bool ret; os_file_t file{ os_file_create_func(logfile0.c_str(), @@ -265,8 +264,8 @@ static dberr_t create_log_file(bool create_new_db, lsn_t lsn) @return whether an error occurred */ bool log_t::resize_rename() noexcept { - std::string old_name{get_log_file_path("ib_logfile101")}; - std::string new_name{get_log_file_path()}; + const std::string old_name{get_circular_path(101)}; + const std::string new_name{log_sys.get_path()}; if (IF_WIN(MoveFileEx(old_name.c_str(), new_name.c_str(), MOVEFILE_REPLACE_EXISTING), @@ -803,7 +802,8 @@ srv_check_undo_redo_logs_exists() } /* Check if redo log file exists */ - auto logfilename = get_log_file_path(); + const std::string logfilename{log_sys.get_circular_path()}; + // FIXME: check for archived log as well fh = os_file_create_func(logfilename.c_str(), OS_FILE_OPEN_RETRY_SILENT, From 254bfaa5f4c00208cb4706c35d3e84f82e779386 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marko=20M=C3=A4kel=C3=A4?= Date: Thu, 8 Jan 2026 17:13:17 +0200 Subject: [PATCH 07/33] Write archived-format checkpoint, and almost recover it With these changes, the following test passes: mysql-test/mtr --mysqld=--innodb-log-archive innodb.temporary_table TODO: Check the following tests, after fully implementing multi-file innodb_log_archive=ON recovery: innodb.insert_into_empty innodb.alter_kill log_t::write_checkpoint(): Write checkpoint in the archived format, and mark the previous log file (resize_log) read-only if one was open. 
Avoid writing a null entry when creating a log file (end_lsn==first_lsn), so that recovery will not wrongly observe an end of checkpoints. log_t::create(): In the archived format, let the header remain zero-filled. log_t::set_archive(): Fix some possible races log_t::end_lsn: The get_lsn() at the time of the latest checkpoint. log_t::log: Retain open when innodb_log_archive=ON, also for PMEM. log_t::resize_log: The new log file when innodb_log_archive=ON has switched logs but not yet completed a checkpoint in the new file. log_t::next_checkpoint_no: Widen to uint16_t. There may be up to 12288/4=3072 checkpoints in the header. recv_sys_t::find_checkpoint(): Do not read any ib_logfile* if innodb_log_archive=ON. Open non-last archived log files in read-only mode. recv_sys_t::find_checkpoint_archived(): Validate each checkpoint in the file header, and by default recover from the last one. recv_sys_t::parse(), log_parse_file(): Tweak some logic for innodb_log_archive=ON. log_t::set_recovered_checkpoint(): Set the checkpoint on recovery. Updates also the end_lsn. log_t::clear_mmap(): Clean up the logic. log_t::persist(): Even if the flushed_to_disk_lsn does not change, we may want to reset the write_lsn_offset. srv_log_rebuild_if_needed(): Never rebuild if innodb_log_archive=ON. 
--- storage/innobase/buf/buf0flu.cc | 137 +++++++++++++-------- storage/innobase/include/log0log.h | 18 ++- storage/innobase/include/log0recv.h | 6 + storage/innobase/log/log0log.cc | 151 +++++++++++++++-------- storage/innobase/log/log0recv.cc | 182 +++++++++++++++++++++------- storage/innobase/srv/srv0start.cc | 11 +- 6 files changed, 355 insertions(+), 150 deletions(-) diff --git a/storage/innobase/buf/buf0flu.cc b/storage/innobase/buf/buf0flu.cc index 7b722b1dee916..f03516c610ab2 100644 --- a/storage/innobase/buf/buf0flu.cc +++ b/storage/innobase/buf/buf0flu.cc @@ -1815,78 +1815,120 @@ static ulint buf_flush_LRU(ulint max_n) noexcept inline void log_t::write_checkpoint(lsn_t end_lsn) noexcept { ut_ad(!srv_read_only_mode); + ut_ad((archive ? end_lsn : next_checkpoint_lsn) >= first_lsn); ut_ad(end_lsn >= next_checkpoint_lsn); ut_d(const lsn_t current_lsn{get_lsn()}); ut_ad(end_lsn <= current_lsn); ut_ad(end_lsn + SIZE_OF_FILE_CHECKPOINT <= current_lsn || srv_shutdown_state > SRV_SHUTDOWN_INITIATED); + ut_ad(this->end_lsn <= end_lsn); DBUG_PRINT("ib_log", ("checkpoint at " LSN_PF " written", next_checkpoint_lsn)); - auto n= next_checkpoint_no; - const size_t offset{(n & 1) ? CHECKPOINT_2 : CHECKPOINT_1}; + const auto n= next_checkpoint_no; + size_t offset; static_assert(CPU_LEVEL1_DCACHE_LINESIZE >= 64, "efficiency"); static_assert(CPU_LEVEL1_DCACHE_LINESIZE <= 4096, "compatibility"); - byte* c= my_assume_aligned - (is_mmap() ? 
buf + offset : checkpoint_buf); - memset_aligned(c, 0, CPU_LEVEL1_DCACHE_LINESIZE); - mach_write_to_8(my_assume_aligned<8>(c), next_checkpoint_lsn); - mach_write_to_8(my_assume_aligned<8>(c + 8), end_lsn); - mach_write_to_4(my_assume_aligned<4>(c + 60), my_crc32c(0, c, 60)); + lsn_t resizing{resize_lsn.load(std::memory_order_relaxed)}; + byte *c; - lsn_t resizing; - -#ifdef HAVE_PMEM - if (is_mmap()) + if (archive) { - ut_ad(!is_opened()); - resizing= resize_lsn.load(std::memory_order_relaxed); - ut_ad(!resizing || !archive); + ut_ad(!resizing); + offset= n * 4; + ut_a(offset < START_OFFSET); // FIXME: better guard for this + const lsn_t d{end_lsn - this->end_lsn}; + ut_a(d <= lsn_t{~uint32_t{0}}); - if (resizing > 1 && resizing <= next_checkpoint_lsn) + if (!d) + { + ut_a(!next_checkpoint_no); + ut_a(end_lsn == first_lsn); + } +#ifdef HAVE_PMEM + else if (is_mmap()) { - memcpy_aligned<64>(resize_buf + CHECKPOINT_1, c, 64); - header_write(resize_buf, resizing, is_encrypted()); - pmem_persist(resize_buf, resize_target); + c= buf + offset; + ut_ad(!memcmp(c, field_ref_zero, 4)); + mach_write_to_4(my_assume_aligned<4>(c), uint32_t(d)); + c= reinterpret_cast(uintptr_t(c) & ~63); + goto persist_checkpoint; + } +#endif + else + { + c= checkpoint_buf; + const size_t o{offset & (write_size - 1)}; + offset&= ~size_t(write_size - 1); + if (!o) + memset_aligned<512>(c, 0, write_size); + ut_ad(!memcmp(c + o, field_ref_zero, 4)); + mach_write_to_4(my_assume_aligned<4>(c + o), uint32_t(d)); + goto write_checkpoint; } - pmem_persist(c, 64); } else -#endif { - ut_ad(!is_mmap()); - ut_ad(!checkpoint_pending); - checkpoint_pending= true; - latch.wr_unlock(); - log_write_and_flush_prepare(); - resizing= resize_lsn.load(std::memory_order_relaxed); - ut_ad(!resizing || !archive); - ut_ad(ut_is_2pow(write_size)); - ut_ad(write_size >= 512); - ut_ad(write_size <= 4096); - log.write(offset, {c, write_size}); - if (resizing > 1 && resizing <= next_checkpoint_lsn) + offset= (n & 1) ? 
CHECKPOINT_2 : CHECKPOINT_1; + c= is_mmap() ? buf + offset : checkpoint_buf; + memset_aligned(c, 0, CPU_LEVEL1_DCACHE_LINESIZE); + mach_write_to_8(my_assume_aligned<8>(c), next_checkpoint_lsn); + mach_write_to_8(my_assume_aligned<8>(c + 8), end_lsn); + mach_write_to_4(my_assume_aligned<4>(c + 60), my_crc32c(0, c, 60)); + +#ifdef HAVE_PMEM + if (is_mmap()) { - resize_log.write(CHECKPOINT_1, {c, write_size}); - byte *buf= static_cast(aligned_malloc(4096, 4096)); - memset_aligned<4096>(buf, 0, 4096); - header_write(buf, resizing, is_encrypted()); - resize_log.write(0, {buf, 4096}); - aligned_free(buf); + ut_ad(!is_opened()); + if (resizing > 1 && resizing <= next_checkpoint_lsn) + { + memcpy_aligned<64>(resize_buf + CHECKPOINT_1, c, 64); + header_write(resize_buf, resizing, is_encrypted()); + pmem_persist(resize_buf, resize_target); + } + persist_checkpoint: + pmem_persist(c, 64); + } + else +#endif + { + write_checkpoint: + ut_ad(!is_mmap()); + ut_ad(!checkpoint_pending); + checkpoint_pending= true; + latch.wr_unlock(); + log_write_and_flush_prepare(); + resizing= resize_lsn.load(std::memory_order_relaxed); + ut_ad(!resizing || !archive); + ut_ad(ut_is_2pow(write_size)); + ut_ad(write_size >= 512); + ut_ad(write_size <= 4096); + log.write(offset, {c, write_size}); + if (resizing > 1 && resizing <= next_checkpoint_lsn) + { + resize_log.write(CHECKPOINT_1, {c, write_size}); + byte *buf= static_cast(aligned_malloc(4096, 4096)); + memset_aligned<4096>(buf, 0, 4096); + header_write(buf, resizing, is_encrypted()); + resize_log.write(0, {buf, 4096}); + aligned_free(buf); + } + + if (!log_write_through) + ut_a(log.flush()); + latch.wr_lock(SRW_LOCK_CALL); + ut_ad(checkpoint_pending); + checkpoint_pending= false; + resizing= resize_lsn.load(std::memory_order_relaxed); } - if (!log_write_through) - ut_a(log.flush()); - latch.wr_lock(SRW_LOCK_CALL); - ut_ad(checkpoint_pending); - checkpoint_pending= false; - resizing= resize_lsn.load(std::memory_order_relaxed); - 
ut_ad(!resizing || !archive); + next_checkpoint_no++; } + ut_ad(!resizing || !archive); ut_ad(!checkpoint_pending); - next_checkpoint_no++; + this->end_lsn= end_lsn; const lsn_t checkpoint_lsn{next_checkpoint_lsn}; last_checkpoint_lsn= checkpoint_lsn; if (!archive) @@ -1912,7 +1954,6 @@ inline void log_t::write_checkpoint(lsn_t end_lsn) noexcept #endif { /* Mimic archived_mmap_switch_complete() */ - ut_ad(current_lsn >= first_lsn + capacity()); first_lsn+= capacity(); file_size= resize_target; } diff --git a/storage/innobase/include/log0log.h b/storage/innobase/include/log0log.h index 5f480d80f4d7c..bb8c61a957bfe 100644 --- a/storage/innobase/include/log0log.h +++ b/storage/innobase/include/log0log.h @@ -220,7 +220,7 @@ struct log_t /** whether a checkpoint is pending; protected by latch.wr_lock() */ Atomic_relaxed checkpoint_pending; /** next checkpoint number (protected by latch.wr_lock()) */ - byte next_checkpoint_no; + uint16_t next_checkpoint_no; /** Log sequence number when a log file overwrite (broken crash recovery) was noticed. Protected by latch.wr_lock(). */ lsn_t overwrite_warned; @@ -255,6 +255,8 @@ struct log_t std::atomic resize_lsn; /** the log sequence number at the start of the current log file */ lsn_t first_lsn; + /** the log sequence number when the latest checkpoint was initiated */ + lsn_t end_lsn; public: /** current innodb_log_write_ahead_size */ uint write_size; @@ -325,7 +327,7 @@ struct log_t bool is_mmap() const noexcept { return !flush_buf; } /** @return whether a handle to the log is open; - is_mmap() && !is_opened() holds for PMEM */ + is_mmap() && (is_opened() == archive) holds for PMEM */ bool is_opened() const noexcept { return log.is_opened(); } /** @return LSN at which log resizing was started and is still in progress @@ -616,6 +618,18 @@ struct log_t /** @return the first LSN of the log file */ lsn_t get_first_lsn() const noexcept { return first_lsn; } + /** Set the recovered checkpoint. 
+ @param lsn log sequence number of the checkpoint + @param end_lsn LSN passed to write_checkpoint() + @param number checkpoint number */ + void set_recovered_checkpoint(lsn_t lsn, lsn_t end_lsn, uint16_t number) + noexcept + { + next_checkpoint_lsn= lsn; + this->end_lsn= end_lsn; + next_checkpoint_no= number; + } + /** Determine the sequence bit at a log sequence number */ byte get_sequence_bit(lsn_t lsn) const noexcept { diff --git a/storage/innobase/include/log0recv.h b/storage/innobase/include/log0recv.h index 42aee0bc2af97..2a3961d43b317 100644 --- a/storage/innobase/include/log0recv.h +++ b/storage/innobase/include/log0recv.h @@ -406,6 +406,12 @@ struct recv_sys_t @return error code or DB_SUCCESS */ dberr_t find_checkpoint(); + /** Find a checkpoint in an innodb_log_archive=ON file. + @param first_lsn the first LSN of the file + @return error code + @retval DB_SUCCESS if a suitable checkpoint was found */ + dberr_t find_checkpoint_archived(lsn_t first_lsn); + /** Register a redo log snippet for a page. 
@param it page iterator @param l redo log snippet diff --git a/storage/innobase/log/log0log.cc b/storage/innobase/log/log0log.cc index 37fc054ca9d1b..14cb69c7e3ed1 100644 --- a/storage/innobase/log/log0log.cc +++ b/storage/innobase/log/log0log.cc @@ -109,6 +109,8 @@ void log_t::create() noexcept #endif last_checkpoint_lsn= FIRST_LSN; + first_lsn= FIRST_LSN; + end_lsn= FIRST_LSN; log_capacity= 0; max_modified_age_async= 0; max_checkpoint_age= 0; @@ -322,27 +324,32 @@ static inline void log_file_message() noexcept {} bool log_t::attach(log_file_t file, os_offset_t size) noexcept { - log= file; + ut_ad(!log.is_opened()); + ut_ad(!resize_log.is_opened()); ut_ad(!size || size >= START_OFFSET + SIZE_OF_FILE_CHECKPOINT); - file_size= size; - ut_ad(!buf); ut_ad(!flush_buf); ut_ad(!writer); + + file_size= size; + if (size) { # ifdef HAVE_PMEM bool is_pmem; - void *ptr= ::log_mmap(log.m_file, is_pmem, size); + void *ptr= ::log_mmap(file.m_file, is_pmem, size); # else - void *ptr= ::log_mmap(log.m_file, size); + void *ptr= ::log_mmap(file.m_file, size); # endif if (ptr != MAP_FAILED) { # ifdef HAVE_PMEM if (is_pmem) { - log.close(); + if (archive) + log= file; + else + file.close(); log_buffered= false; log_maybe_unbuffered= true; IF_WIN(,mprotect(ptr, size_t(size), PROT_READ)); @@ -357,6 +364,10 @@ bool log_t::attach(log_file_t file, os_offset_t size) noexcept goto func_exit; } } + else + ut_ad(!archive); + + log= file; log_mmap= false; buf= static_cast(ut_malloc_dontdump(buf_size, PSI_INSTRUMENT_ME)); if (!buf) @@ -435,6 +446,7 @@ void log_t::create(lsn_t lsn) noexcept base_lsn.store(lsn, std::memory_order_relaxed); flushed_to_disk_lsn.store(lsn, std::memory_order_relaxed); first_lsn= lsn; + end_lsn= lsn; write_lsn= lsn; if (!archived_lsn) archived_lsn= lsn; @@ -446,11 +458,14 @@ void log_t::create(lsn_t lsn) noexcept #ifdef HAVE_PMEM if (is_mmap()) { - ut_ad(!is_opened()); + ut_ad(is_opened() == archive); mprotect(buf, size_t(file_size), PROT_READ | PROT_WRITE); - 
memset_aligned<4096>(buf, 0, 4096); - log_sys.header_write(buf, lsn, is_encrypted()); - pmem_persist(buf, 512); + if (!archive) + { + memset_aligned<4096>(buf, 0, 4096); + header_write(buf, lsn, is_encrypted()); + pmem_persist(buf, 512); + } buf_size= unsigned(std::min(capacity(), buf_size_max)); } else @@ -459,9 +474,12 @@ void log_t::create(lsn_t lsn) noexcept ut_ad(!is_mmap()); memset_aligned<4096>(flush_buf, 0, buf_size); memset_aligned<4096>(buf, 0, buf_size); - log_sys.header_write(buf, lsn, is_encrypted()); - log.write(0, {buf, 4096}); - memset_aligned<512>(buf, 0, 512); + if (!archive) + { + header_write(buf, lsn, is_encrypted()); + log.write(0, {buf, 4096}); + memset_aligned<512>(buf, 0, 512); + } } } @@ -583,13 +601,13 @@ void log_t::set_write_through(bool write_through) bool(log_write_through) != write_through) { os_file_close_func(log.m_file); - log.m_file= OS_FILE_CLOSED; + log= OS_FILE_CLOSED; log_write_through= write_through; bool success; log.m_file= os_file_create_func(get_path().c_str(), OS_FILE_OPEN, OS_LOG_FILE, false, &success); - ut_a(log.m_file != OS_FILE_CLOSED); + ut_a(log.is_opened()); sql_print_information(log_write_through ? "InnoDB: Log writes write through" : "InnoDB: Log writes may be cached"); @@ -610,8 +628,7 @@ void log_t::header_rewrite(my_bool archive) noexcept format for the last file. */ byte* c= checkpoint_buf; - lsn_t end_lsn= first_lsn + 1; // FIXME: store this in log_sys? 
- ut_ad(end_lsn > first_lsn); + ut_ad(end_lsn >= first_lsn); ut_ad(!archive || end_lsn <= first_lsn + ~0U); #ifdef HAVE_PMEM if (!c) @@ -691,14 +708,32 @@ void log_t::set_archive(my_bool archive) noexcept if (archive == this->archive) break; #ifdef HAVE_PMEM - if (is_backoff() && is_mmap()) + if (is_mmap()) { - /* Prevent a race condition with append_prepare() */ + ut_ad(this->archive == log.is_opened()); + if (is_backoff()) + /* Prevent a race condition with append_prepare() */ + goto retry; + if (archive); + else if (resize_buf) + /* Wait for a call to archived_mmap_switch_complete() */ + goto retry; + else + log.close(); + } +#endif + else if (checkpoint_pending) + { + /* Prevent a race condition with write_checkpoint() */ +#ifdef HAVE_PMEM + retry: +#endif IF_WIN(log_resize_release(), latch.wr_unlock()); continue; } -#endif - ut_ad(!resize_buf); /* FIXME: wait for write_checkpoint() */ + + ut_ad(!resize_buf); + ut_ad(!resize_log.is_opened()); // FIXME: wait for checkpoint? const lsn_t old_first_lsn{first_lsn}; if (archive) @@ -713,6 +748,22 @@ void log_t::set_archive(my_bool archive) noexcept std::swap(old_name, new_name); header_rewrite(archive); } +#if defined HAVE_PMEM && !defined _WIN32 + else if (is_mmap()) + { + /* Open the file so that write_checkpoint() + will be able to flag it read-only */ + bool success; + log.m_file= + os_file_create_func(old_name, OS_FILE_OPEN, OS_LOG_FILE, + false, &success); + if (!log.is_opened()) + { + my_error(ER_ERROR_ON_READ, MYF(0), old_name, errno); + break; + } + } +#endif #ifdef _WIN32 /* On Microsoft Windows, there must be no open file handles to a @@ -1160,6 +1211,7 @@ void log_t::archived_mmap_switch_prepare(bool late, bool ex) noexcept { ut_ad(archive); ut_ad(is_mmap()); + ut_ad(log.is_opened()); ut_ad(!resize_log.is_opened()); ut_ad(!resize_buf); ut_ad(!resize_in_progress()); @@ -1207,11 +1259,11 @@ void log_t::archived_mmap_switch_prepare(bool late, bool ex) noexcept (l & (WRITE_BACKOFF - 1)); waits++; 
ut_ad(archive); - ut_ad(!resize_log.is_opened()); ut_ad(!resize_buf); ut_ad(!resize_in_progress()); ut_ad(resize_target >= 4U << 20); ut_ad(is_latest()); + ut_ad(log.is_opened()); ut_ad(!resize_log.is_opened()); do @@ -1229,15 +1281,15 @@ void log_t::archived_mmap_switch_prepare(bool late, bool ex) noexcept bool is_pmem{false}; resize_buf= static_cast(::log_mmap(file, is_pmem, resize_target)); - IF_WIN(os_file_close(file),); if (resize_buf != MAP_FAILED) { /* Will be closed in write_checkpoint() */ - IF_WIN(,resize_log= file); + resize_log= log; + log= file; continue; } resize_buf= nullptr; - IF_WIN(,os_file_close(file)); + os_file_close(file); } } @@ -1263,14 +1315,14 @@ void log_t::archived_mmap_switch_prepare(bool late, bool ex) noexcept void log_t::persist(lsn_t lsn) noexcept { - ut_ad(!is_opened()); ut_ad(!write_lock.is_owner()); ut_ad(!flush_lock.is_owner()); ut_ad(latch_have_wr()); + ut_ad(is_opened() == archive); lsn_t old= flushed_to_disk_lsn.load(std::memory_order_relaxed); - if (old >= lsn) + if (old > lsn) return; const size_t start(calc_lsn_offset(old)); @@ -1612,38 +1664,33 @@ void log_t::clear_mmap() noexcept { if (!is_mmap() || high_level_read_only) return; -#ifdef HAVE_PMEM - if (!is_opened()) - { - ut_d(latch.wr_lock(SRW_LOCK_CALL)); - ut_ad(!resize_in_progress()); - ut_ad(get_lsn() == get_flushed_lsn(std::memory_order_relaxed)); - ut_d(latch.wr_unlock()); - return; - } -#endif log_resize_acquire(); ut_ad(!resize_in_progress()); - ut_ad(write_lsn == get_lsn()); - ut_ad(write_lsn == get_flushed_lsn(std::memory_order_relaxed)); - - if (buf) /* this may be invoked while creating a new database */ + ut_ad(get_lsn() == get_flushed_lsn(std::memory_order_relaxed)); +#ifdef HAVE_PMEM + if (is_opened() && !archive) +#endif { - alignas(16) byte log_block[4096]; - const size_t bs{write_size}; + ut_ad(write_lsn == get_lsn()); + + if (buf) /* this may be invoked while creating a new database */ { - const size_t bf= - size_t(write_lsn - 
base_lsn.load(std::memory_order_relaxed)); - memcpy_aligned<16>(log_block, buf + (bf & ~(bs - 1)), bs); - } + alignas(16) byte log_block[4096]; + const size_t bs{write_size}; + { + const size_t bf= + size_t(write_lsn - base_lsn.load(std::memory_order_relaxed)); + memcpy_aligned<16>(log_block, buf + (bf & ~(bs - 1)), bs); + } - close_file(false); - log_mmap= false; - ut_a(attach(log, file_size)); - ut_ad(!is_mmap()); + close_file(false); + log_mmap= false; + ut_a(attach(log, file_size)); + ut_ad(!is_mmap()); - memcpy_aligned<16>(buf, log_block, bs); + memcpy_aligned<16>(buf, log_block, bs); + } } log_resize_release(); } diff --git a/storage/innobase/log/log0recv.cc b/storage/innobase/log/log0recv.cc index cb41002b0933a..6bfc75d274656 100644 --- a/storage/innobase/log/log0recv.cc +++ b/storage/innobase/log/log0recv.cc @@ -1702,8 +1702,8 @@ static bool recv_sys_invalid_rpo(lsn_t lsn) noexcept dberr_t recv_sys_t::find_checkpoint() { - bool wrong_size= false; byte *buf; + lsn_t first_lsn= 0; ut_ad(pages.empty()); pages_it= pages.end(); @@ -1711,31 +1711,48 @@ dberr_t recv_sys_t::find_checkpoint() if (files.empty()) { file_checkpoint= 0; - int archive= 0; + int archive= log_sys.archive; retry: std::string path{log_sys.get_circular_path()}; bool success; - os_file_t file{os_file_create_func(path.c_str(), archive - ? OS_FILE_OPEN - : OS_FILE_OPEN_SILENT, + os_file_t file{os_file_create_func(path.c_str(), archive < 0 + ? 
OS_FILE_OPEN : OS_FILE_OPEN_SILENT, OS_LOG_FILE, srv_read_only_mode, &success)}; - if (file != OS_FILE_CLOSED); - else if (archive) + if (file != OS_FILE_CLOSED) + { + if (archive > 0) + { + sql_print_error("InnoDB: innodb_log_archive=ON but %s exists", + path.c_str()); + return DB_ERROR; + } + } + else if (archive < 0) return DB_ERROR; else { - archive= -1; #ifdef _WIN32 WIN32_FIND_DATAA entry; HANDLE d= FindFirstFileA(srv_log_group_home_dir, &entry); - if (d == INVALID_HANDLE_VALUE) - goto retry; + if (d != INVALID_HANDLE_VALUE) + goto readdir; #else DIR *d= opendir(srv_log_group_home_dir); - if (!d) - goto retry; + if (d) + goto readdir; #endif + no_archive_found: + if (archive) + sql_print_error("InnoDB: innodb_log_archive files not found in '%s'", + srv_log_group_home_dir); + no_archive_found_reported: + if (archive) + return DB_ERROR; + archive= -1; + goto retry; + + readdir: std::map logs; #ifdef _WIN32 do @@ -1762,38 +1779,50 @@ dberr_t recv_sys_t::find_checkpoint() lsn_t lsn; int n{0}; const char *fn{e->d_name}; - if (1 != sscanf(fn, "ib_%016" PRIx64 ".log%n", &lsn, &n) || fn[n]) + if (1 != sscanf(fn, "ib_%016" PRIx64 ".log%n", &lsn, &n) || fn[n] || + lsn < log_t::FIRST_LSN) continue; path.assign(srv_log_group_home_dir); path.push_back('/'); - path.append(fn); struct stat st; - if (stat(path.c_str(), &st) || + if (stat(log_sys.append_archive_name(path, lsn).c_str(), &st) || st.st_size < static_cast (log_t::START_OFFSET + SIZE_OF_FILE_CHECKPOINT)) { sql_print_warning("InnoDB: ignoring %s", path.c_str()); continue; } - logs.emplace(lsn, lsn + st.st_size); + logs.emplace(lsn, lsn - log_t::START_OFFSET + st.st_size); } closedir(d); #endif + first_lsn= 0; auto min= logs.cbegin(); const auto end= logs.cend(); if (min == end) - goto retry; - + goto no_archive_found; + int subsequent= 0; for (auto i= min;;) { auto prev= i++; if (i == end) + { + if (!recovery_start) + subsequent= 0, first_lsn= prev->first; break; - if (prev->second != i->first) - min= i; + } + if 
(prev->second == i->first) + subsequent++; + else + subsequent=0, first_lsn= i->first; } + // TODO: validate innodb_log_archive_start, innodb_log_recovery_start, + // innodb_log_recovery_target + + // FIXME: open the file that is determined by recovery_start + path.assign(srv_log_group_home_dir); switch (path.back()) { #ifdef _WIN32 @@ -1804,18 +1833,16 @@ dberr_t recv_sys_t::find_checkpoint() default: path.push_back('/'); } - // TODO: validate innodb_log_archive_start, innodb_log_recovery_start, - // innodb_log_recovery_target - file= os_file_create_func(log_sys.append_archive_name(path, min->first). + file= os_file_create_func(log_sys.append_archive_name(path, first_lsn). c_str(), OS_FILE_OPEN, OS_LOG_FILE, - srv_read_only_mode, &success); + subsequent || srv_read_only_mode, &success); if (file == OS_FILE_CLOSED) - goto retry; - archive= 1; + goto no_archive_found_reported; + log_sys.archive= true; } const os_offset_t size{os_file_get_size(file)}; - if (!size) + if (!size && !log_sys.archive) { if (srv_operation != SRV_OPERATION_NORMAL) goto too_small; @@ -1823,8 +1850,7 @@ dberr_t recv_sys_t::find_checkpoint() else if (size < log_t::START_OFFSET + SIZE_OF_FILE_CHECKPOINT) { too_small: - sql_print_error("InnoDB: File %.*s is too small", - int(path.size()), path.data()); + sql_print_error("InnoDB: File %s is too small", path.c_str()); err_exit: os_file_close(file); return DB_ERROR; @@ -1835,6 +1861,9 @@ dberr_t recv_sys_t::find_checkpoint() file= OS_FILE_CLOSED; recv_sys.files.emplace_back(file); + if (log_sys.archive) + goto find_checkpoint; + for (int i= 1; i < 101; i++) { path= log_sys.get_circular_path(i); @@ -1849,14 +1878,14 @@ dberr_t recv_sys_t::find_checkpoint() sql_print_error("InnoDB: Log file %.*s is of different size " UINT64PF " bytes than other log files " UINT64PF " bytes!", int(path.size()), path.data(), sz, size); - wrong_size= true; + first_lsn= LSN_MAX; } recv_sys.files.emplace_back(file); } if (!size) { - if (wrong_size) + if (first_lsn == 
LSN_MAX) return DB_CORRUPTION; lsn= log_sys.next_checkpoint_lsn; log_sys.format= log_t::FORMAT_3_23; @@ -1864,19 +1893,26 @@ dberr_t recv_sys_t::find_checkpoint() } } else + { ut_ad(srv_operation == SRV_OPERATION_BACKUP); + ut_ad(!log_sys.archive); + } + find_checkpoint: log_sys.next_checkpoint_lsn= 0; lsn= 0; buf= my_assume_aligned<4096>(log_sys.buf); if (!log_sys.is_mmap()) if (dberr_t err= log_sys.log.read(0, {buf, log_sys.START_OFFSET})) return err; + if (log_sys.archive) + return find_checkpoint_archived(first_lsn); + /* Check the header page checksum. There was no checksum in the first redo log format (version 0). */ log_sys.format= mach_read_from_4(buf + LOG_HEADER_FORMAT); if (log_sys.format == log_t::FORMAT_3_23) { - if (wrong_size) + if (first_lsn == LSN_MAX) return DB_CORRUPTION; if (dberr_t err= recv_log_recover_pre_10_2()) return err; @@ -1904,7 +1940,7 @@ dberr_t recv_sys_t::find_checkpoint() return DB_CORRUPTION; } - const lsn_t first_lsn{mach_read_from_8(buf + LOG_HEADER_START_LSN)}; + first_lsn= mach_read_from_8(buf + LOG_HEADER_START_LSN); log_sys.set_first_lsn(first_lsn); if (!log_sys.archived_lsn) log_sys.archived_lsn= first_lsn; @@ -1962,11 +1998,8 @@ dberr_t recv_sys_t::find_checkpoint() } if (checkpoint_lsn >= log_sys.next_checkpoint_lsn) - { - log_sys.next_checkpoint_lsn= checkpoint_lsn; - log_sys.next_checkpoint_no= field == log_t::CHECKPOINT_1; - lsn= end_lsn; - } + log_sys.set_recovered_checkpoint(checkpoint_lsn, lsn= end_lsn, + field == log_t::CHECKPOINT_1); } if (!log_sys.next_checkpoint_lsn) goto got_no_checkpoint; @@ -2038,7 +2071,7 @@ dberr_t recv_sys_t::find_checkpoint() return DB_ERROR; } - if (wrong_size) + if (first_lsn == LSN_MAX) return DB_CORRUPTION; if (dberr_t err= recv_log_recover_10_5(lsn_offset)) @@ -2599,7 +2632,7 @@ recv_sys_t::parse_mtr_result recv_sys_t::parse(source l, bool if_exists) (srv_operation == SRV_OPERATION_BACKUP || srv_operation == SRV_OPERATION_BACKUP_NO_DEFER)); mysql_mutex_assert_owner(&mutex); - 
ut_ad(log_sys.next_checkpoint_lsn); + ut_ad(log_sys.next_checkpoint_lsn || log_sys.archive); ut_ad(log_sys.is_recoverable()); ut_ad(log_sys.format == format); @@ -2862,14 +2895,21 @@ log_parse_file(const page_id_t id, bool if_exists, ? "ignored" : recv_sys.file_checkpoint ? "reread" : "read", recv_sys.lsn)); - if (c == log_sys.next_checkpoint_lsn) + if (c == log_sys.next_checkpoint_lsn || !log_sys.next_checkpoint_lsn) { /* There can be multiple FILE_CHECKPOINT for the same LSN. */ if (!recv_sys.file_checkpoint) { + ut_ad(log_sys.next_checkpoint_lsn || log_sys.archive); + ut_ad(log_sys.last_checkpoint_lsn || log_sys.archive); + log_sys.next_checkpoint_lsn= c; + if (!log_sys.last_checkpoint_lsn) + log_sys.last_checkpoint_lsn= c; recv_sys.file_checkpoint= recv_sys.lsn; return recv_sys_t::GOT_EOF; } + else + ut_ad(log_sys.next_checkpoint_lsn); } } break; @@ -3342,6 +3382,7 @@ recv_sys_t::parse_mtr_result recv_sys_t::parse_mmap(bool if_exists) recv_sys_t::parse_mtr_result r{parse_mtr(if_exists)}; if (UNIV_LIKELY(r != PREMATURE_EOF) || !log_sys.is_mmap()) return r; + ut_ad(!log_sys.archive); // FIXME: implement ut_ad(recv_sys.len == log_sys.file_size); ut_ad(recv_sys.offset >= log_sys.START_OFFSET); ut_ad(recv_sys.offset <= recv_sys.len); @@ -4419,7 +4460,7 @@ static bool recv_scan_log(bool last_phase, const recv_sys_t::parser *parser) ut_ad(!end || end == recv_sys.lsn); bool corrupt_fs= recv_sys.is_corrupt_fs(); - if (!end && !corrupt_fs) + if (!end && !corrupt_fs && !log_sys.archive) { recv_sys.set_corrupt_log(); sql_print_error("InnoDB: Missing FILE_CHECKPOINT(" LSN_PF @@ -4872,6 +4913,60 @@ static recv_sys_t::parser get_parse_mmap() noexcept ut_error; } +dberr_t recv_sys_t::find_checkpoint_archived(lsn_t first_lsn) +{ + // FIXME: retrieve encrypted format from file name? 
+ log_sys.format= log_t::FORMAT_10_8; + log_sys.set_first_lsn(first_lsn); + if (!log_sys.archived_lsn) + log_sys.archived_lsn= first_lsn; + /* Validate the checkpoints */ + lsn_t end_lsn{first_lsn}, recovery_start_end_lsn{0}, checkpoint{0}; + uint16_t n_checkpoint; + const recv_sys_t::parser parser[2] { + get_parse_mmap(), get_parse_mmap() + }; + byte header[log_sys.START_OFFSET]; + const byte *buf= log_sys.buf; + if (log_sys.is_mmap()) + buf= static_cast(memcpy(header, buf, sizeof header)); + for (n_checkpoint= 0; n_checkpoint < sizeof header / 4; n_checkpoint++) + { + const uint32_t d{mach_read_from_4(&buf[4 * n_checkpoint])}; + if (!d && n_checkpoint) + break; + lsn= end_lsn+= d; + file_checkpoint= 0; + log_sys.next_checkpoint_lsn= 0; + ut_d(const bool rescan=) recv_scan_log(false, parser); + ut_ad(rescan); + if (!file_checkpoint) + { + found_corrupt_log= false; + break; + } + checkpoint= log_sys.next_checkpoint_lsn; + ut_ad(checkpoint); + if (checkpoint == recovery_start) + recovery_start_end_lsn= end_lsn; + } + + if (!recovery_start); + else if (recovery_start_end_lsn) + checkpoint= recovery_start, end_lsn= recovery_start_end_lsn; + else + { + sql_print_error("InnoDB: Did not find innodb_log_recovery_start=" LSN_PF + " between " LSN_PF " and " LSN_PF " (" LSN_PF ")", + recovery_start, first_lsn, end_lsn, file_checkpoint); + return DB_CORRUPTION; + } + + file_checkpoint= 0; + log_sys.set_recovered_checkpoint(checkpoint, lsn= end_lsn, n_checkpoint); + return DB_SUCCESS; +} + /** Start recovering from a redo log checkpoint. 
of first system tablespace page @return error code or DB_SUCCESS */ @@ -4914,12 +5009,13 @@ dberr_t recv_recovery_from_checkpoint_start() if (log_sys.is_recoverable()) { if (recv_sys.recovery_start > log_sys.next_checkpoint_lsn) { + ut_ad(!log_sys.archive); // already checked sql_print_error("InnoDB: impossible " "innodb_log_recovery_start=%" PRIu64 ">%" PRIu64, recv_sys.recovery_start, log_sys.next_checkpoint_lsn); - goto err_exit; + goto err_exit; // FIXME: remove this? } else { log_sys.last_checkpoint_lsn = recv_sys.recovery_start ? recv_sys.recovery_start diff --git a/storage/innobase/srv/srv0start.cc b/storage/innobase/srv/srv0start.cc index b381c581f2166..9556dd4aea96e 100644 --- a/storage/innobase/srv/srv0start.cc +++ b/storage/innobase/srv/srv0start.cc @@ -1210,6 +1210,8 @@ static dberr_t srv_log_rebuild_if_needed() if (srv_read_only_mode) /* Leave the redo log alone. */ return DB_SUCCESS; + if (log_sys.archive) + return DB_SUCCESS; /* Never rebuild archived log files. */ if (log_sys.file_size == srv_log_file_size && log_sys.format == @@ -1462,6 +1464,10 @@ dberr_t srv_start(bool create_new_db) || srv_operation == SRV_OPERATION_RESTORE || srv_operation == SRV_OPERATION_RESTORE_EXPORT); ut_ad(!recv_sys.recovery_on); + /* Suppress warnings in fil_space_t::create() for files + that are being read before dict_boot() has recovered + DICT_HDR_MAX_SPACE_ID. */ + fil_system.space_id_reuse_warned = true; if (srv_force_recovery >= SRV_FORCE_NO_LOG_REDO) { sql_print_information("InnoDB: innodb_force_recovery=6" @@ -1614,11 +1620,6 @@ dberr_t srv_start(bool create_new_db) if (log_sys.archive) log_sys.archive_set_size(); } else { - /* Suppress warnings in fil_space_t::create() for files - that are being read before dict_boot() has recovered - DICT_HDR_MAX_SPACE_ID. 
*/ - fil_system.space_id_reuse_warned = true; - /* We always try to do a recovery, even if the database had been shut down normally: this is the normal startup path */ From d7fa86e0e3cad836e1eec976e13c65b0ae345e7d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marko=20M=C3=A4kel=C3=A4?= Date: Fri, 9 Jan 2026 15:04:59 +0200 Subject: [PATCH 08/33] Write correct first checkpoint to subsequent archived log --- storage/innobase/buf/buf0flu.cc | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/storage/innobase/buf/buf0flu.cc b/storage/innobase/buf/buf0flu.cc index f03516c610ab2..d0fcc5fdb5e15 100644 --- a/storage/innobase/buf/buf0flu.cc +++ b/storage/innobase/buf/buf0flu.cc @@ -1826,6 +1826,9 @@ inline void log_t::write_checkpoint(lsn_t end_lsn) noexcept DBUG_PRINT("ib_log", ("checkpoint at " LSN_PF " written", next_checkpoint_lsn)); + if (archive && resize_log.is_opened()) + next_checkpoint_no= 0; + const auto n= next_checkpoint_no; size_t offset; static_assert(CPU_LEVEL1_DCACHE_LINESIZE >= 64, "efficiency"); @@ -1838,7 +1841,8 @@ inline void log_t::write_checkpoint(lsn_t end_lsn) noexcept ut_ad(!resizing); offset= n * 4; ut_a(offset < START_OFFSET); // FIXME: better guard for this - const lsn_t d{end_lsn - this->end_lsn}; + const lsn_t d= end_lsn - + (resize_log.is_opened() ? first_lsn : this->end_lsn); ut_a(d <= lsn_t{~uint32_t{0}}); if (!d) From 17d4148a21da15ccfb2d693f09dab8114b60f1cc Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marko=20M=C3=A4kel=C3=A4?= Date: Fri, 9 Jan 2026 16:49:05 +0200 Subject: [PATCH 09/33] log_parse_file(): Do not invoke fil_name_process() during recv_sys_t::find_checkpoint_archived(). recv_sys_t::find_checkpoint_archived(): Fix some glitches, and enforce innodb_log_recovery_target. TODO: Multi-file recovery TODO: If there is no valid checkpoint in the last archived file, try the previous one (or start from the first not-read-only log file that is followed by at most one not-read-only log file).
TODO: Separate file name pattern for encrypted log files. --- storage/innobase/log/log0recv.cc | 34 +++++++++++++++++++++++++++++--- 1 file changed, 31 insertions(+), 3 deletions(-) diff --git a/storage/innobase/log/log0recv.cc b/storage/innobase/log/log0recv.cc index 6bfc75d274656..0b97e625ad912 100644 --- a/storage/innobase/log/log0recv.cc +++ b/storage/innobase/log/log0recv.cc @@ -2953,6 +2953,18 @@ log_parse_file(const page_id_t id, bool if_exists, break; } + if (!log_sys.next_checkpoint_lsn) + { + /* We are validating checkpoints in + recv_log_t::find_checkpoint_archived(). We must not open and + validate data files until we actually start recovery from a + checkpoint, because there could be lots of FILE_MODIFY and + FILE_CHECKPOINT log records to be parsed. */ + ut_ad(!recv_sys.file_checkpoint); + ut_ad(log_sys.archive); + return recv_sys_t::OK; + } + fil_name_process(reinterpret_cast(l), fnend - l, space_id, fn2 ? FILE_MODIFY : mfile_type_t(b & 0xf0), recv_sys.start_lsn, if_exists); @@ -4921,11 +4933,12 @@ dberr_t recv_sys_t::find_checkpoint_archived(lsn_t first_lsn) if (!log_sys.archived_lsn) log_sys.archived_lsn= first_lsn; /* Validate the checkpoints */ - lsn_t end_lsn{first_lsn}, recovery_start_end_lsn{0}, checkpoint{0}; + lsn_t end_lsn{first_lsn}, checkpoint{0}, recovery_start_end_lsn{0}; uint16_t n_checkpoint; const recv_sys_t::parser parser[2] { get_parse_mmap(), get_parse_mmap() }; + ut_ad(recv_spaces.empty()); byte header[log_sys.START_OFFSET]; const byte *buf= log_sys.buf; if (log_sys.is_mmap()) @@ -4935,22 +4948,34 @@ dberr_t recv_sys_t::find_checkpoint_archived(lsn_t first_lsn) const uint32_t d{mach_read_from_4(&buf[4 * n_checkpoint])}; if (!d && n_checkpoint) break; - lsn= end_lsn+= d; + lsn= end_lsn + d; file_checkpoint= 0; log_sys.next_checkpoint_lsn= 0; ut_d(const bool rescan=) recv_scan_log(false, parser); ut_ad(rescan); + ut_ad(recv_spaces.empty()); if (!file_checkpoint) { found_corrupt_log= false; break; } + ut_ad(file_checkpoint == lsn); 
+ end_lsn+= d; checkpoint= log_sys.next_checkpoint_lsn; ut_ad(checkpoint); + ut_ad(checkpoint < lsn); if (checkpoint == recovery_start) recovery_start_end_lsn= end_lsn; } + if (!checkpoint) + { + /* FIXME: try with the preceding file as well! */ + sql_print_error("InnoDB: Did not find any checkpoint after LSN=" LSN_PF, + first_lsn); + return DB_CORRUPTION; + } + if (!recovery_start); else if (recovery_start_end_lsn) checkpoint= recovery_start, end_lsn= recovery_start_end_lsn; @@ -4958,10 +4983,13 @@ dberr_t recv_sys_t::find_checkpoint_archived(lsn_t first_lsn) { sql_print_error("InnoDB: Did not find innodb_log_recovery_start=" LSN_PF " between " LSN_PF " and " LSN_PF " (" LSN_PF ")", - recovery_start, first_lsn, end_lsn, file_checkpoint); + recovery_start, first_lsn, end_lsn, checkpoint); return DB_CORRUPTION; } + if (recv_sys_invalid_rpo(recv_sys_rpo_exceeded)) + return DB_READ_ONLY; + file_checkpoint= 0; log_sys.set_recovered_checkpoint(checkpoint, lsn= end_lsn, n_checkpoint); return DB_SUCCESS; From 2673d37912013459202e7b0c41c65db501870a15 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marko=20M=C3=A4kel=C3=A4?= Date: Mon, 12 Jan 2026 15:15:29 +0200 Subject: [PATCH 10/33] Fix mysql-test/mtr --mysqld=--loose-innodb-log-archive --- mysql-test/mariadb-test-run.pl | 4 ++-- mysql-test/suite/innodb/include/skip_innodb_log_archive.inc | 4 ++++ mysql-test/suite/innodb/r/log_upgrade.result | 2 +- mysql-test/suite/innodb/r/log_upgrade_101_flags.result | 2 +- mysql-test/suite/innodb/t/doublewrite.test | 1 + mysql-test/suite/innodb/t/doublewrite_debug.test | 1 + mysql-test/suite/innodb/t/encryption_threads_shutdown.test | 1 + mysql-test/suite/innodb/t/log_corruption.test | 1 + mysql-test/suite/innodb/t/log_corruption_recovery.test | 1 + mysql-test/suite/innodb/t/log_file.test | 1 + mysql-test/suite/innodb/t/log_file_size.test | 1 + mysql-test/suite/innodb/t/log_file_size_online.test | 1 + mysql-test/suite/innodb/t/log_upgrade.test | 1 + 
mysql-test/suite/innodb/t/log_upgrade_101_flags.test | 2 +- mysql-test/suite/innodb/t/row_format_redundant.test | 4 ++++ mysql-test/suite/innodb/t/sys_defragment.test | 1 + mysql-test/suite/innodb/t/sys_defragment_fail.test | 1 + mysql-test/suite/innodb/t/table_flags.test | 4 ++++ 18 files changed, 28 insertions(+), 5 deletions(-) create mode 100644 mysql-test/suite/innodb/include/skip_innodb_log_archive.inc diff --git a/mysql-test/mariadb-test-run.pl b/mysql-test/mariadb-test-run.pl index ee3412c9cd154..741ab3e49036a 100755 --- a/mysql-test/mariadb-test-run.pl +++ b/mysql-test/mariadb-test-run.pl @@ -326,7 +326,7 @@ END my $opt_debug_sync_timeout= 300; # Default timeout for WAIT_FOR actions. my $warn_seconds = 60; -my $rebootstrap_re= '--innodb[-_](?:page[-_]size|checksum[-_]algorithm|undo[-_]tablespaces|log[-_]group[-_]home[-_]dir|data[-_]home[-_]dir)|data[-_]file[-_]path|force_rebootstrap'; +my $rebootstrap_re= '--innodb[-_](?:page[-_]size|checksum[-_]algorithm|undo[-_]tablespaces|log[-_](group[-_]home[-_]dir|archive)|data[-_]home[-_]dir)|data[-_]file[-_]path|force_rebootstrap'; sub testcase_timeout ($) { return $opt_testcase_timeout * 60; } sub check_timeout ($) { return testcase_timeout($_[0]); } @@ -3145,7 +3145,7 @@ sub mysql_install_db { # need to be given to the bootstrap process as well as the # server process. 
foreach my $extra_opt ( @opt_extra_mysqld_opt ) { - if ($extra_opt =~ /--innodb/) { + if ($extra_opt =~ /--((loose|skip)[-_])*innodb/) { mtr_add_arg($args, $extra_opt); } } diff --git a/mysql-test/suite/innodb/include/skip_innodb_log_archive.inc b/mysql-test/suite/innodb/include/skip_innodb_log_archive.inc new file mode 100644 index 0000000000000..984e5fefa369a --- /dev/null +++ b/mysql-test/suite/innodb/include/skip_innodb_log_archive.inc @@ -0,0 +1,4 @@ +if (`SELECT @@GLOBAL.innodb_log_archive`) +{ + --skip Test requires innodb_log_archive=OFF +} diff --git a/mysql-test/suite/innodb/r/log_upgrade.result b/mysql-test/suite/innodb/r/log_upgrade.result index 4da83460f9364..d1f1867778c34 100644 --- a/mysql-test/suite/innodb/r/log_upgrade.result +++ b/mysql-test/suite/innodb/r/log_upgrade.result @@ -2,7 +2,7 @@ call mtr.add_suppression("InnoDB: The change buffer is corrupted"); # # MDEV-24412 InnoDB: Upgrade after a crash is not supported # -# restart: --innodb-data-home-dir=MYSQLTEST_VARDIR/tmp/log_upgrade --innodb-log-group-home-dir=MYSQLTEST_VARDIR/tmp/log_upgrade --innodb-force-recovery=5 --innodb-log-file-size=4m +# restart: --innodb-data-home-dir=MYSQLTEST_VARDIR/tmp/log_upgrade --innodb-log-group-home-dir=MYSQLTEST_VARDIR/tmp/log_upgrade --skip-innodb-log-archive --innodb-force-recovery=5 --innodb-log-file-size=4m SELECT COUNT(*) FROM INFORMATION_SCHEMA.ENGINES WHERE engine = 'innodb' AND support IN ('YES', 'DEFAULT', 'ENABLED'); diff --git a/mysql-test/suite/innodb/r/log_upgrade_101_flags.result b/mysql-test/suite/innodb/r/log_upgrade_101_flags.result index 9bcb786170a63..1f5772c1d30f9 100644 --- a/mysql-test/suite/innodb/r/log_upgrade_101_flags.result +++ b/mysql-test/suite/innodb/r/log_upgrade_101_flags.result @@ -1,7 +1,7 @@ call mtr.add_suppression("InnoDB: The change buffer is corrupted"); call mtr.add_suppression("InnoDB: Tablespace size stored in header is 768 pages, but the sum of data file sizes is 384 pages"); call mtr.add_suppression("InnoDB: 
adjusting FSP_SPACE_FLAGS of file"); -# restart: --innodb-data-home-dir=MYSQLTEST_VARDIR/tmp/log_upgrade --innodb-log-group-home-dir=MYSQLTEST_VARDIR/tmp/log_upgrade --innodb-undo-directory=MYSQLTEST_VARDIR/tmp/log_upgrade --innodb-force-recovery=5 --innodb-log-file-size=4m --innodb_page_size=32k --innodb_buffer_pool_size=11M +# restart: --innodb-data-home-dir=MYSQLTEST_VARDIR/tmp/log_upgrade --innodb-log-group-home-dir=MYSQLTEST_VARDIR/tmp/log_upgrade --innodb-undo-directory=MYSQLTEST_VARDIR/tmp/log_upgrade --skip-innodb-log-archive --innodb-force-recovery=5 --innodb-log-file-size=4m --innodb_page_size=32k --innodb_buffer_pool_size=11M SELECT COUNT(*) FROM INFORMATION_SCHEMA.ENGINES WHERE engine = 'innodb' AND support IN ('YES', 'DEFAULT', 'ENABLED'); diff --git a/mysql-test/suite/innodb/t/doublewrite.test b/mysql-test/suite/innodb/t/doublewrite.test index da059bf80ea9a..9a9e12c4260d1 100644 --- a/mysql-test/suite/innodb/t/doublewrite.test +++ b/mysql-test/suite/innodb/t/doublewrite.test @@ -5,6 +5,7 @@ --source include/innodb_page_size.inc --source include/not_embedded.inc +--source include/skip_innodb_log_archive.inc --disable_query_log call mtr.add_suppression("InnoDB: Data file .* uses page size .* but the innodb_page_size start-up parameter is"); diff --git a/mysql-test/suite/innodb/t/doublewrite_debug.test b/mysql-test/suite/innodb/t/doublewrite_debug.test index e31cf34dbc110..bdced0d5c04cd 100644 --- a/mysql-test/suite/innodb/t/doublewrite_debug.test +++ b/mysql-test/suite/innodb/t/doublewrite_debug.test @@ -7,6 +7,7 @@ --source include/innodb_page_size.inc --source include/have_debug.inc --source include/not_embedded.inc +--source include/skip_innodb_log_archive.inc --disable_query_log call mtr.add_suppression("InnoDB: Data file .* uses page size .* but the innodb_page_size start-up parameter is"); call mtr.add_suppression("InnoDB: adjusting FSP_SPACE_FLAGS"); diff --git a/mysql-test/suite/innodb/t/encryption_threads_shutdown.test 
b/mysql-test/suite/innodb/t/encryption_threads_shutdown.test index d6e58a19ad9c5..079d4f07dd1b6 100644 --- a/mysql-test/suite/innodb/t/encryption_threads_shutdown.test +++ b/mysql-test/suite/innodb/t/encryption_threads_shutdown.test @@ -1,5 +1,6 @@ # This test is for MDEV-24612 fix --source include/have_innodb.inc +--source include/skip_innodb_log_archive.inc --source include/not_embedded.inc call mtr.add_suppression("Creating system tablespace with existing redo log file is not recommended."); diff --git a/mysql-test/suite/innodb/t/log_corruption.test b/mysql-test/suite/innodb/t/log_corruption.test index c3fc2242b2132..8b911b2ef62d0 100644 --- a/mysql-test/suite/innodb/t/log_corruption.test +++ b/mysql-test/suite/innodb/t/log_corruption.test @@ -1,6 +1,7 @@ --source include/have_innodb.inc --source include/have_innodb_16k.inc --source include/no_valgrind_without_big.inc +--source include/skip_innodb_log_archive.inc --disable_query_log call mtr.add_suppression("InnoDB: Upgrade after a crash is not supported"); diff --git a/mysql-test/suite/innodb/t/log_corruption_recovery.test b/mysql-test/suite/innodb/t/log_corruption_recovery.test index 489010f9b774d..ac9368d604263 100644 --- a/mysql-test/suite/innodb/t/log_corruption_recovery.test +++ b/mysql-test/suite/innodb/t/log_corruption_recovery.test @@ -1,4 +1,5 @@ --source include/have_innodb.inc +--source include/skip_innodb_log_archive.inc --let DATADIR=`select @@datadir` let $check_no_innodb=SELECT * FROM INFORMATION_SCHEMA.ENGINES diff --git a/mysql-test/suite/innodb/t/log_file.test b/mysql-test/suite/innodb/t/log_file.test index 7ff0de0fc013a..880226cd84f75 100644 --- a/mysql-test/suite/innodb/t/log_file.test +++ b/mysql-test/suite/innodb/t/log_file.test @@ -4,6 +4,7 @@ --source include/have_innodb.inc --source include/no_valgrind_without_big.inc +--source include/skip_innodb_log_archive.inc --disable_query_log call mtr.add_suppression("InnoDB: Could not create undo tablespace.*undo002"); diff --git 
a/mysql-test/suite/innodb/t/log_file_size.test b/mysql-test/suite/innodb/t/log_file_size.test index ea186965e5f92..6fd73865320f6 100644 --- a/mysql-test/suite/innodb/t/log_file_size.test +++ b/mysql-test/suite/innodb/t/log_file_size.test @@ -1,5 +1,6 @@ # Test resizing the InnoDB redo log. --source include/innodb_page_size_small.inc +--source include/skip_innodb_log_archive.inc # Embedded server tests do not support restarting --source include/not_embedded.inc # DBUG_EXECUTE_IF is needed diff --git a/mysql-test/suite/innodb/t/log_file_size_online.test b/mysql-test/suite/innodb/t/log_file_size_online.test index ac1f6d37d534e..7aebf911b6e71 100644 --- a/mysql-test/suite/innodb/t/log_file_size_online.test +++ b/mysql-test/suite/innodb/t/log_file_size_online.test @@ -1,4 +1,5 @@ --source include/have_innodb.inc +--source include/skip_innodb_log_archive.inc --source include/have_sequence.inc --source include/no_valgrind_without_big.inc diff --git a/mysql-test/suite/innodb/t/log_upgrade.test b/mysql-test/suite/innodb/t/log_upgrade.test index a3d237875feac..d5a3fc7f3962b 100644 --- a/mysql-test/suite/innodb/t/log_upgrade.test +++ b/mysql-test/suite/innodb/t/log_upgrade.test @@ -16,6 +16,7 @@ let bugdir= $MYSQLTEST_VARDIR/tmp/log_upgrade; --let SEARCH_FILE = $MYSQLTEST_VARDIR/log/mysqld.1.err --let $dirs= --innodb-data-home-dir=$bugdir --innodb-log-group-home-dir=$bugdir +--let $dirs=$dirs --skip-innodb-log-archive --echo # --echo # MDEV-24412 InnoDB: Upgrade after a crash is not supported diff --git a/mysql-test/suite/innodb/t/log_upgrade_101_flags.test b/mysql-test/suite/innodb/t/log_upgrade_101_flags.test index 7b19986f73e8e..f37fe11e0b3cd 100644 --- a/mysql-test/suite/innodb/t/log_upgrade_101_flags.test +++ b/mysql-test/suite/innodb/t/log_upgrade_101_flags.test @@ -8,7 +8,7 @@ call mtr.add_suppression("InnoDB: adjusting FSP_SPACE_FLAGS of file"); let bugdir= $MYSQLTEST_VARDIR/tmp/log_upgrade; --mkdir $bugdir --let SEARCH_FILE = $MYSQLTEST_VARDIR/log/mysqld.1.err ---let 
$dirs= --innodb-data-home-dir=$bugdir --innodb-log-group-home-dir=$bugdir --innodb-undo-directory=$bugdir +--let $dirs= --innodb-data-home-dir=$bugdir --innodb-log-group-home-dir=$bugdir --innodb-undo-directory=$bugdir --skip-innodb-log-archive # Test case similar to log_upgrade.test perl; diff --git a/mysql-test/suite/innodb/t/row_format_redundant.test b/mysql-test/suite/innodb/t/row_format_redundant.test index 9f85c45455b1a..2a705094f02f5 100644 --- a/mysql-test/suite/innodb/t/row_format_redundant.test +++ b/mysql-test/suite/innodb/t/row_format_redundant.test @@ -153,6 +153,10 @@ DROP TABLE t2,t3; --let $restart_parameters= --source include/restart_mysqld.inc +if (!`select @@innodb_log_archive=0`) +{ +--replace_result ib_0000000000003000.log ib_logfile0 +} --list_files $bugdir --remove_files_wildcard $bugdir --rmdir $bugdir diff --git a/mysql-test/suite/innodb/t/sys_defragment.test b/mysql-test/suite/innodb/t/sys_defragment.test index a4e5a84450d0d..b880a9d09a826 100644 --- a/mysql-test/suite/innodb/t/sys_defragment.test +++ b/mysql-test/suite/innodb/t/sys_defragment.test @@ -1,4 +1,5 @@ --source include/have_innodb.inc +--source include/skip_innodb_log_archive.inc --source include/have_sequence.inc --source include/have_debug.inc diff --git a/mysql-test/suite/innodb/t/sys_defragment_fail.test b/mysql-test/suite/innodb/t/sys_defragment_fail.test index 4800e2619cb03..3213385bc1ddd 100644 --- a/mysql-test/suite/innodb/t/sys_defragment_fail.test +++ b/mysql-test/suite/innodb/t/sys_defragment_fail.test @@ -1,4 +1,5 @@ --source include/have_innodb.inc +--source include/skip_innodb_log_archive.inc --source include/have_debug.inc --source include/have_sequence.inc diff --git a/mysql-test/suite/innodb/t/table_flags.test b/mysql-test/suite/innodb/t/table_flags.test index 74a7782a90f4b..fcc0f2adb6ac2 100644 --- a/mysql-test/suite/innodb/t/table_flags.test +++ b/mysql-test/suite/innodb/t/table_flags.test @@ -232,6 +232,10 @@ DROP TABLE tr,tc,td,tz,tp; --error 0,1 
--remove_file $bugdir/ib_buffer_pool +if (!`select @@innodb_log_archive=0`) +{ +--replace_result ib_0000000000003000.log ib_logfile0 +} --list_files $bugdir --remove_files_wildcard $bugdir --rmdir $bugdir From 209f0b2afcef0bd09a62aff26baaa02ab591be40 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marko=20M=C3=A4kel=C3=A4?= Date: Mon, 12 Jan 2026 15:16:09 +0200 Subject: [PATCH 11/33] Flag tests that require no_checkpoint_end.inc rewrite --- mysql-test/suite/encryption/t/corrupted_during_recovery.test | 1 + mysql-test/suite/encryption/t/doublewrite_debug.test | 1 + mysql-test/suite/encryption/t/file_creation.test | 1 + mysql-test/suite/encryption/t/innodb-redo-badkey.test | 1 + mysql-test/suite/encryption/t/innodb-redo-nokeys.test | 1 + mysql-test/suite/innodb/t/alter_kill.test | 1 + mysql-test/suite/innodb/t/corrupted_during_recovery.test | 1 + mysql-test/suite/innodb/t/innodb-index.test | 1 + .../suite/innodb/t/leaf_page_corrupted_during_recovery.test | 1 + mysql-test/suite/innodb/t/log_data_file_size.test | 1 + mysql-test/suite/innodb/t/log_file_name.test | 1 + mysql-test/suite/innodb/t/undo_space_dblwr.test | 1 + 12 files changed, 12 insertions(+) diff --git a/mysql-test/suite/encryption/t/corrupted_during_recovery.test b/mysql-test/suite/encryption/t/corrupted_during_recovery.test index dabf06dd04789..e07b30c2935ec 100644 --- a/mysql-test/suite/encryption/t/corrupted_during_recovery.test +++ b/mysql-test/suite/encryption/t/corrupted_during_recovery.test @@ -1,4 +1,5 @@ --source include/have_innodb.inc +--source ../../suite/innodb/include/skip_innodb_log_archive.inc # FIXME --source include/have_file_key_management_plugin.inc --disable_query_log diff --git a/mysql-test/suite/encryption/t/doublewrite_debug.test b/mysql-test/suite/encryption/t/doublewrite_debug.test index 4f2215240441f..0f7e71b20abb6 100644 --- a/mysql-test/suite/encryption/t/doublewrite_debug.test +++ b/mysql-test/suite/encryption/t/doublewrite_debug.test @@ -1,4 +1,5 @@ --source include/have_innodb.inc 
+--source ../../suite/innodb/include/skip_innodb_log_archive.inc # FIXME --source include/have_debug.inc --source include/not_embedded.inc --source include/have_file_key_management_plugin.inc diff --git a/mysql-test/suite/encryption/t/file_creation.test b/mysql-test/suite/encryption/t/file_creation.test index 6b0126831a4ca..63e10b7f8d39f 100644 --- a/mysql-test/suite/encryption/t/file_creation.test +++ b/mysql-test/suite/encryption/t/file_creation.test @@ -1,4 +1,5 @@ --source include/have_innodb.inc +--source ../../suite/innodb/include/skip_innodb_log_archive.inc # FIXME --source include/have_example_key_management_plugin.inc let $restart_noprint=2; # embedded does not support restart diff --git a/mysql-test/suite/encryption/t/innodb-redo-badkey.test b/mysql-test/suite/encryption/t/innodb-redo-badkey.test index bacc71dd2c86e..c7dad93170870 100644 --- a/mysql-test/suite/encryption/t/innodb-redo-badkey.test +++ b/mysql-test/suite/encryption/t/innodb-redo-badkey.test @@ -1,4 +1,5 @@ -- source include/have_innodb.inc +--source ../../suite/innodb/include/skip_innodb_log_archive.inc # FIXME -- source include/have_file_key_management_plugin.inc # embedded does not support restart -- source include/not_embedded.inc diff --git a/mysql-test/suite/encryption/t/innodb-redo-nokeys.test b/mysql-test/suite/encryption/t/innodb-redo-nokeys.test index 87a9e7a146e1b..507bedbb68e3d 100644 --- a/mysql-test/suite/encryption/t/innodb-redo-nokeys.test +++ b/mysql-test/suite/encryption/t/innodb-redo-nokeys.test @@ -1,4 +1,5 @@ -- source include/have_innodb.inc +--source ../../suite/innodb/include/skip_innodb_log_archive.inc # FIXME -- source include/have_file_key_management_plugin.inc # embedded does not support restart -- source include/not_embedded.inc diff --git a/mysql-test/suite/innodb/t/alter_kill.test b/mysql-test/suite/innodb/t/alter_kill.test index cd27cabdd488c..c801a5739d372 100644 --- a/mysql-test/suite/innodb/t/alter_kill.test +++ b/mysql-test/suite/innodb/t/alter_kill.test 
@@ -1,4 +1,5 @@ --source include/have_innodb.inc +--source include/skip_innodb_log_archive.inc # FIXME: fix no_checkpoint_end.inc # The embedded server does not support restarting in mysql-test-run. -- source include/not_embedded.inc -- source include/no_valgrind_without_big.inc diff --git a/mysql-test/suite/innodb/t/corrupted_during_recovery.test b/mysql-test/suite/innodb/t/corrupted_during_recovery.test index 233873f707e13..2eae39b9dcf97 100644 --- a/mysql-test/suite/innodb/t/corrupted_during_recovery.test +++ b/mysql-test/suite/innodb/t/corrupted_during_recovery.test @@ -1,4 +1,5 @@ --source include/have_innodb.inc +--source include/skip_innodb_log_archive.inc # FIXME: fix no_checkpoint_end.inc --disable_query_log call mtr.add_suppression("InnoDB: Plugin initialization aborted"); diff --git a/mysql-test/suite/innodb/t/innodb-index.test b/mysql-test/suite/innodb/t/innodb-index.test index 701574319ff3e..68c9ab99c7be3 100644 --- a/mysql-test/suite/innodb/t/innodb-index.test +++ b/mysql-test/suite/innodb/t/innodb-index.test @@ -1,4 +1,5 @@ -- source include/have_innodb.inc +--source include/skip_innodb_log_archive.inc # FIXME: fix no_checkpoint_end.inc # Embedded server tests do not support restarting. 
-- source include/not_embedded.inc diff --git a/mysql-test/suite/innodb/t/leaf_page_corrupted_during_recovery.test b/mysql-test/suite/innodb/t/leaf_page_corrupted_during_recovery.test index 026be74373f58..08bd19e11aae9 100644 --- a/mysql-test/suite/innodb/t/leaf_page_corrupted_during_recovery.test +++ b/mysql-test/suite/innodb/t/leaf_page_corrupted_during_recovery.test @@ -1,4 +1,5 @@ --source include/have_innodb.inc +--source include/skip_innodb_log_archive.inc # FIXME: fix no_checkpoint_end.inc --source include/have_debug.inc --disable_query_log diff --git a/mysql-test/suite/innodb/t/log_data_file_size.test b/mysql-test/suite/innodb/t/log_data_file_size.test index fe75b9ab236a4..b4dca3b8df4c1 100644 --- a/mysql-test/suite/innodb/t/log_data_file_size.test +++ b/mysql-test/suite/innodb/t/log_data_file_size.test @@ -1,5 +1,6 @@ --source include/innodb_page_size.inc --source include/not_embedded.inc +--source include/skip_innodb_log_archive.inc # FIXME: fix no_checkpoint_end.inc let INNODB_PAGE_SIZE=`select @@innodb_page_size`; let MYSQLD_DATADIR=`select @@datadir`; diff --git a/mysql-test/suite/innodb/t/log_file_name.test b/mysql-test/suite/innodb/t/log_file_name.test index 81f199951a896..f41e8965b64e7 100644 --- a/mysql-test/suite/innodb/t/log_file_name.test +++ b/mysql-test/suite/innodb/t/log_file_name.test @@ -2,6 +2,7 @@ # Test the detection of duplicate tablespaces. 
--source include/have_innodb.inc +--source include/skip_innodb_log_archive.inc # FIXME: fix no_checkpoint_end.inc --source include/no_valgrind_without_big.inc # Embedded server does not support crashing diff --git a/mysql-test/suite/innodb/t/undo_space_dblwr.test b/mysql-test/suite/innodb/t/undo_space_dblwr.test index 33e8ed9d65153..8f6699f5dcad5 100644 --- a/mysql-test/suite/innodb/t/undo_space_dblwr.test +++ b/mysql-test/suite/innodb/t/undo_space_dblwr.test @@ -1,4 +1,5 @@ --source include/have_innodb.inc +--source include/skip_innodb_log_archive.inc # FIXME: fix no_checkpoint_end.inc --source include/have_debug.inc --source include/not_embedded.inc call mtr.add_suppression("Checksum mismatch in the first page of file"); From 5cc99f752b43ce198ca4a7e784f27ea617a9bd5f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marko=20M=C3=A4kel=C3=A4?= Date: Mon, 12 Jan 2026 15:16:34 +0200 Subject: [PATCH 12/33] Flag some tests that require multi-file recovery --- mysql-test/suite/innodb/t/bulk_load.test | 1 + mysql-test/suite/innodb/t/innodb_bulk_create_index_debug.test | 1 + mysql-test/suite/innodb/t/shrink_cached_undo.test | 1 + mysql-test/suite/innodb/t/sys_truncate_large.test | 1 + mysql-test/suite/innodb/t/undo_truncate_recover.test | 1 + 5 files changed, 5 insertions(+) diff --git a/mysql-test/suite/innodb/t/bulk_load.test b/mysql-test/suite/innodb/t/bulk_load.test index 711786b97a5ae..abec8439542c8 100644 --- a/mysql-test/suite/innodb/t/bulk_load.test +++ b/mysql-test/suite/innodb/t/bulk_load.test @@ -1,4 +1,5 @@ --source include/have_innodb.inc +--source include/skip_innodb_log_archive.inc # FIXME: multi-file recovery --source include/have_sequence.inc --source include/big_test.inc --source include/not_embedded.inc diff --git a/mysql-test/suite/innodb/t/innodb_bulk_create_index_debug.test b/mysql-test/suite/innodb/t/innodb_bulk_create_index_debug.test index 34c9336e51795..4e36887ee8401 100644 --- a/mysql-test/suite/innodb/t/innodb_bulk_create_index_debug.test +++ 
b/mysql-test/suite/innodb/t/innodb_bulk_create_index_debug.test @@ -5,6 +5,7 @@ # Test Restart & Crash Recovery. -- source include/big_test.inc -- source include/innodb_page_size_small.inc +--source include/skip_innodb_log_archive.inc # FIXME: multi-file recovery let $restart_noprint=2; # Test Row Format: REDUNDANT. diff --git a/mysql-test/suite/innodb/t/shrink_cached_undo.test b/mysql-test/suite/innodb/t/shrink_cached_undo.test index 8274556d8fd57..10ab63127669e 100644 --- a/mysql-test/suite/innodb/t/shrink_cached_undo.test +++ b/mysql-test/suite/innodb/t/shrink_cached_undo.test @@ -1,4 +1,5 @@ --source include/have_innodb.inc +--source include/skip_innodb_log_archive.inc # FIXME: multi-file recovery --source include/have_sequence.inc --source include/have_debug.inc --source include/not_embedded.inc diff --git a/mysql-test/suite/innodb/t/sys_truncate_large.test b/mysql-test/suite/innodb/t/sys_truncate_large.test index 8499ce0bf3a6a..2f6f8407b2473 100644 --- a/mysql-test/suite/innodb/t/sys_truncate_large.test +++ b/mysql-test/suite/innodb/t/sys_truncate_large.test @@ -1,5 +1,6 @@ --source include/big_test.inc --source include/have_innodb.inc +--source include/skip_innodb_log_archive.inc # FIXME: multi-file recovery --source include/have_sequence.inc --source include/not_valgrind.inc --source include/have_debug.inc diff --git a/mysql-test/suite/innodb/t/undo_truncate_recover.test b/mysql-test/suite/innodb/t/undo_truncate_recover.test index 988b28f75b461..0b9e9459b0306 100644 --- a/mysql-test/suite/innodb/t/undo_truncate_recover.test +++ b/mysql-test/suite/innodb/t/undo_truncate_recover.test @@ -7,6 +7,7 @@ # With larger innodb_page_size, the undo log tablespaces do not grow enough. 
--source include/innodb_page_size_small.inc --source include/have_innodb.inc +--source include/skip_innodb_log_archive.inc # FIXME: multi-file recovery --source include/have_debug.inc # Tests with embedded server do not support restarting --source include/not_embedded.inc From ae4b0fbce3b11e9d3eaef3a57630c7499a3c1a55 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marko=20M=C3=A4kel=C3=A4?= Date: Mon, 12 Jan 2026 16:22:48 +0200 Subject: [PATCH 13/33] Implement some parameter validation FIXME: innodb.log_file_size_online,encrypted restart work-around FIXME: validate innodb_log_recovery_start at a checkpoint --- .../r/log_file_size_online,encrypted.rdiff | 12 +++ .../innodb/r/log_file_size_online.result | 31 ++++++- .../suite/innodb/t/log_file_size_online.test | 80 +++++++++++++++++-- storage/innobase/handler/ha_innodb.cc | 17 ++++ storage/innobase/log/log0log.cc | 2 +- storage/innobase/log/log0recv.cc | 64 ++++++++++----- 6 files changed, 177 insertions(+), 29 deletions(-) create mode 100644 mysql-test/suite/innodb/r/log_file_size_online,encrypted.rdiff diff --git a/mysql-test/suite/innodb/r/log_file_size_online,encrypted.rdiff b/mysql-test/suite/innodb/r/log_file_size_online,encrypted.rdiff new file mode 100644 index 0000000000000..15ddd7686673c --- /dev/null +++ b/mysql-test/suite/innodb/r/log_file_size_online,encrypted.rdiff @@ -0,0 +1,12 @@ +--- log_file_size_online.result +--- log_file_size_online,encrypted.result +@@ -87,7 +87,8 @@ + # restart: --innodb-log-recovery-start=12290 + $check_no_innodb; + ENGINE SUPPORT COMMENT TRANSACTIONS XA SAVEPOINTS +-FOUND 1 /InnoDB: No matching file found for innodb_log_recovery_start=12290/ in mysqld.1.err ++InnoDB YES Supports transactions, row-level locking, foreign keys and encryption for tables YES YES YES ++NOT FOUND /InnoDB: No matching file found for innodb_log_recovery_start=12290/ in mysqld.1.err + # restart: with restart_parameters + $check_no_innodb; + ENGINE SUPPORT COMMENT TRANSACTIONS XA SAVEPOINTS diff --git 
a/mysql-test/suite/innodb/r/log_file_size_online.result b/mysql-test/suite/innodb/r/log_file_size_online.result index 41c4f358cfb8e..cfc68ad0012d2 100644 --- a/mysql-test/suite/innodb/r/log_file_size_online.result +++ b/mysql-test/suite/innodb/r/log_file_size_online.result @@ -1,4 +1,10 @@ -SET GLOBAL innodb_log_archive=OFF; +call mtr.add_suppression("InnoDB: innodb_log_archive_start=1234567 is after innodb_log_recovery_start=12345"); +call mtr.add_suppression("InnoDB: innodb_log_archive=ON but .*/ib_logfile0 exists"); +call mtr.add_suppression("InnoDB: No matching file found for innodb_log_recovery_start=12290"); +call mtr.add_suppression("InnoDB: File .*/ib_logfile0 was not found"); +call mtr.add_suppression("InnoDB: innodb_log_archive_start=\\d+ is after innodb_log_recovery_start=\\d+"); +call mtr.add_suppression("Plugin 'InnoDB' registration as a STORAGE ENGINE failed\\."); +call mtr.add_suppression("InnoDB: Plugin initialization aborted"); SET GLOBAL innodb_log_file_size=4194304; SHOW VARIABLES LIKE 'innodb_log_file_size'; Variable_name Value @@ -53,9 +59,15 @@ global_value connection con1; disconnect con1; connection default; +# restart: --innodb-log-recovery-start=12345 --innodb-log-archive-start=1234567 +$check_no_innodb; +ENGINE SUPPORT COMMENT TRANSACTIONS XA SAVEPOINTS +FOUND 1 /InnoDB: innodb_log_archive_start=1234567 is after innodb_log_recovery_start=12345/ in mysqld.1.err +# restart: --innodb-log-archive +$check_no_innodb; +ENGINE SUPPORT COMMENT TRANSACTIONS XA SAVEPOINTS +FOUND 1 /InnoDB: innodb_log_archive=ON but .*/ib_logfile0 exists/ in mysqld.1.err # restart -SET @save_archive=@@GLOBAL.innodb_log_archive; -SET GLOBAL innodb_log_archive=OFF; SELECT * FROM t WHERE a<10; a b 1 @@ -71,6 +83,18 @@ SELECT COUNT(*),LENGTH(b) FROM t GROUP BY b; COUNT(*) LENGTH(b) 9 0 19991 255 +SET GLOBAL innodb_log_archive=ON; +# restart: --innodb-log-recovery-start=12290 +$check_no_innodb; +ENGINE SUPPORT COMMENT TRANSACTIONS XA SAVEPOINTS +FOUND 1 /InnoDB: No 
matching file found for innodb_log_recovery_start=12290/ in mysqld.1.err +# restart: with restart_parameters +$check_no_innodb; +ENGINE SUPPORT COMMENT TRANSACTIONS XA SAVEPOINTS +NOT FOUND /InnoDB: File .*/ib_logfile0 as not found/ in mysqld.1.err +FOUND 2 /InnoDB: innodb_log_archive_start=\d+ is after innodb_log_recovery_start=\d+/ in mysqld.1.err +# restart: with restart_parameters +SET GLOBAL innodb_log_archive=OFF; SHOW VARIABLES LIKE 'innodb_log_file_size'; Variable_name Value innodb_log_file_size 5242880 @@ -83,5 +107,4 @@ SHOW VARIABLES LIKE 'innodb_log_file_size'; Variable_name Value innodb_log_file_size 5242880 FOUND 1 /InnoDB: Resized log to 6\.000MiB/ in mysqld.1.err -SET GLOBAL innodb_log_archive=@save_archive; DROP TABLE t; diff --git a/mysql-test/suite/innodb/t/log_file_size_online.test b/mysql-test/suite/innodb/t/log_file_size_online.test index 7aebf911b6e71..1356ea99cb3b8 100644 --- a/mysql-test/suite/innodb/t/log_file_size_online.test +++ b/mysql-test/suite/innodb/t/log_file_size_online.test @@ -3,9 +3,18 @@ --source include/have_sequence.inc --source include/no_valgrind_without_big.inc -let SEARCH_FILE = $MYSQLTEST_VARDIR/log/mysqld.1.err; +call mtr.add_suppression("InnoDB: innodb_log_archive_start=1234567 is after innodb_log_recovery_start=12345"); +call mtr.add_suppression("InnoDB: innodb_log_archive=ON but .*/ib_logfile0 exists"); +call mtr.add_suppression("InnoDB: No matching file found for innodb_log_recovery_start=12290"); +call mtr.add_suppression("InnoDB: File .*/ib_logfile0 was not found"); +call mtr.add_suppression("InnoDB: innodb_log_archive_start=\\d+ is after innodb_log_recovery_start=\\d+"); +call mtr.add_suppression("Plugin 'InnoDB' registration as a STORAGE ENGINE failed\\."); +call mtr.add_suppression("InnoDB: Plugin initialization aborted"); +let $check_no_innodb=SELECT * FROM INFORMATION_SCHEMA.ENGINES +WHERE engine = 'innodb' +AND support IN ('YES', 'DEFAULT', 'ENABLED'); -SET GLOBAL innodb_log_archive=OFF; +let SEARCH_FILE = 
$MYSQLTEST_VARDIR/log/mysqld.1.err; SET GLOBAL innodb_log_file_size=4194304; SHOW VARIABLES LIKE 'innodb_log_file_size'; @@ -78,15 +87,75 @@ reap; --connection default --let $shutdown_timeout=0 ---let $restart_parameters= +--let $restart_parameters=--innodb-log-recovery-start=12345 --innodb-log-archive-start=1234567 --source include/restart_mysqld.inc +--let $shutdown_timeout= +evalp $check_no_innodb; -SET @save_archive=@@GLOBAL.innodb_log_archive; -SET GLOBAL innodb_log_archive=OFF; +let SEARCH_PATTERN = InnoDB: innodb_log_archive_start=1234567 is after innodb_log_recovery_start=12345; +--source include/search_pattern_in_file.inc + +--let $restart_parameters=--innodb-log-archive +--source include/restart_mysqld.inc +evalp $check_no_innodb; + +let SEARCH_PATTERN = InnoDB: innodb_log_archive=ON but .*/ib_logfile0 exists; +--source include/search_pattern_in_file.inc + +--let $restart_parameters= +--source include/restart_mysqld.inc SELECT * FROM t WHERE a<10; SELECT COUNT(*),LENGTH(b) FROM t GROUP BY b; +if ($MTR_COMBINATION_ENCRYPTED) { +# FIXME: remove this! 
+--echo SET GLOBAL innodb_log_archive=ON; +--disable_query_log +call mtr.add_suppression("InnoDB: Crash recovery is broken due to insufficient innodb_log_file_size"); +--enable_query_log +} +if (!$MTR_COMBINATION_ENCRYPTED) { +SET GLOBAL innodb_log_archive=ON; +} +let $archive_start=`SELECT variable_value FROM information_schema.global_status +WHERE variable_name='innodb_lsn_archived'`; +let $archive_start_1=`SELECT $archive_start-1`; +let $checkpoint=`SELECT variable_value FROM information_schema.global_status +WHERE variable_name='innodb_lsn_last_checkpoint'`; + +--let $restart_parameters= --innodb-log-recovery-start=12290 +--source include/restart_mysqld.inc + +evalp $check_no_innodb; +let SEARCH_PATTERN = InnoDB: No matching file found for innodb_log_recovery_start=12290; +--source include/search_pattern_in_file.inc + +--let $restart_noprint=1 +--let $restart_parameters= --innodb-log-archive-start=$archive_start --innodb-log-recovery-start=$archive_start_1 +--source include/restart_mysqld.inc + +evalp $check_no_innodb; + +let SEARCH_PATTERN = InnoDB: File .*/ib_logfile0 as not found; +--source include/search_pattern_in_file.inc + +let SEARCH_PATTERN = InnoDB: innodb_log_archive_start=\\d+ is after innodb_log_recovery_start=\\d+; +--source include/search_pattern_in_file.inc + +# FIXME: implement this check! 
+#--let $restart_parameters= --innodb-log-recovery-start=$archive_start +#--source include/restart_mysqld.inc + +#evalp $check_no_innodb; + +--let $restart_parameters= --innodb-log-recovery-start=$checkpoint +--source include/restart_mysqld.inc + +--let $restart_noprint= + +SET GLOBAL innodb_log_archive=OFF; + SHOW VARIABLES LIKE 'innodb_log_file_size'; SET GLOBAL innodb_log_file_size=6291456; SHOW VARIABLES LIKE 'innodb_log_file_size'; @@ -94,6 +163,5 @@ SET GLOBAL innodb_log_file_size=5242880; SHOW VARIABLES LIKE 'innodb_log_file_size'; let SEARCH_PATTERN = InnoDB: Resized log to 6\\.000MiB; --source include/search_pattern_in_file.inc -SET GLOBAL innodb_log_archive=@save_archive; DROP TABLE t; diff --git a/storage/innobase/handler/ha_innodb.cc b/storage/innobase/handler/ha_innodb.cc index c1fc30cfdf2e4..54b564a7020c1 100644 --- a/storage/innobase/handler/ha_innodb.cc +++ b/storage/innobase/handler/ha_innodb.cc @@ -3988,6 +3988,23 @@ static int innodb_init_params() log_sys.archived_lsn= innodb_log_archive_start; + if (recv_sys.recovery_start && + log_sys.archived_lsn > recv_sys.recovery_start) + { + sql_print_error("InnoDB: innodb_log_archive_start=" LSN_PF + " is after innodb_log_recovery_start=" LSN_PF, + log_sys.archived_lsn, recv_sys.recovery_start); + DBUG_RETURN(HA_ERR_INITIALIZATION); + } + + if (recv_sys.rpo && recv_sys.recovery_start > recv_sys.rpo) + { + sql_print_error("InnoDB: innodb_log_recovery_start=" LSN_PF + " is after innodb_log_recovery_target=" LSN_PF, + recv_sys.recovery_start, recv_sys.rpo); + DBUG_RETURN(HA_ERR_INITIALIZATION); + } + if (!tpool::supports_native_aio()) srv_use_native_aio= FALSE; diff --git a/storage/innobase/log/log0log.cc b/storage/innobase/log/log0log.cc index 14cb69c7e3ed1..63296f3b86cd4 100644 --- a/storage/innobase/log/log0log.cc +++ b/storage/innobase/log/log0log.cc @@ -1004,7 +1004,7 @@ static void log_write_buf(lsn_t max_length, ATTRIBUTE_COLD std::string &log_t::append_archive_name(std::string &path, lsn_t lsn) { - 
path.append("ib_"); + path.append(srv_encrypt_log ? "ibe" : "ib_"); for (int i= 16; i--; lsn<<= 4) path.push_back("0123456789abcdef"[lsn >> 60]); path.append(".log"); diff --git a/storage/innobase/log/log0recv.cc b/storage/innobase/log/log0recv.cc index 0b97e625ad912..b3cd6ed85346f 100644 --- a/storage/innobase/log/log0recv.cc +++ b/storage/innobase/log/log0recv.cc @@ -1753,7 +1753,11 @@ dberr_t recv_sys_t::find_checkpoint() goto retry; readdir: - std::map logs; + struct log { lsn_t end; bool read_only; }; + const char *pattern= srv_encrypt_log + ? "ibe%016" PRIx64 ".log%n" : "ib_%016" PRIx64 ".log%n"; + + std::map logs; #ifdef _WIN32 do { @@ -1762,12 +1766,14 @@ dberr_t recv_sys_t::find_checkpoint() lsn_t lsn; int n{0}; const char *fn{entry.cFileName}; - if (1 != sscanf(fn, "ib_%016" PRIx64 ".log%n", &lsn, &n) || fn[n]) + if (1 != sscanf(fn, pattern, &lsn, &n) || fn[n]) continue; LARGE_INTEGER filesize; filesize.LowPart= entry.nFileSizeLow; filesize.HighPart= entry.nFileSizeHigh; - logs.emplace(lsn, lsn + filesize.QuadPart); + logs.emplace(lsn, + log{lsn + filesize.QuadPart, + entry.dwFileAttributes & FILE_ATTRIBUTE_READONLY}); } while (FindNextFile(d, &entry)); FindClose(d); @@ -1779,7 +1785,7 @@ dberr_t recv_sys_t::find_checkpoint() lsn_t lsn; int n{0}; const char *fn{e->d_name}; - if (1 != sscanf(fn, "ib_%016" PRIx64 ".log%n", &lsn, &n) || fn[n] || + if (1 != sscanf(fn, pattern, &lsn, &n) || fn[n] || lsn < log_t::FIRST_LSN) continue; path.assign(srv_log_group_home_dir); @@ -1792,36 +1798,58 @@ dberr_t recv_sys_t::find_checkpoint() sql_print_warning("InnoDB: ignoring %s", path.c_str()); continue; } - logs.emplace(lsn, lsn - log_t::START_OFFSET + st.st_size); + logs.emplace(lsn, + log{lsn - log_t::START_OFFSET + st.st_size, + !(st.st_mode & 0400)}); } closedir(d); #endif - first_lsn= 0; - auto min= logs.cbegin(); const auto end= logs.cend(); - if (min == end) - goto no_archive_found; + auto i= logs.cbegin(), start= i, found_recovery_start= end; int subsequent= 0; 
- for (auto i= min;;) + if (i == end) + goto no_archive_found; + for (;;) { - auto prev= i++; - if (i == end) + const lsn_t first{i->first}, last{i->second.end}; + if (log_sys.archived_lsn > first && log_sys.archived_lsn < last) + { + sql_print_error("InnoDB: Invalid innodb_log_archive_start=" LSN_PF); + goto no_archive_found_reported; + } + if (recovery_start >= first && recovery_start < last) + found_recovery_start= i; + const auto prev= i; + if (++i == end) { if (!recovery_start) - subsequent= 0, first_lsn= prev->first; + subsequent= 0, start= prev; break; } - if (prev->second == i->first) + if (last == i->first) subsequent++; else - subsequent=0, first_lsn= i->first; + subsequent=0, start= i; } - // TODO: validate innodb_log_archive_start, innodb_log_recovery_start, - // innodb_log_recovery_target + if (!recovery_start) + { + /* Start the recovery from the first non-read-only log file. */ + for (i= logs.cbegin(); i != start; start--, subsequent++) + if (start->second.read_only) + break; + } + else if (found_recovery_start == end) + { + sql_print_error("InnoDB: No matching file found for" + " innodb_log_recovery_start=" LSN_PF, recovery_start); + goto no_archive_found_reported; + } + else + start= found_recovery_start; - // FIXME: open the file that is determined by recovery_start + first_lsn= start->first; path.assign(srv_log_group_home_dir); switch (path.back()) { From ccd6536684a40346cdbaf865d5c31d3906f8c59c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marko=20M=C3=A4kel=C3=A4?= Date: Mon, 12 Jan 2026 16:41:49 +0200 Subject: [PATCH 14/33] fixup! 
ae4b0fbce3b11e9d3eaef3a57630c7499a3c1a55 --- storage/innobase/log/log0recv.cc | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/storage/innobase/log/log0recv.cc b/storage/innobase/log/log0recv.cc index b3cd6ed85346f..f0e2f8eb9a556 100644 --- a/storage/innobase/log/log0recv.cc +++ b/storage/innobase/log/log0recv.cc @@ -1773,7 +1773,8 @@ dberr_t recv_sys_t::find_checkpoint() filesize.HighPart= entry.nFileSizeHigh; logs.emplace(lsn, log{lsn + filesize.QuadPart, - entry.dwFileAttributes & FILE_ATTRIBUTE_READONLY}); + bool(entry.dwFileAttributes & + FILE_ATTRIBUTE_READONLY)}); } while (FindNextFile(d, &entry)); FindClose(d); From 30116f4c70078a0587b69afb3f55b16d1fe6c683 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marko=20M=C3=A4kel=C3=A4?= Date: Tue, 13 Jan 2026 11:52:23 +0200 Subject: [PATCH 15/33] Refine the innodb_encrypt_log format --- .../r/log_file_size_online,encrypted.rdiff | 12 ----- .../innodb/r/log_file_size_online.result | 6 ++- .../suite/innodb/t/log_file_size_online.test | 27 +++++----- storage/innobase/buf/buf0flu.cc | 5 +- storage/innobase/log/log0log.cc | 19 +++++-- storage/innobase/log/log0recv.cc | 53 +++++++++++++++---- 6 files changed, 80 insertions(+), 42 deletions(-) delete mode 100644 mysql-test/suite/innodb/r/log_file_size_online,encrypted.rdiff diff --git a/mysql-test/suite/innodb/r/log_file_size_online,encrypted.rdiff b/mysql-test/suite/innodb/r/log_file_size_online,encrypted.rdiff deleted file mode 100644 index 15ddd7686673c..0000000000000 --- a/mysql-test/suite/innodb/r/log_file_size_online,encrypted.rdiff +++ /dev/null @@ -1,12 +0,0 @@ ---- log_file_size_online.result ---- log_file_size_online,encrypted.result -@@ -87,7 +87,8 @@ - # restart: --innodb-log-recovery-start=12290 - $check_no_innodb; - ENGINE SUPPORT COMMENT TRANSACTIONS XA SAVEPOINTS --FOUND 1 /InnoDB: No matching file found for innodb_log_recovery_start=12290/ in mysqld.1.err -+InnoDB YES Supports transactions, row-level locking, foreign keys and encryption for 
tables YES YES YES -+NOT FOUND /InnoDB: No matching file found for innodb_log_recovery_start=12290/ in mysqld.1.err - # restart: with restart_parameters - $check_no_innodb; - ENGINE SUPPORT COMMENT TRANSACTIONS XA SAVEPOINTS diff --git a/mysql-test/suite/innodb/r/log_file_size_online.result b/mysql-test/suite/innodb/r/log_file_size_online.result index cfc68ad0012d2..31fe541545c87 100644 --- a/mysql-test/suite/innodb/r/log_file_size_online.result +++ b/mysql-test/suite/innodb/r/log_file_size_online.result @@ -1,10 +1,12 @@ +SET STATEMENT sql_log_bin=0 FOR +call mtr.add_suppression("Plugin 'InnoDB' registration as a STORAGE ENGINE failed\\."); +SET STATEMENT sql_log_bin=0 FOR +call mtr.add_suppression("InnoDB: Plugin initialization aborted"); call mtr.add_suppression("InnoDB: innodb_log_archive_start=1234567 is after innodb_log_recovery_start=12345"); call mtr.add_suppression("InnoDB: innodb_log_archive=ON but .*/ib_logfile0 exists"); call mtr.add_suppression("InnoDB: No matching file found for innodb_log_recovery_start=12290"); call mtr.add_suppression("InnoDB: File .*/ib_logfile0 was not found"); call mtr.add_suppression("InnoDB: innodb_log_archive_start=\\d+ is after innodb_log_recovery_start=\\d+"); -call mtr.add_suppression("Plugin 'InnoDB' registration as a STORAGE ENGINE failed\\."); -call mtr.add_suppression("InnoDB: Plugin initialization aborted"); SET GLOBAL innodb_log_file_size=4194304; SHOW VARIABLES LIKE 'innodb_log_file_size'; Variable_name Value diff --git a/mysql-test/suite/innodb/t/log_file_size_online.test b/mysql-test/suite/innodb/t/log_file_size_online.test index 1356ea99cb3b8..eeaf1ab207df9 100644 --- a/mysql-test/suite/innodb/t/log_file_size_online.test +++ b/mysql-test/suite/innodb/t/log_file_size_online.test @@ -1,5 +1,19 @@ --source include/have_innodb.inc ---source include/skip_innodb_log_archive.inc + +SET STATEMENT sql_log_bin=0 FOR +call mtr.add_suppression("Plugin 'InnoDB' registration as a STORAGE ENGINE failed\\."); +SET STATEMENT 
sql_log_bin=0 FOR +call mtr.add_suppression("InnoDB: Plugin initialization aborted"); + +if (`SELECT COUNT(*)=0 FROM information_schema.global_variables where variable_name='innodb_log_archive' and variable_value='OFF'`) +{ +--disable_query_log +SET STATEMENT sql_log_bin=0 FOR +call mtr.add_suppression("InnoDB: ib_0.*\\.log does not match innodb_encrypt_log"); +--enable_query_log +--skip Test requires innodb_log_archive=OFF +} + --source include/have_sequence.inc --source include/no_valgrind_without_big.inc @@ -8,8 +22,6 @@ call mtr.add_suppression("InnoDB: innodb_log_archive=ON but .*/ib_logfile0 exist call mtr.add_suppression("InnoDB: No matching file found for innodb_log_recovery_start=12290"); call mtr.add_suppression("InnoDB: File .*/ib_logfile0 was not found"); call mtr.add_suppression("InnoDB: innodb_log_archive_start=\\d+ is after innodb_log_recovery_start=\\d+"); -call mtr.add_suppression("Plugin 'InnoDB' registration as a STORAGE ENGINE failed\\."); -call mtr.add_suppression("InnoDB: Plugin initialization aborted"); let $check_no_innodb=SELECT * FROM INFORMATION_SCHEMA.ENGINES WHERE engine = 'innodb' AND support IN ('YES', 'DEFAULT', 'ENABLED'); @@ -108,16 +120,7 @@ let SEARCH_PATTERN = InnoDB: innodb_log_archive=ON but .*/ib_logfile0 exists; SELECT * FROM t WHERE a<10; SELECT COUNT(*),LENGTH(b) FROM t GROUP BY b; -if ($MTR_COMBINATION_ENCRYPTED) { -# FIXME: remove this! 
---echo SET GLOBAL innodb_log_archive=ON; ---disable_query_log -call mtr.add_suppression("InnoDB: Crash recovery is broken due to insufficient innodb_log_file_size"); ---enable_query_log -} -if (!$MTR_COMBINATION_ENCRYPTED) { SET GLOBAL innodb_log_archive=ON; -} let $archive_start=`SELECT variable_value FROM information_schema.global_status WHERE variable_name='innodb_lsn_archived'`; let $archive_start_1=`SELECT $archive_start-1`; diff --git a/storage/innobase/buf/buf0flu.cc b/storage/innobase/buf/buf0flu.cc index d0fcc5fdb5e15..f70fbd4ae0b6b 100644 --- a/storage/innobase/buf/buf0flu.cc +++ b/storage/innobase/buf/buf0flu.cc @@ -1827,7 +1827,7 @@ inline void log_t::write_checkpoint(lsn_t end_lsn) noexcept ("checkpoint at " LSN_PF " written", next_checkpoint_lsn)); if (archive && resize_log.is_opened()) - next_checkpoint_no= 0; + next_checkpoint_no= is_encrypted() ? 8 : 1; const auto n= next_checkpoint_no; size_t offset; @@ -1840,6 +1840,7 @@ inline void log_t::write_checkpoint(lsn_t end_lsn) noexcept { ut_ad(!resizing); offset= n * 4; + ut_a(offset); ut_a(offset < START_OFFSET); // FIXME: better guard for this const lsn_t d= end_lsn - (resize_log.is_opened() ? first_lsn : this->end_lsn); @@ -1847,7 +1848,7 @@ inline void log_t::write_checkpoint(lsn_t end_lsn) noexcept if (!d) { - ut_a(!next_checkpoint_no); + ut_a(next_checkpoint_no == (is_encrypted() ? 8 : 1)); ut_a(end_lsn == first_lsn); } #ifdef HAVE_PMEM diff --git a/storage/innobase/log/log0log.cc b/storage/innobase/log/log0log.cc index 63296f3b86cd4..e5819350623c0 100644 --- a/storage/innobase/log/log0log.cc +++ b/storage/innobase/log/log0log.cc @@ -442,6 +442,7 @@ void log_t::create(lsn_t lsn) noexcept ut_ad(is_latest()); ut_ad(this == &log_sys); + next_checkpoint_no= archive ? (format == FORMAT_ENC_11 ? 
8 : 1) : 0; write_lsn_offset= 0; base_lsn.store(lsn, std::memory_order_relaxed); flushed_to_disk_lsn.store(lsn, std::memory_order_relaxed); @@ -648,8 +649,20 @@ void log_t::header_rewrite(my_bool archive) noexcept } else { - mach_write_to_4(buf, uint32_t(end_lsn - first_lsn)); - memset(buf + 4, 0, 60 - 4); + next_checkpoint_no&= 1; + if (format == FORMAT_10_8) + { + mach_write_to_8(buf, uint32_t(end_lsn - first_lsn)); + memset(buf + 8, 0, 60 - 8); + } + else + { + ut_ad(format == FORMAT_ENC_11); + log_crypt_write_header(buf); + mach_write_to_4(buf + 32, uint32_t(end_lsn - first_lsn)); + memset(buf + 40, 0, 60 - 40); + next_checkpoint_no+= 8; + } pmem_persist(buf, 64); memset_aligned<64>(buf + 64, 0, START_OFFSET - 64); pmem_persist(buf, START_OFFSET); @@ -1004,7 +1017,7 @@ static void log_write_buf(lsn_t max_length, ATTRIBUTE_COLD std::string &log_t::append_archive_name(std::string &path, lsn_t lsn) { - path.append(srv_encrypt_log ? "ibe" : "ib_"); + path.append("ib_"); for (int i= 16; i--; lsn<<= 4) path.push_back("0123456789abcdef"[lsn >> 60]); path.append(".log"); diff --git a/storage/innobase/log/log0recv.cc b/storage/innobase/log/log0recv.cc index f0e2f8eb9a556..894789b738c9b 100644 --- a/storage/innobase/log/log0recv.cc +++ b/storage/innobase/log/log0recv.cc @@ -32,6 +32,8 @@ Created 9/20/1997 Heikki Tuuri #include "log0recv.h" +#define LOG_ARCHIVE_NAME "ib_%016" PRIx64 ".log" + #ifdef HAVE_MY_AES_H #include #endif @@ -1754,9 +1756,6 @@ dberr_t recv_sys_t::find_checkpoint() readdir: struct log { lsn_t end; bool read_only; }; - const char *pattern= srv_encrypt_log - ? 
"ibe%016" PRIx64 ".log%n" : "ib_%016" PRIx64 ".log%n"; - std::map logs; #ifdef _WIN32 do @@ -1766,7 +1765,7 @@ dberr_t recv_sys_t::find_checkpoint() lsn_t lsn; int n{0}; const char *fn{entry.cFileName}; - if (1 != sscanf(fn, pattern, &lsn, &n) || fn[n]) + if (1 != sscanf(fn, LOG_ARCHIVE_NAME "%n", &lsn, &n) || fn[n]) continue; LARGE_INTEGER filesize; filesize.LowPart= entry.nFileSizeLow; @@ -1786,7 +1785,7 @@ dberr_t recv_sys_t::find_checkpoint() lsn_t lsn; int n{0}; const char *fn{e->d_name}; - if (1 != sscanf(fn, pattern, &lsn, &n) || fn[n] || + if (1 != sscanf(fn, LOG_ARCHIVE_NAME "%n", &lsn, &n) || fn[n] || lsn < log_t::FIRST_LSN) continue; path.assign(srv_log_group_home_dir); @@ -4956,26 +4955,56 @@ static recv_sys_t::parser get_parse_mmap() noexcept dberr_t recv_sys_t::find_checkpoint_archived(lsn_t first_lsn) { - // FIXME: retrieve encrypted format from file name? - log_sys.format= log_t::FORMAT_10_8; + const byte *buf= log_sys.buf; + uint16_t n_checkpoint= 0; + { + const uint32_t format{mach_read_from_4(buf)}; + if (format != uint32_t(srv_encrypt_log)) + { + sql_print_error(format < 2 + ? 
"InnoDB: " LOG_ARCHIVE_NAME + " does not match innodb_encrypt_log" + : "InnoDB: " LOG_ARCHIVE_NAME + " is in unrecognized format", + first_lsn); + return DB_ERROR; + } + + if (!format) + log_sys.format= log_t::FORMAT_10_8; + else if (!log_crypt_read_header(buf)) + return DB_ERROR; + else + { + n_checkpoint= 32/*log_crypt_read_header()*/ / 4; + if (!tmp_buf) + { + tmp_buf= static_cast + (ut_malloc_dontdump(tmp_buf_size, PSI_INSTRUMENT_ME)); + if (!tmp_buf) + return DB_OUT_OF_MEMORY; + } + log_sys.format= log_t::FORMAT_ENC_11; + } + } + log_sys.set_first_lsn(first_lsn); if (!log_sys.archived_lsn) log_sys.archived_lsn= first_lsn; /* Validate the checkpoints */ lsn_t end_lsn{first_lsn}, checkpoint{0}, recovery_start_end_lsn{0}; - uint16_t n_checkpoint; const recv_sys_t::parser parser[2] { get_parse_mmap(), get_parse_mmap() }; ut_ad(recv_spaces.empty()); byte header[log_sys.START_OFFSET]; - const byte *buf= log_sys.buf; if (log_sys.is_mmap()) buf= static_cast(memcpy(header, buf, sizeof header)); - for (n_checkpoint= 0; n_checkpoint < sizeof header / 4; n_checkpoint++) + for (bool first= true; n_checkpoint < sizeof header / 4; + n_checkpoint++, first= false) { const uint32_t d{mach_read_from_4(&buf[4 * n_checkpoint])}; - if (!d && n_checkpoint) + if (!d && !first) break; lsn= end_lsn + d; file_checkpoint= 0; @@ -4986,6 +5015,8 @@ dberr_t recv_sys_t::find_checkpoint_archived(lsn_t first_lsn) if (!file_checkpoint) { found_corrupt_log= false; + if (!d && first) + continue; break; } ut_ad(file_checkpoint == lsn); From 02e799ad7cf3c1fd2e2f710e82e77020ea6ce2e1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marko=20M=C3=A4kel=C3=A4?= Date: Tue, 13 Jan 2026 14:16:40 +0200 Subject: [PATCH 16/33] fixup! 
30116f4c70078a0587b69afb3f55b16d1fe6c683 --- mysql-test/include/innodb_encrypt_log.inc | 4 ++++ .../include/skip_innodb_log_archive.inc | 12 +++++++++++ .../suite/encryption/t/bulk_insert.test | 1 + .../encryption/t/debug_key_management.test | 1 + .../suite/encryption/t/encrypt_and_grep.test | 1 + .../suite/encryption/t/encryption_force.test | 1 + .../suite/encryption/t/innochecksum.test | 1 + .../encryption/t/innodb-first-page-read.opt | 5 ----- .../t/innodb-key-rotation-disable.test | 1 + .../t/innodb-page_encryption-32k.test | 1 + .../t/innodb-page_encryption_compression.test | 1 + ...innodb-page_encryption_log_encryption.test | 1 + .../encryption/t/innodb_encrypt_freed.test | 1 + .../encryption/t/innodb_encrypt_log.test | 1 + .../t/innodb_encrypt_log_corruption.test | 1 + .../t/innodb_encryption-page-compression.test | 1 + .../suite/encryption/t/innodb_encryption.test | 1 + .../t/innodb_encryption_discard_import.test | 1 + .../t/innodb_encryption_tables.test | 1 + .../suite/encryption/t/innodb_first_page.test | 1 + .../t/innodb_onlinealter_encryption.test | 1 + .../suite/encryption/t/recovery_memory.test | 1 + .../innodb/r/log_file_size_online.result | 4 ---- .../suite/innodb/t/alter_copy_bulk.test | 1 + .../suite/innodb/t/insert_into_empty.test | 1 + .../suite/innodb/t/log_file_size_online.test | 16 +------------- .../suite/innodb/t/sys_truncate_debug.test | 1 + .../innodb/t/sys_truncate_shutdown_debug.test | 1 + mysql-test/suite/innodb/t/undo_truncate.test | 1 + storage/innobase/log/log0log.cc | 21 ++++++++++++------- storage/innobase/log/log0recv.cc | 2 +- 31 files changed, 55 insertions(+), 33 deletions(-) create mode 100644 mysql-test/suite/encryption/include/skip_innodb_log_archive.inc delete mode 100644 mysql-test/suite/encryption/t/innodb-first-page-read.opt diff --git a/mysql-test/include/innodb_encrypt_log.inc b/mysql-test/include/innodb_encrypt_log.inc index 5beebeae81f07..fd3e0ceea5758 100644 --- a/mysql-test/include/innodb_encrypt_log.inc +++ 
b/mysql-test/include/innodb_encrypt_log.inc @@ -2,3 +2,7 @@ # (see include/innodb_encrypt_log.combinations) --source include/have_innodb.inc +if ($MTR_COMBINATION_CRYPT) +{ +--source ../suite/encryption/include/skip_innodb_log_archive.inc +} diff --git a/mysql-test/suite/encryption/include/skip_innodb_log_archive.inc b/mysql-test/suite/encryption/include/skip_innodb_log_archive.inc new file mode 100644 index 0000000000000..3fa44408a68a3 --- /dev/null +++ b/mysql-test/suite/encryption/include/skip_innodb_log_archive.inc @@ -0,0 +1,12 @@ +--disable_query_log +SET STATEMENT sql_log_bin=0 FOR +call mtr.add_suppression("Plugin 'InnoDB' registration as a STORAGE ENGINE failed\\."); +SET STATEMENT sql_log_bin=0 FOR +call mtr.add_suppression("InnoDB: Plugin initialization aborted"); +SET STATEMENT sql_log_bin=0 FOR +call mtr.add_suppression("InnoDB: ib_0.*\\.log does not match innodb_encrypt_log"); +--enable_query_log +if (`SELECT COUNT(*)=0 FROM information_schema.global_variables where variable_name='innodb_log_archive' and variable_value='OFF'`) +{ + --skip Test requires innodb_log_archive=OFF +} diff --git a/mysql-test/suite/encryption/t/bulk_insert.test b/mysql-test/suite/encryption/t/bulk_insert.test index ce7804cb4e067..94b40f0152232 100644 --- a/mysql-test/suite/encryption/t/bulk_insert.test +++ b/mysql-test/suite/encryption/t/bulk_insert.test @@ -1,4 +1,5 @@ --source include/have_innodb.inc +--source include/skip_innodb_log_archive.inc # innodb_encrypt_log --source include/have_sequence.inc --source include/have_file_key_management_plugin.inc diff --git a/mysql-test/suite/encryption/t/debug_key_management.test b/mysql-test/suite/encryption/t/debug_key_management.test index 9638391e69058..3211687c0c294 100644 --- a/mysql-test/suite/encryption/t/debug_key_management.test +++ b/mysql-test/suite/encryption/t/debug_key_management.test @@ -1,4 +1,5 @@ -- source include/have_innodb.inc +-- source include/skip_innodb_log_archive.inc # innodb_encrypt_log -- source 
include/have_debug.inc -- source include/innodb_undo_tablespaces.inc -- source include/not_embedded.inc diff --git a/mysql-test/suite/encryption/t/encrypt_and_grep.test b/mysql-test/suite/encryption/t/encrypt_and_grep.test index 648ad80780c93..988604db58ecd 100644 --- a/mysql-test/suite/encryption/t/encrypt_and_grep.test +++ b/mysql-test/suite/encryption/t/encrypt_and_grep.test @@ -1,4 +1,5 @@ -- source include/have_innodb.inc +-- source include/skip_innodb_log_archive.inc # innodb_encrypt_log -- source include/innodb_undo_tablespaces.inc -- source include/have_file_key_management_plugin.inc diff --git a/mysql-test/suite/encryption/t/encryption_force.test b/mysql-test/suite/encryption/t/encryption_force.test index 3c6f039184b96..28b49c866b489 100644 --- a/mysql-test/suite/encryption/t/encryption_force.test +++ b/mysql-test/suite/encryption/t/encryption_force.test @@ -1,4 +1,5 @@ -- source include/have_innodb.inc +-- source include/skip_innodb_log_archive.inc # innodb_encrypt_log -- source include/have_partition.inc -- source include/have_example_key_management_plugin.inc diff --git a/mysql-test/suite/encryption/t/innochecksum.test b/mysql-test/suite/encryption/t/innochecksum.test index e440e3d3344f8..fc390aa59f635 100644 --- a/mysql-test/suite/encryption/t/innochecksum.test +++ b/mysql-test/suite/encryption/t/innochecksum.test @@ -6,6 +6,7 @@ -- source include/not_embedded.inc # Require InnoDB -- source include/have_innodb.inc +-- source ../../suite/innodb/include/skip_innodb_log_archive.inc # FIXME -- source include/have_file_key_management_plugin.inc -- source include/innodb_page_size_small.inc -- source include/innodb_checksum_algorithm.inc diff --git a/mysql-test/suite/encryption/t/innodb-first-page-read.opt b/mysql-test/suite/encryption/t/innodb-first-page-read.opt deleted file mode 100644 index 38d69691ed6aa..0000000000000 --- a/mysql-test/suite/encryption/t/innodb-first-page-read.opt +++ /dev/null @@ -1,5 +0,0 @@ ---innodb-encrypt-tables=ON 
---innodb-encrypt-log=ON ---innodb-encryption-rotate-key-age=15 ---innodb-encryption-threads=4 ---innodb-tablespaces-encryption diff --git a/mysql-test/suite/encryption/t/innodb-key-rotation-disable.test b/mysql-test/suite/encryption/t/innodb-key-rotation-disable.test index 1bd69365f6892..33936e1704768 100644 --- a/mysql-test/suite/encryption/t/innodb-key-rotation-disable.test +++ b/mysql-test/suite/encryption/t/innodb-key-rotation-disable.test @@ -1,4 +1,5 @@ -- source include/have_innodb.inc +-- source include/skip_innodb_log_archive.inc # innodb_encrypt_log -- source include/have_file_key_management_plugin.inc # not embedded because of restarts -- source include/not_embedded.inc diff --git a/mysql-test/suite/encryption/t/innodb-page_encryption-32k.test b/mysql-test/suite/encryption/t/innodb-page_encryption-32k.test index 03c9c93ff06d9..e175924d8159f 100644 --- a/mysql-test/suite/encryption/t/innodb-page_encryption-32k.test +++ b/mysql-test/suite/encryption/t/innodb-page_encryption-32k.test @@ -4,6 +4,7 @@ --source include/have_innodb.inc --source include/have_innodb_32k.inc --source include/have_file_key_management_plugin.inc +--source ../../suite/innodb/include/skip_innodb_log_archive.inc # FIXME create table innodb_normal(c1 bigint not null, b char(200)) engine=innodb; create table innodb_compact(c1 bigint not null, b char(200)) engine=innodb row_format=compact encrypted=yes encryption_key_id=1; diff --git a/mysql-test/suite/encryption/t/innodb-page_encryption_compression.test b/mysql-test/suite/encryption/t/innodb-page_encryption_compression.test index 5fe6f68657661..de826e28d6c70 100644 --- a/mysql-test/suite/encryption/t/innodb-page_encryption_compression.test +++ b/mysql-test/suite/encryption/t/innodb-page_encryption_compression.test @@ -1,6 +1,7 @@ -- source include/have_innodb.inc -- source include/not_embedded.inc -- source include/have_file_key_management_plugin.inc +-- source ../../suite/innodb/include/skip_innodb_log_archive.inc # FIXME let 
$innodb_compression_algorithm_orig=`SELECT @@innodb_compression_algorithm`; diff --git a/mysql-test/suite/encryption/t/innodb-page_encryption_log_encryption.test b/mysql-test/suite/encryption/t/innodb-page_encryption_log_encryption.test index a736c7292ad35..3c64c039f3a45 100644 --- a/mysql-test/suite/encryption/t/innodb-page_encryption_log_encryption.test +++ b/mysql-test/suite/encryption/t/innodb-page_encryption_log_encryption.test @@ -1,4 +1,5 @@ -- source include/have_innodb.inc +-- source include/skip_innodb_log_archive.inc # innodb_encrypt_log -- source include/not_embedded.inc -- source include/have_file_key_management_plugin.inc diff --git a/mysql-test/suite/encryption/t/innodb_encrypt_freed.test b/mysql-test/suite/encryption/t/innodb_encrypt_freed.test index 408e874a3b242..b37ff06dac1f7 100644 --- a/mysql-test/suite/encryption/t/innodb_encrypt_freed.test +++ b/mysql-test/suite/encryption/t/innodb_encrypt_freed.test @@ -1,4 +1,5 @@ --source include/have_innodb.inc +--source include/skip_innodb_log_archive.inc # innodb_encrypt_log --source include/have_example_key_management_plugin.inc --source include/have_debug.inc --source include/not_embedded.inc diff --git a/mysql-test/suite/encryption/t/innodb_encrypt_log.test b/mysql-test/suite/encryption/t/innodb_encrypt_log.test index 5448a606ba807..cdb807c157780 100644 --- a/mysql-test/suite/encryption/t/innodb_encrypt_log.test +++ b/mysql-test/suite/encryption/t/innodb_encrypt_log.test @@ -1,4 +1,5 @@ -- source include/have_innodb.inc +-- source include/skip_innodb_log_archive.inc # innodb_encrypt_log -- source include/not_embedded.inc -- source filekeys_plugin.inc diff --git a/mysql-test/suite/encryption/t/innodb_encrypt_log_corruption.test b/mysql-test/suite/encryption/t/innodb_encrypt_log_corruption.test index f1642e83e32e0..19858188cf1ff 100644 --- a/mysql-test/suite/encryption/t/innodb_encrypt_log_corruption.test +++ b/mysql-test/suite/encryption/t/innodb_encrypt_log_corruption.test @@ -1,3 +1,4 @@ +--source 
include/skip_innodb_log_archive.inc # innodb_encrypt_log --let $no_cleanup=1 --source ../../innodb/t/log_corruption.test diff --git a/mysql-test/suite/encryption/t/innodb_encryption-page-compression.test b/mysql-test/suite/encryption/t/innodb_encryption-page-compression.test index 57c8721282f4a..cf40cfc3ac816 100644 --- a/mysql-test/suite/encryption/t/innodb_encryption-page-compression.test +++ b/mysql-test/suite/encryption/t/innodb_encryption-page-compression.test @@ -1,4 +1,5 @@ -- source include/have_innodb.inc +-- source include/skip_innodb_log_archive.inc # innodb_encrypt_log -- source include/have_example_key_management_plugin.inc -- source include/not_embedded.inc # This test is too slow for valgrind and causes innnodb semaphores to time out diff --git a/mysql-test/suite/encryption/t/innodb_encryption.test b/mysql-test/suite/encryption/t/innodb_encryption.test index 2b0b2b8d7fb5c..113bbf152605d 100644 --- a/mysql-test/suite/encryption/t/innodb_encryption.test +++ b/mysql-test/suite/encryption/t/innodb_encryption.test @@ -2,6 +2,7 @@ # # -- source include/have_innodb.inc +-- source include/skip_innodb_log_archive.inc # innodb_encrypt_log -- source include/have_example_key_management_plugin.inc -- source include/innodb_undo_tablespaces.inc diff --git a/mysql-test/suite/encryption/t/innodb_encryption_discard_import.test b/mysql-test/suite/encryption/t/innodb_encryption_discard_import.test index e33aaec3e21c0..c1f51be89883f 100644 --- a/mysql-test/suite/encryption/t/innodb_encryption_discard_import.test +++ b/mysql-test/suite/encryption/t/innodb_encryption_discard_import.test @@ -1,4 +1,5 @@ -- source include/have_innodb.inc +-- source include/skip_innodb_log_archive.inc # innodb_encrypt_log -- source include/have_example_key_management_plugin.inc -- source include/not_valgrind.inc -- source include/not_embedded.inc diff --git a/mysql-test/suite/encryption/t/innodb_encryption_tables.test b/mysql-test/suite/encryption/t/innodb_encryption_tables.test index 
d03bc890ba4ed..3a8d244a1e3f4 100644 --- a/mysql-test/suite/encryption/t/innodb_encryption_tables.test +++ b/mysql-test/suite/encryption/t/innodb_encryption_tables.test @@ -1,4 +1,5 @@ -- source include/have_innodb.inc +-- source include/skip_innodb_log_archive.inc # innodb_encrypt_log -- source include/have_example_key_management_plugin.inc -- source include/not_embedded.inc # We can't run this test under valgrind as it 'takes forever' diff --git a/mysql-test/suite/encryption/t/innodb_first_page.test b/mysql-test/suite/encryption/t/innodb_first_page.test index db4d8eb3b16f5..838fc8396a9a8 100644 --- a/mysql-test/suite/encryption/t/innodb_first_page.test +++ b/mysql-test/suite/encryption/t/innodb_first_page.test @@ -3,6 +3,7 @@ # --source include/have_innodb.inc +--source include/skip_innodb_log_archive.inc # innodb_encrypt_log --source include/have_file_key_management_plugin.inc --source include/innodb_undo_tablespaces.inc diff --git a/mysql-test/suite/encryption/t/innodb_onlinealter_encryption.test b/mysql-test/suite/encryption/t/innodb_onlinealter_encryption.test index dc6d1e6f93c86..0b5c70339f704 100644 --- a/mysql-test/suite/encryption/t/innodb_onlinealter_encryption.test +++ b/mysql-test/suite/encryption/t/innodb_onlinealter_encryption.test @@ -1,4 +1,5 @@ -- source include/have_innodb.inc +-- source include/skip_innodb_log_archive.inc # innodb_encrypt_log -- source include/have_file_key_management_plugin.inc # test uses restart -- source include/not_embedded.inc diff --git a/mysql-test/suite/encryption/t/recovery_memory.test b/mysql-test/suite/encryption/t/recovery_memory.test index fc6f15f7ee27f..af539b3a36bec 100644 --- a/mysql-test/suite/encryption/t/recovery_memory.test +++ b/mysql-test/suite/encryption/t/recovery_memory.test @@ -1,5 +1,6 @@ --source include/have_debug.inc --source include/have_innodb.inc +--source include/skip_innodb_log_archive.inc # innodb_encrypt_log --source include/have_sequence.inc --source filekeys_plugin.inc diff --git 
a/mysql-test/suite/innodb/r/log_file_size_online.result b/mysql-test/suite/innodb/r/log_file_size_online.result index 31fe541545c87..7b3e6d631d648 100644 --- a/mysql-test/suite/innodb/r/log_file_size_online.result +++ b/mysql-test/suite/innodb/r/log_file_size_online.result @@ -1,7 +1,3 @@ -SET STATEMENT sql_log_bin=0 FOR -call mtr.add_suppression("Plugin 'InnoDB' registration as a STORAGE ENGINE failed\\."); -SET STATEMENT sql_log_bin=0 FOR -call mtr.add_suppression("InnoDB: Plugin initialization aborted"); call mtr.add_suppression("InnoDB: innodb_log_archive_start=1234567 is after innodb_log_recovery_start=12345"); call mtr.add_suppression("InnoDB: innodb_log_archive=ON but .*/ib_logfile0 exists"); call mtr.add_suppression("InnoDB: No matching file found for innodb_log_recovery_start=12290"); diff --git a/mysql-test/suite/innodb/t/alter_copy_bulk.test b/mysql-test/suite/innodb/t/alter_copy_bulk.test index ffa8e70f77ea0..d471ec498aa6f 100644 --- a/mysql-test/suite/innodb/t/alter_copy_bulk.test +++ b/mysql-test/suite/innodb/t/alter_copy_bulk.test @@ -1,4 +1,5 @@ --source include/have_innodb.inc +--source include/skip_innodb_log_archive.inc # FIXME: multi-file recovery --source include/have_partition.inc --source include/have_sequence.inc diff --git a/mysql-test/suite/innodb/t/insert_into_empty.test b/mysql-test/suite/innodb/t/insert_into_empty.test index 9c544a3532eec..d32aa01215973 100644 --- a/mysql-test/suite/innodb/t/insert_into_empty.test +++ b/mysql-test/suite/innodb/t/insert_into_empty.test @@ -1,4 +1,5 @@ --source include/have_innodb.inc +--source include/skip_innodb_log_archive.inc # FIXME: multi-file recovery --source include/innodb_page_size.inc --source include/have_sequence.inc --source include/maybe_debug.inc diff --git a/mysql-test/suite/innodb/t/log_file_size_online.test b/mysql-test/suite/innodb/t/log_file_size_online.test index eeaf1ab207df9..8382c65fa4bbc 100644 --- a/mysql-test/suite/innodb/t/log_file_size_online.test +++ 
b/mysql-test/suite/innodb/t/log_file_size_online.test @@ -1,19 +1,5 @@ --source include/have_innodb.inc - -SET STATEMENT sql_log_bin=0 FOR -call mtr.add_suppression("Plugin 'InnoDB' registration as a STORAGE ENGINE failed\\."); -SET STATEMENT sql_log_bin=0 FOR -call mtr.add_suppression("InnoDB: Plugin initialization aborted"); - -if (`SELECT COUNT(*)=0 FROM information_schema.global_variables where variable_name='innodb_log_archive' and variable_value='OFF'`) -{ ---disable_query_log -SET STATEMENT sql_log_bin=0 FOR -call mtr.add_suppression("InnoDB: ib_0.*\\.log does not match innodb_encrypt_log"); ---enable_query_log ---skip Test requires innodb_log_archive=OFF -} - +--source ../../suite/encryption/include/skip_innodb_log_archive.inc --source include/have_sequence.inc --source include/no_valgrind_without_big.inc diff --git a/mysql-test/suite/innodb/t/sys_truncate_debug.test b/mysql-test/suite/innodb/t/sys_truncate_debug.test index 7dcb5ffde2aa9..059193afc7b2f 100644 --- a/mysql-test/suite/innodb/t/sys_truncate_debug.test +++ b/mysql-test/suite/innodb/t/sys_truncate_debug.test @@ -1,4 +1,5 @@ --source include/have_innodb.inc +--source include/skip_innodb_log_archive.inc # FIXME: multi-file recovery --source include/have_sequence.inc --source include/not_embedded.inc --source include/have_debug.inc diff --git a/mysql-test/suite/innodb/t/sys_truncate_shutdown_debug.test b/mysql-test/suite/innodb/t/sys_truncate_shutdown_debug.test index d819ebe42eef4..08688de1f1a0e 100644 --- a/mysql-test/suite/innodb/t/sys_truncate_shutdown_debug.test +++ b/mysql-test/suite/innodb/t/sys_truncate_shutdown_debug.test @@ -1,4 +1,5 @@ --source include/have_innodb.inc +--source include/skip_innodb_log_archive.inc # FIXME: multi-file recovery --source include/have_sequence.inc --source include/not_embedded.inc --source include/have_debug.inc diff --git a/mysql-test/suite/innodb/t/undo_truncate.test b/mysql-test/suite/innodb/t/undo_truncate.test index 9abf08ff43631..7aff8e7e11433 100644 --- 
a/mysql-test/suite/innodb/t/undo_truncate.test +++ b/mysql-test/suite/innodb/t/undo_truncate.test @@ -1,4 +1,5 @@ --source include/have_innodb.inc +--source include/skip_innodb_log_archive.inc # FIXME: multi-file recovery --source include/innodb_page_size.inc --source include/not_embedded.inc --source include/have_sequence.inc diff --git a/storage/innobase/log/log0log.cc b/storage/innobase/log/log0log.cc index e5819350623c0..bc47165d34eb0 100644 --- a/storage/innobase/log/log0log.cc +++ b/storage/innobase/log/log0log.cc @@ -442,7 +442,7 @@ void log_t::create(lsn_t lsn) noexcept ut_ad(is_latest()); ut_ad(this == &log_sys); - next_checkpoint_no= archive ? (format == FORMAT_ENC_11 ? 8 : 1) : 0; + next_checkpoint_no= archive ? (is_encrypted() ? 8 : 1) : 0; write_lsn_offset= 0; base_lsn.store(lsn, std::memory_order_relaxed); flushed_to_disk_lsn.store(lsn, std::memory_order_relaxed); @@ -461,13 +461,12 @@ void log_t::create(lsn_t lsn) noexcept { ut_ad(is_opened() == archive); mprotect(buf, size_t(file_size), PROT_READ | PROT_WRITE); - if (!archive) - { - memset_aligned<4096>(buf, 0, 4096); - header_write(buf, lsn, is_encrypted()); - pmem_persist(buf, 512); - } buf_size= unsigned(std::min(capacity(), buf_size_max)); + if (archive) + goto archive_header; + memset_aligned<4096>(buf, 0, 4096); + header_write(buf, lsn, is_encrypted()); + pmem_persist(buf, 512); } else #endif @@ -481,6 +480,12 @@ void log_t::create(lsn_t lsn) noexcept log.write(0, {buf, 4096}); memset_aligned<512>(buf, 0, 512); } + else +#ifdef HAVE_PMEM + archive_header: +#endif + if (is_encrypted()) + log_crypt_write_header(buf); } } @@ -650,7 +655,7 @@ void log_t::header_rewrite(my_bool archive) noexcept else { next_checkpoint_no&= 1; - if (format == FORMAT_10_8) + if (!is_encrypted()) { mach_write_to_8(buf, uint32_t(end_lsn - first_lsn)); memset(buf + 8, 0, 60 - 8); diff --git a/storage/innobase/log/log0recv.cc b/storage/innobase/log/log0recv.cc index 894789b738c9b..17e5c71bcc70b 100644 --- 
a/storage/innobase/log/log0recv.cc +++ b/storage/innobase/log/log0recv.cc @@ -4956,7 +4956,7 @@ static recv_sys_t::parser get_parse_mmap() noexcept dberr_t recv_sys_t::find_checkpoint_archived(lsn_t first_lsn) { const byte *buf= log_sys.buf; - uint16_t n_checkpoint= 0; + uint16_t n_checkpoint= 1; { const uint32_t format{mach_read_from_4(buf)}; if (format != uint32_t(srv_encrypt_log)) From 5cc0306a076506d8d43e334816cfac0b54df32b6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marko=20M=C3=A4kel=C3=A4?= Date: Tue, 13 Jan 2026 16:35:31 +0200 Subject: [PATCH 17/33] Allow memory-mapped reads of the archived log log_t::attach(), log_mmap(): Add the parameter bool read_only. --- storage/innobase/include/log0log.h | 2 +- storage/innobase/log/log0log.cc | 24 +++++++++++++----------- storage/innobase/log/log0recv.cc | 9 +++++---- storage/innobase/srv/srv0start.cc | 2 +- 4 files changed, 20 insertions(+), 17 deletions(-) diff --git a/storage/innobase/include/log0log.h b/storage/innobase/include/log0log.h index bb8c61a957bfe..77046fc6dfe80 100644 --- a/storage/innobase/include/log0log.h +++ b/storage/innobase/include/log0log.h @@ -398,7 +398,7 @@ struct log_t /** Attach a log file. @return whether the memory allocation succeeded */ - bool attach(log_file_t file, os_offset_t size) noexcept; + bool attach(log_file_t file, os_offset_t size, bool read_only) noexcept; /** Disable memory-mapped access (update log_mmap) */ void clear_mmap() noexcept; diff --git a/storage/innobase/log/log0log.cc b/storage/innobase/log/log0log.cc index bc47165d34eb0..bf07680b3de82 100644 --- a/storage/innobase/log/log0log.cc +++ b/storage/innobase/log/log0log.cc @@ -193,15 +193,17 @@ void log_file_t::write(os_offset_t offset, span buf) noexcept # endif /** Attempt to memory map a file. 
-@param file log file handle -@param size file size +@param file log file handle +@param size file size +@param read_only whether the file is read-only @return pointer to memory mapping @retval MAP_FAILED if the memory cannot be mapped */ static void *log_mmap(os_file_t file, # ifdef HAVE_PMEM bool &is_pmem, /*!< whether the file is on pmem */ # endif - os_offset_t size) + os_offset_t size, + bool read_only) { #if SIZEOF_SIZE_T < 8 if (size != os_offset_t(size_t(size))) @@ -230,8 +232,8 @@ static void *log_mmap(os_file_t file, The mapping will always be read-only if innodb_read_only=ON or if mariadb-backup is running in any other mode than --prepare --export. */ - const bool read_only= - srv_read_only_mode || srv_operation >= SRV_OPERATION_BACKUP; + ut_ad(read_only || + (!srv_read_only_mode && srv_operation < SRV_OPERATION_BACKUP)); # ifdef _WIN32 void *ptr= MAP_FAILED; @@ -322,7 +324,7 @@ ATTRIBUTE_COLD static void log_file_message() noexcept static inline void log_file_message() noexcept {} #endif -bool log_t::attach(log_file_t file, os_offset_t size) noexcept +bool log_t::attach(log_file_t file, os_offset_t size, bool read_only) noexcept { ut_ad(!log.is_opened()); ut_ad(!resize_log.is_opened()); @@ -337,9 +339,9 @@ bool log_t::attach(log_file_t file, os_offset_t size) noexcept { # ifdef HAVE_PMEM bool is_pmem; - void *ptr= ::log_mmap(file.m_file, is_pmem, size); + void *ptr= ::log_mmap(file.m_file, is_pmem, size, read_only); # else - void *ptr= ::log_mmap(file.m_file, size); + void *ptr= ::log_mmap(file.m_file, size, read_only); # endif if (ptr != MAP_FAILED) { @@ -878,7 +880,7 @@ log_t::resize_start_status log_t::resize_start(os_offset_t size, void *thd) else if (is_mmap()) { bool is_pmem{false}; - ptr= ::log_mmap(resize_log.m_file, is_pmem, size); + ptr= ::log_mmap(resize_log.m_file, is_pmem, size, false); if (ptr == MAP_FAILED) goto alloc_fail; @@ -1298,7 +1300,7 @@ void log_t::archived_mmap_switch_prepare(bool late, bool ex) noexcept { bool is_pmem{false}; 
resize_buf= static_cast(::log_mmap(file, is_pmem, - resize_target)); + resize_target, false)); if (resize_buf != MAP_FAILED) { /* Will be closed in write_checkpoint() */ @@ -1704,7 +1706,7 @@ void log_t::clear_mmap() noexcept close_file(false); log_mmap= false; - ut_a(attach(log, file_size)); + ut_a(attach(log, file_size, false)); ut_ad(!is_mmap()); memcpy_aligned<16>(buf, log_block, bs); diff --git a/storage/innobase/log/log0recv.cc b/storage/innobase/log/log0recv.cc index 17e5c71bcc70b..c55478cbe07dc 100644 --- a/storage/innobase/log/log0recv.cc +++ b/storage/innobase/log/log0recv.cc @@ -1706,6 +1706,7 @@ dberr_t recv_sys_t::find_checkpoint() { byte *buf; lsn_t first_lsn= 0; + bool read_only= bool(srv_read_only_mode); ut_ad(pages.empty()); pages_it= pages.end(); @@ -1719,8 +1720,7 @@ dberr_t recv_sys_t::find_checkpoint() bool success; os_file_t file{os_file_create_func(path.c_str(), archive < 0 ? OS_FILE_OPEN : OS_FILE_OPEN_SILENT, - OS_LOG_FILE, - srv_read_only_mode, &success)}; + OS_LOG_FILE, read_only, &success)}; if (file != OS_FILE_CLOSED) { if (archive > 0) @@ -1861,9 +1861,10 @@ dberr_t recv_sys_t::find_checkpoint() default: path.push_back('/'); } + read_only= subsequent || srv_read_only_mode; file= os_file_create_func(log_sys.append_archive_name(path, first_lsn). 
c_str(), OS_FILE_OPEN, OS_LOG_FILE, - subsequent || srv_read_only_mode, &success); + read_only, &success); if (file == OS_FILE_CLOSED) goto no_archive_found_reported; log_sys.archive= true; @@ -1883,7 +1884,7 @@ dberr_t recv_sys_t::find_checkpoint() os_file_close(file); return DB_ERROR; } - else if (!log_sys.attach(file, size)) + else if (!log_sys.attach(file, size, read_only)) goto err_exit; else file= OS_FILE_CLOSED; diff --git a/storage/innobase/srv/srv0start.cc b/storage/innobase/srv/srv0start.cc index 9556dd4aea96e..40071694bf271 100644 --- a/storage/innobase/srv/srv0start.cc +++ b/storage/innobase/srv/srv0start.cc @@ -217,7 +217,7 @@ static dberr_t create_log_file(bool create_new_db, lsn_t lsn) } log_sys.set_latest_format(srv_encrypt_log); - if (!log_sys.attach(file, srv_log_file_size)) { + if (!log_sys.attach(file, srv_log_file_size, false)) { goto close_and_exit; } From 3802dd164c010080d02d62c6fe5e6821dddce708 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marko=20M=C3=A4kel=C3=A4?= Date: Wed, 14 Jan 2026 14:53:40 +0200 Subject: [PATCH 18/33] fixup! 
02e799ad7cf3c1fd2e2f710e82e77020ea6ce2e1 --- storage/innobase/log/log0log.cc | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/storage/innobase/log/log0log.cc b/storage/innobase/log/log0log.cc index bf07680b3de82..90f861872dade 100644 --- a/storage/innobase/log/log0log.cc +++ b/storage/innobase/log/log0log.cc @@ -664,11 +664,11 @@ void log_t::header_rewrite(my_bool archive) noexcept } else { + next_checkpoint_no+= 8; ut_ad(format == FORMAT_ENC_11); log_crypt_write_header(buf); mach_write_to_4(buf + 32, uint32_t(end_lsn - first_lsn)); memset(buf + 40, 0, 60 - 40); - next_checkpoint_no+= 8; } pmem_persist(buf, 64); memset_aligned<64>(buf + 64, 0, START_OFFSET - 64); @@ -700,7 +700,16 @@ void log_t::header_rewrite(my_bool archive) noexcept } else { - mach_write_to_4(c, uint32_t(end_lsn - first_lsn)); + next_checkpoint_no&= 1; + if (!is_encrypted()) + mach_write_to_8(c, uint32_t(end_lsn - first_lsn)); + else + { + next_checkpoint_no+= 8; + ut_ad(format == FORMAT_ENC_11); + log_crypt_write_header(c); + mach_write_to_4(c + 32, uint32_t(end_lsn - first_lsn)); + } log.write(0, {c, write_size}); os_file_flush(log.m_file); memset_aligned<512>(c, 0, write_size); From 9a933711c7302fe175ef6e5e9ed46302b87e498f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marko=20M=C3=A4kel=C3=A4?= Date: Wed, 14 Jan 2026 14:56:34 +0200 Subject: [PATCH 19/33] Prepare for multi-file innodb_log_archive recovery recv_sys_t::find_checkpoint_archived(): Read the checkpoint header. Add a parameter "bool silent", to be set when trying recovery from a possibly incomplete file. 
--- storage/innobase/include/log0recv.h | 5 +- storage/innobase/log/log0recv.cc | 77 ++++++++++++++++------------- 2 files changed, 48 insertions(+), 34 deletions(-) diff --git a/storage/innobase/include/log0recv.h b/storage/innobase/include/log0recv.h index 2a3961d43b317..08b56cba57e5b 100644 --- a/storage/innobase/include/log0recv.h +++ b/storage/innobase/include/log0recv.h @@ -406,11 +406,14 @@ struct recv_sys_t @return error code or DB_SUCCESS */ dberr_t find_checkpoint(); +private: /** Find a checkpoint in an innodb_log_archive=ON file. @param first_lsn the first LSN of the file + @param silent whether to silence error reporting @return error code @retval DB_SUCCESS if a suitable checkpoint was found */ - dberr_t find_checkpoint_archived(lsn_t first_lsn); + dberr_t find_checkpoint_archived(lsn_t first_lsn, bool silent); +public: /** Register a redo log snippet for a page. @param it page iterator diff --git a/storage/innobase/log/log0recv.cc b/storage/innobase/log/log0recv.cc index c55478cbe07dc..b3b723bfd09ba 100644 --- a/storage/innobase/log/log0recv.cc +++ b/storage/innobase/log/log0recv.cc @@ -1849,8 +1849,6 @@ dberr_t recv_sys_t::find_checkpoint() else start= found_recovery_start; - first_lsn= start->first; - path.assign(srv_log_group_home_dir); switch (path.back()) { #ifdef _WIN32 @@ -1862,16 +1860,24 @@ dberr_t recv_sys_t::find_checkpoint() path.push_back('/'); } read_only= subsequent || srv_read_only_mode; - file= os_file_create_func(log_sys.append_archive_name(path, first_lsn). - c_str(), OS_FILE_OPEN, OS_LOG_FILE, - read_only, &success); + file= + os_file_create_func(log_sys.append_archive_name(path, start->first). 
+ c_str(), OS_FILE_OPEN, OS_LOG_FILE, + read_only, &success); if (file == OS_FILE_CLOSED) goto no_archive_found_reported; + log_sys.archive= true; + + if (!log_sys.attach(file, start->second.end - start->first, read_only)) + goto err_exit; + + return find_checkpoint_archived(start->first, false); } + ut_ad(!log_sys.archive); const os_offset_t size{os_file_get_size(file)}; - if (!size && !log_sys.archive) + if (!size) { if (srv_operation != SRV_OPERATION_NORMAL) goto too_small; @@ -1890,8 +1896,6 @@ dberr_t recv_sys_t::find_checkpoint() file= OS_FILE_CLOSED; recv_sys.files.emplace_back(file); - if (log_sys.archive) - goto find_checkpoint; for (int i= 1; i < 101; i++) { @@ -1922,19 +1926,15 @@ dberr_t recv_sys_t::find_checkpoint() } } else - { ut_ad(srv_operation == SRV_OPERATION_BACKUP); - ut_ad(!log_sys.archive); - } - find_checkpoint: + + ut_ad(!log_sys.archive); log_sys.next_checkpoint_lsn= 0; lsn= 0; buf= my_assume_aligned<4096>(log_sys.buf); if (!log_sys.is_mmap()) if (dberr_t err= log_sys.log.read(0, {buf, log_sys.START_OFFSET})) return err; - if (log_sys.archive) - return find_checkpoint_archived(first_lsn); /* Check the header page checksum. There was no checksum in the first redo log format (version 0). */ @@ -1971,8 +1971,6 @@ dberr_t recv_sys_t::find_checkpoint() first_lsn= mach_read_from_8(buf + LOG_HEADER_START_LSN); log_sys.set_first_lsn(first_lsn); - if (!log_sys.archived_lsn) - log_sys.archived_lsn= first_lsn; char creator[LOG_HEADER_CREATOR_END - LOG_HEADER_CREATOR + 1]; memcpy(creator, buf + LOG_HEADER_CREATOR, sizeof creator); /* Ensure that the string is NUL-terminated. 
*/ @@ -2032,6 +2030,8 @@ dberr_t recv_sys_t::find_checkpoint() } if (!log_sys.next_checkpoint_lsn) goto got_no_checkpoint; + else if (!log_sys.archived_lsn) + log_sys.archived_lsn= log_sys.next_checkpoint_lsn; if (recv_sys_invalid_rpo(lsn)) return DB_READ_ONLY; if (!memcmp(creator, "Backup ", 7)) @@ -2984,7 +2984,7 @@ log_parse_file(const page_id_t id, bool if_exists, if (!log_sys.next_checkpoint_lsn) { - /* We are validating checkpoints in + /* We are currently validating checkpoints in recv_log_t::find_checkpoint_archived(). We must not open and validate data files until we actually start recovery from a checkpoint, because there could be lots of FILE_MODIFY and @@ -4954,20 +4954,30 @@ static recv_sys_t::parser get_parse_mmap() noexcept ut_error; } -dberr_t recv_sys_t::find_checkpoint_archived(lsn_t first_lsn) +dberr_t recv_sys_t::find_checkpoint_archived(lsn_t first_lsn, bool silent) { + alignas(4) byte header[log_sys.START_OFFSET]; const byte *buf= log_sys.buf; + if (!log_sys.is_mmap()) + { + buf= header; + if (dberr_t err= log_sys.log.read(0, header)) + return err; + } uint16_t n_checkpoint= 1; { const uint32_t format{mach_read_from_4(buf)}; if (format != uint32_t(srv_encrypt_log)) { - sql_print_error(format < 2 - ? "InnoDB: " LOG_ARCHIVE_NAME - " does not match innodb_encrypt_log" - : "InnoDB: " LOG_ARCHIVE_NAME - " is in unrecognized format", - first_lsn); + /* TODO: correct the file header later, if we can recover + from the previous file */ + if (!silent) + sql_print_error(format < 2 + ? 
"InnoDB: " LOG_ARCHIVE_NAME + " does not match innodb_encrypt_log" + : "InnoDB: " LOG_ARCHIVE_NAME + " is in unrecognized format", + first_lsn); return DB_ERROR; } @@ -4989,22 +4999,20 @@ dberr_t recv_sys_t::find_checkpoint_archived(lsn_t first_lsn) } } + log_sys.next_checkpoint_lsn= 0; log_sys.set_first_lsn(first_lsn); - if (!log_sys.archived_lsn) - log_sys.archived_lsn= first_lsn; + lsn= 0; /* Validate the checkpoints */ lsn_t end_lsn{first_lsn}, checkpoint{0}, recovery_start_end_lsn{0}; const recv_sys_t::parser parser[2] { get_parse_mmap(), get_parse_mmap() }; ut_ad(recv_spaces.empty()); - byte header[log_sys.START_OFFSET]; - if (log_sys.is_mmap()) - buf= static_cast(memcpy(header, buf, sizeof header)); for (bool first= true; n_checkpoint < sizeof header / 4; n_checkpoint++, first= false) { - const uint32_t d{mach_read_from_4(&buf[4 * n_checkpoint])}; + const uint32_t d= + mach_read_from_4(my_assume_aligned<4>(&buf[4 * n_checkpoint])); if (!d && !first) break; lsn= end_lsn + d; @@ -5025,15 +5033,17 @@ dberr_t recv_sys_t::find_checkpoint_archived(lsn_t first_lsn) checkpoint= log_sys.next_checkpoint_lsn; ut_ad(checkpoint); ut_ad(checkpoint < lsn); + if (first && !log_sys.archived_lsn) + log_sys.archived_lsn= checkpoint; if (checkpoint == recovery_start) recovery_start_end_lsn= end_lsn; } if (!checkpoint) { - /* FIXME: try with the preceding file as well! 
*/ - sql_print_error("InnoDB: Did not find any checkpoint after LSN=" LSN_PF, - first_lsn); + if (!silent) + sql_print_error("InnoDB: Did not find any checkpoint after LSN=" LSN_PF, + first_lsn); return DB_CORRUPTION; } @@ -5042,6 +5052,7 @@ dberr_t recv_sys_t::find_checkpoint_archived(lsn_t first_lsn) checkpoint= recovery_start, end_lsn= recovery_start_end_lsn; else { + ut_ad(!silent); sql_print_error("InnoDB: Did not find innodb_log_recovery_start=" LSN_PF " between " LSN_PF " and " LSN_PF " (" LSN_PF ")", recovery_start, first_lsn, end_lsn, checkpoint); From 89d8e1ad9c50dbd0f817ad7fa8159d5de4850445 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marko=20M=C3=A4kel=C3=A4?= Date: Wed, 14 Jan 2026 17:02:46 +0200 Subject: [PATCH 20/33] More multi-file recovery preparation recv_warp: A pointer wrapper for memory-mapped parsing that spans two archive log files. --- storage/innobase/include/log0log.h | 3 + storage/innobase/log/log0recv.cc | 166 ++++++++++++++++++++++------- 2 files changed, 130 insertions(+), 39 deletions(-) diff --git a/storage/innobase/include/log0log.h b/storage/innobase/include/log0log.h index 77046fc6dfe80..8e171d0db0706 100644 --- a/storage/innobase/include/log0log.h +++ b/storage/innobase/include/log0log.h @@ -111,9 +111,12 @@ class log_file_t bool flush() const noexcept { return os_file_flush(m_file); } }; +class recv_warp; + /** Redo log buffer */ struct log_t { + friend recv_warp; /** The maximum buf_size */ static constexpr unsigned buf_size_max= os_file_request_size_max; diff --git a/storage/innobase/log/log0recv.cc b/storage/innobase/log/log0recv.cc index b3b723bfd09ba..f6d02b328ba59 100644 --- a/storage/innobase/log/log0recv.cc +++ b/storage/innobase/log/log0recv.cc @@ -1748,7 +1748,6 @@ dberr_t recv_sys_t::find_checkpoint() if (archive) sql_print_error("InnoDB: innodb_log_archive files not found in '%s'", srv_log_group_home_dir); - no_archive_found_reported: if (archive) return DB_ERROR; archive= -1; @@ -1756,7 +1755,7 @@ dberr_t 
recv_sys_t::find_checkpoint() readdir: struct log { lsn_t end; bool read_only; }; - std::map logs; + std::map logs; #ifdef _WIN32 do { @@ -1810,13 +1809,16 @@ dberr_t recv_sys_t::find_checkpoint() int subsequent= 0; if (i == end) goto no_archive_found; + log_sys.format= srv_encrypt_log + ? log_t::FORMAT_ENC_11 : log_t::FORMAT_10_8; + log_sys.archive= true; for (;;) { const lsn_t first{i->first}, last{i->second.end}; if (log_sys.archived_lsn > first && log_sys.archived_lsn < last) { sql_print_error("InnoDB: Invalid innodb_log_archive_start=" LSN_PF); - goto no_archive_found_reported; + return DB_ERROR; } if (recovery_start >= first && recovery_start < last) found_recovery_start= i; @@ -1833,46 +1835,45 @@ dberr_t recv_sys_t::find_checkpoint() subsequent=0, start= i; } - if (!recovery_start) - { - /* Start the recovery from the first non-read-only log file. */ - for (i= logs.cbegin(); i != start; start--, subsequent++) - if (start->second.read_only) - break; - } - else if (found_recovery_start == end) + if (recovery_start && found_recovery_start == end) { sql_print_error("InnoDB: No matching file found for" " innodb_log_recovery_start=" LSN_PF, recovery_start); - goto no_archive_found_reported; + return DB_ERROR; } - else - start= found_recovery_start; - path.assign(srv_log_group_home_dir); - switch (path.back()) { + for (i= logs.cbegin();; start--, subsequent++) + { + if (recovery_start && start != found_recovery_start) + continue; + path.assign(srv_log_group_home_dir); + switch (path.back()) { #ifdef _WIN32 - case '\\': + case '\\': #endif - case '/': - break; - default: - path.push_back('/'); + case '/': + break; + default: + path.push_back('/'); + } + read_only= subsequent || srv_read_only_mode || start->second.read_only; + file= + os_file_create_func(log_sys.append_archive_name(path, start->first). 
+ c_str(), OS_FILE_OPEN, OS_LOG_FILE, + read_only, &success); + if (file == OS_FILE_CLOSED) + return DB_ERROR; + if (log_sys.log.is_opened()) + log_sys.close_file(true); // TODO: open and register later files + if (!log_sys.attach(file, start->second.end - start->first, read_only)) + { + os_file_close(file); + return DB_ERROR; + } + dberr_t err= find_checkpoint_archived(start->first, !read_only); + if ((err != DB_SUCCESS && read_only) || recovery_start || i == start) + return err; } - read_only= subsequent || srv_read_only_mode; - file= - os_file_create_func(log_sys.append_archive_name(path, start->first). - c_str(), OS_FILE_OPEN, OS_LOG_FILE, - read_only, &success); - if (file == OS_FILE_CLOSED) - goto no_archive_found_reported; - - log_sys.archive= true; - - if (!log_sys.attach(file, start->second.end - start->first, read_only)) - goto err_exit; - - return find_checkpoint_archived(start->first, false); } ut_ad(!log_sys.archive); @@ -2500,6 +2501,93 @@ struct recv_ring : public recv_buf } }; +/** Buffer wrapper for memory-mapped log_sys.archive, +with the capability to warp from log_sys.buf to log_sys.resize_buf */ +struct recv_warp : public recv_buf +{ + constexpr recv_warp(const byte *ptr) : recv_buf(ptr) {} + + static constexpr bool may_wrap() { return true; } + bool is_wrapped(const recv_warp &end) const { return end.ptr < ptr; } + constexpr static bool is_eof() { return false; } + constexpr static bool is_eof(size_t) { return false; } + + byte operator*() const noexcept + { + ut_ad(ptr >= &log_sys.buf[log_sys.START_OFFSET]); + ut_ad(ptr < end()); + return *ptr; + } + recv_warp operator+(size_t len) const noexcept + { recv_warp r{*this}; return r+= len; } + recv_warp &operator++() noexcept { return *this+= 1; } + recv_warp &operator+=(size_t len) noexcept + { + ut_ad(len < recv_sys.MTR_SIZE_MAX * 2); + const byte *const e{end()}; + const bool first{ptr < e && ptr >= log_sys.buf}; + ut_ad(!first || ptr >= &log_sys.buf[log_sys.START_OFFSET]); + ptr+= len; + if 
(first) + { + if (ptr < e) + return *this; + ptr= &log_sys.resize_buf[log_sys.START_OFFSET + (ptr - e)]; + } + ut_ad(ptr > &log_sys.resize_buf[log_sys.START_OFFSET]); + ut_ad(ptr < &log_sys.resize_buf[log_sys.resize_target]); + return *this; + } + size_t operator-(const recv_warp &start) const noexcept + { + int db= (size_t(ptr - log_sys.buf) < recv_sys.len) - + (size_t(start.ptr - log_sys.buf) < recv_sys.len); + ut_ad(db >= 0); + return size_t((db + ? end() + (ptr - &log_sys.resize_buf[log_sys.START_OFFSET]) + : ptr) - start.ptr); + } + + uint32_t decode_varint() const noexcept + { + recv_warp log{*this}; + uint32_t i{*log}; + if (i < MIN_2BYTE) + return i; + uint32_t j{*++log}; + if (i < 0xc0) + return MIN_2BYTE + ((i & ~0xc0) << 8 | j); + j<<= 8; + j|= *++log; + if (i < 0xe0) + return MIN_3BYTE + ((i & ~0xe0) << 16 | j); + j<<= 8; + j|= *++log; + if (i < 0xf0) + return MIN_4BYTE + ((i & ~0xf0) << 24 | j); + if (i == 0xf0) + { + j<<= 8; + j|= *++log; + if (j <= ~MIN_5BYTE) + return MIN_5BYTE + j; + } + return MLOG_DECODE_ERROR; + } + + uint32_t crc32c(uint32_t crc, const recv_warp &start) const noexcept + { + int db= (size_t(ptr - log_sys.buf) < recv_sys.len) - + (size_t(start.ptr - log_sys.buf) < recv_sys.len); + ut_ad(db >= 0); + return db + ? 
my_crc32c(my_crc32c(crc, start.ptr, end() - start.ptr), + &log_sys.resize_buf[log_sys.START_OFFSET], + ptr - &log_sys.resize_buf[log_sys.START_OFFSET]) + : my_crc32c(crc, start.ptr, ptr - start.ptr); + } +}; + ATTRIBUTE_COLD void recv_sys_t::rewind(const byte *begin, const byte *end) noexcept { @@ -3423,10 +3511,10 @@ recv_sys_t::parse_mtr_result recv_sys_t::parse_mmap(bool if_exists) recv_sys_t::parse_mtr_result r{parse_mtr(if_exists)}; if (UNIV_LIKELY(r != PREMATURE_EOF) || !log_sys.is_mmap()) return r; - ut_ad(!log_sys.archive); // FIXME: implement ut_ad(recv_sys.len == log_sys.file_size); ut_ad(recv_sys.offset >= log_sys.START_OFFSET); ut_ad(recv_sys.offset <= recv_sys.len); + ut_ad(!log_sys.archive); // FIXME: implement recv_ring s {recv_sys.offset == recv_sys.len ? &log_sys.buf[log_sys.START_OFFSET] @@ -4956,6 +5044,7 @@ static recv_sys_t::parser get_parse_mmap() noexcept dberr_t recv_sys_t::find_checkpoint_archived(lsn_t first_lsn, bool silent) { + ut_ad(log_sys.archive); alignas(4) byte header[log_sys.START_OFFSET]; const byte *buf= log_sys.buf; if (!log_sys.is_mmap()) @@ -4964,7 +5053,7 @@ dberr_t recv_sys_t::find_checkpoint_archived(lsn_t first_lsn, bool silent) if (dberr_t err= log_sys.log.read(0, header)) return err; } - uint16_t n_checkpoint= 1; + uint16_t n_checkpoint; { const uint32_t format{mach_read_from_4(buf)}; if (format != uint32_t(srv_encrypt_log)) @@ -4982,7 +5071,7 @@ dberr_t recv_sys_t::find_checkpoint_archived(lsn_t first_lsn, bool silent) } if (!format) - log_sys.format= log_t::FORMAT_10_8; + n_checkpoint= 1; else if (!log_crypt_read_header(buf)) return DB_ERROR; else @@ -4995,7 +5084,6 @@ dberr_t recv_sys_t::find_checkpoint_archived(lsn_t first_lsn, bool silent) if (!tmp_buf) return DB_OUT_OF_MEMORY; } - log_sys.format= log_t::FORMAT_ENC_11; } } From c255e795041a79c23a7e515c42635d443a5c9eef Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marko=20M=C3=A4kel=C3=A4?= Date: Thu, 15 Jan 2026 12:42:53 +0200 Subject: [PATCH 21/33] fixup! 
89d8e1ad9c50dbd0f817ad7fa8159d5de4850445 --- storage/innobase/log/log0recv.cc | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/storage/innobase/log/log0recv.cc b/storage/innobase/log/log0recv.cc index f6d02b328ba59..b774258ac2d38 100644 --- a/storage/innobase/log/log0recv.cc +++ b/storage/innobase/log/log0recv.cc @@ -1706,7 +1706,7 @@ dberr_t recv_sys_t::find_checkpoint() { byte *buf; lsn_t first_lsn= 0; - bool read_only= bool(srv_read_only_mode); + bool read_only{srv_read_only_mode || srv_operation >= SRV_OPERATION_BACKUP}; ut_ad(pages.empty()); pages_it= pages.end(); @@ -1734,6 +1734,7 @@ dberr_t recv_sys_t::find_checkpoint() return DB_ERROR; else { + ut_ad(srv_operation == SRV_OPERATION_NORMAL); #ifdef _WIN32 WIN32_FIND_DATAA entry; HANDLE d= FindFirstFileA(srv_log_group_home_dir, &entry); From 0e5d4efc1b64b0427b03d9fadbc01bb658ab6d82 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marko=20M=C3=A4kel=C3=A4?= Date: Thu, 15 Jan 2026 15:25:43 +0200 Subject: [PATCH 22/33] Fix checkpoint handling --- storage/innobase/buf/buf0flu.cc | 16 +++++++++++- storage/innobase/log/log0log.cc | 8 +++--- storage/innobase/log/log0recv.cc | 42 +++++++++++++++++++++----------- 3 files changed, 47 insertions(+), 19 deletions(-) diff --git a/storage/innobase/buf/buf0flu.cc b/storage/innobase/buf/buf0flu.cc index f70fbd4ae0b6b..b788ca8c8350f 100644 --- a/storage/innobase/buf/buf0flu.cc +++ b/storage/innobase/buf/buf0flu.cc @@ -1827,7 +1827,21 @@ inline void log_t::write_checkpoint(lsn_t end_lsn) noexcept ("checkpoint at " LSN_PF " written", next_checkpoint_lsn)); if (archive && resize_log.is_opened()) - next_checkpoint_no= is_encrypted() ? 
8 : 1; + { + byte *c= checkpoint_buf; + if (c) + memset_aligned<512>(c, 0, write_size); + else + { + c= buf; + ut_ad(!memcmp_aligned<512>(c, field_ref_zero, START_OFFSET)); + } + + next_checkpoint_no= uint16_t(8 * is_encrypted() + 1); + + if (is_encrypted()) + log_crypt_write_header(c); + } const auto n= next_checkpoint_no; size_t offset; diff --git a/storage/innobase/log/log0log.cc b/storage/innobase/log/log0log.cc index 90f861872dade..dc7ac7aca0374 100644 --- a/storage/innobase/log/log0log.cc +++ b/storage/innobase/log/log0log.cc @@ -656,19 +656,19 @@ void log_t::header_rewrite(my_bool archive) noexcept } else { - next_checkpoint_no&= 1; + next_checkpoint_no= uint16_t(8 * is_encrypted() + 1); if (!is_encrypted()) { + ut_ad(format == FORMAT_10_8); mach_write_to_8(buf, uint32_t(end_lsn - first_lsn)); - memset(buf + 8, 0, 60 - 8); + memset_aligned<8>(buf + 8, 0, 64 - 8); } else { - next_checkpoint_no+= 8; ut_ad(format == FORMAT_ENC_11); log_crypt_write_header(buf); mach_write_to_4(buf + 32, uint32_t(end_lsn - first_lsn)); - memset(buf + 40, 0, 60 - 40); + memset_aligned<4>(buf + 36, 0, 64 - 36); } pmem_persist(buf, 64); memset_aligned<64>(buf + 64, 0, START_OFFSET - 64); diff --git a/storage/innobase/log/log0recv.cc b/storage/innobase/log/log0recv.cc index b774258ac2d38..869e989fbcd67 100644 --- a/storage/innobase/log/log0recv.cc +++ b/storage/innobase/log/log0recv.cc @@ -1866,12 +1866,14 @@ dberr_t recv_sys_t::find_checkpoint() return DB_ERROR; if (log_sys.log.is_opened()) log_sys.close_file(true); // TODO: open and register later files - if (!log_sys.attach(file, start->second.end - start->first, read_only)) + if (!log_sys.attach(file, start->second.end - start->first + + log_t::START_OFFSET, read_only)) { os_file_close(file); return DB_ERROR; } - dberr_t err= find_checkpoint_archived(start->first, !read_only); + const dberr_t err= + find_checkpoint_archived(start->first, !read_only && i != start); if ((err != DB_SUCCESS && read_only) || recovery_start || i == 
start) return err; } @@ -5046,14 +5048,16 @@ static recv_sys_t::parser get_parse_mmap() noexcept dberr_t recv_sys_t::find_checkpoint_archived(lsn_t first_lsn, bool silent) { ut_ad(log_sys.archive); - alignas(4) byte header[log_sys.START_OFFSET]; - const byte *buf= log_sys.buf; - if (!log_sys.is_mmap()) + ut_ad(!log_sys.checkpoint_buf == log_sys.is_mmap()); + const byte *buf; + if (byte *c= log_sys.checkpoint_buf) { - buf= header; - if (dberr_t err= log_sys.log.read(0, header)) + buf= c; + if (dberr_t err= log_sys.log.read(0, {c, log_sys.write_size})) return err; } + else + buf= log_sys.buf; uint16_t n_checkpoint; { const uint32_t format{mach_read_from_4(buf)}; @@ -5072,12 +5076,12 @@ dberr_t recv_sys_t::find_checkpoint_archived(lsn_t first_lsn, bool silent) } if (!format) - n_checkpoint= 1; + buf+= 4, n_checkpoint= 1; else if (!log_crypt_read_header(buf)) return DB_ERROR; else { - n_checkpoint= 32/*log_crypt_read_header()*/ / 4; + buf+= 32/*log_crypt_read_header()*/, n_checkpoint= 32 / 4; if (!tmp_buf) { tmp_buf= static_cast @@ -5097,11 +5101,9 @@ dberr_t recv_sys_t::find_checkpoint_archived(lsn_t first_lsn, bool silent) get_parse_mmap(), get_parse_mmap() }; ut_ad(recv_spaces.empty()); - for (bool first= true; n_checkpoint < sizeof header / 4; - n_checkpoint++, first= false) + for (bool first= true; n_checkpoint < log_sys.START_OFFSET / 4; first= false) { - const uint32_t d= - mach_read_from_4(my_assume_aligned<4>(&buf[4 * n_checkpoint])); + const uint32_t d= mach_read_from_4(my_assume_aligned<4>(buf)); if (!d && !first) break; lsn= end_lsn + d; @@ -5114,7 +5116,7 @@ dberr_t recv_sys_t::find_checkpoint_archived(lsn_t first_lsn, bool silent) { found_corrupt_log= false; if (!d && first) - continue; + goto next; break; } ut_ad(file_checkpoint == lsn); @@ -5126,6 +5128,18 @@ dberr_t recv_sys_t::find_checkpoint_archived(lsn_t first_lsn, bool silent) log_sys.archived_lsn= checkpoint; if (checkpoint == recovery_start) recovery_start_end_lsn= end_lsn; + next: + 
n_checkpoint++; + buf+= 4; + if (byte *c= log_sys.checkpoint_buf) + { + uint offset(n_checkpoint * 4); + if (offset & (log_sys.write_size - 1)) + continue; + buf= c; + if (dberr_t err= log_sys.log.read(offset, {c, log_sys.write_size})) + return err; + } } if (!checkpoint) From 50143aa78d2946b6106b74cf1c4b818b3b11282e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marko=20M=C3=A4kel=C3=A4?= Date: Thu, 15 Jan 2026 15:40:50 +0200 Subject: [PATCH 23/33] WIP: multi-file recovery recv_sys_t::parse_mmap(): Use recv_warp for innodb_log_archive=ON TODO: Implement this for the pread() interface as well. TODO: Actually attach all files to recv_sys.files and implement the file switching. --- storage/innobase/include/log0log.h | 11 +++++++++-- storage/innobase/log/log0recv.cc | 16 +++++++++++++++- 2 files changed, 24 insertions(+), 3 deletions(-) diff --git a/storage/innobase/include/log0log.h b/storage/innobase/include/log0log.h index 8e171d0db0706..cf7cf9a5ce558 100644 --- a/storage/innobase/include/log0log.h +++ b/storage/innobase/include/log0log.h @@ -111,7 +111,7 @@ class log_file_t bool flush() const noexcept { return os_file_flush(m_file); } }; -class recv_warp; +struct recv_warp; /** Redo log buffer */ struct log_t @@ -248,7 +248,9 @@ struct log_t lsn_t resize_target; /** Buffer for writing to resize_log; @see buf Also a spare buffer between archived_mmap_switch_prepare() - and archived_mmap_switch_complete() */ + and archived_mmap_switch_complete(), + or archived_mmap_switch_recovery_prepare() + and archived_mmap_switch_recovery_complete(). */ byte *resize_buf; /** Buffer for writing to resize_log; @see flush_buf */ byte *resize_flush_buf; @@ -549,6 +551,11 @@ struct log_t @retval 0 if no switch took place */ ATTRIBUTE_COLD lsn_t archived_mmap_switch_complete() noexcept; + /** Prepare for multi-file memory-mapped log recovery. */ + ATTRIBUTE_COLD void archived_mmap_switch_recovery_prepare() noexcept; + /** Finish archived_mmap_switch_recovery_prepare(). 
*/ + ATTRIBUTE_COLD void archived_mmap_switch_recovery_complete() noexcept; + /** How to write log */ enum write { /** normal writing !log_sys.is_mmap() */ diff --git a/storage/innobase/log/log0recv.cc b/storage/innobase/log/log0recv.cc index 869e989fbcd67..5da6a4ff6c2a9 100644 --- a/storage/innobase/log/log0recv.cc +++ b/storage/innobase/log/log0recv.cc @@ -3508,6 +3508,11 @@ recv_sys_t::parse_mtr_result recv_sys_t::parse_mtr(bool if_exists) return recv_sys.parse(s, if_exists); } +ATTRIBUTE_COLD void log_t::archived_mmap_switch_recovery_complete() noexcept +{ + // TODO +} + template recv_sys_t::parse_mtr_result recv_sys_t::parse_mmap(bool if_exists) { @@ -3517,7 +3522,16 @@ recv_sys_t::parse_mtr_result recv_sys_t::parse_mmap(bool if_exists) ut_ad(recv_sys.len == log_sys.file_size); ut_ad(recv_sys.offset >= log_sys.START_OFFSET); ut_ad(recv_sys.offset <= recv_sys.len); - ut_ad(!log_sys.archive); // FIXME: implement + if (log_sys.archive) + { + ut_ad(log_sys.archived_mmap_switch()); + recv_warp s{&log_sys.buf[recv_sys.offset]}; + auto r= recv_sys.parse(s,if_exists); + log_sys.archived_mmap_switch_recovery_complete(); + ut_ad(s.ptr > &log_sys.buf[log_sys.START_OFFSET]); + ut_ad(s.ptr < recv_buf::end()); + return r; + } recv_ring s {recv_sys.offset == recv_sys.len ? &log_sys.buf[log_sys.START_OFFSET] From 94972b72cc186dc59cb91e283d289adc884d7088 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marko=20M=C3=A4kel=C3=A4?= Date: Fri, 16 Jan 2026 07:52:16 +0200 Subject: [PATCH 24/33] fixup! 
0e5d4efc1b64b0427b03d9fadbc01bb658ab6d82 --- storage/innobase/log/log0log.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/storage/innobase/log/log0log.cc b/storage/innobase/log/log0log.cc index dc7ac7aca0374..a16487af562e8 100644 --- a/storage/innobase/log/log0log.cc +++ b/storage/innobase/log/log0log.cc @@ -656,7 +656,7 @@ void log_t::header_rewrite(my_bool archive) noexcept } else { - next_checkpoint_no= uint16_t(8 * is_encrypted() + 1); + next_checkpoint_no= uint16_t(8 * is_encrypted() + 2); if (!is_encrypted()) { ut_ad(format == FORMAT_10_8); From 0366f66cc8b02a33b056d09aa4e19ca57ec680c3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marko=20M=C3=A4kel=C3=A4?= Date: Fri, 16 Jan 2026 08:52:18 +0200 Subject: [PATCH 25/33] Diagnose a Windows problem --- mysql-test/suite/innodb/t/log_file_size_online.opt | 1 - storage/innobase/log/log0recv.cc | 11 +++++++---- 2 files changed, 7 insertions(+), 5 deletions(-) delete mode 100644 mysql-test/suite/innodb/t/log_file_size_online.opt diff --git a/mysql-test/suite/innodb/t/log_file_size_online.opt b/mysql-test/suite/innodb/t/log_file_size_online.opt deleted file mode 100644 index 1f9a83fbbfbcf..0000000000000 --- a/mysql-test/suite/innodb/t/log_file_size_online.opt +++ /dev/null @@ -1 +0,0 @@ ---skip-innodb-log-archive diff --git a/storage/innobase/log/log0recv.cc b/storage/innobase/log/log0recv.cc index 5da6a4ff6c2a9..e0e30259b1957 100644 --- a/storage/innobase/log/log0recv.cc +++ b/storage/innobase/log/log0recv.cc @@ -1746,9 +1746,8 @@ dberr_t recv_sys_t::find_checkpoint() goto readdir; #endif no_archive_found: - if (archive) - sql_print_error("InnoDB: innodb_log_archive files not found in '%s'", - srv_log_group_home_dir); + sql_print_error("InnoDB: innodb_log_archive files not found in '%s'", + srv_log_group_home_dir); if (archive) return DB_ERROR; archive= -1; @@ -1765,8 +1764,12 @@ dberr_t recv_sys_t::find_checkpoint() lsn_t lsn; int n{0}; const char *fn{entry.cFileName}; - if (1 != sscanf(fn, 
LOG_ARCHIVE_NAME "%n", &lsn, &n) || fn[n]) + const int s= sscanf(fn, LOG_ARCHIVE_NAME "%n", &lsn, &n); + if (1 != s || fn[n]) + { + sql_print_information("InnoDB: %s,%d,%d", fn, s, n); continue; + } LARGE_INTEGER filesize; filesize.LowPart= entry.nFileSizeLow; filesize.HighPart= entry.nFileSizeHigh; From e67615dfb06a94c42420790bc466491699c157cb Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marko=20M=C3=A4kel=C3=A4?= Date: Fri, 16 Jan 2026 10:00:28 +0200 Subject: [PATCH 26/33] Fix innodb.log_file_size_online,encrypted and Windows --- storage/innobase/log/log0log.cc | 3 +-- storage/innobase/log/log0recv.cc | 14 +++++++++++--- 2 files changed, 12 insertions(+), 5 deletions(-) diff --git a/storage/innobase/log/log0log.cc b/storage/innobase/log/log0log.cc index a16487af562e8..df0bc0ea73539 100644 --- a/storage/innobase/log/log0log.cc +++ b/storage/innobase/log/log0log.cc @@ -712,9 +712,8 @@ void log_t::header_rewrite(my_bool archive) noexcept } log.write(0, {c, write_size}); os_file_flush(log.m_file); - memset_aligned<512>(c, 0, write_size); for (size_t offset= 0; (offset+= write_size) < START_OFFSET;) - log.write(offset, {c, write_size}); + log.write(offset, {field_ref_zero, write_size}); } os_file_flush(log.m_file); diff --git a/storage/innobase/log/log0recv.cc b/storage/innobase/log/log0recv.cc index e0e30259b1957..a6f3b5e523fd8 100644 --- a/storage/innobase/log/log0recv.cc +++ b/storage/innobase/log/log0recv.cc @@ -1735,9 +1735,19 @@ dberr_t recv_sys_t::find_checkpoint() else { ut_ad(srv_operation == SRV_OPERATION_NORMAL); + path.reserve(strlen(srv_log_group_home_dir) + + sizeof "/ib_0000000000000000.log"); #ifdef _WIN32 WIN32_FIND_DATAA entry; - HANDLE d= FindFirstFileA(srv_log_group_home_dir, &entry); + path.assign(srv_log_group_home_dir); + switch (path.back()) { + case '\\': case '/': + break; + default: + path.push_back('/'); + } + path.append("ib_????????????????.log"); + HANDLE d= FindFirstFileA(path.c_str(), &entry); if (d != INVALID_HANDLE_VALUE) goto readdir; 
#else @@ -1781,8 +1791,6 @@ dberr_t recv_sys_t::find_checkpoint() while (FindNextFile(d, &entry)); FindClose(d); #else - path.reserve(strlen(srv_log_group_home_dir) + - sizeof "/ib_0000000000000000.log"); while (dirent *e= readdir(d)) { lsn_t lsn; From dbcc4d37bbc2f97a50a9a1ab999b95b8a05a416d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marko=20M=C3=A4kel=C3=A4?= Date: Fri, 16 Jan 2026 10:18:16 +0200 Subject: [PATCH 27/33] Revert "Diagnose a Windows problem" This reverts commit 0366f66cc8b02a33b056d09aa4e19ca57ec680c3. --- storage/innobase/log/log0recv.cc | 11 ++++------- 1 file changed, 4 insertions(+), 7 deletions(-) diff --git a/storage/innobase/log/log0recv.cc b/storage/innobase/log/log0recv.cc index a6f3b5e523fd8..0cba83d6d13cd 100644 --- a/storage/innobase/log/log0recv.cc +++ b/storage/innobase/log/log0recv.cc @@ -1756,8 +1756,9 @@ dberr_t recv_sys_t::find_checkpoint() goto readdir; #endif no_archive_found: - sql_print_error("InnoDB: innodb_log_archive files not found in '%s'", - srv_log_group_home_dir); + if (archive) + sql_print_error("InnoDB: innodb_log_archive files not found in '%s'", + srv_log_group_home_dir); if (archive) return DB_ERROR; archive= -1; @@ -1774,12 +1775,8 @@ dberr_t recv_sys_t::find_checkpoint() lsn_t lsn; int n{0}; const char *fn{entry.cFileName}; - const int s= sscanf(fn, LOG_ARCHIVE_NAME "%n", &lsn, &n); - if (1 != s || fn[n]) - { - sql_print_information("InnoDB: %s,%d,%d", fn, s, n); + if (1 != sscanf(fn, LOG_ARCHIVE_NAME "%n", &lsn, &n) || fn[n]) continue; - } LARGE_INTEGER filesize; filesize.LowPart= entry.nFileSizeLow; filesize.HighPart= entry.nFileSizeHigh; From 38b11f3b4972f8cfd3fb342dd1fdc812a24f2321 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marko=20M=C3=A4kel=C3=A4?= Date: Fri, 16 Jan 2026 11:52:37 +0200 Subject: [PATCH 28/33] fixup! 
0e5d4efc1b64b0427b03d9fadbc01bb658ab6d82 --- storage/innobase/log/log0log.cc | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/storage/innobase/log/log0log.cc b/storage/innobase/log/log0log.cc index df0bc0ea73539..1a891e7824600 100644 --- a/storage/innobase/log/log0log.cc +++ b/storage/innobase/log/log0log.cc @@ -638,6 +638,7 @@ void log_t::header_rewrite(my_bool archive) noexcept byte* c= checkpoint_buf; ut_ad(end_lsn >= first_lsn); ut_ad(!archive || end_lsn <= first_lsn + ~0U); + ut_ad(format == (is_encrypted() ? FORMAT_ENC_11 : FORMAT_10_8)); #ifdef HAVE_PMEM if (!c) { @@ -659,13 +660,11 @@ void log_t::header_rewrite(my_bool archive) noexcept next_checkpoint_no= uint16_t(8 * is_encrypted() + 2); if (!is_encrypted()) { - ut_ad(format == FORMAT_10_8); mach_write_to_8(buf, uint32_t(end_lsn - first_lsn)); memset_aligned<8>(buf + 8, 0, 64 - 8); } else { - ut_ad(format == FORMAT_ENC_11); log_crypt_write_header(buf); mach_write_to_4(buf + 32, uint32_t(end_lsn - first_lsn)); memset_aligned<4>(buf + 36, 0, 64 - 36); @@ -700,13 +699,11 @@ void log_t::header_rewrite(my_bool archive) noexcept } else { - next_checkpoint_no&= 1; + next_checkpoint_no= uint16_t(8 * is_encrypted() + 2); if (!is_encrypted()) mach_write_to_8(c, uint32_t(end_lsn - first_lsn)); else { - next_checkpoint_no+= 8; - ut_ad(format == FORMAT_ENC_11); log_crypt_write_header(c); mach_write_to_4(c + 32, uint32_t(end_lsn - first_lsn)); } From d9cf9a518da5c332bf6d9f53a440bc458cefab47 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marko=20M=C3=A4kel=C3=A4?= Date: Fri, 16 Jan 2026 12:21:17 +0200 Subject: [PATCH 29/33] Cleanup: this==&recv_sys --- storage/innobase/log/log0recv.cc | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/storage/innobase/log/log0recv.cc b/storage/innobase/log/log0recv.cc index 0cba83d6d13cd..3ab67b967c060 100644 --- a/storage/innobase/log/log0recv.cc +++ b/storage/innobase/log/log0recv.cc @@ -1907,7 +1907,7 @@ dberr_t recv_sys_t::find_checkpoint() 
else file= OS_FILE_CLOSED; - recv_sys.files.emplace_back(file); + files.emplace_back(file); for (int i= 1; i < 101; i++) { @@ -1925,7 +1925,7 @@ dberr_t recv_sys_t::find_checkpoint() int(path.size()), path.data(), sz, size); first_lsn= LSN_MAX; } - recv_sys.files.emplace_back(file); + files.emplace_back(file); } if (!size) @@ -1963,10 +1963,10 @@ dberr_t recv_sys_t::find_checkpoint() log_sys.last_checkpoint_lsn= log_sys.next_checkpoint_lsn; log_sys.set_recovered_lsn(log_sys.next_checkpoint_lsn); lsn= file_checkpoint= log_sys.next_checkpoint_lsn; - if (recv_sys.rpo && recv_sys.rpo != lsn) + if (rpo && rpo != lsn) { sql_print_error("InnoDB: cannot fulfill innodb_log_recovery_target=%" - PRIu64 "!=%" PRIu64, recv_sys.rpo, lsn); + PRIu64 "!=%" PRIu64, rpo, lsn); return DB_CORRUPTION; } if (UNIV_LIKELY(lsn != 0)) From ce75f74a580035d26e3e4f08e3f7a3e42f43954d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marko=20M=C3=A4kel=C3=A4?= Date: Fri, 16 Jan 2026 16:20:14 +0200 Subject: [PATCH 30/33] Steps towards multi-file recovery --- .../suite/encryption/t/innochecksum.test | 1 - .../t/innodb-page_encryption-32k.test | 1 - .../t/innodb-page_encryption_compression.test | 1 - .../suite/innodb/t/alter_copy_bulk.test | 1 - mysql-test/suite/innodb/t/bulk_load.test | 1 - .../t/innodb_bulk_create_index_debug.test | 1 - .../suite/innodb/t/shrink_cached_undo.test | 1 - .../suite/innodb/t/sys_truncate_debug.test | 1 - .../suite/innodb/t/sys_truncate_large.test | 1 - .../innodb/t/sys_truncate_shutdown_debug.test | 1 - mysql-test/suite/innodb/t/undo_truncate.test | 1 - .../suite/innodb/t/undo_truncate_recover.test | 1 - storage/innobase/include/log0log.h | 3 ++ storage/innobase/log/log0log.cc | 4 +- storage/innobase/log/log0recv.cc | 46 ++++++++++++++----- 15 files changed, 40 insertions(+), 25 deletions(-) diff --git a/mysql-test/suite/encryption/t/innochecksum.test b/mysql-test/suite/encryption/t/innochecksum.test index fc390aa59f635..e440e3d3344f8 100644 --- 
a/mysql-test/suite/encryption/t/innochecksum.test +++ b/mysql-test/suite/encryption/t/innochecksum.test @@ -6,7 +6,6 @@ -- source include/not_embedded.inc # Require InnoDB -- source include/have_innodb.inc --- source ../../suite/innodb/include/skip_innodb_log_archive.inc # FIXME -- source include/have_file_key_management_plugin.inc -- source include/innodb_page_size_small.inc -- source include/innodb_checksum_algorithm.inc diff --git a/mysql-test/suite/encryption/t/innodb-page_encryption-32k.test b/mysql-test/suite/encryption/t/innodb-page_encryption-32k.test index e175924d8159f..03c9c93ff06d9 100644 --- a/mysql-test/suite/encryption/t/innodb-page_encryption-32k.test +++ b/mysql-test/suite/encryption/t/innodb-page_encryption-32k.test @@ -4,7 +4,6 @@ --source include/have_innodb.inc --source include/have_innodb_32k.inc --source include/have_file_key_management_plugin.inc ---source ../../suite/innodb/include/skip_innodb_log_archive.inc # FIXME create table innodb_normal(c1 bigint not null, b char(200)) engine=innodb; create table innodb_compact(c1 bigint not null, b char(200)) engine=innodb row_format=compact encrypted=yes encryption_key_id=1; diff --git a/mysql-test/suite/encryption/t/innodb-page_encryption_compression.test b/mysql-test/suite/encryption/t/innodb-page_encryption_compression.test index de826e28d6c70..5fe6f68657661 100644 --- a/mysql-test/suite/encryption/t/innodb-page_encryption_compression.test +++ b/mysql-test/suite/encryption/t/innodb-page_encryption_compression.test @@ -1,7 +1,6 @@ -- source include/have_innodb.inc -- source include/not_embedded.inc -- source include/have_file_key_management_plugin.inc --- source ../../suite/innodb/include/skip_innodb_log_archive.inc # FIXME let $innodb_compression_algorithm_orig=`SELECT @@innodb_compression_algorithm`; diff --git a/mysql-test/suite/innodb/t/alter_copy_bulk.test b/mysql-test/suite/innodb/t/alter_copy_bulk.test index d471ec498aa6f..ffa8e70f77ea0 100644 --- 
a/mysql-test/suite/innodb/t/alter_copy_bulk.test +++ b/mysql-test/suite/innodb/t/alter_copy_bulk.test @@ -1,5 +1,4 @@ --source include/have_innodb.inc ---source include/skip_innodb_log_archive.inc # FIXME: multi-file recovery --source include/have_partition.inc --source include/have_sequence.inc diff --git a/mysql-test/suite/innodb/t/bulk_load.test b/mysql-test/suite/innodb/t/bulk_load.test index abec8439542c8..711786b97a5ae 100644 --- a/mysql-test/suite/innodb/t/bulk_load.test +++ b/mysql-test/suite/innodb/t/bulk_load.test @@ -1,5 +1,4 @@ --source include/have_innodb.inc ---source include/skip_innodb_log_archive.inc # FIXME: multi-file recovery --source include/have_sequence.inc --source include/big_test.inc --source include/not_embedded.inc diff --git a/mysql-test/suite/innodb/t/innodb_bulk_create_index_debug.test b/mysql-test/suite/innodb/t/innodb_bulk_create_index_debug.test index 4e36887ee8401..34c9336e51795 100644 --- a/mysql-test/suite/innodb/t/innodb_bulk_create_index_debug.test +++ b/mysql-test/suite/innodb/t/innodb_bulk_create_index_debug.test @@ -5,7 +5,6 @@ # Test Restart & Crash Recovery. -- source include/big_test.inc -- source include/innodb_page_size_small.inc ---source include/skip_innodb_log_archive.inc # FIXME: multi-file recovery let $restart_noprint=2; # Test Row Format: REDUNDANT. 
diff --git a/mysql-test/suite/innodb/t/shrink_cached_undo.test b/mysql-test/suite/innodb/t/shrink_cached_undo.test index 10ab63127669e..8274556d8fd57 100644 --- a/mysql-test/suite/innodb/t/shrink_cached_undo.test +++ b/mysql-test/suite/innodb/t/shrink_cached_undo.test @@ -1,5 +1,4 @@ --source include/have_innodb.inc ---source include/skip_innodb_log_archive.inc # FIXME: multi-file recovery --source include/have_sequence.inc --source include/have_debug.inc --source include/not_embedded.inc diff --git a/mysql-test/suite/innodb/t/sys_truncate_debug.test b/mysql-test/suite/innodb/t/sys_truncate_debug.test index 059193afc7b2f..7dcb5ffde2aa9 100644 --- a/mysql-test/suite/innodb/t/sys_truncate_debug.test +++ b/mysql-test/suite/innodb/t/sys_truncate_debug.test @@ -1,5 +1,4 @@ --source include/have_innodb.inc ---source include/skip_innodb_log_archive.inc # FIXME: multi-file recovery --source include/have_sequence.inc --source include/not_embedded.inc --source include/have_debug.inc diff --git a/mysql-test/suite/innodb/t/sys_truncate_large.test b/mysql-test/suite/innodb/t/sys_truncate_large.test index 2f6f8407b2473..8499ce0bf3a6a 100644 --- a/mysql-test/suite/innodb/t/sys_truncate_large.test +++ b/mysql-test/suite/innodb/t/sys_truncate_large.test @@ -1,6 +1,5 @@ --source include/big_test.inc --source include/have_innodb.inc ---source include/skip_innodb_log_archive.inc # FIXME: multi-file recovery --source include/have_sequence.inc --source include/not_valgrind.inc --source include/have_debug.inc diff --git a/mysql-test/suite/innodb/t/sys_truncate_shutdown_debug.test b/mysql-test/suite/innodb/t/sys_truncate_shutdown_debug.test index 08688de1f1a0e..d819ebe42eef4 100644 --- a/mysql-test/suite/innodb/t/sys_truncate_shutdown_debug.test +++ b/mysql-test/suite/innodb/t/sys_truncate_shutdown_debug.test @@ -1,5 +1,4 @@ --source include/have_innodb.inc ---source include/skip_innodb_log_archive.inc # FIXME: multi-file recovery --source include/have_sequence.inc --source 
include/not_embedded.inc --source include/have_debug.inc diff --git a/mysql-test/suite/innodb/t/undo_truncate.test b/mysql-test/suite/innodb/t/undo_truncate.test index 7aff8e7e11433..9abf08ff43631 100644 --- a/mysql-test/suite/innodb/t/undo_truncate.test +++ b/mysql-test/suite/innodb/t/undo_truncate.test @@ -1,5 +1,4 @@ --source include/have_innodb.inc ---source include/skip_innodb_log_archive.inc # FIXME: multi-file recovery --source include/innodb_page_size.inc --source include/not_embedded.inc --source include/have_sequence.inc diff --git a/mysql-test/suite/innodb/t/undo_truncate_recover.test b/mysql-test/suite/innodb/t/undo_truncate_recover.test index 0b9e9459b0306..988b28f75b461 100644 --- a/mysql-test/suite/innodb/t/undo_truncate_recover.test +++ b/mysql-test/suite/innodb/t/undo_truncate_recover.test @@ -7,7 +7,6 @@ # With larger innodb_page_size, the undo log tablespaces do not grow enough. --source include/innodb_page_size_small.inc --source include/have_innodb.inc ---source include/skip_innodb_log_archive.inc # FIXME: multi-file recovery --source include/have_debug.inc # Tests with embedded server do not support restarting --source include/not_embedded.inc diff --git a/storage/innobase/include/log0log.h b/storage/innobase/include/log0log.h index cf7cf9a5ce558..267ae18014abb 100644 --- a/storage/innobase/include/log0log.h +++ b/storage/innobase/include/log0log.h @@ -408,6 +408,9 @@ struct log_t /** Disable memory-mapped access (update log_mmap) */ void clear_mmap() noexcept; void close_file(bool really_close= true) noexcept; + /** Stash a log archive file in multi-file recovery */ + inline void stash_archive_file() noexcept; + #if defined __linux__ || defined _WIN32 /** Try to enable or disable file system caching (update log_buffered) */ void set_buffered(bool buffered) noexcept; diff --git a/storage/innobase/log/log0log.cc b/storage/innobase/log/log0log.cc index 1a891e7824600..9158215d7d781 100644 --- a/storage/innobase/log/log0log.cc +++ 
b/storage/innobase/log/log0log.cc @@ -327,11 +327,13 @@ static inline void log_file_message() noexcept {} bool log_t::attach(log_file_t file, os_offset_t size, bool read_only) noexcept { ut_ad(!log.is_opened()); - ut_ad(!resize_log.is_opened()); + ut_ad(archive || !resize_log.is_opened()); ut_ad(!size || size >= START_OFFSET + SIZE_OF_FILE_CHECKPOINT); ut_ad(!buf); ut_ad(!flush_buf); ut_ad(!writer); + ut_ad(archive || !resize_buf); + ut_ad(!resize_flush_buf); file_size= size; diff --git a/storage/innobase/log/log0recv.cc b/storage/innobase/log/log0recv.cc index 3ab67b967c060..707987fd26375 100644 --- a/storage/innobase/log/log0recv.cc +++ b/storage/innobase/log/log0recv.cc @@ -1702,6 +1702,26 @@ static bool recv_sys_invalid_rpo(lsn_t lsn) noexcept return true; } +inline void log_t::stash_archive_file() noexcept +{ + ut_ad(log.is_opened()); + if (resize_log.is_opened()) + { + ut_ad(!is_mmap() == !resize_buf); + if (resize_buf) + my_munmap(resize_buf, size_t(resize_target)); + resize_log.close(); + } + if (is_mmap()) + { + resize_buf= buf; + buf= nullptr; + } + std::swap(log, resize_log); + resize_target= file_size; + writer= nullptr; +} + dberr_t recv_sys_t::find_checkpoint() { byte *buf; @@ -1872,8 +1892,6 @@ dberr_t recv_sys_t::find_checkpoint() read_only, &success); if (file == OS_FILE_CLOSED) return DB_ERROR; - if (log_sys.log.is_opened()) - log_sys.close_file(true); // TODO: open and register later files if (!log_sys.attach(file, start->second.end - start->first + log_t::START_OFFSET, read_only)) { @@ -1882,8 +1900,9 @@ dberr_t recv_sys_t::find_checkpoint() } const dberr_t err= find_checkpoint_archived(start->first, !read_only && i != start); - if ((err != DB_SUCCESS && read_only) || recovery_start || i == start) + if (err == DB_SUCCESS || read_only || recovery_start || i == start) return err; + log_sys.stash_archive_file(); } } @@ -2525,8 +2544,9 @@ struct recv_warp : public recv_buf byte operator*() const noexcept { - ut_ad(ptr >= 
&log_sys.buf[log_sys.START_OFFSET]); - ut_ad(ptr < end()); + ut_ad((ptr >= &log_sys.buf[log_sys.START_OFFSET] && ptr < end()) || + (ptr >= &log_sys.resize_buf[log_sys.START_OFFSET] && + ptr < &log_sys.resize_buf[log_sys.resize_target])); return *ptr; } recv_warp operator+(size_t len) const noexcept @@ -2551,12 +2571,14 @@ struct recv_warp : public recv_buf } size_t operator-(const recv_warp &start) const noexcept { - int db= (size_t(ptr - log_sys.buf) < recv_sys.len) - - (size_t(start.ptr - log_sys.buf) < recv_sys.len); - ut_ad(db >= 0); - return size_t((db - ? end() + (ptr - &log_sys.resize_buf[log_sys.START_OFFSET]) - : ptr) - start.ptr); + const int start_in_buf= size_t(start.ptr - log_sys.buf) < recv_sys.len, + buf_diff= (size_t(ptr - log_sys.buf) < recv_sys.len) - start_in_buf; + ut_ad(buf_diff <= 0); + ut_ad(buf_diff == 0 || start_in_buf); + return buf_diff + ? size_t((ptr - &log_sys.resize_buf[log_sys.START_OFFSET]) - + (end() - start.ptr)) + : size_t(ptr - start.ptr); } uint32_t decode_varint() const noexcept @@ -3518,7 +3540,7 @@ recv_sys_t::parse_mtr_result recv_sys_t::parse_mtr(bool if_exists) ATTRIBUTE_COLD void log_t::archived_mmap_switch_recovery_complete() noexcept { - // TODO + ut_error; // TODO } template From ec192452ef9450b5217ebe153961b92446d5a547 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marko=20M=C3=A4kel=C3=A4?= Date: Mon, 19 Jan 2026 11:21:21 +0200 Subject: [PATCH 31/33] Fix log rotation log_t::checkpoint_margin(): Replaces log_checkpoint_margin(). If a new archived log file has been created, wait for the first checkpoint in that file. 
--- storage/innobase/include/log0log.h | 3 ++ storage/innobase/log/log0log.cc | 50 ++++++++++++++---------------- 2 files changed, 27 insertions(+), 26 deletions(-) diff --git a/storage/innobase/include/log0log.h b/storage/innobase/include/log0log.h index 267ae18014abb..3c32d5dd03e61 100644 --- a/storage/innobase/include/log0log.h +++ b/storage/innobase/include/log0log.h @@ -663,6 +663,9 @@ struct log_t @param end_lsn start LSN of the FILE_CHECKPOINT mini-transaction */ inline void write_checkpoint(lsn_t end_lsn) noexcept; + /** Wait for write_checkpoint() if necessary. */ + ATTRIBUTE_COLD void checkpoint_margin() noexcept; + /** Variations of write_buf() */ enum resizing_and_latch { /** skip latch.wr_unlock(); log resizing may or may not be in progress */ diff --git a/storage/innobase/log/log0log.cc b/storage/innobase/log/log0log.cc index 9158215d7d781..b5feb8348fc03 100644 --- a/storage/innobase/log/log0log.cc +++ b/storage/innobase/log/log0log.cc @@ -1738,45 +1738,46 @@ ATTRIBUTE_COLD void log_write_and_flush() noexcept } } -/****************************************************************//** -Tries to establish a big enough margin of free space in the log, such -that a new log entry can be catenated without an immediate need for a -checkpoint. NOTE: this function may only be called if the calling thread -owns no synchronization objects! 
*/ -ATTRIBUTE_COLD static void log_checkpoint_margin() noexcept +ATTRIBUTE_COLD void log_t::checkpoint_margin() noexcept { - while (log_sys.check_for_checkpoint()) + ut_ad(this == &log_sys); + ut_ad(!recv_no_log_write); + + while (check_for_checkpoint()) { - log_sys.latch.wr_lock(SRW_LOCK_CALL); + latch.wr_lock(SRW_LOCK_CALL); ut_ad(!recv_no_log_write); - if (!log_sys.check_for_checkpoint()) + if (!check_for_checkpoint()) { func_exit: - log_sys.latch.wr_unlock(); + latch.wr_unlock(); return; } - const lsn_t lsn= log_sys.get_lsn(); - const lsn_t max_age= log_sys.max_checkpoint_age; - const lsn_t age= lsn_t(lsn - log_sys.last_checkpoint_lsn); + const lsn_t last{last_checkpoint_lsn}, max_age{max_checkpoint_age}; + lsn_t lsn{get_lsn()}; - if (age <= max_age) + if (last < first_lsn) + lsn= first_lsn; + else { + if (lsn_t(lsn - last) <= max_age) + { #ifndef DBUG_OFF - skip_checkpoint: + skip_checkpoint: #endif - log_sys.set_check_for_checkpoint(false); - goto func_exit; + set_check_for_checkpoint(false); + goto func_exit; + } + DBUG_EXECUTE_IF("ib_log_checkpoint_avoid_hard", goto skip_checkpoint;); + lsn-= max_age; } - DBUG_EXECUTE_IF("ib_log_checkpoint_avoid_hard", goto skip_checkpoint;); - log_sys.latch.wr_unlock(); + latch.wr_unlock(); /* We must wait to prevent the tail of the log overwriting the head. 
*/ - buf_flush_wait_flushed(lsn - max_age); - /* Sleep to avoid a thundering herd */ - std::this_thread::sleep_for(std::chrono::milliseconds(10)); + buf_flush_wait_flushed(lsn); } } @@ -1787,10 +1788,7 @@ void log_free_check() noexcept { ut_ad(!lock_sys.is_holder()); if (log_sys.check_for_checkpoint()) - { - ut_ad(!recv_no_log_write); - log_checkpoint_margin(); - } + log_sys.checkpoint_margin(); } #ifdef __linux__ From fbe654948bf76c4e395439727a312a9f21d2642f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marko=20M=C3=A4kel=C3=A4?= Date: Mon, 19 Jan 2026 12:19:05 +0200 Subject: [PATCH 32/33] Refine the checkpoint switch log_t::write_checkpoint(): Allow an old checkpoint to be completed in the old archive file while a new one has been created. --- storage/innobase/buf/buf0flu.cc | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/storage/innobase/buf/buf0flu.cc b/storage/innobase/buf/buf0flu.cc index b788ca8c8350f..b01c8bb696983 100644 --- a/storage/innobase/buf/buf0flu.cc +++ b/storage/innobase/buf/buf0flu.cc @@ -1815,7 +1815,7 @@ static ulint buf_flush_LRU(ulint max_n) noexcept inline void log_t::write_checkpoint(lsn_t end_lsn) noexcept { ut_ad(!srv_read_only_mode); - ut_ad((archive ? 
end_lsn : next_checkpoint_lsn) >= first_lsn); + ut_ad(archive || next_checkpoint_lsn >= first_lsn); ut_ad(end_lsn >= next_checkpoint_lsn); ut_d(const lsn_t current_lsn{get_lsn()}); ut_ad(end_lsn <= current_lsn); @@ -1826,8 +1826,15 @@ inline void log_t::write_checkpoint(lsn_t end_lsn) noexcept DBUG_PRINT("ib_log", ("checkpoint at " LSN_PF " written", next_checkpoint_lsn)); - if (archive && resize_log.is_opened()) + const bool first_checkpoint_in_new_archive= + archive && resize_log.is_opened() && end_lsn >= first_lsn; + + if (first_checkpoint_in_new_archive) { +#ifdef HAVE_PMEM + if (UNIV_LIKELY_NULL(resize_buf)) + archived_mmap_switch_complete(); +#endif byte *c= checkpoint_buf; if (c) memset_aligned<512>(c, 0, write_size); @@ -1952,7 +1959,7 @@ inline void log_t::write_checkpoint(lsn_t end_lsn) noexcept last_checkpoint_lsn= checkpoint_lsn; if (!archive) archived_lsn= checkpoint_lsn; - else if (resize_log.is_opened()) + else if (first_checkpoint_in_new_archive) { /* Make the previous archived log file read-only */ #ifdef _WIN32 From f720198dbda5b5d08cde9c2bd54314d586bc153a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marko=20M=C3=A4kel=C3=A4?= Date: Mon, 19 Jan 2026 16:14:29 +0200 Subject: [PATCH 33/33] Relax the sequence bit handling --- storage/innobase/log/log0recv.cc | 2 +- storage/innobase/mtr/mtr0mtr.cc | 10 ++++++++-- 2 files changed, 9 insertions(+), 3 deletions(-) diff --git a/storage/innobase/log/log0recv.cc b/storage/innobase/log/log0recv.cc index 707987fd26375..d4ab569096c21 100644 --- a/storage/innobase/log/log0recv.cc +++ b/storage/innobase/log/log0recv.cc @@ -2729,7 +2729,7 @@ recv_sys_t::parse_mtr_result log_parse_start(source &l, unsigned nonce) eom_found: const lsn_t end_lsn{(l - begin) + recv_sys.lsn}; - if (*l != log_sys.get_sequence_bit(end_lsn)) + if (*l != log_sys.get_sequence_bit(end_lsn) && !log_sys.archive) return recv_sys_t::GOT_EOF; if (recv_sys.rpo && recv_sys.rpo < end_lsn) diff --git a/storage/innobase/mtr/mtr0mtr.cc 
b/storage/innobase/mtr/mtr0mtr.cc index ea76a7b00d65e..0554bda5c2c0b 100644 --- a/storage/innobase/mtr/mtr0mtr.cc +++ b/storage/innobase/mtr/mtr0mtr.cc @@ -1369,7 +1369,10 @@ mtr_t::finish_writer(mtr_t *mtr, size_t len) if (size_left <= size) { byte tail[5 + 8]; - tail[0]= log_sys.get_sequence_bit(start.first + len - size); + tail[0]= + (mode == log_t::WRITE_NORMAL + ? log_sys.archive : mode == log_t::ARCHIVED_MMAP) || + log_sys.get_sequence_bit(start.first + len - size); if (mtr->m_commit_lsn) { @@ -1388,7 +1391,10 @@ mtr_t::finish_writer(mtr_t *mtr, size_t len) } #endif - *start.second++= log_sys.get_sequence_bit(start.first + len - size); + *start.second++= + (mode == log_t::WRITE_NORMAL + ? log_sys.archive : mode == log_t::ARCHIVED_MMAP) || + log_sys.get_sequence_bit(start.first + len - size); if (mtr->m_commit_lsn) {