From f2e9f7088d8ebd54b60a5efded8f5300edd5ad18 Mon Sep 17 00:00:00 2001 From: liangzhiwei20 Date: Fri, 5 Dec 2025 16:28:13 +0800 Subject: [PATCH] bugfix: fix core dump of large beam width. --- .../core/framework/block/block_manager_pool.cpp | 17 ++++++++++------- xllm/core/framework/block/block_manager_pool.h | 2 +- .../framework/request/sequence_kv_state.cpp | 7 +++---- xllm/core/framework/request/sequence_kv_state.h | 2 +- 4 files changed, 15 insertions(+), 13 deletions(-) diff --git a/xllm/core/framework/block/block_manager_pool.cpp b/xllm/core/framework/block/block_manager_pool.cpp index 97484dcb2..abb6eaf29 100644 --- a/xllm/core/framework/block/block_manager_pool.cpp +++ b/xllm/core/framework/block/block_manager_pool.cpp @@ -216,8 +216,7 @@ bool BlockManagerPool::allocate(Sequence* sequence, size_t num_tokens) { const size_t block_size = options_.block_size(); const size_t num_blocks_needed = (num_tokens + block_size - 1) / block_size; if (num_blocks_needed <= num_blocks) { - process_beam_search(sequence, /*need_swap*/ true); - return true; + return process_beam_search(sequence, /*need_swap*/ true); } process_beam_search(sequence); @@ -263,14 +262,14 @@ std::vector BlockManagerPool::allocate(size_t num_tokens, return block_managers_[dp_rank]->allocate(num_blocks_needed); } -void BlockManagerPool::process_beam_search(Sequence* sequence, bool need_swap) { +bool BlockManagerPool::process_beam_search(Sequence* sequence, bool need_swap) { if (!sequence->check_beam_search()) { - return; + return true; } auto src_blocks = sequence->kv_state().src_blocks(); if (src_blocks.size() == 0) { - return; + return true; } // when sequence need to swap the last block and no new block appended, @@ -278,12 +277,16 @@ void BlockManagerPool::process_beam_search(Sequence* sequence, bool need_swap) { if (need_swap && sequence->kv_state().need_swap()) { int32_t dp_rank = get_dp_rank(sequence); auto new_blocks = block_managers_[dp_rank]->allocate(1); + if (new_blocks.size() == 0) { + return false; + } swap_block_transfer_infos_[dp_rank].emplace_back(src_blocks.back().id(), new_blocks[0].id()); - sequence->kv_state().process_beam_search(new_blocks); + sequence->kv_state().process_beam_search(new_blocks[0]); } else { - sequence->kv_state().process_beam_search({}); + sequence->kv_state().process_beam_search(std::nullopt); } + return true; } uint32_t BlockManagerPool::pre_allocate(Sequence* sequence) { diff --git a/xllm/core/framework/block/block_manager_pool.h b/xllm/core/framework/block/block_manager_pool.h index 556fc0881..21d6f6a2f 100644 --- a/xllm/core/framework/block/block_manager_pool.h +++ b/xllm/core/framework/block/block_manager_pool.h @@ -91,7 +91,7 @@ class BlockManagerPool final : public KVCacheManager { void allocate_host_shared(Sequence* sequence); void save_offload_blocks(Sequence* sequence); - void process_beam_search(Sequence* sequence, bool need_swap = false); + bool process_beam_search(Sequence* sequence, bool need_swap = false); private: std::vector> block_managers_; diff --git a/xllm/core/framework/request/sequence_kv_state.cpp b/xllm/core/framework/request/sequence_kv_state.cpp index 883eff85b..53b9e6376 100644 --- a/xllm/core/framework/request/sequence_kv_state.cpp +++ b/xllm/core/framework/request/sequence_kv_state.cpp @@ -143,14 +143,13 @@ void KVCacheState::reset() { transfer_kv_info_.reset(); } -void KVCacheState::process_beam_search(const std::vector& new_blocks) { +void KVCacheState::process_beam_search(std::optional new_block) { blocks_.clear(); blocks_ = std::move(src_blocks_); - if (!new_blocks.empty()) { - CHECK_EQ(new_blocks.size(), 1); + if (new_block.has_value()) { blocks_.pop_back(); - blocks_.insert(blocks_.end(), new_blocks.begin(), new_blocks.end()); + blocks_.emplace_back(new_block.value()); } } diff --git a/xllm/core/framework/request/sequence_kv_state.h b/xllm/core/framework/request/sequence_kv_state.h index b4ef6b6ab..8b4401595 100644 --- a/xllm/core/framework/request/sequence_kv_state.h +++ b/xllm/core/framework/request/sequence_kv_state.h @@ -62,7 +62,7 @@ class KVCacheState { void reset(); - void process_beam_search(const std::vector& new_blocks); + void process_beam_search(std::optional new_block = std::nullopt); private: // number of tokens in kv cache