@@ -13,14 +13,14 @@ See the License for the specific language governing permissions and
1313limitations under the License.
1414==============================================================================*/
1515
16- #include " multi_tier_block_manager_pool .h"
16+ #include " hierarchy_block_manager_pool .h"
1717
1818#include " block_manager_impl.h"
1919#include " concurrent_block_manager_impl.h"
2020
2121namespace xllm {
2222
23- MultiTierBlockManagerPool::MultiTierBlockManagerPool (
23+ HierarchyBlockManagerPool::HierarchyBlockManagerPool (
2424 const BlockManagerPool::Options& options,
2525 Engine* engine,
2626 int32_t dp_size)
@@ -52,7 +52,7 @@ MultiTierBlockManagerPool::MultiTierBlockManagerPool(
5252 saved_device_blocks_.resize (host_block_managers_.size ());
5353}
5454
55- void MultiTierBlockManagerPool ::deallocate (Sequence* sequence) {
55+ void HierarchyBlockManagerPool ::deallocate (Sequence* sequence) {
5656 DCHECK (sequence != nullptr );
5757 // add blocks to the prefix cache
5858 int32_t dp_rank = BlockManagerPool::get_dp_rank (sequence);
@@ -107,7 +107,7 @@ void MultiTierBlockManagerPool::deallocate(Sequence* sequence) {
107107 sequence->reset ();
108108}
109109
110- bool MultiTierBlockManagerPool ::allocate (Sequence* sequence,
110+ bool HierarchyBlockManagerPool ::allocate (Sequence* sequence,
111111 size_t num_tokens) {
112112 BlockManagerPool::allocate (sequence, num_tokens);
113113
@@ -137,7 +137,7 @@ bool MultiTierBlockManagerPool::allocate(Sequence* sequence,
137137 return true ;
138138}
139139
140- void MultiTierBlockManagerPool ::allocate_host_shared (Sequence* sequence) {
140+ void HierarchyBlockManagerPool ::allocate_host_shared (Sequence* sequence) {
141141 if (options_.enable_prefix_cache ()) {
142142 int32_t dp_rank = BlockManagerPool::get_dp_rank (sequence);
143143 std::vector<Block> shared_blocks =
@@ -146,7 +146,7 @@ void MultiTierBlockManagerPool::allocate_host_shared(Sequence* sequence) {
146146 }
147147}
148148
149- void MultiTierBlockManagerPool ::prefetch_from_storage (
149+ void HierarchyBlockManagerPool ::prefetch_from_storage (
150150 std::shared_ptr<Request>& request) {
151151 if (!options_.enable_kvcache_store ()) {
152152 return ;
@@ -202,7 +202,7 @@ void MultiTierBlockManagerPool::prefetch_from_storage(
202202 }
203203}
204204
205- bool MultiTierBlockManagerPool ::update_prefetch_result (
205+ bool HierarchyBlockManagerPool ::update_prefetch_result (
206206 std::shared_ptr<Request>& request,
207207 const uint32_t timeout) {
208208 if (!options_.enable_kvcache_store ()) {
@@ -216,7 +216,7 @@ bool MultiTierBlockManagerPool::update_prefetch_result(
216216 return prefetch_result;
217217}
218218
219- void MultiTierBlockManagerPool ::transfer_blocks (std::vector<Batch>* batches) {
219+ void HierarchyBlockManagerPool ::transfer_blocks (std::vector<Batch>* batches) {
220220 if (batches != nullptr ) {
221221 // load blocks from host to device
222222 for (int i = 0 ; i < batches->size (); i++) {
@@ -265,7 +265,7 @@ void MultiTierBlockManagerPool::transfer_blocks(std::vector<Batch>* batches) {
265265 saved_device_blocks_.resize (host_block_managers_.size ());
266266}
267267
268- void MultiTierBlockManagerPool ::get_merged_kvcache_event (
268+ void HierarchyBlockManagerPool ::get_merged_kvcache_event (
269269 KvCacheEvent* event) const {
270270 if (host_block_managers_.empty ()) {
271271 BlockManagerPool::get_merged_kvcache_event (event);
0 commit comments