Skip to content

Commit 783e781

Browse files
committed
feat: add kvcache event uploading for host block.
1 parent 053d282 commit 783e781

File tree

8 files changed

+15
-18
lines changed

8 files changed

+15
-18
lines changed

xllm/core/framework/block/block_manager_impl.cpp

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -171,7 +171,6 @@ void BlockManagerImpl::get_merged_kvcache_event(KvCacheEvent* event) const {
171171
if (events != nullptr) {
172172
event->removed_cache.merge(events->removed_cache);
173173
event->stored_cache.merge(events->stored_cache);
174-
event->offload_cache.merge(events->offload_cache);
175174
events->clear();
176175
}
177176
}

xllm/core/framework/block/block_manager_impl.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -77,7 +77,7 @@ class BlockManagerImpl : public BlockManager {
7777
}
7878

7979
float get_gpu_cache_usage_perc() const override {
80-
return 1.0 - num_free_blocks_ * 1.0 / num_total_blocks();
80+
return 1 - static_cast<float>(num_free_blocks_) / num_total_blocks();
8181
}
8282

8383
// call BlockManager to free block used by Block.

xllm/core/framework/block/multi_tier_block_manager_pool.cpp

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -265,4 +265,15 @@ void MultiTierBlockManagerPool::transfer_blocks(std::vector<Batch>* batches) {
265265
saved_device_blocks_.resize(host_block_managers_.size());
266266
}
267267

268+
// Merges pending KV-cache events into `event`.
//
// When no host block managers are configured, this defers to
// BlockManagerPool::get_merged_kvcache_event. Otherwise only the host block
// managers are asked to merge their events into `event`; the base-class
// implementation is not invoked in that case.
//
// @param event  Destination passed through unchanged to the underlying
//               get_merged_kvcache_event calls.
void MultiTierBlockManagerPool::get_merged_kvcache_event(
    KvCacheEvent* event) const {
  if (host_block_managers_.empty()) {
    BlockManagerPool::get_merged_kvcache_event(event);
    return;
  }

  // Range-for instead of an int32_t index: avoids the signed/unsigned
  // comparison against host_block_managers_.size().
  for (const auto& host_block_manager : host_block_managers_) {
    host_block_manager->get_merged_kvcache_event(event);
  }
}
278+
268279
} // namespace xllm

xllm/core/framework/block/multi_tier_block_manager_pool.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -40,6 +40,8 @@ class MultiTierBlockManagerPool : public BlockManagerPool {
4040
bool update_prefetch_result(std::shared_ptr<Request>& request,
4141
const uint32_t timeout) override;
4242

43+
void get_merged_kvcache_event(KvCacheEvent* event) const override;
44+
4345
private:
4446
void allocate_host_shared(Sequence* sequence);
4547

xllm/core/framework/kv_cache/kv_cache_event.h

Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -25,13 +25,10 @@ struct KvCacheEvent {
2525
stored_cache;
2626
std::unordered_set<Murmur3Key, FixedStringKeyHash, FixedStringKeyEqual>
2727
removed_cache;
28-
std::unordered_set<Murmur3Key, FixedStringKeyHash, FixedStringKeyEqual>
29-
offload_cache;
3028

3129
void clear() {
3230
stored_cache.clear();
3331
removed_cache.clear();
34-
offload_cache.clear();
3532
}
3633
};
3734

xllm/core/framework/request/sequence.h

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,6 @@ limitations under the License.
1919
#include <absl/time/time.h>
2020
#include <folly/futures/Future.h>
2121

22-
#include <chrono>
2322
#include <cstdint>
2423
#include <vector>
2524

@@ -249,7 +248,7 @@ class Sequence final {
249248
return &prefetch_results_;
250249
}
251250

252-
bool update_prefetch_result(uint32_t timeout = 30);
251+
bool update_prefetch_result(uint32_t timeout);
253252

254253
void reset();
255254

xllm/core/runtime/xservice_client.cpp

Lines changed: 0 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -330,15 +330,6 @@ void XServiceClient::heartbeat() {
330330
sizeof(hash_key.data));
331331
}
332332
}
333-
334-
if (event.offload_cache.size()) {
335-
cache_event->mutable_offload_cache()->Reserve(
336-
event.offload_cache.size());
337-
for (auto& hash_key : event.offload_cache) {
338-
cache_event->add_offload_cache(hash_key.data,
339-
sizeof(hash_key.data));
340-
}
341-
}
342333
}
343334

344335
req.mutable_load_metrics()->set_gpu_cache_usage_perc(

xllm/proto/xservice.proto

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -42,7 +42,6 @@ message InstanceMetaInfo {
4242
message KvCacheEvent {
4343
repeated bytes stored_cache = 1;
4444
repeated bytes removed_cache = 2;
45-
repeated bytes offload_cache = 3;
4645
}
4746

4847
message LoadMetrics {
@@ -55,7 +54,6 @@ message LatencyMetrics {
5554
int64 recent_max_tbt = 2;
5655
}
5756

58-
// TODO: add metainfo/metrics
5957
message HeartbeatRequest {
6058
string name = 1;
6159
KvCacheEvent cache_event = 2;

0 commit comments

Comments
 (0)