Skip to content

Commit 086d258

Browse files
committed
Revert "bugfix: fix memory growth caused by brpc arena configuration."
This reverts commit 048cbb8.
1 parent 048cbb8 commit 086d258

File tree

3 files changed

+18
-8
lines changed

3 files changed

+18
-8
lines changed

xllm/api_service/api_service.cpp

100644 → 100755
Lines changed: 6 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -148,8 +148,8 @@ void ChatCompletionsImpl(std::unique_ptr<Service>& service,
148148
return;
149149
}
150150

151-
auto call =
152-
std::make_shared<ChatCall>(ctrl, guard.release(), req_pb, resp_pb);
151+
auto call = std::make_shared<ChatCall>(
152+
ctrl, guard.release(), req_pb, resp_pb, arena != nullptr /*use_arena*/);
153153
service->process_async(call);
154154
}
155155
} // namespace
@@ -167,17 +167,19 @@ void APIService::ChatCompletionsHttp(
167167
LOG(ERROR) << "brpc request | respose | controller is null";
168168
return;
169169
}
170-
auto arena = response->GetArena();
170+
171171
auto ctrl = reinterpret_cast<brpc::Controller*>(controller);
172172

173173
if (FLAGS_backend == "llm") {
174+
auto arena = response->GetArena();
174175
CHECK(chat_service_impl_) << " chat service is invalid.";
175176
ChatCompletionsImpl<ChatCall, ChatServiceImpl>(
176177
chat_service_impl_, done_guard, arena, ctrl);
177178
} else if (FLAGS_backend == "vlm") {
178179
CHECK(mm_chat_service_impl_) << " mm chat service is invalid.";
180+
// TODO: fix me - temporarily passing nullptr as the arena so this path uses heap allocation instead of arena
179181
ChatCompletionsImpl<MMChatCall, MMChatServiceImpl>(
180-
mm_chat_service_impl_, done_guard, arena, ctrl);
182+
mm_chat_service_impl_, done_guard, nullptr, ctrl);
181183
}
182184
}
183185

xllm/api_service/stream_call.h

100644 → 100755
Lines changed: 12 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -39,8 +39,13 @@ class StreamCall : public Call {
3939
StreamCall(brpc::Controller* controller,
4040
::google::protobuf::Closure* done,
4141
Request* request,
42-
Response* response)
43-
: Call(controller), done_(done), request_(request), response_(response) {
42+
Response* response,
43+
bool use_arena = true)
44+
: Call(controller),
45+
done_(done),
46+
request_(request),
47+
response_(response),
48+
use_arena_(use_arena) {
4449
stream_ = request_->stream();
4550
if (stream_) {
4651
pa_ = controller_->CreateProgressiveAttachment();
@@ -67,6 +72,10 @@ class StreamCall : public Call {
6772
if (!stream_) {
6873
done_->Run();
6974
}
75+
if (!use_arena_) {
76+
delete request_;
77+
delete response_;
78+
}
7079
}
7180

7281
bool write_and_finish(Response& response) {
@@ -142,6 +151,7 @@ class StreamCall : public Call {
142151
Response* response_;
143152

144153
bool stream_ = false;
154+
bool use_arena_ = true;
145155
butil::intrusive_ptr<brpc::ProgressiveAttachment> pa_;
146156
butil::IOBuf io_buf_;
147157

xllm/server/xllm_server.cpp

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -50,8 +50,6 @@ bool XllmServer::start(std::unique_ptr<APIService> service) {
5050
}
5151

5252
brpc::ServerOptions options;
53-
options.rpc_pb_message_factory =
54-
brpc::GetArenaRpcPBMessageFactory<1024 * 1024, 1024 * 1024 * 100>();
5553
options.idle_timeout_sec = FLAGS_rpc_idle_timeout_s;
5654
options.num_threads = FLAGS_num_threads;
5755
if (server_->Start(FLAGS_port, &options) != 0) {

0 commit comments

Comments
 (0)