Skip to content

Commit 0e26d72

Browse files
committed
fix: add clear_grammar to remove grammar from reused model_request
1 parent fbbbb4f commit 0e26d72

File tree

4 files changed: 17 additions (+17) and 1 deletion (-1).

4 files changed: 17 additions (+17) and 1 deletion (-1).

lmdeploy/turbomind/turbomind.py

Lines changed: 2 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -814,6 +814,8 @@ async def async_stream_infer(self,
814814
while not state or state.status == 0:
815815
await sem.acquire()
816816
state = shared_state.consume()
817+
818+
self.model_inst.clear_grammar()
817819
logger.info(f'[async_stream_infer] session {session_id} done')
818820

819821
def _get_error_output(self, status):

src/turbomind/engine/model_request.cc

Lines changed: 5 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -152,4 +152,9 @@ void ModelRequest::setGrammar(const xgrammar::CompiledGrammar& grammar)
152152
grammar_ = std::make_shared<xgrammar::CompiledGrammar>(grammar);
153153
}
154154

155+
void ModelRequest::clearGrammar()
156+
{
// Release this request's hold on the compiled grammar: resets grammar_, the
// member that setGrammar() assigns from std::make_shared.
// NOTE(review): per the commit title, the intent is to keep a reused
// model_request from carrying a previous grammar into its next use — confirm
// against the code that reads grammar_, which is not visible in this hunk.
157+
grammar_.reset();
158+
}
159+
155160
} // namespace turbomind

src/turbomind/engine/model_request.h

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -41,6 +41,7 @@ class ModelRequest {
4141

4242
OutputParam Forward(InputParam param, std::function<void()> cb);
4343
void setGrammar(const xgrammar::CompiledGrammar& grammar);
44+
void clearGrammar();
4445

4546
protected:
4647
Gateway* const gateway_;

src/turbomind/python/bind.cpp

Lines changed: 9 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -498,7 +498,15 @@ PYBIND11_MODULE(_turbomind, m)
498498
model_request->setGrammar(grammar);
499499
},
500500
py::call_guard<py::gil_scoped_release>(),
501-
"grammar"_a);
501+
"grammar"_a)
502+
.def(
503+
"clear_grammar",
504+
[](ModelRequest* model_request) {
505+
TM_LOG_INFO("Release grammar for model_request");
506+
model_request->clearGrammar();
507+
},
508+
py::call_guard<py::gil_scoped_release>());
509+
502510

503511
// transformer model
504512
using ft::LlamaTritonModel;

0 commit comments

Comments
 (0)