Fix vllm genlength bug for batch input

aerdem4 · aerdem4 · commit 4acbb29800dc · 2025-05-05T15:24:54.000+03:00
Signed-off-by: aerdem4 <ahmeterd4@gmail.com>
diff --git a/example_notebooks/vllm/utils.py b/example_notebooks/vllm/utils.py
@@ -1,5 +1,10 @@
+import os
 import vllm
 
+# vLLM V1 does not currently accept logits processors, so we need to disable V1
+# https://docs.vllm.ai/en/latest/getting_started/v1_user_guide.html#deprecated-features
+os.environ["VLLM_USE_V1"] = "0"
+
 
 class vLLMRunner:
     def __init__(self, model_name="Qwen/Qwen2.5-1.5B-Instruct"):
diff --git a/logits_processor_zoo/vllm/generation_length.py b/logits_processor_zoo/vllm/generation_length.py
@@ -39,6 +39,7 @@ class GenLengthLogitsProcessor:
     def __init__(self, tokenizer: PreTrainedTokenizer, boost_factor: float,
                  p: int = 2, complete_sentences: bool = False, boost_token_str: str = None):
         self.boost_token = tokenizer.eos_token_id
+        self.boost_token_str = boost_token_str
         if boost_token_str is not None:
             self.boost_token = text_to_token(tokenizer, boost_token_str, last=False)
         self.boost_factor = boost_factor
diff --git a/pyproject.toml b/pyproject.toml
@@ -1,6 +1,6 @@
 [tool.poetry]
 name = "logits-processor-zoo"
-version = "0.1.6"
+version = "0.1.7"
 description = "A collection of LogitsProcessors to customize and enhance LLM behavior for specific tasks."
 authors = ["Ahmet Erdem", "Ivan Sorokin", "Maximilian Jeblick", "Darragh Hanley", "David Austin"]
 readme = "README.md"