Merge pull request #17 from NVIDIA/fix-bugs

aerdem4 · web-flow · commit 00520ae5354f · 2025-05-06T16:00:42.000+03:00
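Fix bugs: skip prompt token ids outside the logits vocabulary in both cite-prompt processors, store boost_token_str on the vLLM GenLengthLogitsProcessor, force VLLM_USE_V1=0 in the vLLM example notebook utils, and bump the version to 0.1.7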
diff --git a/example_notebooks/vllm/utils.py b/example_notebooks/vllm/utils.py
@@ -1,5 +1,10 @@
+import os
 import vllm
 
+# vLLM V1 does not currently accept logits processors, so we need to disable V1
+# https://docs.vllm.ai/en/latest/getting_started/v1_user_guide.html#deprecated-features
+os.environ["VLLM_USE_V1"] = "0"
+
 
 class vLLMRunner:
     def __init__(self, model_name="Qwen/Qwen2.5-1.5B-Instruct"):
diff --git a/logits_processor_zoo/transformers/cite_prompt.py b/logits_processor_zoo/transformers/cite_prompt.py
@@ -41,11 +41,12 @@ def __init__(self, tokenizer: PreTrainedTokenizer, boost_factor: float = 1.0, bo
         self.boost_eos = boost_eos
 
     def _process(self, input_ids: List[int], scores: torch.Tensor) -> torch.Tensor:
+        voc_size = scores.shape[1]
         for i in range(scores.shape[0]):
             tokens = set(self.prompt_token_ids[i])
             if self.boost_eos:
                 tokens.add(self.eos_token_id)
 
-            tokens = list(tokens)
+            tokens = [t for t in tokens if t < voc_size]
             scores[i, tokens] += self.boost_factor
         return scores
diff --git a/logits_processor_zoo/vllm/cite_prompt.py b/logits_processor_zoo/vllm/cite_prompt.py
@@ -46,6 +46,6 @@ def __call__(self, prompt_tokens_ids: List[int], past_token_ids: List[int], scor
         if self.boost_eos:
             tokens.add(self.eos_token_id)
 
-        tokens = list(tokens)
+        tokens = [t for t in tokens if t < scores.shape[0]]
         scores[tokens] += self.boost_factor
         return scores
diff --git a/logits_processor_zoo/vllm/generation_length.py b/logits_processor_zoo/vllm/generation_length.py
@@ -39,6 +39,7 @@ class GenLengthLogitsProcessor:
     def __init__(self, tokenizer: PreTrainedTokenizer, boost_factor: float,
                  p: int = 2, complete_sentences: bool = False, boost_token_str: str = None):
         self.boost_token = tokenizer.eos_token_id
+        self.boost_token_str = boost_token_str
         if boost_token_str is not None:
             self.boost_token = text_to_token(tokenizer, boost_token_str, last=False)
         self.boost_factor = boost_factor
diff --git a/pyproject.toml b/pyproject.toml
@@ -1,6 +1,6 @@
 [tool.poetry]
 name = "logits-processor-zoo"
-version = "0.1.6"
+version = "0.1.7"
 description = "A collection of LogitsProcessors to customize and enhance LLM behavior for specific tasks."
 authors = ["Ahmet Erdem", "Ivan Sorokin", "Maximilian Jeblick", "Darragh Hanley", "David Austin"]
 readme = "README.md"