|
10 | 10 | "name": "stdout", |
11 | 11 | "output_type": "stream", |
12 | 12 | "text": [ |
13 | | - "/home/aerdem/projects/logits-processor-zoo\n" |
| 13 | + "/home/aerdem/projects/nvidia/logits-processor-zoo\n" |
14 | 14 | ] |
15 | 15 | } |
16 | 16 | ], |
|
25 | 25 | "metadata": {}, |
26 | 26 | "outputs": [ |
27 | 27 | { |
28 | | - "name": "stderr", |
| 28 | + "name": "stdout", |
29 | 29 | "output_type": "stream", |
30 | 30 | "text": [ |
31 | | - "/home/aerdem/projects/LLM/llmenv/lib/python3.10/site-packages/huggingface_hub/file_download.py:1132: FutureWarning: `resume_download` is deprecated and will be removed in version 1.0.0. Downloads always resume when possible. If you want to force a new download, use `force_download=True`.\n", |
32 | | - " warnings.warn(\n" |
| 31 | + "WARNING 12-19 10:37:26 config.py:1563] Casting torch.bfloat16 to torch.float16.\n", |
| 32 | + "INFO 12-19 10:37:26 llm_engine.py:184] Initializing an LLM engine (v0.5.5) with config: model='google/gemma-1.1-2b-it', speculative_config=None, tokenizer='google/gemma-1.1-2b-it', skip_tokenizer_init=False, tokenizer_mode=auto, revision=None, rope_scaling=None, rope_theta=None, tokenizer_revision=None, trust_remote_code=True, dtype=torch.float16, max_seq_len=8192, download_dir=None, load_format=LoadFormat.AUTO, tensor_parallel_size=1, pipeline_parallel_size=1, disable_custom_all_reduce=False, quantization=None, enforce_eager=True, kv_cache_dtype=auto, quantization_param_path=None, device_config=cuda, decoding_config=DecodingConfig(guided_decoding_backend='outlines'), observability_config=ObservabilityConfig(otlp_traces_endpoint=None, collect_model_forward_time=False, collect_model_execute_time=False), seed=0, served_model_name=google/gemma-1.1-2b-it, use_v2_block_manager=False, enable_prefix_caching=False)\n", |
| 33 | + "INFO 12-19 10:37:27 model_runner.py:879] Starting to load model google/gemma-1.1-2b-it...\n", |
| 34 | + "INFO 12-19 10:37:28 weight_utils.py:236] Using model weights format ['*.safetensors']\n" |
33 | 35 | ] |
34 | 36 | }, |
| 37 | + { |
| 38 | + "data": { |
| 39 | + "application/vnd.jupyter.widget-view+json": { |
| 40 | + "model_id": "243efc7aaada47fd82cc1043c275f03d", |
| 41 | + "version_major": 2, |
| 42 | + "version_minor": 0 |
| 43 | + }, |
| 44 | + "text/plain": [ |
| 45 | + "Loading safetensors checkpoint shards: 0% Completed | 0/2 [00:00<?, ?it/s]\n" |
| 46 | + ] |
| 47 | + }, |
| 48 | + "metadata": {}, |
| 49 | + "output_type": "display_data" |
| 50 | + }, |
35 | 51 | { |
36 | 52 | "name": "stdout", |
37 | 53 | "output_type": "stream", |
38 | 54 | "text": [ |
39 | | - "WARNING 07-23 11:04:22 config.py:1222] Casting torch.bfloat16 to torch.float16.\n", |
40 | | - "INFO 07-23 11:04:22 llm_engine.py:161] Initializing an LLM engine (v0.5.0.post1) with config: model='google/gemma-1.1-2b-it', speculative_config=None, tokenizer='google/gemma-1.1-2b-it', skip_tokenizer_init=False, tokenizer_mode=auto, revision=None, rope_scaling=None, rope_theta=None, tokenizer_revision=None, trust_remote_code=True, dtype=torch.float16, max_seq_len=8192, download_dir=None, load_format=LoadFormat.AUTO, tensor_parallel_size=1, disable_custom_all_reduce=False, quantization=None, enforce_eager=True, kv_cache_dtype=auto, quantization_param_path=None, device_config=cuda, decoding_config=DecodingConfig(guided_decoding_backend='outlines'), seed=0, served_model_name=google/gemma-1.1-2b-it)\n", |
41 | | - "INFO 07-23 11:04:25 weight_utils.py:218] Using model weights format ['*.safetensors']\n", |
42 | | - "INFO 07-23 11:04:27 model_runner.py:160] Loading model weights took 4.6720 GB\n", |
43 | | - "INFO 07-23 11:04:28 gpu_executor.py:83] # GPU blocks: 52902, # CPU blocks: 14563\n" |
| 55 | + "INFO 12-19 10:37:30 model_runner.py:890] Loading model weights took 4.6720 GB\n", |
| 56 | + "INFO 12-19 10:37:32 gpu_executor.py:121] # GPU blocks: 49691, # CPU blocks: 14563\n" |
44 | 57 | ] |
45 | 58 | } |
46 | 59 | ], |
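
Note: the updated logs correspond to a vLLM engine spin-up. The input cell itself is not part of this diff, but a cell producing output like this would typically look roughly as follows (a sketch assuming vLLM's public LLM API; the exact arguments used in the notebook are not shown here):

    from vllm import LLM

    # Mirrors the config echoed in the INFO lines above: gemma-1.1-2b-it,
    # bfloat16 weights cast down to float16, eager mode, 8192-token context.
    llm = LLM(
        model="google/gemma-1.1-2b-it",
        trust_remote_code=True,
        dtype="half",        # logged as "Casting torch.bfloat16 to torch.float16"
        enforce_eager=True,
        max_model_len=8192,
        seed=0,
    )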
|