Commit c9f2005

Support vllm serve (#21)
Signed-off-by: aerdem4 <ahmeterd4@gmail.com>
1 parent df7f158 commit c9f2005

File tree: 7 files changed, +224 -32 lines changed

Lines changed: 176 additions & 0 deletions
@@ -0,0 +1,176 @@
+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": 1,
+   "id": "59f98cf9",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "/home/aerdem/projects/nvidia/logits-processor-zoo\n"
+     ]
+    }
+   ],
+   "source": [
+    "%cd ../.."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 2,
+   "id": "f2a86616",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Run vllm serve like this:\n",
+    "# vllm serve Qwen/Qwen2.5-1.5B-Instruct --dtype auto --api-key lpz-test --logits-processor-pattern \"logits_processor_zoo.vllm\""
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 3,
+   "id": "13f407ff",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Fried rice chicken is a popular Chinese dish that combines the flavors of fried rice with the tender texture and juicy meat of chicken. Here's a basic recipe to help you make it at home:\n",
+      "\n",
+      "### Ingredients:\n",
+      "- 1 pound boneless skinless chicken breast or thighs (cut into bite-sized pieces)\n",
+      "- 2 tablespoons vegetable oil\n",
+      "- 3 cloves garlic, minced\n",
+      "- 1 tablespoon ginger, grated\n",
+      "- 1/4 cup soy sauce\n",
+      "- 1/4 cup oyster sauce\n",
+      "- 1 teaspoon sugar\n",
+      "- 1/2 teaspoon salt\n",
+      "- 1/4 teaspoon black pepper\n",
+      "- 1 can (8 oz) condensed cream of mushroom soup\n",
+      "- 1 cup frozen mixed vegetables (such as peas, carrots, corn)\n",
+      "- 1/2 cup chopped green onions\n",
+      "- 1/4 cup chopped cilantro\n",
+      "\n",
+      "### Instructions:\n",
+      "\n",
+      "#### Step 1: Prepare the Chicken\n",
+      "1. **Marinate the Chicken:** In a bowl, mix together the chicken, soy sauce, oyster sauce, sugar, salt, and black pepper.\n",
+      "2. **Cook the Chicken:** Heat the vegetable oil in a large skillet over medium-high heat. Add the marinated chicken and cook until browned on all sides, about 5 minutes per side. Remove from the pan and set aside.\n",
+      "\n",
+      "#### Step 2: Cook the Vegetables\n",
+      "1. **Sauté the Vegetables:** In the same skillet, add the remaining 1 tablespoon of oil. Sauté the minced garlic and grated ginger for about 30 seconds until fragrant.\n",
+      "2. **Add the Mixed Vegetables:** Stir in the frozen mixed vegetables and sauté until they start to soften, about 2-3 minutes.\n",
+      "3. **Combine Everything:** Return the cooked chicken to the skillet along with the sautéed vegetables. Pour in the condensed cream of mushroom soup and stir well to combine everything.\n",
+      "\n",
+      "#### Step 3: Finish Cooking\n",
+      "1. **Simmer the Sauce:** Bring the mixture to a simmer over low heat. Let it cook for about 5 minutes, stirring occasionally, until the sauce thickens slightly.\n",
+      "2. **Serve:** Garnish with chopped green onions and cilantro before serving. This dish can be served hot or cold depending on your preference.\n",
+      "\n",
+      "Enjoy your homemade fried rice chicken! Adjust the seasoning according to your taste preferences.\n"
+     ]
+    }
+   ],
+   "source": [
+    "from openai import OpenAI\n",
+    "\n",
+    "model_name = \"Qwen/Qwen2.5-1.5B-Instruct\"\n",
+    "\n",
+    "client = OpenAI(\n",
+    "    base_url=\"http://localhost:8000/v1\",\n",
+    "    api_key=\"lpz-test\",\n",
+    ")\n",
+    "\n",
+    "completion = client.chat.completions.create(\n",
+    "    model=model_name,\n",
+    "    messages=[\n",
+    "        {\"role\": \"user\", \"content\": \"Can you explain how fried rice chicken is cooked?\"}\n",
+    "    ], \n",
+    "    temperature=0,\n",
+    "    top_p=1\n",
+    ")\n",
+    "\n",
+    "print(completion.choices[0].message.content)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 4,
+   "id": "6227231c",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Fried rice chicken is a popular Chinese dish that combines the flavors of fried rice with the tender texture and juicy meat of chicken. Here's a basic recipe to help you make it at home:\n",
+      "\n",
+      "### Ingredients:\n",
+      "- 1 pound boneless skinless chicken breast or thighs (cut into bite-sized pieces)\n",
+      "- 2 tablespoons vegetable oil\n",
+      "- 3 cloves garlic, minced\n",
+      "- 1 tablespoon ginger, grated\n",
+      "- 1/4 cup soy sauce\n",
+      "- 1/4 cup oyster sauce\n",
+      "- 1 teaspoon sugar\n",
+      "- 1/2 teaspoon salt\n",
+      "- 1/4 teaspoon black pepper\n",
+      "- 1 can (8 oz) condensed cream of mushroom soup\n",
+      "\n"
+     ]
+    }
+   ],
+   "source": [
+    "completion = client.chat.completions.create(\n",
+    "    model=model_name,\n",
+    "    messages=[\n",
+    "        {\"role\": \"user\", \"content\": \"Can you explain how fried rice chicken is cooked?\"}\n",
+    "    ],\n",
+    "    temperature=0,\n",
+    "    top_p=1,\n",
+    "    extra_body={\n",
+    "        \"logits_processors\": [{\n",
+    "            \"qualname\": \"logits_processor_zoo.vllm.GenLengthLogitsProcessor\",\n",
+    "            \"kwargs\": {\"tokenizer\": model_name, \"boost_factor\": 0.2, \"complete_sentences\": True}\n",
+    "        }]\n",
+    "    }\n",
+    ")\n",
+    "\n",
+    "print(completion.choices[0].message.content)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "96544ec2",
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3 (ipykernel)",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.10.17"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}
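
The notebook above captures the intended workflow: start the server with --logits-processor-pattern "logits_processor_zoo.vllm", then pass a qualname and constructor kwargs through extra_body. As a hedged sketch, the same request shape should carry over to the other processors touched by this commit, for example CiteFromPromptLogitsProcessor; the boost_factor value below is illustrative, and the sketch assumes the class is exported from logits_processor_zoo.vllm in the same way as GenLengthLogitsProcessor.

# Hedged sketch: reuses the client and model_name defined in the notebook cells above.
# The kwargs mirror CiteFromPromptLogitsProcessor's constructor; the values are illustrative.
completion = client.chat.completions.create(
    model=model_name,
    messages=[
        {"role": "user", "content": "Can you explain how fried rice chicken is cooked?"}
    ],
    temperature=0,
    top_p=1,
    extra_body={
        "logits_processors": [{
            "qualname": "logits_processor_zoo.vllm.CiteFromPromptLogitsProcessor",
            "kwargs": {"tokenizer": model_name, "boost_factor": 1.0}
        }]
    }
)
print(completion.choices[0].message.content)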

logits_processor_zoo/vllm/cite_prompt.py

Lines changed: 7 additions & 4 deletions
@@ -15,9 +15,9 @@
 # limitations under the License.
 #
 
-from typing import List
+from typing import List, Union
 import torch
-from transformers import PreTrainedTokenizer
+from transformers import PreTrainedTokenizer, AutoTokenizer
 
 
 class CiteFromPromptLogitsProcessor:
@@ -33,11 +33,14 @@ class CiteFromPromptLogitsProcessor:
         boost_eos (bool, optional): If True, boosts EOS token too.
         conditional_boost_factor (float, optional): A factor to boost the likelihood of the tokens based on previous token.
     """
-    def __init__(self, tokenizer: PreTrainedTokenizer, boost_factor: float = 1.0, boost_eos: bool = True,
+    def __init__(self, tokenizer: Union[PreTrainedTokenizer, str], boost_factor: float = 1.0, boost_eos: bool = True,
                  conditional_boost_factor: float = 0.0):
         self.tokenizer = tokenizer
+        if isinstance(self.tokenizer, str):
+            self.tokenizer = AutoTokenizer.from_pretrained(self.tokenizer)
+
         self.boost_factor = boost_factor
-        self.eos_token_id = tokenizer.eos_token_id
+        self.eos_token_id = self.tokenizer.eos_token_id
         self.boost_eos = boost_eos
         self.conditional_boost_factor = conditional_boost_factor
 
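
A minimal sketch of what this change enables, assuming CiteFromPromptLogitsProcessor is importable from logits_processor_zoo.vllm as the qualname pattern in the notebook suggests: the tokenizer argument may now be either a loaded PreTrainedTokenizer or a model-name string, which is what the JSON-serializable kwargs path used by vllm serve requires.

# Minimal sketch of the two accepted tokenizer forms after this change.
from transformers import AutoTokenizer
from logits_processor_zoo.vllm import CiteFromPromptLogitsProcessor

# Existing usage: pass an already-loaded tokenizer object.
tokenizer = AutoTokenizer.from_pretrained("Qwen/Qwen2.5-1.5B-Instruct")
lp_from_object = CiteFromPromptLogitsProcessor(tokenizer, boost_factor=1.0)

# New usage: pass the model name; the processor loads the tokenizer itself.
lp_from_string = CiteFromPromptLogitsProcessor("Qwen/Qwen2.5-1.5B-Instruct", boost_factor=1.0)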

logits_processor_zoo/vllm/generation_length.py

Lines changed: 12 additions & 8 deletions
@@ -15,9 +15,9 @@
 # limitations under the License.
 #
 
-from typing import List
+from typing import List, Union
 import torch
-from transformers import PreTrainedTokenizer
+from transformers import PreTrainedTokenizer, AutoTokenizer
 from logits_processor_zoo.utils import text_to_token
 
 
@@ -36,18 +36,22 @@ class GenLengthLogitsProcessor:
                                             or a new line. Default is False.
         boost_token_str (str, optional): A string to be tokenized and used instead of EOS. Especially useful for </think>.
     """
-    def __init__(self, tokenizer: PreTrainedTokenizer, boost_factor: float,
+    def __init__(self, tokenizer: Union[PreTrainedTokenizer, str], boost_factor: float,
                  p: int = 2, complete_sentences: bool = False, boost_token_str: str = None):
-        self.boost_token = tokenizer.eos_token_id
+
+        self.tokenizer = tokenizer
+        if isinstance(self.tokenizer, str):
+            self.tokenizer = AutoTokenizer.from_pretrained(self.tokenizer)
+
+        self.boost_token = self.tokenizer.eos_token_id
         self.boost_token_str = boost_token_str
         if boost_token_str is not None:
-            self.boost_token = text_to_token(tokenizer, boost_token_str, last=False)
+            self.boost_token = text_to_token(self.tokenizer, boost_token_str, last=False)
         self.boost_factor = boost_factor
         self.p = p
-        self.full_stop_token = text_to_token(tokenizer, "It is a sentence.", last=True)
-        self.new_line_token = text_to_token(tokenizer, "It is a new line\n", last=True)
+        self.full_stop_token = text_to_token(self.tokenizer, "It is a sentence.", last=True)
+        self.new_line_token = text_to_token(self.tokenizer, "It is a new line\n", last=True)
         self.complete_sentences = complete_sentences
-        self.tokenizer = tokenizer
 
     def clone(self):
         return GenLengthLogitsProcessor(self.tokenizer, self.boost_factor, self.p,
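
The same string-or-object handling applies here. A minimal construction sketch that mirrors the kwargs sent over the server API in the notebook; the import path is assumed from the qualname used there.

# Sketch: GenLengthLogitsProcessor built from a model-name string, mirroring the
# {"tokenizer": model_name, "boost_factor": 0.2, "complete_sentences": True}
# kwargs used in the server request above.
from logits_processor_zoo.vllm import GenLengthLogitsProcessor

lp = GenLengthLogitsProcessor(
    "Qwen/Qwen2.5-1.5B-Instruct",  # resolved internally via AutoTokenizer.from_pretrained
    boost_factor=0.2,
    complete_sentences=True,
)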

logits_processor_zoo/vllm/last_phrase.py

Lines changed: 9 additions & 6 deletions
@@ -15,8 +15,8 @@
 # limitations under the License.
 #
 
-from transformers import PreTrainedTokenizer
-from typing import List
+from transformers import PreTrainedTokenizer, AutoTokenizer
+from typing import List, Union
 import torch
 from logits_processor_zoo.utils import enforce_tokens
 
@@ -31,12 +31,15 @@ class ForceLastPhraseLogitsProcessor:
         phrase (str): The phrase to be generated by LLM before the end of its speech.
         tokenizer (PreTrainedTokenizer): The tokenizer used by the LLM.
     """
-    def __init__(self, phrase: str, tokenizer: PreTrainedTokenizer):
-        self.eos_token_id = tokenizer.eos_token_id
-        self.phrase_tokens = tokenizer.encode(phrase, add_special_tokens=False)
+    def __init__(self, phrase: str, tokenizer: Union[PreTrainedTokenizer, str]):
+        self.tokenizer = tokenizer
+        if isinstance(self.tokenizer, str):
+            self.tokenizer = AutoTokenizer.from_pretrained(self.tokenizer)
+
+        self.eos_token_id = self.tokenizer.eos_token_id
+        self.phrase_tokens = self.tokenizer.encode(phrase, add_special_tokens=False)
         self._reset()
         self.phrase = phrase
-        self.tokenizer = tokenizer
 
         # LogitsProcessor can contain a clone attribute to deep copy it
         # https://github.com/vllm-project/vllm/blob/19dcc02a72e3ed52e3bf95aae44ea1f40ce42ea0/vllm/sampling_params.py#L537-L550
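
With the tokenizer accepted as a string, this processor can also be configured purely through JSON kwargs over the server API. A hedged sketch, reusing the client and model_name from the notebook and assuming the class is exported from logits_processor_zoo.vllm; the phrase value is illustrative.

# Hedged sketch: force a closing phrase via the serialized kwargs path.
completion = client.chat.completions.create(
    model=model_name,
    messages=[{"role": "user", "content": "Can you explain how fried rice chicken is cooked?"}],
    temperature=0,
    top_p=1,
    extra_body={
        "logits_processors": [{
            "qualname": "logits_processor_zoo.vllm.ForceLastPhraseLogitsProcessor",
            "kwargs": {"phrase": "\n\nBon appetit!\n", "tokenizer": model_name}
        }]
    }
)
print(completion.choices[0].message.content)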

logits_processor_zoo/vllm/multiple_choice.py

Lines changed: 9 additions & 6 deletions
@@ -15,8 +15,8 @@
 # limitations under the License.
 #
 
-from transformers import PreTrainedTokenizer
-from typing import List
+from transformers import PreTrainedTokenizer, AutoTokenizer
+from typing import List, Union
 import torch
 from logits_processor_zoo.utils import text_to_token, get_new_line_tokens, enforce_tokens
 
@@ -41,17 +41,20 @@ class MultipleChoiceLogitsProcessor:
         boost_first_words (float): Nonzero values add choices' first tokens' logits to boost performance.
                                    Especially useful for the models which have difficulty associating the choice with its text.
     """
-    def __init__(self, tokenizer: PreTrainedTokenizer, choices: List[str] = None,
+    def __init__(self, tokenizer: Union[PreTrainedTokenizer, str], choices: List[str] = None,
                  delimiter: str = ".", boost_first_words: float = 0.0):
         self.tokenizer = tokenizer
+        if isinstance(self.tokenizer, str):
+            self.tokenizer = AutoTokenizer.from_pretrained(self.tokenizer)
+
         self.choices = choices
         self.delimiter = delimiter
         if choices is None:
             choices = ["1", "2", "3", "4"]
 
-        self.new_line_token = get_new_line_tokens(tokenizer)
-        self.delimiter_token = text_to_token(tokenizer, delimiter, last=False)
-        self.choice_tokens = [text_to_token(tokenizer, choice, last=False) for choice in choices]
+        self.new_line_token = get_new_line_tokens(self.tokenizer)
+        self.delimiter_token = text_to_token(self.tokenizer, delimiter, last=False)
+        self.choice_tokens = [text_to_token(self.tokenizer, choice, last=False) for choice in choices]
         self.boost_first_words = boost_first_words
 
     def clone(self):
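
A construction sketch using the new string form of the tokenizer argument; the choices, delimiter, and boost value are illustrative (the choices match the constructor's documented fallback), and the import path is assumed as above.

# Sketch: MultipleChoiceLogitsProcessor accepting a model-name string.
from logits_processor_zoo.vllm import MultipleChoiceLogitsProcessor

lp = MultipleChoiceLogitsProcessor(
    "Qwen/Qwen2.5-1.5B-Instruct",  # loaded internally via AutoTokenizer
    choices=["1", "2", "3", "4"],  # same values the constructor falls back to
    delimiter=".",
    boost_first_words=1.0,
)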

logits_processor_zoo/vllm/trigger_phrase.py

Lines changed: 10 additions & 7 deletions
@@ -15,8 +15,8 @@
 # limitations under the License.
 #
 
-from transformers import PreTrainedTokenizer
-from typing import List
+from transformers import PreTrainedTokenizer, AutoTokenizer
+from typing import List, Union
 import torch
 from logits_processor_zoo.utils import text_to_token, enforce_tokens
 
@@ -33,14 +33,17 @@ class TriggerPhraseLogitsProcessor:
         trigger_count (int): How many times the phrase will be triggered.
         trigger_after (bool): Whether the phrase is written after the trigger token or instead of the trigger token.
     """
-    def __init__(self, phrase: str, trigger_token_phrase: str, tokenizer: PreTrainedTokenizer, trigger_count: int = 1,
-                 trigger_after: bool = False):
+    def __init__(self, phrase: str, trigger_token_phrase: str, tokenizer: Union[PreTrainedTokenizer, str],
+                 trigger_count: int = 1, trigger_after: bool = False):
+        self.tokenizer = tokenizer
+        if isinstance(self.tokenizer, str):
+            self.tokenizer = AutoTokenizer.from_pretrained(self.tokenizer)
+
         self.phrase = phrase
         self.trigger_token_phrase = trigger_token_phrase
-        self.tokenizer = tokenizer
         self.trigger_count = trigger_count
-        self.trigger_token = text_to_token(tokenizer, trigger_token_phrase, last=False)
-        self.phrase_tokens = tokenizer.encode(phrase, add_special_tokens=False)
+        self.trigger_token = text_to_token(self.tokenizer, trigger_token_phrase, last=False)
+        self.phrase_tokens = self.tokenizer.encode(phrase, add_special_tokens=False)
         self.initial_trigger_count = trigger_count
         self.trigger_after = trigger_after
         self._reset()
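
The same pattern for the trigger-phrase processor; the phrase and trigger_token_phrase values below are illustrative only, and the import path is assumed as above.

# Sketch: TriggerPhraseLogitsProcessor built from a model-name string.
from logits_processor_zoo.vllm import TriggerPhraseLogitsProcessor

lp = TriggerPhraseLogitsProcessor(
    phrase="\nLet me double-check the steps.",
    trigger_token_phrase="\n\n",
    tokenizer="Qwen/Qwen2.5-1.5B-Instruct",  # string form added in this commit
    trigger_count=1,
    trigger_after=True,
)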

pyproject.toml

Lines changed: 1 addition & 1 deletion
@@ -1,6 +1,6 @@
 [tool.poetry]
 name = "logits-processor-zoo"
-version = "0.1.8"
+version = "0.1.9"
 description = "A collection of LogitsProcessors to customize and enhance LLM behavior for specific tasks."
 authors = ["Ahmet Erdem", "Ivan Sorokin", "Maximilian Jeblick", "Darragh Hanley", "David Austin"]
 readme = "README.md"
