@@ -27,7 +27,7 @@
 
 import numpy as np
 import numpy.typing as npt
-
+import llama_cpp.llama_cpp as llama_cpp
 import llama_cpp.llama as llama
 import llama_cpp.llama_types as llama_types
 import llama_cpp.llama_grammar as llama_grammar
@@ -594,7 +594,7 @@ def chat_completion_handler(
             tool_choice=tool_choice,
         )
         prompt = llama.tokenize(
-            vocab=llama.llama_model_get_vocab(llama.model),
+            vocab=llama_cpp.llama_model_get_vocab(llama.model),
             text=result.prompt.encode("utf-8"),
             add_bos=not result.added_special,
             special=True,
@@ -2813,8 +2813,8 @@ def __call__(
         text = template.render(
             messages=messages,
             add_generation_prompt=True,
-            eos_token=llama.detokenize(vocab=llama.llama_model_get_vocab(llama.model), tokens=[llama.token_eos()]),
-            bos_token=llama.detokenize(vocab=llama.llama_model_get_vocab(llama.model), tokens=[llama.token_bos()]),
+            eos_token=llama.detokenize(vocab=llama_cpp.llama_model_get_vocab(llama.model), tokens=[llama.token_eos()]),
+            bos_token=llama.detokenize(vocab=llama_cpp.llama_model_get_vocab(llama.model), tokens=[llama.token_bos()]),
         )
         split_text = self.split_text_on_image_urls(text, image_urls)
 
@@ -2828,7 +2828,7 @@ def __call__(
         for type_, value in split_text:
             if type_ == "text":
                 tokens = llama.tokenize(
-                    vocab=llama.llama_model_get_vocab(llama.model),
+                    vocab=llama_cpp.llama_model_get_vocab(llama.model),
                     text=value.encode("utf8"), add_bos=False, special=True
                 )
                 if llama.n_tokens + len(tokens) > llama.n_ctx():
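For context, the fix in all four hunks is the same: llama_model_get_vocab is a function exported by the low-level ctypes bindings module llama_cpp.llama_cpp, not an attribute of the high-level Llama wrapper, so llama.llama_model_get_vocab(...) (where llama is the wrapper instance) raises AttributeError. Below is a minimal sketch of the corrected call pattern. It assumes, as the diff does, that the Llama instance exposes the raw model pointer as .model and that tokenize on this branch accepts a vocab= keyword; the model path and variable names are hypothetical.

import llama_cpp.llama_cpp as llama_cpp  # low-level ctypes bindings
import llama_cpp.llama as llama          # high-level wrapper

# Hypothetical model path; vocab_only avoids loading the full weights.
llm = llama.Llama(model_path="./model.gguf", vocab_only=True)

# Correct: resolve the vocab through the low-level bindings module ...
vocab = llama_cpp.llama_model_get_vocab(llm.model)

# ... not through the wrapper, which has no such attribute:
# llm.llama_model_get_vocab(llm.model)  # AttributeError

# Tokenize using the vocab-taking signature shown in the diff above.
tokens = llm.tokenize(
    vocab=vocab,
    text=b"hello world",
    add_bos=True,
    special=True,
)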