1414class BaseLlamaTokenizer (abc .ABC ):
1515 @abc .abstractmethod
1616 def tokenize (
17- self , text : bytes , add_bos : bool = True , special : bool = True
17+ self , vocab : llama_cpp . llama_vocab_p , text : bytes , add_bos : bool = True , special : bool = True
1818 ) -> List [int ]:
1919 """Tokenize the text into tokens.
2020
@@ -28,6 +28,7 @@ def tokenize(
2828 @abc .abstractmethod
2929 def detokenize (
3030 self ,
31+ vocab :llama_cpp .llama_vocab_p ,
3132 tokens : List [int ],
3233 prev_tokens : Optional [List [int ]] = None ,
3334 special : bool = False ,
@@ -47,17 +48,18 @@ def __init__(self, llama: llama_cpp.Llama):
4748 self ._model = llama ._model # type: ignore
4849
def tokenize(
    self,
    vocab: "llama_cpp.llama_vocab_p",
    text: bytes,
    add_bos: bool = True,
    special: bool = True,
) -> List[int]:
    """Tokenize ``text`` by delegating to the wrapped model.

    Args:
        vocab: Vocabulary handle; forwarded unchanged as the first
            positional argument of ``self._model.tokenize``.
        text: Raw bytes to tokenize; forwarded as the second positional
            argument.
        add_bos: Forwarded to the model as the ``add_bos`` keyword.
        special: Forwarded to the model as the ``special`` keyword.

    Returns:
        The value returned by ``self._model.tokenize`` (annotated as a
        list of token ids).
    """
    return self._model.tokenize(vocab, text, add_bos=add_bos, special=special)
5354
def detokenize(
    self,
    vocab: "llama_cpp.llama_vocab_p",
    tokens: List[int],
    prev_tokens: Optional[List[int]] = None,
    special: bool = False,
) -> bytes:
    """Convert ``tokens`` back to bytes by delegating to the wrapped model.

    Args:
        vocab: Vocabulary handle; forwarded unchanged as the first
            positional argument of ``self._model.detokenize``.
        tokens: Token ids to convert; forwarded as the second positional
            argument.
        prev_tokens: Accepted as part of the method signature but not used
            by this implementation; it is never passed to the model.
        special: Forwarded to the model as the ``special`` keyword.

    Returns:
        The value returned by ``self._model.detokenize`` (annotated as
        ``bytes``).
    """
    # prev_tokens is intentionally not forwarded: the model call below only
    # takes the vocab, the tokens and the ``special`` flag.
    return self._model.detokenize(vocab, tokens, special=special)
6163
6264 def encode (
6365 self , text : str , add_bos : bool = True , special : bool = True
0 commit comments