@@ -285,59 +285,3 @@ def from_huggingface(
285285 else :
286286 # TODO(yixin): unsupported tokenizer
287287 raise ValueError (f'Unsupported tokenizer type: { type (tokenizer )} ' )
288-
@property
def vocab_type(self) -> VocabType:
    """Return the vocabulary type of this tokenizer.

    Returns:
        The ``vocab_type`` value stored on the underlying handle, wrapped
        in the ``VocabType`` enum.
    """
    raw_kind = self._handle.vocab_type
    return VocabType(raw_kind)
293-
@property
def vocab_size(self) -> int:
    """Return the size of the vocabulary.

    Returns:
        The ``vocab_size`` value reported by the underlying handle.
    """
    handle = self._handle
    return handle.vocab_size
298-
@property
def add_prefix_space(self) -> bool:
    """Return whether a space is prepended to the text during tokenization.

    Returns:
        The ``add_prefix_space`` flag stored on the underlying handle.
    """
    handle = self._handle
    return handle.add_prefix_space
304-
@property
def prepend_space_in_tokenization(self) -> bool:
    """Deprecated alias of ``add_prefix_space``.

    Reports whether the tokenizer prepends a space before the text in the
    tokenization process. Every access logs a deprecation warning before
    delegating to ``add_prefix_space``; new code should read
    ``add_prefix_space`` directly.

    Returns:
        The current value of ``add_prefix_space``.
    """
    # Warn on every access so remaining callers of the old name are visible.
    logger.warning('prepend_space_in_tokenization is deprecated. Use add_prefix_space instead.')
    prefix_flag = self.add_prefix_space
    return prefix_flag
314-
@property
def decoded_vocab(self) -> List[bytes]:
    """Return the decoded vocabulary of the tokenizer.

    Decoding converts each token in the LLM's vocabulary back to the
    original format of the input text. For example, with the ByteFallback
    type the token ``<0x1B>`` is converted back to the escape character
    (U+001B).

    Returns:
        The ``decoded_vocab`` value held by the underlying handle.
    """
    handle = self._handle
    return handle.decoded_vocab
323-
@property
def stop_token_ids(self) -> List[int]:
    """Return the stop token ids.

    Returns:
        The ``stop_token_ids`` value held by the underlying handle.
    """
    handle = self._handle
    return handle.stop_token_ids
328-
@property
def special_token_ids(self) -> List[int]:
    """Return the special token ids.

    Special tokens cover control tokens, reserved tokens, padded tokens and
    similar entries. They are currently detected automatically from the
    vocabulary.

    Returns:
        The ``special_token_ids`` value held by the underlying handle.
    """
    handle = self._handle
    return handle.special_token_ids
337-
def dump_metadata(self) -> str:
    """Serialize the tokenizer metadata to a JSON string.

    The result can be combined with the vocabulary to construct the
    tokenizer info again.

    Returns:
        The string produced by the underlying handle's ``dump_metadata()``.
    """
    metadata_json = self._handle.dump_metadata()
    return metadata_json
0 commit comments