File tree Expand file tree Collapse file tree 1 file changed +6
-2
lines changed
Expand file tree Collapse file tree 1 file changed +6
-2
lines changed Original file line number Diff line number Diff line change 2323def text_to_token (tokenizer : PreTrainedTokenizer , text : str , last : bool ):
2424 tokens = tokenizer .encode (text , add_special_tokens = False )
2525
26- # We allow 2 tokens to account for the BOS token
27- max_token_count = 2 if tokenizer .bos_token and tokenizer .bos_token_id in tokens else 1
26+ # We allow 2 tokens to account for the BOS or prefix token
27+ max_token_count = 1
28+ bos_token_added = tokenizer .bos_token and tokenizer .bos_token_id in tokens
29+ prefix_token_added = tokenizer .add_prefix_space is not False
30+ if bos_token_added or prefix_token_added :
31+ max_token_count = 2
2832
2933 if not last and len (tokens ) > max_token_count :
3034 raise Exception (f"Can't convert { text } to token. It has { len (tokens )} tokens." )
You can’t perform that action at this time.
0 commit comments