Skip to content

Commit e85feb0

Browse files
committed
Fixed via add_prefix_space
Signed-off-by: alessiodevoto <devoto.alessio@gmail.com>
1 parent 5054bd4 commit e85feb0

File tree

1 file changed

+6
-2
lines changed

1 file changed

+6
-2
lines changed

logits_processor_zoo/utils.py

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -23,8 +23,12 @@
2323
def text_to_token(tokenizer: PreTrainedTokenizer, text: str, last: bool):
2424
tokens = tokenizer.encode(text, add_special_tokens=False)
2525

26-
# We allow 2 tokens to account for the BOS token
27-
max_token_count = 2 if tokenizer.bos_token and tokenizer.bos_token_id in tokens else 1
26+
# We allow 2 tokens to account for the BOS or prefix token
27+
max_token_count = 1
28+
bos_token_added = tokenizer.bos_token and tokenizer.bos_token_id in tokens
29+
prefix_token_added = tokenizer.add_prefix_space is not False
30+
if bos_token_added or prefix_token_added:
31+
max_token_count = 2
2832

2933
if not last and len(tokens) > max_token_count:
3034
raise Exception(f"Can't convert {text} to token. It has {len(tokens)} tokens.")

0 commit comments

Comments
 (0)