
Commit e3708ef

workaround for issue "TypeError: argument 'tokens': 'NoneType' object cannot be converted to 'PyString'" (#4103)
* workaround for issue "TypeError: argument 'tokens': 'NoneType' object cannot be converted to 'PyString'"
* fix
* new_tokens and prev_tokens should be lists
1 parent c91d148 commit e3708ef
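Background on the error: on a HuggingFace fast tokenizer, `convert_ids_to_tokens` returns None for token ids that fall outside the vocabulary, and a None that reaches the Rust-backed decode step is rejected with the 'PyString' TypeError quoted in the title. The snippet below is a hypothetical, pure-Python illustration of the same failure mode; the stub tokenizer and the plain string join stand in for the real lmdeploy/HuggingFace calls.

# Hypothetical stand-in for a HuggingFace fast tokenizer: ids outside the
# vocabulary come back as None instead of a token string.
def fake_convert_ids_to_tokens(ids, vocab_size=10):
    return [f'<tok_{i}>' if 0 <= i < vocab_size else None for i in ids]

tokens = fake_convert_ids_to_tokens([3, 99, 5])  # 99 is out of range
print(tokens)  # ['<tok_3>', None, '<tok_5>']

# Any downstream step that expects strings then fails. In lmdeploy the Rust
# tokenizer raises "TypeError: argument 'tokens': 'NoneType' object cannot be
# converted to 'PyString'"; the plain-Python join below fails for the same
# underlying reason.
''.join(tokens)  # TypeError: sequence item 1: expected str instance, NoneType found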

File tree

1 file changed (+6, -0 lines)


lmdeploy/tokenizer.py

Lines changed: 6 additions & 0 deletions
@@ -290,12 +290,18 @@ def detokenize_incrementally(self,
         # This is the first iteration for this sequence
         new_tokens = tokenizer.convert_ids_to_tokens(all_input_ids[ids_offset:],
                                                      skip_special_tokens=skip_special_tokens)
+        # `convert_ids_to_tokens` returns None for out-of-range token_id
+        new_tokens = new_tokens or []
+        new_tokens = [x for x in new_tokens if x is not None] if None in new_tokens else new_tokens
         if prev_tokens is None:
             # Please notice that in VLLM, indexes are detokenized one by one
             # while in LMDeploy, every turn, the detokenized indexes length
             # can be different.
             prev_tokens = tokenizer.convert_ids_to_tokens(all_input_ids[:ids_offset],
                                                           skip_special_tokens=skip_special_tokens)
+            # `convert_ids_to_tokens` returns None for out-of-range token_id
+            prev_tokens = prev_tokens or []
+            prev_tokens = [x for x in prev_tokens if x is not None] if None in prev_tokens else prev_tokens
         read_offset = len(prev_tokens)
         if skip_special_tokens and new_tokens and new_tokens[0] in tokenizer.all_special_ids:
             read_offset = read_offset + 1  # skip special token
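The two added guards cover both shapes the problem can take: `convert_ids_to_tokens` returning None outright, and a list that merely contains None entries. Below is a minimal, self-contained sketch of how that normalization behaves; the helper name and the sample lists are made up for illustration.

def drop_none_tokens(tokens):
    # Mirrors the two guards added in this commit:
    # 1) a None result becomes an empty list;
    # 2) None entries from out-of-range ids are filtered out, while
    #    already-clean lists are returned unchanged.
    tokens = tokens or []
    return [x for x in tokens if x is not None] if None in tokens else tokens

print(drop_none_tokens(None))                  # []
print(drop_none_tokens(['<a>', None, '<b>']))  # ['<a>', '<b>']
print(drop_none_tokens(['<a>', '<b>']))        # ['<a>', '<b>']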
