Skip to content

truncation_strategy is passed twice, leading to an error #62

@macleginn

Description

@macleginn

The error can be reproduced like this:

In [1]: from transformers import AutoModel, AutoTokenizer

In [2]: from calflops import calculate_flops, calculate_flops_hf

In [3]: model_name = "google/gemma-2b"

In [4]: model = AutoModel.from_pretrained(model_name, cache_dir="hf_cache")

In [5]: tokenizer = AutoTokenizer.from_pretrained(model_name, cache_dir="hf_cache")

In [6]: batch_size, max_seq_length = 1, 128

In [7]: flops, macs, params = calculate_flops(model=model, input_shape=(batch_size, max_seq_length), transformer_tokenizer=tokenizer, print_detailed=True)

---------------------------------------------------------------------------
TypeError                                 Traceback (most recent call last)
Cell In[7], line 1
----> 1 flops, macs, params = calculate_flops(model=model, input_shape=(batch_size, max_seq_length), transformer_tokenizer=tokenizer, print_detailed=True)

File XXX/.venv/lib/python3.13/site-packages/calflops/flops_counter.py:142, in calculate_flops(model, input_shape, transformer_tokenizer, args, kwargs, forward_mode, include_backPropagation, compute_bp_factor, print_results, print_detailed, output_as_string, output_precision, output_unit, ignore_modules)
    140     else:
    141         assert len(input_shape) == 2, "the format of input_shape must be (batch_size, seq_len) if model is transformers model and auto_generate_transformers_input if True"
--> 142         kwargs = generate_transformer_input(input_shape=input_shape,
    143                                             model_tokenizer=transformer_tokenizer,
    144                                             device=device)
    145 else:
    146     assert transformer_tokenizer or (len(args) > 0 or len(kwargs) > 0),  "input_shape or args or kwargs one of there parameters must specified if auto_generate_input is False"

File XXX/.venv/lib/python3.13/site-packages/calflops/utils.py:115, in generate_transformer_input(model_tokenizer, input_shape, device)
    113 inp_seq = ""
    114 for _ in range(input_shape[0]):
--> 115     inputs = model_tokenizer.encode_plus(
    116         inp_seq,
    117         add_special_tokens=True,
    118         truncation_strategy='longest_first',
    119     )
    120     origin_length = len(inputs["input_ids"])
    121     padding_length = max_length - origin_length

File XXX/.venv/lib/python3.13/site-packages/transformers/tokenization_utils_base.py:3123, in PreTrainedTokenizerBase.encode_plus(self, text, text_pair, add_special_tokens, padding, truncation, max_length, stride, is_split_into_words, pad_to_multiple_of, padding_side, return_tensors, return_token_type_ids, return_attention_mask, return_overflowing_tokens, return_special_tokens_mask, return_offsets_mapping, return_length, verbose, **kwargs)
   3094 """
   3095 Tokenize and prepare for the model a sequence or a pair of sequences.
   3096
   (...)   3111         method).
   3112 """
   3114 padding_strategy, truncation_strategy, max_length, kwargs = self._get_padding_truncation_strategies(
   3115     padding=padding,
   3116     truncation=truncation,
   (...)   3120     **kwargs,
   3121 )
-> 3123 return self._encode_plus(
   3124     text=text,
   3125     text_pair=text_pair,
   3126     add_special_tokens=add_special_tokens,
   3127     padding_strategy=padding_strategy,
   3128     truncation_strategy=truncation_strategy,
   3129     max_length=max_length,
   3130     stride=stride,
   3131     is_split_into_words=is_split_into_words,
   3132     pad_to_multiple_of=pad_to_multiple_of,
   3133     padding_side=padding_side,
   3134     return_tensors=return_tensors,
   3135     return_token_type_ids=return_token_type_ids,
   3136     return_attention_mask=return_attention_mask,
   3137     return_overflowing_tokens=return_overflowing_tokens,
   3138     return_special_tokens_mask=return_special_tokens_mask,
   3139     return_offsets_mapping=return_offsets_mapping,
   3140     return_length=return_length,
   3141     verbose=verbose,
   3142     split_special_tokens=kwargs.pop("split_special_tokens", self.split_special_tokens),
   3143     **kwargs,
   3144 )

TypeError: transformers.tokenization_utils_fast.PreTrainedTokenizerFast._encode_plus() got multiple values for keyword argument 'truncation_strategy'

Metadata

Assignees

No one assigned

    Labels

    No labels

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions