Received error running Swinv2ForImageClassification(
(swinv2): Swinv2Model(
(embeddings): Swinv2Embeddings(
(patch_embeddings): Swinv2PatchEmbeddings(
(projection): Conv2d(3, 96, kernel_size=(4, 4), stride=(4, 4))
)
(norm): LayerNorm((96,), eps=1e-05, elementwise_affine=True)
(dropout): Dropout(p=0.0, inplace=False)
)
(encoder): Swinv2Encoder(
(layers): ModuleList(
(0): Swinv2Stage(
(blocks): ModuleList(
(0-1): 2 x Swinv2Layer(
(attention): Swinv2Attention(
(self): Swinv2SelfAttention(
(continuous_position_bias_mlp): Sequential(
(0): Linear(in_features=2, out_features=512, bias=True)
(1): ReLU(inplace=True)
(2): Linear(in_features=512, out_features=3, bias=False)
)
(query): Linear(in_features=96, out_features=96, bias=True)
(key): Linear(in_features=96, out_features=96, bias=False)
(value): Linear(in_features=96, out_features=96, bias=True)
(dropout): Dropout(p=0.0, inplace=False)
)
(output): Swinv2SelfOutput(
(dense): Linear(in_features=96, out_features=96, bias=True)
(dropout): Dropout(p=0.0, inplace=False)
)
)
(layernorm_before): LayerNorm((96,), eps=1e-05, elementwise_affine=True)
(drop_path): Swinv2DropPath(p=0.1)
(intermediate): Swinv2Intermediate(
(dense): Linear(in_features=96, out_features=384, bias=True)
(intermediate_act_fn): GELUActivation()
)
(output): Swinv2Output(
(dense): Linear(in_features=384, out_features=96, bias=True)
(dropout): Dropout(p=0.0, inplace=False)
)
(layernorm_after): LayerNorm((96,), eps=1e-05, elementwise_affine=True)
)
)
(downsample): Swinv2PatchMerging(
(reduction): Linear(in_features=384, out_features=192, bias=False)
(norm): LayerNorm((192,), eps=1e-05, elementwise_affine=True)
)
)
(1): Swinv2Stage(
(blocks): ModuleList(
(0-1): 2 x Swinv2Layer(
(attention): Swinv2Attention(
(self): Swinv2SelfAttention(
(continuous_position_bias_mlp): Sequential(
(0): Linear(in_features=2, out_features=512, bias=True)
(1): ReLU(inplace=True)
(2): Linear(in_features=512, out_features=6, bias=False)
)
(query): Linear(in_features=192, out_features=192, bias=True)
(key): Linear(in_features=192, out_features=192, bias=False)
(value): Linear(in_features=192, out_features=192, bias=True)
(dropout): Dropout(p=0.0, inplace=False)
)
(output): Swinv2SelfOutput(
(dense): Linear(in_features=192, out_features=192, bias=True)
(dropout): Dropout(p=0.0, inplace=False)
)
)
(layernorm_before): LayerNorm((192,), eps=1e-05, elementwise_affine=True)
(drop_path): Swinv2DropPath(p=0.1)
(intermediate): Swinv2Intermediate(
(dense): Linear(in_features=192, out_features=768, bias=True)
(intermediate_act_fn): GELUActivation()
)
(output): Swinv2Output(
(dense): Linear(in_features=768, out_features=192, bias=True)
(dropout): Dropout(p=0.0, inplace=False)
)
(layernorm_after): LayerNorm((192,), eps=1e-05, elementwise_affine=True)
)
)
(downsample): Swinv2PatchMerging(
(reduction): Linear(in_features=768, out_features=384, bias=False)
(norm): LayerNorm((384,), eps=1e-05, elementwise_affine=True)
)
)
(2): Swinv2Stage(
(blocks): ModuleList(
(0-5): 6 x Swinv2Layer(
(attention): Swinv2Attention(
(self): Swinv2SelfAttention(
(continuous_position_bias_mlp): Sequential(
(0): Linear(in_features=2, out_features=512, bias=True)
(1): ReLU(inplace=True)
(2): Linear(in_features=512, out_features=12, bias=False)
)
(query): Linear(in_features=384, out_features=384, bias=True)
(key): Linear(in_features=384, out_features=384, bias=False)
(value): Linear(in_features=384, out_features=384, bias=True)
(dropout): Dropout(p=0.0, inplace=False)
)
(output): Swinv2SelfOutput(
(dense): Linear(in_features=384, out_features=384, bias=True)
(dropout): Dropout(p=0.0, inplace=False)
)
)
(layernorm_before): LayerNorm((384,), eps=1e-05, elementwise_affine=True)
(drop_path): Swinv2DropPath(p=0.1)
(intermediate): Swinv2Intermediate(
(dense): Linear(in_features=384, out_features=1536, bias=True)
(intermediate_act_fn): GELUActivation()
)
(output): Swinv2Output(
(dense): Linear(in_features=1536, out_features=384, bias=True)
(dropout): Dropout(p=0.0, inplace=False)
)
(layernorm_after): LayerNorm((384,), eps=1e-05, elementwise_affine=True)
)
)
(downsample): Swinv2PatchMerging(
(reduction): Linear(in_features=1536, out_features=768, bias=False)
(norm): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
)
)
(3): Swinv2Stage(
(blocks): ModuleList(
(0-1): 2 x Swinv2Layer(
(attention): Swinv2Attention(
(self): Swinv2SelfAttention(
(continuous_position_bias_mlp): Sequential(
(0): Linear(in_features=2, out_features=512, bias=True)
(1): ReLU(inplace=True)
(2): Linear(in_features=512, out_features=24, bias=False)
)
(query): Linear(in_features=768, out_features=768, bias=True)
(key): Linear(in_features=768, out_features=768, bias=False)
(value): Linear(in_features=768, out_features=768, bias=True)
(dropout): Dropout(p=0.0, inplace=False)
)
(output): Swinv2SelfOutput(
(dense): Linear(in_features=768, out_features=768, bias=True)
(dropout): Dropout(p=0.0, inplace=False)
)
)
(layernorm_before): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
(drop_path): Swinv2DropPath(p=0.1)
(intermediate): Swinv2Intermediate(
(dense): Linear(in_features=768, out_features=3072, bias=True)
(intermediate_act_fn): GELUActivation()
)
(output): Swinv2Output(
(dense): Linear(in_features=3072, out_features=768, bias=True)
(dropout): Dropout(p=0.0, inplace=False)
)
(layernorm_after): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
)
)
)
)
)
(layernorm): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
(pooler): AdaptiveAvgPool1d(output_size=1)
)
(classifier): Linear(in_features=768, out_features=1000, bias=True)
): KeyError: ModularIndexing(index1, 1, 64)
Describe the bug
Hello,
I have written a simple program to run some vision models from HuggingFace in this gist: https://gist.github.com/oluwatimilehin/db204bcab6eb7531173453a2e4ed7e8e
When I run the script for swinv2 with any batch size greater than 1, for example:
    python vision_models.py -m swinv2 -b 8
I get a `KeyError: ModularIndexing(index1, 1, 64)` error message (the full output is shown above).
Could you please look into this?
Thanks!