
Commit 4a64489

MatthewBonanni authored and charlotte12l committed

[Attention] Remove imports from vllm/attention/__init__.py (vllm-project#29342)

Signed-off-by: Matthew Bonanni <mbonanni@redhat.com>
Signed-off-by: Xingyu Liu <charlotteliu12x@gmail.com>
1 parent 1e10a9b commit 4a64489


96 files changed (+120, -121 lines)
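
Every file below follows the same migration: names that were previously re-exported from the `vllm.attention` package are now imported directly from the submodules that define them. A minimal before/after sketch of the pattern, with import paths taken from the diffs in this commit:

```python
# Before this commit (relied on re-exports in vllm/attention/__init__.py):
# from vllm.attention import Attention, AttentionMetadata, AttentionType

# After this commit (import directly from the defining submodules):
from vllm.attention.backends.abstract import AttentionMetadata, AttentionType
from vllm.attention.layer import Attention
```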

docs/contributing/model/basic.md (1 addition & 1 deletion)

@@ -29,7 +29,7 @@ The initialization code should look like this:
 ```python
 from torch import nn
 from vllm.config import VllmConfig
-from vllm.attention import Attention
+from vllm.attention.layer import Attention
 
 class MyAttention(nn.Module):
     def __init__(self, vllm_config: VllmConfig, prefix: str):

tests/compile/test_fusion_attn.py (2 additions & 1 deletion)

@@ -9,8 +9,9 @@
 from tests.utils import flat_product
 from tests.v1.attention.utils import BatchSpec, create_common_attn_metadata
 from vllm._custom_ops import cutlass_scaled_fp4_mm, scaled_fp4_quant
-from vllm.attention import Attention, AttentionMetadata
+from vllm.attention.backends.abstract import AttentionMetadata
 from vllm.attention.backends.registry import AttentionBackendEnum
+from vllm.attention.layer import Attention
 from vllm.attention.selector import global_force_attn_backend_context_manager
 from vllm.compilation.fusion_attn import ATTN_OP, AttnFusionPass
 from vllm.compilation.fx_utils import find_op_nodes

tests/compile/test_qk_norm_rope_fusion.py (2 additions & 1 deletion)

@@ -5,7 +5,8 @@
 import torch
 
 from tests.compile.backend import TestBackend
-from vllm.attention import Attention, AttentionType
+from vllm.attention.backends.abstract import AttentionType
+from vllm.attention.layer import Attention
 from vllm.compilation.matcher_utils import FLASHINFER_ROTARY_OP, RMS_OP, ROTARY_OP
 from vllm.compilation.noop_elimination import NoOpEliminationPass
 from vllm.compilation.post_cleanup import PostCleanupPass

tests/kernels/utils.py (1 addition & 1 deletion)

@@ -14,7 +14,7 @@
 from torch._prims_common import TensorLikeType
 
 from tests.kernels.quant_utils import native_w8a8_block_matmul
-from vllm.attention import AttentionType
+from vllm.attention.backends.abstract import AttentionType
 from vllm.model_executor.layers.activation import SiluAndMul
 from vllm.model_executor.layers.fused_moe.utils import moe_kernel_quantize_input
 from vllm.utils import (

tests/v1/worker/test_gpu_model_runner.py (1 addition & 1 deletion)

@@ -5,8 +5,8 @@
 import pytest
 import torch
 
-from vllm.attention import Attention
 from vllm.attention.backends.abstract import MultipleOf
+from vllm.attention.layer import Attention
 from vllm.config import (
     CacheConfig,
     ModelConfig,

tests/v1/worker/test_utils.py (2 additions & 2 deletions)

@@ -7,7 +7,7 @@
 
 
 def test_bind_kv_cache():
-    from vllm.attention import Attention
+    from vllm.attention.layer import Attention
 
     ctx = {
         "layers.0.self_attn": Attention(32, 128, 0.1),

@@ -35,7 +35,7 @@ def test_bind_kv_cache():
 
 
 def test_bind_kv_cache_non_attention():
-    from vllm.attention import Attention
+    from vllm.attention.layer import Attention
 
     # example from Jamba PP=2
     ctx = {

vllm/attention/__init__.py (0 additions & 19 deletions)

@@ -1,19 +0,0 @@
-# SPDX-License-Identifier: Apache-2.0
-# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
-
-from vllm.attention.backends.abstract import (
-    AttentionBackend,
-    AttentionMetadata,
-    AttentionType,
-)
-from vllm.attention.layer import Attention
-from vllm.attention.selector import get_attn_backend, get_mamba_attn_backend
-
-__all__ = [
-    "Attention",
-    "AttentionBackend",
-    "AttentionMetadata",
-    "AttentionType",
-    "get_attn_backend",
-    "get_mamba_attn_backend",
-]
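
With the package-level re-exports gone, callers must import each name from its defining module. For reference, the canonical paths for the names previously exported here, taken from the deleted file above:

```python
# Canonical import paths for the names formerly re-exported by vllm/attention/__init__.py:
from vllm.attention.backends.abstract import (
    AttentionBackend,
    AttentionMetadata,
    AttentionType,
)
from vllm.attention.layer import Attention
from vllm.attention.selector import get_attn_backend, get_mamba_attn_backend
```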

vllm/attention/backends/abstract.py (1 addition & 1 deletion)

@@ -178,7 +178,7 @@ def supports_attn_type(cls, attn_type: str) -> bool:
         By default, only supports decoder attention.
         Backends should override this to support other attention types.
         """
-        from vllm.attention import AttentionType
+        from vllm.attention.backends.abstract import AttentionType
 
         return attn_type == AttentionType.DECODER
 

vllm/attention/layer.py (5 additions & 2 deletions)

@@ -10,8 +10,11 @@
 import torch.nn.functional as F
 
 import vllm.envs as envs
-from vllm.attention import AttentionType
-from vllm.attention.backends.abstract import AttentionBackend, MLAAttentionImpl
+from vllm.attention.backends.abstract import (
+    AttentionBackend,
+    AttentionType,
+    MLAAttentionImpl,
+)
 from vllm.attention.backends.registry import AttentionBackendEnum
 from vllm.attention.selector import get_attn_backend
 from vllm.attention.utils.kv_sharing_utils import validate_kv_sharing_target

vllm/compilation/fusion_attn.py (1 addition & 1 deletion)

@@ -10,7 +10,7 @@
 from torch._higher_order_ops.auto_functionalize import auto_functionalized
 from torch._inductor.pattern_matcher import PatternMatcherPass
 
-from vllm.attention import Attention
+from vllm.attention.layer import Attention
 from vllm.config import VllmConfig, get_layers_from_vllm_config
 from vllm.logger import init_logger
 from vllm.model_executor.layers.quantization.utils.quant_utils import (
