
Commit 4a64489

MatthewBonanni authored and charlotte12l committed

[Attention] Remove imports from vllm/attention/__init__.py (vllm-project#29342)

Signed-off-by: Matthew Bonanni <mbonanni@redhat.com>
Signed-off-by: Xingyu Liu <charlotteliu12x@gmail.com>
1 parent 1e10a9b commit 4a64489


96 files changed (+120, -121 lines)
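
Every file below follows the same migration: names that were previously re-exported from the `vllm.attention` package are now imported directly from the submodules that define them. A minimal before/after sketch of the pattern, with import paths taken from the diffs in this commit:

```python
# Before this commit (relied on re-exports in vllm/attention/__init__.py):
# from vllm.attention import Attention, AttentionMetadata, AttentionType

# After this commit (import directly from the defining submodules):
from vllm.attention.backends.abstract import AttentionMetadata, AttentionType
from vllm.attention.layer import Attention
```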

docs/contributing/model/basic.md (1 addition & 1 deletion)

@@ -29,7 +29,7 @@ The initialization code should look like this:
 ```python
 from torch import nn
 from vllm.config import VllmConfig
-from vllm.attention import Attention
+from vllm.attention.layer import Attention
 
 class MyAttention(nn.Module):
     def __init__(self, vllm_config: VllmConfig, prefix: str):

tests/compile/test_fusion_attn.py (2 additions & 1 deletion)

@@ -9,8 +9,9 @@
 from tests.utils import flat_product
 from tests.v1.attention.utils import BatchSpec, create_common_attn_metadata
 from vllm._custom_ops import cutlass_scaled_fp4_mm, scaled_fp4_quant
-from vllm.attention import Attention, AttentionMetadata
+from vllm.attention.backends.abstract import AttentionMetadata
 from vllm.attention.backends.registry import AttentionBackendEnum
+from vllm.attention.layer import Attention
 from vllm.attention.selector import global_force_attn_backend_context_manager
 from vllm.compilation.fusion_attn import ATTN_OP, AttnFusionPass
 from vllm.compilation.fx_utils import find_op_nodes

tests/compile/test_qk_norm_rope_fusion.py (2 additions & 1 deletion)

@@ -5,7 +5,8 @@
 import torch
 
 from tests.compile.backend import TestBackend
-from vllm.attention import Attention, AttentionType
+from vllm.attention.backends.abstract import AttentionType
+from vllm.attention.layer import Attention
 from vllm.compilation.matcher_utils import FLASHINFER_ROTARY_OP, RMS_OP, ROTARY_OP
 from vllm.compilation.noop_elimination import NoOpEliminationPass
 from vllm.compilation.post_cleanup import PostCleanupPass

tests/kernels/utils.py (1 addition & 1 deletion)

@@ -14,7 +14,7 @@
 from torch._prims_common import TensorLikeType
 
 from tests.kernels.quant_utils import native_w8a8_block_matmul
-from vllm.attention import AttentionType
+from vllm.attention.backends.abstract import AttentionType
 from vllm.model_executor.layers.activation import SiluAndMul
 from vllm.model_executor.layers.fused_moe.utils import moe_kernel_quantize_input
 from vllm.utils import (

tests/v1/worker/test_gpu_model_runner.py (1 addition & 1 deletion)

@@ -5,8 +5,8 @@
 import pytest
 import torch
 
-from vllm.attention import Attention
 from vllm.attention.backends.abstract import MultipleOf
+from vllm.attention.layer import Attention
 from vllm.config import (
     CacheConfig,
     ModelConfig,

tests/v1/worker/test_utils.py (2 additions & 2 deletions)

@@ -7,7 +7,7 @@
 
 
 def test_bind_kv_cache():
-    from vllm.attention import Attention
+    from vllm.attention.layer import Attention
 
     ctx = {
         "layers.0.self_attn": Attention(32, 128, 0.1),

@@ -35,7 +35,7 @@ def test_bind_kv_cache():
 
 
 def test_bind_kv_cache_non_attention():
-    from vllm.attention import Attention
+    from vllm.attention.layer import Attention
 
     # example from Jamba PP=2
     ctx = {

vllm/attention/__init__.py (0 additions & 19 deletions)

@@ -1,19 +0,0 @@
-# SPDX-License-Identifier: Apache-2.0
-# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
-
-from vllm.attention.backends.abstract import (
-    AttentionBackend,
-    AttentionMetadata,
-    AttentionType,
-)
-from vllm.attention.layer import Attention
-from vllm.attention.selector import get_attn_backend, get_mamba_attn_backend
-
-__all__ = [
-    "Attention",
-    "AttentionBackend",
-    "AttentionMetadata",
-    "AttentionType",
-    "get_attn_backend",
-    "get_mamba_attn_backend",
-]
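
With the package-level re-exports gone, callers must import each name from its defining module. For reference, the canonical paths for the names previously exported here, taken from the deleted file above:

```python
# Canonical import paths for the names formerly re-exported by vllm/attention/__init__.py:
from vllm.attention.backends.abstract import (
    AttentionBackend,
    AttentionMetadata,
    AttentionType,
)
from vllm.attention.layer import Attention
from vllm.attention.selector import get_attn_backend, get_mamba_attn_backend
```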

vllm/attention/backends/abstract.py (1 addition & 1 deletion)

@@ -178,7 +178,7 @@ def supports_attn_type(cls, attn_type: str) -> bool:
         By default, only supports decoder attention.
         Backends should override this to support other attention types.
         """
-        from vllm.attention import AttentionType
+        from vllm.attention.backends.abstract import AttentionType
 
         return attn_type == AttentionType.DECODER
 

vllm/attention/layer.py (5 additions & 2 deletions)

@@ -10,8 +10,11 @@
 import torch.nn.functional as F
 
 import vllm.envs as envs
-from vllm.attention import AttentionType
-from vllm.attention.backends.abstract import AttentionBackend, MLAAttentionImpl
+from vllm.attention.backends.abstract import (
+    AttentionBackend,
+    AttentionType,
+    MLAAttentionImpl,
+)
 from vllm.attention.backends.registry import AttentionBackendEnum
 from vllm.attention.selector import get_attn_backend
 from vllm.attention.utils.kv_sharing_utils import validate_kv_sharing_target

vllm/compilation/fusion_attn.py (1 addition & 1 deletion)

@@ -10,7 +10,7 @@
 from torch._higher_order_ops.auto_functionalize import auto_functionalized
 from torch._inductor.pattern_matcher import PatternMatcherPass
 
-from vllm.attention import Attention
+from vllm.attention.layer import Attention
 from vllm.config import VllmConfig, get_layers_from_vllm_config
 from vllm.logger import init_logger
 from vllm.model_executor.layers.quantization.utils.quant_utils import (
