From c6f3f3175282522aa07467fe5213b73b2ec9b013 Mon Sep 17 00:00:00 2001
From: Lucas Wilkinson <lwilkins@redhat.com>
Date: Fri, 5 Dec 2025 07:13:58 +0000
Subject: [PATCH 1/3] Fix hang by adjusting Eagle cudagraph selection in _dummy_run

Signed-off-by: Lucas Wilkinson <lwilkins@redhat.com>
---
 vllm/v1/worker/gpu_model_runner.py | 13 ++++++++++---
 1 file changed, 10 insertions(+), 3 deletions(-)

diff --git a/vllm/v1/worker/gpu_model_runner.py b/vllm/v1/worker/gpu_model_runner.py
index 152bea2c0975..cfecafccbf5b 100644
--- a/vllm/v1/worker/gpu_model_runner.py
+++ b/vllm/v1/worker/gpu_model_runner.py
@@ -4104,10 +4104,17 @@ def _dummy_run(
 
             if self.speculative_config and self.speculative_config.use_eagle():
                 assert isinstance(self.drafter, EagleProposer)
+                # since eagle currently only supports PIECEWISE cudagraphs if we are
+                # capturing cudagraphs only capture for PIECEWISE cudagraphs use
+                # PIECEWISE cudagraphs if the main model used cudagraphs.
+                # NOTE(lucas): this is a hack, need to clean up.
                 use_cudagraphs = (
-                    cudagraph_runtime_mode.has_mode(CUDAGraphMode.PIECEWISE)
-                    and not self.speculative_config.enforce_eager
-                )
+                    (
+                        cudagraph_runtime_mode.has_mode(CUDAGraphMode.PIECEWISE)
+                        and is_graph_capturing
+                    )
+                    or (cudagraph_runtime_mode != CUDAGraphMode.NONE)
+                ) and not self.speculative_config.enforce_eager
 
                 # Note(gnovack) - We need to disable cudagraphs for one of the two
                 # lora cases when cudagraph_specialize_lora is enabled. This is a

From 71019e3722149e91af58e4cf29ab9103d859e17f Mon Sep 17 00:00:00 2001
From: Lucas Wilkinson <lwilkins@redhat.com>
Date: Fri, 5 Dec 2025 15:35:25 +0000
Subject: [PATCH 2/3] cleanup

Signed-off-by: Lucas Wilkinson <lwilkins@redhat.com>
---
 vllm/v1/worker/gpu_model_runner.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/vllm/v1/worker/gpu_model_runner.py b/vllm/v1/worker/gpu_model_runner.py
index cfecafccbf5b..442ac5e210b0 100644
--- a/vllm/v1/worker/gpu_model_runner.py
+++ b/vllm/v1/worker/gpu_model_runner.py
@@ -4110,8 +4110,8 @@ def _dummy_run(
                 # NOTE(lucas): this is a hack, need to clean up.
                 use_cudagraphs = (
                     (
-                        cudagraph_runtime_mode.has_mode(CUDAGraphMode.PIECEWISE)
-                        and is_graph_capturing
+                        is_graph_capturing
+                        and cudagraph_runtime_mode == CUDAGraphMode.PIECEWISE
                     )
                     or (cudagraph_runtime_mode != CUDAGraphMode.NONE)
                 ) and not self.speculative_config.enforce_eager

From 3f56bb09be4e3733d1176665caa43b7bb496f6c9 Mon Sep 17 00:00:00 2001
From: Lucas Wilkinson <LucasWilkinson@users.noreply.github.com>
Date: Fri, 5 Dec 2025 14:14:13 -0500
Subject: [PATCH 3/3] Update vllm/v1/worker/gpu_model_runner.py

Co-authored-by: Tyler Michael Smith <tyler@neuralmagic.com>
Signed-off-by: Lucas Wilkinson <LucasWilkinson@users.noreply.github.com>
---
 vllm/v1/worker/gpu_model_runner.py | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/vllm/v1/worker/gpu_model_runner.py b/vllm/v1/worker/gpu_model_runner.py
index 442ac5e210b0..14459485d7d2 100644
--- a/vllm/v1/worker/gpu_model_runner.py
+++ b/vllm/v1/worker/gpu_model_runner.py
@@ -4104,9 +4104,8 @@ def _dummy_run(
 
             if self.speculative_config and self.speculative_config.use_eagle():
                 assert isinstance(self.drafter, EagleProposer)
-                # since eagle currently only supports PIECEWISE cudagraphs if we are
-                # capturing cudagraphs only capture for PIECEWISE cudagraphs use
-                # PIECEWISE cudagraphs if the main model used cudagraphs.
+                # Eagle currently only supports PIECEWISE cudagraphs, so the drafter
+                # should only use cudagraphs when the main model uses PIECEWISE ones.
                 # NOTE(lucas): this is a hack, need to clean up.
                 use_cudagraphs = (
                     (