vllm-project
diff --git a/‎examples/gke/benchmarks/README.md‎ ‎examples/offload/gke/benchmarks/README.md‎examples/gke/benchmarks/README.md renamed to examples/offload/gke/benchmarks/README.md b/‎examples/gke/benchmarks/README.md‎ ‎examples/offload/gke/benchmarks/README.md‎examples/gke/benchmarks/README.md renamed to examples/offload/gke/benchmarks/README.md
diff --git a/‎…amples/gke/benchmarks/benchmark-pod.yaml‎ ‎…ffload/gke/benchmarks/benchmark-pod.yaml‎examples/gke/benchmarks/benchmark-pod.yaml renamed to examples/offload/gke/benchmarks/benchmark-pod.yaml b/‎…amples/gke/benchmarks/benchmark-pod.yaml‎ ‎…ffload/gke/benchmarks/benchmark-pod.yaml‎examples/gke/benchmarks/benchmark-pod.yaml renamed to examples/offload/gke/benchmarks/benchmark-pod.yaml
diff --git a/‎…ples/gke/benchmarks/deploy-baseline.yaml‎ ‎…load/gke/benchmarks/deploy-baseline.yaml‎examples/gke/benchmarks/deploy-baseline.yaml renamed to examples/offload/gke/benchmarks/deploy-baseline.yaml b/‎…ples/gke/benchmarks/deploy-baseline.yaml‎ ‎…load/gke/benchmarks/deploy-baseline.yaml‎examples/gke/benchmarks/deploy-baseline.yaml renamed to examples/offload/gke/benchmarks/deploy-baseline.yaml
diff --git a/‎…s/gke/benchmarks/deploy-cpu-offload.yaml‎ ‎…d/gke/benchmarks/deploy-cpu-offload.yaml‎examples/gke/benchmarks/deploy-cpu-offload.yaml renamed to examples/offload/gke/benchmarks/deploy-cpu-offload.yaml b/‎…s/gke/benchmarks/deploy-cpu-offload.yaml‎ ‎…d/gke/benchmarks/deploy-cpu-offload.yaml‎examples/gke/benchmarks/deploy-cpu-offload.yaml renamed to examples/offload/gke/benchmarks/deploy-cpu-offload.yaml
diff --git a/‎examples/gke/benchmarks/service.yaml‎ ‎…ples/offload/gke/benchmarks/service.yaml‎examples/gke/benchmarks/service.yaml renamed to examples/offload/gke/benchmarks/service.yaml b/‎examples/gke/benchmarks/service.yaml‎ ‎…ples/offload/gke/benchmarks/service.yaml‎examples/gke/benchmarks/service.yaml renamed to examples/offload/gke/benchmarks/service.yaml
diff --git a/‎examples/gke/hf_secret.yaml‎ ‎examples/offload/gke/hf_secret.yaml‎examples/gke/hf_secret.yaml renamed to examples/offload/gke/hf_secret.yaml b/‎examples/gke/hf_secret.yaml‎ ‎examples/offload/gke/hf_secret.yaml‎examples/gke/hf_secret.yaml renamed to examples/offload/gke/hf_secret.yaml
diff --git a/‎…les/gke/pod_tpu_commons_cpu_offload.yaml‎ ‎…oad/gke/pod_tpu_commons_cpu_offload.yaml‎examples/gke/pod_tpu_commons_cpu_offload.yaml renamed to examples/offload/gke/pod_tpu_commons_cpu_offload.yaml
Lines changed: 1 addition & 1 deletion b/‎…les/gke/pod_tpu_commons_cpu_offload.yaml‎ ‎…oad/gke/pod_tpu_commons_cpu_offload.yaml‎examples/gke/pod_tpu_commons_cpu_offload.yaml renamed to examples/offload/gke/pod_tpu_commons_cpu_offload.yaml
Lines changed: 1 addition & 1 deletion
diff --git a/‎…pu_commons_cpu_offload_verification.yaml‎ ‎…pu_commons_cpu_offload_verification.yaml‎examples/gke/pod_tpu_commons_cpu_offload_verification.yaml renamed to examples/offload/gke/pod_tpu_commons_cpu_offload_verification.yaml
Lines changed: 1 addition & 1 deletion b/‎…pu_commons_cpu_offload_verification.yaml‎ ‎…pu_commons_cpu_offload_verification.yaml‎examples/gke/pod_tpu_commons_cpu_offload_verification.yaml renamed to examples/offload/gke/pod_tpu_commons_cpu_offload_verification.yaml
Lines changed: 1 addition & 1 deletion
diff --git a/‎…gke/pod_tpu_host_offload_unit_tests.yaml‎ ‎…gke/pod_tpu_host_offload_unit_tests.yaml‎examples/gke/pod_tpu_host_offload_unit_tests.yaml renamed to examples/offload/gke/pod_tpu_host_offload_unit_tests.yaml b/‎…gke/pod_tpu_host_offload_unit_tests.yaml‎ ‎…gke/pod_tpu_host_offload_unit_tests.yaml‎examples/gke/pod_tpu_host_offload_unit_tests.yaml renamed to examples/offload/gke/pod_tpu_host_offload_unit_tests.yaml
diff --git a/‎examples/offline_inference_kv_cache.py‎ ‎…es/offload/offline_inference_kv_cache.py‎examples/offline_inference_kv_cache.py renamed to examples/offload/offline_inference_kv_cache.py
Lines changed: 1 addition & 1 deletion b/‎examples/offline_inference_kv_cache.py‎ ‎…es/offload/offline_inference_kv_cache.py‎examples/offline_inference_kv_cache.py renamed to examples/offload/offline_inference_kv_cache.py
Lines changed: 1 addition & 1 deletion
@@ -13,7 +13,7 @@ spec:
     imagePullPolicy: Always # Uncomment to always pull the latest image for any dev work
     command:
     - python
-    - /workspace/tpu_inference/examples/offline_inference_kv_cache.py
+    - /workspace/tpu_inference/examples/offload/offline_inference_kv_cache.py
     - --model=meta-llama/Llama-3.1-8B
     - --tensor_parallel_size=8
     - --max_model_len=1024
 
@@ -19,7 +19,7 @@ spec:
     imagePullPolicy: Always
     command:
     - python
-    - /workspace/tpu_inference/examples/offline_inference_kv_cache_verification.py
+    - /workspace/tpu_inference/examples/offload/offline_inference_kv_cache_verification.py
     - --model=meta-llama/Llama-3.1-8B
     - --tensor_parallel_size=8
     - --max_model_len=1024
 
@@ -5,7 +5,7 @@
 
 import vllm.envs as envs
 from vllm import LLM, EngineArgs
-from vllm.utils import FlexibleArgumentParser
+from vllm.utils.argparse_utils import FlexibleArgumentParser
 
 
 def create_parser():