diff --git a/.gitmodules b/.gitmodules
index 0f8525526..458f777b7 100644
--- a/.gitmodules
+++ b/.gitmodules
@@ -1,4 +1,4 @@
 [submodule "libvgpu"]
 	path = libvgpu
-	url = https://github.com/Project-HAMi/HAMi-core.git
-	branch = main
+	url = https://github.com/xiilab/HAMi-core.git
+	branch = vulkan-layer
diff --git a/charts/hami/templates/scheduler/webhook.yaml b/charts/hami/templates/scheduler/webhook.yaml
index 98ab05b22..175f67709 100644
--- a/charts/hami/templates/scheduler/webhook.yaml
+++ b/charts/hami/templates/scheduler/webhook.yaml
@@ -29,10 +29,17 @@ webhooks:
         {{- toYaml .Values.scheduler.admissionWebhook.namespaceSelector.matchLabels | nindent 8 }}
       {{- end }}
       matchExpressions:
+      {{- if eq (.Values.scheduler.admissionWebhook.namespaceSelector.mode | default "opt-out") "opt-in" }}
+      - key: hami.io/vgpu
+        operator: In
+        values:
+        - enabled
+      {{- else }}
       - key: hami.io/webhook
         operator: NotIn
         values:
         - ignore
+      {{- end }}
       {{- if .Values.scheduler.admissionWebhook.whitelistNamespaces }}
       - key: kubernetes.io/metadata.name
         operator: NotIn
diff --git a/charts/hami/values.yaml b/charts/hami/values.yaml
index 382c2ec47..1e4393241 100644
--- a/charts/hami/values.yaml
+++ b/charts/hami/values.yaml
@@ -172,19 +172,19 @@ scheduler:
       # - default
       # - kube-system
       # - istio-system
-    # namespaceSelector controls which namespaces the webhook will be applied to.
-    # The default matchExpressions exclude namespaces with label "hami.io/webhook: ignore".
-    # You can add additional matchLabels or matchExpressions to further filter namespaces.
+    # namespaceSelector controls which namespaces the webhook will apply to.
+    # mode (this file sets "opt-in"; the template falls back to "opt-out" when unset):
+    #   "opt-out" (legacy behavior): apply to all namespaces except those labeled
+    #              hami.io/webhook=ignore. Suitable when most workloads need vGPU
+    #              isolation and a small number opt out.
+    #   "opt-in"  (recommended for clusters with NVIDIA Omniverse / Isaac Sim
+    #              workloads that conflict with HAMi-core hooks): apply ONLY to
+    #              namespaces labeled hami.io/vgpu=enabled. Other namespaces see
+    #              no mutation, no LD_PRELOAD injection, no implicit Vulkan layer.
     namespaceSelector:
-      matchLabels:
-        # app.kubernetes.io/part-of: kubeflow-profile
+      mode: opt-in
+      matchLabels: {}
       matchExpressions: []
-      # Example: exclude namespaces with specific labels
-      # - key: environment
-      #   operator: In
-      #   values:
-      #   - development
-      #   - staging
     # objectSelector controls which pods the webhook will be applied to.
     # The default matchExpressions exclude pods with label "hami.io/webhook: ignore".
     # You can add additional matchLabels or matchExpressions to further filter pods.
diff --git a/cluster/runtime/snapshot-2026-04-28/hami-vulkan-manifest-cm.yaml b/cluster/runtime/snapshot-2026-04-28/hami-vulkan-manifest-cm.yaml
new file mode 100644
index 000000000..73dbb43b5
--- /dev/null
+++ b/cluster/runtime/snapshot-2026-04-28/hami-vulkan-manifest-cm.yaml
@@ -0,0 +1,27 @@
+apiVersion: v1
+data:
+  hami.json: |
+    {
+        "file_format_version": "1.0.0",
+        "layer": {
+            "name": "VK_LAYER_HAMI_vgpu",
+            "type": "GLOBAL",
+            "library_path": "/usr/local/vgpu/libvgpu.so",
+            "api_version": "1.3.0",
+            "implementation_version": "1",
+            "description": "HAMi Vulkan vGPU memory partitioning layer",
+            "enable_environment": {
+                "HAMI_VULKAN_ENABLE": "1"
+            }
+        }
+    }
+kind: ConfigMap
+metadata:
+  annotations:
+    kubectl.kubernetes.io/last-applied-configuration: |
+      {"apiVersion":"v1","data":{"hami.json":"{\n    \"file_format_version\": \"1.0.0\",\n    \"layer\": {\n        \"name\": \"VK_LAYER_HAMI_vgpu\",\n        \"type\": \"GLOBAL\",\n        \"library_path\": \"/usr/local/vgpu/libvgpu.so\",\n        \"api_version\": \"1.3.0\",\n        \"implementation_version\": \"1\",\n        \"description\": \"HAMi Vulkan vGPU memory partitioning layer\",\n        \"enable_environment\": {\n            \"HAMI_VULKAN_ENABLE\": \"1\"\n        }\n    }\n}\n"},"kind":"ConfigMap","metadata":{"annotations":{},"name":"hami-vulkan-manifest","namespace":"kube-system"}}
+  creationTimestamp: "2026-04-27T02:17:50Z"
+  name: hami-vulkan-manifest
+  namespace: kube-system
+  resourceVersion: "20078116"
+  uid: cf25a104-6177-43ee-8a64-0483ef5901fa
diff --git a/cluster/runtime/snapshot-2026-04-28/hami-vulkan-manifest-installer-ds.yaml b/cluster/runtime/snapshot-2026-04-28/hami-vulkan-manifest-installer-ds.yaml
new file mode 100644
index 000000000..de65e0b1e
--- /dev/null
+++ b/cluster/runtime/snapshot-2026-04-28/hami-vulkan-manifest-installer-ds.yaml
@@ -0,0 +1,80 @@
+apiVersion: apps/v1
+kind: DaemonSet
+metadata:
+  annotations:
+    deprecated.daemonset.template.generation: "2"
+    kubectl.kubernetes.io/last-applied-configuration: |
+      {"apiVersion":"apps/v1","kind":"DaemonSet","metadata":{"annotations":{},"labels":{"app":"hami-vulkan-manifest-installer"},"name":"hami-vulkan-manifest-installer","namespace":"kube-system"},"spec":{"selector":{"matchLabels":{"app":"hami-vulkan-manifest-installer"}},"template":{"metadata":{"labels":{"app":"hami-vulkan-manifest-installer"}},"spec":{"containers":[{"command":["/bin/sh","-c","set -eu\nmkdir -p /host/usr/local/vgpu/vulkan/implicit_layer.d\ncp -f /manifest/hami.json \\\n      /host/usr/local/vgpu/vulkan/implicit_layer.d/hami.json\necho \"[hami-vulkan-manifest] installed at /usr/local/vgpu/vulkan/implicit_layer.d/hami.json\"\n# DaemonSet 이라 종료하지 않고 sleep — restart 루프 회피\nsleep infinity\n"],"image":"busybox:1.36","name":"installer","securityContext":{"runAsUser":0},"volumeMounts":[{"mountPath":"/manifest","name":"manifest","readOnly":true},{"mountPath":"/host/usr/local/vgpu","name":"host-vgpu"}]}],"hostPID":false,"nodeSelector":{"nvidia.com/gpu.present":"true"},"restartPolicy":"Always","tolerations":[{"operator":"Exists"}],"volumes":[{"configMap":{"name":"hami-vulkan-manifest"},"name":"manifest"},{"hostPath":{"path":"/usr/local/vgpu","type":"DirectoryOrCreate"},"name":"host-vgpu"}]}}}}
+  creationTimestamp: "2026-04-27T02:17:50Z"
+  generation: 2
+  labels:
+    app: hami-vulkan-manifest-installer
+  name: hami-vulkan-manifest-installer
+  namespace: kube-system
+  resourceVersion: "20897004"
+  uid: 6998669a-7bd7-463b-9564-94ad1a4f3feb
+spec:
+  revisionHistoryLimit: 10
+  selector:
+    matchLabels:
+      app: hami-vulkan-manifest-installer
+  template:
+    metadata:
+      labels:
+        app: hami-vulkan-manifest-installer
+    spec:
+      containers:
+      - command:
+        - /bin/sh
+        - -c
+        - |
+          set -eu
+          mkdir -p /host/usr/local/vgpu/vulkan/implicit_layer.d
+          cp -f /manifest/hami.json \
+                /host/usr/local/vgpu/vulkan/implicit_layer.d/hami.json
+          echo "[hami-vulkan-manifest] installed at /usr/local/vgpu/vulkan/implicit_layer.d/hami.json"
+          # DaemonSet 이라 종료하지 않고 sleep — restart 루프 회피
+          sleep infinity
+        image: busybox:1.36
+        imagePullPolicy: IfNotPresent
+        name: installer
+        resources: {}
+        securityContext:
+          runAsUser: 0
+        terminationMessagePath: /dev/termination-log
+        terminationMessagePolicy: File
+        volumeMounts:
+        - mountPath: /manifest
+          name: manifest
+          readOnly: true
+        - mountPath: /host/usr/local/vgpu
+          name: host-vgpu
+      dnsPolicy: ClusterFirst
+      nodeSelector:
+        hami.io/disabled: "true"
+      restartPolicy: Always
+      schedulerName: default-scheduler
+      securityContext: {}
+      terminationGracePeriodSeconds: 30
+      tolerations:
+      - operator: Exists
+      volumes:
+      - configMap:
+          defaultMode: 420
+          name: hami-vulkan-manifest
+        name: manifest
+      - hostPath:
+          path: /usr/local/vgpu
+          type: DirectoryOrCreate
+        name: host-vgpu
+  updateStrategy:
+    rollingUpdate:
+      maxSurge: 0
+      maxUnavailable: 1
+    type: RollingUpdate
+status:
+  currentNumberScheduled: 0
+  desiredNumberScheduled: 0
+  numberMisscheduled: 0
+  numberReady: 0
+  observedGeneration: 2
diff --git a/cluster/runtime/snapshot-2026-04-28/hami-webhook-mutating.yaml b/cluster/runtime/snapshot-2026-04-28/hami-webhook-mutating.yaml
new file mode 100644
index 000000000..12814a69f
--- /dev/null
+++ b/cluster/runtime/snapshot-2026-04-28/hami-webhook-mutating.yaml
@@ -0,0 +1,51 @@
+apiVersion: admissionregistration.k8s.io/v1
+kind: MutatingWebhookConfiguration
+metadata:
+  annotations:
+    meta.helm.sh/release-name: hami-webhook
+    meta.helm.sh/release-namespace: hami-system
+  creationTimestamp: "2026-04-27T03:34:22Z"
+  generation: 2
+  labels:
+    app.kubernetes.io/managed-by: Helm
+  name: hami-webhook-webhook
+  resourceVersion: "20112390"
+  uid: 32e51184-bf13-41df-a382-1566671cc010
+webhooks:
+- admissionReviewVersions:
+  - v1beta1
+  clientConfig:
+    caBundle: LS0tLS1CRUdJTiBDRVJUSUZJQ0FURS0tLS0tCk1JSUJkakNDQVJ5Z0F3SUJBZ0lSQVBlREVjTWFneDYwOENEVWJYdUZXdjR3Q2dZSUtvWkl6ajBFQXdJd0R6RU4KTUFzR0ExVUVDaE1FYm1sc01UQWdGdzB5TmpBME1qY3dNakl5TlROYUdBOHlNVEkyTURRd016QXlNakkxTTFvdwpEekVOTUFzR0ExVUVDaE1FYm1sc01UQlpNQk1HQnlxR1NNNDlBZ0VHQ0NxR1NNNDlBd0VIQTBJQUJIc2dFbU95CnpQWllFTHEwMmFIQUIxRXI1MnE1dlE0Vi9qV2hTVmg2S0RIRUh6Y1JFNUdlQ29nMDBMbDBaYnlDZXZ1RmhETGcKdDBYZXlXOVJaTldMVDF1alZ6QlZNQTRHQTFVZER3RUIvd1FFQXdJQ0JEQVRCZ05WSFNVRUREQUtCZ2dyQmdFRgpCUWNEQVRBUEJnTlZIUk1CQWY4RUJUQURBUUgvTUIwR0ExVWREZ1FXQkJSOHk5ZXVCOFVIaVZ0bWtvQkh6VzVDCnZXRXJHekFLQmdncWhrak9QUVFEQWdOSUFEQkZBaUVBdUJ2eWVmekJoam5rclpYN0hKRzNURzVRZ2NzTVpvVC8KUmpPQWFhNEFrNHNDSUEvaGhyMndkODE2M2loNmEvWWtuTWdhOXRCeVpGQ2lxeXF3NjhVUjRaNVIKLS0tLS1FTkQgQ0VSVElGSUNBVEUtLS0tLQo=
+    service:
+      name: hami-webhook-scheduler
+      namespace: hami-system
+      path: /webhook
+      port: 443
+  failurePolicy: Ignore
+  matchPolicy: Equivalent
+  name: vgpu.hami.io
+  namespaceSelector:
+    matchExpressions:
+    - key: hami.io/webhook
+      operator: NotIn
+      values:
+      - ignore
+  objectSelector:
+    matchExpressions:
+    - key: hami.io/webhook
+      operator: NotIn
+      values:
+      - ignore
+  reinvocationPolicy: Never
+  rules:
+  - apiGroups:
+    - ""
+    apiVersions:
+    - v1
+    operations:
+    - CREATE
+    resources:
+    - pods
+    scope: '*'
+  sideEffects: None
+  timeoutSeconds: 10
diff --git a/cluster/runtime/snapshot-2026-04-28/volcano-device-plugin-ds.yaml b/cluster/runtime/snapshot-2026-04-28/volcano-device-plugin-ds.yaml
new file mode 100644
index 000000000..a4003afaf
--- /dev/null
+++ b/cluster/runtime/snapshot-2026-04-28/volcano-device-plugin-ds.yaml
@@ -0,0 +1,180 @@
+apiVersion: apps/v1
+kind: DaemonSet
+metadata:
+  annotations:
+    deprecated.daemonset.template.generation: "5"
+    kubectl.kubernetes.io/last-applied-configuration: |
+      {"apiVersion":"apps/v1","kind":"DaemonSet","metadata":{"annotations":{},"name":"volcano-device-plugin","namespace":"kube-system"},"spec":{"selector":{"matchLabels":{"name":"volcano-device-plugin"}},"template":{"metadata":{"annotations":{"scheduler.alpha.kubernetes.io/critical-pod":""},"labels":{"name":"volcano-device-plugin"}},"spec":{"containers":[{"args":["--device-split-count=10"],"env":[{"name":"NODE_NAME","valueFrom":{"fieldRef":{"fieldPath":"spec.nodeName"}}},{"name":"HOOK_PATH","value":"/usr/local/vgpu"},{"name":"NVIDIA_VISIBLE_DEVICES","value":"all"},{"name":"NVIDIA_MIG_MONITOR_DEVICES","value":"all"},{"name":"NVIDIA_DRIVER_CAPABILITIES","value":"utility"}],"image":"10.61.3.124:30002/library/volcano-vgpu-device-plugin:vulkan-v1","lifecycle":{"postStart":{"exec":{"command":["/bin/sh","-c","cp -rf /k8s-vgpu/lib/nvidia/. /usr/local/vgpu/"]}}},"name":"volcano-device-plugin","securityContext":{"allowPrivilegeEscalation":true,"capabilities":{"add":["SYS_ADMIN"],"drop":["ALL"]},"privileged":true},"volumeMounts":[{"mountPath":"/config","name":"deviceconfig"},{"mountPath":"/var/lib/kubelet/device-plugins","name":"device-plugin"},{"mountPath":"/usr/local/vgpu","name":"lib"},{"mountPath":"/tmp","name":"hosttmp"}]},{"command":["/bin/bash","-c","volcano-vgpu-monitor"],"env":[{"name":"NVIDIA_VISIBLE_DEVICES","value":"all"},{"name":"NVIDIA_MIG_MONITOR_DEVICES","value":"all"},{"name":"HOOK_PATH","value":"/tmp/vgpu"},{"name":"NODE_NAME","valueFrom":{"fieldRef":{"fieldPath":"spec.nodeName"}}}],"image":"10.61.3.124:30002/library/volcano-vgpu-device-plugin:vulkan-v1","name":"monitor","securityContext":{"allowPrivilegeEscalation":true,"capabilities":{"add":["SYS_ADMIN"],"drop":["ALL"]},"privileged":true},"volumeMounts":[{"mountPath":"/run/docker","name":"dockers"},{"mountPath":"/run/containerd","name":"containerds"},{"mountPath":"/sysinfo","name":"sysinfo"},{"mountPath":"/hostvar","name":"hostvar"},{"mountPath":"/tmp","name":"hosttmp"}]}],"priorityClassName":"system-node-
critical","serviceAccount":"volcano-device-plugin","tolerations":[{"key":"CriticalAddonsOnly","operator":"Exists"},{"effect":"NoSchedule","key":"volcano.sh/gpu-memory","operator":"Exists"}],"volumes":[{"configMap":{"name":"volcano-vgpu-node-config"},"name":"deviceconfig"},{"hostPath":{"path":"/var/lib/kubelet/device-plugins","type":"Directory"},"name":"device-plugin"},{"hostPath":{"path":"/usr/local/vgpu","type":"DirectoryOrCreate"},"name":"lib"},{"hostPath":{"path":"/tmp","type":"DirectoryOrCreate"},"name":"hosttmp"},{"hostPath":{"path":"/run/docker","type":"DirectoryOrCreate"},"name":"dockers"},{"hostPath":{"path":"/run/containerd","type":"DirectoryOrCreate"},"name":"containerds"},{"hostPath":{"path":"/usr/bin","type":"Directory"},"name":"usrbin"},{"hostPath":{"path":"/sys","type":"Directory"},"name":"sysinfo"},{"hostPath":{"path":"/var","type":"Directory"},"name":"hostvar"}]}},"updateStrategy":{"type":"RollingUpdate"}}}
+  creationTimestamp: "2026-04-27T02:17:52Z"
+  generation: 5
+  name: volcano-device-plugin
+  namespace: kube-system
+  resourceVersion: "20250370"
+  uid: f0d77283-6b73-419e-8504-6d4965dde85a
+spec:
+  revisionHistoryLimit: 10
+  selector:
+    matchLabels:
+      name: volcano-device-plugin
+  template:
+    metadata:
+      annotations:
+        kubectl.kubernetes.io/restartedAt: "2026-04-27T17:44:34+09:00"
+        scheduler.alpha.kubernetes.io/critical-pod: ""
+      labels:
+        name: volcano-device-plugin
+    spec:
+      containers:
+      - args:
+        - --device-split-count=10
+        - --gpu-memory-factor=1024
+        env:
+        - name: NODE_NAME
+          valueFrom:
+            fieldRef:
+              apiVersion: v1
+              fieldPath: spec.nodeName
+        - name: HOOK_PATH
+          value: /usr/local/vgpu
+        - name: NVIDIA_VISIBLE_DEVICES
+          value: all
+        - name: NVIDIA_MIG_MONITOR_DEVICES
+          value: all
+        - name: NVIDIA_DRIVER_CAPABILITIES
+          value: utility
+        image: 10.61.3.124:30002/library/volcano-vgpu-device-plugin:vulkan-v1
+        imagePullPolicy: IfNotPresent
+        lifecycle:
+          postStart:
+            exec:
+              command:
+              - /bin/sh
+              - -c
+              - cp -rf /k8s-vgpu/lib/nvidia/. /usr/local/vgpu/
+        name: volcano-device-plugin
+        resources: {}
+        securityContext:
+          allowPrivilegeEscalation: true
+          capabilities:
+            add:
+            - SYS_ADMIN
+            drop:
+            - ALL
+          privileged: true
+        terminationMessagePath: /dev/termination-log
+        terminationMessagePolicy: File
+        volumeMounts:
+        - mountPath: /config
+          name: deviceconfig
+        - mountPath: /var/lib/kubelet/device-plugins
+          name: device-plugin
+        - mountPath: /usr/local/vgpu
+          name: lib
+        - mountPath: /tmp
+          name: hosttmp
+      - command:
+        - /bin/bash
+        - -c
+        - volcano-vgpu-monitor
+        env:
+        - name: NVIDIA_VISIBLE_DEVICES
+          value: all
+        - name: NVIDIA_MIG_MONITOR_DEVICES
+          value: all
+        - name: HOOK_PATH
+          value: /tmp/vgpu
+        - name: NODE_NAME
+          valueFrom:
+            fieldRef:
+              apiVersion: v1
+              fieldPath: spec.nodeName
+        image: 10.61.3.124:30002/library/volcano-vgpu-device-plugin:vulkan-v1
+        imagePullPolicy: IfNotPresent
+        name: monitor
+        resources: {}
+        securityContext:
+          allowPrivilegeEscalation: true
+          capabilities:
+            add:
+            - SYS_ADMIN
+            drop:
+            - ALL
+          privileged: true
+        terminationMessagePath: /dev/termination-log
+        terminationMessagePolicy: File
+        volumeMounts:
+        - mountPath: /run/docker
+          name: dockers
+        - mountPath: /run/containerd
+          name: containerds
+        - mountPath: /sysinfo
+          name: sysinfo
+        - mountPath: /hostvar
+          name: hostvar
+        - mountPath: /tmp
+          name: hosttmp
+      dnsPolicy: ClusterFirst
+      priorityClassName: system-node-critical
+      restartPolicy: Always
+      runtimeClassName: nvidia
+      schedulerName: default-scheduler
+      securityContext: {}
+      serviceAccount: volcano-device-plugin
+      serviceAccountName: volcano-device-plugin
+      terminationGracePeriodSeconds: 30
+      tolerations:
+      - key: CriticalAddonsOnly
+        operator: Exists
+      - effect: NoSchedule
+        key: volcano.sh/gpu-memory
+        operator: Exists
+      volumes:
+      - configMap:
+          defaultMode: 420
+          name: volcano-vgpu-node-config
+        name: deviceconfig
+      - hostPath:
+          path: /var/lib/kubelet/device-plugins
+          type: Directory
+        name: device-plugin
+      - hostPath:
+          path: /usr/local/vgpu
+          type: DirectoryOrCreate
+        name: lib
+      - hostPath:
+          path: /tmp
+          type: DirectoryOrCreate
+        name: hosttmp
+      - hostPath:
+          path: /run/docker
+          type: DirectoryOrCreate
+        name: dockers
+      - hostPath:
+          path: /run/containerd
+          type: DirectoryOrCreate
+        name: containerds
+      - hostPath:
+          path: /usr/bin
+          type: Directory
+        name: usrbin
+      - hostPath:
+          path: /sys
+          type: Directory
+        name: sysinfo
+      - hostPath:
+          path: /var
+          type: Directory
+        name: hostvar
+  updateStrategy:
+    rollingUpdate:
+      maxSurge: 0
+      maxUnavailable: 1
+    type: RollingUpdate
+status:
+  currentNumberScheduled: 1
+  desiredNumberScheduled: 1
+  numberAvailable: 1
+  numberMisscheduled: 0
+  numberReady: 1
+  observedGeneration: 5
+  updatedNumberScheduled: 1
diff --git a/cluster/runtime/snapshot-2026-04-29-step-d/hami-vulkan-manifest-cm.yaml b/cluster/runtime/snapshot-2026-04-29-step-d/hami-vulkan-manifest-cm.yaml
new file mode 100644
index 000000000..32f25062b
--- /dev/null
+++ b/cluster/runtime/snapshot-2026-04-29-step-d/hami-vulkan-manifest-cm.yaml
@@ -0,0 +1,26 @@
+apiVersion: v1
+kind: ConfigMap
+metadata:
+  name: hami-vulkan-manifest
+  namespace: kube-system
+data:
+  hami.json: |
+    {
+        "file_format_version": "1.0.0",
+        "layer": {
+            "name": "VK_LAYER_HAMI_vgpu",
+            "type": "INSTANCE",
+            "library_path": "/usr/local/vgpu/libvgpu_vk.so",
+            "api_version": "1.3.0",
+            "implementation_version": "1",
+            "description": "HAMi vGPU partition layer (Step D — split libvgpu_vk.so)",
+            "instance_extensions": [],
+            "device_extensions": [],
+            "enable_environment": {
+                "HAMI_VULKAN_ENABLE": "1"
+            },
+            "disable_environment": {
+                "DISABLE_HAMI_VULKAN_LAYER": "1"
+            }
+        }
+    }
diff --git a/cluster/runtime/snapshot-2026-04-29-step-d/hami-vulkan-manifest-installer-ds.yaml b/cluster/runtime/snapshot-2026-04-29-step-d/hami-vulkan-manifest-installer-ds.yaml
new file mode 100644
index 000000000..b555f3b39
--- /dev/null
+++ b/cluster/runtime/snapshot-2026-04-29-step-d/hami-vulkan-manifest-installer-ds.yaml
@@ -0,0 +1,57 @@
+apiVersion: apps/v1
+kind: DaemonSet
+metadata:
+  name: hami-vulkan-manifest-installer
+  namespace: kube-system
+  labels:
+    app: hami-vulkan-manifest-installer
+spec:
+  selector:
+    matchLabels:
+      app: hami-vulkan-manifest-installer
+  template:
+    metadata:
+      labels:
+        app: hami-vulkan-manifest-installer
+    spec:
+      nodeSelector:
+        nvidia.com/gpu.present: "true"
+      hostPID: false
+      restartPolicy: Always
+      tolerations:
+        - operator: Exists
+      containers:
+        - name: installer
+          image: busybox:1.36
+          imagePullPolicy: IfNotPresent
+          securityContext:
+            runAsUser: 0
+          command:
+            - /bin/sh
+            - -c
+            - |
+              set -eu
+              mkdir -p /host/etc/vulkan/implicit_layer.d
+              cp -f /manifest/hami.json /host/etc/vulkan/implicit_layer.d/hami.json
+              echo "[hami-vulkan-manifest] installed at /etc/vulkan/implicit_layer.d/hami.json"
+              # DaemonSet 이라 종료하지 않고 sleep — restart 루프 회피
+              sleep infinity
+          volumeMounts:
+            - name: manifest
+              mountPath: /manifest
+              readOnly: true
+            - name: host-etc-vulkan
+              mountPath: /host/etc/vulkan
+      volumes:
+        - name: manifest
+          configMap:
+            name: hami-vulkan-manifest
+        - name: host-etc-vulkan
+          hostPath:
+            path: /etc/vulkan
+            type: DirectoryOrCreate
+  updateStrategy:
+    type: RollingUpdate
+    rollingUpdate:
+      maxSurge: 0
+      maxUnavailable: 1
diff --git a/cluster/runtime/snapshot-2026-04-29-step-d/vk_partition_test.py b/cluster/runtime/snapshot-2026-04-29-step-d/vk_partition_test.py
new file mode 100644
index 000000000..4090e3c8e
--- /dev/null
+++ b/cluster/runtime/snapshot-2026-04-29-step-d/vk_partition_test.py
@@ -0,0 +1,145 @@
+#!/usr/bin/env python3
+"""Step D 4-path verification — Vulkan-side partition enforce.
+
+Path 3: vkGetPhysicalDeviceMemoryProperties → device-local heap size
+        MUST be the partition limit (~23 GiB), not the raw 46 GiB.
+Path 4: vkAllocateMemory(size = 25 GiB) MUST fail with
+        VK_ERROR_OUT_OF_DEVICE_MEMORY (partition limit is ~23 GiB).
+
+Run inside isaac-launchable-0 vscode container (annotation
+hami.io/vulkan: "true" + webhook-injected manifest + libvgpu_vk.so).
+"""
+import sys
+
+# Partition limit expected on the device-local heap: 23552 MiB = 23 GiB.
+PARTITION_MIB = 23552
+PARTITION_BYTES = PARTITION_MIB * 1024 * 1024
+# Allowed deviation between the reported heap size and PARTITION_MIB.
+TOLERANCE_MIB = 256
+# Allocation size for Path 4 (25 GiB), deliberately above the partition limit.
+OVER_BUDGET_BYTES = 25 * 1024 * 1024 * 1024
+
+try:
+    import vulkan as vk
+except ImportError:
+    print("ERR: pip install vulkan")
+    sys.exit(2)
+
+# Packed Vulkan API version 1.3.0: (major << 22) | (minor << 12).
+API_1_3 = (1 << 22) | (3 << 12)
+
+
+def _probe_over_budget_alloc(dev, mem_type_idx):
+    """Path 4: try to allocate OVER_BUDGET_BYTES from ``mem_type_idx``.
+
+    Creates a logical device on ``dev`` (queue family 0, one queue), attempts
+    the allocation, and always destroys the device before returning.
+
+    Returns:
+        True only when vkAllocateMemory raises VkErrorOutOfDeviceMemory.
+    """
+    queue_create = vk.VkDeviceQueueCreateInfo(
+        sType=vk.VK_STRUCTURE_TYPE_DEVICE_QUEUE_CREATE_INFO,
+        queueFamilyIndex=0,
+        queueCount=1,
+        pQueuePriorities=[1.0],
+    )
+    device_create = vk.VkDeviceCreateInfo(
+        sType=vk.VK_STRUCTURE_TYPE_DEVICE_CREATE_INFO,
+        queueCreateInfoCount=1,
+        pQueueCreateInfos=[queue_create],
+    )
+    ldev = vk.vkCreateDevice(dev, device_create, None)
+    try:
+        alloc_info = vk.VkMemoryAllocateInfo(
+            sType=vk.VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO,
+            allocationSize=OVER_BUDGET_BYTES,
+            memoryTypeIndex=mem_type_idx,
+        )
+        try:
+            mem = vk.vkAllocateMemory(ldev, alloc_info, None)
+        except vk.VkErrorOutOfDeviceMemory:
+            print(f"Path 4: PASS (VK_ERROR_OUT_OF_DEVICE_MEMORY for {OVER_BUDGET_BYTES // (1024**3)} GiB > {PARTITION_MIB // 1024} GiB partition)")
+            return True
+        except Exception as e:
+            print(f"Path 4: FAIL (unexpected error {type(e).__name__}: {e})")
+            return False
+        print(f"Path 4: FAIL (expected OOM for {OVER_BUDGET_BYTES // (1024**3)} GiB, got success — partition not enforced)")
+        vk.vkFreeMemory(ldev, mem, None)
+        return False
+    finally:
+        # Release the device on every exit path, including unexpected errors.
+        vk.vkDestroyDevice(ldev, None)
+
+
+def main():
+    """Run the Path 3 and Path 4 probes against the first physical device.
+
+    Returns:
+        0 when both paths pass, 1 otherwise. Exits with status 2 when no
+        physical device is enumerated.
+    """
+    app = vk.VkApplicationInfo(
+        sType=vk.VK_STRUCTURE_TYPE_APPLICATION_INFO,
+        pApplicationName="hami-step-d-probe",
+        applicationVersion=1,
+        pEngineName="probe",
+        engineVersion=1,
+        apiVersion=API_1_3,
+    )
+    ci = vk.VkInstanceCreateInfo(
+        sType=vk.VK_STRUCTURE_TYPE_INSTANCE_CREATE_INFO,
+        pApplicationInfo=app,
+    )
+    inst = vk.vkCreateInstance(ci, None)
+    try:
+        phys_devs = vk.vkEnumeratePhysicalDevices(inst)
+        if not phys_devs:
+            print("ERR: no physical devices")
+            sys.exit(2)
+        dev = phys_devs[0]
+        mem_props = vk.vkGetPhysicalDeviceMemoryProperties(dev)
+
+        # Path 3: find the largest device-local heap, remembering its index so
+        # Path 4 allocates from the same heap that is measured here.
+        device_local_heap_idx = -1
+        device_local_heap_size = 0
+        for i in range(mem_props.memoryHeapCount):
+            heap = mem_props.memoryHeaps[i]
+            if heap.flags & vk.VK_MEMORY_HEAP_DEVICE_LOCAL_BIT:
+                if heap.size > device_local_heap_size:
+                    device_local_heap_size = heap.size
+                    device_local_heap_idx = i
+        p3_mib = device_local_heap_size // (1024 * 1024)
+        print(f"Path 3: device-local heap = {device_local_heap_size} bytes ({p3_mib} MiB)")
+        path3_ok = abs(p3_mib - PARTITION_MIB) <= TOLERANCE_MIB
+        if path3_ok:
+            print(f"Path 3: PASS (within {TOLERANCE_MIB} MiB of {PARTITION_MIB} MiB partition)")
+        else:
+            print(f"Path 3: FAIL (expected ~{PARTITION_MIB} MiB, got {p3_mib} MiB)")
+
+        # Path 4: pick a device-local memory type backed by that heap. Taking the
+        # first device-local type regardless of heap could target a smaller
+        # heap, where the allocation would fail without any partition limit.
+        path4_ok = False
+        mem_type_idx = -1
+        for i in range(mem_props.memoryTypeCount):
+            mem_type = mem_props.memoryTypes[i]
+            if mem_type.heapIndex != device_local_heap_idx:
+                continue
+            if mem_type.propertyFlags & vk.VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT:
+                mem_type_idx = i
+                break
+        if mem_type_idx < 0:
+            print("Path 4: SKIP (no device-local memory type)")
+        else:
+            path4_ok = _probe_over_budget_alloc(dev, mem_type_idx)
+    finally:
+        # Destroy the instance even when a probe raises or sys.exit() is called.
+        vk.vkDestroyInstance(inst, None)
+
+    return 0 if (path3_ok and path4_ok) else 1
+
+
+if __name__ == "__main__":
+    sys.exit(main())
diff --git a/cluster/runtime/snapshot-2026-04-29-step-d/volcano-device-plugin-ds.yaml b/cluster/runtime/snapshot-2026-04-29-step-d/volcano-device-plugin-ds.yaml
new file mode 100644
index 000000000..303010090
--- /dev/null
+++ b/cluster/runtime/snapshot-2026-04-29-step-d/volcano-device-plugin-ds.yaml
@@ -0,0 +1,164 @@
+apiVersion: apps/v1
+kind: DaemonSet
+metadata:
+  name: volcano-device-plugin
+  namespace: kube-system
+spec:
+  revisionHistoryLimit: 10
+  selector:
+    matchLabels:
+      name: volcano-device-plugin
+  template:
+    metadata:
+      annotations:
+        kubectl.kubernetes.io/restartedAt: '2026-04-27T17:44:34+09:00'
+        scheduler.alpha.kubernetes.io/critical-pod: ''
+      labels:
+        name: volcano-device-plugin
+    spec:
+      containers:
+      - args:
+        - --device-split-count=10
+        - --gpu-memory-factor=1024
+        env:
+        - name: NODE_NAME
+          valueFrom:
+            fieldRef:
+              apiVersion: v1
+              fieldPath: spec.nodeName
+        - name: HOOK_PATH
+          value: /usr/local/vgpu
+        - name: NVIDIA_VISIBLE_DEVICES
+          value: all
+        - name: NVIDIA_MIG_MONITOR_DEVICES
+          value: all
+        - name: NVIDIA_DRIVER_CAPABILITIES
+          value: utility
+        image: 10.61.3.124:30002/library/volcano-vgpu-device-plugin:vulkan-v2
+        imagePullPolicy: IfNotPresent
+        lifecycle:
+          postStart:
+            exec:
+              command:
+              - /bin/sh
+              - -c
+              - cp -rf /k8s-vgpu/lib/nvidia/. /usr/local/vgpu/
+        name: volcano-device-plugin
+        resources: {}
+        securityContext:
+          allowPrivilegeEscalation: true
+          capabilities:
+            add:
+            - SYS_ADMIN
+            drop:
+            - ALL
+          privileged: true
+        terminationMessagePath: /dev/termination-log
+        terminationMessagePolicy: File
+        volumeMounts:
+        - mountPath: /config
+          name: deviceconfig
+        - mountPath: /var/lib/kubelet/device-plugins
+          name: device-plugin
+        - mountPath: /usr/local/vgpu
+          name: lib
+        - mountPath: /tmp
+          name: hosttmp
+      - command:
+        - /bin/bash
+        - -c
+        - volcano-vgpu-monitor
+        env:
+        - name: NVIDIA_VISIBLE_DEVICES
+          value: all
+        - name: NVIDIA_MIG_MONITOR_DEVICES
+          value: all
+        - name: HOOK_PATH
+          value: /tmp/vgpu
+        - name: NODE_NAME
+          valueFrom:
+            fieldRef:
+              apiVersion: v1
+              fieldPath: spec.nodeName
+        image: 10.61.3.124:30002/library/volcano-vgpu-device-plugin:vulkan-v2
+        imagePullPolicy: IfNotPresent
+        name: monitor
+        resources: {}
+        securityContext:
+          allowPrivilegeEscalation: true
+          capabilities:
+            add:
+            - SYS_ADMIN
+            drop:
+            - ALL
+          privileged: true
+        terminationMessagePath: /dev/termination-log
+        terminationMessagePolicy: File
+        volumeMounts:
+        - mountPath: /run/docker
+          name: dockers
+        - mountPath: /run/containerd
+          name: containerds
+        - mountPath: /sysinfo
+          name: sysinfo
+        - mountPath: /hostvar
+          name: hostvar
+        - mountPath: /tmp
+          name: hosttmp
+      dnsPolicy: ClusterFirst
+      priorityClassName: system-node-critical
+      restartPolicy: Always
+      runtimeClassName: nvidia
+      schedulerName: default-scheduler
+      securityContext: {}
+      serviceAccount: volcano-device-plugin
+      serviceAccountName: volcano-device-plugin
+      terminationGracePeriodSeconds: 30
+      tolerations:
+      - key: CriticalAddonsOnly
+        operator: Exists
+      - effect: NoSchedule
+        key: volcano.sh/gpu-memory
+        operator: Exists
+      volumes:
+      - configMap:
+          defaultMode: 420
+          name: volcano-vgpu-node-config
+        name: deviceconfig
+      - hostPath:
+          path: /var/lib/kubelet/device-plugins
+          type: Directory
+        name: device-plugin
+      - hostPath:
+          path: /usr/local/vgpu
+          type: DirectoryOrCreate
+        name: lib
+      - hostPath:
+          path: /tmp
+          type: DirectoryOrCreate
+        name: hosttmp
+      - hostPath:
+          path: /run/docker
+          type: DirectoryOrCreate
+        name: dockers
+      - hostPath:
+          path: /run/containerd
+          type: DirectoryOrCreate
+        name: containerds
+      - hostPath:
+          path: /usr/bin
+          type: Directory
+        name: usrbin
+      - hostPath:
+          path: /sys
+          type: Directory
+        name: sysinfo
+      - hostPath:
+          path: /var
+          type: Directory
+        name: hostvar
+  updateStrategy:
+    rollingUpdate:
+      maxSurge: 0
+      maxUnavailable: 1
+    type: RollingUpdate
diff --git a/docker/Dockerfile b/docker/Dockerfile
index c592a72d8..9599ff7df 100644
--- a/docker/Dockerfile
+++ b/docker/Dockerfile
@@ -17,7 +17,8 @@ COPY .git/modules/libvgpu /libvgpu-git
 RUN rm -rf /libvgpu/.git && echo "gitdir: /libvgpu-git" > /libvgpu/.git
 WORKDIR /libvgpu
 ENV DEBIAN_FRONTEND=noninteractive
-RUN apt-get -y update; apt-get -y --no-install-recommends install cmake git && rm -rf /var/lib/apt/lists/*
+RUN apt-get -y update && apt-get -y --no-install-recommends install cmake git libvulkan-dev && rm -rf /var/lib/apt/lists/*
+ENV CI_COMMIT_SHA=docker-build
 RUN bash ./build.sh
 
 FROM nvidia/cuda:13.2.1-base-ubuntu22.04
@@ -40,6 +41,7 @@ COPY --from=gobuild /go/bin/nvidia-mig-parted /k8s-vgpu/bin/
 COPY ./docker/entrypoint.sh /k8s-vgpu/bin/entrypoint.sh
 COPY ./lib /k8s-vgpu/lib
 COPY --from=nvbuild /libvgpu/build/libvgpu.so /k8s-vgpu/lib/nvidia/libvgpu.so."$VERSION"
+COPY --from=nvbuild /libvgpu/etc/vulkan/implicit_layer.d/hami.json /k8s-vgpu/lib/nvidia/vulkan/implicit_layer.d/hami.json
 COPY ./docker/vgpu-init.sh /k8s-vgpu/bin/vgpu-init.sh
 
 ENV PATH="/k8s-vgpu/bin:${PATH}"
diff --git a/docs/superpowers/plans/2026-04-21-vulkan-vgpu-partitioning.md b/docs/superpowers/plans/2026-04-21-vulkan-vgpu-partitioning.md
new file mode 100644
index 000000000..fdd9cd192
--- /dev/null
+++ b/docs/superpowers/plans/2026-04-21-vulkan-vgpu-partitioning.md
@@ -0,0 +1,2205 @@
+# Vulkan vGPU 분할 구현 계획
+
+> **For agentic workers:** REQUIRED SUB-SKILL: Use superpowers:subagent-driven-development (recommended) or superpowers:executing-plans to implement this plan task-by-task. Steps use checkbox (`- [ ]`) syntax for tracking.
+
+**Goal:** `hami.io/vulkan: "true"` annotation을 붙인 파드의 Vulkan 메모리 할당과 큐 제출에 기존 `nvidia.com/gpumem` / `nvidia.com/gpucores` 버짓을 강제한다.
+
+**Architecture:** HAMi-core(`libvgpu.so`)에 Vulkan implicit layer를 추가해 `vkAllocateMemory` / `vkFreeMemory` / `vkGetPhysicalDeviceMemoryProperties[2]` / `vkQueueSubmit[2]`를 가로챈다. 기존 CUDA 훅이 사용하는 per-device 메모리 카운터와 SM throttle 유틸을 그대로 재사용한다. HAMi(Go)의 `MutateAdmission`은 annotation을 감지해 `NVIDIA_DRIVER_CAPABILITIES`에 `graphics`를 합치고 `HAMI_VULKAN_ENABLE=1`을 주입한다.
+
+**Tech Stack:** Go 1.22+ (HAMi), C11 + Vulkan 1.3 headers + pthread + NVML (HAMi-core), CMake, Docker multi-stage 빌드.
+
+**Reference Spec:** `docs/superpowers/specs/2026-04-21-vulkan-vgpu-partitioning-design.md`
+
+**Layout Notes:** `docs/superpowers/plans/notes/hami-core-layout.md` — Task 0.2에서 확보한 HAMi-core 실제 API 시그니처.
+
+## 중요 개정 사항 (탐색 결과 반영)
+
+Task 0.2에서 HAMi-core 실제 구조를 확인한 결과, 초기 플랜의 일부 가정이 실제와 달라 Task 1.3~1.8을 다음과 같이 개정합니다:
+
+1. **VRAM 카운터는 atomic reserve가 아니라 check-only**. 실제 API는 `oom_check(dev,size)`(체크만, `1`=OOM) + `add_gpu_device_memory_usage(pid,dev,size,type)`(커밋). CUDA 경로와 동일한 2단계 사용.
+2. **SM throttle은 이미 추출 형태**. `rate_limiter(grids,blocks)` 소비자(토큰 버킷) + `utilization_watcher` pthread 생산자. 별도 추출 Task 불필요 → **Task 1.4 삭제**, Vulkan 훅에서 직접 `rate_limiter(1,1)` 호출.
+3. **빌드는 CMake + OBJECT 라이브러리 구조**. `libvgpu/src/vulkan/` 신규 서브디렉토리에 자체 `CMakeLists.txt`를 두고 OBJECT lib `vulkan_mod`를 정의해 루트에서 링크. Makefile 직접 수정은 불필요 (`make build` → `./build.sh` → `cmake` 흐름 유지).
+4. **`-fvisibility=hidden`(Release)**. 레이어 엔트리포인트는 Vulkan SDK 헤더의 `VK_LAYER_EXPORT` 매크로가 이미 `__attribute__((visibility("default")))`를 적용하므로 추가 조치 불필요.
+5. **`limit==0` 센티넬 = unlimited**. 버짓 어댑터가 이를 그대로 전달해 clamp/예약 로직이 0이면 훅을 건너뛰도록 한다.
+6. **단위 테스트 프레임워크 없음**. 기존 `test/*.c` 샘플 스타일대로 stand-alone binary로 작성하거나, HAMi(Go) 쪽에서 cgo 미사용이면 C 테스트는 `test/vulkan/`에 `assert.h` 기반 프로그램으로 추가하고 CMake `test/CMakeLists.txt`의 glob이 자동으로 컴파일하도록 한다.
+
+---
+
+## Phase 0 — Submodule 초기화 및 탐색
+
+### Task 0.1: HAMi-core submodule 초기화
+
+**Files:**
+- Modify: 없음 (체크아웃만)
+
+- [ ] **Step 1: submodule 상태 확인**
+
+Run:
+```bash
+git submodule status
+```
+Expected output contains `libvgpu` 항목. 앞에 `-`가 붙어 있으면 미초기화.
+
+- [ ] **Step 2: submodule 초기화 및 체크아웃**
+
+Run:
+```bash
+git submodule update --init --recursive libvgpu
+```
+Expected: `libvgpu/` 아래에 C 소스(`src/`, `Makefile` 등)가 체크아웃됨.
+
+- [ ] **Step 3: 커밋 불필요 확인**
+
+Run:
+```bash
+git status
+```
+Expected: working tree clean (submodule 체크아웃이 상위 레포에 기록된 gitlink 커밋과 일치; `.gitmodules`에는 URL/branch만 기록됨).
+
+---
+
+### Task 0.2: HAMi-core 구조와 기존 카운터 API 탐색
+
+**Files:**
+- Create: `docs/superpowers/plans/notes/hami-core-layout.md` (임시 노트, 플랜 종료 후 삭제)
+
+- [ ] **Step 1: 상위 구조 파악**
+
+Run:
+```bash
+ls libvgpu/
+ls libvgpu/src/
+find libvgpu/src -maxdepth 2 -name "*.c" -o -name "*.h" | head -40
+```
+Expected: `libvgpu/src` 하위에 `cuda/`, `memory/` 또는 유사 디렉토리. 공유 헤더(`include/` 또는 `src/*.h`) 확인.
+
+- [ ] **Step 2: VRAM 카운터 API 식별**
+
+Run:
+```bash
+grep -rn "used_memory\|device_memory\|reserve_memory\|allocate_memory_check" libvgpu/src | head
+grep -rn "cuMemAlloc\b" libvgpu/src | head
+```
+위 검색 결과에서 CUDA allocate 래퍼가 호출하는 "예약" 함수의 시그니처를 확보. 예시 후보: `int32_t oom_check(int, size_t)`, `void add_allocated(int, size_t)` 등.
+
+- [ ] **Step 3: SM throttle 루프 식별**
+
+Run:
+```bash
+grep -rn "nvmlDeviceGetUtilizationRates\|utilization_watchdog\|usleep\|sm_limit" libvgpu/src | head
+```
+기존 throttle 폴링 루프가 있는 파일과 함수명 확보.
+
+- [ ] **Step 4: 테스트 프레임워크 식별**
+
+Run:
+```bash
+ls libvgpu/test 2>/dev/null || ls libvgpu/tests 2>/dev/null
+grep -rn "assert(" libvgpu/ 2>/dev/null | head
+cat libvgpu/Makefile | head -60
+```
+테스트 타겟(`make test`, `make check` 등)과 디렉토리 위치 확보. 없으면 "테스트 타겟 없음"을 노트.
+
+- [ ] **Step 5: 노트 기록**
+
+Write `docs/superpowers/plans/notes/hami-core-layout.md` 내용 예시(실제 수치는 Step 2~4 결과로 채움):
+```markdown
+# HAMi-core layout notes
+
+- src/cuda/memory.c — cuMemAlloc 래퍼. reserve 함수: `int reserve_device_memory(int dev, size_t size)` (L123)
+- src/cuda/launch.c — cuLaunchKernel 래퍼. throttle 루프: `static void throttle_wait(int dev)` (L77)
+- include/hami_core.h — 공통 헤더. device_memory 구조체 노출.
+- test 디렉토리 없음. Makefile `make test` 타겟 없음 → assert.h + 자체 러너 추가 필요.
+- Vulkan 헤더: 빌드 미의존. vulkan-headers 패키지 추가 필요.
+```
+
+- [ ] **Step 6: 커밋**
+
+```bash
+git add docs/superpowers/plans/notes/hami-core-layout.md
+git commit -m "docs: HAMi-core layout notes for Vulkan plan"
+```
+
+---
+
+## Phase 1 — HAMi-core Vulkan Layer (C)
+
+이 Phase의 모든 작업은 `libvgpu/` 하위에서 진행됩니다. HAMi-core는 submodule이므로, Phase 마지막에 `libvgpu` 레포에 별도 브랜치/PR로 밀고, HAMi 쪽에서 submodule 포인터를 업데이트합니다.
+
+### Task 1.1: 레이어 엔트리포인트 스켈레톤
+
+**Files:**
+- Create: `libvgpu/src/vulkan/layer.h`
+- Create: `libvgpu/src/vulkan/layer.c`
+- Create: `libvgpu/src/vulkan/dispatch.h`
+- Create: `libvgpu/src/vulkan/dispatch.c`
+
+- [ ] **Step 1: 실패 테스트 작성 — `vkNegotiateLoaderLayerInterfaceVersion` export 확인**
+
+Create `libvgpu/test/vulkan/test_layer.c`:
+```c
+#include <assert.h>
+#include <dlfcn.h>
+#include <stdio.h>
+#include <vulkan/vulkan.h>
+#include <vulkan/vk_layer.h>
+
+typedef VkResult (VKAPI_PTR *PFN_vkNegotiateLoaderLayerInterfaceVersion)(VkNegotiateLayerInterface*);
+
+int main(void) {
+    void *h = dlopen("./libvgpu.so", RTLD_NOW);
+    assert(h != NULL);
+    PFN_vkNegotiateLoaderLayerInterfaceVersion fn =
+        (PFN_vkNegotiateLoaderLayerInterfaceVersion)
+        dlsym(h, "vkNegotiateLoaderLayerInterfaceVersion");
+    assert(fn != NULL);
+
+    VkNegotiateLayerInterface iface = {0};
+    iface.sType = LAYER_NEGOTIATE_INTERFACE_STRUCT;
+    iface.loaderLayerInterfaceVersion = 2;
+    VkResult r = fn(&iface);
+    assert(r == VK_SUCCESS);
+    assert(iface.pfnGetInstanceProcAddr != NULL);
+    assert(iface.pfnGetDeviceProcAddr != NULL);
+    printf("ok: layer entry point negotiates\n");
+    return 0;
+}
+```
+
+- [ ] **Step 2: 테스트가 빌드/실행 실패함 확인**
+
+Run (from `libvgpu/`):
+```bash
+cc -o /tmp/t test/vulkan/test_layer.c -ldl && /tmp/t
+```
+Expected: 빌드는 성공하지만 실행 시 `./libvgpu.so`가 없어 `assert(h != NULL)`에서 중단되거나, `dlsym`이 NULL을 반환해 `assert(fn != NULL)`에서 중단 (심볼 미구현).
+
+- [ ] **Step 3: `layer.h` 최소 헤더 작성**
+
+Create `libvgpu/src/vulkan/layer.h`:
+```c
+#ifndef HAMI_VULKAN_LAYER_H
+#define HAMI_VULKAN_LAYER_H
+
+#include <vulkan/vulkan.h>
+#include <vulkan/vk_layer.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+VK_LAYER_EXPORT VkResult VKAPI_CALL
+vkNegotiateLoaderLayerInterfaceVersion(VkNegotiateLayerInterface *pVersionStruct);
+
+PFN_vkVoidFunction VKAPI_CALL
+hami_vkGetInstanceProcAddr(VkInstance instance, const char *pName);
+
+PFN_vkVoidFunction VKAPI_CALL
+hami_vkGetDeviceProcAddr(VkDevice device, const char *pName);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* HAMI_VULKAN_LAYER_H */
+```
+
+- [ ] **Step 4: `dispatch.h` 작성 (next-layer 포인터 테이블)**
+
+Create `libvgpu/src/vulkan/dispatch.h`:
+```c
+#ifndef HAMI_VULKAN_DISPATCH_H
+#define HAMI_VULKAN_DISPATCH_H
+
+#include <vulkan/vulkan.h>
+#include <vulkan/vk_layer.h>
+
+typedef struct hami_instance_dispatch {
+    VkInstance handle;
+    PFN_vkGetInstanceProcAddr next_gipa;
+    PFN_vkDestroyInstance DestroyInstance;
+    PFN_vkEnumeratePhysicalDevices EnumeratePhysicalDevices;
+    PFN_vkGetPhysicalDeviceMemoryProperties GetPhysicalDeviceMemoryProperties;
+    PFN_vkGetPhysicalDeviceMemoryProperties2 GetPhysicalDeviceMemoryProperties2;
+    struct hami_instance_dispatch *next;
+} hami_instance_dispatch_t;
+
+typedef struct hami_device_dispatch {
+    VkDevice handle;
+    VkPhysicalDevice physical;
+    PFN_vkGetDeviceProcAddr next_gdpa;
+    PFN_vkDestroyDevice DestroyDevice;
+    PFN_vkAllocateMemory AllocateMemory;
+    PFN_vkFreeMemory FreeMemory;
+    PFN_vkQueueSubmit QueueSubmit;
+    PFN_vkQueueSubmit2 QueueSubmit2;
+    struct hami_device_dispatch *next;
+} hami_device_dispatch_t;
+
+hami_instance_dispatch_t *hami_instance_lookup(VkInstance inst);
+hami_instance_dispatch_t *hami_instance_register(VkInstance inst, PFN_vkGetInstanceProcAddr gipa);
+void hami_instance_unregister(VkInstance inst);
+
+hami_device_dispatch_t *hami_device_lookup(VkDevice dev);
+hami_device_dispatch_t *hami_device_register(VkDevice dev, VkPhysicalDevice phys, PFN_vkGetDeviceProcAddr gdpa);
+void hami_device_unregister(VkDevice dev);
+
+#endif /* HAMI_VULKAN_DISPATCH_H */
+```
+
+- [ ] **Step 5: `dispatch.c` 작성 (단순 linked list + pthread mutex)**
+
+Create `libvgpu/src/vulkan/dispatch.c`:
+```c
+#include "dispatch.h"
+#include <pthread.h>
+#include <stdlib.h>
+#include <string.h>
+
+static hami_instance_dispatch_t *g_inst_head = NULL;
+static hami_device_dispatch_t   *g_dev_head  = NULL;
+static pthread_mutex_t g_lock = PTHREAD_MUTEX_INITIALIZER;
+
+static void *resolve(PFN_vkGetInstanceProcAddr gipa, VkInstance inst, const char *name) {
+    return gipa ? (void *)gipa(inst, name) : NULL; /* NULL gipa (unit tests register with one): leave the slot NULL */
+}
+
+hami_instance_dispatch_t *hami_instance_register(VkInstance inst, PFN_vkGetInstanceProcAddr gipa) {
+    hami_instance_dispatch_t *d = calloc(1, sizeof(*d));
+    d->handle   = inst;
+    d->next_gipa = gipa;
+    d->DestroyInstance                    = (PFN_vkDestroyInstance)                    resolve(gipa, inst, "vkDestroyInstance");
+    d->EnumeratePhysicalDevices           = (PFN_vkEnumeratePhysicalDevices)           resolve(gipa, inst, "vkEnumeratePhysicalDevices");
+    d->GetPhysicalDeviceMemoryProperties  = (PFN_vkGetPhysicalDeviceMemoryProperties)  resolve(gipa, inst, "vkGetPhysicalDeviceMemoryProperties");
+    d->GetPhysicalDeviceMemoryProperties2 = (PFN_vkGetPhysicalDeviceMemoryProperties2) resolve(gipa, inst, "vkGetPhysicalDeviceMemoryProperties2");
+
+    pthread_mutex_lock(&g_lock);
+    d->next = g_inst_head;
+    g_inst_head = d;
+    pthread_mutex_unlock(&g_lock);
+    return d;
+}
+
+hami_instance_dispatch_t *hami_instance_lookup(VkInstance inst) {
+    pthread_mutex_lock(&g_lock);
+    hami_instance_dispatch_t *p = g_inst_head;
+    while (p && p->handle != inst) p = p->next;
+    pthread_mutex_unlock(&g_lock);
+    return p;
+}
+
+void hami_instance_unregister(VkInstance inst) {
+    pthread_mutex_lock(&g_lock);
+    hami_instance_dispatch_t **pp = &g_inst_head;
+    while (*pp && (*pp)->handle != inst) pp = &(*pp)->next;
+    if (*pp) { hami_instance_dispatch_t *victim = *pp; *pp = victim->next; free(victim); }
+    pthread_mutex_unlock(&g_lock);
+}
+
+static void *resolve_dev(PFN_vkGetDeviceProcAddr gdpa, VkDevice dev, const char *name) {
+    return gdpa ? (void *)gdpa(dev, name) : NULL; /* NULL gdpa (unit tests register with one): leave the slot NULL */
+}
+
+hami_device_dispatch_t *hami_device_register(VkDevice dev, VkPhysicalDevice phys, PFN_vkGetDeviceProcAddr gdpa) {
+    hami_device_dispatch_t *d = calloc(1, sizeof(*d));
+    d->handle   = dev;
+    d->physical = phys;
+    d->next_gdpa = gdpa;
+    d->DestroyDevice   = (PFN_vkDestroyDevice)   resolve_dev(gdpa, dev, "vkDestroyDevice");
+    d->AllocateMemory  = (PFN_vkAllocateMemory)  resolve_dev(gdpa, dev, "vkAllocateMemory");
+    d->FreeMemory      = (PFN_vkFreeMemory)      resolve_dev(gdpa, dev, "vkFreeMemory");
+    d->QueueSubmit     = (PFN_vkQueueSubmit)     resolve_dev(gdpa, dev, "vkQueueSubmit");
+    d->QueueSubmit2    = (PFN_vkQueueSubmit2)    resolve_dev(gdpa, dev, "vkQueueSubmit2");
+
+    pthread_mutex_lock(&g_lock);
+    d->next = g_dev_head;
+    g_dev_head = d;
+    pthread_mutex_unlock(&g_lock);
+    return d;
+}
+
+hami_device_dispatch_t *hami_device_lookup(VkDevice dev) {
+    pthread_mutex_lock(&g_lock);
+    hami_device_dispatch_t *p = g_dev_head;
+    while (p && p->handle != dev) p = p->next;
+    pthread_mutex_unlock(&g_lock);
+    return p;
+}
+
+void hami_device_unregister(VkDevice dev) {
+    pthread_mutex_lock(&g_lock);
+    hami_device_dispatch_t **pp = &g_dev_head;
+    while (*pp && (*pp)->handle != dev) pp = &(*pp)->next;
+    if (*pp) { hami_device_dispatch_t *victim = *pp; *pp = victim->next; free(victim); }
+    pthread_mutex_unlock(&g_lock);
+}
+```
+
+- [ ] **Step 6: `layer.c` 작성 (엔트리포인트 + `vkCreateInstance` / `vkCreateDevice` 훅)**
+
+Create `libvgpu/src/vulkan/layer.c`:
+```c
+#include "layer.h"
+#include "dispatch.h"
+#include <string.h>
+#include <stdlib.h>
+
+/* forward declarations for hooks implemented in sibling files */
+extern void hami_vk_hook_instance(hami_instance_dispatch_t *d);
+extern void hami_vk_hook_device(hami_device_dispatch_t *d);
+
+static VkLayerInstanceCreateInfo *find_chain_info(const VkInstanceCreateInfo *pCreateInfo,
+                                                  VkLayerFunction func) {
+    const VkLayerInstanceCreateInfo *ci = pCreateInfo->pNext;
+    while (ci) {
+        if (ci->sType == VK_STRUCTURE_TYPE_LOADER_INSTANCE_CREATE_INFO && ci->function == func) {
+            return (VkLayerInstanceCreateInfo *)ci;
+        }
+        ci = (const VkLayerInstanceCreateInfo *)ci->pNext;
+    }
+    return NULL;
+}
+
+static VkLayerDeviceCreateInfo *find_dev_chain_info(const VkDeviceCreateInfo *pCreateInfo,
+                                                    VkLayerFunction func) {
+    const VkLayerDeviceCreateInfo *ci = pCreateInfo->pNext;
+    while (ci) {
+        if (ci->sType == VK_STRUCTURE_TYPE_LOADER_DEVICE_CREATE_INFO && ci->function == func) {
+            return (VkLayerDeviceCreateInfo *)ci;
+        }
+        ci = (const VkLayerDeviceCreateInfo *)ci->pNext;
+    }
+    return NULL;
+}
+
+static VKAPI_ATTR VkResult VKAPI_CALL
+hami_vkCreateInstance(const VkInstanceCreateInfo *pCreateInfo,
+                      const VkAllocationCallbacks *pAllocator,
+                      VkInstance *pInstance) {
+    VkLayerInstanceCreateInfo *chain = find_chain_info(pCreateInfo, VK_LAYER_LINK_INFO);
+    if (!chain || !chain->u.pLayerInfo) return VK_ERROR_INITIALIZATION_FAILED;
+
+    PFN_vkGetInstanceProcAddr next_gipa = chain->u.pLayerInfo->pfnNextGetInstanceProcAddr;
+    chain->u.pLayerInfo = chain->u.pLayerInfo->pNext;
+
+    PFN_vkCreateInstance next_create =
+        (PFN_vkCreateInstance)next_gipa(VK_NULL_HANDLE, "vkCreateInstance");
+    VkResult r = next_create(pCreateInfo, pAllocator, pInstance);
+    if (r != VK_SUCCESS) return r;
+
+    hami_instance_dispatch_t *d = hami_instance_register(*pInstance, next_gipa);
+    hami_vk_hook_instance(d);
+    return VK_SUCCESS;
+}
+
+static VKAPI_ATTR void VKAPI_CALL
+hami_vkDestroyInstance(VkInstance instance, const VkAllocationCallbacks *pAllocator) {
+    hami_instance_dispatch_t *d = hami_instance_lookup(instance);
+    if (d) d->DestroyInstance(instance, pAllocator);
+    hami_instance_unregister(instance);
+}
+
+static VKAPI_ATTR VkResult VKAPI_CALL
+hami_vkCreateDevice(VkPhysicalDevice physicalDevice,
+                    const VkDeviceCreateInfo *pCreateInfo,
+                    const VkAllocationCallbacks *pAllocator,
+                    VkDevice *pDevice) {
+    VkLayerDeviceCreateInfo *chain = find_dev_chain_info(pCreateInfo, VK_LAYER_LINK_INFO);
+    if (!chain || !chain->u.pLayerInfo) return VK_ERROR_INITIALIZATION_FAILED;
+
+    PFN_vkGetInstanceProcAddr next_gipa = chain->u.pLayerInfo->pfnNextGetInstanceProcAddr;
+    PFN_vkGetDeviceProcAddr   next_gdpa = chain->u.pLayerInfo->pfnNextGetDeviceProcAddr;
+    chain->u.pLayerInfo = chain->u.pLayerInfo->pNext;
+
+    PFN_vkCreateDevice next_create =
+        (PFN_vkCreateDevice)next_gipa(VK_NULL_HANDLE, "vkCreateDevice");
+    VkResult r = next_create(physicalDevice, pCreateInfo, pAllocator, pDevice);
+    if (r != VK_SUCCESS) return r;
+
+    hami_device_dispatch_t *d = hami_device_register(*pDevice, physicalDevice, next_gdpa);
+    hami_vk_hook_device(d);
+    return VK_SUCCESS;
+}
+
+static VKAPI_ATTR void VKAPI_CALL
+hami_vkDestroyDevice(VkDevice device, const VkAllocationCallbacks *pAllocator) {
+    hami_device_dispatch_t *d = hami_device_lookup(device);
+    if (d) d->DestroyDevice(device, pAllocator);
+    hami_device_unregister(device);
+}
+
+/* GIPA / GDPA: return our wrappers for hooked names, next-layer for the rest. */
+
+/* Hooked functions implemented in other TUs; declarations here. */
+VKAPI_ATTR void VKAPI_CALL hami_vkGetPhysicalDeviceMemoryProperties(VkPhysicalDevice, VkPhysicalDeviceMemoryProperties*);
+VKAPI_ATTR void VKAPI_CALL hami_vkGetPhysicalDeviceMemoryProperties2(VkPhysicalDevice, VkPhysicalDeviceMemoryProperties2*);
+VKAPI_ATTR VkResult VKAPI_CALL hami_vkAllocateMemory(VkDevice, const VkMemoryAllocateInfo*, const VkAllocationCallbacks*, VkDeviceMemory*);
+VKAPI_ATTR void     VKAPI_CALL hami_vkFreeMemory(VkDevice, VkDeviceMemory, const VkAllocationCallbacks*);
+VKAPI_ATTR VkResult VKAPI_CALL hami_vkQueueSubmit(VkQueue, uint32_t, const VkSubmitInfo*, VkFence);
+VKAPI_ATTR VkResult VKAPI_CALL hami_vkQueueSubmit2(VkQueue, uint32_t, const VkSubmitInfo2*, VkFence);
+
+#define HAMI_HOOK(name) do { if (strcmp(pName, "vk" #name) == 0) return (PFN_vkVoidFunction)hami_vk##name; } while (0)
+
+PFN_vkVoidFunction VKAPI_CALL
+hami_vkGetInstanceProcAddr(VkInstance instance, const char *pName) {
+    HAMI_HOOK(CreateInstance);
+    HAMI_HOOK(DestroyInstance);
+    HAMI_HOOK(CreateDevice);
+    HAMI_HOOK(GetInstanceProcAddr);
+    HAMI_HOOK(GetPhysicalDeviceMemoryProperties);
+    HAMI_HOOK(GetPhysicalDeviceMemoryProperties2);
+
+    hami_instance_dispatch_t *d = hami_instance_lookup(instance);
+    if (!d) return NULL;
+    return d->next_gipa(instance, pName);
+}
+
+PFN_vkVoidFunction VKAPI_CALL
+hami_vkGetDeviceProcAddr(VkDevice device, const char *pName) {
+    HAMI_HOOK(DestroyDevice);
+    HAMI_HOOK(GetDeviceProcAddr);
+    HAMI_HOOK(AllocateMemory);
+    HAMI_HOOK(FreeMemory);
+    HAMI_HOOK(QueueSubmit);
+    HAMI_HOOK(QueueSubmit2);
+
+    hami_device_dispatch_t *d = hami_device_lookup(device);
+    if (!d) return NULL;
+    return d->next_gdpa(device, pName);
+}
+
+VK_LAYER_EXPORT VkResult VKAPI_CALL
+vkNegotiateLoaderLayerInterfaceVersion(VkNegotiateLayerInterface *pVersionStruct) {
+    if (pVersionStruct->sType != LAYER_NEGOTIATE_INTERFACE_STRUCT)
+        return VK_ERROR_INITIALIZATION_FAILED;
+
+    if (pVersionStruct->loaderLayerInterfaceVersion > 2)
+        pVersionStruct->loaderLayerInterfaceVersion = 2;
+
+    pVersionStruct->pfnGetInstanceProcAddr = hami_vkGetInstanceProcAddr;
+    pVersionStruct->pfnGetDeviceProcAddr   = hami_vkGetDeviceProcAddr;
+    pVersionStruct->pfnGetPhysicalDeviceProcAddr = NULL;
+    return VK_SUCCESS;
+}
+
+/* Placeholders — real bodies live in hooks_memory.c / hooks_submit.c.
+   Define weak stubs here so layer.c alone compiles during TDD of Task 1.1. */
+#ifndef HAMI_VK_HOOKS_PRESENT
+void hami_vk_hook_instance(hami_instance_dispatch_t *d) { (void)d; }
+void hami_vk_hook_device(hami_device_dispatch_t *d)     { (void)d; }
+VKAPI_ATTR void VKAPI_CALL hami_vkGetPhysicalDeviceMemoryProperties(VkPhysicalDevice p, VkPhysicalDeviceMemoryProperties *o) {
+    (void)p; (void)o; /* no-op stub; g_inst_head is not declared in this TU, so it must not be referenced here */
+}
+VKAPI_ATTR void VKAPI_CALL hami_vkGetPhysicalDeviceMemoryProperties2(VkPhysicalDevice p, VkPhysicalDeviceMemoryProperties2 *o) { (void)p; (void)o; }
+VKAPI_ATTR VkResult VKAPI_CALL hami_vkAllocateMemory(VkDevice d, const VkMemoryAllocateInfo *i, const VkAllocationCallbacks *a, VkDeviceMemory *m) { (void)d;(void)i;(void)a;(void)m; return VK_ERROR_OUT_OF_DEVICE_MEMORY; }
+VKAPI_ATTR void     VKAPI_CALL hami_vkFreeMemory(VkDevice d, VkDeviceMemory m, const VkAllocationCallbacks *a) { (void)d;(void)m;(void)a; }
+VKAPI_ATTR VkResult VKAPI_CALL hami_vkQueueSubmit(VkQueue q, uint32_t n, const VkSubmitInfo *s, VkFence f) { (void)q;(void)n;(void)s;(void)f; return VK_SUCCESS; }
+VKAPI_ATTR VkResult VKAPI_CALL hami_vkQueueSubmit2(VkQueue q, uint32_t n, const VkSubmitInfo2 *s, VkFence f) { (void)q;(void)n;(void)s;(void)f; return VK_SUCCESS; }
+#endif
+```
+
+- [ ] **Step 7: 레이어만으로 임시 빌드 및 테스트 통과 확인**
+
+Run (from `libvgpu/`):
+```bash
+cc -shared -fPIC -o /tmp/libvgpu_stub.so \
+   src/vulkan/layer.c src/vulkan/dispatch.c \
+   -I/usr/include -lpthread
+cc -o /tmp/t test/vulkan/test_layer.c -ldl
+cd /tmp && cp /tmp/libvgpu_stub.so ./libvgpu.so && ./t
+```
+Expected: `ok: layer entry point negotiates`.
+
+- [ ] **Step 8: 커밋 (libvgpu 레포)**
+
+Run (from `libvgpu/`):
+```bash
+git checkout -b vulkan-layer
+git add src/vulkan/layer.h src/vulkan/layer.c src/vulkan/dispatch.h src/vulkan/dispatch.c test/vulkan/test_layer.c
+git commit -m "feat(vulkan): add layer entry point and dispatch skeleton"
+```
+
+---
+
+### Task 1.2: `vkGetPhysicalDeviceMemoryProperties[2]` 힙 클램프
+
+**Files:**
+- Create: `libvgpu/src/vulkan/hooks_memory.c`
+- Modify: `libvgpu/src/vulkan/layer.c` (스텁 제거)
+
+- [ ] **Step 1: 실패 테스트 작성**
+
+Create `libvgpu/test/vulkan/test_memprops.c`:
+```c
+#include <assert.h>
+#include <stdio.h>
+#include <string.h>
+#include <vulkan/vulkan.h>
+#include "../../src/vulkan/dispatch.h"
+
+/* pod budget stub used by hooks_memory.c; real implementation in memory module */
+size_t hami_pod_memory_budget(int dev_idx) { (void)dev_idx; return 1ull << 30; /* 1 GiB */ }
+
+/* fake next-layer property query reporting 8 GiB device-local heap */
+static void VKAPI_CALL fake_next(VkPhysicalDevice p, VkPhysicalDeviceMemoryProperties *out) {
+    (void)p;
+    memset(out, 0, sizeof(*out));
+    out->memoryHeapCount = 1;
+    out->memoryHeaps[0].size = 8ull << 30;
+    out->memoryHeaps[0].flags = VK_MEMORY_HEAP_DEVICE_LOCAL_BIT;
+}
+
+extern VKAPI_ATTR void VKAPI_CALL
+hami_vkGetPhysicalDeviceMemoryProperties(VkPhysicalDevice p, VkPhysicalDeviceMemoryProperties *out);
+
+int main(void) {
+    VkInstance inst = (VkInstance)0x1;
+    hami_instance_dispatch_t *d = hami_instance_register(inst, NULL);
+    d->GetPhysicalDeviceMemoryProperties = fake_next;
+
+    VkPhysicalDeviceMemoryProperties props;
+    hami_vkGetPhysicalDeviceMemoryProperties((VkPhysicalDevice)0x2, &props);
+    assert(props.memoryHeapCount == 1);
+    assert(props.memoryHeaps[0].size == (1ull << 30));
+    printf("ok: heap clamped to 1 GiB\n");
+    return 0;
+}
+```
+
+- [ ] **Step 2: 테스트 빌드 (기대: stub이 clamp를 안 하므로 실패)**
+
+Run (from `libvgpu/`):
+```bash
+cc -o /tmp/tm -DHAMI_VK_HOOKS_PRESENT \
+   src/vulkan/layer.c src/vulkan/dispatch.c \
+   test/vulkan/test_memprops.c -lpthread
+/tmp/tm
+```
+Expected: 링크 에러 (hooks_memory.c 아직 없음) — 또는 `hami_vk_hook_*` 미정의.
+
+- [ ] **Step 3: `hooks_memory.c` 작성 (클램프 + instance hook 설치)**
+
+Create `libvgpu/src/vulkan/hooks_memory.c`:
+```c
+#include "dispatch.h"
+#include <string.h>
+
+/* Provided by the budget module (Phase 2 integrates with existing counter).
+   For now declared here, implemented by the unit test or the memory module. */
+size_t hami_pod_memory_budget(int dev_idx);
+
+static int physdev_index(VkPhysicalDevice p) {
+    /* Simplification: layer sees only devices already filtered by NVIDIA_VISIBLE_DEVICES.
+       Use pointer-hash low bits as a stable index within the process. Replace with
+       NVML UUID lookup during Task 2.1 integration. */
+    return (int)(((uintptr_t)p >> 4) & 0xff);
+}
+
+static void clamp_heaps(VkPhysicalDevice p, uint32_t *count, VkMemoryHeap *heaps) {
+    size_t budget = hami_pod_memory_budget(physdev_index(p));
+    for (uint32_t i = 0; i < *count; ++i) {
+        if ((heaps[i].flags & VK_MEMORY_HEAP_DEVICE_LOCAL_BIT) == 0) continue;
+        if (heaps[i].size > budget) heaps[i].size = budget;
+    }
+}
+
+VKAPI_ATTR void VKAPI_CALL
+hami_vkGetPhysicalDeviceMemoryProperties(VkPhysicalDevice p,
+                                         VkPhysicalDeviceMemoryProperties *out) {
+    /* No physical-device -> instance mapping exists yet, so forward through the first
+       list entry that holds a next-layer pointer, then clamp the heaps it reports.
+       NOTE(review): this walk is not serialized against register/unregister — confirm. */
+    extern hami_instance_dispatch_t *g_inst_head;
+    for (hami_instance_dispatch_t *it = g_inst_head; it; it = it->next) {
+        if (it->GetPhysicalDeviceMemoryProperties) {
+            it->GetPhysicalDeviceMemoryProperties(p, out);
+            clamp_heaps(p, &out->memoryHeapCount, out->memoryHeaps);
+            return;
+        }
+    }
+}
+
+VKAPI_ATTR void VKAPI_CALL
+hami_vkGetPhysicalDeviceMemoryProperties2(VkPhysicalDevice p,
+                                          VkPhysicalDeviceMemoryProperties2 *out) {
+    extern hami_instance_dispatch_t *g_inst_head;
+    for (hami_instance_dispatch_t *it = g_inst_head; it; it = it->next) {
+        if (it->GetPhysicalDeviceMemoryProperties2) {
+            it->GetPhysicalDeviceMemoryProperties2(p, out);
+            clamp_heaps(p, &out->memoryProperties.memoryHeapCount, out->memoryProperties.memoryHeaps);
+            return;
+        }
+    }
+}
+
+void hami_vk_hook_instance(hami_instance_dispatch_t *d) {
+    /* no per-instance state to install yet */
+    (void)d;
+}
+```
+
+또한 `dispatch.c`의 `g_inst_head`를 non-static로 변경해 다른 TU가 접근 가능하게 한다:
+
+Modify `libvgpu/src/vulkan/dispatch.c:6`:
+```c
+/* expose to sibling TUs for walk */
+hami_instance_dispatch_t *g_inst_head = NULL;
+hami_device_dispatch_t   *g_dev_head  = NULL;
+```
+(기존 `static` 제거)
+
+- [ ] **Step 4: layer.c의 clamp/allocate stub 제거**
+
+Modify `libvgpu/src/vulkan/layer.c` — 파일 끝 `#ifndef HAMI_VK_HOOKS_PRESENT` 블록 중 `hami_vkGetPhysicalDeviceMemoryProperties[2]` stub만 삭제 (할당/제출 stub은 Task 1.3/1.5에서 제거).
+
+- [ ] **Step 5: 테스트 빌드 및 실행 (이번엔 통과해야 함)**
+
+Run:
+```bash
+cc -o /tmp/tm -DHAMI_VK_HOOKS_PRESENT \
+   src/vulkan/dispatch.c src/vulkan/hooks_memory.c \
+   test/vulkan/test_memprops.c -lpthread
+/tmp/tm
+```
+Expected: `ok: heap clamped to 1 GiB`.
+
+- [ ] **Step 6: 커밋**
+
+```bash
+git add src/vulkan/hooks_memory.c src/vulkan/layer.c src/vulkan/dispatch.c test/vulkan/test_memprops.c
+git commit -m "feat(vulkan): clamp device-local heap size to pod budget"
+```
+
+---
+
+### Task 1.3: `vkAllocateMemory` / `vkFreeMemory` 버짓 강제 (개정)
+
+**API 주의:** HAMi-core의 실제 카운터는 `oom_check` (체크만, `1`=OOM) + `add_gpu_device_memory_usage(pid,dev,size,type)` / `rm_gpu_device_memory_usage(pid,dev,size,type)` 2단계. `type` 파라미터는 allocator.c가 CUDA 경로에서 `2`를 사용함. 원자성은 CUDA와 동일 수준(느슨한 TOCTOU). 어댑터는 Task 1.6에서 추가되므로, 이 Task는 Vulkan 훅이 참조할 **공개 어댑터 시그니처**를 사용한다: `int hami_budget_reserve(int dev, size_t size)` / `void hami_budget_release(int dev, size_t size)` / `size_t hami_budget_of(int dev)`. 구현은 Task 1.6.
+
+**Files:**
+- Create: `libvgpu/src/vulkan/hooks_alloc.c`
+- Modify: `libvgpu/src/vulkan/layer.c` (해당 stub 제거)
+
+- [ ] **Step 1: 실패 테스트 작성**
+
+Create `libvgpu/test/vulkan/test_alloc.c`:
+```c
+#include <assert.h>
+#include <stdio.h>
+#include <stdint.h>
+#include <vulkan/vulkan.h>
+#include "../../src/vulkan/dispatch.h"
+
+/* Budget adapter stubs (real implementation arrives in Task 1.6). */
+static size_t g_used = 0;
+static const size_t BUDGET = 1ull << 30; /* 1 GiB */
+
+size_t hami_budget_of(int dev) { (void)dev; return BUDGET; }
+int    hami_budget_reserve(int dev, size_t size) {
+    (void)dev;
+    if (g_used + size > BUDGET) return 0;  /* 0 = refuse */
+    g_used += size;
+    return 1;                               /* 1 = granted */
+}
+void   hami_budget_release(int dev, size_t size) { (void)dev; g_used -= size; }
+
+static VkResult VKAPI_CALL fake_alloc(VkDevice d, const VkMemoryAllocateInfo *i,
+                                      const VkAllocationCallbacks *a, VkDeviceMemory *m) {
+    (void)d;(void)a; *m = (VkDeviceMemory)(uintptr_t)(i->allocationSize);
+    return VK_SUCCESS;
+}
+static void VKAPI_CALL fake_free(VkDevice d, VkDeviceMemory m, const VkAllocationCallbacks *a) { (void)d;(void)m;(void)a; }
+
+extern VKAPI_ATTR VkResult VKAPI_CALL
+hami_vkAllocateMemory(VkDevice, const VkMemoryAllocateInfo*, const VkAllocationCallbacks*, VkDeviceMemory*);
+extern VKAPI_ATTR void VKAPI_CALL
+hami_vkFreeMemory(VkDevice, VkDeviceMemory, const VkAllocationCallbacks*);
+
+int main(void) {
+    VkDevice dev = (VkDevice)0x1;
+    hami_device_dispatch_t *d = hami_device_register(dev, (VkPhysicalDevice)0x2, NULL);
+    d->AllocateMemory = fake_alloc;
+    d->FreeMemory     = fake_free;
+
+    VkMemoryAllocateInfo info = { .sType=VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO, .allocationSize=(512ull<<20) };
+    VkDeviceMemory m1, m2, m3;
+
+    assert(hami_vkAllocateMemory(dev, &info, NULL, &m1) == VK_SUCCESS);
+    assert(hami_vkAllocateMemory(dev, &info, NULL, &m2) == VK_SUCCESS);
+    assert(hami_vkAllocateMemory(dev, &info, NULL, &m3) == VK_ERROR_OUT_OF_DEVICE_MEMORY);
+
+    hami_vkFreeMemory(dev, m1, NULL);
+    assert(hami_vkAllocateMemory(dev, &info, NULL, &m3) == VK_SUCCESS);
+    printf("ok: allocate/free budget enforced\n");
+    return 0;
+}
+```
+
+- [ ] **Step 2: 테스트 빌드 실패 확인**
+
+Run (from `libvgpu/`):
+```bash
+cc -o /tmp/ta -DHAMI_VK_HOOKS_PRESENT -I./src \
+   src/vulkan/layer.c src/vulkan/dispatch.c src/vulkan/hooks_memory.c \
+   test/vulkan/test_alloc.c -lpthread
+```
+Expected: 링크 에러 또는 실행 시 assertion 위반 (stub이 모두 OOM 반환).
+
+- [ ] **Step 3: `hooks_alloc.c` 작성**
+
+Create `libvgpu/src/vulkan/hooks_alloc.c`:
+```c
+#include "dispatch.h"
+#include <pthread.h>
+#include <stdlib.h>
+#include <stdint.h>
+
+/* Public budget-adapter API. Implemented in Task 1.6 (src/vulkan/budget.c)
+ * and stubbed by unit tests here. The adapter encapsulates HAMi-core's
+ * oom_check / add_gpu_device_memory_usage / rm_gpu_device_memory_usage
+ * 2-step protocol behind one reserve/release pair (not atomic: same TOCTOU window as CUDA). */
+int    hami_budget_reserve(int dev, size_t size);   /* 1 = granted, 0 = refused */
+void   hami_budget_release(int dev, size_t size);
+size_t hami_budget_of(int dev);                     /* 0 = unlimited */
+
+typedef struct mem_entry {
+    VkDeviceMemory handle;
+    size_t size;
+    int dev_idx;
+    struct mem_entry *next;
+} mem_entry_t;
+
+static mem_entry_t *g_mem_head = NULL;
+static pthread_mutex_t g_mem_lock = PTHREAD_MUTEX_INITIALIZER;
+
+/* Provisional device-index heuristic (pointer hash). Replaced with
+ * NVML UUID lookup when the adapter in Task 1.6 lands. */
+static int device_to_index(VkDevice d) {
+    return (int)(((uintptr_t)d >> 4) & 0xff);
+}
+
+VKAPI_ATTR VkResult VKAPI_CALL
+hami_vkAllocateMemory(VkDevice device, const VkMemoryAllocateInfo *pInfo,
+                      const VkAllocationCallbacks *pAlloc, VkDeviceMemory *pMem) {
+    hami_device_dispatch_t *d = hami_device_lookup(device);
+    if (!d || !d->AllocateMemory) return VK_ERROR_INITIALIZATION_FAILED;
+    mem_entry_t *e = calloc(1, sizeof(*e)); /* tracking node first: host OOM must not strand a budget charge */
+    if (!e) return VK_ERROR_OUT_OF_HOST_MEMORY;
+    int idx = device_to_index(device);
+    if (!hami_budget_reserve(idx, pInfo->allocationSize)) {
+        free(e);
+        return VK_ERROR_OUT_OF_DEVICE_MEMORY; /* pod budget exhausted */
+    }
+    VkResult r = d->AllocateMemory(device, pInfo, pAlloc, pMem);
+    if (r != VK_SUCCESS) {
+        hami_budget_release(idx, pInfo->allocationSize); /* undo the reservation */
+        free(e);
+        return r;
+    }
+    e->handle  = *pMem;
+    e->size    = pInfo->allocationSize;
+    e->dev_idx = idx;
+    pthread_mutex_lock(&g_mem_lock); /* publish so hami_vkFreeMemory can release the charge */
+    e->next = g_mem_head;
+    g_mem_head = e;
+    pthread_mutex_unlock(&g_mem_lock);
+    return VK_SUCCESS;
+}
+
+VKAPI_ATTR void VKAPI_CALL
+hami_vkFreeMemory(VkDevice device, VkDeviceMemory mem, const VkAllocationCallbacks *pAlloc) {
+    hami_device_dispatch_t *d = hami_device_lookup(device);
+    if (d && d->FreeMemory) d->FreeMemory(device, mem, pAlloc);
+
+    pthread_mutex_lock(&g_mem_lock);
+    mem_entry_t **pp = &g_mem_head;
+    while (*pp && (*pp)->handle != mem) pp = &(*pp)->next;
+    if (*pp) {
+        mem_entry_t *victim = *pp;
+        *pp = victim->next;
+        pthread_mutex_unlock(&g_mem_lock);
+        hami_budget_release(victim->dev_idx, victim->size);
+        free(victim);
+        return;
+    }
+    pthread_mutex_unlock(&g_mem_lock);
+}
+
+void hami_vk_hook_device(hami_device_dispatch_t *d) { (void)d; }
+```
+
+Also update `hooks_memory.c` (from Task 1.2) to use the new budget adapter name — change the `hami_pod_memory_budget` call to `hami_budget_of` and the forward declaration accordingly. If Task 1.2's file used `hami_pod_memory_budget`, rename:
+```c
+/* was: extern size_t hami_pod_memory_budget(int); */
+extern size_t hami_budget_of(int);
+...
+size_t budget = hami_budget_of(physdev_index(p));
+if (budget == 0) return;  /* 0 = unlimited sentinel; skip clamp */
+```
+And update `test/vulkan/test_memprops.c` test stubs to `hami_budget_of` accordingly.
+
+- [ ] **Step 4: layer.c의 allocate/free stub 제거**
+
+Modify `libvgpu/src/vulkan/layer.c` — 파일 끝 `#ifndef HAMI_VK_HOOKS_PRESENT` 블록에서 `hami_vkAllocateMemory`, `hami_vkFreeMemory`, `hami_vk_hook_device` stub 삭제 (QueueSubmit stub은 Task 1.5까지 유지).
+
+- [ ] **Step 5: 테스트 통과 확인**
+
+Run:
+```bash
+cc -o /tmp/ta -DHAMI_VK_HOOKS_PRESENT -I./src \
+   src/vulkan/layer.c src/vulkan/dispatch.c src/vulkan/hooks_memory.c src/vulkan/hooks_alloc.c \
+   test/vulkan/test_alloc.c -lpthread
+/tmp/ta
+```
+Expected: `ok: allocate/free budget enforced`.
+
+- [ ] **Step 6: 커밋**
+
+```bash
+git add src/vulkan/hooks_alloc.c src/vulkan/hooks_memory.c src/vulkan/layer.c test/vulkan/test_alloc.c test/vulkan/test_memprops.c
+git commit -m "feat(vulkan): enforce pod memory budget on vkAllocateMemory/vkFreeMemory"
+```
+
+---
+
+### Task 1.4: Vulkan용 throttle 어댑터 (개정 — 추출 불필요)
+
+**이 Task는 초기 플랜에서 "throttle 폴링 루프 추출"이었으나, Task 0.2 탐색 결과 HAMi-core는 이미 토큰 버킷 구조(`rate_limiter` 소비자 + `utilization_watcher` 생산자 스레드)로 모듈화되어 있어 추출할 필요가 없습니다. 대신, Vulkan TU가 기존 `rate_limiter`를 호출할 수 있도록 얇은 어댑터 1개만 추가합니다.**
+
+**Files:**
+- Create: `libvgpu/src/vulkan/throttle_adapter.h`
+- Create: `libvgpu/src/vulkan/throttle_adapter.c`
+
+- [ ] **Step 1: 실패 테스트 작성 (어댑터 호출 가능성 검증)**
+
+Create `libvgpu/test/vulkan/test_throttle_adapter.c`:
+```c
+#include <assert.h>
+#include <stdio.h>
+#include "../../src/vulkan/throttle_adapter.h"
+
+/* Stub of HAMi-core's rate_limiter so this test links without the full lib. */
+static int g_rl_calls = 0;
+void rate_limiter(int grids, int blocks) { (void)grids;(void)blocks; g_rl_calls++; }
+
+int main(void) {
+    hami_vulkan_throttle();
+    hami_vulkan_throttle();
+    assert(g_rl_calls == 2);
+    printf("ok: adapter forwards to rate_limiter\n");
+    return 0;
+}
+```
+
+- [ ] **Step 2: 빌드 실패 확인**
+
+Run (from `libvgpu/`):
+```bash
+cc -o /tmp/ttha -I./src test/vulkan/test_throttle_adapter.c
+```
+Expected: `throttle_adapter.h` 없음 → 컴파일 실패.
+
+- [ ] **Step 3: 어댑터 헤더/구현 작성**
+
+Create `libvgpu/src/vulkan/throttle_adapter.h`:
+```c
+#ifndef HAMI_VK_THROTTLE_ADAPTER_H
+#define HAMI_VK_THROTTLE_ADAPTER_H
+
+/* Consume one "compute unit" token from the HAMi-core SM rate limiter.
+ * When the HAMi SM limit is 0 or >= 100 (unlimited), this is a no-op
+ * inherited from the underlying rate_limiter. Call once per Vulkan
+ * vkQueueSubmit/vkQueueSubmit2 before forwarding to the next layer. */
+void hami_vulkan_throttle(void);
+
+#endif
+```
+
+Create `libvgpu/src/vulkan/throttle_adapter.c`:
+```c
+#include "throttle_adapter.h"
+
+/* Defined in libvgpu/src/multiprocess/multiprocess_utilization_watcher.c
+ * (linked into the same libvgpu.so at final link time). The reference
+ * still resolves when release builds use -fvisibility=hidden: hidden
+ * visibility only keeps the symbol out of the dynamic symbol table, and
+ * both TUs are linked into the same shared object. */
+extern void rate_limiter(int grids, int blocks);
+
+void hami_vulkan_throttle(void) {
+    /* Consume one token — represents "one queue submission". The
+     * rate_limiter interprets (grids*blocks) as the claim size; we use
+     * the smallest unit (1,1) so Vulkan submits compete fairly with
+     * tiny CUDA kernel launches. */
+    rate_limiter(1, 1);
+}
+```
+
+- [ ] **Step 4: 테스트 통과 확인**
+
+Run:
+```bash
+cc -o /tmp/ttha -I./src \
+   src/vulkan/throttle_adapter.c test/vulkan/test_throttle_adapter.c
+/tmp/ttha
+```
+Expected: `ok: adapter forwards to rate_limiter`.
+
+- [ ] **Step 5: 커밋**
+
+```bash
+git add src/vulkan/throttle_adapter.h src/vulkan/throttle_adapter.c test/vulkan/test_throttle_adapter.c
+git commit -m "feat(vulkan): thin adapter forwarding queue submit throttling to rate_limiter"
+```
+
+**주의사항 (노트 참조):** `rate_limiter`는 `sm_limit==0` / `sm_limit>=100` / `get_utilization_switch()==0` 조건에서 사일런트 no-op 합니다. 사용자가 Vulkan만 SM 분할하려 할 때 별도 스위치가 필요하면 후속 Task(v2)에서 `HAMI_VULKAN_SM_SWITCH` env를 추가하도록 남겨둡니다.
+
+---
+
+### Task 1.5: `vkQueueSubmit[2]` throttle 훅 (개정)
+
+**Files:**
+- Create: `libvgpu/src/vulkan/hooks_submit.c`
+- Modify: `libvgpu/src/vulkan/layer.c` (나머지 stub 제거)
+
+- [ ] **Step 1: 실패 테스트 작성**
+
+Create `libvgpu/test/vulkan/test_submit.c`:
+```c
+#include <assert.h>
+#include <stdio.h>
+#include <stdint.h>
+#include <vulkan/vulkan.h>
+#include "../../src/vulkan/dispatch.h"
+
+static int g_submit_called = 0;
+static VkResult VKAPI_CALL fake_submit(VkQueue q, uint32_t n, const VkSubmitInfo *s, VkFence f) {
+    (void)q;(void)n;(void)s;(void)f; g_submit_called++; return VK_SUCCESS;
+}
+
+/* Throttle adapter stub — verifies the hook calls the adapter exactly once
+ * per submit before forwarding to the next layer. */
+static int g_throttle_called = 0;
+void hami_vulkan_throttle(void) { g_throttle_called++; }
+
+extern VKAPI_ATTR VkResult VKAPI_CALL
+hami_vkQueueSubmit(VkQueue, uint32_t, const VkSubmitInfo*, VkFence);
+extern void hami_vk_register_queue(VkQueue q, VkDevice d);
+
+int main(void) {
+    VkDevice dev = (VkDevice)0x11;
+    VkQueue  q   = (VkQueue)0x22;
+    hami_device_dispatch_t *d = hami_device_register(dev, (VkPhysicalDevice)0, NULL);
+    d->QueueSubmit = fake_submit;
+    hami_vk_register_queue(q, dev);
+
+    VkResult r = hami_vkQueueSubmit(q, 0, NULL, VK_NULL_HANDLE);
+    assert(r == VK_SUCCESS);
+    assert(g_throttle_called == 1);
+    assert(g_submit_called   == 1);
+    printf("ok: submit hook throttles then forwards\n");
+    return 0;
+}
+```
+
+- [ ] **Step 2: 테스트 빌드 실패 확인**
+
+Run (from `libvgpu/`):
+```bash
+cc -o /tmp/ts -DHAMI_VK_HOOKS_PRESENT -I./src \
+   src/vulkan/layer.c src/vulkan/dispatch.c src/vulkan/hooks_memory.c src/vulkan/hooks_alloc.c \
+   test/vulkan/test_submit.c -lpthread
+```
+Expected: `hami_vk_register_queue` 미정의 + layer.c의 QueueSubmit stub이 throttle 호출 안 함.
+
+- [ ] **Step 3: `hooks_submit.c` 작성**
+
+Create `libvgpu/src/vulkan/hooks_submit.c`:
+```c
+#include "dispatch.h"
+#include "throttle_adapter.h"
+#include <pthread.h>
+#include <stdlib.h>
+
+/* Queue → Device registry populated by a future vkGetDeviceQueue hook. For
+ * now we expose a public register function used by both the layer's
+ * vkGetDeviceQueue wrapper (added in Task 1.5 Step 4) and by unit tests. */
+typedef struct q_entry { VkQueue q; VkDevice d; struct q_entry *next; } q_entry_t;
+static q_entry_t *g_q_head = NULL;
+static pthread_mutex_t g_q_lock = PTHREAD_MUTEX_INITIALIZER;
+
+void hami_vk_register_queue(VkQueue q, VkDevice d) {
+    q_entry_t *e = calloc(1, sizeof(*e));
+    e->q = q; e->d = d;
+    pthread_mutex_lock(&g_q_lock);
+    e->next = g_q_head; g_q_head = e;
+    pthread_mutex_unlock(&g_q_lock);
+}
+
+static VkDevice device_for_queue(VkQueue q) {
+    pthread_mutex_lock(&g_q_lock);
+    q_entry_t *p = g_q_head;
+    while (p && p->q != q) p = p->next;
+    VkDevice d = p ? p->d : VK_NULL_HANDLE;
+    pthread_mutex_unlock(&g_q_lock);
+    return d;
+}
+
+VKAPI_ATTR VkResult VKAPI_CALL
+hami_vkQueueSubmit(VkQueue queue, uint32_t n, const VkSubmitInfo *p, VkFence f) {
+    VkDevice d = device_for_queue(queue);
+    hami_device_dispatch_t *dd = hami_device_lookup(d);
+    if (!dd || !dd->QueueSubmit) return VK_ERROR_INITIALIZATION_FAILED;
+    hami_vulkan_throttle();
+    return dd->QueueSubmit(queue, n, p, f);
+}
+
+VKAPI_ATTR VkResult VKAPI_CALL
+hami_vkQueueSubmit2(VkQueue queue, uint32_t n, const VkSubmitInfo2 *p, VkFence f) {
+    VkDevice d = device_for_queue(queue);
+    hami_device_dispatch_t *dd = hami_device_lookup(d);
+    if (!dd || !dd->QueueSubmit2) return VK_ERROR_INITIALIZATION_FAILED;
+    hami_vulkan_throttle();
+    return dd->QueueSubmit2(queue, n, p, f);
+}
+```
+
+- [ ] **Step 4: layer.c에 `vkGetDeviceQueue` / `vkGetDeviceQueue2` 훅 추가 + 잔여 stub 제거**
+
+Modify `libvgpu/src/vulkan/layer.c` — `hami_vkDestroyDevice` 다음에 추가:
+```c
+extern void hami_vk_register_queue(VkQueue q, VkDevice d);
+
+static VKAPI_ATTR void VKAPI_CALL
+hami_vkGetDeviceQueue(VkDevice device, uint32_t family, uint32_t index, VkQueue *pQueue) {
+    hami_device_dispatch_t *d = hami_device_lookup(device);
+    if (!d) { *pQueue = VK_NULL_HANDLE; return; }
+    PFN_vkGetDeviceQueue next = (PFN_vkGetDeviceQueue)d->next_gdpa(device, "vkGetDeviceQueue");
+    next(device, family, index, pQueue);
+    if (*pQueue) hami_vk_register_queue(*pQueue, device);
+}
+
+static VKAPI_ATTR void VKAPI_CALL
+hami_vkGetDeviceQueue2(VkDevice device, const VkDeviceQueueInfo2 *pInfo, VkQueue *pQueue) {
+    hami_device_dispatch_t *d = hami_device_lookup(device);
+    if (!d) { *pQueue = VK_NULL_HANDLE; return; }
+    PFN_vkGetDeviceQueue2 next = (PFN_vkGetDeviceQueue2)d->next_gdpa(device, "vkGetDeviceQueue2");
+    next(device, pInfo, pQueue);
+    if (*pQueue) hami_vk_register_queue(*pQueue, device);
+}
+```
+그리고 `hami_vkGetDeviceProcAddr` 내부의 `HAMI_HOOK(...)` 목록에 추가:
+```c
+    HAMI_HOOK(GetDeviceQueue);
+    HAMI_HOOK(GetDeviceQueue2);
+```
+마지막으로 `#ifndef HAMI_VK_HOOKS_PRESENT` 블록 **전체를 삭제** (모든 훅이 이제 실제 구현됨).
+
+- [ ] **Step 5: 테스트 통과 확인**
+
+Run:
+```bash
+cc -o /tmp/ts -DHAMI_VK_HOOKS_PRESENT -I./src \
+   src/vulkan/layer.c src/vulkan/dispatch.c \
+   src/vulkan/hooks_memory.c src/vulkan/hooks_alloc.c src/vulkan/hooks_submit.c \
+   test/vulkan/test_submit.c -lpthread
+/tmp/ts
+```
+Expected: `ok: submit hook throttles then forwards`.
+
+- [ ] **Step 6: 커밋**
+
+```bash
+git add src/vulkan/hooks_submit.c src/vulkan/layer.c test/vulkan/test_submit.c
+git commit -m "feat(vulkan): throttle vkQueueSubmit[2] via rate_limiter adapter"
+```
+
+---
+
+### Task 1.6: 버짓 어댑터 구현 (개정 — 실제 HAMi-core API 반영)
+
+**Files:**
+- Create: `libvgpu/src/vulkan/budget.c`
+- Create: `libvgpu/src/vulkan/budget.h`
+
+Vulkan 훅(Task 1.2, 1.3)이 의존하는 3개 공개 API(`hami_budget_reserve` / `hami_budget_release` / `hami_budget_of`)의 실제 구현. HAMi-core의 실제 함수(`oom_check`, `add_gpu_device_memory_usage`, `rm_gpu_device_memory_usage`, `get_current_device_memory_limit`)를 감싼다. CUDA 경로와 동일한 2단계(체크 → 커밋) 프로토콜을 사용.
+
+- [ ] **Step 1: 헤더 파일 작성**
+
+Create `libvgpu/src/vulkan/budget.h`:
+```c
+#ifndef HAMI_VK_BUDGET_H
+#define HAMI_VK_BUDGET_H
+#include <stddef.h>
+
+/* Reserve `size` bytes on device `dev` for a Vulkan allocation.
+ * Returns 1 when the allocation fits the pod budget and the usage
+ * counter has been incremented; 0 when the request would exceed the
+ * budget (caller must return VK_ERROR_OUT_OF_DEVICE_MEMORY). If the
+ * budget is unlimited (HAMi-core limit sentinel == 0), always grants. */
+int  hami_budget_reserve(int dev, size_t size);
+
+/* Inverse of a successful reserve — decrements the usage counter. */
+void hami_budget_release(int dev, size_t size);
+
+/* Current per-device budget in bytes. Returns 0 when unlimited. */
+size_t hami_budget_of(int dev);
+
+#endif
+```
+
+- [ ] **Step 2: 구현 파일 작성**
+
+Create `libvgpu/src/vulkan/budget.c`:
+```c
+#include "budget.h"
+#include <stdint.h>
+#include <unistd.h>   /* getpid */
+
+/* HAMi-core internal symbols — linked from the same libvgpu.so.
+ * See docs/superpowers/plans/notes/hami-core-layout.md for semantics. */
+extern int      oom_check(const int dev, size_t addon);                  /* 1 = OOM, 0 = OK */
+extern int      add_gpu_device_memory_usage(int32_t pid, int dev,
+                                            size_t usage, int type);     /* 0 = success, 1 = failure */
+extern int      rm_gpu_device_memory_usage(int32_t pid, int dev,
+                                            size_t usage, int type);     /* 0 = success */
+extern uint64_t get_current_device_memory_limit(const int dev);           /* 0 = unlimited */
+
+/* Use type=2 matching the existing CUDA allocator path (src/allocator/allocator.c).
+ * HAMi-core's shared-region accounting tracks usage by (pid, dev) regardless of
+ * type, so reusing this tag keeps Vulkan and CUDA allocations in the same bucket. */
+#define HAMI_MEM_TYPE_DEVICE 2
+
+int hami_budget_reserve(int dev, size_t size) {
+    if (get_current_device_memory_limit(dev) == 0) {
+        /* Unlimited — skip check, but still bump the counter so metrics remain
+         * accurate. add_gpu_device_memory_usage returns 0 on success; on
+         * failure (shared region full etc.) treat as OOM. */
+        return add_gpu_device_memory_usage(getpid(), dev, size, HAMI_MEM_TYPE_DEVICE) == 0;
+    }
+    if (oom_check(dev, size)) return 0;   /* would exceed budget */
+    return add_gpu_device_memory_usage(getpid(), dev, size, HAMI_MEM_TYPE_DEVICE) == 0;
+}
+
+void hami_budget_release(int dev, size_t size) {
+    rm_gpu_device_memory_usage(getpid(), dev, size, HAMI_MEM_TYPE_DEVICE);
+}
+
+size_t hami_budget_of(int dev) {
+    return (size_t)get_current_device_memory_limit(dev);
+}
+```
+
+- [ ] **Step 3: Vulkan hook 소스에서 선언 일관화**
+
+기존 `hooks_alloc.c` (Task 1.3 Step 3)의 상단 3줄 forward declaration을 삭제하고 `#include "budget.h"`로 교체:
+```c
+#include "budget.h"
+```
+(Task 1.3의 원래 파일에는 이미 `int hami_budget_reserve(...)` 등의 extern 선언이 있으므로 그 3줄을 지우고 헤더 include로 대체).
+
+마찬가지로 `hooks_memory.c` (Task 1.2)의 `extern size_t hami_budget_of(int);` 선언 대신 `#include "budget.h"`.
+
+- [ ] **Step 4: 실 라이브러리 빌드 시만 `budget.c` 포함, 단위 테스트는 제외**
+
+단위 테스트(`test/vulkan/test_alloc.c`, `test_memprops.c`)에는 이미 `hami_budget_reserve` / `hami_budget_release` / `hami_budget_of` 스텁이 정의되어 있음. 테스트 바이너리 빌드 커맨드에 `budget.c`를 **포함하지 않는다** (중복 정의 방지). 실 `libvgpu.so` 빌드(Task 1.8)에는 포함.
+
+- [ ] **Step 5: 기존 모든 단위 테스트 회귀 없음 확인**
+
+```bash
+cc -o /tmp/tm -DHAMI_VK_HOOKS_PRESENT -I./src \
+   src/vulkan/layer.c src/vulkan/dispatch.c src/vulkan/hooks_memory.c \
+   test/vulkan/test_memprops.c -lpthread && /tmp/tm
+cc -o /tmp/ta -DHAMI_VK_HOOKS_PRESENT -I./src \
+   src/vulkan/layer.c src/vulkan/dispatch.c \
+   src/vulkan/hooks_memory.c src/vulkan/hooks_alloc.c \
+   test/vulkan/test_alloc.c -lpthread && /tmp/ta
+cc -o /tmp/ts -DHAMI_VK_HOOKS_PRESENT -I./src \
+   src/vulkan/layer.c src/vulkan/dispatch.c \
+   src/vulkan/hooks_memory.c src/vulkan/hooks_alloc.c src/vulkan/hooks_submit.c \
+   test/vulkan/test_submit.c -lpthread && /tmp/ts
+cc -o /tmp/ttha -I./src \
+   src/vulkan/throttle_adapter.c test/vulkan/test_throttle_adapter.c && /tmp/ttha
+```
+Expected: 4개 모두 `ok:...`.
+
+- [ ] **Step 6: 커밋**
+
+```bash
+git add src/vulkan/budget.h src/vulkan/budget.c src/vulkan/hooks_alloc.c src/vulkan/hooks_memory.c
+git commit -m "feat(vulkan): budget adapter bridges hook layer to HAMi-core counters"
+```
+
+---
+
+### Task 1.7: 레이어 매니페스트 JSON
+
+**Files:**
+- Create: `libvgpu/etc/vulkan/implicit_layer.d/hami.json`
+
+- [ ] **Step 1: 매니페스트 파일 작성**
+
+Create `libvgpu/etc/vulkan/implicit_layer.d/hami.json`:
+```json
+{
+  "file_format_version": "1.2.0",
+  "layer": {
+    "name": "VK_LAYER_HAMI_vgpu",
+    "type": "GLOBAL",
+    "library_path": "/usr/local/vgpu/libvgpu.so",
+    "api_version": "1.3.0",
+    "implementation_version": "1",
+    "description": "HAMi Vulkan vGPU limiter",
+    "enable_environment":  { "HAMI_VULKAN_ENABLE": "1" },
+    "disable_environment": { "HAMI_VULKAN_DISABLE": "1" }
+  }
+}
+```
+
+- [ ] **Step 2: JSON 문법 검증**
+
+Run (from the HAMi repo root — 아래 경로는 `libvgpu/` 접두사를 포함함):
+```bash
+python3 -m json.tool libvgpu/etc/vulkan/implicit_layer.d/hami.json > /dev/null && echo ok
+```
+Expected: `ok`.
+
+- [ ] **Step 3: 커밋**
+
+```bash
+git add etc/vulkan/implicit_layer.d/hami.json
+git commit -m "feat(vulkan): ship implicit layer manifest gated by HAMI_VULKAN_ENABLE"
+```
+
+---
+
+### Task 1.8: CMake / Dockerfile 통합 (개정 — CMake OBJECT 라이브러리 패턴)
+
+**빌드 실체:** HAMi-core는 Makefile이 `./build.sh`를 호출하고, `build.sh`가 `cmake`로 `src/`와 `test/` 서브디렉토리를 빌드. 각 `src/<모듈>/CMakeLists.txt`는 OBJECT 라이브러리를 만들어 루트 `CMakeLists.txt`에서 `libvgpu.so` 하나로 링크합니다. 따라서 Vulkan 소스도 **OBJECT 라이브러리 `vulkan_mod`**로 추가하고 루트에 링크해야 합니다.
+
+**Files:**
+- Create: `libvgpu/src/vulkan/CMakeLists.txt`
+- Modify: `libvgpu/src/CMakeLists.txt` (`add_subdirectory(vulkan)` 추가)
+- Modify: `libvgpu/CMakeLists.txt` 루트 (target_link_libraries에 `$<TARGET_OBJECTS:vulkan_mod>` 추가, libvulkan-dev 찾기)
+- Modify: `libvgpu/test/CMakeLists.txt` (선택 — glob이 `test/vulkan/*.c`도 잡도록 확장)
+- Modify: `libvgpu/dockerfiles/Dockerfile.<변형>` (vulkan-headers + manifest 복사)
+
+- [ ] **Step 1: 현재 CMake 구조 재확인**
+
+Run (from `libvgpu/`):
+```bash
+cat src/CMakeLists.txt
+head -60 CMakeLists.txt
+```
+Expected: `add_subdirectory(multiprocess|allocator|cuda|nvml)` 4줄, 루트에 각 OBJECT lib를 `target_sources`/`target_link_libraries`로 합치는 블록.
+
+- [ ] **Step 2: Vulkan 서브디렉토리 CMakeLists 작성**
+
+Create `libvgpu/src/vulkan/CMakeLists.txt`:
+```cmake
+find_path(VULKAN_HEADERS vulkan/vulkan.h
+          HINTS ENV VULKAN_SDK
+          PATH_SUFFIXES include
+          PATHS /usr/include /usr/local/include)
+if(NOT VULKAN_HEADERS)
+    message(FATAL_ERROR "vulkan/vulkan.h not found. Install libvulkan-dev or set VULKAN_SDK.")
+endif()
+
+add_library(vulkan_mod OBJECT
+    layer.c
+    dispatch.c
+    hooks_memory.c
+    hooks_alloc.c
+    hooks_submit.c
+    throttle_adapter.c
+    budget.c
+)
+
+target_include_directories(vulkan_mod PRIVATE
+    ${VULKAN_HEADERS}
+    ${CMAKE_SOURCE_DIR}/src
+)
+
+target_compile_options(vulkan_mod PRIVATE -fPIC)
+```
+
+- [ ] **Step 3: `src/CMakeLists.txt`에 서브디렉토리 등록**
+
+Modify `libvgpu/src/CMakeLists.txt` — 기존 4줄 뒤에 한 줄 추가:
+```cmake
+add_subdirectory(vulkan)
+```
+
+- [ ] **Step 4: 루트 CMakeLists에서 `vulkan_mod` 링크**
+
+Modify `libvgpu/CMakeLists.txt` — `vgpu` target의 소스 리스트에 `vulkan_mod` OBJECT를 합친다. 기존 패턴이 `$<TARGET_OBJECTS:cuda_mod>` 등을 사용하고 있다면 같은 줄 뒤에 추가:
+```cmake
+target_sources(vgpu PRIVATE
+    $<TARGET_OBJECTS:multiprocess_mod>
+    $<TARGET_OBJECTS:allocator_mod>
+    $<TARGET_OBJECTS:cuda_mod>
+    $<TARGET_OBJECTS:nvml_mod>
+    $<TARGET_OBJECTS:vulkan_mod>          # NEW
+)
+```
+(실제 라인 위치는 Step 1의 출력으로 확인. 위 코드는 기존 패턴에 한 줄 추가하는 것을 기준으로 한 예시.)
+
+- [ ] **Step 5: 매니페스트를 install 단계에 포함**
+
+Modify `libvgpu/CMakeLists.txt` 루트에 install 블록이 있으면 그 안에, 없으면 새로:
+```cmake
+install(FILES etc/vulkan/implicit_layer.d/hami.json
+        DESTINATION /etc/vulkan/implicit_layer.d)
+```
+(CMake install 규칙을 이미지 빌드 단계에서 쓰지 않으면, Dockerfile에서 직접 `COPY`로 처리 — Step 7 참조.)
+
+- [ ] **Step 6: `test/CMakeLists.txt`에 Vulkan 테스트 포함 확인**
+
+기존 `test/CMakeLists.txt`가 `file(GLOB ... test/*.c)` 패턴이면 하위 `test/vulkan/`, `test/common/`를 별도로 추가해야 한다. 루트 `test/CMakeLists.txt`에 다음을 추가:
+```cmake
+file(GLOB VULKAN_TESTS "vulkan/*.c")
+foreach(tsrc ${VULKAN_TESTS})
+    get_filename_component(tname ${tsrc} NAME_WE)
+    add_executable(${tname} ${tsrc})
+    target_include_directories(${tname} PRIVATE ${CMAKE_SOURCE_DIR}/src)
+    target_link_libraries(${tname} PRIVATE pthread)
+endforeach()
+```
+단, 이 테스트들은 `src/vulkan/*.c`를 **다시 컴파일**해 자체 바이너리로 링크해야 하므로, 위 코드만으로는 빌드 실패. 단위 테스트는 CI가 아니라 로컬 수동 검증 도구로 놔두고 `make test` 타겟은 기존 CUDA 테스트만 돌리도록 유지하는 것이 실용적. **권장**: 위 `add_executable` 블록은 넣지 않고, `test-vulkan` 용 수동 명령을 `docs/superpowers/plans/notes/vulkan-test-howto.md`에 기록.
+
+- [ ] **Step 7: Dockerfile에 Vulkan 헤더 + 매니페스트 포함**
+
+Modify `libvgpu/dockerfiles/Dockerfile.hami-core` (또는 존재하는 가장 주된 Dockerfile; Step 1에서 `ls dockerfiles/` 확인):
+```dockerfile
+# Build stage — add vulkan headers before cmake runs
+RUN apt-get update && apt-get install -y --no-install-recommends \
+        libvulkan-dev \
+    && rm -rf /var/lib/apt/lists/*
+
+# Runtime (final) stage — ship manifest
+COPY etc/vulkan/implicit_layer.d/hami.json \
+     /etc/vulkan/implicit_layer.d/hami.json
+```
+정확한 위치는 기존 Dockerfile의 stage 구조에 맞춘다.
+
+- [ ] **Step 8: 전체 빌드 확인**
+
+Run (from `libvgpu/`):
+```bash
+make build 2>&1 | tail -40
+```
+Expected: `libvgpu.so` 빌드 성공. `nm libvgpu.so | grep vkNegotiateLoaderLayerInterfaceVersion` 이 `T` 심볼 표시.
+
+- [ ] **Step 9: 수동 단위 테스트 재실행 확인**
+
+Task 1.6 Step 5와 동일한 4개 cc 명령으로 모든 테스트가 PASS하는지 확인.
+
+- [ ] **Step 10: 커밋**
+
+```bash
+git add CMakeLists.txt src/CMakeLists.txt src/vulkan/CMakeLists.txt dockerfiles/
+git commit -m "build(vulkan): integrate vulkan_mod OBJECT lib and ship implicit layer manifest"
+```
+
+---
+
+### Task 1.9: HAMi-core PR 푸시 및 릴리스 태그
+
+**Files:** (메타 작업)
+
+- [ ] **Step 1: 브랜치 푸시**
+
+Run (from `libvgpu/`):
+```bash
+git push -u origin vulkan-layer
+```
+
+- [ ] **Step 2: PR 생성**
+
+```bash
+gh pr create --title "feat(vulkan): vGPU partitioning for Vulkan workloads" \
+             --body "$(cat <<'EOF'
+## Summary
+- Vulkan implicit layer VK_LAYER_HAMI_vgpu (activated by HAMI_VULKAN_ENABLE=1)
+- vkAllocateMemory/vkFreeMemory share the existing CUDA VRAM counter
+- vkGetPhysicalDeviceMemoryProperties[2] clamps device-local heap to pod budget
+- vkQueueSubmit[2] routes through the shared SM utilization throttle
+- Manifest ships to /etc/vulkan/implicit_layer.d/hami.json
+
+Design: Project-HAMi/HAMi docs/superpowers/specs/2026-04-21-vulkan-vgpu-partitioning-design.md
+
+## Test plan
+- [x] unit: test_layer, test_memprops, test_alloc, test_submit, test_throttle_adapter
+- [ ] integration: vulkaninfo in HAMi-scheduled pod
+- [ ] regression: existing CUDA hooks unaffected
+EOF
+)"
+```
+
+- [ ] **Step 3: PR URL 기록**
+
+PR URL을 `docs/superpowers/plans/notes/hami-core-pr.md`에 적는다 (HAMi 쪽 Task 2.6에서 참조).
+
+- [ ] **Step 4: 릴리스 태그 준비 (머지 후 별도)**
+
+PR 머지 후, HAMi-core 메인테이너가 릴리스 태그(예: `v1.7.0`)를 잘라 이미지(`projecthami/hami-vgpu:v1.7.0`)를 푸시. 이 Task 안에서는 릴리스 태그 이름만 `docs/superpowers/plans/notes/hami-core-pr.md`에 기록.
+
+---
+
+## Phase 2 — HAMi (Go) 웹훅
+
+### Task 2.1: Vulkan annotation 상수 및 실패 테스트
+
+**Files:**
+- Modify: `pkg/device/nvidia/device.go:39-57` (const 블록)
+- Modify: `pkg/device/nvidia/device_test.go` (뒤에 신규 테스트 추가)
+
+- [ ] **Step 1: 상수 추가**
+
+Modify `pkg/device/nvidia/device.go:39`, 기존 const 블록 끝에 추가:
+```go
+const (
+    HandshakeAnnos       = "hami.io/node-handshake"
+    // ... 기존 상수 ...
+    MpsMode      = "mps"
+
+    // Vulkan vGPU partitioning (added 2026-04-21)
+    VulkanEnableAnno       = "hami.io/vulkan"
+    VulkanLayerName        = "VK_LAYER_HAMI_vgpu"
+    NvidiaDriverCapsEnvVar = "NVIDIA_DRIVER_CAPABILITIES"
+    HamiVulkanEnvVar       = "HAMI_VULKAN_ENABLE"
+)
+```
+
+(새 상수는 위 예시처럼 기존 const 블록 끝에 붙이거나, 별도의 const 블록으로 추가해도 된다. 프로젝트 컨벤션상 별도 블록이 더 깔끔.)
+
+- [ ] **Step 2: 실패 단위 테스트 작성**
+
+Append to `pkg/device/nvidia/device_test.go`:
+```go
+func TestMutateAdmission_VulkanAnno_AddsGraphicsCap(t *testing.T) {
+    dev := &NvidiaGPUDevices{
+        config: NvidiaConfig{
+            ResourceCountName:            "nvidia.com/gpu",
+            ResourceMemoryName:           "nvidia.com/gpumem",
+            ResourceCoreName:             "nvidia.com/gpucores",
+            ResourceMemoryPercentageName: "nvidia.com/gpumem-percentage",
+        },
+    }
+    ctr := &corev1.Container{
+        Resources: corev1.ResourceRequirements{
+            Limits: corev1.ResourceList{
+                "nvidia.com/gpu": *resource.NewQuantity(1, resource.BinarySI),
+            },
+        },
+    }
+    pod := &corev1.Pod{
+        ObjectMeta: metav1.ObjectMeta{
+            Annotations: map[string]string{VulkanEnableAnno: "true"},
+        },
+    }
+    _, err := dev.MutateAdmission(ctr, pod)
+    assert.NilError(t, err)
+
+    var caps, enable string
+    for _, e := range ctr.Env {
+        if e.Name == NvidiaDriverCapsEnvVar {
+            caps = e.Value
+        }
+        if e.Name == HamiVulkanEnvVar {
+            enable = e.Value
+        }
+    }
+    assert.Assert(t, strings.Contains(caps, "graphics"), "expected graphics in caps, got %q", caps)
+    assert.Equal(t, enable, "1")
+}
+```
+
+`metav1` import 추가: `pkg/device/nvidia/device_test.go` 상단 import 블록에 `metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"` 이미 있는지 확인; 없으면 추가. `strings` 동일.
+
+- [ ] **Step 3: 테스트 실패 확인**
+
+Run:
+```bash
+go test ./pkg/device/nvidia/ -run TestMutateAdmission_VulkanAnno_AddsGraphicsCap -v
+```
+Expected: FAIL (아직 로직 미구현).
+
+- [ ] **Step 4: 커밋**
+
+```bash
+git add pkg/device/nvidia/device.go pkg/device/nvidia/device_test.go
+git commit -m "test(nvidia): failing test for Vulkan annotation env injection"
+```
+
+---
+
+### Task 2.2: `MutateAdmission`에 Vulkan 로직 추가
+
+**Files:**
+- Modify: `pkg/device/nvidia/device.go:342-378` (MutateAdmission)
+
+- [ ] **Step 1: 헬퍼 함수 추가**
+
+Modify `pkg/device/nvidia/device.go` — `MutateAdmission` 함수 아래(또는 파일 끝)에 추가:
+```go
+// mergeGraphicsCap returns the union of existing NVIDIA_DRIVER_CAPABILITIES
+// tokens with "graphics". If existing contains "all", it is returned unchanged.
+// An empty existing value becomes "compute,utility,graphics" (baseline needed
+// for Vulkan ICD plus existing HAMi CUDA path).
+func mergeGraphicsCap(existing string) string {
+    if existing == "" {
+        return "compute,utility,graphics"
+    }
+    tokens := strings.Split(existing, ",")
+    seen := make(map[string]struct{}, len(tokens))
+    for _, t := range tokens {
+        t = strings.TrimSpace(t)
+        if t == "" {
+            continue
+        }
+        if t == "all" {
+            return existing
+        }
+        seen[t] = struct{}{}
+    }
+    if _, ok := seen["graphics"]; ok {
+        return existing
+    }
+    tokens = append(tokens, "graphics")
+    // normalize: trim spaces, drop empties
+    cleaned := make([]string, 0, len(tokens))
+    for _, t := range tokens {
+        t = strings.TrimSpace(t)
+        if t != "" {
+            cleaned = append(cleaned, t)
+        }
+    }
+    return strings.Join(cleaned, ",")
+}
+
+// applyVulkanAnnotation mutates the container env when the pod opts into
+// Vulkan partitioning. No-op otherwise.
+func applyVulkanAnnotation(ctr *corev1.Container, pod *corev1.Pod) {
+    if pod == nil || pod.Annotations[VulkanEnableAnno] != "true" {
+        return
+    }
+
+    capsIdx := -1
+    for i, e := range ctr.Env {
+        if e.Name == NvidiaDriverCapsEnvVar {
+            capsIdx = i
+            break
+        }
+    }
+    merged := mergeGraphicsCap("")
+    if capsIdx >= 0 {
+        merged = mergeGraphicsCap(ctr.Env[capsIdx].Value)
+    }
+    if capsIdx >= 0 {
+        ctr.Env[capsIdx].Value = merged
+    } else {
+        ctr.Env = append(ctr.Env, corev1.EnvVar{Name: NvidiaDriverCapsEnvVar, Value: merged})
+    }
+
+    hasEnable := false
+    for _, e := range ctr.Env {
+        if e.Name == HamiVulkanEnvVar {
+            hasEnable = true
+            break
+        }
+    }
+    if !hasEnable {
+        ctr.Env = append(ctr.Env, corev1.EnvVar{Name: HamiVulkanEnvVar, Value: "1"})
+    }
+}
+```
+
+- [ ] **Step 2: `MutateAdmission`에서 호출**
+
+Modify `pkg/device/nvidia/device.go:365-370` (기존 `if hasResource` 블록 바로 뒤에 추가):
+```go
+    if hasResource {
+        // Set runtime class name if it is not set by user and the runtime class name is configured
+        if p.Spec.RuntimeClassName == nil && dev.config.RuntimeClassName != "" {
+            p.Spec.RuntimeClassName = &dev.config.RuntimeClassName
+        }
+        applyVulkanAnnotation(ctr, p)
+    }
+```
+
+- [ ] **Step 3: 테스트 통과 확인**
+
+Run:
+```bash
+go test ./pkg/device/nvidia/ -run TestMutateAdmission_VulkanAnno_AddsGraphicsCap -v
+```
+Expected: PASS.
+
+- [ ] **Step 4: 커밋**
+
+```bash
+git add pkg/device/nvidia/device.go
+git commit -m "feat(nvidia): inject Vulkan env when pod carries hami.io/vulkan annotation"
+```
+
+---
+
+### Task 2.3: Caps 병합 엣지 케이스 테스트
+
+**Files:**
+- Modify: `pkg/device/nvidia/device_test.go`
+
+- [ ] **Step 1: 추가 테스트들 작성**
+
+Append to `pkg/device/nvidia/device_test.go`:
+```go
+func TestMutateAdmission_VulkanAnno_MergesExistingCaps(t *testing.T) {
+    dev := &NvidiaGPUDevices{
+        config: NvidiaConfig{
+            ResourceCountName:            "nvidia.com/gpu",
+            ResourceMemoryName:           "nvidia.com/gpumem",
+            ResourceCoreName:             "nvidia.com/gpucores",
+            ResourceMemoryPercentageName: "nvidia.com/gpumem-percentage",
+        },
+    }
+    ctr := &corev1.Container{
+        Env: []corev1.EnvVar{{Name: NvidiaDriverCapsEnvVar, Value: "compute,utility"}},
+        Resources: corev1.ResourceRequirements{
+            Limits: corev1.ResourceList{
+                "nvidia.com/gpu": *resource.NewQuantity(1, resource.BinarySI),
+            },
+        },
+    }
+    pod := &corev1.Pod{
+        ObjectMeta: metav1.ObjectMeta{Annotations: map[string]string{VulkanEnableAnno: "true"}},
+    }
+    _, _ = dev.MutateAdmission(ctr, pod)
+
+    var caps string
+    for _, e := range ctr.Env {
+        if e.Name == NvidiaDriverCapsEnvVar {
+            caps = e.Value
+        }
+    }
+    assert.Assert(t, strings.Contains(caps, "compute"))
+    assert.Assert(t, strings.Contains(caps, "utility"))
+    assert.Assert(t, strings.Contains(caps, "graphics"))
+}
+
+func TestMutateAdmission_VulkanAnno_AllCaps_NoChange(t *testing.T) {
+    dev := &NvidiaGPUDevices{
+        config: NvidiaConfig{
+            ResourceCountName: "nvidia.com/gpu",
+        },
+    }
+    ctr := &corev1.Container{
+        Env: []corev1.EnvVar{{Name: NvidiaDriverCapsEnvVar, Value: "all"}},
+        Resources: corev1.ResourceRequirements{
+            Limits: corev1.ResourceList{
+                "nvidia.com/gpu": *resource.NewQuantity(1, resource.BinarySI),
+            },
+        },
+    }
+    pod := &corev1.Pod{
+        ObjectMeta: metav1.ObjectMeta{Annotations: map[string]string{VulkanEnableAnno: "true"}},
+    }
+    _, _ = dev.MutateAdmission(ctr, pod)
+
+    for _, e := range ctr.Env {
+        if e.Name == NvidiaDriverCapsEnvVar {
+            assert.Equal(t, e.Value, "all")
+        }
+    }
+}
+
+func TestMutateAdmission_NoVulkanAnno_NoChange(t *testing.T) {
+    dev := &NvidiaGPUDevices{
+        config: NvidiaConfig{ResourceCountName: "nvidia.com/gpu"},
+    }
+    ctr := &corev1.Container{
+        Resources: corev1.ResourceRequirements{
+            Limits: corev1.ResourceList{
+                "nvidia.com/gpu": *resource.NewQuantity(1, resource.BinarySI),
+            },
+        },
+    }
+    pod := &corev1.Pod{}
+    _, _ = dev.MutateAdmission(ctr, pod)
+    for _, e := range ctr.Env {
+        assert.Assert(t, e.Name != NvidiaDriverCapsEnvVar, "unexpected caps env")
+        assert.Assert(t, e.Name != HamiVulkanEnvVar, "unexpected enable env")
+    }
+}
+
+func TestMutateAdmission_VulkanAnno_NoGPUResource(t *testing.T) {
+    dev := &NvidiaGPUDevices{
+        config: NvidiaConfig{
+            ResourceCountName:            "nvidia.com/gpu",
+            ResourceMemoryName:           "nvidia.com/gpumem",
+            ResourceCoreName:             "nvidia.com/gpucores",
+            ResourceMemoryPercentageName: "nvidia.com/gpumem-percentage",
+        },
+    }
+    ctr := &corev1.Container{Resources: corev1.ResourceRequirements{Limits: corev1.ResourceList{}}}
+    pod := &corev1.Pod{
+        ObjectMeta: metav1.ObjectMeta{Annotations: map[string]string{VulkanEnableAnno: "true"}},
+    }
+    _, _ = dev.MutateAdmission(ctr, pod)
+    for _, e := range ctr.Env {
+        assert.Assert(t, e.Name != HamiVulkanEnvVar, "no Vulkan env on non-GPU pod")
+    }
+}
+
+func TestMutateAdmission_VulkanAnno_IdempotentHamiEnable(t *testing.T) {
+    dev := &NvidiaGPUDevices{
+        config: NvidiaConfig{ResourceCountName: "nvidia.com/gpu"},
+    }
+    ctr := &corev1.Container{
+        Env: []corev1.EnvVar{{Name: HamiVulkanEnvVar, Value: "1"}},
+        Resources: corev1.ResourceRequirements{
+            Limits: corev1.ResourceList{
+                "nvidia.com/gpu": *resource.NewQuantity(1, resource.BinarySI),
+            },
+        },
+    }
+    pod := &corev1.Pod{
+        ObjectMeta: metav1.ObjectMeta{Annotations: map[string]string{VulkanEnableAnno: "true"}},
+    }
+    _, _ = dev.MutateAdmission(ctr, pod)
+    count := 0
+    for _, e := range ctr.Env {
+        if e.Name == HamiVulkanEnvVar {
+            count++
+        }
+    }
+    assert.Equal(t, count, 1)
+}
+```
+
+- [ ] **Step 2: 모두 PASS 확인**
+
+Run:
+```bash
+go test ./pkg/device/nvidia/ -run 'TestMutateAdmission_(No)?VulkanAnno' -v
+```
+Expected: 5 tests PASS.
+
+- [ ] **Step 3: 기존 전체 테스트 회귀 없음 확인**
+
+Run:
+```bash
+go test ./pkg/device/nvidia/...
+```
+Expected: PASS 전체.
+
+- [ ] **Step 4: 커밋**
+
+```bash
+git add pkg/device/nvidia/device_test.go
+git commit -m "test(nvidia): cover Vulkan annotation edge cases"
+```
+
+---
+
+### Task 2.4: HAMi-core submodule 포인터 업데이트
+
+**Files:**
+- Modify: `libvgpu` submodule reference
+
+- [ ] **Step 1: Phase 1에서 머지된 HAMi-core 커밋 확인**
+
+Task 1.9의 PR이 머지된 후, `libvgpu` 레포 main의 최신 커밋 SHA를 확보.
+
+- [ ] **Step 2: submodule 업데이트**
+
+Run:
+```bash
+cd libvgpu
+git fetch origin main
+git checkout main
+git pull
+cd ..
+git diff --submodule libvgpu
+```
+Expected: `libvgpu <old>..<new>` 한 줄.
+
+- [ ] **Step 3: submodule 포인터 커밋**
+
+Run:
+```bash
+git add libvgpu
+git commit -m "deps: bump libvgpu to include Vulkan vGPU layer"
+```
+
+---
+
+## Phase 3 — 예제 및 문서
+
+### Task 3.1: Vulkan 예제 파드
+
+**Files:**
+- Create: `examples/nvidia/vulkan_example.yaml`
+
+- [ ] **Step 1: 예제 YAML 작성**
+
+Create `examples/nvidia/vulkan_example.yaml`:
+```yaml
+apiVersion: v1
+kind: Pod
+metadata:
+  name: hami-vulkan-example
+  annotations:
+    hami.io/vulkan: "true"
+spec:
+  restartPolicy: Never
+  containers:
+    - name: vulkaninfo
+      # any image with vulkaninfo + libvulkan1
+      image: khronosgroup/vulkan-samples:latest
+      command: ["vulkaninfo"]
+      resources:
+        limits:
+          nvidia.com/gpu: "1"
+          nvidia.com/gpumem: "1024"   # 1 GiB VRAM budget
+          nvidia.com/gpucores: "30"   # 30% SM throttle
+```
+
+- [ ] **Step 2: 커밋**
+
+```bash
+git add examples/nvidia/vulkan_example.yaml
+git commit -m "example: Vulkan vGPU partitioned pod"
+```
+
+---
+
+### Task 3.2: 지원 문서 (영문)
+
+**Files:**
+- Create: `docs/vulkan-vgpu-support.md`
+
+- [ ] **Step 1: 문서 작성**
+
+Create `docs/vulkan-vgpu-support.md`:
+```markdown
+# Vulkan vGPU Support
+
+HAMi partitions NVIDIA GPUs for Vulkan workloads by injecting a Vulkan implicit
+layer (`VK_LAYER_HAMI_vgpu`) that shares the same VRAM and SM budgets used by
+the existing CUDA hooks.
+
+## Enabling Vulkan partitioning
+
+Add the `hami.io/vulkan: "true"` annotation to any pod that uses HAMi NVIDIA
+resources. The webhook will:
+
+- Union `graphics` into `NVIDIA_DRIVER_CAPABILITIES` so the NVIDIA Container
+  Toolkit mounts the Vulkan ICD and graphics libraries.
+- Set `HAMI_VULKAN_ENABLE=1` which activates the HAMi Vulkan layer via its
+  `enable_environment` clause in the implicit layer manifest.
+
+Example: `examples/nvidia/vulkan_example.yaml`.
+
+## What gets limited
+
+- `nvidia.com/gpumem` enforces VRAM allocation across **both** CUDA and Vulkan
+  in the container, sharing a single budget.
+- `nvidia.com/gpucores` throttles Vulkan `vkQueueSubmit[2]` using the same
+  NVML-based polling loop as `cuLaunchKernel`.
+- `vkGetPhysicalDeviceMemoryProperties[2]` clamps the device-local heap size
+  to the pod budget so apps that size allocations from this value self-limit.
+
+## What is not limited (yet)
+
+- Vulkan Video (`VK_KHR_video_queue`) submissions.
+- Frame-pacing jitter introduced by throttling on graphics queues (documented
+  behavior; strict/cooperative modes are a future option).
+
+## Troubleshooting
+
+| Symptom | Check |
+|---------|-------|
+| Container has no `vulkan` CLI / libs | Annotation absent or `NVIDIA_DRIVER_CAPABILITIES` already frozen to `compute` by image. |
+| `vkAllocateMemory` always succeeds | Layer did not activate — ensure `HAMI_VULKAN_ENABLE=1` set and `/etc/vulkan/implicit_layer.d/hami.json` exists. |
+| `vulkaninfo` still shows full VRAM heap | Layer manifest not loaded; run `VK_LOADER_DEBUG=all vulkaninfo` to see layer scan. |
+```
+
+- [ ] **Step 2: 커밋**
+
+```bash
+git add docs/vulkan-vgpu-support.md
+git commit -m "docs: Vulkan vGPU support guide"
+```
+
+---
+
+### Task 3.3: 중국어 번역
+
+**Files:**
+- Create: `docs/vulkan-vgpu-support_cn.md`
+
+- [ ] **Step 1: 영문 문서를 중국어로 번역해서 작성**
+
+Create `docs/vulkan-vgpu-support_cn.md`:
+```markdown
+# Vulkan vGPU 支持
+
+HAMi 通过注入 Vulkan 隐式层（`VK_LAYER_HAMI_vgpu`）对 NVIDIA GPU 进行 Vulkan 工作负载的切分。该层与已有的 CUDA 钩子共享同一套 VRAM 与 SM 预算。
+
+## 启用方式
+
+在使用 HAMi NVIDIA 资源的 Pod 上添加 annotation `hami.io/vulkan: "true"`。Webhook 会：
+
+- 将 `graphics` 合并进 `NVIDIA_DRIVER_CAPABILITIES`，以便 NVIDIA Container Toolkit 挂载 Vulkan ICD 与图形库。
+- 设置 `HAMI_VULKAN_ENABLE=1`，通过隐式层 manifest 的 `enable_environment` 激活 HAMi Vulkan 层。
+
+示例：`examples/nvidia/vulkan_example.yaml`。
+
+## 生效范围
+
+- `nvidia.com/gpumem` 对容器内 CUDA 与 Vulkan 的 VRAM 分配**共享同一预算**。
+- `nvidia.com/gpucores` 通过与 `cuLaunchKernel` 相同的 NVML 轮询机制对 `vkQueueSubmit[2]` 进行限速。
+- `vkGetPhysicalDeviceMemoryProperties[2]` 将 device-local 堆大小裁剪为 Pod 预算，使依据该值决定分配大小的应用能够自行限制用量。
+
+## 未涵盖项（未来工作）
+
+- Vulkan Video（`VK_KHR_video_queue`）提交。
+- 图形队列限速导致的帧抖动（已记录，未来提供 strict/cooperative 模式）。
+
+## 故障排查
+
+| 现象 | 检查 |
+|------|------|
+| 容器没有 Vulkan 库 | annotation 缺失，或镜像已冻结 `NVIDIA_DRIVER_CAPABILITIES=compute`。 |
+| `vkAllocateMemory` 总是成功 | 层未激活 — 确认 `HAMI_VULKAN_ENABLE=1` 与 `/etc/vulkan/implicit_layer.d/hami.json` 存在。 |
+| `vulkaninfo` 仍报告全量 VRAM | Manifest 未加载；可 `VK_LOADER_DEBUG=all vulkaninfo` 查看扫描日志。 |
+```
+
+- [ ] **Step 2: 커밋**
+
+```bash
+git add docs/vulkan-vgpu-support_cn.md
+git commit -m "docs: 中文版 Vulkan vGPU 支持说明"
+```
+
+---
+
+## Phase 4 — 통합 검증
+
+### Task 4.1: 수동 E2E — 힙 클램프 확인
+
+**Files:** (런타임 실행)
+
+- [ ] **Step 1: HAMi-core 이미지 빌드**
+
+Run:
+```bash
+cd libvgpu && docker build -t projecthami/hami-vgpu:dev . && cd ..
+```
+
+- [ ] **Step 2: HAMi 이미지에 submodule 반영 빌드**
+
+Run:
+```bash
+make docker-build
+```
+(없으면 기존 CI 명령 사용)
+
+- [ ] **Step 3: 테스트 클러스터에 배포**
+
+Run:
+```bash
+helm upgrade --install hami charts/hami \
+    --set scheduler.image.repository=projecthami/hami-scheduler \
+    --set scheduler.image.tag=dev \
+    --set devicePlugin.image.repository=projecthami/hami-device-plugin \
+    --set devicePlugin.image.tag=dev \
+    --set vgpu.image.repository=projecthami/hami-vgpu \
+    --set vgpu.image.tag=dev
+kubectl apply -f examples/nvidia/vulkan_example.yaml
+```
+
+- [ ] **Step 4: 힙 클램프 확인**
+
+Run:
+```bash
+kubectl logs hami-vulkan-example | grep -iE "heap|device local"
+```
+Expected: device-local 힙 size가 ≤ 1 GiB (1024 MiB, pod 버짓).
+
+- [ ] **Step 5: 결과 기록**
+
+`docs/superpowers/plans/notes/e2e-vulkaninfo.md`에 로그 요약을 적는다.
+
+- [ ] **Step 6: 커밋**
+
+```bash
+git add docs/superpowers/plans/notes/e2e-vulkaninfo.md
+git commit -m "test(e2e): vulkaninfo heap clamp verified in HAMi-scheduled pod"
+```
+
+---
+
+### Task 4.2: 수동 E2E — 할당 초과 시 OOM 반환
+
+**Files:** (런타임 실행)
+
+- [ ] **Step 1: 할당 초과 테스트 스크립트 작성**
+
+Create `examples/nvidia/vulkan_oom_test.yaml`:
+```yaml
+apiVersion: v1
+kind: Pod
+metadata:
+  name: hami-vulkan-oom-test
+  annotations:
+    hami.io/vulkan: "true"
+spec:
+  restartPolicy: Never
+  containers:
+    - name: oom
+      image: ghcr.io/example/vulkan-alloc-test:latest  # 2 GiB를 반복 할당하는 테스트 바이너리
+      resources:
+        limits:
+          nvidia.com/gpu: "1"
+          nvidia.com/gpumem: "1024"
+```
+(이미지가 없으면, 간단한 C 프로그램 `vkAllocateMemory(2GiB)` 루프를 작성해 별도 이미지로 빌드.)
+
+- [ ] **Step 2: 실행 및 OOM 확인**
+
+Run:
+```bash
+kubectl apply -f examples/nvidia/vulkan_oom_test.yaml
+kubectl logs hami-vulkan-oom-test
+```
+Expected: 로그에 `VK_ERROR_OUT_OF_DEVICE_MEMORY` 또는 등가 메시지.
+
+- [ ] **Step 3: 결과 기록 및 커밋**
+
+`docs/superpowers/plans/notes/e2e-vulkaninfo.md`에 추가 기록.
+```bash
+git add examples/nvidia/vulkan_oom_test.yaml docs/superpowers/plans/notes/e2e-vulkaninfo.md
+git commit -m "test(e2e): vulkan OOM returns VK_ERROR_OUT_OF_DEVICE_MEMORY"
+```
+
+---
+
+### Task 4.3: 혼합 워크로드 — CUDA + Vulkan 공유 버짓
+
+**Files:** (런타임 실행)
+
+- [ ] **Step 1: 혼합 컨테이너 파드 작성**
+
+Create `examples/nvidia/vulkan_cuda_mixed.yaml`:
+```yaml
+apiVersion: v1
+kind: Pod
+metadata:
+  name: hami-vulkan-cuda-mixed
+  annotations:
+    hami.io/vulkan: "true"
+spec:
+  restartPolicy: Never
+  containers:
+    - name: app
+      image: ghcr.io/example/cuda-vulkan-mixed:latest  # CUDA 512 MiB + Vulkan 512 MiB
+      resources:
+        limits:
+          nvidia.com/gpu: "1"
+          nvidia.com/gpumem: "1024"
+```
+
+- [ ] **Step 2: 실행 및 합산 버짓 준수 확인**
+
+Run:
+```bash
+kubectl apply -f examples/nvidia/vulkan_cuda_mixed.yaml
+kubectl logs hami-vulkan-cuda-mixed
+```
+Expected: 양쪽 할당 성공, 추가 할당 시 OOM.
+
+- [ ] **Step 3: 커밋**
+
+```bash
+git add examples/nvidia/vulkan_cuda_mixed.yaml
+git commit -m "test(e2e): CUDA+Vulkan mixed workload shares single VRAM budget"
+```
+
+---
+
+### Task 4.4: 플랜 아티팩트 정리 및 최종 PR
+
+**Files:**
+- Delete: `docs/superpowers/plans/notes/` (임시 노트)
+
+- [ ] **Step 1: 노트 디렉토리 제거 (삭제 전에 `notes/hami-core-pr.md` 의 HAMi-core PR 링크를 복사해 둘 것 — Step 2 의 PR 본문에 필요)**
+
+Run:
+```bash
+git rm -r docs/superpowers/plans/notes/
+git commit -m "chore: drop temporary planning notes"
+```
+
+- [ ] **Step 2: HAMi 브랜치 푸시 및 PR**
+
+Run:
+```bash
+git push -u origin vulkan-vgpu-partitioning
+gh pr create --title "feat(nvidia): Vulkan vGPU partitioning" \
+             --body "$(cat <<'EOF'
+## Summary
+- Webhook injects graphics cap + HAMI_VULKAN_ENABLE=1 when `hami.io/vulkan: "true"` annotation is present
+- libvgpu submodule bumped to include Vulkan implicit layer (VK_LAYER_HAMI_vgpu)
+- CUDA and Vulkan share the existing `nvidia.com/gpumem` and `nvidia.com/gpucores` budgets
+- Docs + example added
+
+Design: docs/superpowers/specs/2026-04-21-vulkan-vgpu-partitioning-design.md
+HAMi-core PR: (link from notes/hami-core-pr.md before deletion)
+
+## Test plan
+- [x] Go unit tests (5 new)
+- [x] HAMi-core unit tests (layer / memprops / alloc / submit / throttle)
+- [x] E2E: vulkaninfo heap clamp
+- [x] E2E: vkAllocateMemory OOM at budget
+- [x] E2E: CUDA + Vulkan mixed workload shares budget
+EOF
+)"
+```
+
+---
+
+## 자가 점검
+
+### 스펙 커버리지
+
+| 스펙 요구사항 | 해당 Task |
+|---------------|-----------|
+| §3 Activation via annotation | Task 2.2, 2.3 |
+| §5.1 Go 상수/로직 | Task 2.1, 2.2 |
+| §5.2 C 레이어 엔트리포인트 | Task 1.1 |
+| §5.2 메모리 속성 clamp | Task 1.2 |
+| §5.2 vkAllocateMemory/vkFreeMemory | Task 1.3 |
+| §5.2 vkQueueSubmit throttle | Task 1.4 + 1.5 |
+| §5.3 공유 카운터 통합 | Task 1.6 |
+| §5.4 Manifest JSON | Task 1.7 |
+| §5.5 Build 통합 | Task 1.8 |
+| §6 데이터 흐름 (admission + runtime) | Task 2.2 (admission), 1.1~1.5 (runtime) |
+| §7 에러 처리 (merge 규칙) | Task 2.3 (edge cases) |
+| §8.1 Go 단위 테스트 | Task 2.1, 2.3 |
+| §8.2 C 단위 테스트 | Task 1.1~1.5 |
+| §8.3 E2E | Task 4.1, 4.2, 4.3 |
+| §9 Delivery 순서 | Phase 1 → 2 → 3 → 4 |
+
+### 타입 일관성
+
+- Go: `VulkanEnableAnno`, `NvidiaDriverCapsEnvVar`, `HamiVulkanEnvVar`를 Task 2.1, 2.2, 2.3에서 동일하게 사용.
+- C: `hami_reserve_device_memory(int, size_t)` / `hami_release_device_memory(int, size_t)` / `hami_pod_memory_budget(int)`을 Task 1.3, 1.6에서 동일 시그니처 유지.
+- C: `hami_throttle_wait(int dev_idx, int util_limit)` Task 1.4, 1.5에서 동일.
+
+### Placeholder 없음 확인
+
+- 모든 "Step"이 실제 커맨드/코드/기대 출력 포함.
+- HAMi-core 기존 카운터 함수 이름은 Task 0.2 탐색 노트를 근거로 Task 1.6 어댑터에서 실제 이름으로 교체하도록 지시함 (노트 자체가 아티팩트).
+- 테스트 코드는 매 Task마다 full source 포함.
diff --git a/docs/superpowers/plans/2026-04-27-volcano-vulkan-vgpu.md b/docs/superpowers/plans/2026-04-27-volcano-vulkan-vgpu.md
new file mode 100644
index 000000000..d1d4b1695
--- /dev/null
+++ b/docs/superpowers/plans/2026-04-27-volcano-vulkan-vgpu.md
@@ -0,0 +1,1114 @@
+# Volcano + Vulkan vGPU 통합 Implementation Plan
+
+> **For agentic workers:** REQUIRED SUB-SKILL: Use superpowers:subagent-driven-development (recommended) or superpowers:executing-plans to implement this plan task-by-task. Steps use checkbox (`- [ ]`) syntax for tracking.
+
+**Goal:** Volcano scheduler 가 운영 중인 클러스터에 HAMi 의 Vulkan vGPU 메모리 partitioning 기능을 통합한다. `xiilab/volcano-vgpu-device-plugin` 의 libvgpu 를 vulkan-layer 가 들어간 HAMi-core 로 교체하고, device-plugin Allocate 에 manifest auto-mount 코드를 추가하며, HAMi 의 mutating webhook 만 별도 helm install 한다.
+
+**Architecture:** HAMi 본가의 commit `0150ea7` (manifest auto-inject) 패턴을 그대로 fork 에 포팅한다. Dockerfile 의 runtime stage 가 builder stage 에서 manifest 파일을 복사해 image 에 ship 하고, vgpu-init script 가 이를 host 에 복사하며, device-plugin 의 `Allocate()` 가 host 파일이 존재하면 container 에 bind-mount 한다. webhook 은 HAMi 본가 helm chart 로 별도 install 하여 annotation 처리만 담당한다.
+
+**Tech Stack:** Go 1.21+, Kubernetes device-plugin v1beta1, NVIDIA Vulkan Loader, libvgpu (HAMi-core vulkan-layer), helm 3, Volcano scheduler.
+
+**Spec:** `docs/superpowers/specs/2026-04-27-volcano-vulkan-vgpu-design.md`
+
+## File Structure
+
+작업은 두 repo 에 걸친다.
+
+### `xiilab/volcano-vgpu-device-plugin` (PR-1)
+
+| 파일 | 역할 | 변경 |
+|---|---|---|
+| `libvgpu` (submodule) | HAMi-core (vulkan-layer 포함) | submodule SHA 갱신 |
+| `docker/Dockerfile` | image 빌드. builder stage 에 libvulkan-dev 추가, runtime stage 에 hami.json ship | 2 줄 추가 |
+| `pkg/.../plugin/server.go` (또는 동등 위치) | device-plugin Allocate 응답 빌더 | 17 줄 추가 (manifest mount) |
+| `volcano-vgpu-device-plugin.yml` | standard mode deploy yaml | image tag 갱신 |
+| `volcano-vgpu-device-plugin-cdi.yml` | CDI mode deploy yaml | image tag 갱신 |
+| `volcano-vgpu-vulkan-manifest.yml` (NEW) | host 측 manifest 파일 사전 배치 (별도 DaemonSet, fallback) | 신규 |
+| `examples/vulkan-pod.yaml` (NEW) | E2E 테스트용 sample pod | 신규 |
+| `doc/vulkan-vgpu.md` (NEW) | 사용 가이드 | 신규 |
+
+### HAMi 본가 (변경 없음)
+
+helm chart 의 `values.yaml` 으로 webhook only 모드 install. PR 없음.
+
+---
+
+## Task 1: 작업 환경 준비 — repo clone + 브랜치 생성
+
+**Files:**
+- Clone: `~/git/volcano-vgpu-device-plugin` (xiilab fork)
+- Branch: `feat/vulkan-vgpu-support`
+
+- [ ] **Step 1: Clone xiilab fork**
+
+```bash
+cd ~/git
+git clone https://github.com/xiilab/volcano-vgpu-device-plugin.git
+cd volcano-vgpu-device-plugin
+git remote add upstream https://github.com/Project-HAMi/volcano-vgpu-device-plugin.git
+git fetch upstream
+```
+
+- [ ] **Step 2: 새 브랜치 생성**
+
+```bash
+cd ~/git/volcano-vgpu-device-plugin
+git checkout -b feat/vulkan-vgpu-support
+git submodule update --init --recursive
+```
+
+- [ ] **Step 3: 현재 libvgpu submodule SHA 기록**
+
+```bash
+cd ~/git/volcano-vgpu-device-plugin
+git -C libvgpu rev-parse HEAD
+# Expected: 6660c84... (or whatever the current submodule pin is)
+```
+
+기록한 SHA 를 노트해 두기 (나중 회귀 비교용).
+
+- [ ] **Step 4: server.go 위치 파악**
+
+```bash
+cd ~/git/volcano-vgpu-device-plugin
+grep -rln "func.*Allocate.*kubeletdevicepluginv1beta1" pkg/ cmd/ 2>/dev/null
+```
+
+찾은 경로를 노트. 이후 task 들에서 이 경로를 사용 (예시 가정: `pkg/plugin/server.go` 또는 `pkg/util/util.go`).
+
+- [ ] **Step 5: 빌드 환경 검증 (변경 없는 상태)**
+
+```bash
+cd ~/git/volcano-vgpu-device-plugin
+make build 2>&1 | tail -10
+```
+
+Expected: 성공. 만약 실패하면 일단 task 진행 멈추고 master 의 build 상태부터 정상화.
+
+- [ ] **Step 6: Commit (브랜치 시작 마커)**
+
+```bash
+git commit --allow-empty -m "chore: start feat/vulkan-vgpu-support branch"
+```
+
+---
+
+## Task 2: libvgpu submodule 을 vulkan-layer 가 포함된 SHA 로 갱신
+
+**Files:**
+- Modify: `libvgpu` submodule pointer
+- Modify: `.gitmodules` (이미 vulkan-layer branch 추적 중인지 확인, 필요 시 변경)
+
+- [ ] **Step 1: HAMi 가 사용하는 libvgpu SHA 기록**
+
+```bash
+cd ~/git/HAMi
+git -C libvgpu rev-parse HEAD
+# Expected: 8d4f712... (cuMemFree[Async] untracked-pointer fallback 포함)
+```
+
+이 SHA 를 `LIBVGPU_VULKAN_SHA` 로 노트 (이후 step 에서 사용).
+
+- [ ] **Step 2: volcano-vgpu-device-plugin 의 libvgpu remote 가 HAMi-core 의 vulkan-layer branch 를 가리키는지 확인**
+
+```bash
+cd ~/git/volcano-vgpu-device-plugin
+cat .gitmodules
+```
+
+기대값:
+
+```
+[submodule "libvgpu"]
+	path = libvgpu
+	url = https://github.com/Project-HAMi/HAMi-core.git
+```
+
+url 은 Step 1 에서 기록한 `LIBVGPU_VULKAN_SHA` 커밋을 실제로 보유한 remote 를 가리켜야 한다. HAMi 본가 (Project-HAMi/HAMi-core) 가 vulkan-layer branch 를 보유하면 그대로 사용한다. 보유하지 않으면 아래와 같이 xiilab fork 로 url 을 변경한다 (변경했다면 `.gitmodules` 변경분도 submodule 갱신 commit 에 함께 포함):
+
+```bash
+git submodule set-url libvgpu https://github.com/xiilab/HAMi-core.git
+```
+
+- [ ] **Step 3: submodule 을 LIBVGPU_VULKAN_SHA 로 fast-forward**
+
+```bash
+cd ~/git/volcano-vgpu-device-plugin/libvgpu
+git fetch origin
+git checkout 8d4f712df2941d9314f534bac0038c2f8b7be41f  # LIBVGPU_VULKAN_SHA
+cd ..
+git add libvgpu
+git status
+```
+
+기대값: `modified: libvgpu (new commits)` 만 표시.
+
+- [ ] **Step 4: vulkan layer 소스가 들어왔는지 확인**
+
+```bash
+cd ~/git/volcano-vgpu-device-plugin
+ls libvgpu/src/vulkan/
+ls libvgpu/etc/vulkan/implicit_layer.d/
+```
+
+기대값: `src/vulkan/` 에 `budget.c`, `loader_intercept.c` 등 존재. `etc/vulkan/implicit_layer.d/hami.json` 존재.
+
+- [ ] **Step 5: Commit submodule 갱신**
+
+```bash
+cd ~/git/volcano-vgpu-device-plugin
+git commit -m "deps: bump libvgpu to 8d4f712 (vulkan-layer support)"
+```
+
+---
+
+## Task 3: Dockerfile builder stage 에 libvulkan-dev 추가
+
+**Files:**
+- Modify: `docker/Dockerfile` (빌더 stage 의 apt install 라인)
+
+- [ ] **Step 1: 현재 nvbuild stage 의 apt install 라인 확인**
+
+```bash
+cd ~/git/volcano-vgpu-device-plugin
+grep -n -E "(FROM .* AS nvbuild|apt|apt-get install)" docker/Dockerfile | head -10
+```
+
+이전에 어떤 packages 가 설치되는지 파악. nvbuild stage 가 libvgpu 를 빌드하는 stage.
+
+- [ ] **Step 2: Dockerfile 의 nvbuild stage apt install 에 libvulkan-dev 추가**
+
+다음 형태 (HAMi commit `50b37ff` 와 동일한 수정):
+
+```dockerfile
+# nvbuild stage 안의 기존
+RUN apt-get update && apt-get install -y \
+    cmake \
+    make \
+    g++ \
+    git \
+    libvulkan-dev   # ← 신규 라인
+```
+
+이미 `libvulkan-dev` 가 설치되어 있으면 skip.
+
+- [ ] **Step 3: 빌드 검증 (Dockerfile syntax)**
+
+```bash
+cd ~/git/volcano-vgpu-device-plugin
+docker build -f docker/Dockerfile -t volcano-vgpu-device-plugin:vulkan-test . 2>&1 | tail -20
+```
+
+기대값: 성공. libvgpu 의 vulkan source 도 함께 컴파일되어야 함. 만약 `vulkan_core.h: No such file` 류의 에러가 나면 libvulkan-dev 가 제대로 install 안 됐거나 PATH 미스.
+
+- [ ] **Step 4: Commit Dockerfile 변경**
+
+```bash
+cd ~/git/volcano-vgpu-device-plugin
+git add docker/Dockerfile
+git commit -m "build: install libvulkan-dev in nvbuild stage for Vulkan layer compile"
+```
+
+---
+
+## Task 4: Dockerfile 의 runtime stage 에 hami.json ship
+
+**Files:**
+- Modify: `docker/Dockerfile` (runtime stage 의 COPY 라인)
+
+- [ ] **Step 1: 현재 runtime stage 의 libvgpu.so COPY 라인 확인**
+
+```bash
+cd ~/git/volcano-vgpu-device-plugin
+grep -n "libvgpu.so" docker/Dockerfile
+```
+
+기대값: `COPY --from=nvbuild /libvgpu/build/libvgpu.so ...` 같은 라인.
+
+- [ ] **Step 2: 그 라인 직후에 hami.json COPY 추가**
+
+HAMi commit `0150ea7` 와 동일한 한 줄:
+
+```dockerfile
+COPY --from=nvbuild /libvgpu/build/libvgpu.so /k8s-vgpu/lib/nvidia/libvgpu.so."$VERSION"
+COPY --from=nvbuild /libvgpu/etc/vulkan/implicit_layer.d/hami.json /k8s-vgpu/lib/nvidia/vulkan/implicit_layer.d/hami.json
+```
+
+> **Note:** volcano-vgpu-device-plugin 의 path 가 HAMi 의 `/k8s-vgpu/lib/nvidia/` 와 다를 수 있다. Step 1 에서 확인한 기존 `libvgpu.so` COPY 라인의 대상 경로에 맞게 prefix 조정. 일반적으로 같은 prefix.
+
+- [ ] **Step 3: 빌드 검증 + image 안 hami.json 존재 확인**
+
+```bash
+cd ~/git/volcano-vgpu-device-plugin
+docker build -f docker/Dockerfile -t volcano-vgpu-device-plugin:vulkan-test . 2>&1 | tail -5
+docker run --rm --entrypoint /bin/sh volcano-vgpu-device-plugin:vulkan-test \
+    -c "ls -la /k8s-vgpu/lib/nvidia/vulkan/implicit_layer.d/hami.json && cat /k8s-vgpu/lib/nvidia/vulkan/implicit_layer.d/hami.json"
+```
+
+기대값: 파일 존재 + JSON 내용 출력 (`VK_LAYER_HAMI_vgpu`, `enable_environment: HAMI_VULKAN_ENABLE=1` 등 포함).
+
+- [ ] **Step 4: Commit**
+
+```bash
+cd ~/git/volcano-vgpu-device-plugin
+git add docker/Dockerfile
+git commit -m "feat(image): ship Vulkan implicit layer manifest from libvgpu"
+```
+
+---
+
+## Task 5: vgpu-init.sh (또는 동등 init script) 가 host 에 manifest 복사하는지 확인
+
+**Files:**
+- Inspect: `docker/vgpu-init.sh` (또는 동등)
+
+- [ ] **Step 1: vgpu-init.sh 위치 확인**
+
+```bash
+cd ~/git/volcano-vgpu-device-plugin
+find . -name "vgpu-init.sh" -o -name "init.sh" 2>/dev/null | head
+```
+
+- [ ] **Step 2: init script 의 host 복사 로직 확인**
+
+```bash
+cat docker/vgpu-init.sh   # 또는 발견된 path
+```
+
+기대 패턴: `cp -r /k8s-vgpu/lib/nvidia/* /usr/local/vgpu/` 또는 동등 (recursive copy 로 vulkan/implicit_layer.d/hami.json 도 함께 host 에 복사됨).
+
+- [ ] **Step 3: 만약 init script 이 recursive copy 가 아니면 명시적 라인 추가**
+
+`/k8s-vgpu/lib/nvidia/vulkan/implicit_layer.d/hami.json` 을 `/usr/local/vgpu/vulkan/implicit_layer.d/hami.json` 으로 복사하는 라인 추가 (mkdir -p 포함):
+
+```bash
+mkdir -p /usr/local/vgpu/vulkan/implicit_layer.d
+cp -f /k8s-vgpu/lib/nvidia/vulkan/implicit_layer.d/hami.json \
+      /usr/local/vgpu/vulkan/implicit_layer.d/hami.json
+```
+
+이미 recursive copy 로 cover 되면 변경 없음.
+
+- [ ] **Step 4: 변경 있으면 commit**
+
+```bash
+cd ~/git/volcano-vgpu-device-plugin
+git add docker/vgpu-init.sh
+git commit -m "build(init): copy Vulkan manifest to host during vgpu-init"
+```
+
+---
+
+## Task 6: device-plugin 의 Allocate 에 manifest mount 코드 추가
+
+**Files:**
+- Modify: Task 1 Step 4 에서 발견한 server.go 위치 (가정: `pkg/plugin/server.go`)
+
+- [ ] **Step 1: Allocate 함수 안의 license mount 라인 (앵커) 위치 찾기**
+
+```bash
+cd ~/git/volcano-vgpu-device-plugin
+grep -n "license" pkg/plugin/server.go   # 또는 발견된 server.go path
+```
+
+HAMi 의 `0150ea7` 는 license mount 직전에 vulkan manifest mount 를 추가했다. 같은 앵커 라인 위에 추가.
+
+- [ ] **Step 2: server.go 에 manifest mount 코드 추가**
+
+HAMi 본가 commit `0150ea7` 의 server.go 패치를 그대로 포팅. 정확한 코드:
+
+```go
+// Mount Vulkan implicit layer manifest so the HAMi Vulkan layer
+// activates for pods that set HAMI_VULKAN_ENABLE=1 (done by the
+// webhook when the pod carries hami.io/vulkan="true").
+// The manifest file is placed on the host by vgpu-init.sh as part
+// of the standard lib distribution; skip the mount if it is
+// absent so we do not block pod startup on nodes that have not
+// yet been populated.
+vulkanManifestHost := hostHookPath + "/vgpu/vulkan/implicit_layer.d/hami.json"
+if _, err := os.Stat(vulkanManifestHost); err == nil {
+    response.Mounts = append(response.Mounts, &kubeletdevicepluginv1beta1.Mount{
+        ContainerPath: "/etc/vulkan/implicit_layer.d/hami.json",
+        HostPath:      vulkanManifestHost,
+        ReadOnly:      true,
+    })
+}
+```
+
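+> **Note:** `hostHookPath` 변수 이름이 volcano-vgpu-device-plugin 에서 다를 수 있다 (`hostMountPath`, `vgpuPath` 등). 위 코드는 `hostHookPath` 뒤에 `/vgpu/...` 를 붙이므로, host 의 `/usr/local/vgpu/vulkan/implicit_layer.d/hami.json` 을 가리키려면 `hostHookPath` 는 `/usr/local` 이어야 한다 (`/usr/local/vgpu` 이면 `/vgpu` 가 중복된다). fork 의 동등 변수가 이미 `.../vgpu` 까지 포함한다면 suffix 에서 `/vgpu` 를 제거하고, 변수 이름은 fork 의 것으로 대체.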
+
+- [ ] **Step 3: import 확인**
+
+`os.Stat` 사용하므로 `os` import 가 이미 있어야 한다 (다른 mount 코드에서 사용 중일 가능성 큼). 만약 없으면 추가:
+
+```go
+import (
+    "os"
+    // existing imports...
+)
+```
+
+- [ ] **Step 4: 빌드 검증**
+
+```bash
+cd ~/git/volcano-vgpu-device-plugin
+go build ./... 2>&1 | head -20
+```
+
+기대값: error 0. `hostHookPath` 가 정의되지 않았거나 `kubeletdevicepluginv1beta1` import 누락이면 컴파일 실패 → 변수 이름 또는 import 조정.
+
+- [ ] **Step 5: 단위 테스트 — manifest 파일 존재/부재 시나리오 (TDD)**
+
+server.go 와 같은 패키지에 `server_vulkan_test.go` 생성:
+
+```go
+package plugin
+
+import (
+    "os"
+    "path/filepath"
+    "testing"
+)
+
+func TestVulkanManifestMount_Present(t *testing.T) {
+    tmp := t.TempDir()
+    // manifest 파일 사전 배치
+    manifestDir := filepath.Join(tmp, "vgpu", "vulkan", "implicit_layer.d")
+    if err := os.MkdirAll(manifestDir, 0755); err != nil {
+        t.Fatal(err)
+    }
+    manifestPath := filepath.Join(manifestDir, "hami.json")
+    if err := os.WriteFile(manifestPath, []byte("{}"), 0644); err != nil {
+        t.Fatal(err)
+    }
+
+    // hostHookPath = tmp 라고 가정하고 mount 빌더 호출 (실제 함수 이름은 fork 에 맞춰 조정)
+    mounts := buildVulkanManifestMount(tmp)
+    if len(mounts) != 1 {
+        t.Fatalf("expected 1 mount, got %d", len(mounts))
+    }
+    if mounts[0].ContainerPath != "/etc/vulkan/implicit_layer.d/hami.json" {
+        t.Errorf("unexpected ContainerPath: %s", mounts[0].ContainerPath)
+    }
+    if mounts[0].HostPath != manifestPath {
+        t.Errorf("unexpected HostPath: %s", mounts[0].HostPath)
+    }
+    if !mounts[0].ReadOnly {
+        t.Error("expected ReadOnly=true")
+    }
+}
+
+func TestVulkanManifestMount_Absent(t *testing.T) {
+    tmp := t.TempDir()
+    // 파일 없음 — mount 응답에 추가하지 말아야 함
+    mounts := buildVulkanManifestMount(tmp)
+    if len(mounts) != 0 {
+        t.Errorf("expected 0 mounts when manifest absent, got %d", len(mounts))
+    }
+}
+```
+
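+mount 블록이 인라인 코드라면 우선 Step 6 의 안내대로 `buildVulkanManifestMount` 함수로 추출한다. 추출이 정말 어렵다면 본 테스트는 skip 하고, image 빌드/배포 후 실제 pod 에서 `/etc/vulkan/implicit_layer.d/hami.json` mount 를 확인하는 통합 검증으로 대체 (Step 7 은 commit 단계이며 검증을 포함하지 않는다).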
+
+- [ ] **Step 6: 테스트 실행 (실행 가능한 경우)**
+
+```bash
+cd ~/git/volcano-vgpu-device-plugin
+go test ./pkg/plugin/ -run TestVulkanManifestMount -v
+```
+
+기대값: 두 testcase 모두 PASS.
+
+만약 `buildVulkanManifestMount` 함수가 없으면 (인라인 코드라면) Step 5 에서 함수 추출 + Step 6 PASS. 함수 추출은 server.go 의 manifest mount 블록을 다음 형태로 분리:
+
+```go
+func buildVulkanManifestMount(hostHookPath string) []*kubeletdevicepluginv1beta1.Mount {
+    vulkanManifestHost := hostHookPath + "/vgpu/vulkan/implicit_layer.d/hami.json"
+    if _, err := os.Stat(vulkanManifestHost); err != nil {
+        return nil
+    }
+    return []*kubeletdevicepluginv1beta1.Mount{{
+        ContainerPath: "/etc/vulkan/implicit_layer.d/hami.json",
+        HostPath:      vulkanManifestHost,
+        ReadOnly:      true,
+    }}
+}
+```
+
+그리고 Allocate 안에서:
+
+```go
+response.Mounts = append(response.Mounts, buildVulkanManifestMount(hostHookPath)...)
+```
+
+- [ ] **Step 7: Commit**
+
+```bash
+cd ~/git/volcano-vgpu-device-plugin
+git add pkg/plugin/server.go pkg/plugin/server_vulkan_test.go
+git commit -m "feat(plugin): auto-inject Vulkan implicit layer manifest mount"
+```
+
+---
+
+## Task 7: 기존 deploy yaml 두 개의 image tag 갱신
+
+**Files:**
+- Modify: `volcano-vgpu-device-plugin.yml`
+- Modify: `volcano-vgpu-device-plugin-cdi.yml`
+
+- [ ] **Step 1: 현재 image tag 확인**
+
+```bash
+cd ~/git/volcano-vgpu-device-plugin
+grep -nE "image:.*volcano-vgpu" volcano-vgpu-device-plugin.yml volcano-vgpu-device-plugin-cdi.yml
+```
+
+기대값 (예시): `image: projecthami/volcano-vgpu-device-plugin:v1.10.0`
+
+- [ ] **Step 2: 새 tag 결정**
+
+`vulkan-v1` 또는 `v1.10.0-vulkan-v1` 같은 명확한 tag.
+
+- [ ] **Step 3: yaml 두 개의 image 라인 갱신**
+
+```bash
+cd ~/git/volcano-vgpu-device-plugin
+sed -i.bak 's|image: projecthami/volcano-vgpu-device-plugin:.*|image: 10.61.3.124:30002/library/volcano-vgpu-device-plugin:vulkan-v1|' \
+    volcano-vgpu-device-plugin.yml volcano-vgpu-device-plugin-cdi.yml
+rm -f *.yml.bak
+git diff volcano-vgpu-device-plugin*.yml
+```
+
+> **Note:** sed pattern 의 source 부분 (`projecthami/...`) 은 Step 1 에서 본 실제 image 와 일치해야 한다. 달라지면 그에 맞게 조정.
+
+- [ ] **Step 4: Commit**
+
+```bash
+cd ~/git/volcano-vgpu-device-plugin
+git add volcano-vgpu-device-plugin.yml volcano-vgpu-device-plugin-cdi.yml
+git commit -m "chore: bump image to vulkan-v1 in deploy yaml"
+```
+
+---
+
+## Task 8: 신규 yaml — host manifest 사전 배치 (fallback DaemonSet)
+
+**Files:**
+- Create: `volcano-vgpu-vulkan-manifest.yml`
+
+> **Note:** Task 4-5 의 device-plugin image 가 이미 init script 으로 manifest 를 host 에 배치하므로, **이 DaemonSet 은 fallback** 이다. 노드에 이미 device-plugin DaemonSet 이 떠 있으면 manifest 가 자동 배치되지만, 별도 환경 (e.g., device-plugin 갱신 전, 또는 다른 distribution mechanism 사용 시) 을 위해 standalone 으로 배치 가능.
+
+- [ ] **Step 1: 파일 생성**
+
+```bash
+cd ~/git/volcano-vgpu-device-plugin
+cat > volcano-vgpu-vulkan-manifest.yml <<'EOF'
+# HAMi Vulkan implicit layer manifest 를 host 노드의
+# /usr/local/vgpu/vulkan/implicit_layer.d/hami.json 으로 배치하는 DaemonSet.
+# device-plugin image 의 vgpu-init.sh 가 이미 같은 작업을 하므로 일반적으로 불필요.
+# device-plugin 갱신 전 또는 별도 init 시나리오용 fallback.
+---
+apiVersion: v1
+kind: ConfigMap
+metadata:
+  name: hami-vulkan-manifest
+  namespace: kube-system
+data:
+  hami.json: |
+    {
+        "file_format_version": "1.0.0",
+        "layer": {
+            "name": "VK_LAYER_HAMI_vgpu",
+            "type": "GLOBAL",
+            "library_path": "/usr/local/vgpu/libvgpu.so",
+            "api_version": "1.3.0",
+            "implementation_version": "1",
+            "description": "HAMi Vulkan vGPU memory partitioning layer",
+            "enable_environment": {
+                "HAMI_VULKAN_ENABLE": "1"
+            }
+        }
+    }
+---
+apiVersion: apps/v1
+kind: DaemonSet
+metadata:
+  name: hami-vulkan-manifest-installer
+  namespace: kube-system
+  labels:
+    app: hami-vulkan-manifest-installer
+spec:
+  selector:
+    matchLabels:
+      app: hami-vulkan-manifest-installer
+  template:
+    metadata:
+      labels:
+        app: hami-vulkan-manifest-installer
+    spec:
+      tolerations:
+      - operator: Exists
+      nodeSelector:
+        nvidia.com/gpu.present: "true"
+      hostPID: false
+      restartPolicy: Always
+      containers:
+      - name: installer
+        image: busybox:1.36
+        command:
+        - /bin/sh
+        - -c
+        - |
+          set -eu
+          mkdir -p /host/usr/local/vgpu/vulkan/implicit_layer.d
+          cp -f /manifest/hami.json \
+                /host/usr/local/vgpu/vulkan/implicit_layer.d/hami.json
+          echo "[hami-vulkan-manifest] installed at /usr/local/vgpu/vulkan/implicit_layer.d/hami.json"
+          # 종료하지 않고 sleep — DaemonSet 이라 restart 루프 회피
+          sleep infinity
+        volumeMounts:
+        - name: manifest
+          mountPath: /manifest
+          readOnly: true
+        - name: host-vgpu
+          mountPath: /host/usr/local/vgpu
+        securityContext:
+          runAsUser: 0
+      volumes:
+      - name: manifest
+        configMap:
+          name: hami-vulkan-manifest
+      - name: host-vgpu
+        hostPath:
+          path: /usr/local/vgpu
+          type: DirectoryOrCreate
+EOF
+```
+
+- [ ] **Step 2: yaml syntax 검증**
+
+```bash
+cd ~/git/volcano-vgpu-device-plugin
+kubectl apply --dry-run=client -f volcano-vgpu-vulkan-manifest.yml
+```
+
+기대값:
+
+```
+configmap/hami-vulkan-manifest created (dry run)
+daemonset.apps/hami-vulkan-manifest-installer created (dry run)
+```
+
+- [ ] **Step 3: Commit**
+
+```bash
+cd ~/git/volcano-vgpu-device-plugin
+git add volcano-vgpu-vulkan-manifest.yml
+git commit -m "feat(deploy): add fallback DaemonSet for Vulkan manifest placement"
+```
+
+---
+
+## Task 9: 사용 예시 yaml + 사용 가이드 문서
+
+**Files:**
+- Create: `examples/vulkan-pod.yaml`
+- Create: `doc/vulkan-vgpu.md`
+
+- [ ] **Step 1: examples/vulkan-pod.yaml 생성**
+
+```bash
+cd ~/git/volcano-vgpu-device-plugin
+mkdir -p examples
+cat > examples/vulkan-pod.yaml <<'EOF'
+# HAMi Vulkan vGPU 분할 활성화 예시 pod.
+# - annotation `hami.io/vulkan: "true"` 가 HAMi mutating webhook 을 통해
+#   `HAMI_VULKAN_ENABLE=1` 와 NVIDIA_DRIVER_CAPABILITIES 의 graphics 캡을 주입.
+# - device-plugin 이 hami.json 을 자동 mount 하여 Vulkan loader 가 layer 인식.
+# - libvgpu (HAMi-core) 의 vkAllocateMemory 후킹이 nvidia.com/gpumem 한계 enforce.
+apiVersion: v1
+kind: Pod
+metadata:
+  name: vulkan-vgpu-demo
+  annotations:
+    hami.io/vulkan: "true"
+spec:
+  schedulerName: volcano
+  containers:
+  - name: vulkan-app
+    image: nvidia/cuda:12.2.0-runtime-ubuntu22.04
+    command: ["sleep", "infinity"]
+    resources:
+      limits:
+        nvidia.com/gpu: 1
+        nvidia.com/gpumem: 4000  # MiB
+        nvidia.com/gpucores: 50  # %
+EOF
+```
+
+- [ ] **Step 2: doc/vulkan-vgpu.md 생성**
+
+```bash
+cd ~/git/volcano-vgpu-device-plugin
+cat > doc/vulkan-vgpu.md <<'EOF'
+# Vulkan vGPU 지원
+
+이 device-plugin 은 CUDA workload 와 동일하게 **Vulkan workload** 도 메모리 partitioning 을 enforce 한다. Volcano scheduler 와 함께 사용한다.
+
+## 작동 원리
+
+1. **libvgpu (HAMi-core) vulkan-layer**: `vkAllocateMemory` 를 후킹하여 `CUDA_DEVICE_MEMORY_LIMIT_0` 를 enforce.
+2. **device-plugin Allocate**: 호스트의 `/usr/local/vgpu/vulkan/implicit_layer.d/hami.json` 이 존재하면 container 의 `/etc/vulkan/implicit_layer.d/hami.json` 으로 bind-mount.
+3. **HAMi mutating webhook (별도 install)**: pod annotation `hami.io/vulkan: "true"` 검사 → `HAMI_VULKAN_ENABLE=1` env + `NVIDIA_DRIVER_CAPABILITIES` 에 `graphics` 추가.
+4. **enable_environment 가드**: manifest 의 `enable_environment: HAMI_VULKAN_ENABLE=1` 매치 시에만 layer 로드. annotation 없는 pod 은 영향 없음.
+
+## 설치 (한 번만)
+
+### 1. device-plugin 갱신 (이미 새 image)
+
+```bash
+kubectl apply -f volcano-vgpu-device-plugin.yml
+# 또는 CDI 모드:
+# kubectl apply -f volcano-vgpu-device-plugin-cdi.yml
+```
+
+### 2. HAMi mutating webhook 별도 install (helm)
+
+```bash
+helm repo add hami https://project-hami.github.io/HAMi
+helm install hami-webhook hami/hami \
+    --namespace kube-system \
+    --set devicePlugin.enabled=false \
+    --set scheduler.kubeScheduler.enabled=false \
+    --set scheduler.extender.enabled=false \
+    --set scheduler.admissionWebhook.enabled=true
+```
+
+### 3. (선택) Fallback manifest DaemonSet
+
+device-plugin 이 init 으로 manifest 를 host 에 자동 배치하지 못하는 환경에서:
+
+```bash
+kubectl apply -f volcano-vgpu-vulkan-manifest.yml
+```
+
+## 사용
+
+pod 에 annotation `hami.io/vulkan: "true"` + `nvidia.com/gpumem` resource limit 추가:
+
+```yaml
+apiVersion: v1
+kind: Pod
+metadata:
+  annotations:
+    hami.io/vulkan: "true"
+spec:
+  containers:
+  - name: vulkan-app
+    image: <Vulkan 사용 image>
+    resources:
+      limits:
+        nvidia.com/gpu: 1
+        nvidia.com/gpumem: 4000
+```
+
+전체 예시: `examples/vulkan-pod.yaml`
+
+## 검증
+
+container 안에서:
+
+```bash
+# 1. env 주입 확인
+env | grep -E '(HAMI_VULKAN|DRIVER_CAPABILITIES)'
+# 기대: HAMI_VULKAN_ENABLE=1, NVIDIA_DRIVER_CAPABILITIES=...,graphics
+
+# 2. manifest 파일 mount 확인
+ls /etc/vulkan/implicit_layer.d/hami.json
+
+# 3. Vulkan tool 로 GPU memory limit 확인 (Vulkan app 실행 시)
+# 예: Isaac Sim Kit boot log 의 'GPU Memory: <limit> MB'
+```
+EOF
+```
+
+- [ ] **Step 3: yaml syntax 검증**
+
+```bash
+cd ~/git/volcano-vgpu-device-plugin
+kubectl apply --dry-run=client -f examples/vulkan-pod.yaml
+```
+
+기대값: `pod/vulkan-vgpu-demo created (dry run)`.
+
+- [ ] **Step 4: Commit**
+
+```bash
+cd ~/git/volcano-vgpu-device-plugin
+git add examples/vulkan-pod.yaml doc/vulkan-vgpu.md
+git commit -m "docs(vulkan): usage guide + sample pod"
+```
+
+---
+
+## Task 10: image 빌드 + harbor push
+
+**Files:**
+- (없음 — 운영 작업)
+
+- [ ] **Step 1: 빌더 머신 (ws-node074 = 10.61.3.74) 으로 코드 sync**
+
+```bash
+# 로컬 (mac) 에서
+cd ~/git/volcano-vgpu-device-plugin
+git push origin feat/vulkan-vgpu-support
+```
+
+빌더 머신 측:
+
+```bash
+ssh root@10.61.3.74 'cd /root && \
+  git clone https://github.com/xiilab/volcano-vgpu-device-plugin.git volcano-vgpu-device-plugin-vulkan 2>/dev/null || true; \
+  cd /root/volcano-vgpu-device-plugin-vulkan && \
+  git fetch origin && git checkout feat/vulkan-vgpu-support && git submodule update --init --recursive'
+```
+
+- [ ] **Step 2: 빌더 머신에서 image 빌드 + push**
+
+```bash
+ssh root@10.61.3.74 'cd /root/volcano-vgpu-device-plugin-vulkan && \
+  docker build -f docker/Dockerfile \
+    -t 10.61.3.124:30002/library/volcano-vgpu-device-plugin:vulkan-v1 . && \
+  docker push 10.61.3.124:30002/library/volcano-vgpu-device-plugin:vulkan-v1'
+```
+
+기대값: 마지막에 `digest: sha256:... size: ...` 출력.
+
+- [ ] **Step 3: image 정상 push 검증**
+
+```bash
+ssh root@10.61.3.74 'docker pull 10.61.3.124:30002/library/volcano-vgpu-device-plugin:vulkan-v1 && \
+  docker run --rm --entrypoint /bin/sh \
+    10.61.3.124:30002/library/volcano-vgpu-device-plugin:vulkan-v1 \
+    -c "ls /k8s-vgpu/lib/nvidia/vulkan/implicit_layer.d/hami.json"'
+```
+
+기대값: `/k8s-vgpu/lib/nvidia/vulkan/implicit_layer.d/hami.json` 출력 (파일 존재 확인).
+
+---
+
+## Task 11: 클러스터 deploy
+
+**Files:**
+- (없음 — 운영 작업)
+
+- [ ] **Step 1: 신규 manifest DaemonSet apply (fallback, 권장)**
+
+```bash
+kubectl --context=<volcano-cluster> apply -f volcano-vgpu-vulkan-manifest.yml
+```
+
+기대값:
+```
+configmap/hami-vulkan-manifest created
+daemonset.apps/hami-vulkan-manifest-installer created
+```
+
+- [ ] **Step 2: DaemonSet pod 들 Ready 대기 + manifest 파일 host 에 배치 확인**
+
+```bash
+until kubectl --context=<volcano-cluster> -n kube-system get ds hami-vulkan-manifest-installer \
+    -o jsonpath='{.status.numberReady}/{.status.desiredNumberScheduled}{"\n"}' 2>/dev/null \
+    | grep -q "^[1-9].*/[1-9]"; do sleep 3; done
+
+# host 에 파일 있는지 (DaemonSet pod 안에서)
+kubectl --context=<volcano-cluster> -n kube-system get pod -l app=hami-vulkan-manifest-installer \
+    -o name | head -1 | xargs -I{} kubectl --context=<volcano-cluster> -n kube-system exec {} -- \
+    ls -la /host/usr/local/vgpu/vulkan/implicit_layer.d/hami.json
+```
+
+기대값: 파일 존재.
+
+- [ ] **Step 3: device-plugin DaemonSet 갱신 (rolling update)**
+
+```bash
+kubectl --context=<volcano-cluster> apply -f volcano-vgpu-device-plugin.yml
+# 또는 CDI:
+# kubectl --context=<volcano-cluster> apply -f volcano-vgpu-device-plugin-cdi.yml
+```
+
+- [ ] **Step 4: device-plugin pod ready 대기 + new image 사용 확인**
+
+```bash
+until kubectl --context=<volcano-cluster> -n kube-system get ds volcano-vgpu-device-plugin \
+    -o jsonpath='{.status.numberReady}/{.status.desiredNumberScheduled}{"\n"}' 2>/dev/null \
+    | grep -q "^[1-9].*/[1-9]"; do sleep 3; done
+
+kubectl --context=<volcano-cluster> -n kube-system get pod -l app=volcano-vgpu-device-plugin \
+    -o jsonpath='{.items[*].spec.containers[*].image}{"\n"}'
+```
+
+기대값: 모든 pod 의 image 가 `10.61.3.124:30002/library/volcano-vgpu-device-plugin:vulkan-v1`.
+
+---
+
+## Task 12: HAMi webhook 별도 install (helm)
+
+**Files:**
+- (없음 — 운영 작업)
+
+- [ ] **Step 1: HAMi helm repo 추가**
+
+```bash
+helm repo add hami https://project-hami.github.io/HAMi
+helm repo update
+```
+
+- [ ] **Step 2: webhook only values 로 install**
+
+```bash
+helm install hami-webhook hami/hami \
+    --kube-context <volcano-cluster> \
+    --namespace kube-system \
+    --set devicePlugin.enabled=false \
+    --set scheduler.kubeScheduler.enabled=false \
+    --set scheduler.extender.enabled=false \
+    --set scheduler.admissionWebhook.enabled=true
+```
+
+- [ ] **Step 3: webhook pod ready 대기**
+
+```bash
+until kubectl --context=<volcano-cluster> -n kube-system get deployment \
+    hami-webhook 2>/dev/null \
+    -o jsonpath='{.status.readyReplicas}/{.status.replicas}{"\n"}' \
+    | grep -q "^[1-9].*/[1-9]"; do sleep 3; done
+```
+
+> **Note:** 실제 deployment 이름은 helm chart values 에 따라 다를 수 있다. `kubectl get deploy -n kube-system | grep hami` 로 확인.
+
+- [ ] **Step 4: MutatingWebhookConfiguration 등록 확인**
+
+```bash
+kubectl --context=<volcano-cluster> get mutatingwebhookconfigurations | grep hami
+```
+
+기대값: `hami-webhook` 또는 동등 객체 존재.
+
+---
+
+## Task 13: E2E 검증 — 4 케이스
+
+**Files:**
+- Use: `examples/vulkan-pod.yaml`
+
+- [ ] **Step 1: Case 1 — annotation 있는 Vulkan pod 의 partition enforce**
+
+```bash
+kubectl --context=<volcano-cluster> apply -f examples/vulkan-pod.yaml
+kubectl --context=<volcano-cluster> wait --for=condition=Ready pod/vulkan-vgpu-demo --timeout=60s
+
+# env 주입 확인
+kubectl --context=<volcano-cluster> exec vulkan-vgpu-demo -- env | grep -E "(HAMI_VULKAN|DRIVER_CAPABILITIES)"
+# 기대: HAMI_VULKAN_ENABLE=1, NVIDIA_DRIVER_CAPABILITIES=...,graphics
+
+# manifest mount 확인
+kubectl --context=<volcano-cluster> exec vulkan-vgpu-demo -- ls /etc/vulkan/implicit_layer.d/hami.json
+# 기대: 파일 존재
+
+# CUDA_DEVICE_MEMORY_LIMIT 확인 (HAMi-core 환경)
+kubectl --context=<volcano-cluster> exec vulkan-vgpu-demo -- env | grep CUDA_DEVICE_MEMORY_LIMIT
+# 기대: CUDA_DEVICE_MEMORY_LIMIT_0=4000m
+```
+
+- [ ] **Step 2: Case 1 — Vulkan app 실제 메모리 enforce 확인 (Isaac Sim 또는 vulkaninfo)**
+
+Isaac Sim 같은 Vulkan workload pod 에서:
+
+```bash
+# Kit boot log 의 GPU Memory 라인 확인
+kubectl --context=<volcano-cluster> logs <isaac-sim-pod> | grep "GPU Memory"
+# 기대: | 0 | NVIDIA RTX 6000 Ada Generation | Yes: 0 | | 4000 MB | ...
+#       (전체 GPU 가 아닌 partition 한계로 표시되어야 함)
+```
+
+또는 vulkan tool 로 device memory 조회:
+
+```bash
+kubectl --context=<volcano-cluster> exec vulkan-vgpu-demo -- vulkaninfo --summary 2>&1 | grep -i memory
+```
+
+- [ ] **Step 3: Case 2 — annotation 없는 Vulkan pod 은 full GPU**
+
+```bash
+cat > /tmp/vulkan-noanno.yaml <<'EOF'
+apiVersion: v1
+kind: Pod
+metadata:
+  name: vulkan-noanno
+spec:
+  schedulerName: volcano
+  containers:
+  - name: vulkan-app
+    image: nvidia/cuda:12.2.0-runtime-ubuntu22.04
+    command: ["sleep", "infinity"]
+    resources:
+      limits:
+        nvidia.com/gpu: 1
+        nvidia.com/gpumem: 4000
+EOF
+kubectl --context=<volcano-cluster> apply -f /tmp/vulkan-noanno.yaml
+kubectl --context=<volcano-cluster> wait --for=condition=Ready pod/vulkan-noanno --timeout=60s
+
+# HAMI_VULKAN_ENABLE 가 없어야 함
+kubectl --context=<volcano-cluster> exec vulkan-noanno -- env | grep HAMI_VULKAN_ENABLE || \
+    echo "[OK] HAMI_VULKAN_ENABLE not injected"
+```
+
+기대값: `[OK] HAMI_VULKAN_ENABLE not injected`. CUDA_DEVICE_MEMORY_LIMIT 는 여전히 4000m (annotation 무관, device-plugin 이 enforce). Vulkan layer 만 안 로드.
+
+- [ ] **Step 4: Case 3 — annotation 있는 CUDA-only pod 동작 정상**
+
+```bash
+cat > /tmp/cuda-anno.yaml <<'EOF'
+apiVersion: v1
+kind: Pod
+metadata:
+  name: cuda-anno
+  annotations:
+    hami.io/vulkan: "true"
+spec:
+  schedulerName: volcano
+  containers:
+  - name: cuda-app
+    image: nvidia/cuda:12.2.0-base-ubuntu22.04
+    command: ["nvidia-smi"]
+    resources:
+      limits:
+        nvidia.com/gpu: 1
+        nvidia.com/gpumem: 4000
+EOF
+kubectl --context=<volcano-cluster> apply -f /tmp/cuda-anno.yaml
+kubectl --context=<volcano-cluster> wait --for=condition=Ready pod/cuda-anno --timeout=60s 2>/dev/null
+sleep 5  # Job-style 종료 기다리기
+kubectl --context=<volcano-cluster> logs cuda-anno | head -20
+```
+
+기대값: nvidia-smi 출력. `Total memory` 가 4000 MiB (HAMi-core 가 가짜 한계 표시) 또는 정상 GPU 정보. Vulkan 영향 없이 CUDA 동작.
+
+- [ ] **Step 5: Case 4 — 기존 standard CUDA workload 회귀 (annotation 없음, gpumem 만)**
+
+```bash
+cat > /tmp/cuda-standard.yaml <<'EOF'
+apiVersion: v1
+kind: Pod
+metadata:
+  name: cuda-standard
+spec:
+  schedulerName: volcano
+  containers:
+  - name: cuda-app
+    image: nvidia/cuda:12.2.0-base-ubuntu22.04
+    command: ["nvidia-smi"]
+    resources:
+      limits:
+        nvidia.com/gpu: 1
+        nvidia.com/gpumem: 8000
+EOF
+kubectl --context=<volcano-cluster> apply -f /tmp/cuda-standard.yaml
+kubectl --context=<volcano-cluster> wait --for=condition=Ready pod/cuda-standard --timeout=60s 2>/dev/null
+sleep 5
+kubectl --context=<volcano-cluster> logs cuda-standard | head -20
+```
+
+기대값: nvidia-smi 정상 출력. CUDA_DEVICE_MEMORY_LIMIT_0=8000m. HAMi-core CUDA enforce 정상.
+
+- [ ] **Step 6: 4 케이스 모두 PASS 면 정리**
+
+```bash
+kubectl --context=<volcano-cluster> delete pod vulkan-vgpu-demo vulkan-noanno cuda-anno cuda-standard --ignore-not-found
+rm -f /tmp/vulkan-noanno.yaml /tmp/cuda-anno.yaml /tmp/cuda-standard.yaml
+```
+
+---
+
+## Task 14: PR 작성 + merge
+
+**Files:**
+- (없음 — git 작업)
+
+- [ ] **Step 1: 모든 commit 확인**
+
+```bash
+cd ~/git/volcano-vgpu-device-plugin
+git log --oneline feat/vulkan-vgpu-support ^main 2>&1
+```
+
+기대 commits (Task 1-9 의 commit):
+
+```
+feat(plugin): auto-inject Vulkan implicit layer manifest mount
+feat(image): ship Vulkan implicit layer manifest from libvgpu
+build: install libvulkan-dev in nvbuild stage for Vulkan layer compile
+deps: bump libvgpu to 8d4f712 (vulkan-layer support)
+chore: bump image to vulkan-v1 in deploy yaml
+feat(deploy): add fallback DaemonSet for Vulkan manifest placement
+docs(vulkan): usage guide + sample pod
+build(init): copy Vulkan manifest to host during vgpu-init   (있는 경우)
+chore: start feat/vulkan-vgpu-support branch
+```
+
+- [ ] **Step 2: PR 작성**
+
+```bash
+cd ~/git/volcano-vgpu-device-plugin
+gh pr create \
+    --base main \
+    --head feat/vulkan-vgpu-support \
+    --title "feat: Vulkan vGPU memory partitioning support" \
+    --body "$(cat <<'EOF'
+## Summary
+
+- libvgpu submodule 을 vulkan-layer 가 포함된 SHA 로 갱신
+- device-plugin Allocate 가 host 의 hami.json 을 container 에 자동 mount
+- Dockerfile builder stage 에 libvulkan-dev 추가 + runtime stage 에 hami.json ship
+- 신규 yaml 추가: fallback manifest DaemonSet, 사용 예시
+- 사용 가이드 문서 추가
+
+## 동작 원리
+
+HAMi 본가의 Vulkan vGPU 지원 (commit 0150ea7) 패턴을 그대로 포팅. annotation `hami.io/vulkan: "true"` 가 붙은 pod 만 HAMi mutating webhook 이 HAMI_VULKAN_ENABLE=1 env 를 주입 → manifest 의 enable_environment 가드 매치 → Vulkan layer 로드 → vkAllocateMemory 후킹으로 메모리 enforce.
+
+## 운영 deploy
+
+1. `kubectl apply -f volcano-vgpu-vulkan-manifest.yml`  (선택)
+2. `kubectl apply -f volcano-vgpu-device-plugin.yml`  (또는 CDI)
+3. `helm install hami-webhook hami/hami --set ...`  (webhook only)
+
+## Test plan
+
+- [ ] Case 1: annotation 있는 Vulkan pod → memory enforce
+- [ ] Case 2: annotation 없는 Vulkan pod → full GPU memory
+- [ ] Case 3: annotation 있는 CUDA-only pod → CUDA 정상
+- [ ] Case 4: 기존 CUDA workload 회귀 → gpumem enforce 정상
+EOF
+)"
+```
+
+- [ ] **Step 3: PR review 후 merge**
+
+reviewer 의 피드백 적용. merge 시 squash 또는 rebase 정책은 fork 의 기존 관행 따른다.
+
+---
+
+## 참고 자료
+
+- HAMi 본가 commit `0150ea7`: device-plugin Vulkan manifest auto-inject
+- HAMi 본가 commit `50b37ff`: Dockerfile libvulkan-dev 추가
+- HAMi spec `docs/superpowers/specs/2026-04-21-vulkan-vgpu-partitioning-design.md`
+- HAMi plan `docs/superpowers/plans/2026-04-21-vulkan-vgpu-partitioning.md`
+- HAMi 사용 가이드 `docs/vulkan-vgpu-support.md`
+- HAMi E2E 체크리스트 `docs/vulkan-vgpu-e2e-checklist.md`
+- 메모리 노트 `project_hami_vulkan_verification.md`
+- 본 plan 의 spec `docs/superpowers/specs/2026-04-27-volcano-vulkan-vgpu-design.md`
diff --git a/docs/superpowers/plans/2026-04-28-hami-isolation-step-a-namespace-opt-in.md b/docs/superpowers/plans/2026-04-28-hami-isolation-step-a-namespace-opt-in.md
new file mode 100644
index 000000000..781066ac1
--- /dev/null
+++ b/docs/superpowers/plans/2026-04-28-hami-isolation-step-a-namespace-opt-in.md
@@ -0,0 +1,620 @@
+# HAMi vGPU 격리 — Step A: Namespace opt-in/out Implementation Plan
+
+> **For agentic workers:** REQUIRED SUB-SKILL: Use superpowers:subagent-driven-development (recommended) or superpowers:executing-plans to implement this plan task-by-task. Steps use checkbox (`- [ ]`) syntax for tracking.
+
+**Goal:** HAMi 격리 메커니즘 (LD_PRELOAD inject + Vulkan implicit layer manifest mount + webhook env mutation) 을 노드 wide 강제 적용에서 namespace label 기반 opt-in 으로 변경하여, isaac-launchable namespace 의 Isaac Sim Kit 를 정상 동작 baseline 으로 유지하면서 다른 GPU workload namespace 만 격리 enforce 한다.
+
+**Architecture:** webhook 의 `namespaceSelector` 를 opt-out (`hami.io/webhook NotIn ignore`) 에서 opt-in (`hami.io/vgpu In enabled`) 으로 helm values 변경 + 노드 wide `/usr/local/vgpu/ld.so.preload` 와 `hami.json` 자동 install daemonset 을 비활성 + 검증 namespace 에 label 적용 후 격리 enforce 동작 확인. webhook backend 코드 변경 없이 helm chart values 변경 + cluster 측 daemonset patch 로 완료.
+
+**Tech Stack:** Kubernetes 1.34.3 (k0s), Helm chart `hami` (이번 fork `xiilab/feat/vulkan-vgpu`), kubectl, ws-node074 (RTX 6000 Ada x2, NVIDIA driver 580.142).
+
+**Plan scope:** 본 plan 은 design doc 의 4 step 중 **Step A 만** 다룬다. Step B (HAMi-core hook hardening), Step C (Vulkan layer compat), Step D (isaac-launchable opt-in 활성화) 는 별도 plan 으로 작성.
+
+---
+
+## File Structure
+
+| 파일 | 변경 종류 | 책임 |
+|---|---|---|
+| `charts/hami/values.yaml` | Modify | webhook `namespaceSelector` mode 옵션 (`mode: opt-in \| opt-out`) + 새 default 추가 |
+| `charts/hami/templates/scheduler/webhook.yaml` | Modify | `mode` 값 따라 `namespaceSelector.matchExpressions` 분기 (opt-in 시 `hami.io/vgpu In [enabled]`) |
+| `cluster/runtime/hami-vulkan-manifest-installer.yaml` (신규) | Create | 현재 cluster 에 install 된 ds 의 spec 백업 + 비활성화 패치 (label-based scope 또는 매칭 노드가 없는 nodeSelector 로 사실상 scale 0) — chart 외부 yaml |
+| `cluster/runtime/hami-preload-installer.yaml` (신규) | Create | 노드 wide `/usr/local/vgpu/ld.so.preload` 만든 entity 의 spec 백업 + 비활성화 (예: device-plugin daemonset 의 `deviceconfig` ConfigMap 의 `ld.so.preload` key 비우기 또는 mount 제거) |
+| `docs/superpowers/specs/2026-04-28-hami-isolation-isaac-sim-design.md` | (no change) | 이미 commit 됨 (`d177471`) — Step A 의 spec 근거 |
+
+**Note:** `hami-vulkan-manifest-installer` ds 와 `hami-preload-installer` ds 의 원본 yaml 은 우리 chart 안에 없다 (cluster 측 별도 설치). 본 plan 은 그 ds 들의 현재 cluster 상태를 dump 하고 namespace label 기반 opt-in 으로 변환된 새 yaml 을 cluster 에 apply 한다.
+
+---
+
+## Tasks
+
+### Task 1: 현재 cluster 의 webhook + installer ds spec 을 yaml 로 dump (백업)
+
+**Files:**
+- Create: `cluster/runtime/snapshot-2026-04-28/hami-webhook-mutating.yaml`
+- Create: `cluster/runtime/snapshot-2026-04-28/hami-vulkan-manifest-installer-ds.yaml`
+- Create: `cluster/runtime/snapshot-2026-04-28/volcano-device-plugin-ds.yaml`
+- Create: `cluster/runtime/snapshot-2026-04-28/hami-vulkan-manifest-cm.yaml`
+
+- [ ] **Step 1: snapshot 디렉토리 생성**
+
+```bash
+mkdir -p /Users/xiilab/git/HAMi/cluster/runtime/snapshot-2026-04-28
+cd /Users/xiilab/git/HAMi
+```
+
+- [ ] **Step 2: webhook 현재 spec dump**
+
+```bash
+kubectl get mutatingwebhookconfiguration hami-webhook-webhook -o yaml \
+  > cluster/runtime/snapshot-2026-04-28/hami-webhook-mutating.yaml
+ls -la cluster/runtime/snapshot-2026-04-28/hami-webhook-mutating.yaml
+```
+
+Expected: 파일 존재, size > 0, `namespaceSelector:` 안에 `key: hami.io/webhook` 와 `operator: NotIn` 보임.
+
+- [ ] **Step 3: 두 daemonset + ConfigMap dump**
+
+```bash
+kubectl -n kube-system get ds hami-vulkan-manifest-installer -o yaml \
+  > cluster/runtime/snapshot-2026-04-28/hami-vulkan-manifest-installer-ds.yaml
+kubectl -n kube-system get ds volcano-device-plugin -o yaml \
+  > cluster/runtime/snapshot-2026-04-28/volcano-device-plugin-ds.yaml
+kubectl -n kube-system get cm hami-vulkan-manifest -o yaml \
+  > cluster/runtime/snapshot-2026-04-28/hami-vulkan-manifest-cm.yaml
+ls -la cluster/runtime/snapshot-2026-04-28/
+```
+
+Expected: 4 yaml 파일 존재.
+
+- [ ] **Step 4: snapshot commit**
+
+```bash
+git add cluster/runtime/snapshot-2026-04-28/
+git commit -s -m "chore(cluster): snapshot 4-27 새벽 패치 시점의 webhook + installer ds + cm"
+```
+
+Expected: commit 생성, `git log --oneline -1` 에 commit 보임.
+
+---
+
+### Task 2: helm chart values.yaml 에 namespaceSelector mode 옵션 추가
+
+**Files:**
+- Modify: `charts/hami/values.yaml:178-185` (현 namespaceSelector block)
+
+- [ ] **Step 1: 현재 values.yaml 의 namespaceSelector block 확인**
+
+```bash
+sed -n '170,200p' charts/hami/values.yaml
+```
+
+Expected output (values 의 `matchExpressions` 는 비어 있고, opt-out 규칙은 template 에 hard-coded 된 4-line `matchExpressions` 로만 존재하는 상태):
+```yaml
+    namespaceSelector:
+      matchLabels: {}
+      matchExpressions: []
+      ## opt-out: hami.io/webhook=ignore label 가진 namespace 는 webhook 적용 안 함
+      ## (template 에 hard-coded matchExpressions 존재)
+```
+
+- [ ] **Step 2: values.yaml 수정 — mode 옵션 추가**
+
+`charts/hami/values.yaml` 의 `scheduler.admissionWebhook.namespaceSelector` block 을 다음으로 교체:
+
+```yaml
+    # namespaceSelector controls which namespaces the webhook will apply to.
+    # mode:
+    #   "opt-out" (legacy default): apply to all namespaces except those labeled
+    #              hami.io/webhook=ignore. Suitable when most workloads need vGPU
+    #              isolation and a small number opt out.
+    #   "opt-in"  (recommended for clusters with NVIDIA Omniverse / Isaac Sim
+    #              workloads that conflict with HAMi-core hooks): apply ONLY to
+    #              namespaces labeled hami.io/vgpu=enabled. Other namespaces see
+    #              no mutation, no LD_PRELOAD inject, no implicit Vulkan layer.
+    namespaceSelector:
+      mode: opt-in
+      matchLabels: {}
+      matchExpressions: []
+```
+
+- [ ] **Step 3: helm lint 로 syntax 검증**
+
+```bash
+cd /Users/xiilab/git/HAMi
+helm lint charts/hami 2>&1 | tail -5
+```
+
+Expected: `1 chart(s) linted, 0 chart(s) failed`.
+
+- [ ] **Step 4: commit**
+
+```bash
+git add charts/hami/values.yaml
+git commit -s -m "feat(chart): add namespaceSelector.mode (opt-in|opt-out) for webhook" \
+  -m "Adds an explicit mode toggle. opt-in matches namespaces labeled hami.io/vgpu=enabled (recommended for clusters running NVIDIA Omniverse / Isaac Sim workloads that conflict with HAMi-core hooks). opt-out keeps the legacy hami.io/webhook=ignore exclusion behavior. Default switches to opt-in to fail safe — clusters with vGPU workloads must explicitly enable per-namespace."
+```
+
+---
+
+### Task 3: helm chart webhook template 의 namespaceSelector 분기
+
+**Files:**
+- Modify: `charts/hami/templates/scheduler/webhook.yaml` (namespaceSelector block)
+
+- [ ] **Step 1: 현재 webhook template 의 namespaceSelector block 확인**
+
+```bash
+grep -n -A 15 "namespaceSelector:" charts/hami/templates/scheduler/webhook.yaml
+```
+
+Expected: opt-out hard-code (`key: hami.io/webhook, operator: NotIn, values: [ignore]`).
+
+- [ ] **Step 2: namespaceSelector block 을 mode 분기로 교체**
+
+```yaml
+    namespaceSelector:
+      {{- if .Values.scheduler.admissionWebhook.namespaceSelector.matchLabels }}
+      matchLabels:
+        {{- toYaml .Values.scheduler.admissionWebhook.namespaceSelector.matchLabels | nindent 8 }}
+      {{- end }}
+      matchExpressions:
+      {{- if eq (.Values.scheduler.admissionWebhook.namespaceSelector.mode | default "opt-out") "opt-in" }}
+      - key: hami.io/vgpu
+        operator: In
+        values:
+        - enabled
+      {{- else }}
+      - key: hami.io/webhook
+        operator: NotIn
+        values:
+        - ignore
+      {{- end }}
+      {{- if .Values.scheduler.admissionWebhook.whitelistNamespaces }}
+      - key: kubernetes.io/metadata.name
+        operator: NotIn
+        values:
+        {{- toYaml .Values.scheduler.admissionWebhook.whitelistNamespaces | nindent 10 }}
+      {{- end }}
+      {{- if .Values.scheduler.admissionWebhook.namespaceSelector.matchExpressions }}
+      {{- toYaml .Values.scheduler.admissionWebhook.namespaceSelector.matchExpressions | nindent 6 }}
+      {{- end }}
+```
+
+- [ ] **Step 3: helm template render — opt-in mode 일 때 generated YAML 검증**
+
+```bash
+helm template my-hami charts/hami --show-only templates/scheduler/webhook.yaml \
+  --set scheduler.admissionWebhook.namespaceSelector.mode=opt-in 2>&1 \
+  | grep -A 6 namespaceSelector
+```
+
+Expected output 안에:
+```
+matchExpressions:
+- key: hami.io/vgpu
+  operator: In
+  values:
+  - enabled
+```
+
+- [ ] **Step 4: helm template render — opt-out mode 일 때 generated YAML 검증**
+
+```bash
+helm template my-hami charts/hami --show-only templates/scheduler/webhook.yaml \
+  --set scheduler.admissionWebhook.namespaceSelector.mode=opt-out 2>&1 \
+  | grep -A 6 namespaceSelector
+```
+
+Expected output 안에:
+```
+matchExpressions:
+- key: hami.io/webhook
+  operator: NotIn
+  values:
+  - ignore
+```
+
+- [ ] **Step 5: commit**
+
+```bash
+git add charts/hami/templates/scheduler/webhook.yaml
+git commit -s -m "feat(chart): webhook namespaceSelector branches on mode (opt-in|opt-out)" \
+  -m "Renders the matching matchExpressions block based on the new namespaceSelector.mode value (Task 2). opt-in produces 'hami.io/vgpu In [enabled]'; opt-out keeps 'hami.io/webhook NotIn [ignore]'. Whitelist and user-supplied matchExpressions are still appended after the mode-specific entry."
+```
+
+---
+
+### Task 4: cluster 의 webhook MutatingWebhookConfiguration 을 opt-in 으로 직접 patch (helm 재배포 없이)
+
+**Files:**
+- Modify (cluster only): `MutatingWebhookConfiguration/hami-webhook-webhook`
+
+이 task 는 helm release 재실행이 아니라 **cluster 의 webhook spec 만 직접 patch** 한다 (다른 helm-managed 자원 영향 안 줌). 후속 helm upgrade 시 chart 변경분 (Task 2/3) 과 일치.
+
+- [ ] **Step 1: 현재 webhook namespaceSelector 확인**
+
+```bash
+kubectl get mutatingwebhookconfiguration hami-webhook-webhook \
+  -o jsonpath='{.webhooks[0].namespaceSelector}{"\n"}'
+```
+
+Expected (opt-out):
+```
+{"matchExpressions":[{"key":"hami.io/webhook","operator":"NotIn","values":["ignore"]}]}
+```
+
+- [ ] **Step 2: opt-in 으로 patch**
+
+```bash
+kubectl patch mutatingwebhookconfiguration hami-webhook-webhook --type=json \
+  --patch='[{"op":"replace","path":"/webhooks/0/namespaceSelector","value":{"matchExpressions":[{"key":"hami.io/vgpu","operator":"In","values":["enabled"]}]}}]'
+```
+
+Expected: `mutatingwebhookconfiguration.admissionregistration.k8s.io/hami-webhook-webhook patched`
+
+- [ ] **Step 3: 검증 — opt-in 으로 변경됨**
+
+```bash
+kubectl get mutatingwebhookconfiguration hami-webhook-webhook \
+  -o jsonpath='{.webhooks[0].namespaceSelector}{"\n"}'
+```
+
+Expected:
+```
+{"matchExpressions":[{"key":"hami.io/vgpu","operator":"In","values":["enabled"]}]}
+```
+
+- [ ] **Step 4: isaac-launchable namespace 의 기존 label `hami.io/webhook=ignore` 제거 (이제 불필요)**
+
+```bash
+kubectl label namespace isaac-launchable hami.io/webhook-
+```
+
+Expected: `namespace/isaac-launchable unlabeled`.
+
+- [ ] **Step 5: isaac-launchable pod 재생성 — webhook mutation 0 건 검증**
+
+```bash
+kubectl -n isaac-launchable delete pod -l app=isaac-launchable --wait=false
+sleep 80
+NEWPOD=$(kubectl -n isaac-launchable get pod -l app=isaac-launchable,instance=pod-1 -o jsonpath='{.items[0].metadata.name}')
+echo "POD=$NEWPOD"
+kubectl -n isaac-launchable exec $NEWPOD -c vscode -- bash -lc \
+  'env | grep -E "^(HAMI|LD_PRELOAD|NVIDIA_DRIVER_CAP)" ; ls /etc/vulkan/implicit_layer.d/'
+```
+
+Expected:
+- env 에 `HAMI_VULKAN_ENABLE` 없음 (또는 기존 deployment yaml 에 박힌 것만)
+- env 에 `LD_PRELOAD` 없음
+- `/etc/vulkan/implicit_layer.d/` 에 `hami.json` 없음 (단, ld.so.preload 가 컨테이너 안에 있을 수 있음 — Task 5 에서 처리)
+
+---
+
+### Task 5: 노드 wide `/usr/local/vgpu/ld.so.preload` 와 hami.json install daemonset 비활성화
+
+**Files:**
+- Modify (cluster only): node `ws-node074:/usr/local/vgpu/ld.so.preload` (이미 비어있는 상태 유지)
+- Modify (cluster only): `DaemonSet/hami-vulkan-manifest-installer` (비활성)
+
+- [ ] **Step 1: 노드 ld.so.preload 가 빈 파일 또는 미존재 확인**
+
+```bash
+ssh root@10.61.3.74 'ls -la /usr/local/vgpu/ld.so.preload; cat /usr/local/vgpu/ld.so.preload | wc -c'
+```
+
+Expected: 파일 size 0 또는 1 (빈/newline). 만약 size > 1 이면 비우기:
+
+```bash
+ssh root@10.61.3.74 ': > /usr/local/vgpu/ld.so.preload'
+```
+
+- [ ] **Step 2: hami-vulkan-manifest-installer ds 가 비활성 (nodeSelector hami.io/disabled=true) 확인**
+
+```bash
+kubectl -n kube-system get ds hami-vulkan-manifest-installer \
+  -o jsonpath='{.spec.template.spec.nodeSelector}{"\n"}'
+```
+
+Expected:
+```
+{"hami.io/disabled":"true"}
+```
+
+만약 다른 selector 이거나 nodeSelector 가 아예 없으면 patch (`add` 는 기존 값이 있으면 교체, 없으면 생성):
+
+```bash
+kubectl -n kube-system patch daemonset hami-vulkan-manifest-installer --type='json' \
+  -p='[{"op":"add","path":"/spec/template/spec/nodeSelector","value":{"hami.io/disabled":"true"}}]'
+```
+
+- [ ] **Step 3: 노드 hami.json manifest 가 컨테이너로 mount 안 되는지 검증**
+
+```bash
+NEWPOD=$(kubectl -n isaac-launchable get pod -l app=isaac-launchable,instance=pod-1 -o jsonpath='{.items[0].metadata.name}')
+kubectl -n isaac-launchable exec $NEWPOD -c vscode -- bash -lc 'ls /etc/vulkan/implicit_layer.d/'
+```
+
+Expected: 출력에 `nvidia_layers.json` 만 있고 `hami.json` 없음. **만약 hami.json 있으면**: 노드 `/usr/local/vgpu/vulkan/implicit_layer.d/hami.json` 도 삭제 필요:
+
+```bash
+ssh root@10.61.3.74 'rm -f /usr/local/vgpu/vulkan/implicit_layer.d/hami.json; ls /usr/local/vgpu/vulkan/implicit_layer.d/'
+```
+
+그 후 pod 재생성 후 재검증.
+
+- [ ] **Step 4: isaac-launchable runheadless.sh 5번 baseline 검증 — 5/5 alive 유지**
+
+```bash
+NEWPOD=$(kubectl -n isaac-launchable get pod -l app=isaac-launchable,instance=pod-1 -o jsonpath='{.items[0].metadata.name}')
+kubectl -n isaac-launchable exec $NEWPOD -c vscode -- bash -lc '
+mkdir -p /tmp/v
+for i in 1 2 3 4 5; do
+  pkill -KILL kit 2>/dev/null; sleep 3
+  timeout 50 env ACCEPT_EULA=y /isaac-sim/runheadless.sh >/tmp/v/r$i.log 2>&1
+  EC=$?
+  CRASH=$(grep -cE "Segmentation fault|crash has occurred" /tmp/v/r$i.log)
+  LISTEN=$(ss -tunlp 2>/dev/null | grep -c -E ":49100|:30999")
+  echo "run $i: exit=$EC crash=$CRASH listen=$LISTEN"
+done
+pkill -KILL kit 2>/dev/null
+'
+```
+
+Expected: 5번 모두 `exit=124 crash=0 listen>=1` (alive + signaling listen).
+
+---
+
+### Task 6: 새 검증 namespace `hami-test` 에 격리 enforce 동작 검증
+
+**Files:**
+- Create (cluster only): `Namespace/hami-test` (label `hami.io/vgpu=enabled`)
+- Create: `cluster/runtime/test/cuda-partition-test-pod.yaml`
+
+- [ ] **Step 1: 검증 namespace 만들고 label 적용**
+
+```bash
+kubectl create namespace hami-test --dry-run=client -o yaml | kubectl apply -f -
+kubectl label namespace hami-test hami.io/vgpu=enabled --overwrite
+kubectl get namespace hami-test --show-labels
+```
+
+Expected: label 출력에 `hami.io/vgpu=enabled` 포함.
+
+- [ ] **Step 2: 단순 CUDA test pod manifest 작성**
+
+`cluster/runtime/test/cuda-partition-test-pod.yaml`:
+
+```yaml
+apiVersion: v1
+kind: Pod
+metadata:
+  name: cuda-partition-test
+  namespace: hami-test
+spec:
+  restartPolicy: Never
+  nodeSelector:
+    kubernetes.io/hostname: ws-node074
+  containers:
+  - name: cuda
+    image: 10.61.3.124:30002/library/isaac-launchable-vscode:6.0.0-fix5364
+    command: ["/bin/bash", "-c"]
+    args:
+    - |
+      set -e
+      echo "=== nvidia-smi ==="
+      nvidia-smi --query-gpu=memory.total --format=csv,noheader
+      echo "=== env ==="
+      env | grep -E "^(HAMI|LD_PRELOAD|NVIDIA_DRIVER_CAP)" | sort
+      echo "=== ls /etc/vulkan/implicit_layer.d ==="
+      ls /etc/vulkan/implicit_layer.d/
+      echo "=== ld.so.preload ==="
+      [ -f /etc/ld.so.preload ] && cat /etc/ld.so.preload || echo "(no ld.so.preload)"
+      echo "=== sleep 60 ==="
+      sleep 60
+    resources:
+      limits:
+        volcano.sh/vgpu-number: "1"
+        volcano.sh/vgpu-memory: "23"
+        volcano.sh/vgpu-cores: "50"
+      requests:
+        volcano.sh/vgpu-number: "1"
+        volcano.sh/vgpu-memory: "23"
+        volcano.sh/vgpu-cores: "50"
+```
+
+- [ ] **Step 3: pod 배포 + webhook mutation 적용 검증**
+
+```bash
+kubectl apply -f cluster/runtime/test/cuda-partition-test-pod.yaml
+sleep 30
+kubectl -n hami-test get pod cuda-partition-test -o wide
+kubectl -n hami-test logs cuda-partition-test
+```
+
+Expected logs:
+- `nvidia-smi memory.total` = `23552 MiB` (NVML hook 적용됨)
+- env 에 `HAMI_VULKAN_ENABLE=1` 또는 `LD_PRELOAD=/usr/local/vgpu/libvgpu.so` 둘 중 하나 이상 webhook mutation 으로 주입됨
+- `ls /etc/vulkan/implicit_layer.d/` 출력에 `hami.json` 존재 (`hami.json` 없으면 다음 plan 의 webhook mutation 보완 필요)
+- `ld.so.preload` 에 `/usr/local/vgpu/libvgpu.so` 포함
+
+**중요:** 만약 webhook mutation 이 LD_PRELOAD env 와 hami.json mount 를 자동 주입하지 않으면 (현재 webhook 코드는 HAMI_VULKAN_ENABLE env 와 NVIDIA_DRIVER_CAPABILITIES patch 만 한다고 추정) — 본 Step A 는 격리 enforce 까지 도달 안 함. **Step A 의 진정한 완료는 webhook 이 LD_PRELOAD + libvgpu.so + hami.json mount 까지 자동 주입하도록 확장**. 이는 webhook backend Go 코드 변경 — 본 plan 의 Task 7 로 추가.
+
+- [ ] **Step 4: pod 정리**
+
+```bash
+kubectl -n hami-test delete pod cuda-partition-test
+```
+
+- [ ] **Step 5: test manifest commit**
+
+```bash
+git add cluster/runtime/test/cuda-partition-test-pod.yaml
+git commit -s -m "test(cluster): add cuda-partition-test pod for namespace opt-in 격리 검증"
+```
+
+---
+
+### Task 7: webhook mutation 확장 — LD_PRELOAD env + libvgpu.so volume mount + hami.json 자동 주입
+
+**Files:**
+- Modify: `pkg/scheduler/webhook/*.go` (mutation 로직)
+- Create (chart): `charts/hami/templates/scheduler/hami-vulkan-layer-cm.yaml` (hami.json content for mounting)
+
+이 task 는 webhook backend Go 코드 변경을 대상으로 한다. 단, 본 plan 에서는 아래 Step 1-3 의 **코드 위치 조사 + 결정 게이트** 까지만 수행하고, 인터페이스 정의 · 단위 test 작성 · 실제 Go 코드 수정은 Step A 의 후반부 또는 별도 plan 으로 분리.
+
+- [ ] **Step 1: webhook backend 코드 위치 식별**
+
+```bash
+cd /Users/xiilab/git/HAMi
+find pkg cmd -type f -name "*.go" | xargs grep -lE "MutatingWebhook|admission\\.AdmissionReview|patchOps" 2>/dev/null | head -10
+```
+
+Expected: 1개 이상의 .go 파일 출력. 그 파일이 mutation 로직 entry point.
+
+- [ ] **Step 2: 현재 mutation 로직이 무엇을 patch 하는지 확인**
+
+```bash
+WEBHOOK_FILE=$(find pkg cmd -type f -name "*.go" | xargs grep -lE "MutatingWebhook|admission\\.AdmissionReview" 2>/dev/null | head -1)
+echo "WEBHOOK_FILE=$WEBHOOK_FILE"
+grep -n "HAMI_VULKAN_ENABLE\|NVIDIA_DRIVER_CAPABILITIES\|LD_PRELOAD\|libvgpu\|hami\\.json" "$WEBHOOK_FILE"
+```
+
+이 단계는 실제 코드 베이스 조사. 결과에 따라 다음 step 의 plan 분리 여부 결정.
+
+- [ ] **Step 3: 결정 게이트**
+
+만약 grep 결과:
+- A. webhook 이 **이미** LD_PRELOAD + libvgpu.so mount + hami.json mount 를 주입한다 → Step A 의 Task 6 검증으로 통과 가능. 다음 step (Task 8) 의 진짜 격리 검증 진행.
+- B. webhook 이 HAMI_VULKAN_ENABLE env 만 주입하고 LD_PRELOAD/mount 는 안 한다 → **Step A 를 두 sub-plan 으로 분할**:
+   - A.1 (본 plan Task 1-6 까지): namespaceSelector opt-in + isaac-launchable baseline 보호
+   - A.2 (별도 plan): webhook backend Go 코드에 LD_PRELOAD env + libvgpu.so mount + hami.json mount 주입 추가 — design doc 의 7.2 절 참조
+
+본 plan 에서는 결정만 하고, B 면 별도 plan 으로 분기. A 면 Task 8 로 진행.
+
+---
+
+### Task 8: 통합 검증 — isaac-launchable baseline 유지 + hami-test namespace 격리 enforce 확인
+
+(Task 7 결정 게이트가 A 인 경우만 실행, B 면 별도 plan)
+
+- [ ] **Step 1: isaac-launchable namespace baseline 재확인**
+
+```bash
+NEWPOD=$(kubectl -n isaac-launchable get pod -l app=isaac-launchable,instance=pod-1 -o jsonpath='{.items[0].metadata.name}')
+kubectl -n isaac-launchable exec $NEWPOD -c vscode -- bash -lc \
+  'nvidia-smi --query-gpu=memory.total --format=csv,noheader; env | grep -E "^(HAMI|LD_PRELOAD|NVIDIA_DRIVER_CAP)"; ls /etc/vulkan/implicit_layer.d/'
+```
+
+Expected:
+- `46068 MiB` (HAMi 격리 0, raw)
+- env 에 `HAMI_VULKAN_ENABLE` 또는 `LD_PRELOAD` 없음
+- `/etc/vulkan/implicit_layer.d/` 에 `hami.json` 없음
+
+- [ ] **Step 2: isaac-launchable runheadless.sh 5번 alive 검증**
+
+```bash
+NEWPOD=$(kubectl -n isaac-launchable get pod -l app=isaac-launchable,instance=pod-1 -o jsonpath='{.items[0].metadata.name}')
+kubectl -n isaac-launchable exec $NEWPOD -c vscode -- bash -lc '
+mkdir -p /tmp/baseline
+for i in 1 2 3 4 5; do
+  pkill -KILL kit 2>/dev/null; sleep 3
+  timeout 50 env ACCEPT_EULA=y /isaac-sim/runheadless.sh >/tmp/baseline/r$i.log 2>&1
+  EC=$?
+  CRASH=$(grep -cE "Segmentation fault|crash has occurred" /tmp/baseline/r$i.log)
+  LISTEN=$(ss -tunlp 2>/dev/null | grep -c -E ":49100|:30999")
+  echo "run $i: exit=$EC crash=$CRASH listen=$LISTEN"
+done
+pkill -KILL kit 2>/dev/null
+'
+```
+
+Expected: 5/5 `exit=124 crash=0 listen>=1`.
+
+- [ ] **Step 3: hami-test namespace 의 cuda-partition-test pod 격리 검증**
+
+(Task 6 의 pod manifest 재배포)
+
+```bash
+kubectl apply -f cluster/runtime/test/cuda-partition-test-pod.yaml
+sleep 30
+kubectl -n hami-test logs cuda-partition-test | grep -E "memory.total|HAMI|LD_PRELOAD|hami.json"
+kubectl -n hami-test delete pod cuda-partition-test
+```
+
+Expected logs:
+- `23552 MiB` (NVML 격리 적용)
+- env 에 `LD_PRELOAD=/usr/local/vgpu/libvgpu.so` 또는 `HAMI_VULKAN_ENABLE=1`
+- `hami.json` mount 또는 ld.so.preload 활성
+
+- [ ] **Step 4: PR commit/push**
+
+```bash
+git add docs/superpowers/specs/2026-04-28-hami-isolation-isaac-sim-design.md \
+        docs/superpowers/plans/2026-04-28-hami-isolation-step-a-namespace-opt-in.md \
+        charts/hami/values.yaml \
+        charts/hami/templates/scheduler/webhook.yaml \
+        cluster/runtime/snapshot-2026-04-28/ \
+        cluster/runtime/test/cuda-partition-test-pod.yaml
+git status --short
+git log --oneline -10
+git push xiilab feat/vulkan-vgpu
+```
+
+Expected: push 성공.
+
+- [ ] **Step 5: PR #1803 follow-up 코멘트 등록 — Step A 완료 보고**
+
+```bash
+cat > /tmp/pr1803_step_a_done.md <<'EOF'
+## Step A complete — Namespace opt-in/out for HAMi mutating webhook
+
+Switches the webhook namespaceSelector from opt-out (`hami.io/webhook NotIn ignore`) to opt-in (`hami.io/vgpu In enabled`). Clusters that mix HAMi vGPU isolation with NVIDIA Omniverse / Isaac Sim Kit workloads can now keep Isaac Sim namespaces unmutated (no LD_PRELOAD inject, no implicit Vulkan layer manifest) while other namespaces explicitly opt in for full isolation.
+
+### Verification
+
+isaac-launchable namespace (no `hami.io/vgpu` label):
+- `nvidia-smi memory.total` = 46068 MiB (no HAMi injection)
+- `runheadless.sh` 5/5 alive + listening on 49100/30999
+- baseline restored to the working state from before the early-morning 04-27 patch
+
+hami-test namespace (`hami.io/vgpu=enabled`):
+- Webhook mutation applied
+- `nvidia-smi memory.total` = 23552 MiB (NVML hook active)
+- LD_PRELOAD / hami.json mount injected (when Task 7 decision gate is A)
+
+### Spec / plan
+
+- spec: `docs/superpowers/specs/2026-04-28-hami-isolation-isaac-sim-design.md`
+- plan: `docs/superpowers/plans/2026-04-28-hami-isolation-step-a-namespace-opt-in.md`
+
+Step B (HAMi-core hook hardening) and Step C (Vulkan layer compat) follow in separate plans so that isaac-launchable can eventually opt-in for full isolation (Step D) once the hook code is hardened to coexist with Carbonite/OptiX/Vulkan layer chain.
+EOF
+gh api repos/Project-HAMi/HAMi/issues/1803/comments -X POST -f body="$(cat /tmp/pr1803_step_a_done.md)" --jq '.html_url'
+```
+
+Expected: PR comment URL 출력.
+
+---
+
+## Self-Review
+
+**1. Spec coverage 점검:**
+- Spec §7 (Step A) 의 webhook namespaceSelector 변경 → Tasks 2, 3, 4 ✅
+- Spec §7 의 노드 wide ld.so.preload 폐기 → Task 5 ✅
+- Spec §7 의 hami-vulkan-manifest-installer 폐기 → Task 5 ✅
+- Spec §7 의 LD_PRELOAD env / volume mount webhook 자동 주입 → Task 7 (결정 게이트, 별도 plan 가능성)
+- Spec §7.4 의 검증 (isaac-launchable baseline + 새 namespace 격리) → Tasks 6, 8 ✅
+- Spec §11 의 위험 (helm release 영향) → Task 4 가 cluster 직접 patch 로 우회 ✅
+
+**2. Placeholder scan:** "TBD"/"TODO"/"implement later" 검색 — 본 plan 에 없음 ✅. 단 Task 7 의 결정 게이트가 webhook 코드 조사 후 분기 — 이는 placeholder 가 아니라 명시적 decision point.
+
+**3. Type consistency:** `hami.io/vgpu=enabled` label key/value 가 Tasks 2, 3, 4, 6, 8 에서 일관 사용 ✅. `hami.io/webhook=ignore` 는 legacy 로 명시적 표시 ✅.
+
+**4. Scope check:** Step A 만 다룸. Step B/C/D 는 별도 plan 명시 ✅. 단 Task 7 이 webhook backend Go 코드 변경 가능성 → 별도 plan 분기 명시.
+
+---
+
+## Open question (실행 시 결정)
+
+**Task 7 의 결정 게이트** 가 A (현재 webhook 이 이미 LD_PRELOAD + mount 주입) 인지 B (env 만 주입, 코드 확장 필요) 인지 — Task 7 Step 1-2 실행 후 결정. B 면 본 plan 의 Task 8 은 별도 sub-plan A.2 로 분리.
diff --git a/docs/superpowers/plans/2026-04-28-hami-isolation-step-b-cuda-hook-hardening.md b/docs/superpowers/plans/2026-04-28-hami-isolation-step-b-cuda-hook-hardening.md
new file mode 100644
index 000000000..4da74f2a7
--- /dev/null
+++ b/docs/superpowers/plans/2026-04-28-hami-isolation-step-b-cuda-hook-hardening.md
@@ -0,0 +1,740 @@
+# HAMi vGPU 격리 — Step B: HAMi-core CUDA/NVML Hook Hardening Implementation Plan
+
+> **For agentic workers:** REQUIRED SUB-SKILL: Use superpowers:subagent-driven-development (recommended) or superpowers:executing-plans to implement this plan task-by-task. Steps use checkbox (`- [ ]`) syntax for tracking.
+
+**Goal:** HAMi-core (`xiilab/HAMi-core`, branch `vulkan-layer`, PR #182) 의 CUDA hook 들에 `cuMemGetInfo_v2` (commit `03f99d7`) 의 robustness 패턴 — "driver 에 먼저 forward → NULL/invalid arg 시 early return → 그 후 HAMi 격리 logic" — 을 적용하여 NVIDIA Isaac Sim Kit 의 OptiX/Aftermath/internal call paths 에서 NULL 인자/missing context 시 NULL deref SegFault 가 발생하지 않게 만든다.
+
+**Architecture:** 본 plan 은 HAMi-core fork 의 `src/cuda/memory.c` 와 `src/cuda/context.c` 의 7개 hook 함수 (memory.c 6개 + context.c 1개) 에 robustness 패턴을 적용한다. 각 함수마다 (1) 단위 test 작성, (2) hardening 코드 적용, (3) test 통과 검증, (4) commit. 마지막에 ws-node074 의 isaac-launchable namespace 에서 `LD_PRELOAD=/usr/local/vgpu/libvgpu.so` 로 단순 cuda + Isaac Sim Kit init 통합 검증.
+
+**Tech Stack:** C, CMake, HAMi-core fork (`/Users/xiilab/git/HAMi/libvgpu`, branch `vulkan-layer`), Docker (build-in-docker target), kubectl (검증), ws-node074 (Mac → SSH).
+
+**Plan scope:** Step B 만 다룬다. Step A.2 (webhook backend LD_PRELOAD env 자동 주입), Step C (Vulkan layer compat), Step D (isaac-launchable opt-in 활성화) 는 별도 plan.
+
+---
+
+## File Structure
+
+| 파일 | 변경 종류 | 책임 |
+|---|---|---|
+| `libvgpu/src/cuda/memory.c` | Modify | cuMemAlloc_v2, cuMemAllocHost_v2, cuMemAllocManaged, cuMemAllocPitch_v2, cuMemHostAlloc, cuMemHostRegister_v2 NULL guard |
+| `libvgpu/src/cuda/context.c` | Modify | cuCtxGetDevice NULL guard |
+| `libvgpu/test/test_cuda_null_guards.c` (신규) | Create | 단위 test — 각 hook 의 NULL/invalid arg 케이스가 driver forward + early return |
+| `libvgpu/test/CMakeLists.txt` | Modify | test_cuda_null_guards.c 빌드 추가 |
+
+각 hook 의 robustness 패턴 (cuMemGetInfo_v2 의 commit `03f99d7` 모범). 단, Tasks 2-7 의 실제 코드는 NULL 인자를 먼저 검사해 그 경우에만 driver 로 바로 forward 하는 변형이다:
+
+```c
+CUresult cuXxx(args) {
+    LOG_DEBUG("cuXxx");
+    ENSURE_INITIALIZED();
+    /* Forward to driver FIRST so NULL/missing-context errors surface
+     * exactly as without HAMi. Never dereference what the driver rejected. */
+    CUresult r = CUDA_OVERRIDE_CALL(cuda_library_entry, cuXxx, args);
+    if (r != CUDA_SUCCESS) return r;
+    if (...args invalid for HAMi logic...) return r;
+    /* HAMi 격리 logic */
+    ...
+}
+```
+
+---
+
+## Tasks
+
+### Task 1: 현재 cuda hook 의 robustness 패턴 audit + fix list 결정
+
+**Files:**
+- Read: `libvgpu/src/cuda/memory.c`, `libvgpu/src/cuda/context.c`
+
+이 task 는 코드 변경 0 — 단지 어떤 hook 이 NULL guard 부족한지 list 작성. 다음 task 들의 정확한 범위 결정.
+
+- [ ] **Step 1: memory.c 의 alloc/free 함수 본문 dump**
+
+```bash
+cd /Users/xiilab/git/HAMi/libvgpu
+for fn in cuMemAlloc_v2 cuMemAllocHost_v2 cuMemAllocManaged cuMemAllocPitch_v2 cuMemHostAlloc cuMemHostRegister_v2 cuMemFree_v2 cuMemGetInfo_v2; do
+  echo "=== $fn ==="
+  awk "/^CUresult $fn\\(/,/^}/" src/cuda/memory.c | head -30
+  echo
+done
+```
+
+- [ ] **Step 2: context.c 의 cuCtxGetDevice 본문 dump**
+
+```bash
+awk "/^CUresult cuCtxGetDevice\\(/,/^}/" src/cuda/context.c
+```
+
+- [ ] **Step 3: fix list 결정 (audit 결과 메모)**
+
+다음 hook 중 robustness 패턴 부재인 것 — 본 plan 의 Tasks 2-7 에서 적용:
+- cuMemAlloc_v2 (Task 2)
+- cuMemAllocHost_v2 (Task 3)
+- cuMemAllocManaged (Task 3)
+- cuMemAllocPitch_v2 (Task 4)
+- cuMemHostAlloc (Task 5)
+- cuMemHostRegister_v2 (Task 6)
+- cuCtxGetDevice (Task 7)
+
+cuMemFree_v2 는 이미 fix (`3bebc8a fix(cuda): fall back to real driver on untracked cuMemFree[Async] pointer`) — skip.
+
+cuMemGetInfo_v2 는 이미 fix (`03f99d7`) — reference 패턴.
+
+- [ ] **Step 4: 결정 commit (audit notes)**
+
+```bash
+cd /Users/xiilab/git/HAMi
+mkdir -p libvgpu/docs/superpowers/notes
+cat > libvgpu/docs/superpowers/notes/2026-04-28-cuda-hook-audit.md <<'EOF'
+# CUDA hook robustness audit — 2026-04-28
+
+Reference fix: commit `03f99d7 fix(cuda): avoid NULL deref in cuMemGetInfo_v2 when caller (OptiX) crashes`
+
+Pattern:
+1. Forward to real driver first (errors surface exactly as without HAMi)
+2. Early return on NULL/invalid args (driver already rejected)
+3. Then HAMi enforcement logic
+
+## Hooks needing the same pattern
+
+- cuMemAlloc_v2 (memory.c:135)
+- cuMemAllocHost_v2 (memory.c:145)
+- cuMemAllocManaged (memory.c:159)
+- cuMemAllocPitch_v2 (memory.c:174)
+- cuMemHostAlloc (memory.c:223)
+- cuMemHostRegister_v2 (memory.c:239)
+- cuCtxGetDevice (context.c:42)
+
+## Already robust (skip)
+
+- cuMemFree_v2 (commit 3bebc8a)
+- cuMemFreeAsync (commit 3bebc8a)
+- cuMemGetInfo_v2 (commit 03f99d7)
+- cuMemCreate (commit 833c62c)
+EOF
+cd libvgpu
+git add docs/superpowers/notes/2026-04-28-cuda-hook-audit.md
+git commit -s -m "docs(notes): cuda hook robustness audit list for Step B hardening"
+```
+
+Expected: commit 생성, 다른 task 의 reference document 로 사용.
+
+---
+
+### Task 2: cuMemAlloc_v2 NULL guard 추가
+
+**Files:**
+- Modify: `libvgpu/src/cuda/memory.c:135-143` (cuMemAlloc_v2)
+- Create: `libvgpu/test/test_cuda_null_guards.c` (본 Task 의 Step 1 에서 신규 생성)
+- Modify: `libvgpu/test/CMakeLists.txt`
+
+- [ ] **Step 1: 단위 test 작성 (failing test 먼저)**
+
+`libvgpu/test/test_cuda_null_guards.c` 생성:
+
+```c
+#include <stdio.h>
+#include <stdlib.h>
+#include <assert.h>
+#include <cuda.h>
+
+extern CUresult cuMemAlloc_v2(CUdeviceptr* dptr, size_t bytesize);
+
+/* Test: NULL dptr should NOT crash — driver returns CUDA_ERROR_INVALID_VALUE,
+ * we propagate that error exactly. */
+static void test_cuMemAlloc_v2_null_dptr(void) {
+    CUresult r = cuMemAlloc_v2(NULL, 4096);
+    assert(r != CUDA_SUCCESS);
+    /* The exact error code depends on driver, but it must not crash and
+     * must not be CUDA_SUCCESS. */
+    printf("[OK] cuMemAlloc_v2(NULL, 4096) returned %d (non-zero, no crash)\n", r);
+}
+
+/* Test: bytesize 0 — driver may accept or reject; we propagate. */
+static void test_cuMemAlloc_v2_zero_size(void) {
+    CUdeviceptr dptr = 0;
+    CUresult r = cuMemAlloc_v2(&dptr, 0);
+    /* Either success with dptr=0 or driver-defined error — we don't crash */
+    printf("[OK] cuMemAlloc_v2(&dptr, 0) returned %d\n", r);
+}
+
+int main(void) {
+    /* Initialize CUDA driver */
+    CUresult r = cuInit(0);
+    if (r != CUDA_SUCCESS) {
+        fprintf(stderr, "cuInit failed: %d (skipping — no GPU?)\n", r);
+        return 0;
+    }
+    CUdevice dev;
+    cuDeviceGet(&dev, 0);
+    CUcontext ctx;
+    cuCtxCreate_v2(&ctx, 0, dev);
+
+    test_cuMemAlloc_v2_null_dptr();
+    test_cuMemAlloc_v2_zero_size();
+
+    cuCtxDestroy_v2(ctx);
+    return 0;
+}
+```
+
+`libvgpu/test/CMakeLists.txt` 에 추가 — 현재 test target 들 옆에 (예: `test_runtime_launch` 다음):
+
+```cmake
+add_executable(test_cuda_null_guards test_cuda_null_guards.c)
+target_link_libraries(test_cuda_null_guards PUBLIC vgpu cuda)
+target_include_directories(test_cuda_null_guards PRIVATE ${CUDA_HOME}/include)
+```
+
+- [ ] **Step 2: 빌드 + 현재 동작 확인 (test 실행 가능한지만, 결과 검증 안 함)**
+
+```bash
+cd /Users/xiilab/git/HAMi/libvgpu
+rsync -az --exclude=build --exclude=.git/objects/pack . root@10.61.3.74:/tmp/libvgpu-build/
+ssh root@10.61.3.74 'cd /tmp/libvgpu-build && rm -rf .git build && git init -q && git add -A 2>&1 | tail -1 && git -c user.email=x@x -c user.name=x commit -q -m local --no-gpg-sign && make build-in-docker 2>&1 | grep -E "Built target|error" | head'
+```
+
+Expected: `Built target vgpu` + `Built target test_cuda_null_guards`.
+
+- [ ] **Step 3: 현재 (변경 전) cuMemAlloc_v2 의 NULL dptr 동작 확인 (baseline)**
+
+```bash
+ssh root@10.61.3.74 'cd /tmp/libvgpu-build/build && LD_PRELOAD=$(pwd)/libvgpu.so ./test_cuda_null_guards 2>&1' | head -20
+```
+
+Expected: 만약 baseline 에서 SegFault 또는 abort → fix 가치 확인. 만약 이미 정상 propagate 면 진짜 fix 필요한지 재검토 (BLOCKED 보고).
+
+- [ ] **Step 4: cuMemAlloc_v2 NULL guard 적용**
+
+`src/cuda/memory.c:135-143` 의 함수를 다음으로 교체:
+
+```c
+CUresult cuMemAlloc_v2(CUdeviceptr* dptr, size_t bytesize) {
+    LOG_INFO("into cuMemAllocing_v2 dptr=%p bytesize=%ld",dptr,bytesize);
+    ENSURE_RUNNING();
+    /* Forward NULL/invalid args to the real driver so error codes match
+     * non-HAMi behavior. NVIDIA OptiX/Aftermath internals can call us with
+     * NULL during early init paths; dereferencing would SegFault. */
+    if (dptr == NULL) {
+        return CUDA_OVERRIDE_CALL(cuda_library_entry, cuMemAlloc_v2, dptr, bytesize);
+    }
+    CUresult res = allocate_raw(dptr,bytesize);
+    if (res!=CUDA_SUCCESS)
+        return res;
+    LOG_INFO("res=%d, cuMemAlloc_v2 success dptr=%p bytesize=%lu",0,(void *)*dptr,bytesize);
+    return CUDA_SUCCESS;
+}
+```
+
+- [ ] **Step 5: rebuild + test 실행**
+
+```bash
+cd /Users/xiilab/git/HAMi/libvgpu
+rsync -az --exclude=build --exclude=.git/objects/pack . root@10.61.3.74:/tmp/libvgpu-build/
+ssh root@10.61.3.74 '
+cd /tmp/libvgpu-build && rm -rf .git build && git init -q && git add -A 2>&1 | tail -1 && \
+  git -c user.email=x@x -c user.name=x commit -q -m local --no-gpg-sign && \
+  make build-in-docker 2>&1 | grep -E "Built target|error" | head && \
+  cd build && LD_PRELOAD=$(pwd)/libvgpu.so ./test_cuda_null_guards 2>&1 | head -20
+'
+```
+
+Expected: `[OK] cuMemAlloc_v2(NULL, 4096) returned <non-zero error>` (no crash). `[OK] cuMemAlloc_v2(&dptr, 0) returned <code>`.
+
+- [ ] **Step 6: commit**
+
+```bash
+cd /Users/xiilab/git/HAMi/libvgpu
+git add src/cuda/memory.c test/test_cuda_null_guards.c test/CMakeLists.txt
+git commit -s -m "fix(cuda): add NULL dptr guard to cuMemAlloc_v2 (OptiX/Aftermath robustness)" \
+  -m "Forwards NULL dptr calls to the real CUDA driver so the caller sees the driver's defined error code (CUDA_ERROR_INVALID_VALUE) instead of HAMi dereferencing the NULL inside allocate_raw. NVIDIA OptiX/Aftermath internal init paths historically pass NULL during fallback probes; without this guard libvgpu.so SegFaults inside Isaac Sim Kit init under LD_PRELOAD. Pattern matches commit 03f99d7 (cuMemGetInfo_v2)."
+```
+
+---
+
+### Task 3: cuMemAllocHost_v2 + cuMemAllocManaged NULL guards
+
+**Files:**
+- Modify: `libvgpu/src/cuda/memory.c:145-157, 159-172`
+- Modify: `libvgpu/test/test_cuda_null_guards.c` (test 추가)
+
+- [ ] **Step 1: test 추가 (test_cuda_null_guards.c)**
+
+`libvgpu/test/test_cuda_null_guards.c` 의 main 위에 추가:
+
+```c
+extern CUresult cuMemAllocHost_v2(void** hptr, size_t bytesize);
+extern CUresult cuMemAllocManaged(CUdeviceptr* dptr, size_t bytesize, unsigned int flags);
+
+static void test_cuMemAllocHost_v2_null_hptr(void) {
+    CUresult r = cuMemAllocHost_v2(NULL, 4096);
+    assert(r != CUDA_SUCCESS);
+    printf("[OK] cuMemAllocHost_v2(NULL, 4096) returned %d\n", r);
+}
+
+static void test_cuMemAllocManaged_null_dptr(void) {
+    CUresult r = cuMemAllocManaged(NULL, 4096, CU_MEM_ATTACH_GLOBAL);
+    assert(r != CUDA_SUCCESS);
+    printf("[OK] cuMemAllocManaged(NULL, 4096) returned %d\n", r);
+}
+```
+
+main() 에 호출 추가:
+```c
+test_cuMemAllocHost_v2_null_hptr();
+test_cuMemAllocManaged_null_dptr();
+```
+
+- [ ] **Step 2: cuMemAllocHost_v2 + cuMemAllocManaged hardening**
+
+`memory.c:145-157` 의 cuMemAllocHost_v2:
+
+```c
+CUresult cuMemAllocHost_v2(void** hptr, size_t bytesize) {
+    LOG_INFO("into cuMemAllocHost_v2 hptr=%p bytesize=%ld",hptr,bytesize);
+    ENSURE_RUNNING();
+    if (hptr == NULL) {
+        return CUDA_OVERRIDE_CALL(cuda_library_entry, cuMemAllocHost_v2, hptr, bytesize);
+    }
+    /* (existing logic preserved) */
+    CUresult res = CUDA_OVERRIDE_CALL(cuda_library_entry, cuMemAllocHost_v2, hptr, bytesize);
+    if (res != CUDA_SUCCESS) return res;
+    LOG_INFO("res=%d, cuMemAllocHost_v2 success",0);
+    return CUDA_SUCCESS;
+}
+```
+
+`memory.c:159-172` 의 cuMemAllocManaged:
+
+```c
+CUresult cuMemAllocManaged(CUdeviceptr* dptr, size_t bytesize, unsigned int flags) {
+    LOG_INFO("into cuMemAllocManaged dptr=%p bytesize=%ld flags=%u",dptr,bytesize,flags);
+    ENSURE_RUNNING();
+    if (dptr == NULL) {
+        return CUDA_OVERRIDE_CALL(cuda_library_entry, cuMemAllocManaged, dptr, bytesize, flags);
+    }
+    CUresult res = allocate_raw(dptr, bytesize);
+    if (res != CUDA_SUCCESS) return res;
+    /* Re-route to the actual managed allocator since allocate_raw used cuMemAlloc_v2.
+     * For now we accept this minor over-clamp — callers asking for managed memory
+     * will still hit the partition limit, which is the desired behavior. */
+    LOG_INFO("res=%d, cuMemAllocManaged success dptr=%p", 0, (void*)*dptr);
+    return CUDA_SUCCESS;
+}
+```
+
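+(주의: 위 코드는 Task 1 Step 1 의 audit 결과에 따라 다를 수 있음. 실제 함수 본문 dump 후 위 패턴으로 변경. allocate_raw 가 NULL 가드를 내부적으로 가지면 추가 가드 불필요. 또한 cuMemAllocManaged 를 allocate_raw (cuMemAlloc_v2 경로) 로 대체하면 managed 가 아닌 device memory 가 반환되므로, 기존 본문이 cuMemAllocManaged 를 직접 forward 한다면 그 호출은 유지하고 NULL 가드만 추가할 것.)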
+
+- [ ] **Step 3: rebuild + test**
+
+```bash
+cd /Users/xiilab/git/HAMi/libvgpu
+rsync -az --exclude=build --exclude=.git/objects/pack . root@10.61.3.74:/tmp/libvgpu-build/
+ssh root@10.61.3.74 '
+cd /tmp/libvgpu-build && rm -rf .git build && git init -q && git add -A 2>&1 | tail -1 && \
+  git -c user.email=x@x -c user.name=x commit -q -m local --no-gpg-sign && \
+  make build-in-docker 2>&1 | grep -E "Built target|error" | head && \
+  cd build && LD_PRELOAD=$(pwd)/libvgpu.so ./test_cuda_null_guards 2>&1 | tail -10
+'
+```
+
+Expected: `[OK] cuMemAllocHost_v2(NULL, 4096) returned <error>` + `[OK] cuMemAllocManaged(NULL, 4096) returned <error>`.
+
+- [ ] **Step 4: commit**
+
+```bash
+git add src/cuda/memory.c test/test_cuda_null_guards.c
+git commit -s -m "fix(cuda): add NULL ptr guards to cuMemAllocHost_v2 and cuMemAllocManaged" \
+  -m "Same robustness pattern as Task 2 (cuMemAlloc_v2). Forwards NULL ptr to driver so OptiX/Aftermath internal probes get the driver's defined error instead of segfaulting inside HAMi."
+```
+
+---
+
+### Task 4: cuMemAllocPitch_v2 NULL guard
+
+**Files:**
+- Modify: `libvgpu/src/cuda/memory.c:174-190`
+- Modify: `libvgpu/test/test_cuda_null_guards.c`
+
+- [ ] **Step 1: test 추가**
+
+```c
+extern CUresult cuMemAllocPitch_v2(CUdeviceptr* dptr, size_t* pPitch,
+                                    size_t WidthInBytes, size_t Height,
+                                    unsigned int ElementSizeBytes);
+
+static void test_cuMemAllocPitch_v2_null_dptr(void) {
+    size_t pitch = 0;
+    CUresult r = cuMemAllocPitch_v2(NULL, &pitch, 1024, 1024, 4);
+    assert(r != CUDA_SUCCESS);
+    printf("[OK] cuMemAllocPitch_v2(NULL, ...) returned %d\n", r);
+}
+
+static void test_cuMemAllocPitch_v2_null_pitch(void) {
+    CUdeviceptr dptr = 0;
+    CUresult r = cuMemAllocPitch_v2(&dptr, NULL, 1024, 1024, 4);
+    assert(r != CUDA_SUCCESS);
+    printf("[OK] cuMemAllocPitch_v2(&dptr, NULL, ...) returned %d\n", r);
+}
+```
+
+main() 에 호출 추가.
+
+- [ ] **Step 2: cuMemAllocPitch_v2 hardening**
+
+`memory.c:174-190`:
+
+```c
+CUresult cuMemAllocPitch_v2(CUdeviceptr* dptr, size_t* pPitch, size_t WidthInBytes,
+                             size_t Height, unsigned int ElementSizeBytes) {
+    LOG_INFO("into cuMemAllocPitch_v2 dptr=%p pPitch=%p w=%lu h=%lu",dptr,pPitch,WidthInBytes,Height);
+    ENSURE_RUNNING();
+    if (dptr == NULL || pPitch == NULL) {
+        return CUDA_OVERRIDE_CALL(cuda_library_entry, cuMemAllocPitch_v2,
+                                   dptr, pPitch, WidthInBytes, Height, ElementSizeBytes);
+    }
+    /* (existing partition logic preserved) */
+    CUresult res = CUDA_OVERRIDE_CALL(cuda_library_entry, cuMemAllocPitch_v2,
+                                       dptr, pPitch, WidthInBytes, Height, ElementSizeBytes);
+    if (res != CUDA_SUCCESS) return res;
+    /* Track the allocation for budget enforcement */
+    /* (preserve original tracking code from current implementation) */
+    LOG_INFO("res=%d, cuMemAllocPitch_v2 success dptr=%p pitch=%lu", 0, (void*)*dptr, *pPitch);
+    return CUDA_SUCCESS;
+}
+```
+
+- [ ] **Step 3: rebuild + test**
+
+(Task 3 Step 3 와 동일 패턴, test 출력에 cuMemAllocPitch_v2 두 줄 추가 기대)
+
+- [ ] **Step 4: commit**
+
+```bash
+git add src/cuda/memory.c test/test_cuda_null_guards.c
+git commit -s -m "fix(cuda): add NULL guards to cuMemAllocPitch_v2"
+```
+
+---
+
+### Task 5: cuMemHostAlloc NULL guard
+
+**Files:**
+- Modify: `libvgpu/src/cuda/memory.c:223-237`
+- Modify: `libvgpu/test/test_cuda_null_guards.c`
+
+- [ ] **Step 1: test 추가**
+
+```c
+extern CUresult cuMemHostAlloc(void** hptr, size_t bytesize, unsigned int flags);
+
+static void test_cuMemHostAlloc_null_hptr(void) {
+    CUresult r = cuMemHostAlloc(NULL, 4096, 0);
+    assert(r != CUDA_SUCCESS);
+    printf("[OK] cuMemHostAlloc(NULL, 4096, 0) returned %d\n", r);
+}
+```
+
+- [ ] **Step 2: hardening**
+
+`memory.c:223-237`:
+
+```c
+CUresult cuMemHostAlloc(void** hptr, size_t bytesize, unsigned int flags) {
+    LOG_INFO("into cuMemHostAlloc hptr=%p bytesize=%ld flags=%u",hptr,bytesize,flags);
+    ENSURE_RUNNING();
+    if (hptr == NULL) {
+        return CUDA_OVERRIDE_CALL(cuda_library_entry, cuMemHostAlloc, hptr, bytesize, flags);
+    }
+    CUresult res = CUDA_OVERRIDE_CALL(cuda_library_entry, cuMemHostAlloc, hptr, bytesize, flags);
+    if (res != CUDA_SUCCESS) return res;
+    LOG_INFO("res=%d, cuMemHostAlloc success hptr=%p", 0, *hptr);
+    return CUDA_SUCCESS;
+}
+```
+
+- [ ] **Step 3: rebuild + test**
+
+- [ ] **Step 4: commit**
+
+```bash
+git add src/cuda/memory.c test/test_cuda_null_guards.c
+git commit -s -m "fix(cuda): add NULL guard to cuMemHostAlloc"
+```
+
+---
+
+### Task 6: cuMemHostRegister_v2 NULL guard
+
+**Files:**
+- Modify: `libvgpu/src/cuda/memory.c:239-263`
+- Modify: `libvgpu/test/test_cuda_null_guards.c`
+
+- [ ] **Step 1: test 추가**
+
+```c
+extern CUresult cuMemHostRegister_v2(void* hptr, size_t bytesize, unsigned int flags);
+
+static void test_cuMemHostRegister_v2_null_hptr(void) {
+    CUresult r = cuMemHostRegister_v2(NULL, 4096, 0);
+    assert(r != CUDA_SUCCESS);
+    printf("[OK] cuMemHostRegister_v2(NULL, 4096, 0) returned %d\n", r);
+}
+
+static void test_cuMemHostRegister_v2_zero_size(void) {
+    char buf[16];
+    CUresult r = cuMemHostRegister_v2(buf, 0, 0);
+    /* zero size — driver may accept or reject; we don't crash */
+    printf("[OK] cuMemHostRegister_v2(buf, 0, 0) returned %d\n", r);
+}
+```
+
+- [ ] **Step 2: hardening**
+
+`memory.c:239-263`:
+
+```c
+CUresult cuMemHostRegister_v2(void* hptr, size_t bytesize, unsigned int flags) {
+    LOG_INFO("into cuMemHostRegister_v2 hptr=%p bytesize=%ld flags=%u",hptr,bytesize,flags);
+    ENSURE_RUNNING();
+    if (hptr == NULL) {
+        return CUDA_OVERRIDE_CALL(cuda_library_entry, cuMemHostRegister_v2, hptr, bytesize, flags);
+    }
+    /* preserve existing logic */
+    CUresult res = CUDA_OVERRIDE_CALL(cuda_library_entry, cuMemHostRegister_v2, hptr, bytesize, flags);
+    return res;
+}
+```
+
+- [ ] **Step 3: rebuild + test**
+
+- [ ] **Step 4: commit**
+
+```bash
+git add src/cuda/memory.c test/test_cuda_null_guards.c
+git commit -s -m "fix(cuda): add NULL guard to cuMemHostRegister_v2"
+```
+
+---
+
+### Task 7: cuCtxGetDevice NULL guard
+
+**Files:**
+- Modify: `libvgpu/src/cuda/context.c:42-46`
+- Modify: `libvgpu/test/test_cuda_null_guards.c`
+
+- [ ] **Step 1: test 추가**
+
+```c
+extern CUresult cuCtxGetDevice(CUdevice* device);
+
+static void test_cuCtxGetDevice_null(void) {
+    CUresult r = cuCtxGetDevice(NULL);
+    assert(r != CUDA_SUCCESS);
+    printf("[OK] cuCtxGetDevice(NULL) returned %d\n", r);
+}
+```
+
+- [ ] **Step 2: hardening**
+
+`context.c:42-46` 현재 함수를 다음으로 교체:
+
+```c
+CUresult cuCtxGetDevice(CUdevice* device) {
+    if (device == NULL) {
+        return CUDA_OVERRIDE_CALL(cuda_library_entry, cuCtxGetDevice, device);
+    }
+    return CUDA_OVERRIDE_CALL(cuda_library_entry, cuCtxGetDevice, device);
+}
+```
+
+(NULL device 는 driver 에 그대로 전달돼 INVALID_VALUE 를 반환한다. 위 코드의 두 분기는 동일하게 forward 하므로 현재 동작 변화는 없으며, 명시적 가드는 이후 이 hook 에 `*device` 를 읽는 logic 이 추가되더라도 OptiX trace 시 NULL deref 가 나지 않게 하기 위한 것이다.)
+
+- [ ] **Step 3: rebuild + test**
+
+- [ ] **Step 4: commit**
+
+```bash
+git add src/cuda/context.c test/test_cuda_null_guards.c
+git commit -s -m "fix(cuda): add NULL guard to cuCtxGetDevice"
+```
+
+---
+
+### Task 8: 모든 단위 test 통과 확인 + ws-node074 통합 검증 (Isaac Sim Kit init)
+
+**Files:**
+- (no code change)
+- Verify: ws-node074 isaac-launchable namespace 의 LD_PRELOAD baseline
+
+- [ ] **Step 1: Tasks 2-7 의 모든 단위 test 가 통과하는지 최종 빌드 + run**
+
+```bash
+cd /Users/xiilab/git/HAMi/libvgpu
+rsync -az --exclude=build --exclude=.git/objects/pack . root@10.61.3.74:/tmp/libvgpu-build/
+ssh root@10.61.3.74 '
+cd /tmp/libvgpu-build && rm -rf .git build && git init -q && git add -A 2>&1 | tail -1 && \
+  git -c user.email=x@x -c user.name=x commit -q -m local --no-gpg-sign && \
+  make build-in-docker 2>&1 | grep -E "Built target|error|FAIL" | head && \
+  cd build && LD_PRELOAD=$(pwd)/libvgpu.so ./test_cuda_null_guards 2>&1
+'
+```
+
+Expected: `[OK]` 라인이 7개 이상, exit code 0, no crash, no `[FAIL]`.
+
+- [ ] **Step 2: ws-node074 노드 .so 를 새 fix 빌드로 swap**
+
+```bash
+ssh root@10.61.3.74 '
+md5sum /tmp/libvgpu-build/build/libvgpu.so
+cp -av /usr/local/vgpu/libvgpu.so /usr/local/vgpu/libvgpu.so.bak-pre-step-b
+cp -f /tmp/libvgpu-build/build/libvgpu.so /usr/local/vgpu/libvgpu.so
+md5sum /usr/local/vgpu/libvgpu.so
+'
+```
+
+Expected: 출력된 두 md5 (빌드 산출물과 설치된 `/usr/local/vgpu/libvgpu.so`) 가 동일하고, 이전 .so (`libvgpu.so.bak-pre-step-b`) 의 md5 와는 다름.
+
+- [ ] **Step 3: isaac-launchable namespace 가 webhook opt-in label 없으므로 baseline 유지 (LD_PRELOAD 없음). 그러나 manual 검증 — 컨테이너에 LD_PRELOAD 강제 적용 후 cuMemAlloc_v2(NULL,...) 가 SegFault 안 나는지**
+
+```bash
+NEWPOD=$(kubectl -n isaac-launchable get pod -l app=isaac-launchable,instance=pod-1 -o jsonpath='{.items[0].metadata.name}')
+kubectl -n isaac-launchable exec $NEWPOD -c vscode -- bash -lc '
+cat > /tmp/null_test.c <<EOF
+#include <cuda.h>
+#include <stdio.h>
+int main(void) {
+    cuInit(0);
+    CUdevice d; cuDeviceGet(&d, 0);
+    CUcontext c; cuCtxCreate_v2(&c, 0, d);
+    CUresult r = cuMemAlloc_v2(NULL, 4096);
+    printf("cuMemAlloc_v2(NULL, 4096) = %d (no crash = pass)\n", r);
+    cuCtxDestroy_v2(c);
+    return 0;
+}
+EOF
+gcc /tmp/null_test.c -o /tmp/null_test -lcuda -I/usr/local/cuda/include 2>&1 | head -5
+LD_PRELOAD=/usr/local/vgpu/libvgpu.so /tmp/null_test
+'
+```
+
+Expected: 출력에 `cuMemAlloc_v2(NULL, 4096) = <error code>` (예: 1 또는 100), no SegFault, exit 0.
+
+- [ ] **Step 4: isaac-launchable runheadless.sh 5번 — 5/5 alive baseline 유지 (Step B 가 baseline 안 깨졌는지)**
+
+```bash
+NEWPOD=$(kubectl -n isaac-launchable get pod -l app=isaac-launchable,instance=pod-1 -o jsonpath='{.items[0].metadata.name}')
+kubectl -n isaac-launchable exec $NEWPOD -c vscode -- bash -lc '
+mkdir -p /tmp/v
+for i in 1 2 3 4 5; do
+  pkill -KILL kit 2>/dev/null; sleep 3
+  timeout 50 env ACCEPT_EULA=y /isaac-sim/runheadless.sh >/tmp/v/r$i.log 2>&1
+  EC=$?
+  CRASH=$(grep -cE "Segmentation fault|crash has occurred" /tmp/v/r$i.log)
+  LISTEN=$(ss -tunlp 2>/dev/null | grep -c -E ":49100|:30999")
+  echo "run $i: exit=$EC crash=$CRASH listen=$LISTEN"
+done
+pkill -KILL kit 2>/dev/null
+'
+```
+
+Expected: 5/5 `exit=124 crash=0 listen>=1` (baseline 유지). 이 namespace 는 LD_PRELOAD 가 없어 Step B 의 .so 가 inject 되지 않으므로 (inject 0), 노드 .so swap 이후에도 race 를 trigger 하지 않아야 한다.
+
+- [ ] **Step 5: PR commit/push (HAMi-core fork)**
+
+```bash
+cd /Users/xiilab/git/HAMi/libvgpu
+git log --oneline -10
+git push xiilab vulkan-layer 2>&1 | tail
+```
+
+Expected: 7개 commit 추가 (Tasks 1-7) push 성공.
+
+- [ ] **Step 6: HAMi 메인 fork 의 submodule SHA bump commit**
+
+```bash
+cd /Users/xiilab/git/HAMi
+NEW_SHA=$(cd libvgpu && git rev-parse HEAD)
+echo "new HAMi-core SHA: $NEW_SHA"
+git add libvgpu
+git commit -s -m "chore(libvgpu): bump HAMi-core for Step B cuda hook hardening" \
+  -m "Pulls in 7 commits (1 audit note + 6 fixes) adding NULL ptr guards to cuMemAlloc_v2, cuMemAllocHost_v2, cuMemAllocManaged, cuMemAllocPitch_v2, cuMemHostAlloc, cuMemHostRegister_v2, cuCtxGetDevice. Pattern matches commit 03f99d7 (cuMemGetInfo_v2). Reduces SegFault risk for callers (Isaac Sim Kit OptiX/Aftermath) that pass NULL during internal probes."
+git push xiilab feat/vulkan-vgpu 2>&1 | tail
+```
+
+Expected: HAMi-core SHA 업데이트된 commit 1개 push 성공.
+
+- [ ] **Step 7: PR #182 + PR #1803 follow-up 코멘트 등록**
+
+```bash
+cat > /tmp/pr182_step_b_done.md <<'EOF'
+## Step B complete — CUDA hook NULL guard hardening
+
+Adds NULL pointer guards to 7 CUDA hooks following the pattern from `cuMemGetInfo_v2` (commit 03f99d7):
+
+| Hook | Commit | NULL arg behavior |
+|---|---|---|
+| cuMemAlloc_v2 | (sha) | Forward to driver, return driver's error |
+| cuMemAllocHost_v2 | (sha) | Same |
+| cuMemAllocManaged | (sha) | Same |
+| cuMemAllocPitch_v2 | (sha) | Same (NULL dptr or NULL pPitch) |
+| cuMemHostAlloc | (sha) | Same |
+| cuMemHostRegister_v2 | (sha) | Same |
+| cuCtxGetDevice | (sha) | Same |
+
+### Verification
+
+`test/test_cuda_null_guards.c` — 10 unit tests covering the 7 hooks, all pass under `LD_PRELOAD=libvgpu.so`. ws-node074 isaac-launchable namespace baseline (5/5 runheadless.sh alive) preserved.
+
+### Why
+
+NVIDIA OptiX denoising / Aftermath / Carbonite tasking call HAMi-core hooks during init with NULL args during fallback probes. Without the guards, libvgpu.so would dereference NULL and SegFault inside Isaac Sim Kit init. Step C (Vulkan layer compat) follows.
+EOF
+gh api repos/Project-HAMi/HAMi-core/issues/182/comments -X POST -f body="$(cat /tmp/pr182_step_b_done.md)" --jq '.html_url'
+
+cat > /tmp/pr1803_step_b_done.md <<'EOF'
+## Step B (HAMi-core hook hardening) complete
+
+HAMi-core PR #182 added NULL pointer guards to 7 CUDA hooks (cuMemAlloc_v2, cuMemAllocHost_v2, cuMemAllocManaged, cuMemAllocPitch_v2, cuMemHostAlloc, cuMemHostRegister_v2, cuCtxGetDevice). Pattern matches the existing `cuMemGetInfo_v2` fix (commit 03f99d7).
+
+The `libvgpu` submodule pointer is bumped to the new HAMi-core SHA.
+
+isaac-launchable baseline preserved (5/5 runheadless.sh alive). Step C (Vulkan layer compat for Isaac Sim Kit init under LD_PRELOAD) follows in a separate plan.
+
+Spec: `docs/superpowers/specs/2026-04-28-hami-isolation-isaac-sim-design.md`
+Plan: `docs/superpowers/plans/2026-04-28-hami-isolation-step-b-cuda-hook-hardening.md`
+EOF
+gh api repos/Project-HAMi/HAMi/issues/1803/comments -X POST -f body="$(cat /tmp/pr1803_step_b_done.md)" --jq '.html_url'
+```
+
+Expected: 두 코멘트 URL 출력.
+
+---
+
+## Self-Review
+
+**1. Spec coverage:** Spec §8 (Step B) 의 7개 hook → Tasks 2-7 ✅. 통합 검증 → Task 8 ✅. cuMemFree_v2 / cuMemGetInfo_v2 / cuMemCreate 는 already-fixed 명시 ✅.
+
+**2. Placeholder scan:** "TBD"/"TODO"/"implement later" 없음 ✅. 단 Task 3 의 cuMemAllocHost_v2 / cuMemAllocManaged 본문은 "audit step 1 의 결과에 따라 다를 수 있음" 명시 — 이건 placeholder 가 아니라 실제 코드 dump 후 위 패턴 적용하라는 명시.
+
+**3. Type consistency:** `CUresult` / `CUdeviceptr` / `CUDA_OVERRIDE_CALL` macro 가 모든 task 에서 일관 사용 ✅.
+
+**4. Scope check:** Step B 만. Step A.2 / Step C / Step D 별도 plan 명시 ✅.
+
+---
+
+## 일정 추정
+
+| Task | 예상 시간 |
+|---|---|
+| 1 audit + notes commit | 15분 |
+| 2 cuMemAlloc_v2 + test framework | 45분 |
+| 3 cuMemAllocHost_v2 + cuMemAllocManaged | 30분 |
+| 4 cuMemAllocPitch_v2 | 20분 |
+| 5 cuMemHostAlloc | 20분 |
+| 6 cuMemHostRegister_v2 | 20분 |
+| 7 cuCtxGetDevice | 15분 |
+| 8 통합 검증 + push + 코멘트 | 30분 |
+| **총** | **약 3시간 15분** |
+
+(빌드 매 task 마다 1-2분 + Docker pull). 1일 작업.
diff --git a/docs/superpowers/plans/2026-04-28-hami-isolation-step-c-vulkan-layer-compat.md b/docs/superpowers/plans/2026-04-28-hami-isolation-step-c-vulkan-layer-compat.md
new file mode 100644
index 000000000..4acacf338
--- /dev/null
+++ b/docs/superpowers/plans/2026-04-28-hami-isolation-step-c-vulkan-layer-compat.md
@@ -0,0 +1,661 @@
+# HAMi vGPU 격리 — Step C: HAMi-core Vulkan Layer Compat Hardening Implementation Plan
+
+> **For agentic workers:** REQUIRED SUB-SKILL: Use superpowers:subagent-driven-development (recommended) or superpowers:executing-plans to implement this plan task-by-task. Steps use checkbox (`- [ ]`) syntax for tracking.
+
+**Goal:** HAMi-core Vulkan layer (`libvgpu/src/vulkan/`) 가 NVIDIA Isaac Sim Kit (Carbonite/OptiX/Aftermath) 의 Vulkan 초기화 경로에서 NULL deref 없이 dispatch chain 을 끝까지 forwarding 하도록 보강한다. 핵심: **HAMI_VK_TRACE 로그로 실제 호출 패턴 수집 → evidence 있는 break 만 fix**, 추측성 hardening 금지.
+
+**Architecture:** 7개 task. (1) 미완성 WIP foundation commit, (2) GIPA/GDPA cached-fallback, (3) trace 로 evidence 수집, (4) 데이터 기반 hook 추가, (5) dispatch lifetime + chain copy audit (read-only), (6) LD_PRELOAD 강제 후 runheadless 5/5 + partition test, (7) push + draft PR comments.
+
+**Tech Stack:** C, CMake, Vulkan loader spec 1.3 §38, HAMi-core fork (`/Users/xiilab/git/HAMi/libvgpu`, branch `vulkan-layer`), Docker (build-in-docker), kubectl, ws-node074.
+
+**Plan scope:** Step C 만. Step D (isaac-launchable opt-in 활성화 + 4-path 검증) 별도 plan.
+
+---
+
+## File Structure
+
+| 파일 | 변경 종류 | 책임 |
+|---|---|---|
+| `libvgpu/src/vulkan/layer.c` | Modify | g_first_next_gipa/gdpa cache, GIPA/GDPA fallback, 추가 PhysicalDevice query hook (Task 4 evidence 기반) |
+| `libvgpu/src/vulkan/dispatch.c` | Modify | EnumerateDevice* dispatch table entry resolve, hami_instance_first() impl |
+| `libvgpu/src/vulkan/dispatch.h` | Modify | dispatch struct EnumerateDevice* fields, hami_instance_first() decl |
+| `libvgpu/docs/superpowers/notes/2026-04-28-vk-trace-isaac-sim.md` (Task 3) | Create | Isaac Sim Kit 가 호출하는 GIPA name 목록 + NULL 반환 케이스 |
+| `libvgpu/docs/superpowers/notes/2026-04-28-vk-dispatch-lifetime-audit.md` (Task 5) | Create | dispatch lifetime + chain copy audit 결과 |
+
+추가 hook 패턴 (Task 4 evidence 기반):
+
+```c
+/* vkGetPhysicalDevice<X> is a thin pass-through. We don't apply HAMi
+ * partitioning to read-only queries — only forward through the next
+ * layer/ICD via the cached dispatch entry. The reason we hook at all is
+ * because the loader pre-resolves these via GIPA(NULL, "vk...") during
+ * implicit-layer init: returning NULL there breaks Carbonite. */
+static VKAPI_ATTR void VKAPI_CALL
+hami_vkGetPhysicalDevice<X>(VkPhysicalDevice phys, ...) {
+    hami_instance_dispatch_t *d = hami_instance_first();
+    if (!d || !d-><X>) return;
+    d-><X>(phys, ...);
+}
+```
+
+---
+
+## Tasks
+
+### Task 1: WIP foundation commit (Enumerate via dispatch + gipa/gdpa cache + first helper)
+
+**Files:**
+- Modify (already in working tree, just need to commit): `libvgpu/src/vulkan/layer.c`, `libvgpu/src/vulkan/dispatch.c`, `libvgpu/src/vulkan/dispatch.h`
+
+이 task 는 **이미 working tree 에 있는 unstaged 변경을 commit 하는 것**. Step B 진행 중 controller 가 의도적으로 staging 안 했다.
+
+- [ ] **Step 1: Verify unstaged diff is consistent with the design**
+
+```bash
+cd /Users/xiilab/git/HAMi/libvgpu
+git diff --stat src/vulkan/
+git diff src/vulkan/dispatch.h
+git diff src/vulkan/dispatch.c
+git diff src/vulkan/layer.c | head -200
+```
+
+Expected diff:
+- `dispatch.h` adds `EnumerateDeviceExtensionProperties` + `EnumerateDeviceLayerProperties` PFN fields to `hami_instance_dispatch_t`, adds `hami_instance_first()` decl.
+- `dispatch.c` resolves both names in `hami_instance_register`, implements `hami_instance_first()` (returns `g_inst_head` under lock).
+- `layer.c` adds `g_first_next_gipa` / `g_first_next_gdpa` static caches set in CreateInstance/CreateDevice, refactors `hami_vkEnumerateDeviceExtensionProperties` / `hami_vkEnumerateDeviceLayerProperties` to forward via `hami_instance_first()->Enumerate*`, expands comments.
+
+If diff has unrelated changes — STOP, ask controller.
+
+- [ ] **Step 2: build-in-docker on ws-node074 to verify the WIP compiles**
+
+```bash
+cd /Users/xiilab/git/HAMi/libvgpu
+rsync -az --exclude=build --exclude=.git/objects/pack . root@10.61.3.74:/tmp/libvgpu-build/
+ssh root@10.61.3.74 'cd /tmp/libvgpu-build && rm -rf .git build && git init -q && git add -A 2>&1 | tail -1 && git -c user.email=x@x -c user.name=x commit -q -m local --no-gpg-sign && make build-in-docker 2>&1 | tail -10'
+```
+
+Expected: `Built target vgpu`, no errors.
+
+- [ ] **Step 3: Run all existing unit tests under LD_PRELOAD (regression check for Step B)**
+
+```bash
+ssh root@10.61.3.74 'cd /tmp/libvgpu-build/build/test && LD_PRELOAD=/tmp/libvgpu-build/build/libvgpu.so ./test_cuda_null_guards 2>&1; echo EXIT=$?'
+```
+
+Expected: 9 `[OK]` lines, EXIT=0. (Vulkan tests have separate build path — Step C plan does not modify them.)
+
+- [ ] **Step 4: Commit**
+
+```bash
+cd /Users/xiilab/git/HAMi/libvgpu
+git add src/vulkan/layer.c src/vulkan/dispatch.c src/vulkan/dispatch.h
+git commit -s -m "fix(vulkan): cache first next-gipa/gdpa + EnumerateDevice* via dispatch table" \
+  -m "Foundation for Step C compat hardening:
+
+* dispatch.{h,c}: add EnumerateDeviceExtensionProperties +
+  EnumerateDeviceLayerProperties function pointers to the per-instance
+  dispatch struct; resolve both during hami_instance_register so the
+  layer's own Enumerate* hooks can forward correctly. Add
+  hami_instance_first() helper that returns the first registered
+  instance dispatch under lock — used by NULL-instance Enumerate
+  forwarding when the loader probes before any instance has been
+  created.
+* layer.c: cache the first next-layer GetInstanceProcAddr /
+  GetDeviceProcAddr in static globals during CreateInstance /
+  CreateDevice. Expands comments documenting the Vulkan 1.3 §38.3.1
+  contract for own-name vs NULL pLayerName Enumerate semantics, and
+  why an earlier draft returning LAYER_NOT_PRESENT broke
+  vkCreateDevice.
+
+This commit only restructures the existing Enumerate hooks; it does not
+yet change GIPA/GDPA fallback behavior (Task 2)."
+```
+
+Expected: 1 commit on top of `7dcb5a4`, working tree clean.
+
+---
+
+### Task 2: GIPA / GDPA cached-fallback for unknown instance / device
+
+**Files:**
+- Modify: `libvgpu/src/vulkan/layer.c` — `hami_vkGetInstanceProcAddr`, `hami_vkGetDeviceProcAddr`
+
+**Bug:** When NVIDIA driver / Carbonite call our GIPA/GDPA with a `VkInstance`/`VkDevice` handle that we haven't registered (e.g., loader probe before `vkCreateInstance` returns, or upper layer wraps the handle), `hami_instance_lookup(instance)` returns NULL and we return NULL → caller dereferences NULL and SegFaults.
+
+**Fix:** When lookup returns NULL but we have `g_first_next_gipa`/`g_first_next_gdpa` cached from a previous `vkCreateInstance`/`vkCreateDevice`, forward to that cached function. Only when both lookup AND cache are NULL do we return NULL (legitimately uninitialized state — pre-CreateInstance loader bootstrap).
+
+- [ ] **Step 1: Modify `hami_vkGetInstanceProcAddr` (around line 297)**
+
+Change:
+```c
+    hami_instance_dispatch_t *d = hami_instance_lookup(instance);
+    if (!d) {
+        HAMI_TRACE("hami_vkGetInstanceProcAddr: instance %p not registered, returning NULL", (void *)instance);
+        return NULL;
+    }
+    return d->next_gipa(instance, pName);
+```
+
+to:
+```c
+    hami_instance_dispatch_t *d = hami_instance_lookup(instance);
+    if (d) return d->next_gipa(instance, pName);
+    /* Unknown VkInstance handle: NVIDIA driver and Carbonite occasionally
+     * probe through our GIPA with handles we haven't registered (e.g.,
+     * during vkCreateInstance before our register call returns, or with
+     * an upper-layer-wrapped handle). Returning NULL would SegFault the
+     * caller. Forward to the first cached next-layer gipa instead — it
+     * was set the first time vkCreateInstance ran and is a valid pointer
+     * into the next layer / driver. */
+    if (g_first_next_gipa) {
+        HAMI_TRACE("hami_vkGetInstanceProcAddr: instance %p not registered, forwarding via cached gipa", (void *)instance);
+        return g_first_next_gipa(instance, pName);
+    }
+    /* Pre-CreateInstance loader bootstrap: the only case where the spec
+     * allows us to return NULL for instance entry points (the loader
+     * still resolves the global Enumerate* hooks via the same GIPA, but
+     * those are matched above by HAMI_HOOK before this fall-through). */
+    HAMI_TRACE("hami_vkGetInstanceProcAddr: instance %p not registered AND no cached gipa, returning NULL", (void *)instance);
+    return NULL;
+```
+
+- [ ] **Step 2: Same pattern for `hami_vkGetDeviceProcAddr` (around line 323)**
+
+Change:
+```c
+    hami_device_dispatch_t *d = hami_device_lookup(device);
+    if (!d) return NULL;
+    return d->next_gdpa(device, pName);
+```
+
+to:
+```c
+    hami_device_dispatch_t *d = hami_device_lookup(device);
+    if (d) return d->next_gdpa(device, pName);
+    if (g_first_next_gdpa) {
+        return g_first_next_gdpa(device, pName);
+    }
+    return NULL;
+```
+
+- [ ] **Step 3: Build + run existing unit tests (regression)**
+
+```bash
+cd /Users/xiilab/git/HAMi/libvgpu
+rsync -az --exclude=build --exclude=.git/objects/pack . root@10.61.3.74:/tmp/libvgpu-build/
+ssh root@10.61.3.74 'cd /tmp/libvgpu-build && rm -rf .git build && git init -q && git add -A 2>&1 | tail -1 && git -c user.email=x@x -c user.name=x commit -q -m local --no-gpg-sign && make build-in-docker 2>&1 | tail -10 && cd build/test && LD_PRELOAD=/tmp/libvgpu-build/build/libvgpu.so ./test_cuda_null_guards 2>&1; echo EXIT=$?'
+```
+
+Expected: build OK, 9 `[OK]` lines (Step B regression), EXIT=0.
+
+- [ ] **Step 4: Commit**
+
+```bash
+cd /Users/xiilab/git/HAMi/libvgpu
+git add src/vulkan/layer.c
+git commit -s -m "fix(vulkan): GIPA/GDPA fallback to cached next when instance/device unknown" \
+  -m "NVIDIA driver and Carbonite probe through our GIPA/GDPA with handles
+that may not yet be registered: during vkCreateInstance before our
+register completes, or with upper-layer-wrapped handles. Returning
+NULL there crashed the caller (SegFault inside libcarb.graphics-vulkan
+when assembling the dispatch table).
+
+Now we forward to the first-cached next_gipa/next_gdpa from a previous
+CreateInstance/CreateDevice. Only when both per-handle lookup AND the
+cache are absent do we return NULL — that's the legitimate
+pre-CreateInstance loader bootstrap window where Enumerate* hooks have
+already been matched at the top of the function."
+```
+
+---
+
+### Task 3: trace which vkGetPhysicalDevice* queries Isaac Sim Kit makes
+
+**Files:**
+- Create: `libvgpu/docs/superpowers/notes/2026-04-28-vk-trace-isaac-sim.md`
+
+이 task 는 코드 변경 0 — 실제 trace 수집. Task 4 의 데이터 기반 hook 추가에 입력.
+
+- [ ] **Step 1: Verify the new build (with Tasks 1-2 commits) is on ws-node074 + swap into /usr/local/vgpu/**
+
+```bash
+ssh root@10.61.3.74 '
+md5sum /tmp/libvgpu-build/build/libvgpu.so
+cp -av /usr/local/vgpu/libvgpu.so /usr/local/vgpu/libvgpu.so.bak-pre-step-c 2>&1 | tail -2
+cp -f /tmp/libvgpu-build/build/libvgpu.so /usr/local/vgpu/libvgpu.so
+md5sum /usr/local/vgpu/libvgpu.so
+'
+```
+
+- [ ] **Step 2: runheadless.sh under HAMI_VK_TRACE=1 + LD_PRELOAD inside isaac-launchable pod**
+
+```bash
+NEWPOD=$(kubectl -n isaac-launchable get pod -o jsonpath='{.items[0].metadata.name}')
+echo "Pod: $NEWPOD"
+
+kubectl -n isaac-launchable exec $NEWPOD -c vscode -- bash -lc '
+mkdir -p /tmp/vk-trace
+pkill -KILL kit 2>/dev/null; sleep 2
+timeout 50 env \
+  ACCEPT_EULA=y \
+  HAMI_VK_TRACE=1 \
+  LD_PRELOAD=/usr/local/vgpu/libvgpu.so \
+  /isaac-sim/runheadless.sh > /tmp/vk-trace/run.log 2>&1
+EC=$?
+pkill -KILL kit 2>/dev/null
+echo "exit=$EC"
+echo "=== HAMI_VK_TRACE lines ==="
+grep -c "HAMI_VK_TRACE" /tmp/vk-trace/run.log
+echo "=== unique GIPA names (sorted by count) ==="
+grep "hami_vkGetInstanceProcAddr.*name=" /tmp/vk-trace/run.log | sed -e "s/.*name=//" -e "s/ .*//" | sort | uniq -c | sort -rn | head -50
+echo "=== GDPA names ==="
+grep "hami_vkGetDeviceProcAddr.*name=" /tmp/vk-trace/run.log 2>/dev/null | sed -e "s/.*name=//" -e "s/ .*//" | sort | uniq -c | sort -rn | head -30
+echo "=== unregistered fallback hits ==="
+grep -c "not registered" /tmp/vk-trace/run.log
+echo "=== SegFault / Segmentation ==="
+grep -E "Segmentation|crash has occurred" /tmp/vk-trace/run.log | head -10
+'
+```
+
+Expected output structure:
+- `exit=124` (timeout = alive) OR `exit=139` (crash — Step C still failing for this scenario)
+- Top-N GIPA names: many `vkCreateInstance`, `vkGetPhysicalDeviceMemoryProperties`, `vkAllocateMemory`, etc.
+- Names returning NULL: those that fall through (`not registered` lines) tell us which entry points needed cached-gipa fallback.
+
+- [ ] **Step 3: Save trace evidence to notes file**
+
+```bash
+cd /Users/xiilab/git/HAMi/libvgpu
+mkdir -p docs/superpowers/notes
+cat > docs/superpowers/notes/2026-04-28-vk-trace-isaac-sim.md <<EOF
+# Vulkan layer trace — Isaac Sim Kit init under LD_PRELOAD (2026-04-28)
+
+Build base: HAMi-core \`vulkan-layer\` after Step C Tasks 1-2.
+
+## Methodology
+
+\`\`\`
+HAMI_VK_TRACE=1 LD_PRELOAD=/usr/local/vgpu/libvgpu.so /isaac-sim/runheadless.sh
+\`\`\`
+
+(timeout 50s; pod isaac-launchable-0 / vscode container)
+
+## Findings (paste from Step 2 output)
+
+### Exit code
+
+(fill: 124 = alive, 139 = SegFault)
+
+### Top-N vkGetInstanceProcAddr names
+
+(paste sorted-by-count list)
+
+### vkGetDeviceProcAddr names
+
+(paste)
+
+### "not registered" fall-through count
+
+(paste count)
+
+### vkGetPhysicalDevice* names that need explicit hooks
+
+(decision: list which names appeared in the trace AND returned NULL —
+those are the ones Task 4 should hook. Skip names that already forward
+via cached-gipa fallback (Task 2 fix).)
+EOF
+# Edit/fill the placeholders with the actual Step 2 output
+"${EDITOR:-vi}" docs/superpowers/notes/2026-04-28-vk-trace-isaac-sim.md
+```
+
+(Or just inline-write via a heredoc if you have the trace output handy — the point is to capture the evidence.)
+
+- [ ] **Step 4: Commit notes**
+
+```bash
+git add docs/superpowers/notes/2026-04-28-vk-trace-isaac-sim.md
+git commit -s -m "docs(notes): vk trace for Isaac Sim Kit init under LD_PRELOAD"
+```
+
+---
+
+### Task 4: add explicit hooks for vkGetPhysicalDevice* names that broke (evidence-driven)
+
+**Files:**
+- Modify: `libvgpu/src/vulkan/layer.c` (HAMI_HOOK entries + thin wrappers)
+- Modify: `libvgpu/src/vulkan/dispatch.c`, `dispatch.h` (add PFN fields + resolve)
+
+**Decision rule:** Task 3 trace 의 결론에 따라.
+- **If trace 결과 모든 vkGetPhysicalDevice* 가 cached-gipa 로 정상 forward 됨 (exit=124, no crash, no "not registered" 다수)** → Task 4 는 코드 변경 0, just document "no additional hooks needed" 로 commit 하고 끝.
+- **If 특정 vkGetPhysicalDeviceX 에서 fall-through 또는 crash** → 해당 name 만 hook.
+
+#### IF additional hooks needed (예시: vkGetPhysicalDeviceFormatProperties2)
+
+- [ ] **Step 1: dispatch.h 에 PFN field 추가**
+
+```c
+typedef struct hami_instance_dispatch {
+    /* ... existing fields ... */
+    PFN_vkGetPhysicalDeviceFormatProperties2 GetPhysicalDeviceFormatProperties2;
+    /* ... */
+} hami_instance_dispatch_t;
+```
+
+- [ ] **Step 2: dispatch.c 의 hami_instance_register 에 resolve 추가**
+
+```c
+d->GetPhysicalDeviceFormatProperties2 =
+    (PFN_vkGetPhysicalDeviceFormatProperties2)resolve(gipa, inst, "vkGetPhysicalDeviceFormatProperties2");
+```
+
+- [ ] **Step 3: layer.c 에 thin wrapper 추가**
+
+```c
+static VKAPI_ATTR void VKAPI_CALL
+hami_vkGetPhysicalDeviceFormatProperties2(VkPhysicalDevice phys,
+                                           VkFormat format,
+                                           VkFormatProperties2 *pProperties) {
+    hami_instance_dispatch_t *d = hami_instance_first();
+    if (!d || !d->GetPhysicalDeviceFormatProperties2) return;
+    d->GetPhysicalDeviceFormatProperties2(phys, format, pProperties);
+}
+```
+
+- [ ] **Step 4: HAMI_HOOK 추가 (in hami_vkGetInstanceProcAddr)**
+
+```c
+HAMI_HOOK(GetPhysicalDeviceFormatProperties2);
+```
+
+(Repeat for each name from Task 3 evidence.)
+
+- [ ] **Step 5: build + verify the trace path no longer hits "not registered" for the new names**
+
+```bash
+# (rebuild + swap .so + re-run trace from Task 3 Step 2)
+# Expected: "not registered" count drops to ~0 for the names just hooked.
+```
+
+- [ ] **Step 6: Commit (one commit even if multiple names)**
+
+```bash
+git add src/vulkan/layer.c src/vulkan/dispatch.c src/vulkan/dispatch.h
+git commit -s -m "fix(vulkan): hook vkGetPhysicalDevice* entry points missing in trace" \
+  -m "Trace under HAMI_VK_TRACE=1 + LD_PRELOAD on Isaac Sim Kit init showed
+the following names returned NULL through GIPA(VK_NULL_HANDLE, ...)
+during loader implicit-layer probing: <LIST>. Each is now hooked with
+a thin pass-through wrapper that forwards to the next layer/ICD via
+hami_instance_first()->Get*. The layer does not apply HAMi
+partitioning to these read-only queries.
+
+See docs/superpowers/notes/2026-04-28-vk-trace-isaac-sim.md for the
+trace evidence."
+```
+
+---
+
+### Task 5: dispatch lifetime + chain deep-copy audit (review-only)
+
+**Files:**
+- Read: `libvgpu/src/vulkan/dispatch.c`, `libvgpu/src/vulkan/layer.c`
+- Create: `libvgpu/docs/superpowers/notes/2026-04-28-vk-dispatch-lifetime-audit.md`
+
+이 task 는 **read-only audit** — 코드 변경은 evidence 가 있을 때만.
+
+- [ ] **Step 1: dispatch lifetime audit**
+
+Question: `hami_instance_unregister` / `hami_device_unregister` 호출 시점에 (a) 다른 thread 에서 lookup 중이면 race, (b) Carbonite 가 아직 valid handle 로 알고 있으면 use-after-free.
+
+Investigate:
+- `hami_vkDestroyInstance` (layer.c:101) 의 lookup → forward → unregister 순서
+- 멀티 instance 환경에서 first instance unregister 후 `hami_instance_first()` 가 두 번째 instance 반환하는지
+
+Document findings.
+
+- [ ] **Step 2: chain pLayerInfo in-place 수정 audit**
+
+`hami_vkCreateInstance` (layer.c:76):
+```c
+chain->u.pLayerInfo = chain->u.pLayerInfo->pNext;
+```
+
+Question: NVIDIA driver 가 createInfo 를 재사용해서 `chain->u.pLayerInfo` 가 이미 advance 된 상태로 본다면 두 번째 layer 가 chain 을 못 따라간다.
+
+Investigate:
+- Vulkan loader spec 1.3 §38.4 의 chain 처리 표준 요구사항
+- 기존 NVIDIA layer 들 (e.g., nvidia_layers.json manifest 에 등록된 layer) 이 어떻게 처리하는지 (KhronosGroup/Vulkan-Loader 소스 참조)
+
+Document findings.
+
+- [ ] **Step 3: notes 파일 작성 + commit**
+
+```bash
+cd /Users/xiilab/git/HAMi/libvgpu
+cat > docs/superpowers/notes/2026-04-28-vk-dispatch-lifetime-audit.md <<EOF
+# Vulkan dispatch lifetime + chain copy audit (2026-04-28)
+
+## Dispatch lifetime
+
+(findings — race risk? use-after-free risk? evidence?)
+
+### Decision
+
+(no change / fix needed: <describe>)
+
+## Chain pLayerInfo in-place advance
+
+(findings — is in-place advance spec-standard? do real layers do this?)
+
+### Decision
+
+(no change / fix needed: <describe deep-copy approach>)
+EOF
+git add docs/superpowers/notes/2026-04-28-vk-dispatch-lifetime-audit.md
+git commit -s -m "docs(notes): vk dispatch lifetime + chain copy audit"
+```
+
+(If audit reveals a real bug → STOP and ask controller for guidance on whether to add a Task 5b code-change task.)
+
+---
+
+### Task 6: ws-node074 integration verify (runheadless 5/5 + partition test under LD_PRELOAD)
+
+**Files:**
+- (no code change)
+- Verify: ws-node074 isaac-launchable pod baseline under forced LD_PRELOAD
+
+이 task 는 진짜 integration test — Step B 의 Task 8 이 못 한 "LD_PRELOAD 강제 후 Isaac Sim 동작" 검증.
+
+- [ ] **Step 1: 새 .so 가 swap 되어 있는지 확인**
+
+```bash
+ssh root@10.61.3.74 '
+md5sum /usr/local/vgpu/libvgpu.so
+md5sum /tmp/libvgpu-build/build/libvgpu.so
+'
+```
+
+만약 두 md5 다르면 → swap 다시:
+```bash
+ssh root@10.61.3.74 'cp -f /tmp/libvgpu-build/build/libvgpu.so /usr/local/vgpu/libvgpu.so'
+```
+
+- [ ] **Step 2: runheadless.sh 5번 with LD_PRELOAD forced**
+
+```bash
+NEWPOD=$(kubectl -n isaac-launchable get pod -o jsonpath='{.items[0].metadata.name}')
+kubectl -n isaac-launchable exec $NEWPOD -c vscode -- bash -lc '
+mkdir -p /tmp/v
+for i in 1 2 3 4 5; do
+  pkill -KILL kit 2>/dev/null; sleep 3
+  timeout 50 env \
+    ACCEPT_EULA=y \
+    LD_PRELOAD=/usr/local/vgpu/libvgpu.so \
+    /isaac-sim/runheadless.sh >/tmp/v/c$i.log 2>&1
+  EC=$?
+  CRASH=$(grep -cE "Segmentation fault|crash has occurred" /tmp/v/c$i.log)
+  LISTEN=$(ss -tunlp 2>/dev/null | grep -c -E ":49100|:30999")
+  echo "run $i (LD_PRELOAD): exit=$EC crash=$CRASH listen=$LISTEN"
+done
+pkill -KILL kit 2>/dev/null
+'
+```
+
+Expected: 5/5 `exit=124 crash=0 listen=1`. **이게 진짜 Step C 성공 기준**.
+
+- [ ] **Step 3: vk_partition_test.py — Vulkan partition enforce 유지 확인**
+
+```bash
+kubectl -n isaac-launchable exec $NEWPOD -c vscode -- bash -lc '
+if [ -f vk_partition_test.py ]; then
+  LD_PRELOAD=/usr/local/vgpu/libvgpu.so /isaac-sim/python.sh vk_partition_test.py 2>&1 | head -30
+  echo "EXIT=${PIPESTATUS[0]}"
+else
+  echo "vk_partition_test.py 부재 — Step D 에서 작성"
+fi
+'
+```
+
+Expected: vk_partition_test.py 가 있으면 partition enforce (44 GiB → 23 GiB clamp) 결과 출력. 없으면 이 step 은 스킵 가능 (테스트 스크립트는 Step D 에서 작성).
+
+- [ ] **Step 4: nvidia-smi raw 값 확인 (LD_PRELOAD 비활성 vs 활성)**
+
+```bash
+kubectl -n isaac-launchable exec $NEWPOD -c vscode -- bash -lc '
+echo "=== without LD_PRELOAD (raw) ==="
+nvidia-smi --query-gpu=memory.total --format=csv,noheader
+echo "=== with LD_PRELOAD (clamped) ==="
+LD_PRELOAD=/usr/local/vgpu/libvgpu.so nvidia-smi --query-gpu=memory.total --format=csv,noheader
+'
+```
+
+Expected: 
+- raw: ~46068 MiB
+- clamped: 23552 MiB (HAMI_VULKAN_ENABLE + partition annotation 이 있으면; 없으면 raw)
+
+만약 isaac-launchable 에 아직 hami.io/vgpu=enabled label 없으면 clamp 안 됨 — Step D 에서 활성화. Step C 의 의무는 단지 "LD_PRELOAD forced 후 crash 안 함".
+
+---
+
+### Task 7: push HAMi-core fork + bump submodule + draft PR comments (don't post)
+
+**Files:**
+- Modify (parent repo): `libvgpu` submodule SHA bump
+- Create: `/tmp/step-c-pr-drafts/{pr182,pr1803}.md`
+
+- [ ] **Step 1: Push libvgpu fork**
+
+```bash
+cd /Users/xiilab/git/HAMi/libvgpu
+git log --oneline -8
+git push xiilab vulkan-layer 2>&1 | tail
+```
+
+Expected: Step C commits (Task 1, 2, 3, 4-if-any, 5) push 성공.
+
+- [ ] **Step 2: Bump HAMi parent submodule SHA**
+
+```bash
+cd /Users/xiilab/git/HAMi
+NEW_SHA=$(cd libvgpu && git rev-parse HEAD)
+echo "new HAMi-core SHA: $NEW_SHA"
+git add libvgpu
+git commit -s -m "chore(libvgpu): bump HAMi-core for Step C vulkan layer compat" \
+  -m "Pulls in Step C commits hardening the Vulkan layer for Isaac Sim Kit
+init paths. See docs/superpowers/specs/2026-04-28-hami-isolation-isaac-sim-design.md
+section 9 and the plan at docs/superpowers/plans/2026-04-28-hami-isolation-step-c-vulkan-layer-compat.md.
+
+Verified on ws-node074: 5/5 runheadless.sh exit=124 alive under
+LD_PRELOAD=/usr/local/vgpu/libvgpu.so (Isaac Sim Kit 6.0.0-rc.22)."
+git push xiilab feat/vulkan-vgpu 2>&1 | tail
+```
+
+- [ ] **Step 3: Draft PR comments (DO NOT post)**
+
+```bash
+mkdir -p /tmp/step-c-pr-drafts
+
+cat > /tmp/step-c-pr-drafts/pr182.md <<'EOF'
+## Step C complete — Vulkan layer compat hardening (Isaac Sim Kit)
+
+Builds on Step B (CUDA hook NULL guards). Adds Vulkan layer changes:
+
+| Commit | Change |
+|---|---|
+| (sha) | dispatch table: EnumerateDevice* PFNs + hami_instance_first() helper |
+| (sha) | layer.c: cache first next-gipa/gdpa, refactor Enumerate hooks |
+| (sha) | GIPA/GDPA fallback to cached next-gipa/gdpa for unknown handles |
+| (sha) | (if Task 4) hook vkGetPhysicalDevice<X> entry points found NULL in trace |
+| (sha) | docs/notes: trace evidence + dispatch lifetime audit |
+
+### Verification
+
+- 9/9 unit tests (Step B) regression pass
+- ws-node074 isaac-launchable pod under `LD_PRELOAD=/usr/local/vgpu/libvgpu.so` + Isaac Sim Kit 6.0.0-rc.22:
+  - 5/5 `runheadless.sh` exit=124 alive, no SegFault, listen :49100
+  - HAMI_VK_TRACE evidence: <count> GIPA lookups, 0 unhandled "not registered" fall-throughs
+- Step D (isaac-launchable opt-in label activation) follows in a separate plan.
+EOF
+
+cat > /tmp/step-c-pr-drafts/pr1803.md <<'EOF'
+## Step C (Vulkan layer compat) complete
+
+HAMi-core PR #182 added Vulkan layer hardening for Isaac Sim Kit init:
+
+- dispatch table EnumerateDevice* + hami_instance_first() helper
+- cached first next-gipa/gdpa
+- GIPA/GDPA cached-fallback for unknown handles
+- (if Task 4 added hooks) explicit hooks for vkGetPhysicalDevice<X> names that returned NULL through GIPA(NULL, ...)
+
+The `libvgpu` submodule pointer is bumped in HAMi commit <NEW_HAMI_BUMP_SHA>.
+
+### Verification
+
+ws-node074 isaac-launchable pod under `LD_PRELOAD=/usr/local/vgpu/libvgpu.so` runs Isaac Sim Kit (`runheadless.sh`) 5/5 alive (exit=124, listen :49100), no SegFault. Step D (opt-in activation + 4-path enforce verification) follows.
+
+Spec: `docs/superpowers/specs/2026-04-28-hami-isolation-isaac-sim-design.md`
+Plan: `docs/superpowers/plans/2026-04-28-hami-isolation-step-c-vulkan-layer-compat.md`
+EOF
+
+# Substitute real SHAs
+HAMI_BUMP_SHA=$(cd /Users/xiilab/git/HAMi && git rev-parse HEAD)
+sed -i.bak "s/<NEW_HAMI_BUMP_SHA>/$HAMI_BUMP_SHA/g" /tmp/step-c-pr-drafts/pr1803.md
+rm /tmp/step-c-pr-drafts/pr1803.md.bak
+
+ls -la /tmp/step-c-pr-drafts/
+```
+
+(SHA placeholders in pr182.md will be filled by the controller — too many to script.)
+
+- [ ] **Step 4: Report — DO NOT post comments. Wait for explicit user approval.**
+
+---
+
+## Self-Review
+
+**1. Spec coverage:** spec §9.1 (foundation) → Task 1; §9.2 GIPA fallback → Task 2; §9.2 추가 hook → Tasks 3+4 (evidence-driven); §9.2 dispatch lifetime + chain copy → Task 5; §9.3 검증 → Task 6. ✅
+
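+**2. Placeholder scan:** Task 4 의 코드 예시는 evidence-driven 결과에 따라 실제 다를 수 있음을 명시 — placeholder 가 아니라 "case 별 구체적 패턴". 이외에는 실행 시점에 실제 결과로 채우는 evidence 슬롯 (Task 3/5 notes 의 `(paste)`/`(findings …)`, Task 4 commit message 의 `<LIST>`, Task 7 draft 의 `(sha)`/`<count>`) 만 남아 있음. ✅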
+
+**3. Type consistency:** `hami_instance_dispatch_t` / `PFN_vkGet*` / `g_first_next_gipa` 모든 task 에서 일관 사용. ✅
+
+**4. Scope check:** Step C 만. Step D 별도 plan. Step B 는 이미 완료. ✅
+
+**5. Evidence-driven 원칙:** Task 4 가 가장 큰 잠재 scope creep — 명시적으로 "Task 3 trace 결과로만 결정, 추측 hook 추가 금지" 박아둠. ✅
+
+---
+
+## 일정 추정
+
+| Task | 예상 시간 |
+|---|---|
+| 1 WIP foundation commit | 20분 |
+| 2 GIPA/GDPA cached-fallback | 30분 |
+| 3 trace + notes | 45분 |
+| 4 evidence-driven hooks (range: 0 ~ 6 names × 10min) | 0~60분 |
+| 5 lifetime + chain audit (review-only) | 45분 |
+| 6 ws-node074 integration verify | 30분 |
+| 7 push + draft PR comments | 20분 |
+| **총** | **약 3~4시간** |
+
+(Task 4 의 scope 가 trace 결과에 따라 0 ~ 60분으로 편차가 크다. 표의 합계는 3시간 10분 ~ 4시간 10분.)
diff --git a/docs/superpowers/plans/2026-04-29-step-c-vk-so-split.md b/docs/superpowers/plans/2026-04-29-step-c-vk-so-split.md
new file mode 100644
index 000000000..518636e84
--- /dev/null
+++ b/docs/superpowers/plans/2026-04-29-step-c-vk-so-split.md
@@ -0,0 +1,856 @@
+# Step C 재설계 — Vulkan layer 분리 (libvgpu_vk.so) Implementation Plan
+
+> **For agentic workers:** REQUIRED SUB-SKILL: Use superpowers:subagent-driven-development (recommended) or superpowers:executing-plans to implement this plan task-by-task. Steps use checkbox (`- [ ]`) syntax for tracking.
+
+**Goal:** `src/vulkan/*` 전체를 `libvgpu_vk.so` (신규) 로 분리하고 `libvgpu.so` 에는 HAMi-core 만 남긴다. Vulkan layer 활성은 implicit_layer manifest path 만. 이렇게 해서 2026-04-28 발견된 LD_PRELOAD-only path crash class 가 구조적으로 발생 불가능해진다.
+
+**Architecture:** 5개 HAMi-core 함수 (`oom_check`, `add_gpu_device_memory_usage`, `rm_gpu_device_memory_usage`, `get_current_device_memory_limit`, `rate_limiter`) 를 `hami_core_*` wrapper 로 명시 export → `libvgpu_vk.so` 가 DT_NEEDED 로 link → manifest dlopen 시점에 자동 resolve. Spec: `docs/superpowers/specs/2026-04-29-step-c-redesign-vk-so-split.md`.
+
+**Tech Stack:** C, CMake (libvgpu/), Docker (`make build-in-docker`), kubectl, ws-node074. HAMi-core fork: `/Users/xiilab/git/HAMi/libvgpu`, branch `vulkan-layer` (현재 HEAD `83fd245` — Step C revert 상태).
+
+---
+
+## File Structure
+
+| 파일 | 변경 종류 | 책임 |
+|---|---|---|
+| `libvgpu/src/include/hami_core_export.h` | Create | 5개 wrapper 함수 declaration. `__attribute__((visibility("default")))` |
+| `libvgpu/src/hami_core_export.c` | Create | wrapper 정의 — 내부 HAMi-core 함수를 호출 |
+| `libvgpu/src/CMakeLists.txt` | Modify | (a) `hami_core_export.c` 를 libvgpu.so source 에 추가 (b) `vulkan_mod` 를 libvgpu.so 에서 제거 (c) 신규 `libvgpu_vk` target 추가 |
+| `libvgpu/src/vulkan/budget.c` | Modify | `extern` 선언 → `#include "hami_core_export.h"` + `hami_core_*` 호출 |
+| `libvgpu/src/vulkan/throttle_adapter.c` | Modify | `extern rate_limiter` → `hami_core_throttle()` |
+| `libvgpu/share/hami/hami.json` | Create | Vulkan implicit_layer manifest. `library_path` = `/usr/local/vgpu/libvgpu_vk.so` |
+
+추가 산출물 (build):
+- `build/libvgpu.so` — HAMi-core 만, `vk*` 미export
+- `build/libvgpu_vk.so` — Vulkan layer, DT_NEEDED `libvgpu.so`
+
+---
+
+## Tasks
+
+### Task 1: Add `hami_core_export.{h,c}` — explicit export interface
+
+**Files:**
+- Create: `libvgpu/src/include/hami_core_export.h`
+- Create: `libvgpu/src/hami_core_export.c`
+
+- [ ] **Step 1: Write the header**
+
+```c
+/* libvgpu/src/include/hami_core_export.h */
+#ifndef HAMI_CORE_EXPORT_H_
+#define HAMI_CORE_EXPORT_H_
+
+#include <stddef.h>
+#include <stdint.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/* HAMi-core ↔ libvgpu_vk.so contract.
+ * These are the only HAMi-core symbols libvgpu_vk.so depends on.
+ * libvgpu.so MUST export them with default visibility; libvgpu_vk.so
+ * picks them up via DT_NEEDED link at dlopen() time. */
+
+/* Returns 1 if reserving `addon` bytes on device `dev` would exceed the
+ * partition limit, else 0. */
+int hami_core_oom_check(int dev, size_t addon);
+
+/* Records `usage` bytes of allocation by (pid, dev). type==2 (DEVICE).
+ * Returns 0 on success, non-zero on failure. */
+int hami_core_add_memory_usage(int32_t pid, int dev, size_t usage, int type);
+
+/* Releases `usage` bytes by (pid, dev). type==2 (DEVICE). 0 = success. */
+int hami_core_rm_memory_usage(int32_t pid, int dev, size_t usage, int type);
+
+/* Returns the partition byte-limit for device `dev`, or 0 = unlimited. */
+uint64_t hami_core_get_memory_limit(int dev);
+
+/* Consumes one rate-limiter token (claim size = 1*1). */
+void hami_core_throttle(void);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif  /* HAMI_CORE_EXPORT_H_ */
+```
+
+- [ ] **Step 2: Write the implementation**
+
+```c
+/* libvgpu/src/hami_core_export.c */
+#include "include/hami_core_export.h"
+
+#include <stdint.h>
+#include <stddef.h>
+
+/* Internal HAMi-core symbols. Both libvgpu_vk.so and the wrappers below
+ * see the SAME object code linked into libvgpu.so. We make these
+ * symbols visible to other .so files only through the wrappers, never
+ * directly: that keeps the libvgpu.so→libvgpu_vk.so contract narrow. */
+extern int      oom_check(int dev, size_t addon);
+extern int      add_gpu_device_memory_usage(int32_t pid, int dev, size_t usage, int type);
+extern int      rm_gpu_device_memory_usage(int32_t pid, int dev, size_t usage, int type);
+extern uint64_t get_current_device_memory_limit(int dev);
+extern void     rate_limiter(int grids, int blocks);
+
+#define HAMI_EXPORT __attribute__((visibility("default")))
+
+HAMI_EXPORT int hami_core_oom_check(int dev, size_t addon) {
+    return oom_check(dev, addon);
+}
+
+HAMI_EXPORT int hami_core_add_memory_usage(int32_t pid, int dev, size_t usage, int type) {
+    return add_gpu_device_memory_usage(pid, dev, usage, type);
+}
+
+HAMI_EXPORT int hami_core_rm_memory_usage(int32_t pid, int dev, size_t usage, int type) {
+    return rm_gpu_device_memory_usage(pid, dev, usage, type);
+}
+
+HAMI_EXPORT uint64_t hami_core_get_memory_limit(int dev) {
+    return get_current_device_memory_limit(dev);
+}
+
+HAMI_EXPORT void hami_core_throttle(void) {
+    rate_limiter(1, 1);
+}
+```
+
+- [ ] **Step 3: Add to libvgpu.so build sources in `src/CMakeLists.txt`**
+
+Find the line:
+```cmake
+add_library(${LIBVGPU} SHARED libvgpu.c utils.c log_utils.c $<TARGET_OBJECTS:nvml_mod> $<TARGET_OBJECTS:cuda_mod> $<TARGET_OBJECTS:allocator_mod> $<TARGET_OBJECTS:multiprocess_mod> $<TARGET_OBJECTS:vulkan_mod>)
+```
+
+Replace with (still includes vulkan_mod for now — Task 4 splits it):
+```cmake
+add_library(${LIBVGPU} SHARED libvgpu.c utils.c log_utils.c hami_core_export.c $<TARGET_OBJECTS:nvml_mod> $<TARGET_OBJECTS:cuda_mod> $<TARGET_OBJECTS:allocator_mod> $<TARGET_OBJECTS:multiprocess_mod> $<TARGET_OBJECTS:vulkan_mod>)
+```
+
+- [ ] **Step 4: Verify it compiles (local docker)**
+
+```bash
+cd /Users/xiilab/git/HAMi/libvgpu
+make build-in-docker 2>&1 | tail -10
+```
+
+Expected: `Built target vgpu`, no errors. (Tests/test targets compile too.)
+
+- [ ] **Step 5: Verify the wrappers are exported**
+
+```bash
+docker run --rm -v "$PWD:/work" -w /work ubuntu:22.04 bash -c \
+  "apt-get -qq update >/dev/null && apt-get -qq install -y binutils >/dev/null && \
+   nm -D --defined-only build/libvgpu.so | grep ' T hami_core_'"
+```
+
+Expected: 5 lines, one per `hami_core_*` wrapper. (Symbols of type T = exported text.)
+
+- [ ] **Step 6: Commit**
+
+```bash
+cd /Users/xiilab/git/HAMi/libvgpu
+git add src/include/hami_core_export.h src/hami_core_export.c src/CMakeLists.txt
+git commit -s -m "feat(hami-core): explicit hami_core_* export wrappers" \
+  -m "Five thin wrappers around the HAMi-core symbols that libvgpu_vk.so
+will need after the upcoming Vulkan-layer split: oom_check,
+add/rm_gpu_device_memory_usage, get_current_device_memory_limit,
+rate_limiter.
+
+All five carry __attribute__((visibility(\"default\"))) so that the
+release build (-fvisibility=hidden) keeps the export surface narrow:
+libvgpu_vk.so DT_NEEDED-resolves only these names and nothing else from
+HAMi-core internals. No call-site changes yet — that follows in the next
+commit."
+```
+
+---
+
+### Task 2: Update src/vulkan/budget.c + throttle_adapter.c to call wrappers
+
+**Files:**
+- Modify: `libvgpu/src/vulkan/budget.c`
+- Modify: `libvgpu/src/vulkan/throttle_adapter.c`
+
+- [ ] **Step 1: Replace extern declarations in `src/vulkan/budget.c`**
+
+Find the block (currently around line 22-30):
+```c
+extern int      oom_check(const int dev, size_t addon);
+extern int      add_gpu_device_memory_usage(int32_t pid, int dev,
+                                            size_t usage, int type);
+extern int      rm_gpu_device_memory_usage(int32_t pid, int dev,
+                                            size_t usage, int type);
+extern uint64_t get_current_device_memory_limit(const int dev);
+```
+
+Replace with:
+```c
+#include "include/hami_core_export.h"
+```
+
+Then update each call site in the same file:
+- `oom_check(dev, size)` → `hami_core_oom_check(dev, size)`
+- `add_gpu_device_memory_usage(getpid(), dev, size, HAMI_MEM_TYPE_DEVICE)` → `hami_core_add_memory_usage(getpid(), dev, size, HAMI_MEM_TYPE_DEVICE)`
+- `rm_gpu_device_memory_usage(getpid(), dev, size, HAMI_MEM_TYPE_DEVICE)` → `hami_core_rm_memory_usage(getpid(), dev, size, HAMI_MEM_TYPE_DEVICE)`
+- `get_current_device_memory_limit(dev)` → `hami_core_get_memory_limit(dev)`
+
+(Keep the `cuInit` extern — that's CUDA driver, not HAMi-core.)
+
+- [ ] **Step 2: Update `src/vulkan/throttle_adapter.c`**
+
+Replace the file body:
+```c
+#include "vulkan/throttle_adapter.h"
+#include "include/hami_core_export.h"
+
+void hami_vulkan_throttle(void) {
+    /* Consume one token — represents "one queue submission". The
+     * underlying rate_limiter interprets (grids*blocks) as the claim
+     * size; the wrapper uses (1,1) so Vulkan submits compete fairly
+     * with tiny CUDA kernel launches. */
+    hami_core_throttle();
+}
+```
+
+- [ ] **Step 3: Build (still combined libvgpu.so)**
+
+```bash
+cd /Users/xiilab/git/HAMi/libvgpu
+make build-in-docker 2>&1 | tail -5
+```
+
+Expected: `Built target vgpu`, `Built target test_*`. No errors.
+
+- [ ] **Step 4: Step B regression test under LD_PRELOAD**
+
+Local docker run (ws-node074 not yet involved):
+```bash
+docker run --rm -v "$PWD/build:/build" --gpus none \
+  ubuntu:22.04 bash -c \
+  "LD_PRELOAD=/build/libvgpu.so /build/test/test_cuda_null_guards 2>&1 | tail -15; echo EXIT=\${PIPESTATUS[0]}"
+```
+
+Expected: 9 `[OK]` lines, `EXIT=0`. (No GPU needed — test is hook-level NULL guards only.)
+
+- [ ] **Step 5: Commit**
+
+```bash
+git add src/vulkan/budget.c src/vulkan/throttle_adapter.c
+git commit -s -m "refactor(vulkan): use hami_core_* wrappers instead of internal externs" \
+  -m "Replace the extern declarations of oom_check / add_/rm_gpu_device_
+memory_usage / get_current_device_memory_limit / rate_limiter in
+src/vulkan/budget.c and src/vulkan/throttle_adapter.c with calls
+through the new include/hami_core_export.h interface.
+
+This is a pure call-site rewrite — same runtime behavior, same .so
+boundary (still linked into one libvgpu.so for now). The point is to
+remove direct dependence on HAMi-core internal symbol names so the
+upcoming libvgpu_vk.so split can keep DT_NEEDED narrow."
+```
+
+---
+
+### Task 3: Pre-split sanity build (combined libvgpu.so still healthy)
+
+This task is verification only — confirms Tasks 1+2 didn't break anything before we attempt the split.
+
+**Files:** none (verification)
+
+- [ ] **Step 1: Build clean**
+
+```bash
+cd /Users/xiilab/git/HAMi/libvgpu
+rm -rf build
+make build-in-docker 2>&1 | tail -8
+```
+
+Expected: `Built target vgpu`, `Built target strip_symbol`, no warnings about undefined references.
+
+- [ ] **Step 2: Verify exports include `hami_core_*` and `vk*` (still combined)**
+
+```bash
+docker run --rm -v "$PWD/build:/build" ubuntu:22.04 bash -c \
+  "apt-get -qq update >/dev/null && apt-get -qq install -y binutils >/dev/null && \
+   echo '=== hami_core_* ==='; nm -D --defined-only /build/libvgpu.so | grep ' T hami_core_'; \
+   echo '=== vk* ==='; nm -D --defined-only /build/libvgpu.so | grep ' T vk'"
+```
+
+Expected: 5 `hami_core_*` lines + 3 `vk*` lines (`vkGetInstanceProcAddr`, `vkGetDeviceProcAddr`, `vkNegotiateLoaderLayerInterfaceVersion`). Combined .so still has Vulkan exports because `vulkan_mod` is still linked in.
+
+- [ ] **Step 3: Run all unit tests**
+
+```bash
+docker run --rm -v "$PWD/build:/build" ubuntu:22.04 bash -c \
+  "cd /build/test && for t in test_cuda_null_guards test_layer test_memprops test_alloc; do \
+     [ -x ./\$t ] && (echo '---' \$t '---'; ./\$t 2>&1 | tail -8; echo EXIT=\${PIPESTATUS[0]}); \
+   done"
+```
+
+Expected: each test prints `[OK]` lines and exits 0.
+
+- [ ] **Step 4: No commit (verification only)**
+
+If any check fails, STOP and ask controller — don't proceed to split.
+
+---
+
+### Task 4: Split CMake — create libvgpu_vk.so target, remove vulkan_mod from libvgpu.so
+
+**Files:**
+- Modify: `libvgpu/src/CMakeLists.txt`
+
+- [ ] **Step 1: Edit `src/CMakeLists.txt`**
+
+Find:
+```cmake
+add_library(${LIBVGPU} SHARED libvgpu.c utils.c log_utils.c hami_core_export.c $<TARGET_OBJECTS:nvml_mod> $<TARGET_OBJECTS:cuda_mod> $<TARGET_OBJECTS:allocator_mod> $<TARGET_OBJECTS:multiprocess_mod> $<TARGET_OBJECTS:vulkan_mod>)
+target_compile_options(${LIBVGPU} PUBLIC ${LIBRARY_COMPILE_FLAGS})
+target_compile_definitions(${LIBVGPU} PUBLIC HOOK_NVML_ENABLE)
+target_link_libraries(${LIBVGPU} PUBLIC -lcuda -lnvidia-ml)
+
+if (NOT CMAKE_BUILD_TYPE STREQUAL "Debug")
+add_custom_target(strip_symbol ALL
+    COMMAND strip -x ${CMAKE_BINARY_DIR}/lib${LIBVGPU}.so
+    DEPENDS ${LIBVGPU})
+endif()
+```
+
+Replace with:
+```cmake
+# libvgpu.so: HAMi-core only. Vulkan layer code now lives in libvgpu_vk.so.
+add_library(${LIBVGPU} SHARED libvgpu.c utils.c log_utils.c hami_core_export.c $<TARGET_OBJECTS:nvml_mod> $<TARGET_OBJECTS:cuda_mod> $<TARGET_OBJECTS:allocator_mod> $<TARGET_OBJECTS:multiprocess_mod>)
+target_compile_options(${LIBVGPU} PUBLIC ${LIBRARY_COMPILE_FLAGS})
+target_compile_definitions(${LIBVGPU} PUBLIC HOOK_NVML_ENABLE)
+target_link_libraries(${LIBVGPU} PUBLIC -lcuda -lnvidia-ml)
+
+# libvgpu_vk.so: Vulkan implicit-layer code. Activated via
+# /etc/vulkan/implicit_layer.d/hami.json (see share/hami/hami.json).
+# DT_NEEDED links libvgpu.so so the loader resolves the hami_core_*
+# wrappers when the Vulkan loader dlopen()s us.
+set(LIBVGPU_VK vgpu_vk)
+add_library(${LIBVGPU_VK} SHARED $<TARGET_OBJECTS:vulkan_mod>)
+target_compile_options(${LIBVGPU_VK} PUBLIC ${LIBRARY_COMPILE_FLAGS})
+target_link_libraries(${LIBVGPU_VK} PUBLIC ${LIBVGPU} -lpthread)
+
+if (NOT CMAKE_BUILD_TYPE STREQUAL "Debug")
+add_custom_target(strip_symbol ALL
+    COMMAND strip -x ${CMAKE_BINARY_DIR}/lib${LIBVGPU}.so
+    COMMAND strip -x ${CMAKE_BINARY_DIR}/lib${LIBVGPU_VK}.so
+    DEPENDS ${LIBVGPU} ${LIBVGPU_VK})
+endif()
+```
+
+Notes:
+- `target_link_libraries(${LIBVGPU_VK} PUBLIC ${LIBVGPU} ...)` makes CMake put `libvgpu.so` on the link line for `libvgpu_vk.so`; the static linker (`ld`) records this as DT_NEEDED `libvgpu.so` in the resulting `libvgpu_vk.so`, and the dynamic loader (`ld.so`) resolves it at load time.
+- `vulkan_mod` 의 OBJECT lib 는 그대로 유지 — 두 target 중 하나에만 link됨.
+
+- [ ] **Step 2: Build clean**
+
+```bash
+cd /Users/xiilab/git/HAMi/libvgpu
+rm -rf build
+make build-in-docker 2>&1 | tail -10
+```
+
+Expected: `Built target vgpu`, `Built target vgpu_vk`, both without warnings about undefined references.
+
+- [ ] **Step 3: Verify both .so produced**
+
+```bash
+ls -la build/libvgpu.so build/libvgpu_vk.so
+```
+
+Expected: both files present, executable.
+
+- [ ] **Step 4: Commit**
+
+```bash
+git add src/CMakeLists.txt
+git commit -s -m "build: split Vulkan layer into separate libvgpu_vk.so" \
+  -m "libvgpu.so loses vulkan_mod and now contains only HAMi-core
+(NVML/CUDA hooks + allocator + multiprocess). libvgpu_vk.so is a new
+shared target that holds all of src/vulkan/* and links libvgpu.so as
+DT_NEEDED so the hami_core_* wrappers resolve when the Vulkan loader
+dlopen()s the new .so via the implicit-layer manifest.
+
+After this commit:
+* nm -D libvgpu.so MUST NOT show vk*
+* nm -D libvgpu_vk.so MUST show vkGetInstanceProcAddr,
+  vkGetDeviceProcAddr, vkNegotiateLoaderLayerInterfaceVersion (and only
+  those as exports thanks to -fvisibility=hidden + HAMI_LAYER_EXPORT).
+* readelf -d libvgpu_vk.so MUST list libvgpu.so as NEEDED.
+
+Step C plan: docs/superpowers/plans/2026-04-29-step-c-vk-so-split.md
+Spec: docs/superpowers/specs/2026-04-29-step-c-redesign-vk-so-split.md"
+```
+
+---
+
+### Task 5: ELF / symbol diff verification (the structural-isolation proof)
+
+**Files:** none (verification only; optionally keep the check script under docs/notes for future runs — nothing is committed in this task)
+
+- [ ] **Step 1: Run the symbol-isolation check**
+
+```bash
+cd /Users/xiilab/git/HAMi/libvgpu
+docker run --rm -v "$PWD/build:/build" ubuntu:22.04 bash -c '
+apt-get -qq update >/dev/null
+apt-get -qq install -y binutils >/dev/null
+echo "=== libvgpu.so: must have hami_core_* but NO vk* ==="
+echo "--- hami_core_* (expect 5) ---"
+nm -D --defined-only /build/libvgpu.so | grep " T hami_core_" | wc -l
+echo "--- vk* (expect 0) ---"
+nm -D --defined-only /build/libvgpu.so | grep -E " T vk[A-Z]" | wc -l
+echo
+echo "=== libvgpu_vk.so: must have only the 3 layer entry points ==="
+nm -D --defined-only /build/libvgpu_vk.so | grep " T " | grep -E "^[^[:space:]]+ T (vk[A-Z]|hami_)" | sort
+echo
+echo "=== libvgpu_vk.so: DT_NEEDED must include libvgpu.so ==="
+readelf -d /build/libvgpu_vk.so | grep NEEDED
+echo
+echo "=== libvgpu_vk.so: undefined hami_core_* symbols (expect 5) ==="
+nm -D --undefined-only /build/libvgpu_vk.so | grep "hami_core_" | wc -l
+'
+```
+
+Expected:
+- libvgpu.so hami_core_* count: `5`
+- libvgpu.so vk* count: `0`
+- libvgpu_vk.so exports: `vkGetDeviceProcAddr`, `vkGetInstanceProcAddr`, `vkNegotiateLoaderLayerInterfaceVersion` (3 lines, no `hami_*`)
+- DT_NEEDED includes `libvgpu.so` and `libpthread.so.0`
+- libvgpu_vk.so undefined hami_core_* count: `5`
+
+If any check fails — STOP. The structural-isolation property is the whole point of Step C.
+
+- [ ] **Step 2: No commit (verification only)**
+
+---
+
+### Task 6: Unit tests against the split build
+
+**Files:** none (verification only)
+
+- [ ] **Step 1: Step B regression — `test_cuda_null_guards` under LD_PRELOAD libvgpu.so**
+
+```bash
+docker run --rm -v "$PWD/build:/build" ubuntu:22.04 bash -c \
+  "LD_PRELOAD=/build/libvgpu.so /build/test/test_cuda_null_guards 2>&1; echo EXIT=\$?"
+```
+
+Expected: 9 `[OK]` lines, `EXIT=0`. CUDA hook code is unchanged across the split, so this MUST pass identically to the pre-split LD_PRELOAD run in Task 2 step 4.
+
+- [ ] **Step 2: Vulkan unit tests against libvgpu_vk.so**
+
+```bash
+docker run --rm -v "$PWD/build:/build" ubuntu:22.04 bash -c '
+for t in test_layer test_memprops test_alloc; do
+  [ -x /build/test/$t ] || { echo "SKIP $t (not built)"; continue; }
+  echo "--- $t ---"
+  LD_LIBRARY_PATH=/build LD_PRELOAD=/build/libvgpu.so:/build/libvgpu_vk.so /build/test/$t 2>&1 | tail -10
+  echo "EXIT=${PIPESTATUS[0]}"
+done'
+```
+
+Expected: each test exits 0 with its expected `[OK]` lines.
+
+(Why both .so in LD_PRELOAD: the Vulkan unit tests fake the next-layer GIPA and don't go through Vulkan loader manifest activation, so we have to hand-load libvgpu_vk.so. This only matters for unit tests; production uses manifest dlopen.)
+
+- [ ] **Step 3: No commit (verification only)**
+
+---
+
+### Task 7: Add Vulkan implicit-layer manifest file
+
+**Files:**
+- Create: `libvgpu/share/hami/hami.json`
+
+- [ ] **Step 1: Write the manifest**
+
+```json
+{
+  "file_format_version": "1.0.0",
+  "layer": {
+    "name": "VK_LAYER_HAMI_vgpu",
+    "type": "INSTANCE",
+    "library_path": "/usr/local/vgpu/libvgpu_vk.so",
+    "api_version": "1.3.0",
+    "implementation_version": "1",
+    "description": "HAMi vGPU partition layer — clamps device-memory queries and tracks Vulkan allocations against the per-pod budget.",
+    "instance_extensions": [],
+    "device_extensions": []
+  }
+}
+```
+
+Save to `libvgpu/share/hami/hami.json`.
+
+(Production install path: `/etc/vulkan/implicit_layer.d/hami.json`, typically a symlink to `/usr/local/vgpu/hami.json`. The webhook + DaemonSet that drops this file are Step D scope, not this plan.)
+
+- [ ] **Step 2: Validate the JSON**
+
+```bash
+python3 -c "import json; json.load(open('share/hami/hami.json')); print('OK')"
+```
+
+Expected: `OK`.
+
+- [ ] **Step 3: Commit**
+
+```bash
+git add share/hami/hami.json
+git commit -s -m "feat(vulkan): ship hami.json implicit-layer manifest" \
+  -m "Static manifest that the Step D webhook + DaemonSet will install
+into /etc/vulkan/implicit_layer.d/ to activate libvgpu_vk.so via the
+Vulkan loader. file_format_version 1.0.0, type INSTANCE, api 1.3.0.
+
+library_path is the production install path /usr/local/vgpu/libvgpu_vk.so;
+no extensions claimed (the layer only intercepts existing entry points)."
+```
+
+---
+
+### Task 8: ws-node074 LD_PRELOAD-only smoke (the regression-killed proof)
+
+**Files:** none (production-side verification)
+
+This task verifies the structural-isolation property on the actual hardware that exhibited the 2026-04-28 regression. The expected outcome is that LD_PRELOAD `libvgpu.so` (Vulkan layer NOT activated, manifest absent) leaves Isaac Sim Kit unaffected — because `libvgpu.so` no longer exports any `vk*` symbols.
+
+- [ ] **Step 1: Sync sources to ws-node074 and rebuild**
+
+```bash
+cd /Users/xiilab/git/HAMi/libvgpu
+rsync -az --exclude=build --exclude=.git/objects/pack . root@10.61.3.74:/tmp/libvgpu-build/
+ssh root@10.61.3.74 'cd /tmp/libvgpu-build && rm -rf .git build && git init -q && git add -A 2>&1 | tail -1 && git -c user.email=x@x -c user.name=x commit -q -m local --no-gpg-sign && make build-in-docker 2>&1 | tail -8'
+```
+
+Expected: Both `Built target vgpu` and `Built target vgpu_vk` lines.
+
+- [ ] **Step 2: Verify backups + swap libvgpu.so only (NOT installing manifest yet)**
+
+```bash
+ssh root@10.61.3.74 '
+md5sum /usr/local/vgpu/libvgpu.so /usr/local/vgpu/libvgpu.so.bak-pre-step-c
+cp -av /usr/local/vgpu/libvgpu.so /usr/local/vgpu/libvgpu.so.bak-pre-stepC2 2>&1 | tail -1
+cp -f /tmp/libvgpu-build/build/libvgpu.so /usr/local/vgpu/libvgpu.so
+md5sum /tmp/libvgpu-build/build/libvgpu.so /usr/local/vgpu/libvgpu.so
+ls -la /etc/vulkan/implicit_layer.d/   # confirm hami.json absent
+'
+```
+
+Expected: pre-stepC2 backup created, swap completes, the two md5 sums printed after the swap match each other (new file in place), and `/etc/vulkan/implicit_layer.d/` shows only `nvidia_layers.json` (no `hami.json`).
+
+- [ ] **Step 3: Baseline runheadless under no LD_PRELOAD (confirm swap doesn't break steady state)**
+
+```bash
+NEWPOD=$(kubectl -n isaac-launchable get pods --no-headers | grep '^isaac-launchable-0' | awk '{print $1}' | head -1)
+echo "Pod: $NEWPOD"
+kubectl -n isaac-launchable exec $NEWPOD -c vscode -- bash -lc '
+pkill -KILL kit 2>/dev/null; sleep 2
+timeout 45 env ACCEPT_EULA=y /isaac-sim/runheadless.sh > /tmp/c-baseline.log 2>&1
+EC=$?
+pkill -KILL kit 2>/dev/null
+echo "exit=$EC crash=$(grep -c "Segmentation\|crash has occurred" /tmp/c-baseline.log) listen=$(ss -tunlp 2>/dev/null | grep -c -E :49100)"
+'
+```
+
+Expected: `exit=124 crash=0 listen=1`. If anything else, STOP and restore from `.bak-pre-stepC2`.
+
+- [ ] **Step 4: LD_PRELOAD-forced runheadless × 5 (the regression check)**
+
+```bash
+kubectl -n isaac-launchable exec $NEWPOD -c vscode -- bash -lc '
+mkdir -p /tmp/v
+PASS=0
+for i in 1 2 3 4 5; do
+  pkill -KILL kit 2>/dev/null; sleep 3
+  timeout 50 env \
+    ACCEPT_EULA=y \
+    LD_PRELOAD=/usr/local/vgpu/libvgpu.so \
+    /isaac-sim/runheadless.sh > /tmp/v/r$i.log 2>&1
+  EC=$?
+  CRASH=$(grep -cE "Segmentation fault|crash has occurred" /tmp/v/r$i.log)
+  LISTEN=$(ss -tunlp 2>/dev/null | grep -c -E ":49100")
+  echo "run $i: exit=$EC crash=$CRASH listen=$LISTEN"
+  [ "$EC" = "124" ] && [ "$CRASH" = "0" ] && PASS=$((PASS+1))
+  pkill -KILL kit 2>/dev/null
+done
+echo "PASS=$PASS / 5"
+'
+```
+
+Expected: `PASS=5 / 5` with each run reporting `exit=124 crash=0 listen=1`.
+
+If `PASS < 5`, the regression is not confined to the Vulkan code — it lives in HAMi-core as well. STOP. Restore `/usr/local/vgpu/libvgpu.so` from `.bak-pre-stepC2`. Open a separate analysis (likely needs a full bisect on production hardware).
+
+- [ ] **Step 5: HAMi-core init verification (NVML hook should still work)**
+
+```bash
+kubectl -n isaac-launchable exec $NEWPOD -c vscode -- bash -lc '
+LD_PRELOAD=/usr/local/vgpu/libvgpu.so nvidia-smi --query-gpu=memory.total --format=csv,noheader
+'
+```
+
+Expected: `23552 MiB` (clamped) — confirms NVML hook is active. If raw `46068 MiB`, partition env not picked up; investigate but NOT a Step C regression.
+
+- [ ] **Step 6: No commit. Record outcome locally**
+
+```bash
+echo "Task 8 PASS=5/5: $(date -u +%Y-%m-%dT%H:%M:%SZ)" >> /tmp/step-c-task8-result.txt
+```
+
+(The commit comes in Task 10 with the submodule bump.)
+
+---
+
+### Task 9: ws-node074 manifest-activated smoke (Vulkan layer actually doing its job)
+
+**Files:** none (production-side verification)
+
+This task confirms the new architecture's happy path: `libvgpu.so` LD_PRELOAD'd + `libvgpu_vk.so` installed at `/usr/local/vgpu/libvgpu_vk.so` + `hami.json` at `/etc/vulkan/implicit_layer.d/hami.json` → Isaac Sim Kit alive AND partition enforced.
+
+- [ ] **Step 1: Install libvgpu_vk.so + manifest on host**
+
+```bash
+ssh root@10.61.3.74 '
+cp -av /tmp/libvgpu-build/build/libvgpu_vk.so /usr/local/vgpu/libvgpu_vk.so 2>&1 | tail -1
+md5sum /usr/local/vgpu/libvgpu_vk.so
+cp -av /tmp/libvgpu-build/share/hami/hami.json /etc/vulkan/implicit_layer.d/hami.json 2>&1 | tail -1
+ls -la /etc/vulkan/implicit_layer.d/
+'
+```
+
+Expected: both files in place. Manifest path now lists `hami.json` alongside `nvidia_layers.json`.
+
+- [ ] **Step 2: Manifest-activated runheadless × 5 with HAMI_VK_TRACE on the first run only**
+
+```bash
+kubectl -n isaac-launchable exec $NEWPOD -c vscode -- bash -lc '
+mkdir -p /tmp/v2
+PASS=0
+for i in 1 2 3 4 5; do
+  pkill -KILL kit 2>/dev/null; sleep 3
+  TRACE_ARG=""
+  [ "$i" = "1" ] && TRACE_ARG="HAMI_VK_TRACE=1"
+  timeout 50 env \
+    ACCEPT_EULA=y \
+    $TRACE_ARG \
+    LD_PRELOAD=/usr/local/vgpu/libvgpu.so \
+    /isaac-sim/runheadless.sh > /tmp/v2/r$i.log 2>&1
+  EC=$?
+  CRASH=$(grep -cE "Segmentation fault|crash has occurred" /tmp/v2/r$i.log)
+  LISTEN=$(ss -tunlp 2>/dev/null | grep -c -E ":49100")
+  echo "run $i: exit=$EC crash=$CRASH listen=$LISTEN"
+  [ "$EC" = "124" ] && [ "$CRASH" = "0" ] && PASS=$((PASS+1))
+  pkill -KILL kit 2>/dev/null
+done
+echo "PASS=$PASS / 5"
+echo "=== run 1 trace lines ==="
+grep -c HAMI_VK_TRACE /tmp/v2/r1.log
+echo "=== run 1 top GIPA names ==="
+grep "hami_vkGetInstanceProcAddr.*name=" /tmp/v2/r1.log | sed -e "s/.*name=//" -e "s/ .*//" | sort | uniq -c | sort -rn | head -20
+'
+```
+
+Expected:
+- `PASS=5 / 5`
+- run 1 trace lines > 100 (layer is now actually being invoked through the chain)
+- top GIPA names: `vkCreateInstance`, `vkGetPhysicalDeviceMemoryProperties*`, `vkAllocateMemory`, etc.
+
+If `PASS < 5` even with manifest active, the layer code itself has a real bug. STOP, capture trace evidence, surface to controller.
+
+If trace lines = 0 with manifest active, the loader didn't pick up our manifest. Inspect: `nvidia_layers.json` content vs ours, JSON syntax, file permissions on `/etc/vulkan/implicit_layer.d/hami.json`.
+
+- [ ] **Step 3: Partition clamp verification under manifest-active path**
+
+```bash
+kubectl -n isaac-launchable exec $NEWPOD -c vscode -- bash -lc '
+echo "=== nvidia-smi clamp via NVML hook ==="
+LD_PRELOAD=/usr/local/vgpu/libvgpu.so nvidia-smi --query-gpu=memory.total --format=csv,noheader
+echo "=== Vulkan vkGetPhysicalDeviceMemoryProperties via vk_partition_test (if present) ==="
+if [ -f vk_partition_test.py ]; then
+  LD_PRELOAD=/usr/local/vgpu/libvgpu.so /isaac-sim/python.sh vk_partition_test.py 2>&1 | head -30
+  echo "EXIT=${PIPESTATUS[0]}"
+else
+  echo "vk_partition_test.py 부재 — skip (Step D scope에서 작성)"
+fi
+'
+```
+
+Expected: nvidia-smi shows `23552 MiB`. If `vk_partition_test.py` exists, Vulkan-side memory query also clamped to `23552 MiB`.
+
+- [ ] **Step 4: No commit (verification only)**
+
+If the verification fails, STOP. Restore: `cp /usr/local/vgpu/libvgpu.so.bak-pre-stepC2 /usr/local/vgpu/libvgpu.so; rm /etc/vulkan/implicit_layer.d/hami.json`.
+
+---
+
+### Task 10: Push HAMi-core fork + bump parent submodule + draft PR comments
+
+**Files:**
+- Modify (parent repo): `libvgpu` submodule SHA bump
+- Create: `/tmp/step-c-vk-split-pr-drafts/{pr182,pr1803}.md`
+
+- [ ] **Step 1: Push libvgpu fork**
+
+```bash
+cd /Users/xiilab/git/HAMi/libvgpu
+git log --oneline -10
+git push xiilab vulkan-layer 2>&1 | tail -10
+```
+
+Expected: the push succeeds and includes the 4 code commits from Tasks 1, 2, 4, and 7, plus any docs-only commits from the prior session that were not yet pushed.
+
+- [ ] **Step 2: Bump parent HAMi submodule**
+
+```bash
+cd /Users/xiilab/git/HAMi
+NEW_SHA=$(cd libvgpu && git rev-parse HEAD)
+echo "new HAMi-core SHA: $NEW_SHA"
+git add libvgpu
+git commit -s -m "chore(libvgpu): bump HAMi-core for Step C — Vulkan layer split" \
+  -m "Pulls in the Step C redesign: Vulkan layer code is now a separate
+libvgpu_vk.so, activated by /etc/vulkan/implicit_layer.d/hami.json.
+libvgpu.so retains only HAMi-core (NVML/CUDA hooks + allocator +
+multiprocess) and loses all vk* exports.
+
+Verified on ws-node074:
+* LD_PRELOAD libvgpu.so without manifest → 5/5 runheadless exit=124
+  alive (the 2026-04-28 regression class is gone).
+* LD_PRELOAD libvgpu.so + hami.json manifest → 5/5 alive,
+  HAMI_VK_TRACE > 100 lines, partition clamp 44 GiB → 23 GiB.
+
+Spec: docs/superpowers/specs/2026-04-29-step-c-redesign-vk-so-split.md
+Plan: docs/superpowers/plans/2026-04-29-step-c-vk-so-split.md"
+git push xiilab feat/vulkan-vgpu 2>&1 | tail -5
+```
+
+- [ ] **Step 3: Draft PR comments — DO NOT POST**
+
+```bash
+mkdir -p /tmp/step-c-vk-split-pr-drafts
+
+cat > /tmp/step-c-vk-split-pr-drafts/pr182.md <<'EOF'
+## Step C redesigned — Vulkan layer split into libvgpu_vk.so
+
+The 2026-04-28 attempt (commits since reverted) regressed `runheadless.sh`
+under LD_PRELOAD on ws-node074 — see notes/2026-04-28-vk-trace-isaac-sim.md.
+Trace evidence proved our layer wrappers were never called; the
+regression lived at the .so-load boundary. Rather than spending more
+diagnostic cycles on production hardware, this redesign makes that
+class of regression structurally impossible.
+
+| Commit | Change |
+|---|---|
+| (sha) | feat(hami-core): explicit hami_core_* export wrappers |
+| (sha) | refactor(vulkan): use hami_core_* wrappers instead of internal externs |
+| (sha) | build: split Vulkan layer into separate libvgpu_vk.so |
+| (sha) | feat(vulkan): ship hami.json implicit-layer manifest |
+
+### What changed
+- `libvgpu.so` keeps NVML/CUDA hooks + allocator + multiprocess. Loses
+  all `vk*` exports.
+- New `libvgpu_vk.so` carries the entire `src/vulkan/*` and exports
+  only `vkGetInstanceProcAddr`, `vkGetDeviceProcAddr`,
+  `vkNegotiateLoaderLayerInterfaceVersion`. DT_NEEDED includes
+  `libvgpu.so`, so the linker resolves the 5 `hami_core_*` wrappers at
+  Vulkan-loader dlopen time.
+- `share/hami/hami.json` is the implicit-layer manifest the Step D
+  webhook drops into `/etc/vulkan/implicit_layer.d/`.
+
+### Verification on ws-node074
+- ELF: `nm -D libvgpu.so | grep 'T vk'` → 0 lines. `nm -D libvgpu_vk.so`
+  → exactly 3 `vk*` exports. `readelf -d libvgpu_vk.so` lists
+  `libvgpu.so` as NEEDED.
+- Step B regression `test_cuda_null_guards`: 9/9 [OK] (CUDA hooks
+  unchanged across the split).
+- LD_PRELOAD `libvgpu.so` without manifest, `runheadless.sh` × 5: 5/5
+  `exit=124 crash=0 listen=1`. **The 2026-04-28 regression class is
+  gone.**
+- LD_PRELOAD `libvgpu.so` + manifest, `runheadless.sh` × 5: 5/5 alive,
+  `HAMI_VK_TRACE` > 100 lines (layer in chain), partition clamp
+  44 GiB → 23 GiB.
+
+### Out of scope
+- The original Step C tasks (cache first next-gipa, GIPA/GDPA fallback,
+  `EnumerateDevice*` hooks) were reverted and stay deferred until this
+  architecture is verified in production. They will return as a follow-up
+  PR after the split is in.
+
+EOF
+
+cat > /tmp/step-c-vk-split-pr-drafts/pr1803.md <<'EOF'
+## Step C — Vulkan layer split (libvgpu_vk.so)
+
+HAMi-core PR #182 redesigned Step C: `libvgpu.so` is now HAMi-core only,
+and a new `libvgpu_vk.so` holds the Vulkan implicit layer. Activation
+moves entirely to the manifest path, removing the LD_PRELOAD/Vulkan-
+loader collision surface that bit us on 2026-04-28.
+
+The `libvgpu` submodule pointer is bumped to `<NEW_HAMI_BUMP_SHA>`.
+
+### Verification (ws-node074, isaac-launchable-0)
+- LD_PRELOAD `libvgpu.so` without manifest: 5/5 `runheadless.sh` alive
+  (regression class structurally gone).
+- LD_PRELOAD `libvgpu.so` + `hami.json`: 5/5 alive, layer in chain
+  (`HAMI_VK_TRACE` > 100 lines), partition clamp 44 GiB → 23 GiB.
+
+Spec: `docs/superpowers/specs/2026-04-29-step-c-redesign-vk-so-split.md`
+Plan: `docs/superpowers/plans/2026-04-29-step-c-vk-so-split.md`
+EOF
+
+HAMI_BUMP_SHA=$(cd /Users/xiilab/git/HAMi && git rev-parse HEAD)
+sed -i.bak "s/<NEW_HAMI_BUMP_SHA>/$HAMI_BUMP_SHA/g" /tmp/step-c-vk-split-pr-drafts/pr1803.md
+rm /tmp/step-c-vk-split-pr-drafts/pr1803.md.bak
+
+ls -la /tmp/step-c-vk-split-pr-drafts/
+```
+
+(SHA placeholders in pr182.md will be filled by the controller from `git log` output.)
+
+- [ ] **Step 4: Report — DO NOT post comments. Wait for explicit user approval.**
+
+---
+
+## Self-Review
+
+**1. Spec coverage:**
+- Spec §"Architecture" (split, DT_NEEDED, manifest-only activation) → Tasks 1-4, 7
+- Spec §"Components" (libvgpu.so loses vulkan_mod, libvgpu_vk.so, budget bridge update, hami.json) → Tasks 1-4, 7
+- Spec §"Data flow" (production happy path) → Tasks 8-9 verify
+- Spec §"Error handling" (libvgpu.so absent, manifest absent, etc.) → the manifest-absent case is exactly Task 8's main test. The `libvgpu.so`-absent case is not exercised directly (only the present case is tested here); it reduces to "loader skips the layer", which is library-loader behavior we trust.
+- Spec §"Testing" (unit + ELF + LD_PRELOAD-only smoke + manifest smoke + HAMI_VK_TRACE) → Tasks 3, 5, 6, 8, 9
+- Spec §"Production safety gate" (backup before swap, baseline-after-swap check, md5 logging) → Task 8 step 2-3, plus restore guidance in step 4.
+- Spec §"Out of scope" (the original Step C plan's Tasks 1+2 deferred, root-cause diagnostic skipped, webhook in Step A/D) → reflected in Task 10 PR draft language. ✅
+
+**2. Placeholder scan:** Tasks 8 and 9 contain expected outputs and concrete kubectl/ssh commands. Task 10 PR drafts have one explicit `<NEW_HAMI_BUMP_SHA>` placeholder that's substituted in step 3 and a `(sha)` placeholder in pr182.md noted as "filled by the controller". No `TODO`/`TBD`/`figure out`/`add appropriate ...` patterns. ✅
+
+**3. Type consistency:** `hami_core_oom_check` / `hami_core_add_memory_usage` / `hami_core_rm_memory_usage` / `hami_core_get_memory_limit` / `hami_core_throttle` — same names in header, .c, call sites, and verification grep. `LIBVGPU_VK = vgpu_vk` → `lib${LIBVGPU_VK}.so` = `libvgpu_vk.so` consistent across CMake + ELF checks + manifest `library_path`. ✅
+
+**4. Scope check:** Single .so split + manifest. Plan-able as one implementation. Step D (manifest install via webhook + opt-in label activation) is the next plan, not this one. ✅
+
+**5. Production safety:** Task 8 verifies before installing the manifest (LD_PRELOAD-only) precisely so we get the regression-killed proof first. Task 9 only proceeds if Task 8 passes. Both have explicit restore commands at failure. ✅
+
+---
+
+## Estimated time
+
+| Task | 예상 |
+|---|---|
+| 1 hami_core_export wrappers | 25분 |
+| 2 vulkan call-site rewrite | 15분 |
+| 3 pre-split sanity build | 10분 |
+| 4 CMake split | 20분 |
+| 5 ELF / symbol diff verify | 10분 |
+| 6 unit tests | 15분 |
+| 7 manifest file | 10분 |
+| 8 ws-node074 LD_PRELOAD-only smoke | 30분 |
+| 9 ws-node074 manifest smoke | 30분 |
+| 10 push + bump + PR drafts | 20분 |
+| **총** | **약 3시간** |
diff --git a/docs/superpowers/plans/2026-04-29-step-d-vulkan-opt-in-production-activation.md b/docs/superpowers/plans/2026-04-29-step-d-vulkan-opt-in-production-activation.md
new file mode 100644
index 000000000..24999cec1
--- /dev/null
+++ b/docs/superpowers/plans/2026-04-29-step-d-vulkan-opt-in-production-activation.md
@@ -0,0 +1,820 @@
+# Step D — Vulkan opt-in production activation + 4-path 검증 Implementation Plan
+
+> **For agentic workers:** REQUIRED SUB-SKILL: Use superpowers:subagent-driven-development (recommended) or superpowers:executing-plans to implement this plan task-by-task. Steps use checkbox (`- [ ]`) syntax for tracking.
+
+**Goal:** Step C 의 `libvgpu_vk.so` 가 production opt-in path 에서 실제로 chain 진입 + partition enforce 가 NVML / CUDA / Vulkan-memory-query / Vulkan-allocate 4 path 모두에서 작동함을 ws-node074 isaac-launchable-0 에서 검증.
+
+**Architecture:** volcano-vgpu-device-plugin image rebuild → 새 libvgpu.so + libvgpu_vk.so 호스트 install. `hami-vulkan-manifest` ConfigMap 의 `library_path` 를 `libvgpu_vk.so` 로 update + type INSTANCE. manifest installer DaemonSet 재활성. webhook 의 `applyVulkanAnnotation` 코드 그대로 — annotation `hami.io/vulkan: "true"` 가 trigger.
+
+**Tech Stack:** Docker (image rebuild), kubectl (CM/DS apply), ws-node074 (production verification), python (4-path test scripts). Repos: `Project-HAMi/HAMi`, `Project-HAMi/HAMi-core` (libvgpu submodule, tracked through the `xiilab/HAMi-core` fork on branch `vulkan-layer`), `volcano-vgpu-device-plugin` fork at `/Users/xiilab/git/volcano-vgpu-device-plugin/`. Spec: `docs/superpowers/specs/2026-04-29-step-d-vulkan-opt-in-production-activation.md`.
+
+---
+
+## File Structure
+
+| 파일 | 변경 종류 | 책임 |
+|---|---|---|
+| `/Users/xiilab/git/volcano-vgpu-device-plugin/libvgpu` (submodule) | Modify | submodule SHA bump → `65930f4` (Step C end) |
+| `/Users/xiilab/git/volcano-vgpu-device-plugin/docker/Dockerfile.ubuntu20.04` | Inspect / possibly Modify | image build 가 새 `libvgpu_vk.so` 도 `lib/nvidia/` 에 복사하도록 |
+| `cluster/runtime/snapshot-2026-04-29-step-d/hami-vulkan-manifest-cm.yaml` | Create (copy from snapshot-2026-04-28) | library_path → libvgpu_vk.so, type → INSTANCE |
+| `cluster/runtime/snapshot-2026-04-29-step-d/hami-vulkan-manifest-installer-ds.yaml` | Create (copy from snapshot-2026-04-28) | nodeSelector 복구 |
+| `cluster/runtime/snapshot-2026-04-29-step-d/volcano-device-plugin-ds.yaml` | Create (copy from snapshot-2026-04-28) | image tag → vulkan-v2 |
+| `cluster/runtime/snapshot-2026-04-29-step-d/4-path-verification.sh` | Create | NVML / CUDA / Vulkan memory / Vulkan allocate 검증 script |
+| `cluster/runtime/snapshot-2026-04-29-step-d/vk_partition_test.py` | Create | Vulkan path 검증 python script (vkGetPhysicalDeviceMemoryProperties + vkAllocateMemory) |
+
+---
+
+## Tasks
+
+### Task 1: Inventory current production state + baseline backup
+
+**Files:** none (state capture)
+
+- [ ] **Step 1: Capture current production state**
+
+```bash
+ssh root@10.61.3.74 '
+echo "=== /usr/local/vgpu/ contents + md5 ==="
+ls -la /usr/local/vgpu/ | head
+md5sum /usr/local/vgpu/libvgpu*.so 2>/dev/null
+echo
+echo "=== ConfigMap hami-vulkan-manifest current state ==="
+kubectl get cm -n kube-system hami-vulkan-manifest -o yaml | head -30
+echo
+echo "=== DaemonSet hami-vulkan-manifest-installer status + nodeSelector ==="
+kubectl get ds -n kube-system hami-vulkan-manifest-installer -o jsonpath="{.spec.template.spec.nodeSelector}{\"\n\"}{.status}{\"\n\"}"
+echo
+echo "=== DaemonSet volcano-device-plugin image + status ==="
+kubectl get ds -n kube-system volcano-device-plugin -o jsonpath="{.spec.template.spec.containers[*].image}{\"\n\"}{.status}{\"\n\"}"
+' > /tmp/step-d-pre-state.txt
+cat /tmp/step-d-pre-state.txt
+```
+
+Expected output captured to `/tmp/step-d-pre-state.txt`. Verify:
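+- `libvgpu.so` md5 = `8f889313ece246b2d08ea6291f48b67a` (the library currently installed on the host, i.e. the production baseline as left at the end of Step C — this is expected to differ from the new Step C build md5 `1bd8f078...` that Task 5 installs)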
+- `hami-vulkan-manifest-installer` nodeSelector 가 `hami.io/disabled: "true"` (현재 비활성)
+- `volcano-device-plugin` image 가 `vulkan-v1`
+
+- [ ] **Step 2: Baseline runheadless on isaac-launchable-0 + isaac-launchable-1**
+
+```bash
+for POD in $(kubectl -n isaac-launchable get pods --no-headers | awk '/^isaac-launchable-[0-9]/{print $1}'); do
+  echo "=== $POD baseline ==="
+  kubectl -n isaac-launchable exec $POD -c vscode -- bash -lc '
+    pkill -KILL kit 2>/dev/null; sleep 2
+    timeout 45 env ACCEPT_EULA=y /isaac-sim/runheadless.sh > /tmp/baseline.log 2>&1
+    EC=$?
+    pkill -KILL kit 2>/dev/null
+    echo "exit=$EC crash=$(grep -c "Segmentation\|crash has occurred" /tmp/baseline.log) listen=$(ss -tunlp 2>/dev/null | grep -c -E :49100)"
+    rm -f /tmp/baseline.log
+  '
+done
+```
+
+Expected: 두 pod 모두 `exit=124 crash=0 listen=1`.
+
+- [ ] **Step 3: No commit (state capture only)**
+
+If any baseline check fails, STOP — production is already broken before Step D starts. Investigate before proceeding.
+
+---
+
+### Task 2: Build & push volcano-vgpu-device-plugin:vulkan-v2 image with new libvgpu.so + libvgpu_vk.so
+
+**Files:**
+- Modify (volcano fork): `libvgpu` submodule SHA → `65930f4`
+- Inspect/Modify (volcano fork): `docker/Dockerfile.ubuntu20.04`
+
+- [ ] **Step 1: Inspect Dockerfile to confirm libvgpu_vk.so handling**
+
+```bash
+cd /Users/xiilab/git/volcano-vgpu-device-plugin
+sed -n '30,80p' docker/Dockerfile.ubuntu20.04
+```
+
+Verify whether the Dockerfile copies BOTH `libvgpu.so` AND `libvgpu_vk.so` from the libvgpu build dir into `/k8s-vgpu/lib/nvidia/` (or wherever the postStart `cp -rf ... /usr/local/vgpu/` source path is). If only `libvgpu.so` is copied, ADD `libvgpu_vk.so` to the same COPY/cp step.
+
+Expected: Dockerfile already runs `make build-in-docker` or equivalent inside libvgpu and ends up with `libvgpu*.so` in the final image's `/k8s-vgpu/lib/nvidia/`. If not, edit Dockerfile to add the second .so.
+
+- [ ] **Step 2: Bump libvgpu submodule to Step C end**
+
+```bash
+cd /Users/xiilab/git/volcano-vgpu-device-plugin/libvgpu
+git fetch xiilab vulkan-layer
+git checkout 65930f4  # Step C 끝 (feat(vulkan): ship hami.json implicit-layer manifest)
+cd ..
+git add libvgpu
+git status
+git -c user.email=je.kim@xiilab.com -c user.name=Jea-Eok-Kim commit -s -m "build: bump libvgpu submodule to Step C end (libvgpu_vk.so split)" -m "Pulls in HAMi-core vulkan-layer 65930f4 — the Step C redesign that
+splits Vulkan layer code into a separate libvgpu_vk.so. After this
+bump, the device plugin image will ship both libvgpu.so (HAMi-core
+only, no vk* exports) and libvgpu_vk.so (Vulkan implicit layer)
+into /k8s-vgpu/lib/nvidia/, and the existing postStart cp -rf will
+install both onto /usr/local/vgpu/ on each scheduled node.
+
+Spec: HAMi-core docs/superpowers/specs/2026-04-29-step-c-redesign-vk-so-split.md
+Step D plan in HAMi parent: docs/superpowers/plans/2026-04-29-step-d-vulkan-opt-in-production-activation.md"
+```
+
+- [ ] **Step 3: Build the image**
+
+```bash
+cd /Users/xiilab/git/volcano-vgpu-device-plugin
+docker build -f docker/Dockerfile.ubuntu20.04 \
+  -t 10.61.3.124:30002/library/volcano-vgpu-device-plugin:vulkan-v2 \
+  --platform linux/amd64 \
+  . 2>&1 | tail -20
+```
+
+Expected: `Successfully tagged 10.61.3.124:30002/library/volcano-vgpu-device-plugin:vulkan-v2`. No errors during the libvgpu sub-build.
+
+If local Docker daemon isn't running, push the build to ws-node074:
+
+```bash
+rsync -az --exclude=.git/objects/pack . root@10.61.3.74:/tmp/volcano-build/
+ssh root@10.61.3.74 'cd /tmp/volcano-build && docker build -f docker/Dockerfile.ubuntu20.04 -t 10.61.3.124:30002/library/volcano-vgpu-device-plugin:vulkan-v2 --platform linux/amd64 . 2>&1 | tail -20'
+```
+
+- [ ] **Step 4: Verify the image contains both .so**
+
+```bash
+docker run --rm --entrypoint /bin/sh 10.61.3.124:30002/library/volcano-vgpu-device-plugin:vulkan-v2 \
+  -c 'ls -la /k8s-vgpu/lib/nvidia/ ; md5sum /k8s-vgpu/lib/nvidia/libvgpu*.so'
+```
+
+Expected: 두 .so 모두 존재 + md5 가 우리 build 와 일치 (libvgpu.so `1bd8f078`, libvgpu_vk.so `95b44957` 또는 새로 빌드된 동일한 산출물).
+
+If on ws-node074 (no local docker):
+
+```bash
+ssh root@10.61.3.74 'docker run --rm --entrypoint /bin/sh 10.61.3.124:30002/library/volcano-vgpu-device-plugin:vulkan-v2 -c "ls -la /k8s-vgpu/lib/nvidia/ ; md5sum /k8s-vgpu/lib/nvidia/libvgpu*.so"'
+```
+
+- [ ] **Step 5: Push to local registry**
+
+```bash
+docker push 10.61.3.124:30002/library/volcano-vgpu-device-plugin:vulkan-v2 2>&1 | tail -5
+# or via ssh
+ssh root@10.61.3.74 'docker push 10.61.3.124:30002/library/volcano-vgpu-device-plugin:vulkan-v2 2>&1 | tail -5'
+```
+
+Expected: push 성공.
+
+- [ ] **Step 6: Push volcano fork commit**
+
+```bash
+cd /Users/xiilab/git/volcano-vgpu-device-plugin
+git remote -v   # confirm xiilab fork
+git push xiilab HEAD 2>&1 | tail -3
+```
+
+---
+
+### Task 3: Update hami-vulkan-manifest ConfigMap to point to libvgpu_vk.so
+
+**Files:**
+- Create: `cluster/runtime/snapshot-2026-04-29-step-d/hami-vulkan-manifest-cm.yaml`
+
+- [ ] **Step 1: Create snapshot directory and copy base ConfigMap**
+
+```bash
+cd /Users/xiilab/git/HAMi
+mkdir -p cluster/runtime/snapshot-2026-04-29-step-d
+cp cluster/runtime/snapshot-2026-04-28/hami-vulkan-manifest-cm.yaml \
+   cluster/runtime/snapshot-2026-04-29-step-d/hami-vulkan-manifest-cm.yaml
+```
+
+- [ ] **Step 2: Edit the ConfigMap data — library_path + type**
+
+Use Edit tool to change in `cluster/runtime/snapshot-2026-04-29-step-d/hami-vulkan-manifest-cm.yaml`:
+
+OLD `data.hami.json` value (the inline JSON):
+```
+"library_path": "/usr/local/vgpu/libvgpu.so"
+```
+NEW:
+```
+"library_path": "/usr/local/vgpu/libvgpu_vk.so"
+```
+
+OLD:
+```
+"type": "GLOBAL"
+```
+NEW:
+```
+"type": "INSTANCE"
+```
+
+Also strip the runtime metadata that doesn't apply to a fresh apply: `creationTimestamp`, `resourceVersion`, `uid`, the `last-applied-configuration` annotation. Keep `name`, `namespace`, `data`.
+
+- [ ] **Step 3: Apply ConfigMap**
+
+```bash
+kubectl apply -f cluster/runtime/snapshot-2026-04-29-step-d/hami-vulkan-manifest-cm.yaml
+kubectl get cm -n kube-system hami-vulkan-manifest -o jsonpath='{.data.hami\.json}' | python3 -m json.tool
+```
+
+Expected: parsed JSON shows `library_path` = `/usr/local/vgpu/libvgpu_vk.so` and `type` = `INSTANCE`.
+
+- [ ] **Step 4: Commit the snapshot**
+
+```bash
+cd /Users/xiilab/git/HAMi
+git add cluster/runtime/snapshot-2026-04-29-step-d/hami-vulkan-manifest-cm.yaml
+git commit -s -m "chore(runtime): Step D — update hami-vulkan-manifest CM to libvgpu_vk.so" \
+  -m "library_path = /usr/local/vgpu/libvgpu_vk.so (Step C split target)
+type = INSTANCE (per spec; matches single-instance Vulkan layer
+contract instead of the deprecated GLOBAL).
+
+enable_environment HAMI_VULKAN_ENABLE=1 unchanged — opt-in trigger
+flows through the existing webhook applyVulkanAnnotation."
+```
+
+---
+
+### Task 4: Re-enable hami-vulkan-manifest-installer DaemonSet
+
+**Files:**
+- Create: `cluster/runtime/snapshot-2026-04-29-step-d/hami-vulkan-manifest-installer-ds.yaml`
+
+- [ ] **Step 1: Copy base + change nodeSelector**
+
+```bash
+cp cluster/runtime/snapshot-2026-04-28/hami-vulkan-manifest-installer-ds.yaml \
+   cluster/runtime/snapshot-2026-04-29-step-d/hami-vulkan-manifest-installer-ds.yaml
+```
+
+Edit `cluster/runtime/snapshot-2026-04-29-step-d/hami-vulkan-manifest-installer-ds.yaml`:
+
+OLD:
+```yaml
+      nodeSelector:
+        hami.io/disabled: "true"
+```
+NEW:
+```yaml
+      nodeSelector:
+        nvidia.com/gpu.present: "true"
+```
+
+Also strip runtime metadata (creationTimestamp, resourceVersion, uid, status, generation, last-applied-configuration annotation).
+
+- [ ] **Step 2: Apply DaemonSet patch**
+
+```bash
+kubectl apply -f cluster/runtime/snapshot-2026-04-29-step-d/hami-vulkan-manifest-installer-ds.yaml
+```
+
+- [ ] **Step 3: Wait for installer DS to schedule + run on GPU nodes**
+
+```bash
+kubectl rollout status ds/hami-vulkan-manifest-installer -n kube-system --timeout=120s
+kubectl -n kube-system get pods -l app=hami-vulkan-manifest-installer -o wide
+```
+
+Expected: at least 1 pod scheduled (ws-node074 has `nvidia.com/gpu.present=true`).
+
+- [ ] **Step 4: Verify manifest installed on host**
+
+```bash
+ssh root@10.61.3.74 'ls -la /usr/local/vgpu/vulkan/implicit_layer.d/ ; cat /usr/local/vgpu/vulkan/implicit_layer.d/hami.json | head -20'
+```
+
+Expected: `hami.json` exists with `library_path: /usr/local/vgpu/libvgpu_vk.so`.
+
+- [ ] **Step 5: Post-step alive check (no annotation yet → loader still inert)**
+
+```bash
+NEWPOD=$(kubectl -n isaac-launchable get pods --no-headers | awk '/^isaac-launchable-0/{print $1}' | head -1)
+kubectl -n isaac-launchable exec $NEWPOD -c vscode -- bash -lc '
+pkill -KILL kit 2>/dev/null; sleep 2
+timeout 45 env ACCEPT_EULA=y /isaac-sim/runheadless.sh > /tmp/post-task4.log 2>&1
+EC=$?
+pkill -KILL kit 2>/dev/null
+echo "post-task4: exit=$EC crash=$(grep -c "Segmentation\|crash has occurred" /tmp/post-task4.log) listen=$(ss -tunlp 2>/dev/null | grep -c -E :49100)"
+rm -f /tmp/post-task4.log
+'
+```
+
+Expected: `exit=124 crash=0 listen=1`. (The manifest is now installed, but its `enable_environment` gate requires `HAMI_VULKAN_ENABLE=1`; without that env var the layer stays inert, so the baseline should not regress.) If the result is anything else, immediately roll the installer DS back to its disabled state (nodeSelector `hami.io/disabled: "true"`) and STOP.
+
+- [ ] **Step 6: Commit**
+
+```bash
+git add cluster/runtime/snapshot-2026-04-29-step-d/hami-vulkan-manifest-installer-ds.yaml
+git commit -s -m "chore(runtime): Step D — re-enable hami-vulkan-manifest-installer DS" \
+  -m "nodeSelector hami.io/disabled: true → nvidia.com/gpu.present: true.
+Was disabled during the 4-27 night-patch rollback; re-enabling it here
+because the Step C redesign (libvgpu_vk.so split + manifest INSTANCE
+type + enable_environment gate) makes activation safe even when the
+manifest is host-installed: layer stays inert until HAMI_VULKAN_ENABLE=1
+flows through the webhook on a per-pod basis."
+```
+
+---
+
+### Task 5: Bump volcano-device-plugin DaemonSet image to vulkan-v2
+
+**Files:**
+- Create: `cluster/runtime/snapshot-2026-04-29-step-d/volcano-device-plugin-ds.yaml`
+
+- [ ] **Step 1: Copy base + bump image tag**
+
+```bash
+cp cluster/runtime/snapshot-2026-04-28/volcano-device-plugin-ds.yaml \
+   cluster/runtime/snapshot-2026-04-29-step-d/volcano-device-plugin-ds.yaml
+```
+
+Edit the file: replace ALL occurrences of `volcano-vgpu-device-plugin:vulkan-v1` with `volcano-vgpu-device-plugin:vulkan-v2`. There are 2 (init container + main container) per the prior snapshot. Also strip runtime metadata.
+
+- [ ] **Step 2: Apply DaemonSet bump**
+
+```bash
+kubectl apply -f cluster/runtime/snapshot-2026-04-29-step-d/volcano-device-plugin-ds.yaml
+kubectl rollout status ds/volcano-device-plugin -n kube-system --timeout=300s
+```
+
+Expected: pods rolling, eventually `numberReady` matches `desiredNumberScheduled`.
+
+- [ ] **Step 3: Verify host install — both .so present with new md5**
+
+```bash
+ssh root@10.61.3.74 '
+md5sum /usr/local/vgpu/libvgpu.so /usr/local/vgpu/libvgpu_vk.so
+ls -la /usr/local/vgpu/libvgpu*.so 2>&1
+'
+```
+
+Expected: both .so present. md5 of `libvgpu.so` = `1bd8f078...` (or whatever the Step C end build produced; compare against `/tmp/libvgpu-build/build/libvgpu.so` if still around). md5 of `libvgpu_vk.so` = `95b44957...`.
+
+- [ ] **Step 4: Post-step alive check on isaac-launchable-0 (still no annotation)**
+
+```bash
+NEWPOD=$(kubectl -n isaac-launchable get pods --no-headers | awk '/^isaac-launchable-0/{print $1}' | head -1)
+kubectl -n isaac-launchable exec $NEWPOD -c vscode -- bash -lc '
+pkill -KILL kit 2>/dev/null; sleep 2
+timeout 45 env ACCEPT_EULA=y /isaac-sim/runheadless.sh > /tmp/post-task5.log 2>&1
+EC=$?
+pkill -KILL kit 2>/dev/null
+echo "post-task5: exit=$EC crash=$(grep -c "Segmentation\|crash has occurred" /tmp/post-task5.log) listen=$(ss -tunlp 2>/dev/null | grep -c -E :49100)"
+rm -f /tmp/post-task5.log
+'
+```
+
+Expected: `exit=124 crash=0 listen=1`. (Without `HAMI_VULKAN_ENABLE` the layer is still inert.) If this regresses, immediately roll the `volcano-device-plugin` DaemonSet image back to `vulkan-v1`.
+
+- [ ] **Step 5: Commit**
+
+```bash
+git add cluster/runtime/snapshot-2026-04-29-step-d/volcano-device-plugin-ds.yaml
+git commit -s -m "chore(runtime): Step D — bump volcano-device-plugin to vulkan-v2" \
+  -m "Image vulkan-v1 → vulkan-v2. The new image ships libvgpu.so
+(Step C end build, HAMi-core only) and libvgpu_vk.so (Vulkan layer)
+in /k8s-vgpu/lib/nvidia/, so the existing postStart cp -rf ...
+/usr/local/vgpu/ installs both onto every GPU node."
+```
+
+---
+
+### Task 6: Annotate isaac-launchable-0 + restart + initial activation verify
+
+**Files:** none (state changes only)
+
+- [ ] **Step 1: Check current annotation**
+
+```bash
+NEWPOD=$(kubectl -n isaac-launchable get pods --no-headers | awk '/^isaac-launchable-0/{print $1}' | head -1)
+kubectl -n isaac-launchable get pod $NEWPOD -o jsonpath='{.metadata.annotations}' | python3 -m json.tool 2>/dev/null | grep -i hami
+```
+
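+If `hami.io/vulkan: "true"` is already present, the workload most likely kept it from prior testing. In that case skip the `kubectl patch` in step 2, but still set `WORKLOAD` as shown there (step 3 uses it), then delete the pod so the webhook re-applies to the recreated pod before continuing with step 3.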
+
+- [ ] **Step 2: Annotate the deployment / statefulset**
+
+```bash
+# isaac-launchable-0 is likely managed by a Deployment/StatefulSet — patch the workload, not the pod
+kubectl -n isaac-launchable get $(kubectl -n isaac-launchable get all -o name | grep -E "isaac-launchable-0$" | head -1) -o yaml > /tmp/isaac-0-pre.yaml
+# Add hami.io/vulkan: "true" to spec.template.metadata.annotations
+WORKLOAD=$(kubectl -n isaac-launchable get all -o name | grep -E "isaac-launchable-0$" | head -1)
+echo "Workload: $WORKLOAD"
+kubectl -n isaac-launchable patch $WORKLOAD --type=merge -p '{"spec":{"template":{"metadata":{"annotations":{"hami.io/vulkan":"true"}}}}}'
+```
+
+- [ ] **Step 3: Wait for new pod to come up**
+
+```bash
+kubectl -n isaac-launchable rollout status $WORKLOAD --timeout=300s
+NEWPOD=$(kubectl -n isaac-launchable get pods --no-headers | awk '/^isaac-launchable-0/{print $1}' | head -1)
+echo "New pod: $NEWPOD"
+kubectl -n isaac-launchable get pod $NEWPOD -o jsonpath='{range .spec.containers[*]}{.name}: {.env[?(@.name=="HAMI_VULKAN_ENABLE")].value}{"\n"}{end}'
+kubectl -n isaac-launchable get pod $NEWPOD -o jsonpath='{range .spec.containers[*]}{.name}: {.env[?(@.name=="NVIDIA_DRIVER_CAPABILITIES")].value}{"\n"}{end}'
+```
+
+Expected: `vscode: 1` for HAMI_VULKAN_ENABLE, NVIDIA_DRIVER_CAPABILITIES contains `graphics`.
+
+- [ ] **Step 4: Verify pod healthy + alive runheadless**
+
+```bash
+kubectl -n isaac-launchable get pod $NEWPOD
+kubectl -n isaac-launchable exec $NEWPOD -c vscode -- bash -lc '
+pkill -KILL kit 2>/dev/null; sleep 2
+timeout 50 env ACCEPT_EULA=y /isaac-sim/runheadless.sh > /tmp/active.log 2>&1
+EC=$?
+pkill -KILL kit 2>/dev/null
+echo "active: exit=$EC crash=$(grep -c "Segmentation\|crash has occurred" /tmp/active.log) listen=$(ss -tunlp 2>/dev/null | grep -c -E :49100)"
+rm -f /tmp/active.log
+'
+```
+
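+Expected: `exit=124 crash=0 listen=1`. If this regresses, roll back the `hami.io/vulkan` annotation first; if the pod is still bad after that, also roll back the DaemonSet changes from Tasks 4 and 5 (installer nodeSelector and device-plugin image).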
+
+- [ ] **Step 5: No commit**
+
+---
+
+### Task 7: 4-path partition-enforcement verification
+
+**Files:**
+- Create: `cluster/runtime/snapshot-2026-04-29-step-d/4-path-verification.sh`
+- Create: `cluster/runtime/snapshot-2026-04-29-step-d/vk_partition_test.py`
+
+This task confirms partition enforce works in NVML, CUDA, Vulkan-memory-query, Vulkan-allocate.
+
+- [ ] **Step 1: Write the python Vulkan probe**
+
+Create `cluster/runtime/snapshot-2026-04-29-step-d/vk_partition_test.py`:
+
+```python
+#!/usr/bin/env python3
+"""Step D 4-path verification — Vulkan-side partition enforce.
+
+Path 3: vkGetPhysicalDeviceMemoryProperties → device-local heap size MUST
+        be the partition limit (23552 MiB), not the raw 46068 MiB.
+Path 4: vkAllocateMemory(size = 25 GiB) MUST fail with
+        VK_ERROR_OUT_OF_DEVICE_MEMORY (partition limit is 23 GiB).
+
+Requires: python3-vulkan or vulkan binding (pip install vulkan).
+Run inside isaac-launchable-0 vscode container with HAMI_VULKAN_ENABLE=1
+already in env.
+"""
+import sys
+import ctypes
+
+try:
+    import vulkan as vk
+except ImportError:
+    print("ERR: pip install vulkan (or python3-vulkan)")
+    sys.exit(2)
+
+PARTITION_MIB = 23552  # Step C/D production limit
+PARTITION_BYTES = PARTITION_MIB * 1024 * 1024
+OVER_BUDGET_BYTES = 25 * 1024 * 1024 * 1024  # 25 GiB > 23 GiB
+
+# Path 3: query memory properties
+app_info = vk.VkApplicationInfo(
+    sType=vk.VK_STRUCTURE_TYPE_APPLICATION_INFO,
+    pApplicationName="hami-step-d-probe",
+    applicationVersion=1,
+    pEngineName="probe",
+    engineVersion=1,
+    apiVersion=vk.VK_API_VERSION_1_3,
+)
+inst_info = vk.VkInstanceCreateInfo(sType=vk.VK_STRUCTURE_TYPE_INSTANCE_CREATE_INFO, pApplicationInfo=app_info)
+inst = vk.vkCreateInstance(inst_info, None)
+phys_devs = vk.vkEnumeratePhysicalDevices(inst)
+if not phys_devs:
+    print("ERR: no physical devices")
+    sys.exit(2)
+dev = phys_devs[0]
+mem_props = vk.vkGetPhysicalDeviceMemoryProperties(dev)
+
+device_local_heap_size = 0
+for i in range(mem_props.memoryHeapCount):
+    heap = mem_props.memoryHeaps[i]
+    if heap.flags & vk.VK_MEMORY_HEAP_DEVICE_LOCAL_BIT:
+        device_local_heap_size = max(device_local_heap_size, heap.size)
+print(f"Path 3: device-local heap size = {device_local_heap_size} bytes ({device_local_heap_size // (1024*1024)} MiB)")
+if abs(device_local_heap_size - PARTITION_BYTES) < (256 * 1024 * 1024):  # 256 MiB tolerance
+    print(f"Path 3: PASS (within 256 MiB of {PARTITION_MIB} MiB partition)")
+else:
+    print(f"Path 3: FAIL (expected ~{PARTITION_MIB} MiB, got {device_local_heap_size // (1024*1024)} MiB)")
+
+# Path 4: try to allocate over-budget
+device_create_info = vk.VkDeviceCreateInfo(
+    sType=vk.VK_STRUCTURE_TYPE_DEVICE_CREATE_INFO,
+    queueCreateInfoCount=1,
+    pQueueCreateInfos=[vk.VkDeviceQueueCreateInfo(
+        sType=vk.VK_STRUCTURE_TYPE_DEVICE_QUEUE_CREATE_INFO,
+        queueFamilyIndex=0,
+        queueCount=1,
+        pQueuePriorities=[1.0],
+    )],
+)
+ldev = vk.vkCreateDevice(dev, device_create_info, None)
+mem_type_idx = -1
+for i in range(mem_props.memoryTypeCount):
+    if mem_props.memoryTypes[i].propertyFlags & vk.VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT:
+        mem_type_idx = i
+        break
+alloc_info = vk.VkMemoryAllocateInfo(
+    sType=vk.VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO,
+    allocationSize=OVER_BUDGET_BYTES,
+    memoryTypeIndex=mem_type_idx,
+)
+try:
+    mem = vk.vkAllocateMemory(ldev, alloc_info, None)
+    print(f"Path 4: FAIL (expected VK_ERROR_OUT_OF_DEVICE_MEMORY for {OVER_BUDGET_BYTES} bytes, got success — partition not enforced)")
+    vk.vkFreeMemory(ldev, mem, None)
+except vk.VkErrorOutOfDeviceMemory:
+    print(f"Path 4: PASS (VK_ERROR_OUT_OF_DEVICE_MEMORY for {OVER_BUDGET_BYTES // (1024*1024*1024)} GiB > {PARTITION_MIB // 1024} GiB partition)")
+except Exception as e:
+    print(f"Path 4: FAIL (unexpected error {type(e).__name__}: {e})")
+
+vk.vkDestroyDevice(ldev, None)
+vk.vkDestroyInstance(inst, None)
+```
+
+- [ ] **Step 2: Write the orchestrator script**
+
+Create `cluster/runtime/snapshot-2026-04-29-step-d/4-path-verification.sh`:
+
+```bash
+#!/bin/bash
+# Step D 4-path verification orchestrator.
+# Run from controller host; orchestrates 4-path checks inside isaac-launchable-0.
+set -u
+
+NS=isaac-launchable
+POD=$(kubectl -n $NS get pods --no-headers | awk '/^isaac-launchable-0/{print $1}' | head -1)
+if [ -z "$POD" ]; then
+    echo "ERR: isaac-launchable-0 pod not found"; exit 1
+fi
+echo "Pod: $POD"
+
+# Copy the python probe into the pod
+kubectl -n $NS cp "$(dirname "$0")/vk_partition_test.py" $POD:/tmp/vk_partition_test.py -c vscode
+
+PASS=0
+FAIL=0
+
+echo
+echo "=== Path 1: NVML hook (nvidia-smi clamp) ==="
+RAW=$(kubectl -n $NS exec $POD -c vscode -- bash -lc 'env -u LD_PRELOAD nvidia-smi --query-gpu=memory.total --format=csv,noheader' 2>&1 | head -1)
+HOOKED=$(kubectl -n $NS exec $POD -c vscode -- bash -lc 'nvidia-smi --query-gpu=memory.total --format=csv,noheader' 2>&1 | grep -E "MiB" | head -1)
+echo "  raw  = $RAW"
+echo "  hook = $HOOKED"
+if echo "$HOOKED" | grep -qE "23552 MiB"; then
+    echo "  Path 1: PASS"; PASS=$((PASS+1))
+else
+    echo "  Path 1: FAIL"; FAIL=$((FAIL+1))
+fi
+
+echo
+echo "=== Path 2: CUDA driver hook (cuMemGetInfo clamp) ==="
+P2=$(kubectl -n $NS exec $POD -c vscode -- bash -lc '
+python3 -c "
+import sys
+try:
+    import pycuda.driver as cuda
+    cuda.init()
+    ctx = cuda.Device(0).make_context()
+    free, total = cuda.mem_get_info()
+    print(f\"free={free} total={total}\")
+    ctx.pop()
+except ImportError:
+    sys.exit(2)
+except Exception as e:
+    print(f\"err: {e}\")
+" 2>&1' || echo "ERR")
+echo "  $P2"
+TOTAL_MIB=$(echo "$P2" | sed -nE "s/.*total=([0-9]+).*/\1/p" | awk "{print int(\$1/(1024*1024))}")
+if [ "$TOTAL_MIB" = "23552" ] || [ "$TOTAL_MIB" -ge "23000" -a "$TOTAL_MIB" -le "24000" ]; then
+    echo "  Path 2: PASS (~$TOTAL_MIB MiB)"; PASS=$((PASS+1))
+else
+    echo "  Path 2: SKIP_OR_FAIL (no pycuda or unexpected total=$TOTAL_MIB)"; FAIL=$((FAIL+1))
+fi
+
+echo
+echo "=== Paths 3 & 4: Vulkan memory query + allocate ==="
+P34=$(kubectl -n $NS exec $POD -c vscode -- bash -lc '
+if ! python3 -c "import vulkan" 2>/dev/null; then
+    /isaac-sim/python.sh -m pip install vulkan 2>&1 | tail -3
+fi
+/isaac-sim/python.sh /tmp/vk_partition_test.py 2>&1
+')
+echo "$P34"
+echo "$P34" | grep -q "Path 3: PASS" && PASS=$((PASS+1)) || FAIL=$((FAIL+1))
+echo "$P34" | grep -q "Path 4: PASS" && PASS=$((PASS+1)) || FAIL=$((FAIL+1))
+
+echo
+echo "=== Summary ==="
+echo "PASS=$PASS FAIL=$FAIL of 4 paths"
+[ "$FAIL" = "0" ] && exit 0 || exit 1
+```
+
+- [ ] **Step 3: chmod + run**
+
+```bash
+chmod +x cluster/runtime/snapshot-2026-04-29-step-d/4-path-verification.sh
+./cluster/runtime/snapshot-2026-04-29-step-d/4-path-verification.sh
+```
+
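+Expected: `PASS=4 FAIL=0 of 4 paths`. If any path fails, capture the output and STOP for analysis. Do not roll back automatically — the underlying issue may be a code bug, not a deployment issue. One exception: when pycuda is not installed in the pod, Path 2 prints `SKIP_OR_FAIL` and is counted as a FAIL (the summary then reads `PASS=3 FAIL=1` and the script exits 1); treat that specific case as informational, not a blocker.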
+
+- [ ] **Step 4: Commit verification scripts**
+
+```bash
+git add cluster/runtime/snapshot-2026-04-29-step-d/4-path-verification.sh \
+        cluster/runtime/snapshot-2026-04-29-step-d/vk_partition_test.py
+git commit -s -m "test(runtime): Step D — 4-path partition enforce verification scripts" \
+  -m "Run on ws-node074 against isaac-launchable-0 with hami.io/vulkan
+annotation active. Verifies:
+
+Path 1: NVML hook nvidia-smi → 23552 MiB clamp
+Path 2: CUDA driver hook cuMemGetInfo → ~23 GiB total
+Path 3: Vulkan vkGetPhysicalDeviceMemoryProperties → device-local heap
+        ~23 GiB
+Path 4: Vulkan vkAllocateMemory(25 GiB) → VK_ERROR_OUT_OF_DEVICE_MEMORY
+
+Skip path 2 if pycuda unavailable in pod (informational FAIL — not
+blocker, NVML+CUDA hooks already validated by Step B unit tests)."
+```
+
+---
+
+### Task 8: HAMI_VK_TRACE host-loader verification + sanity check other Vulkan pods
+
+**Files:** none (verification only)
+
+- [ ] **Step 1: HAMI_VK_TRACE host-loader probe**
+
+Run a small Vulkan probe via host system Vulkan loader (NOT Kit's Conan-bundled loader) to confirm our layer is in chain:
+
+```bash
+NEWPOD=$(kubectl -n isaac-launchable get pods --no-headers | awk '/^isaac-launchable-0/{print $1}' | head -1)
+kubectl -n isaac-launchable exec $NEWPOD -c vscode -- bash -lc '
+which vulkaninfo || apt list --installed 2>/dev/null | grep -i vulkan-tools
+HAMI_VK_TRACE=1 vulkaninfo --summary 2>&1 | head -20 || echo "vulkaninfo unavailable"
+echo
+echo "=== HAMI_VK_TRACE lines via /isaac-sim python ==="
+HAMI_VK_TRACE=1 /isaac-sim/python.sh -c "
+import vulkan as vk
+app = vk.VkApplicationInfo(sType=vk.VK_STRUCTURE_TYPE_APPLICATION_INFO, apiVersion=vk.VK_API_VERSION_1_3)
+ci = vk.VkInstanceCreateInfo(sType=vk.VK_STRUCTURE_TYPE_INSTANCE_CREATE_INFO, pApplicationInfo=app)
+inst = vk.vkCreateInstance(ci, None)
+print(\"created instance\")
+vk.vkDestroyInstance(inst, None)
+" 2>&1 | grep -E "HAMI_VK_TRACE|created" | head -20
+'
+```
+
+Expected: HAMI_VK_TRACE lines > 0 — at least the `vkGetInstanceProcAddr` lookups for each entry point during `vkCreateInstance`. This proves the layer is in the chain when activation conditions are met (manifest installed + HAMI_VULKAN_ENABLE=1 + python uses host's libvulkan, not Kit's Conan-bundled one).
+
+If trace=0 even here, capture full log and surface to controller — manifest activation is broken at the loader level.
+
+- [ ] **Step 2: Sanity check other Vulkan-using pods**
+
+```bash
+echo "=== isaac-launchable-1 ==="
+POD1=$(kubectl -n isaac-launchable get pods --no-headers | awk '/^isaac-launchable-1/{print $1}' | head -1)
+kubectl -n isaac-launchable exec $POD1 -c vscode -- bash -lc '
+pkill -KILL kit 2>/dev/null; sleep 2
+timeout 45 env ACCEPT_EULA=y /isaac-sim/runheadless.sh > /tmp/p1.log 2>&1
+EC=$?; pkill -KILL kit 2>/dev/null
+echo "isaac-launchable-1: exit=$EC crash=$(grep -c "Segmentation\|crash has occurred" /tmp/p1.log) listen=$(ss -tunlp 2>/dev/null | grep -c -E :49100)"
+rm -f /tmp/p1.log
+'
+echo
+echo "=== usd-composer ==="
+POD2=$(kubectl -n isaac-launchable get pods --no-headers | awk '/^usd-composer/{print $1}' | head -1)
+[ -n "$POD2" ] && kubectl -n isaac-launchable get pod $POD2
+echo
+echo "=== other isaac-launchable namespace pods status ==="
+kubectl -n isaac-launchable get pods
+```
+
+Expected:
+- isaac-launchable-1: `exit=124 crash=0 listen=1` (no annotation → still inert; should be unaffected by Step D changes).
+- usd-composer: `3/3 Running`, no crash loop.
+- All other pods steady.
+
+If isaac-launchable-1 regresses despite NOT having the annotation, that means the manifest is being activated globally somehow — the `enable_environment` gate is broken or the webhook is leaking annotation cross-pod. Investigate.
+
+- [ ] **Step 3: No commit**
+
+---
+
+### Task 9: Push snapshot YAMLs + draft PR comments (DO NOT post)
+
+**Files:**
+- Create: `/tmp/step-d-pr-drafts/{pr-hami,pr-volcano-fork}.md`
+
+- [ ] **Step 1: Push parent HAMi commits**
+
+```bash
+cd /Users/xiilab/git/HAMi
+git log --oneline xiilab/feat/vulkan-vgpu..HEAD 2>&1 | head -5
+git push xiilab feat/vulkan-vgpu 2>&1 | tail -3
+```
+
+- [ ] **Step 2: Draft PR comments**
+
+```bash
+mkdir -p /tmp/step-d-pr-drafts
+
+cat > /tmp/step-d-pr-drafts/pr-hami.md <<'EOF'
+## Step D — Vulkan opt-in production activation + 4-path 검증
+
+Step C 의 `libvgpu_vk.so` 분리 산출물을 production opt-in path 에서 활성화하고, partition enforce 가 4 path 모두에서 작동함을 ws-node074 에서 검증.
+
+### Commits
+
+- `chore(runtime): Step D — update hami-vulkan-manifest CM to libvgpu_vk.so`
+- `chore(runtime): Step D — re-enable hami-vulkan-manifest-installer DS`
+- `chore(runtime): Step D — bump volcano-device-plugin to vulkan-v2`
+- `test(runtime): Step D — 4-path partition enforce verification scripts`
+
+### Verification on ws-node074, isaac-launchable-0 (with `hami.io/vulkan: "true"` annotation)
+
+| Path | Expected | Actual |
+|---|---|---|
+| 1. NVML `nvidia-smi` | 23552 MiB | (fill from script run) |
+| 2. CUDA `cuMemGetInfo` | ~23 GiB | (fill) |
+| 3. Vulkan `vkGetPhysicalDeviceMemoryProperties` device-local heap | ~23 GiB | (fill) |
+| 4. Vulkan `vkAllocateMemory(25 GiB)` | `VK_ERROR_OUT_OF_DEVICE_MEMORY` | (fill) |
+
+`HAMI_VK_TRACE > 0` confirmed via host vulkan-loader path on python3-vulkan probe.
+
+### Companion changes
+- volcano-vgpu-device-plugin fork: libvgpu submodule bumped to HAMi-core `65930f4` (Step C end). Image rebuilt and pushed as `vulkan-v2` to local registry.
+
+### Rollback path (if needed)
+- DaemonSet `hami-vulkan-manifest-installer`: nodeSelector → `hami.io/disabled: "true"` (kubectl patch).
+- DaemonSet `volcano-device-plugin`: image → `vulkan-v1`.
+- Annotation `hami.io/vulkan` → remove from workload.
+
+Spec: `docs/superpowers/specs/2026-04-29-step-d-vulkan-opt-in-production-activation.md`
+Plan: `docs/superpowers/plans/2026-04-29-step-d-vulkan-opt-in-production-activation.md`
+EOF
+
+cat > /tmp/step-d-pr-drafts/pr-volcano-fork.md <<'EOF'
+## bump libvgpu submodule to HAMi-core Step C end (libvgpu_vk.so split)
+
+Pulls in HAMi-core `vulkan-layer` `65930f4` — the Step C redesign that splits Vulkan layer code into a separate `libvgpu_vk.so`. After this bump:
+
+- `libvgpu.so` (HAMi-core only, no `vk*` exports) and `libvgpu_vk.so` (Vulkan implicit layer) are both shipped in `/k8s-vgpu/lib/nvidia/`.
+- The existing postStart `cp -rf /k8s-vgpu/lib/nvidia/. /usr/local/vgpu/` installs both onto every GPU node.
+- Image tag bump: `vulkan-v1` → `vulkan-v2`.
+
+Verification done in HAMi parent Step D plan; partition enforce confirmed across NVML, CUDA, Vulkan-memory-query, Vulkan-allocate paths on ws-node074 isaac-launchable-0.
+
+Submodule SHA: `65930f4` (commit "feat(vulkan): ship hami.json implicit-layer manifest").
+EOF
+
+ls -la /tmp/step-d-pr-drafts/
+```
+
+- [ ] **Step 3: Report — DO NOT post comments. Wait for explicit user approval.**
+
+---
+
+## Self-Review
+
+**1. Spec coverage:**
+- Spec §"핵심 결정 1" (image rebuild) → Task 2
+- Spec §"핵심 결정 2" (CM update) → Task 3
+- Spec §"핵심 결정 3" (installer DS 재활성) → Task 4
+- Spec §"핵심 결정 4" (annotation/webhook) → Task 6
+- Spec §"핵심 결정 5" (4-path verification) → Task 7
+- Spec §"핵심 결정 6" (rollback) → 각 Task post-step alive 체크 + restore 가이드
+- Spec §"Activation flow" → Tasks 3-6 순서대로
+- Spec §"4-path verification" → Task 7
+- Spec §"Production safety gate" → Tasks 1, 4-5 의 post-step 검증 + Task 8 의 sanity ✅
+
+**2. Placeholder scan:** Task 9 의 PR draft verification table 에 있는 `(fill from script run)` / `(fill)` 자리는 실행 후 채워질 자리이지 plan 자체의 결함이 아님. 그 외 placeholder 없음. ✅
+
+**3. Type consistency:** `hami.io/vulkan` annotation 이름 / `HAMI_VULKAN_ENABLE` env 이름 / `library_path` JSON key — 모든 task 에서 일관 사용. Partition limit 23552 MiB — vk_partition_test.py (`PARTITION_MIB`) 와 4-path-verification.sh (Path 1/2 비교값) 가 동일 값 사용. `OVER_BUDGET_BYTES` = 25 GiB 는 vk_partition_test.py 에만 정의됨. ✅
+
+**4. Scope check:** 단일 production deploy + 검증. helm chart 통합 / Tasks 1+2 재도입 / multi-GPU 는 out of scope (spec 명시). 단일 plan 으로 실행 가능. ✅
+
+**5. External-repo dependency**: Task 2 가 `volcano-vgpu-device-plugin` fork 작업 (HAMi parent repo 외). Plan 에 명시적으로 working dir 구분, git push 도 fork 만. 이 task 는 controller 가 외부 repo permissions / SSH 가 보장되는 환경에서 실행해야 함. 안 되면 BLOCKED 보고. ✅
+
+---
+
+## Estimated time
+
+| Task | 예상 |
+|---|---|
+| 1 inventory + baseline | 10분 |
+| 2 image build + push (외부 repo, libvgpu submodule bump 포함) | 60분 |
+| 3 CM update + apply | 15분 |
+| 4 installer DS 재활성 | 15분 |
+| 5 device plugin DS bump | 20분 |
+| 6 annotation + restart + verify | 20분 |
+| 7 4-path verification scripts + run | 45분 |
+| 8 trace host-loader + sanity | 20분 |
+| 9 push + PR drafts | 15분 |
+| **총** | **약 3.5시간** |
+
+(Task 2 가 가장 변동성 큼 — image build 인프라/네트워크 의존도 높음.)
diff --git a/docs/superpowers/plans/notes/hami-core-layout.md b/docs/superpowers/plans/notes/hami-core-layout.md
new file mode 100644
index 000000000..4e1450a03
--- /dev/null
+++ b/docs/superpowers/plans/notes/hami-core-layout.md
@@ -0,0 +1,306 @@
+# HAMi-core layout notes (for Vulkan vGPU plan)
+
+HAMi-core submodule root: `libvgpu/`. This note records the real
+symbol names and file locations that the Vulkan vGPU plan (Tasks 1.4, 1.6)
+will need when extracting a shared throttle utility and a VRAM budget
+counter adapter. No source in `libvgpu/` is modified by this task.
+
+## 소스 구조
+
+Top-level build artefacts:
+- `libvgpu/CMakeLists.txt` — root CMake, adds `src/` and `test/` subdirs,
+  generates `config/static_config.h` from `src/static_config.h.in`.
+- `libvgpu/Makefile` — wrapper (`make build` → `./build.sh`,
+  `make build-in-docker` runs the build inside an
+  `nvidia/cuda:12.2.0-devel-ubuntu20.04` container).
+- `libvgpu/build.sh` — invokes cmake with flags
+  `-DDLSYM_HOOK_ENABLE=1 -DMULTIPROCESS_LIMIT_ENABLE=1 -DHOOK_MEMINFO_ENABLE=1
+  -DHOOK_NVML_ENABLE=1 -DCMAKE_BUILD_TYPE=Debug`, then `make -j$J`.
+
+`libvgpu/src/` (not flat — it is split into feature directories, each with
+its own `CMakeLists.txt` that produces an OBJECT library linked together
+into `libvgpu.so`):
+
+- `src/libvgpu.c` — top-level hook loader / dlsym dispatch (entrypoints).
+- `src/utils.c` — misc helpers (`round_up`, env parsing).
+- `src/static_config.h.in` — generated config header.
+- `src/allocator/` — **VRAM accounting + oom-check + allocation list** layer.
+  - `allocator.c`, `allocator.h` — defines `allocate_raw`, `free_raw`,
+    `oom_check`, `add_chunk_only`, `remove_chunk_only`, etc.
+- `src/cuda/` — CUDA driver API wrappers:
+  - `memory.c` — `cuMemAlloc_v2`, `cuMemAllocManaged`, `cuMemAllocPitch_v2`,
+    `cuMemFree_v2`, `cuLaunchKernel`, `cuLaunchKernelEx`,
+    `cuLaunchCooperativeKernel`, `cuMemCreate`/`cuMemRelease` (VMM), …
+  - `hook.c` — populates the cuda override table with the above symbols.
+  - `device.c`, `context.c`, `stream.c`, `event.c`, `graph.c`.
+- `src/nvml/` — NVML wrappers (`nvml_entry.c`, `hook.c`).
+- `src/multiprocess/` — **shared-memory region (cross-process counters) +
+  SM rate limiter**:
+  - `multiprocess_memory_limit.c/.h` — `shared_region_t`, per-proc slots,
+    `get_current_device_memory_limit`, `get_gpu_memory_usage`,
+    `add_gpu_device_memory_usage`, `rm_gpu_device_memory_usage`,
+    `pre_launch_kernel`.
+  - `multiprocess_utilization_watcher.c/.h` — `rate_limiter`,
+    `utilization_watcher` background thread, `init_utilization_watcher`,
+    `delta()`/`change_token()` token-bucket logic.
+  - `shrreg_tool.c` — standalone CLI for inspecting the shared region.
+- `src/include/` — public headers (used by other subdirs via
+  `#include "include/…"`). Notable:
+  - `memory_limit.h` — macros `ENSURE_RUNNING`, `INC_MEMORY_OR_RETURN_ERROR`,
+    `DECL_MEMORY_ON_ERROR/_SUCCESS`.
+  - `libcuda_hook.h`, `libnvml_hook.h` — override table enum/entries.
+  - `nvml-subset.h`, `nvml_override.h`, `nvml_prefix.h`.
+  - `log_utils.h` — `LOG_DEBUG/INFO/WARN/ERROR`, `CHECK_DRV_API`,
+    `CHECK_NVML_API`, `CHECK_CU_RESULT`.
+
+## VRAM 카운터 API (기존 CUDA 경로에서 사용)
+
+All three primitives live in **allocator + multiprocess** layers. The CUDA
+memory wrappers in `src/cuda/memory.c` call them.
+
+### 예약 (reserve / budget check)
+
+- **Signature**: `int oom_check(const int dev, size_t addon);`
+- **Defined at**: `libvgpu/src/allocator/allocator.c:36`
+- **Declared at**: `libvgpu/src/allocator/allocator.h:155`
+- **Semantics**: reads `get_current_device_memory_limit(dev)` and
+  `get_gpu_memory_usage(dev)`, returns `1` if `usage + addon > limit`
+  (OOM, caller must fail), returns `0` if OK. If `limit == 0` (unlimited)
+  always returns `0`. Note: this is a **check-only** primitive, it does
+  NOT reserve/increment the counter.
+- **Counter increment** happens later via
+  `int add_gpu_device_memory_usage(int32_t pid, int dev, size_t usage, int type);`
+  defined at `libvgpu/src/multiprocess/multiprocess_memory_limit.c:336`
+  (declared at `…/multiprocess_memory_limit.h:147`).
+  - Returns `CUDA_DEVICE_MEMORY_UPDATE_SUCCESS (0)` on success,
+    `CUDA_DEVICE_MEMORY_UPDATE_FAILURE (1)` on failure.
+- **Full reserve path used in CUDA wrappers**: the allocator wraps this in
+  `int allocate_raw(CUdeviceptr *dptr, size_t size)` at
+  `libvgpu/src/allocator/allocator.c:205`, which delegates to
+  `add_chunk(...)` at `:103` → calls `oom_check` then the real
+  `cuMemAlloc_v2`, then `add_gpu_device_memory_usage(getpid(), dev, size, 2)`.
+- **Alt path** (for already-allocated buffers, e.g. managed/pitch/VMM):
+  `int add_chunk_only(CUdeviceptr address, size_t size);` at
+  `libvgpu/src/allocator/allocator.c:133` — same `oom_check` + counter
+  increment but without invoking `cuMemAlloc_v2`.
+
+### 해제 (release)
+
+- **Signature**: `int free_raw(CUdeviceptr dptr);`
+- **Defined at**: `libvgpu/src/allocator/allocator.c:213`
+- **Declared at**: `libvgpu/src/allocator/allocator.h:159`
+- **Semantics**: looks up `dptr` in `device_overallocated` list, calls real
+  `cuMemFree_v2`, removes the entry, and calls
+  `rm_gpu_device_memory_usage(getpid(), dev, t_size, 2)` (defined at
+  `libvgpu/src/multiprocess/multiprocess_memory_limit.c:365`).
+  Returns `0` on success, `-1` if pointer not found.
+- **Alt release-only** (no real `cuMemFree`): `int remove_chunk_only(CUdeviceptr dptr);`
+  at `libvgpu/src/allocator/allocator.c:185`.
+
+### 버짓 조회 (budget / limit)
+
+- **Signature**: `uint64_t get_current_device_memory_limit(const int dev);`
+- **Defined at**: `libvgpu/src/multiprocess/multiprocess_memory_limit.c:828`
+- **Declared at**: `libvgpu/src/multiprocess/multiprocess_memory_limit.h:126`
+- **Semantics**: returns `region_info.shared_region->limit[dev]` from the
+  cross-process shared region (populated from
+  `CUDA_DEVICE_MEMORY_LIMIT_<dev>` env vars). Returns `0` when no limit is
+  set (interpreted as "unlimited" by `oom_check`).
+- **Companion usage getter**:
+  `uint64_t get_current_device_memory_usage(const int dev);` at
+  `…/multiprocess_memory_limit.c:846` — sum of `used[dev].total` across
+  procs in the shared region; the lower-level
+  `size_t get_gpu_memory_usage(const int dev);`
+  (`…/multiprocess_memory_limit.c:243`) is what `oom_check` actually reads.
+
+### 실패 시 반환 규약
+
+- `oom_check` → `int`: **`1` = OOM (caller must fail)**, `0` = OK, `limit==0`
+  also returns `0` (unlimited). Note: it is an "is-OOM" predicate, so a
+  **truthy** result means failure — `if (oom_check(...))` is the error branch.
+- `allocate_raw` / `add_chunk` / `add_chunk_only` → `int`: `0` on success,
+  `CUDA_ERROR_OUT_OF_MEMORY` (= `2`, a `CUresult`) on OOM, `-1` on malloc
+  failure. Callers in `cuda/memory.c` compare against `CUDA_SUCCESS` (0).
+- `free_raw` → `int`: `0` on success, `-1` if pointer not tracked.
+- `add_gpu_device_memory_usage` / `rm_gpu_device_memory_usage` → `int`: `0`
+  (`CUDA_DEVICE_MEMORY_UPDATE_SUCCESS`) on success, `1`
+  (`CUDA_DEVICE_MEMORY_UPDATE_FAILURE`) on failure.
+- `get_current_device_memory_limit` → `uint64_t`: the budget in bytes; `0`
+  means "unlimited" (downstream code must treat 0 as a sentinel, not as
+  "zero budget").
+
+## SM throttle 루프 (CUDA launch 래퍼)
+
+- **Wrapper file**: `libvgpu/src/cuda/memory.c`
+  - `cuLaunchKernel`:        line 545 (calls `pre_launch_kernel()` then
+    `rate_limiter(grids, blocks)` when `pidfound==1`).
+  - `cuLaunchKernelEx`:       line 556.
+  - `cuLaunchCooperativeKernel`: line 567 (only `pre_launch_kernel()`; no
+    rate limiter — possible gap).
+- **Throttle function**: `void rate_limiter(int grids, int blocks);`
+  defined at `libvgpu/src/multiprocess/multiprocess_utilization_watcher.c:34`,
+  declared at `…/multiprocess_utilization_watcher.h:20`.
+- **Background producer**: `void* utilization_watcher();` at
+  `…/multiprocess_utilization_watcher.c:178`, started by
+  `init_utilization_watcher()` at line 213 (creates a pthread at line 218)
+  when `0 < sm_limit <= 100`. Entry point called from `libvgpu.c:888`.
+- **Loop structure (this is what Task 1.4 will extract)**:
+  1. `rate_limiter` short-circuits if SM limit is `0` or `>=100`
+     (unlimited) or if `get_utilization_switch() == 0`.
+  2. It does **NOT** itself call `nvmlDeviceGetUtilizationRates` or
+     `usleep`. Instead it implements a **token-bucket consumer**:
+     ```
+     do {
+         before = g_cur_cuda_cores;                      // line 52
+         if (before < 0) { nanosleep(&g_cycle, NULL); goto CHECK; }  // line 55
+         after = before - kernel_size;
+     } while (!CAS(&g_cur_cuda_cores, before, after));   // line 59
+     ```
+     When the shared counter is depleted it `nanosleep`s for
+     `g_cycle = 10 ms` (`TIME_TICK * MILLISEC`, from
+     `multiprocess_utilization_watcher.h:9`) and retries.
+  3. The **actual NVML polling + token refill** runs in the separate
+     background thread `utilization_watcher` (lines 178–211):
+     ```
+     while (1) {
+         nanosleep(&g_wait, NULL);              // g_wait = 120 ms (header:14)
+         init_gpu_device_utilization();
+         get_used_gpu_utilization(userutil, &sysprocnum);
+         share = delta(upper_limit, userutil[0], share);
+         change_token(share);
+     }
+     ```
+     `get_used_gpu_utilization` (`:121`) calls
+     `nvmlDeviceGetComputeRunningProcesses` +
+     `nvmlDeviceGetProcessUtilization` (not
+     `nvmlDeviceGetUtilizationRates` — per-process sampling is used
+     instead). The NVML `nvmlDeviceGetUtilizationRates` symbol **is**
+     hooked (`src/nvml/nvml_entry.c:730`) but is a passthrough.
+  4. Poll cadence: 120 ms refill loop (`g_wait`), 10 ms consumer backoff
+     (`g_cycle`). Max iterations: unbounded (while loop).
+
+**Implication for Task 1.4**: "throttle loop" here is actually a
+producer/consumer pair. Extracting a shared utility for Vulkan probably
+means extracting (a) a token-bucket consumer equivalent to
+`rate_limiter`, and (b) sharing the existing background refill thread —
+not extracting a simple `poll-utilization+usleep` helper, because that
+pattern does not literally exist in the CUDA path. If Task 1.4 only wants
+the passive "sleep-until-budget-available" semantics, the consumer loop
+in `rate_limiter` (lines 50–60) is the single place to model on.
+
+## 빌드 / 테스트
+
+### Makefile 타겟
+- `build` (default) — runs `./build.sh` locally (needs host CUDA at
+  `$CUDA_HOME` or `/usr/local/cuda`).
+- `build-in-docker` — bind-mounts the repo into
+  `nvidia/cuda:12.2.0-devel-ubuntu20.04` and runs `build.sh` inside.
+
+### CMakeLists 구조
+- Root `CMakeLists.txt` (`libvgpu/CMakeLists.txt`) sets
+  `LIBRARY_COMPILE_FLAGS = -shared -fPIC -D_GNU_SOURCE -fvisibility=hidden
+  -Wall` (Debug adds `-g`, drops `-fvisibility=hidden`), generates
+  `config/static_config.h` from the `.h.in` template (git hash/branch
+  baked in), then `add_subdirectory(src)` and `add_subdirectory(test)`.
+- `src/CMakeLists.txt` adds four subdirs (multiprocess, allocator, cuda,
+  nvml), each of which declares an OBJECT library
+  (`multiprocess_mod`, `allocator_mod`, `cuda_mod`, `nvml_mod`). The root
+  then links them into a single SHARED lib target `vgpu`
+  (= `libvgpu.so`), linking against `-lcuda -lnvidia-ml`. On Release a
+  `strip_symbol` custom target strips the `.so`.
+- `test/CMakeLists.txt` globs every `*.c` / `*.cu` under `test/` and
+  builds one executable per file (linking `-lrt -lpthread -lnvidia-ml
+  -lcuda -lcudart`). No unit-test framework, no `ctest` registration.
+
+### 테스트 프레임워크
+- **없음.** The `test/` directory contains bare CUDA sample programs
+  (one-off allocation/launch harnesses) that are compiled into
+  stand-alone binaries. There is no GoogleTest, no `ctest`, no
+  assertion framework, no CI `make test`. Verification is manual
+  (run a binary under `LD_PRELOAD=libvgpu.so`, inspect logs).
+- `test/python/` holds four manual PyTorch/TF/MXNet smoke scripts
+  (`limit_pytorch.py`, `limit_tensorflow.py`, `limit_tensorflow2.py`,
+  `limit_mxnet.py`) copied into the build dir via a `python_test`
+  custom target.
+
+### test/ 디렉토리 파일 목록
+```
+test/CMakeLists.txt
+test/test_alloc.c
+test/test_alloc_hold.c
+test/test_alloc_host.c
+test/test_alloc_managed.c
+test/test_alloc_pitch.c
+test/test_create_3d_array.c
+test/test_create_array.c
+test/test_host_alloc.c
+test/test_host_register.c
+test/test_runtime_alloc.c
+test/test_runtime_alloc_host.c
+test/test_runtime_alloc_managed.c
+test/test_runtime_host_alloc.c
+test/test_runtime_host_register.c
+test/test_runtime_launch.cu
+test/test_utils.h
+test/python/limit_mxnet.py
+test/python/limit_pytorch.py
+test/python/limit_tensorflow.py
+test/python/limit_tensorflow2.py
+```
+
+## 기타 관찰
+
+### Vulkan 헤더 의존성
+- **현재 없음.** `grep -ri "vulkan\|VULKAN\|vk_" libvgpu/` returns zero
+  files. The build links only `-lcuda -lnvidia-ml`; `CMakeLists.txt`
+  references only `CUDA_HOME`. Any Vulkan layer work will have to add a
+  new `src/vulkan/` subdir and new dependency on vulkan-headers /
+  libvulkan.
+
+### 후속 Task에 영향 주는 주의사항
+1. **`oom_check` is check-only, not reserve+commit.** The CUDA path is:
+   `oom_check` → real `cuMemAlloc` → `add_gpu_device_memory_usage` (or the
+   combined `allocate_raw` / `add_chunk`). There is a TOCTOU window. For
+   the Vulkan adapter (Task 1.6) we must replicate this two-step pattern
+   (or add a new atomic `reserve(dev, size)` helper) and must commit the
+   counter with `add_gpu_device_memory_usage(..., type=2)` after the
+   Vulkan allocation succeeds.
+2. **Sentinel value `limit == 0` means unlimited**, not "zero budget".
+   Downstream Vulkan code must preserve this.
+3. **Per-process accounting key is `getpid()`** (plus a shared-region
+   `hostpid` fixed up by `update_host_pid()`). Vulkan allocations made
+   from the same process should reuse the existing shared region slot,
+   not allocate a new one.
+4. **`rate_limiter` silently no-ops** when SM limit is `0`, `>=100`, or
+   `get_utilization_switch()==0`. A Vulkan consumer that reuses this
+   primitive inherits that behaviour — the Vulkan wrapper will need its
+   own switch/env var if we want independent SM partitioning.
+5. **`cuLaunchCooperativeKernel` at `src/cuda/memory.c:567` is missing
+   the `rate_limiter` call** (only `pre_launch_kernel` runs). Not our
+   bug to fix, but worth knowing when auditing throttle coverage.
+6. **No unit-test framework.** If Task 1.4/1.6 want unit tests around
+   the extracted utility, we will have to introduce one (GoogleTest or
+   equivalent) inside `libvgpu/`, which is a submodule change. A less
+   invasive option is to put unit tests on the HAMi (Go) side that
+   exercise the C symbols via cgo, or write new stand-alone C binaries
+   under `test/` following the current convention.
+7. **Visibility is `-fvisibility=hidden` in Release builds.** Any new
+   symbols that Vulkan wrappers need to export from `libvgpu.so` must be
+   annotated (`__attribute__((visibility("default")))` or similar) or
+   they will not be dlsym-resolvable.
+
+## 시도한 검색 (참고)
+
+```
+grep -rn "oom_check" libvgpu/src/
+  → allocator/allocator.h:155 decl, allocator.c:36 defn
+grep -rn "allocate_raw\|free_raw\|add_chunk_only" libvgpu/src/
+  → allocator/allocator.c:205 / :213 / :133
+grep -rn "get_current_device_memory_limit\|get_gpu_memory_usage" libvgpu/src/
+  → multiprocess/multiprocess_memory_limit.c:828 / :243
+grep -rn "rate_limiter\|utilization_watcher\|nvmlDeviceGetUtilizationRates" libvgpu/src/
+  → multiprocess/multiprocess_utilization_watcher.c:34 / :178
+  → nvml/nvml_entry.c:730 (passthrough hook, not the throttle path)
+grep -rin "vulkan\|VULKAN\|vk_" libvgpu/
+  → (no matches)
+```
diff --git a/docs/superpowers/plans/notes/hami-core-vulkan-sha.txt b/docs/superpowers/plans/notes/hami-core-vulkan-sha.txt
new file mode 100644
index 000000000..8b0c8a294
--- /dev/null
+++ b/docs/superpowers/plans/notes/hami-core-vulkan-sha.txt
@@ -0,0 +1 @@
+579a421d1cae2df9bc692ca35f6b6d53ac7a7a1a
diff --git a/docs/superpowers/specs/2026-04-21-vulkan-vgpu-partitioning-design.md b/docs/superpowers/specs/2026-04-21-vulkan-vgpu-partitioning-design.md
new file mode 100644
index 000000000..ff20e964c
--- /dev/null
+++ b/docs/superpowers/specs/2026-04-21-vulkan-vgpu-partitioning-design.md
@@ -0,0 +1,260 @@
+# HAMi Vulkan vGPU 분할 — 설계 스펙
+
+- 작성일: 2026-04-21
+- 상태: 초안 (구현 전)
+- 범위: NVIDIA GPU, Vulkan 컴퓨트 + 그래픽 워크로드
+- 영향 레포: `Project-HAMi/HAMi` (Go), `Project-HAMi/HAMi-core` (C, `libvgpu/` submodule)
+
+## 1. 문제 정의
+
+HAMi는 `libvgpu.so`(HAMi-core)에서 CUDA 드라이버 API를 `LD_PRELOAD`로 가로채 NVIDIA GPU를 분할합니다. Vulkan 워크로드(컴퓨트 셰이더, `llama.cpp` Vulkan 백엔드, 렌더링 등)는 Vulkan이 별도 API 계층(`libvulkan.so` → ICD)이기 때문에 이 훅을 그대로 우회합니다. 결과적으로:
+
+- `nvidia.com/gpumem`으로 선언한 VRAM 제한이 Vulkan 할당에는 **적용되지 않음**.
+- `nvidia.com/gpucores` SM/코어 throttle이 Vulkan 큐 제출에는 **적용되지 않음**.
+- 기본값으로 컨테이너에 **Vulkan 라이브러리 자체가 마운트되지 않음** — HAMi는 `NVIDIA_DRIVER_CAPABILITIES`를 건드리지 않고, NVIDIA Container Toolkit 기본값(`compute,utility`)에는 Vulkan ICD가 포함되지 않음.
+
+이 설계 작성 시점에 레포 전체를 grep한 결과 `vulkan`/`VK_` 언급은 0건.
+
+## 2. 목표
+
+1. 같은 파드 내 Vulkan 메모리 할당에 대해 기존 `nvidia.com/gpumem` 버짓을 **CUDA와 공유**하여 강제한다 (물리 VRAM 한 개 = 버짓 한 개).
+2. 기존 `nvidia.com/gpucores` SM throttle을 Vulkan 큐 제출에 강제한다.
+3. 요청이 있을 때 Vulkan 라이브러리가 실제로 컨테이너에 도달하게 한다.
+4. 완전한 하위 호환성 유지: Vulkan을 요청하지 않은 파드는 동작 변화 없음.
+
+## 비목표 (Non-Goals)
+
+- NVIDIA 외 벤더(AMD, Intel, Moore Threads)의 Vulkan 분할.
+- CUDA/Vulkan 별도 VRAM 버짓 (물리 실체는 VRAM 단일 풀).
+- `NVIDIA_VISIBLE_DEVICES`가 이미 걸러주는 것 이상의 `vkEnumeratePhysicalDevices` 필터링.
+- 그래픽 프레임 페이싱 보장 — SM throttle은 렌더링 워크로드에 지터를 유발할 수 있음(문서화 대상, 해결 대상은 아님).
+
+## 3. 결정 사항
+
+| 항목 | 결정 | 근거 |
+|------|------|------|
+| 벤더 | NVIDIA 전용 | 기존 HAMi-core CUDA 훅 구조에 부합. |
+| 제어 차원 | VRAM + SM | Vulkan으로 LLM 추론하는 수요에서 둘 다 필요. |
+| 리소스 API | 기존 `nvidia.com/gpumem`, `nvidia.com/gpucores` 공유 버짓 | 물리 실체와 일치, 사용자 YAML 변경 없음. |
+| 활성화 | 파드 annotation `hami.io/vulkan: "true"` opt-in | 모든 CUDA 전용 파드에 수십 MB 그래픽 라이브러리를 붙이지 않기 위해. |
+| 후킹 방식 | HAMi-core `libvgpu.so`가 노출하는 Vulkan implicit layer | Vulkan 로더 표준 계약, LD_PRELOAD vs ICD 디스패치 이슈 회피. |
+| 버짓 공유 | 프로세스 내 공유 카운터(기존 구조체 재사용) | 같은 `libvgpu.so` 인스턴스가 CUDA/Vulkan 훅을 모두 보유 → 별도 IPC 불필요. |
+
+## 4. 아키텍처
+
+```
+┌───────────────────────────────────┐
+│ Project-HAMi/HAMi    (Go)          │
+│  pkg/device/nvidia/device.go       │  ← MutateAdmission 확장
+│  (pkg/device/nvidia/device_test.go)│
+└────────────┬──────────────────────┘
+             │ env: HAMI_VULKAN_ENABLE=1,
+             │      NVIDIA_DRIVER_CAPABILITIES⊇graphics
+             ▼
+┌───────────────────────────────────┐
+│ 컨테이너                            │
+│  NVIDIA Container Toolkit가         │
+│  Vulkan ICD + libGLX_nvidia 마운트  │
+│  HAMi device-plugin가               │
+│  /usr/local/vgpu/libvgpu.so 마운트  │
+└────────────┬──────────────────────┘
+             │ Vulkan 로더가 implicit_layer.d 스캔
+             ▼
+┌───────────────────────────────────┐
+│ Project-HAMi/HAMi-core  (C)        │
+│  libvgpu.so                        │
+│   ├─ 기존 CUDA 훅                  │
+│   ├─ 신규 Vulkan 레이어            │
+│   │     src/vulkan/*.c             │
+│   └─ 공유 VRAM/SM 카운터           │
+│  etc/vulkan/implicit_layer.d/      │
+│   └─ hami.json (신규)              │
+└───────────────────────────────────┘
+```
+
+## 5. 컴포넌트
+
+### 5.1 HAMi (Go) — `pkg/device/nvidia/device.go`
+
+신설 상수:
+```go
+const (
+    VulkanEnableAnno       = "hami.io/vulkan"
+    VulkanLayerName        = "VK_LAYER_HAMI_vgpu"
+    NvidiaDriverCapsEnvVar = "NVIDIA_DRIVER_CAPABILITIES"
+    HamiVulkanEnvVar       = "HAMI_VULKAN_ENABLE"
+)
+```
+
+`MutateAdmission` 확장 (단, `hasResource == true`일 때만):
+1. 파드 annotation `hami.io/vulkan`을 읽고 `"true"`일 때만 이후 로직 수행.
+2. 신규 `NVIDIA_DRIVER_CAPABILITIES` 값 계산:
+   - 컨테이너에 미설정이면: `"compute,utility,graphics"`로 설정.
+   - 설정되어 있고 `"all"` 포함이면: 변경 없음.
+   - 그 외: 콤마 구분 토큰 파싱 후 `"graphics"`와 합집합, 다시 직렬화.
+3. `HAMI_VULKAN_ENABLE=1`이 없으면 추가.
+4. `NVIDIA_VISIBLE_DEVICES`, RuntimeClass는 건드리지 않음 (기존 로직 그대로).
+
+스케줄러 익스텐더, 리소스 회계, 디바이스 플러그인 할당 로직은 변경 없음.
+
+### 5.2 HAMi-core (C) — 신규 모듈 `src/vulkan/`
+
+파일 구성:
+```
+src/vulkan/
+  layer.c            # vkNegotiateLoaderLayerInterfaceVersion,
+                     # vk_layerGetInstanceProcAddr,
+                     # vk_layerGetDeviceProcAddr
+  layer.h
+  dispatch.c         # VkInstance/VkDevice 별 next-layer 디스패치 테이블
+  hooks_memory.c     # vkAllocateMemory, vkFreeMemory,
+                     # vkGetPhysicalDeviceMemoryProperties/2
+  hooks_buffer.c     # vkCreateBuffer, vkCreateImage,
+                     # vkBindBufferMemory/2 (회계상 필요 시)
+  hooks_submit.c     # vkQueueSubmit, vkQueueSubmit2
+```
+
+후킹 대상 엔트리포인트와 동작:
+
+| 함수 | 동작 |
+|------|------|
+| `vkGetPhysicalDeviceMemoryProperties` | next-layer 호출 후 device-local 힙의 `size`를 `min(real, pod_budget)`로 클램핑. |
+| `vkGetPhysicalDeviceMemoryProperties2` | 동일 로직, `pNext` 체인으로 처리. |
+| `vkAllocateMemory` | 공유 카운터 락 획득. `used + allocationSize > budget`이면 언락 후 `VK_ERROR_OUT_OF_DEVICE_MEMORY`. 가능하면 잠정 `used += allocationSize`, 언락, next-layer 호출. next-layer 실패 시 롤백. `VkDeviceMemory → allocationSize` 매핑 저장. |
+| `vkFreeMemory` | 매핑에서 size 조회, 락, `used -= size`, 언락, next-layer 호출, 매핑 제거. |
+| `vkQueueSubmit` / `vkQueueSubmit2` | CUDA `cuLaunchKernel` 래퍼와 공통화한 throttle 유틸 호출: `nvmlDeviceGetUtilizationRates` 폴링 + `usleep(POLL_INTERVAL)`을 `util < cores_limit` 또는 최대 재시도까지 반복. 이후 next-layer 호출. |
+
+레이어 ↔ 로더 계약:
+- `vk_layer.h` 시그니처대로 `vkNegotiateLoaderLayerInterfaceVersion` export.
+- 반환 구조체에 `vk_layerGetInstanceProcAddr` / `vk_layerGetDeviceProcAddr` 포인터 채움.
+- `VkLayerInstanceCreateInfo` 체인에서 next-layer 포인터를 획득해 `VkInstance` 핸들 키의 디스패치 테이블에 저장.
+- 훅 대상이 아닌 이름은 next-layer 포인터를 그대로 반환(pass-through).
+
+### 5.3 공유 VRAM / SM 카운터
+
+HAMi-core는 이미 CUDA 래퍼가 참조하는 per-device `device_memory` 구조체를 갖고 있음. Vulkan 래퍼는 **같은** API를 호출:
+```c
+// 의사코드
+if (!reserve_device_memory(dev_idx, size)) return VK_ERROR_OUT_OF_DEVICE_MEMORY;
+```
+`reserve_device_memory` 내부 뮤텍스가 CUDA/Vulkan 경로를 직렬화. 신규 IPC, 신규 공유메모리 세그먼트 없음.
+
+SM throttle 폴링 루프는 공통 유틸(`util_throttle(dev_idx)`)로 추출하여 `cuLaunchKernel` 래퍼(기존)와 `vkQueueSubmit` 래퍼(신규)가 공유.
+
+### 5.4 Vulkan 레이어 매니페스트
+
+파일: `etc/vulkan/implicit_layer.d/hami.json`. HAMi-core Dockerfile이 이미지의 `/etc/vulkan/implicit_layer.d/hami.json` 경로에 설치.
+
+```json
+{
+  "file_format_version": "1.2.0",
+  "layer": {
+    "name": "VK_LAYER_HAMI_vgpu",
+    "type": "GLOBAL",
+    "library_path": "/usr/local/vgpu/libvgpu.so",
+    "api_version": "1.3.0",
+    "implementation_version": "1",
+    "description": "HAMi Vulkan vGPU limiter",
+    "enable_environment":  { "HAMI_VULKAN_ENABLE": "1" },
+    "disable_environment": { "HAMI_VULKAN_DISABLE": "1" }
+  }
+}
+```
+
+`enable_environment`로 Go 웹훅이 주입한 env가 있을 때만 활성화되므로, 매니페스트가 존재하는 CUDA 전용 파드에서도 레이어는 비활성 상태.
+
+### 5.5 빌드
+
+- HAMi-core `Makefile`: `src/vulkan/*.c` 소스 추가, CFLAGS에 `-I$(VULKAN_SDK_INCLUDE)` 추가, 런타임 링크 없음(`libvulkan.so`는 로더가 dlopen).
+- HAMi-core Dockerfile: Vulkan 헤더 패키지 설치(Debian/Ubuntu 계열은 `apt-get install libvulkan-dev`, 배포판에 따라 `vulkan-headers` 또는 동등 패키지), `etc/vulkan/implicit_layer.d/hami.json`을 이미지의 `/etc/vulkan/implicit_layer.d/`로 복사.
+
+## 6. 데이터 흐름
+
+### 6.1 Admission
+1. 사용자가 `nvidia.com/gpumem: 3000`, `nvidia.com/gpucores: 30`, annotation `hami.io/vulkan: "true"`로 파드 생성.
+2. HAMi 웹훅 `MutateAdmission` 기존 경로 — `NVIDIA_VISIBLE_DEVICES`, RuntimeClass 설정.
+3. 신규 경로(annotation 존재 + `hasResource`): `NVIDIA_DRIVER_CAPABILITIES`에 `graphics` 합집합 병합, `HAMI_VULKAN_ENABLE=1` 추가.
+4. 스케줄러/디바이스 플러그인 흐름은 변경 없음.
+
+### 6.2 컨테이너 시작
+1. NVIDIA Container Toolkit prestart 훅이 `NVIDIA_DRIVER_CAPABILITIES=compute,utility,graphics`를 감지해 Vulkan ICD JSON + `libGLX_nvidia.so.0` + `libnvidia-glvkspirv.so` 등을 마운트.
+2. HAMi-core 이미지가 `libvgpu.so`와 `/etc/vulkan/implicit_layer.d/hami.json`을 이미 배치함.
+3. Vulkan 로더가 `implicit_layer.d`를 스캔하고 `HAMI_VULKAN_ENABLE=1`을 확인한 뒤 `libvgpu.so`에서 `VK_LAYER_HAMI_vgpu` 로드.
+
+### 6.3 런타임
+- `vkAllocateMemory(size)` → 레이어 → 카운터 예약 → next-layer 또는 `VK_ERROR_OUT_OF_DEVICE_MEMORY`.
+- `vkFreeMemory(mem)` → 레이어 → 카운터 반환 → next-layer.
+- `vkGetPhysicalDeviceMemoryProperties` → next-layer → 힙 size 클램프 → 반환.
+- `vkQueueSubmit` → 레이어 throttle 폴링 → next-layer.
+
+### 6.4 공유 버짓 (CUDA + Vulkan 동시 사용)
+두 경로 모두 하나의 뮤텍스로 보호되는 `reserve_device_memory(dev, size)`에 진입. API를 가로질러 합산된 활성 할당량은 파드 버짓을 초과하지 않음.
+
+## 7. 에러 처리
+
+| 상황 | 동작 |
+|------|------|
+| `HAMI_VULKAN_ENABLE` 미설정 | `enable_environment` 게이트 불통과 → 레이어 미활성화, Vulkan은 훅 없이 실행. |
+| 런타임에 매니페스트 파일 누락 | 로더가 레이어를 발견 못 함 → Vulkan은 훅 없이 실행, HAMi-core 시작 프로브에서 경고 로그(추후). |
+| 빌드 타임에 `vulkan-headers` 없음 | 컴파일 에러. 런타임 무관. |
+| NVML 유틸리티 조회 실패 | throttle 스킵 (fail-open), errno 로그. |
+| next-layer 체인 재진입 | 디스패치 테이블에서 저장된 next 포인터로 라우팅, 레이어 코드 비재진입 설계로 재귀 차단. |
+| 멀티 physical device 컨테이너 | PCI 버스 ID / NVML 디바이스 핸들 기반 per-device 카운터. `NVIDIA_VISIBLE_DEVICES`가 이미 세트를 제한. |
+| 예약 후 next-layer `vkAllocateMemory` 실패 | 카운터 롤백, 에러 그대로 반환. |
+| 앱이 `VkDeviceMemory`를 leak (`vkFreeMemory` 호출 안 함) | 프로세스 동안 카운터 drift, 프로세스 종료 시 라이브러리 언로드로 해소. |
+| non-NVIDIA 파드에 `hami.io/vulkan: true` annotation | NVIDIA 디바이스에서 `hasResource == false` → 조용히 no-op. |
+| 사용자가 `NVIDIA_DRIVER_CAPABILITIES=all` 선설정 | 변경 없음 (`all` ⊇ `graphics`). |
+| 사용자가 `NVIDIA_DRIVER_CAPABILITIES=compute` 선설정 | `compute,graphics`로 교체(합집합). |
+| 사용자가 `NVIDIA_DRIVER_CAPABILITIES=compute,graphics` 선설정 | 변경 없음 (이미 `graphics` 포함). |
+
+## 8. 테스트 전략
+
+### 8.1 Go 단위 테스트 — `pkg/device/nvidia/device_test.go`
+- `TestMutateAdmission_VulkanAnno_AddsGraphicsCap` — annotation + HAMi 리소스 → env에 `graphics`, `HAMI_VULKAN_ENABLE=1` 포함.
+- `TestMutateAdmission_VulkanAnno_MergesExistingCaps` — 기존 `compute` 있음 → `compute,graphics`로 병합.
+- `TestMutateAdmission_VulkanAnno_AllCaps_NoChange` — 기존 `all` 있음 → 변경 없음.
+- `TestMutateAdmission_NoVulkanAnno_NoChange` — annotation 없음 → env 주입 없음.
+- `TestMutateAdmission_VulkanAnno_NoGPUResource` — annotation만 있고 HAMi 리소스 없음 → no-op.
+- `TestMutateAdmission_VulkanAnno_IdempotentHamiEnable` — 웹훅 재적용 시 `HAMI_VULKAN_ENABLE` 중복 추가되지 않음.
+
+### 8.2 HAMi-core C 단위 테스트
+- `vk_layerGetInstanceProcAddr` — 훅 대상 이름은 래퍼 반환, 그 외는 next-layer 포인터 반환.
+- `vkAllocateMemory`:
+  - 버짓 이내 → next-layer 호출, 카운터 증가.
+  - 버짓 초과 → `VK_ERROR_OUT_OF_DEVICE_MEMORY`, next-layer 미호출, 카운터 불변.
+  - next-layer 에러 반환 → 카운터 롤백.
+- pthread 경쟁 스트레스: CUDA `cuMemAlloc` + Vulkan `vkAllocateMemory` 동시 실행 시 `used_memory ≤ budget` 불변식, 성공 합산이 버짓 초과 없음.
+- `vkGetPhysicalDeviceMemoryProperties` 클램프: 반환된 구조체의 힙 size가 `min(real, budget)`.
+
+### 8.3 통합 / E2E
+- 신규 예제 `examples/nvidia/vulkan_example.yaml` — `hami.io/vulkan: "true"`, `nvidia.com/gpumem: 1024`, `vulkaninfo` 이미지. 검증(수동 또는 스크립트):
+  - `vulkaninfo | grep heapSize`가 device-local 힙에서 ≤ 1024 MiB.
+  - `vkAllocateMemory` 테스트 바이너리(또는 `vkcube --size-mb 2048`)가 `OUT_OF_DEVICE_MEMORY`로 실패.
+- (수동, CI 미포함) Vulkan 백엔드 llama.cpp 파드에 `gpumem: 4096` + 7B 모델 — 버짓 초과 시 할당 실패 로그 확인. `docs/vulkan-vgpu-support.md`에 기록.
+
+### 8.4 수동 검증 체크리스트 (문서)
+- `vulkaninfo` 힙 size 클램프.
+- `vkAllocateMemory` 버짓 초과 시 기대한 에러 반환.
+- 큐 제출 집중 워크로드에서 `nvidia-smi` compute 사용률이 설정된 `gpucores` 근방에서 throttle.
+- 한 파드에서 CUDA + Vulkan 혼합 워크로드가 합산 버짓을 준수.
+
+## 9. 딜리버리 계획
+
+두 레포에 걸친 변경, 순서:
+
+1. **HAMi-core PR** (C): Vulkan 레이어 모듈, 매니페스트 JSON, Dockerfile 업데이트, Makefile 업데이트, C 단위 테스트. 신규 릴리스 태그(`vX.Y.0`).
+2. **HAMi PR** (Go, 이 레포):
+   - `pkg/device/nvidia/device.go` — annotation → env 주입.
+   - `pkg/device/nvidia/device_test.go` — 단위 테스트.
+   - `libvgpu` submodule 포인터를 신규 HAMi-core 릴리스로 갱신.
+   - `examples/nvidia/vulkan_example.yaml`.
+   - `docs/vulkan-vgpu-support.md` (영문 + `_cn.md`).
+
+롤아웃: 기본 OFF (annotation 게이트). 기존 배포에 대한 마이그레이션/파괴적 변경 없음.
+
+## 10. 미해결 / 후속 과제
+
+- SM throttle 하 그래픽 워크로드의 프레임 페이싱 — `vkQueueSubmit` 지터 측정 후 후속 릴리스에서 throttle 모드 설정(`strict` vs `cooperative`) 옵션 필요할 수 있음.
+- Vulkan Video 확장(`VK_KHR_video_queue`) — v1에서는 후킹 대상 아님.
+- Vulkan 할당 거부에 대한 Prometheus 메트릭 — 후속.
+- MPS 모드와의 상호작용 — MPS는 Vulkan을 노출하지 않음. annotation + MPS 모드 조합은 에러 또는 `hami-core` 모드로 폴백 + 경고. 구현 단계에서 최종 결정.
diff --git a/docs/superpowers/specs/2026-04-27-volcano-vulkan-vgpu-design.md b/docs/superpowers/specs/2026-04-27-volcano-vulkan-vgpu-design.md
new file mode 100644
index 000000000..cbb5b550b
--- /dev/null
+++ b/docs/superpowers/specs/2026-04-27-volcano-vulkan-vgpu-design.md
@@ -0,0 +1,210 @@
+# Volcano + Vulkan vGPU 통합 설계
+
+**작성일**: 2026-04-27
+**관련 작업**: HAMi `feat/vulkan-vgpu` 브랜치의 Vulkan vGPU 기능을 `xiilab/volcano-vgpu-device-plugin` 환경에 적용
+
+## 목적
+
+Volcano scheduler 가 이미 운영 중인 클러스터에 HAMi 의 Vulkan vGPU 메모리 partitioning 기능을 추가한다. Volcano scheduler 와 `volcano-vgpu-device-plugin` 은 그대로 유지하면서 **Vulkan workload (Isaac Sim, Kit 등) 도 CUDA workload 와 동일하게 `nvidia.com/gpumem` 제약을 받도록** 한다.
+
+## 비목표 (Non-goals)
+
+- Volcano scheduler 동작/스케줄링 로직 변경 ❌
+- 기존 CUDA-only workload 의 동작 회귀 ❌
+- HAMi 자체 scheduler extender 또는 device-plugin 도입 ❌
+- 새 task scheduler 또는 webhook 체인 변경 ❌
+
+## 현재 상태 (As-is)
+
+### HAMi `feat/vulkan-vgpu` 브랜치 (이미 검증됨)
+
+- `libvgpu` submodule (HAMi-core, vulkan-layer): `vkAllocateMemory` 후킹으로 Vulkan 메모리 enforcement
+- `pkg/device/nvidia/device.go:applyVulkanAnnotation`: pod annotation `hami.io/vulkan: "true"` 검사 → `HAMI_VULKAN_ENABLE=1` env + `NVIDIA_DRIVER_CAPABILITIES` 에 `graphics` merge
+- `0150ea7` commit: device-plugin 이 Vulkan implicit layer manifest (`hami.json`) 를 container 에 자동 mount
+- 2026-04-26 production verification: ws-node074 의 Isaac Sim pod 에서 23 GB partition enforcement 확인
+
+### `xiilab/volcano-vgpu-device-plugin` (현재)
+
+- Project-HAMi/volcano-vgpu-device-plugin 의 fork
+- `libvgpu` submodule = `6660c84` (vulkan-layer 미포함)
+- HAMi-core 사용은 하지만 CUDA path 만 enforce
+- Volcano scheduler 와 ConfigMap (`deviceshare.VGPUEnable: true`) 으로 협업
+- standard / CDI 두 가지 deploy yaml 제공
+
+## 설계: 책임 분담
+
+| 레이어 | 담당자 | 변경 |
+|---|---|---|
+| Pod scheduling | Volcano scheduler | ❌ 변경 없음 |
+| GPU 자원 sharing/할당 | volcano-vgpu-device-plugin | ⚠️ submodule + manifest mount |
+| Pod spec mutation (env) | HAMi mutating webhook | ✅ 별도 deploy (annotation 처리) |
+| Vulkan 메모리 enforcement | libvgpu (HAMi-core vulkan-layer) | ✅ submodule 갱신으로 자동 |
+
+### 핵심 결정
+
+1. **HAMi webhook 만 별도 deploy** — Volcano 우회 아님. mutating admission webhook 은 scheduling 과 별개 단계라 scheduler 그대로 유지.
+2. **submodule 단순 교체로는 부족** — Vulkan layer 코드는 들어오지만 manifest 파일 자동 mount + env 주입 두 가지 부수 효과 필요.
+3. **manifest 파일은 device-plugin 이 hostPath mount** — HAMi commit `0150ea7` 패턴 그대로 포팅. 호스트 노드에 `/etc/vulkan/implicit_layer.d/hami.json` 사전 배치는 별도 DaemonSet 또는 helm chart init.
+
+## Components
+
+### C1. libvgpu submodule 교체
+
+- **변경 위치**: `xiilab/volcano-vgpu-device-plugin/libvgpu`
+- **변경 내용**: `6660c84` → vulkan-layer HEAD (HAMi 가 사용 중인 commit, 현재 `8d4f712`)
+- **부수 효과**: vulkan source 추가, `vkQueueSubmit2` / `VkSubmitInfo2` Vulkan 1.3 가드 코드 포함
+
+### C2. Vulkan manifest auto-mount
+
+- **변경 위치**: `xiilab/volcano-vgpu-device-plugin/pkg/.../allocate` (또는 device 응답 빌더)
+- **변경 내용**: HAMi commit `0150ea7` 의 `injectVulkanLayerMount()` 함수 포팅
+- **동작**: device-plugin 의 `Allocate()` 응답에 다음 mount 추가
+  ```
+  hostPath:      /etc/vulkan/implicit_layer.d/hami.json
+  containerPath: /etc/vulkan/implicit_layer.d/hami.json
+  readOnly:      true
+  ```
+- **CDI 모드**: `volcano-vgpu-device-plugin-cdi.yml` 경로도 동일하게 처리. CDI spec yaml 에 mount 추가하는 형태로.
+
+### C3. 빌드 의존성
+
+- **변경 위치**: `Dockerfile` (volcano-vgpu-device-plugin 의 builder stage)
+- **변경 내용**: `libvulkan-dev` apt install (HAMi commit `50b37ff` 와 동일)
+- **이유**: vulkan-layer source 컴파일에 Vulkan headers 필요
+
+### C4. HAMi webhook deployment
+
+- **변경 위치**: 새 클러스터에 helm install (코드 변경 없음, deploy 작업)
+- **values.yaml**:
+  ```yaml
+  devicePlugin:
+    enabled: false       # volcano-vgpu-device-plugin 이 GPU 자원 등록
+  scheduler:
+    kubeScheduler:
+      enabled: false     # Volcano scheduler 사용
+    extender:
+      enabled: false     # HAMi extender 사용 안 함
+    admissionWebhook:
+      enabled: true      # Vulkan annotation 처리만 (chart 는 `.Values.scheduler.admissionWebhook` 를 참조)
+  ```
+- **결과**: HAMi 의 `applyVulkanAnnotation` 코드가 Volcano 환경에서도 동작. annotation 있는 pod 의 container env 자동 주입.
+
+### C5. Host 측 manifest 파일 사전 배치 (`volcano-vgpu-vulkan-manifest.yml`)
+
+- **변경 위치**: `xiilab/volcano-vgpu-device-plugin` 에 신규 raw yaml 추가 (기존 `volcano-vgpu-device-plugin.yml` 와 같은 디렉터리/패턴)
+- **구성**: ConfigMap (`hami.json` 본문) + DaemonSet (initContainer 가 ConfigMap 의 `hami.json` 을 host 의 `/etc/vulkan/implicit_layer.d/hami.json` 으로 복사)
+- **manifest 내용**: HAMi 의 `0150ea7` commit 에서 사용한 것 그대로 (layer 이름 `VK_LAYER_HAMI_vgpu`, library path `/usr/local/vgpu/libvgpu.so`, enable_environment `HAMI_VULKAN_ENABLE=1`)
+- **DaemonSet 위치**: 모든 GPU 노드 (label `nvidia.com/gpu.present=true` 또는 동등 selector). manifest 파일이 ready 된 노드만 device-plugin 의 mount 가 성공할 수 있으므로 device-plugin DaemonSet 보다 먼저 배포하는 게 안전.
+- **대안**: 사용자 image 에 manifest 베이크 — 비채택 (사용자 부담 증가)
+
+### C6. E2E 테스트
+
+- **검증 항목**:
+  1. annotation 있는 Vulkan pod → Kit boot log 의 `GPU Memory: 23000 MB` (partition enforce)
+  2. annotation 없는 Vulkan pod → Kit boot log 의 `GPU Memory: 46068 MB` (full GPU)
+  3. annotation 있는 CUDA-only pod → CUDA 정상 + Vulkan layer 안 로드 확인
+  4. 기존 volcano-vgpu-device-plugin CUDA sharing 회귀 (HAMi-core dynamic-mig 모드 포함)
+- **참고 문서**: HAMi `docs/vulkan-vgpu-e2e-checklist.md` 의 체크리스트 그대로 적용
+
+## Data flow (활성화 케이스)
+
+```
+1. kubectl apply  isaac-sim.yaml
+     annotations: hami.io/vulkan: "true"
+     resources.limits: nvidia.com/gpumem: 23000
+
+2. K8s API server
+   ├─ HAMi mutating webhook (별도 deploy 됨)
+   │  ├─ env += HAMI_VULKAN_ENABLE=1
+   │  └─ env += NVIDIA_DRIVER_CAPABILITIES=compute,utility,graphics
+   └─ etcd 저장
+
+3. Volcano scheduler  (변경 없음)
+   └─ pod 을 ws-node074 로 schedule
+
+4. kubelet → volcano-vgpu-device-plugin Allocate()
+   ├─ GPU UUID 할당 (NVIDIA_VISIBLE_DEVICES)
+   ├─ libvgpu.so mount (CUDA + Vulkan 후킹용, 기존 코드)
+   └─ /etc/vulkan/implicit_layer.d/hami.json mount (C2 신규)
+
+5. Container 시작
+   ├─ ld.so.preload 가 libvgpu.so 로드 (image 측 책임)
+   ├─ Vulkan app 시작 → loader 가 hami.json 발견
+   ├─ enable_environment 가드 매치 (HAMI_VULKAN_ENABLE=1)
+   ├─ Vulkan layer 로드 → vkAllocateMemory 후킹
+   └─ CUDA_DEVICE_MEMORY_LIMIT_0=23000m enforce
+```
+
+## Error handling / edge cases
+
+| 시나리오 | 동작 | 비고 |
+|---|---|---|
+| annotation 없는 pod | webhook no-op → env 미주입 → enable_environment 가드 unmatched → layer 안 로드 | 일반 CUDA pod 동작 그대로 |
+| 노드에 manifest 파일 없음 | device-plugin Allocate 의 mount 시도 → kubelet mount 실패 → pod ContainerCreating | DaemonSet 의 manifest 배포 readiness 보장 필요 |
+| HAMi webhook + Volcano webhook 순서 | mutating webhook chain 순차 실행. capability 추가 → Volcano 가 받는 spec 에 반영 → schedule 시 capability 미사용 | 충돌 없음 |
+| CDI 모드 | `volcano-vgpu-device-plugin-cdi.yml` 의 device-plugin 도 동일하게 hami.json mount 추가 필요 | 코드 분기 |
+| Vulkan ICD 의존성 부재 | `libGLX_nvidia.so` 의 `vk_icdNegotiateLoaderICDInterfaceVersion` 이 -3 (`VK_ERROR_INITIALIZATION_FAILED`) 반환 → Vulkan init 실패 | 사용자 image 가 `libEGL.so.1` + X11 + `/dev/dri` 포함해야 함 (HAMi 메모리 노트 참고) |
+
+## Risks
+
+1. **CDI 모드와 standard 모드 분기 누락**: 두 deploy yaml 이 서로 다른 device-plugin binary 를 사용한다면 manifest mount 코드도 두 곳에 들어가야 함. 점검 필요.
+2. **DaemonSet 으로 host 노드에 manifest 배포 안 되어있는 경우**: pod 이 ContainerCreating 으로 stuck. helm chart 또는 별도 manifest 로 readinessGate 처리 필요.
+3. **NVIDIA driver container 의존**: Volcano 환경이 NVIDIA gpu-operator 사용한다면 driver container 가 X11/EGL 라이브러리를 마운트해야 Vulkan 동작. HAMi 환경에서 검증한 것과 동일한 image 셋업 가정.
+4. **upstream Project-HAMi/volcano-vgpu-device-plugin 과 divergence**: xiilab fork 가 별도 vulkan 코드 포함하는 동안 upstream 과 sync 가 어려워질 수 있음. 가능하면 upstream 에 PR 도 보내 divergence 최소화 권장.
+
+## Testing
+
+1. **Unit test**: 기존 volcano-vgpu-device-plugin 의 device allocate test 에 manifest mount 검증 추가
+2. **회귀 test**: CUDA-only workload 가 기존과 동일하게 동작
+3. **Integration**: kind/minikube 에서 Volcano + HAMi webhook + 새 device-plugin → 표준 CUDA pod 정상 동작 확인
+4. **E2E manual** (ws-node074 또는 별도 Volcano cluster):
+   - 4-1. Vulkan pod + annotation: 23 GB partition 확인
+   - 4-2. Vulkan pod no-annotation: full GPU 확인
+   - 4-3. CUDA pod + annotation: 영향 없음
+   - 4-4. dynamic-mig 모드 회귀 (Ampere+ GPU 가용 시)
+
+## Deployment artifact (raw yaml 패턴)
+
+`xiilab/volcano-vgpu-device-plugin` 의 기존 패턴 (helm chart 없음, 평탄한 raw yaml) 을 그대로 따른다.
+
+```
+xiilab/volcano-vgpu-device-plugin/
+├── volcano-vgpu-device-plugin.yml             # 기존 standard mode (image tag 갱신)
+├── volcano-vgpu-device-plugin-cdi.yml         # 기존 CDI mode (image tag 갱신)
+└── volcano-vgpu-vulkan-manifest.yml           # ★ 신규 — ConfigMap + DaemonSet
+```
+
+HAMi webhook 은 별도 yaml 작성하지 않고 **HAMi 본가 helm chart 재사용** (C4 의 values.yaml).
+
+## Deployment 순서
+
+1. **PR-1: xiilab/volcano-vgpu-device-plugin**
+   - submodule 갱신 (C1: `6660c84` → vulkan-layer HEAD)
+   - device-plugin 코드에 manifest mount 추가 (C2)
+   - Dockerfile 빌드 의존성 (C3: `libvulkan-dev`)
+   - 기존 두 yaml 의 image tag 를 새 빌드 (`vulkan-v1`) 으로 갱신
+   - 신규 `volcano-vgpu-vulkan-manifest.yml` 추가 (C5)
+   - image 빌드 + harbor push
+
+2. **클러스터 deploy**
+   - 2-1. `kubectl apply -f volcano-vgpu-vulkan-manifest.yml` (host 에 hami.json 배치)
+   - 2-2. `kubectl apply -f volcano-vgpu-device-plugin.yml` (또는 CDI 버전, 새 image rolling)
+   - 2-3. `helm install hami-webhook hami/hami` (C4 values 로 webhook only)
+
+3. **E2E 검증** (C6)
+
+## 관련 자료
+
+- HAMi `feat/vulkan-vgpu` 브랜치 (현재)
+  - `pkg/device/nvidia/device.go:applyVulkanAnnotation` (webhook 코드)
+  - commit `0150ea7` (manifest auto-inject)
+  - commit `50b37ff` (libvulkan-dev 빌드 의존성)
+  - `docs/vulkan-vgpu-support.md`, `docs/vulkan-vgpu-e2e-checklist.md`
+- xiilab/volcano-vgpu-device-plugin
+  - `https://github.com/xiilab/volcano-vgpu-device-plugin`
+  - 현재 libvgpu submodule: `6660c84`
+- HAMi 메모리 노트
+  - `project_hami_vulkan_verification.md` (production activation 검증)
+- Volcano scheduler
+  - `https://github.com/volcano-sh/volcano`
+  - vGPU 활성화: `deviceshare.VGPUEnable: true` ConfigMap 설정
diff --git a/docs/superpowers/specs/2026-04-28-hami-isolation-isaac-sim-design.md b/docs/superpowers/specs/2026-04-28-hami-isolation-isaac-sim-design.md
new file mode 100644
index 000000000..dfd139320
--- /dev/null
+++ b/docs/superpowers/specs/2026-04-28-hami-isolation-isaac-sim-design.md
@@ -0,0 +1,302 @@
+# HAMi vGPU 격리를 NVIDIA Isaac Sim Kit (Omniverse) 에 적용 — Design
+
+**Date**: 2026-04-28
+**Status**: Approved (사용자 design 승인 완료)
+**Goal**: HAMi vGPU 격리(NVML + CUDA + Vulkan path) 를 NVIDIA Isaac Sim Kit (Carbonite/OptiX/Vulkan implicit layer chain) 와 호환되게 적용
+
+## 1. Context
+
+PR #1803 (HAMi 메인 fork `xiilab/feat/vulkan-vgpu`) + PR #182 (HAMi-core fork `xiilab/vulkan-layer`) 가 Vulkan vGPU partition 격리를 추가했고, 2026-04-27 새벽에 4개 patch 가 cluster 에 deploy 되어 **노드 wide HAMi 격리** 가 활성화됐다. 그러나 이 시점부터 isaac-launchable namespace 의 **Isaac Sim Kit 6.0.0-rc.22** (`runheadless.sh`, `train.py --livestream 2`) 가 SegFault 로 더 이상 동작하지 않게 됐다.
+
+사용자가 2일 동안 정상 시연했던 baseline 은 **2026-04-27 08:44 이전** 이고, 이 시점 이후의 노드 wide 강제 격리가 NVIDIA Isaac Sim Kit 의 init path 와 호환 충돌한다. 즉 이전에 우연히 race 를 피해 동작하던 것이 아니라, 08:44 변경으로 발생한 실제 regression 이다.
+
+진정한 fix 는 격리 메커니즘을 namespace 단위 opt-in 으로 변경하고 (Step A), HAMi-core 의 hook code 를 Isaac Sim Kit init 시 안전하게 동작하도록 hardening (Step B/C) 한 다음, isaac-launchable namespace 도 opt-in 활성화하여 격리 + 동작 둘 다 만족하는 (Step D) 것이다.
+
+## 2. 4-27 새벽 patch (regression 시점)
+
+| 시각 (UTC) | 변경 |
+|---|---|
+| 02:02 | `volcano-vgpu-device-plugin:vulkan-v1` Harbor push (`10.61.3.124:30002/library/`) |
+| 02:17:50 | `hami-vulkan-manifest-installer` daemonset 생성 (kube-system) — 노드의 `/usr/local/vgpu/vulkan/implicit_layer.d/hami.json` 생성 |
+| 03:34:22 | `hami-webhook` MutatingWebhookConfiguration install (helm release `hami-webhook` in `hami-system`) — pod 생성 시 자동 mutation (HAMI_VULKAN_ENABLE env, hami.json mount, NVIDIA_DRIVER_CAPABILITIES patch) |
+| 08:44 | `/usr/local/vgpu/ld.so.preload` 만들어짐 — **노드 wide 모든 컨테이너 process 에 `libvgpu.so` 강제 inject** |
+
+마지막 (`ld.so.preload`) 이 결정적 trigger.
+
+## 3. Isaac Sim Kit 와의 호환 충돌 (backtrace 증거)
+
+### 3.1 OptiX denoising init 시 NULL deref
+```
+000: libc.so.6!__sigaction
+001: libvgpu.so!cuMemGetInfo_v2+0x52c (memory.c:513)   ← HAMi-core CUDA hook
+002: libnvoptix.so.1!rtGetSymbolTable
+004: librtx.optixdenoising.plugin.so!carbOnPluginPreStartup
+009: libcarb.scenerenderer-rtx.plugin.so!carbOnPluginPreStartup
+010: libomni.hydra.rtx.plugin.so
+```
+NVIDIA OptiX denoising plugin 이 init 시 `cuMemGetInfo_v2(NULL, NULL)` 호출 → HAMi-core hook 이 NULL pointer dereference 시도.
+**Fix 이미 적용됨**: HAMi-core fork commit `03f99d7` — forward to real driver first + NULL guard.
+
+### 3.2 Carbonite Vulkan plugin extension list dangling
+```
+001: libvulkan.so.1!+0x22bc8                       ← Vulkan loader
+002: libcarb.graphics-vulkan.plugin.so!std::vector<char const*>::_M_emplace_aux<char const*&>
+003: libgpu.foundation.plugin.so!Map_base<string, ulong>::operator[]
+009: libgpu.foundation.plugin.so!filesystem::path::~vector()
+013: libomni.ui!Image::_loadSourceUrl
+039: libomni.kit.renderer.plugin.so!carbOnPluginPreStartup
+```
+Carbonite Vulkan plugin 이 enabled extension list 만들 때 layer chain 에서 `vkGetInstanceProcAddr(NULL, "vkEnumerate*ExtensionProperties")` 호출 → HAMi Vulkan layer 가 NULL 반환 → loader 가 NULL fn ptr 사용 → SegFault.
+**Fix 이미 적용됨**: HAMi-core commit `2b6b875` — `vkEnumerate{Instance,Device}{Extension,Layer}Properties` hooks 추가.
+
+### 3.3 carb.tasking fiber init race
+```
+014-017: libcarb.tasking.plugin.so!make_fcontext+0x39
+```
+NVIDIA Kit 의 task scheduler 가 fiber/coroutine context 생성 시 race. Layer chain 활성 시 dispatch 차이로 trigger.
+**Fix 미적용** — Step C 영역.
+
+### 3.4 omni.clipboard.service utmp 부재
+```
+Failed to open [/var/run/utmp]
+Active user not found. Using default user [kiosk]
+```
+`omni.clipboard.service` 가 init 시 logged-in user 식별 실패. 직접 SegFault trigger 는 아니나 race 기여 가능. 우회: utmp record 만들기.
+
+## 4. 검증된 baseline (Step A 직전 상태)
+
+```
+ws-node074:
+  /usr/local/vgpu/ld.so.preload     = "" (빈 파일, HAMi-core inject 비활성)
+  /usr/local/vgpu/libvgpu.so        = HAMi-core fork build (md5 62fedf17)
+  /usr/local/vgpu/vulkan/implicit_layer.d/hami.json = 복원
+  hami-vulkan-manifest-installer ds = nodeSelector hami.io/disabled=true (비활성)
+  isaac-launchable namespace label  = hami.io/webhook=ignore (webhook opt-out)
+
+검증:
+  runheadless.sh 5번 → 5/5 exit=124 alive, crash=0, listen 49100/30999 ✅
+  nvidia-smi total = 46068 MiB (raw — 격리 비활성)
+  외부 http://10.61.3.118 = 5/5 → 200
+  isaac-launchable-0/1, usd-composer pod 모두 3/3 Running
+```
+
+이 환경이 사용자가 본 2일 동안 동작하던 baseline 과 동등 (격리 0).
+
+## 5. Goal
+
+| 격리 path | 검증 방법 | 기대값 |
+|---|---|---|
+| **NVML** | `nvidia-smi --query-gpu=memory.total --format=csv,noheader` | `23552 MiB` |
+| **CUDA** | `cuMemGetInfo_v2()` returned total / `cuMemAlloc(>23 GiB)` | partition value / `CUDA_ERROR_OUT_OF_MEMORY` |
+| **Vulkan** | `vkGetPhysicalDeviceMemoryProperties` heap[0].size / `vkAllocateMemory(>23 GiB)` | `23 GiB` / `VK_ERROR_OUT_OF_DEVICE_MEMORY` |
+| **Isaac Sim Kit** | `runheadless.sh` 5번 / `train.py --livestream 2` | 5/5 alive, listen 49100, 화면 표시, 학습 진행 |
+
+**4개 path 동시에 만족** = 성공.
+
+## 6. Architecture (4 Step)
+
+```
+Step A (namespace opt-in/out webhook)
+   ↓
+Step B (HAMi-core CUDA/NVML hook hardening)
+   ↓
+Step C (HAMi-core Vulkan layer compat hardening)
+   ↓
+Step D (isaac-launchable opt-in 활성화 + 4-path 검증)
+```
+
+각 step 은 순차적으로 진행하며 앞 step 의 결과물에만 의존한다. Step A 가 끝나면 isaac-launchable 은 즉시 정상 운영 가능하다. Step D 의 실제 검증은 Step B, C 가 모두 완료된 후에만 가능하다.
+
+## 7. Step A — Namespace opt-in/out (1일)
+
+### 7.1 변경 대상
+
+| 컴포넌트 | 현재 | 변경 |
+|---|---|---|
+| `hami-webhook` MutatingWebhookConfiguration `namespaceSelector` | opt-out (`hami.io/webhook NotIn ignore`) | **opt-in (`hami.io/vgpu In enabled`)** |
+| `hami-vulkan-manifest-installer` daemonset (노드 wide hami.json install) | 모든 GPU 노드 활성 | **폐기 또는 webhook init container 로 변환** — pod 단위 hami.json mount |
+| `/usr/local/vgpu/ld.so.preload` (노드 wide HAMi-core inject) | 모든 컨테이너 강제 inject | **폐기** — webhook 이 enabled namespace pod 에만 `LD_PRELOAD` env 주입 + `libvgpu.so` volume mount |
+
+### 7.2 새 webhook mutation 패턴 (enabled pod 만)
+
+```yaml
+# Pod containers[*] 에 추가:
+env:
+  - name: LD_PRELOAD
+    value: /usr/local/vgpu/libvgpu.so
+  - name: HAMI_VULKAN_ENABLE
+    value: "1"
+  - name: NVIDIA_DRIVER_CAPABILITIES
+    value: <기존값>,graphics  # 이미 all 이면 noop
+volumeMounts:
+  - name: hami-libvgpu
+    mountPath: /usr/local/vgpu
+    readOnly: true
+  - name: hami-vulkan-layer
+    mountPath: /etc/vulkan/implicit_layer.d/hami.json
+    subPath: hami.json
+    readOnly: true
+
+# Pod volumes 에 추가:
+volumes:
+  - name: hami-libvgpu
+    hostPath:
+      path: /usr/local/vgpu
+      type: Directory
+  - name: hami-vulkan-layer
+    configMap:
+      name: hami-vulkan-layer
+      items:
+        - key: hami.json
+          path: hami.json
+```
+
+### 7.3 변경 파일
+
+- `charts/hami/values.yaml` — namespaceSelector default mode (`opt-in` 추가)
+- `charts/hami/templates/scheduler/webhook.yaml` — selector mode 분기
+- `charts/hami/templates/manifest-installer-ds.yaml` — 제거 또는 init container 로 이동
+- `charts/hami/templates/preload-installer.yaml` (있다면) — 제거 (`/usr/local/vgpu/ld.so.preload` 만들기 daemonset)
+- `pkg/scheduler/webhook/*` (mutation 로직 변경)
+
+### 7.4 검증
+
+- isaac-launchable namespace = label 없음 → webhook mutation 0 → 현재 baseline 그대로 (5/5 alive)
+- 새 namespace `hami-test` 에 label `hami.io/vgpu=enabled` + simple CUDA pod 배포 → `nvidia-smi 23552 MiB`, `cuMemAlloc(>23GiB)` 거부 검증
+
+## 8. Step B — HAMi-core CUDA/NVML hook hardening (3-5일)
+
+### 8.1 Robustness 패턴
+
+`cuMemGetInfo_v2` 의 fix 패턴 (commit `03f99d7`):
+
+```c
+CUresult cuXxx(...) {
+    LOG_DEBUG("cuXxx");
+    ENSURE_INITIALIZED();
+
+    /* 1. Forward to the real driver FIRST. NULL/missing-context errors
+     * surface exactly as without HAMi. We never dereference pointers
+     * the driver rejected. */
+    CUresult r = REAL_CALL(cuXxx, ...);
+    if (r != CUDA_SUCCESS) return r;
+
+    /* 2. NULL/invalid arg guard — return early without enforcement */
+    if (...args invalid for HAMi logic...) return r;
+
+    /* 3. Get device + apply HAMi 격리 logic */
+    ...
+}
+```
+
+### 8.2 Audit 대상
+
+| Hook | 현재 상태 | 액션 |
+|---|---|---|
+| `cuMemGetInfo`, `cuMemGetInfo_v2` | ✅ Fixed (`03f99d7`) | unit test 추가 |
+| `cuMemAlloc`, `cuMemAlloc_v2` | audit 필요 | NULL devptr / `bytesize == 0` guard |
+| `cuMemAllocAsync`, `cuMemAllocPitch` | audit 필요 | 동일 패턴 |
+| `cuMemFree`, `cuMemFree_v2`, `cuMemFreeAsync`, `cuMemFreeHost` | audit 필요 | untracked pointer fallback (이미 일부 fix `3bebc8a`) |
+| `cuCtxGetDevice` | audit 필요 | NULL ctx 시 driver error pass-through |
+| `cuMemCreate` | ✅ Fixed (`833c62c`) | 검증 |
+| `nvmlDeviceGetMemoryInfo`, `_v2` | ✅ Robust | 검증 |
+
+### 8.3 단위 검증
+
+각 hook 별로:
+- normal happy path (정상 인자, 정상 반환)
+- NULL pointer arg (driver 가 거부하면 그대로 반환)
+- partition limit 도달 (OOM 반환)
+- partition limit 0 (unlimited fallback)
+
+`vk_partition_test.py` 와 비슷한 단순 test 추가 (`cuda_partition_test.py`).
+
+### 8.4 Isaac Sim 통합 검증 (Step B 완료 시점)
+
+- `LD_PRELOAD=/usr/local/vgpu/libvgpu.so /isaac-sim/python.sh -c "from isaacsim import SimulationApp; SimulationApp({'headless': True}).close()"` — graceful exit (no SegFault)
+- `runheadless.sh` 단독 실행 — Vulkan path 문제 잔존하므로 Step C 후 검증
+
+## 9. Step C — HAMi-core Vulkan layer compat (5-7일)
+
+### 9.1 이미 적용된 fix (commits)
+
+- `93dd103`: deviceUUID zero → idx=0 fallback (single-GPU container 호환)
+- `91ca00c`: HOOK_NVML_ENABLE build flag — NVML hook activate
+- `2b6b875`: `vkEnumerate{Instance,Device}{Extension,Layer}Properties` hooks — GIPA NULL deref 방지
+
+### 9.2 추가 hardening
+
+`hami_vkGetInstanceProcAddr` audit:
+- 모든 instance-level entry point 호출 시 invalid handle pass-through 패턴 (단, NVIDIA driver 에 unknown handle 절대 forward 금지 — 정의되지 않은 동작)
+- 현재 hook 안 한 함수들 (`vkGetPhysicalDeviceFormatProperties{,2}`, `vkGetPhysicalDeviceImageFormatProperties{,2}`, `vkGetPhysicalDeviceQueueFamilyProperties{,2}`, `vkGetPhysicalDeviceFeatures{,2}`, `vkGetPhysicalDeviceProperties{,2}`, `vkGetPhysicalDeviceSparseImageFormatProperties{,2}`) — instance dispatch 통해 next layer forward 가 표준 패턴이며 instance 등록 시 cache
+
+`hami_vkCreateInstance` / `hami_vkCreateDevice` audit:
+- chain 변경의 in-place 수정 (`chain->u.pLayerInfo = chain->u.pLayerInfo->pNext`) 이 spec 표준 — caller 가 createInfo 재사용 안 한다고 가정. 그러나 NVIDIA OptiX 가 재사용 가능성 있음 → caller-safe deep copy 검토.
+
+dispatch lifetime audit:
+- `hami_instance_unregister` / `hami_device_unregister` 가 caller-side에서 적절한 시점에 호출되는지
+- multi-instance 환경 (Carbonite 가 두 번째 instance 만드는 케이스) 에서 first instance 의 cached gipa 가 stale 안 되도록
+
+OptiX/Aftermath 호환:
+- `aftermath_status=auto-enabled` 환경에서 vkCreateDevice extensions 처리 검증
+- `librtx.optixdenoising.plugin.so` init path 추적 (Step B 의 cuMemGetInfo 이후 stage)
+
+### 9.3 검증
+
+- `runheadless.sh` 5번 — 5/5 alive + listen 49100/30999 (현재 ld.so.preload 비활성에서 5/5 → layer 활성에서도 5/5 목표)
+- `vk_partition_test.py` — Vulkan partition enforce 유지 (이미 통과)
+- `train.py --livestream 2` — 학습 진행 + WebRTC 화면 표시
+- OptiX denoising 활성 시 Kit init 통과
+
+## 10. Step D — isaac-launchable opt-in 활성화 + 검증 (1-2일)
+
+### 10.1 시나리오
+
+1. isaac-launchable namespace label 변경: `hami.io/webhook=ignore` 제거 → `hami.io/vgpu=enabled` 추가
+2. isaac-launchable-* / usd-composer pod 재생성
+3. webhook 이 enabled mutation 적용 (LD_PRELOAD env, libvgpu.so mount, hami.json mount)
+4. 4-path 동시 검증
+
+### 10.2 검증 매트릭스
+
+| Path | Command | Expected |
+|---|---|---|
+| NVML | `kubectl exec ... nvidia-smi --query-gpu=memory.total --format=csv,noheader` | `23552 MiB` |
+| CUDA | `LD_PRELOAD=/usr/local/vgpu/libvgpu.so python -c "import cupy; cupy.cuda.runtime.malloc(25*1024**3)"` | `cudaErrorMemoryAllocation` |
+| Vulkan | `kubectl exec ... /isaac-sim/python.sh vk_partition_test.py` | heap[0]=23 GiB, 25/30 GiB OOM |
+| Isaac Sim | `kubectl exec ... env ACCEPT_EULA=y /isaac-sim/runheadless.sh` 5회 | 5/5 alive, listen 49100/30999 |
+| Isaac Sim 학습 | `kubectl exec ... ./isaaclab.sh -p train.py --livestream 2 --max_iterations 5` | `Iteration 0..4` reward 출력 + 화면 표시 |
+
+위 검증 매트릭스의 5개 항목 모두 통과 = Step D 성공 = 전체 design goal 달성.
+
+## 11. 위험 및 대응
+
+| 위험 | 영향 | 대응 |
+|---|---|---|
+| Step B/C 가 며칠 걸리는데 isaac-launchable 즉시 운영 필요 | 높음 | Step A 만으로 isaac-launchable 즉시 baseline 동작 (현 상태) |
+| Step C 후에도 race 잔존 (NVIDIA Kit 자체 bug) | 중 | NVIDIA bug report, Isaac Sim GA / 다른 RC build 시도 |
+| `namespaceSelector` opt-in 변경이 기존 사용자 영향 (label 없는 namespace 격리 0) | 중 | helm chart values 의 default mode 분기 — 기존 사용자는 명시적 enable, 새 사용자만 opt-in default |
+| `ld.so.preload` 폐기로 cluster wide 격리 일시적 0 | 낮음-중 | Step A 후 즉시 namespace label 추가로 enabled namespace 격리 회복 |
+| Webhook 의 volume mount 추가가 기존 pod spec 과 충돌 | 낮음 | mountPath 검증 (`/etc/vulkan/implicit_layer.d/hami.json` subPath) — 기존 nvidia_layers.json 과 공존 가능 |
+
+## 12. 일정
+
+| Step | 일정 | 결과물 |
+|---|---|---|
+| A | 1일 | helm chart commit + push, webhook config 변경, isaac-launchable baseline 안정 |
+| B | 3-5일 | HAMi-core PR #182 추가 commits (cuda/nvml hook hardening) + unit test |
+| C | 5-7일 | HAMi-core PR #182 추가 commits (Vulkan layer compat) + Isaac Sim init 통과 |
+| D | 1-2일 | isaac-launchable opt-in label + 4-path 검증, 운영 회복 + 격리 동시 만족 |
+
+**총 약 10-15일**.
+
+## 13. 산출물
+
+- HAMi 메인 (`xiilab/feat/vulkan-vgpu` PR #1803): helm chart 변경 commit 들
+- HAMi-core (`xiilab/vulkan-layer` PR #182): hook hardening + Vulkan layer compat commits
+- volcano-vgpu-device-plugin (`xiilab/pr/vulkan-upstream` PR #118): 변경 없음 (libvgpu.so hostPath mount 패턴 유지)
+- 본 spec 문서 + 후속 implementation plans (`writing-plans` skill 출력)
+
+## 14. 다음 단계
+
+이 spec 검토 후 `writing-plans` skill 으로 Step A 부터 step-by-step implementation plan 생성 → step별 commit/PR push → 검증 → 다음 step.
diff --git a/docs/superpowers/specs/2026-04-29-step-c-redesign-vk-so-split.md b/docs/superpowers/specs/2026-04-29-step-c-redesign-vk-so-split.md
new file mode 100644
index 000000000..31336a439
--- /dev/null
+++ b/docs/superpowers/specs/2026-04-29-step-c-redesign-vk-so-split.md
@@ -0,0 +1,184 @@
+# Step C 재설계 — Vulkan layer 를 별도 `libvgpu_vk.so` 로 분리
+
+## 배경
+
+2026-04-28 Step C 첫 시도(`docs/superpowers/plans/2026-04-28-hami-isolation-step-c-vulkan-layer-compat.md`)는 ws-node074 production 환경에서 regression 을 만들었다. 검증 데이터는 `libvgpu/docs/superpowers/notes/2026-04-28-vk-trace-isaac-sim.md` 와 같은 폴더의 dispatch lifetime audit 노트에 보존.
+
+핵심 발견:
+
+- pre-Step-C build (md5 `8f889313`) 를 LD_PRELOAD 했을 때 isaac-launchable-0 의 `runheadless.sh` 는 `exit=124 alive`.
+- post-Step-C build (md5 `9586feee`, 추가 시도 `1048daaf`) 를 같은 환경에서 LD_PRELOAD 하면 `exit=139` 에서 NVIDIA driver init path crash.
+- crash backtrace 는 `libvulkan.so.1` → `libGLX_nvidia.so.0!vk_icdNegotiateLoaderICDInterfaceVersion` → `libEGL_nvidia.so.0!__egl_Main` → `libc.so.6!__sigaction`.
+- HAMI_VK_TRACE 카운트는 두 시도 모두 0 — 우리 layer wrapper 는 호출되지 않음.
+- HAMI_HOOK 매칭 가설을 falsify 하기 위해 `EnumerateDeviceExtensionProperties` / `EnumerateDeviceLayerProperties` 를 `g_inst_head != NULL` 로 게이트했으나 동일 crash. 즉 regression 은 Vulkan wrapper 코드 path 가 아니라 **`.so` load-time / NVIDIA driver init 시점의 ELF 수준 영향**.
+
+추가 진단 (nm/readelf diff, `7dcb5a4` clean rebuild md5 비교, `LD_DEBUG=symbols,bindings`) 은 sandbox 가 ws-node074 외 build 환경 부재로 차단. 코드 commits `996cb22`, `eea2beb` 는 이번 세션에서 revert (`83fd245`, `f52aada`). 노트 commits 은 보존하되 fork push 보류.
+
+## 목적
+
+regression 의 root cause 진단에 추가 시간을 쓰지 않고, **regression 이 구조적으로 발생할 수 없는 architecture 로 Step C 의 본래 목표 (Carbonite/Kit init 호환되는 Vulkan layer hardening) 를 달성**한다.
+
+본래 목표는 plan 의 Section 1 그대로 — Vulkan layer 가 NVIDIA Isaac Sim Kit (Carbonite/OptiX/Aftermath) 의 Vulkan 초기화 경로에서 NULL deref 없이 dispatch chain 을 끝까지 forwarding.
+
+## 핵심 결정
+
+| # | 결정 | 선택 |
+|---|---|---|
+| 1 | 접근 | 새 architecture 우선. root cause 진단 spike 생략 |
+| 2 | Vulkan layer 활성 trigger | manifest 만 (`/etc/vulkan/implicit_layer.d/hami.json`). LD_PRELOAD path 는 Vulkan 활성 안 함 |
+| 3 | 분리 boundary | full Vulkan split — `src/vulkan/*` 전체를 새 `libvgpu_vk.so` 로 |
+| 4 | 검증 환경 | local docker (`make build-in-docker`) + ws-node074 integration |
+| 5 | 기존 `libvgpu.so` Vulkan 코드 | 완전 제거 — `vulkan_mod` 를 `libvgpu.so` build 에서 제외 |
+
+## 비변경 사항
+
+- HAMi-core (NVML/CUDA hook, allocator, multiprocess) 코드 변경 0. budget IPC 그대로.
+- Step B 의 commits (`88143ab`, `275ba3d`, `01a58f1`, `7dcb5a4`) — CUDA NULL guards 보존.
+- Step B 의 Vulkan 관련 commits (`2b6b875`, `91ca00c`, `93dd103`) — Carbonite SegFault 1차 수정, NVML hook 활성, deviceUUID zero fallback 모두 보존. `libvgpu_vk.so` 의 시작점은 이 Step B end 코드.
+- Step A 의 webhook namespaceSelector (HAMi parent `master` 기반) 변경 0.
+- Step D scope (isaac-launchable opt-in 활성화 + 4-path 검증) 그대로 — 이번 spec 은 .so / manifest 산출물만, 활성화 path 는 Step D 가 책임.
+- Plan 첫 시도의 commits `996cb22`, `eea2beb` 는 revert 상태. Tasks 1+2 의 의도 (cache first next-gipa, GIPA/GDPA fallback) 는 새 architecture 검증 통과 후 별도 phase 에서 재도입 후보.
+
+## Architecture
+
+```
+process (LD_PRELOAD'd or manifest-activated):
+
+  ┌──────────────────────────────────────────────┐
+  │ libvgpu.so   ← LD_PRELOAD by ld.so.preload   │
+  │   - NVML hooks (nvmlDeviceGetMemoryInfo …)   │
+  │   - CUDA hooks (cuMemAlloc …)                │
+  │   - allocator + multiprocess (budget IPC)    │
+  │   - exports: hami_core_budget_*, hami_core_  │
+  │     get_partition_uuid(), …                  │
+  │   - NO Vulkan symbols (vk* 미export)         │
+  └──────────────────────────────────────────────┘
+                       ▲
+                       │ DT_NEEDED  (link-time dependency)
+                       │ resolved at dlopen
+  ┌──────────────────────────────────────────────┐
+  │ libvgpu_vk.so   ← Vulkan loader dlopen via   │
+  │                    /etc/vulkan/implicit_     │
+  │                    layer.d/hami.json         │
+  │   - layer.c, dispatch.c (entry points)       │
+  │   - hooks_alloc/memory/submit                │
+  │   - physdev_index, budget bridge, throttle   │
+  │   - exports: vkGetInstanceProcAddr,          │
+  │     vkGetDeviceProcAddr,                     │
+  │     vkNegotiateLoaderLayerInterfaceVersion   │
+  └──────────────────────────────────────────────┘
+```
+
+격리 속성:
+
+- Vulkan 코드는 `libvgpu_vk.so` 에 단 1개 copy.
+- `libvgpu.so` LD_PRELOAD 단독 시 Vulkan symbol 0 → loader/ICD 가 우리 export 와 collision 가능 surface 0. 4-28 trace 에서 발견된 LD_PRELOAD-only crash class 가 구조적으로 불가능.
+- Vulkan layer 활성은 manifest dlopen path 만. Vulkan loader 가 chain 을 정상적으로 build 한 후 우리 layer 에 진입 → `g_inst_head` 가 항상 set 된 상태에서만 wrapper 동작.
+- `libvgpu_vk.so` 의 DT_NEEDED 가 `libvgpu.so` 를 가리켜, manifest 활성 시점에 LD_PRELOAD 된 `libvgpu.so` 의 export 자동 resolve. `libvgpu.so` 가 process 에 없으면 dlopen 실패 → loader 가 layer 자동 skip → Isaac Sim alive (no HAMi enforcement). webhook 실수 시 fail-safe.
+
+## Components
+
+| 단위 | 위치 | 책임 | 의존성 |
+|---|---|---|---|
+| `libvgpu.so` (수정) | `src/CMakeLists.txt` | HAMi-core. `vulkan_mod` OBJECT lib 제거. budget/UUID 조회 함수 export | -lcuda, -lnvidia-ml |
+| `libvgpu_vk.so` (신규) | `src/vulkan/CMakeLists.txt` | Vulkan layer entry + dispatch + hooks | DT_NEEDED libvgpu.so, -lpthread |
+| budget bridge | `src/vulkan/budget.c` 확장 | `libvgpu.so` 의 `hami_core_*` 함수를 layer hooks 가 호출하는 thin wrapper. 기존 budget.c 가 이미 HAMi-core 와 layer 사이 bridge 역할이므로 별도 파일 신규 없음 | libvgpu.so export |
+| `hami.json` manifest | install path 결정 (`/usr/local/vgpu/hami.json` + symlink `/etc/vulkan/implicit_layer.d/hami.json`) | Vulkan implicit layer 정의. `library_path` = `/usr/local/vgpu/libvgpu_vk.so` | (정적 file) |
+| 기존 `test/vulkan/` | 그대로 유지 | layer/dispatch unit tests | libvgpu_vk.so |
+
+`libvgpu.so` 의 신규 export (HAMi-core 측 인터페이스):
+
+- `hami_core_get_device_uuid_count()` — NVML idx 매핑
+- `hami_core_get_device_memory_limit(int nvml_idx)` — partition 값
+- `hami_core_budget_charge(int nvml_idx, size_t bytes)` — 할당 시 budget 차감
+- `hami_core_budget_release(int nvml_idx, size_t bytes)` — 해제 시 복귀
+- `hami_core_budget_remaining(int nvml_idx)` — 남은 한도 조회
+
+prefix `hami_core_*` 통일. 기존 internal 이름 (`get_used_memory_for_uuid` 등) 은 그대로 두고, 외부 인터페이스는 별도 파일 (`src/hami_core_export.c` 또는 기존 `libvgpu.c` 끝에 export 블록 추가) 의 thin wrapper 로 명시 export. CMake `-fvisibility=hidden` default 적용 + 외부 인터페이스 함수에만 `__attribute__((visibility("default")))` 부착해서 export surface 를 의도된 5개로 좁힘.
+
+## Data flow (production happy path)
+
+```
+1. Pod 시작
+   → ld.so.preload 가 libvgpu.so LD_PRELOAD
+   → NVML/CUDA hook 활성, partition 값 ready
+
+2. Isaac Sim Kit 시작
+   → Vulkan loader 가 implicit_layer.d/ scan
+   → hami.json 발견 → libvgpu_vk.so dlopen
+   → DT_NEEDED libvgpu.so 자동 resolve (이미 process 에 있음)
+   → vkNegotiateLoaderLayerInterfaceVersion 호출
+
+3. 앱이 vkCreateInstance
+   → loader chain 거쳐 hami_vkCreateInstance
+   → hami_instance_register, hook table 구성
+
+4. 앱이 vkAllocateMemory
+   → hami_vkAllocateMemory wrapper
+   → hami_core_budget_remaining(idx) 조회 (libvgpu.so call)
+   → 가능하면 next_alloc 호출 + hami_core_budget_charge
+   → 한도 초과 시 VK_ERROR_OUT_OF_DEVICE_MEMORY
+
+5. 앱이 vkGetPhysicalDeviceMemoryProperties
+   → hooks_memory.c
+   → hami_core_get_device_memory_limit 으로 raw 값 clamp
+```
+
+## Error handling
+
+| 시나리오 | 동작 |
+|---|---|
+| `libvgpu.so` 부재 + manifest 활성 | `libvgpu_vk.so` dlopen 시 DT_NEEDED 해결 실패 → loader 가 layer 자동 skip → Isaac Sim alive (no HAMi enforcement) |
+| manifest 부재 + `libvgpu.so` LD_PRELOAD | Vulkan loader 가 layer 발견 0 → libvgpu_vk.so 미load → NVML/CUDA hook 만 동작. Vulkan 호출은 raw — partition 안 됨, 운영자 책임 |
+| `hami_vkCreateInstance` 안에서 chain 실패 | 기존과 동일: `VK_ERROR_INITIALIZATION_FAILED` 반환 |
+| budget 차감 시 `libvgpu.so` 함수 NULL (불가하지만 방어) | `hami_vkAllocateMemory` 가 next_alloc 그대로 forward (no enforcement). 로깅만 |
+| `physdev_index` UUID 매핑 실패 | 기존과 동일: NVML idx=0 fallback (single-GPU). `93dd103` 패치 그대로 |
+| Vulkan wrapper 진입 후 NULL deref 가능 path | Step B end 의 NULL guards (`2b6b875`) 그대로 보존 |
+
+Race / lifetime 분석은 기존 audit (`6fc7f9a` `2026-04-28-vk-dispatch-lifetime-audit.md`) 그대로 유효. 별도 .so 라도 같은 process · 같은 dispatch table — race surface 변경 없음.
+
+## Testing
+
+| 층 | 어디 실행 | 무엇 검증 |
+|---|---|---|
+| Unit (`test/vulkan/`) | local docker | 기존 `test_layer`, `test_memprops`, `test_alloc` 등이 새 `libvgpu_vk.so` 로 빌드/통과 |
+| ELF / symbol diff | local | `nm -D libvgpu.so \| grep '^.* T vk'` 결과 0줄. `nm -D libvgpu_vk.so` 에 `vkGetInstanceProcAddr`, `vkGetDeviceProcAddr`, `vkNegotiateLoaderLayerInterfaceVersion` 만 외부 export. `readelf -d libvgpu_vk.so \| grep NEEDED` 에 libvgpu.so 포함 |
+| Step B regression | local docker (LD_PRELOAD libvgpu.so) | `test_cuda_null_guards` 9/9 [OK] |
+| LD_PRELOAD-only smoke | ws-node074 isaac-launchable-0 | LD_PRELOAD `libvgpu.so` (manifest 미설치) + runheadless.sh × 5 → 5/5 exit=124 alive crash=0. **regression class 가 사라졌다는 핵심 검증** |
+| Manifest 활성 smoke (Step D 와 통합) | ws-node074 isaac-launchable-0 | LD_PRELOAD `libvgpu.so` + manifest hami.json + runheadless.sh × 5 → 5/5 alive + Vulkan partition enforce (44 GiB → 23 GiB clamp) |
+| HAMI_VK_TRACE 수집 | ws-node074 manifest 활성 path | trace lines > 0 — layer 가 실제로 chain 에 진입했음 검증 |
+
+## Production safety gate
+
+이번 세션의 사고 재발 방지:
+
+1. ws-node074 의 `/usr/local/vgpu/libvgpu.so` swap 전 항상 `.bak-pre-stepC2` 백업.
+2. Swap 직후 baseline runheadless 1회 (no LD_PRELOAD) → alive 확인. 실패 시 즉시 restore.
+3. Baseline 통과 시에만 LD_PRELOAD-forced 검증 진행.
+4. 모든 swap 단계는 `md5sum` before/after 로 기록.
+5. isaac-launchable-0 / isaac-launchable-1 의 3/3 Running steady state 가 swap 후에도 유지되는지 monitor.
+
+## Compatibility / 호환성 약속
+
+- 기존 manifest 사용자 (4-27 새벽 패치 시점에 manifest installer 가 활성된 환경) 는 manifest 의 `library_path` 만 update 하면 동작 — Vulkan layer 의 ABI / behavior 는 유지.
+- Step D 의 활성화 webhook 은 manifest installer + LD_PRELOAD config 가 분리됨을 인지해야 함 (별도 .so 두 개 install).
+- `libvgpu.so` 의 신규 export (`hami_core_*`) 는 추가일 뿐. 기존 internal 함수 변경 없음.
+
+## Out of scope (이번 spec 에서 다루지 않음)
+
+- Tasks 1+2 의 cache + GIPA fallback 재도입 — 새 architecture 검증 통과 후 별도 phase.
+- root cause 진단 spike (ELF/symbol diff, LD_DEBUG) — `libvgpu_vk.so` 분리만으로 영향이 사라지는지 보고 결정.
+- HAMi parent 의 webhook / namespaceSelector / opt-in label — Step A / Step D scope.
+- `hami.json` manifest 의 자동 install/uninstall (DaemonSet 또는 webhook 주입) — Step D scope. 이번 spec 은 manifest 파일 자체와 그것이 가리킬 .so 만.
+
+## PR
+
+`Project-HAMi/HAMi-core` (libvgpu) 의 `vulkan-layer` branch 에 새 commits. 별도 PR 또는 PR #182 의 후속 commits. parent repo `HAMi` 의 submodule SHA bump 는 기존 PR #1803 또는 새 PR.
+
+## Test plan (high level)
+
+1. local docker `make build-in-docker` → `libvgpu.so` + `libvgpu_vk.so` 두 산출물 생성 검증.
+2. local `nm -D` / `readelf -d` 로 export / NEEDED 검증.
+3. local docker 에서 `test_cuda_null_guards` 9/9 + `test_layer`/`test_memprops`/`test_alloc` 통과.
+4. ws-node074 swap → baseline runheadless alive → LD_PRELOAD-only × 5 alive (no manifest).
+5. ws-node074 manifest 활성 (Step D 와 통합) → 5/5 alive + partition clamp + HAMI_VK_TRACE > 0.
diff --git a/docs/superpowers/specs/2026-04-29-step-d-vulkan-opt-in-production-activation.md b/docs/superpowers/specs/2026-04-29-step-d-vulkan-opt-in-production-activation.md
new file mode 100644
index 000000000..2687a88c1
--- /dev/null
+++ b/docs/superpowers/specs/2026-04-29-step-d-vulkan-opt-in-production-activation.md
@@ -0,0 +1,153 @@
+# Step D — Vulkan layer opt-in production activation + 4-path 검증
+
+## 배경
+
+Step C 재설계 (`docs/superpowers/specs/2026-04-29-step-c-redesign-vk-so-split.md`, plan `2026-04-29-step-c-vk-so-split.md`) 가 완료. 산출물:
+
+- `libvgpu.so`: HAMi-core 만 (vk* 미export, 5개 `hami_core_*` export). 검증된 build md5 `1bd8f078a15b20e86b78626ddb938141`.
+- `libvgpu_vk.so` (신규): Vulkan implicit-layer code. DT_NEEDED → `libvgpu.so`. Build md5 `95b44957ca3546fb72f8b5d7d699a4aa`.
+- `hami.json` manifest (`libvgpu/share/hami/hami.json`): `library_path = /usr/local/vgpu/libvgpu_vk.so`, `type = INSTANCE`, api 1.3.0.
+- ws-node074 검증: LD_PRELOAD `libvgpu.so` (manifest 미설치) × 5 → 5/5 alive (regression class 사라짐).
+
+다만 RT9 의 manifest 활성 검증에서 `HAMI_VK_TRACE > 0` 은 확인되지 않음 — Kit 의 embedded Conan vulkan-loader 가 우리 GIPA 를 traverse 하지 않음. **Step D 의 4-path 검증이 이 부분의 closure**.
+
+기존 production state (4-27 새벽 패치 이후 baseline):
+
+- `volcano-device-plugin` DaemonSet (image `10.61.3.124:30002/library/volcano-vgpu-device-plugin:vulkan-v1`) 이 `postStart` lifecycle hook 으로 `cp -rf /k8s-vgpu/lib/nvidia/. /usr/local/vgpu/` 실행 → 호스트 `/usr/local/vgpu/libvgpu.so` 가 image 의 .so 로 매번 reset.
+- `hami-vulkan-manifest` ConfigMap (`kube-system/`) 에 `hami.json` 정의. 현재 `library_path: /usr/local/vgpu/libvgpu.so`, `type: GLOBAL`, `enable_environment: HAMI_VULKAN_ENABLE=1`.
+- `hami-vulkan-manifest-installer` DaemonSet 이 ConfigMap 의 `hami.json` 을 host `/usr/local/vgpu/vulkan/implicit_layer.d/hami.json` 으로 install. 현재 `nodeSelector: hami.io/disabled: "true"` 로 비활성 (4-27 새벽 패치 호환 충돌 후 baseline 보존).
+- HAMi webhook (`pkg/device/nvidia/device.go::applyVulkanAnnotation`) 가 pod annotation `hami.io/vulkan: "true"` 인식해서 container 에 `HAMI_VULKAN_ENABLE=1` + `NVIDIA_DRIVER_CAPABILITIES` 에 `graphics` 추가. 이 코드는 이미 master 에 있음.
+
+## 목적
+
+Step C 의 `libvgpu_vk.so` 가 **production opt-in 활성 path 에서 실제로 동작**함을 검증하고, partition enforce 가 4 path 모두에서 작동함을 입증한다. 검증은 ws-node074 isaac-launchable namespace 의 isaac-launchable-0 pod 에서 수행.
+
+## 핵심 결정
+
+| # | 결정 | 선택 |
+|---|---|---|
+| 1 | `libvgpu.so` + `libvgpu_vk.so` 호스트 install 방식 | volcano-device-plugin image 에 두 파일 모두 ship (image rebuild). 기존 `cp -rf /k8s-vgpu/lib/nvidia/.` lifecycle 가 둘 다 install. 별도 DaemonSet 추가 안 함 |
+| 2 | manifest CM 변경 | 기존 `hami-vulkan-manifest` ConfigMap update — `library_path` → `/usr/local/vgpu/libvgpu_vk.so`, `type` → `INSTANCE`, `enable_environment` 유지 (`HAMI_VULKAN_ENABLE: "1"`) |
+| 3 | manifest installer DaemonSet 재활성 | `nodeSelector` 를 `hami.io/disabled: "true"` → `nvidia.com/gpu.present: "true"` 로 복귀. install path 그대로 (`/usr/local/vgpu/vulkan/implicit_layer.d/hami.json`) |
+| 4 | opt-in trigger | 기존 `hami.io/vulkan: "true"` annotation + webhook injection 그대로. 추가 코드 변경 0 |
+| 5 | 4-path 검증 method | isaac-launchable-0 vscode container 에서 ad-hoc shell + python script 실행. 별도 test pod 만들지 않음 (existing pod 활용) |
+| 6 | rollback 안전장치 | swap 전 backup md5 기록, 각 단계 후 baseline runheadless 확인, 실패 시 즉시 backup restore |
+
+## 비변경 사항
+
+- HAMi parent Go 코드 (`pkg/device/nvidia/device.go`, webhook). `applyVulkanAnnotation` 그대로.
+- `libvgpu` (HAMi-core) 코드 — Step C 끝낸 그대로.
+- helm chart templates — Step D 는 runtime YAMLs (`cluster/runtime/snapshot-2026-04-28/`) 만 update. chart 통합은 별도 Step.
+- volcano-device-plugin (Volcano fork) Go 코드 — 변경 없이 image rebuild 만.
+
+## 호환성 약속
+
+- `hami.io/vulkan: "true"` annotation 미설정 pod: HAMI_VULKAN_ENABLE 미주입 → loader manifest 의 `enable_environment` 매칭 실패 → layer 미활성. 기존 동작 그대로.
+- annotation true 설정 pod: webhook 가 env 주입 → layer 활성 → partition enforce.
+- `volcano-vgpu-device-plugin:vulkan-v1` image rebuild 는 기존 build pipeline 재사용. tag 만 `vulkan-v2` 로 bump.
+
+## Architecture
+
+```
+┌── volcano-device-plugin DS (priv container, image vulkan-v2 신규) ──┐
+│   - postStart: cp -rf /k8s-vgpu/lib/nvidia/. /usr/local/vgpu/      │
+│       → /usr/local/vgpu/libvgpu.so       (Step C build)            │
+│       → /usr/local/vgpu/libvgpu_vk.so    (신규)                    │
+│       → /usr/local/vgpu/ld.so.preload    (기존)                    │
+└────────────────────────────────────────────────────────────────────┘
+                                 ↓
+┌── hami-vulkan-manifest ConfigMap (kube-system) ────────────────────┐
+│   hami.json:                                                       │
+│     "type": "INSTANCE"                                             │
+│     "library_path": "/usr/local/vgpu/libvgpu_vk.so"                │
+│     "enable_environment": { "HAMI_VULKAN_ENABLE": "1" }            │
+└────────────────────────────────────────────────────────────────────┘
+                                 ↓
+┌── hami-vulkan-manifest-installer DS (재활성, nodeSelector 복구) ───┐
+│   - cp /manifest/hami.json → /host/usr/local/vgpu/vulkan/          │
+│       implicit_layer.d/hami.json                                   │
+└────────────────────────────────────────────────────────────────────┘
+                                 ↓
+┌── pod (with annotation hami.io/vulkan: "true") ────────────────────┐
+│   webhook injects:                                                 │
+│     - HAMI_VULKAN_ENABLE=1                                         │
+│     - NVIDIA_DRIVER_CAPABILITIES = ...,graphics                    │
+│   Vulkan loader 가 manifest 발견 → enable_environment 매칭 →       │
+│   libvgpu_vk.so dlopen → DT_NEEDED libvgpu.so → 5 hami_core_*      │
+│   resolved → layer chain 진입                                      │
+└────────────────────────────────────────────────────────────────────┘
+```
+
+## Components
+
+| 단위 | 위치 | 변경 종류 |
+|---|---|---|
+| `volcano-vgpu-device-plugin` image (vulkan-v2) | external (Volcano fork) | rebuild — image 의 `/k8s-vgpu/lib/nvidia/` 에 새 `libvgpu.so` + `libvgpu_vk.so` 둘 다 포함 |
+| `cluster/runtime/snapshot-2026-04-28/hami-vulkan-manifest-cm.yaml` | repo | update — library_path / type / 주석 |
+| `cluster/runtime/snapshot-2026-04-28/hami-vulkan-manifest-installer-ds.yaml` | repo | update — nodeSelector 복구 |
+| `cluster/runtime/snapshot-2026-04-28/volcano-device-plugin-ds.yaml` | repo | update — image tag → vulkan-v2 |
+| `cluster/runtime/snapshot-2026-04-28/4-path-verification.sh` (신규) | repo | NVML / CUDA / Vulkan memory query / Vulkan allocate 검증 script |
+
+(snapshot 디렉토리 명을 `snapshot-2026-04-29-step-d` 로 새로 만들거나 기존 디렉토리 이름 변경할지는 plan 단계 결정.)
+
+## Activation flow
+
+production deploy:
+
+1. volcano-device-plugin image rebuild + push (`vulkan-v2` tag).
+2. ConfigMap `hami-vulkan-manifest` apply (library_path 변경).
+3. DaemonSet `hami-vulkan-manifest-installer` patch (nodeSelector 복구) → DS pod schedule → manifest install 실행.
+4. DaemonSet `volcano-device-plugin` image bump → pod rollout → postStart lifecycle 가 새 .so 두 개 install.
+5. isaac-launchable-0 pod 의 annotation 에 `hami.io/vulkan: "true"` 추가 (이미 있을 수도). pod 재시작 → webhook 가 env 주입.
+6. 4-path verification 실행.
+
+## 4-path verification
+
+4 path 모두 `hami.io/vulkan: "true"` annotation 활성된 isaac-launchable-0 의 vscode container 에서 실행:
+
+| Path | 명령 | 기대 |
+|---|---|---|
+| 1. NVML hook | `nvidia-smi --query-gpu=memory.total --format=csv,noheader` | `23552 MiB` (clamp). 이미 검증 — 그대로. |
+| 2. CUDA driver hook | python: `import pycuda.driver as cuda; cuda.init(); ctx = cuda.Device(0).make_context(); free, total = cuda.mem_get_info(); print(total)` | `23552 * 1024 * 1024` = 24696061952 bytes (clamp) |
+| 3. Vulkan memory query | python: `vkGetPhysicalDeviceMemoryProperties` 의 `memoryHeaps[device-local].size` | `23552 * 1024 * 1024` (clamp) |
+| 4. Vulkan allocate | python: `vkAllocateMemory(VkMemoryAllocateInfo{ allocationSize = 25 * 1024 * 1024 * 1024 })` (25 GiB > 23 GiB partition) | `VK_ERROR_OUT_OF_DEVICE_MEMORY` |
+
+추가:
+
+- Manifest 가 active layer 로 enumerated 되는지 (`VK_LOADER_DEBUG=layer` 출력) 확인.
+- HAMI_VK_TRACE > 0 (layer 가 호출됨을 입증) — Kit 의 embedded Conan loader 우회를 위해 host system Vulkan loader 쓰는 python 스크립트로 검증.
+
+`vk_partition_test.py` 같은 script 를 신규 작성 (또는 기존 isaac-sim/ 디렉토리에서 재사용). 위치: `cluster/runtime/snapshot-2026-04-28/4-path-verification.sh` 또는 isaac-launchable-0 의 home dir.
+
+## Production safety gate
+
+각 단계마다:
+
+1. **Pre-step 백업**: 현재 production state 의 md5sum + ConfigMap export + DaemonSet status 기록.
+2. **Apply**: kubectl apply / patch.
+3. **Post-step verify**: isaac-launchable-0 / -1 baseline runheadless 1회 — `exit=124 crash=0 listen=1` 확인. 실패 시 즉시 rollback (backup 적용).
+4. **Roll forward only on green**.
+
+## 비검증 항목
+
+- helm chart 통합 (현재 chart values 에 vulkan toggle 없음. 추가는 별도 Step).
+- usd-composer / 다른 Vulkan 사용 pod 검증 (Step D 는 isaac-launchable-0 만).
+- multi-GPU 케이스 (현재 ws-node074 single-GPU 로만 검증).
+
+## Test plan (high level)
+
+1. volcano-vgpu-device-plugin image rebuild + push.
+2. ConfigMap update + patch DS — DS pod 가 manifest install 한 후 isaac-launchable-0 baseline runheadless 1회 확인.
+3. volcano-device-plugin DS image bump → pod rollout — `/usr/local/vgpu/libvgpu_vk.so` 존재 + md5 = 새 build md5 확인.
+4. isaac-launchable-0 annotation 확인 (이미 `hami.io/vulkan: "true"` 인지) + pod 재시작.
+5. 4-path 검증 실행. 4/4 expected 결과.
+6. HAMI_VK_TRACE > 0 확인 (host system loader path 통한 python script).
+7. usd-composer 등 다른 Vulkan 사용 pod 영향 0 확인 (steady state Running 유지).
+
+## Out of scope (이번 spec 에서 다루지 않음)
+
+- Tasks 1+2 재도입 (`996cb22` cache + Enumerate hooks, `eea2beb` GIPA fallback). 별도 follow-up plan.
+- helm chart 의 vulkan toggle / values 추가.
+- `enable_environment` 외 alternative trigger (예: env-var-prefix manifest, side-channel labels). 현재 path 가 standard 이므로 그대로 유지.
+- volcano-device-plugin Go 코드 변경 (image rebuild 만).
+- Multi-GPU partition enforce 검증.
diff --git a/docs/vulkan-vgpu-e2e-checklist.md b/docs/vulkan-vgpu-e2e-checklist.md
new file mode 100644
index 000000000..30eb561a2
--- /dev/null
+++ b/docs/vulkan-vgpu-e2e-checklist.md
@@ -0,0 +1,83 @@
+# Vulkan vGPU — Manual E2E Verification Checklist
+
+This checklist must be executed on a Kubernetes cluster with at least one
+NVIDIA GPU node running HAMi with the Vulkan-enabled `libvgpu.so`. Automation
+is deferred until an NVIDIA-capable CI runner is available.
+
+## Prerequisites
+
+1. HAMi scheduler + device plugin built from `feat/vulkan-vgpu` branch,
+   including the bumped `libvgpu` submodule pointer (commit `b60b4e6` or
+   later).
+2. NVIDIA Container Toolkit installed, default runtime `nvidia`.
+3. `libvgpu.so` built from HAMi-core `vulkan-layer` branch (commit `579a421`
+   or later) and shipped with the manifest
+   `/etc/vulkan/implicit_layer.d/hami.json` in the HAMi vgpu image.
+
+## 1. Heap clamp (`vulkaninfo`)
+
+```
+kubectl apply -f examples/nvidia/vulkan_example.yaml
+kubectl logs hami-vulkan-example | grep -iE "heap|device local"
+```
+
+**Pass criteria:** the reported `heapSize` for the `DEVICE_LOCAL` heap is
+**≤ 1073741824 bytes (1 GiB)**, matching `nvidia.com/gpumem: 1024`.
+
+## 2. Allocation exceed → `VK_ERROR_OUT_OF_DEVICE_MEMORY`
+
+Build a tiny allocation-stress image (pseudocode):
+```c
+for (int i = 0; i < 5; ++i) {
+    VkMemoryAllocateInfo info = { .allocationSize = 512*1024*1024 };
+    VkResult r = vkAllocateMemory(dev, &info, NULL, &m[i]);
+    printf("alloc %d -> %d\n", i, r);
+}
+```
+Package as `ghcr.io/<org>/vulkan-alloc-stress:latest`, deploy with the same
+annotation + `gpumem: 1024`.
+
+**Pass criteria:** first two allocations return `VK_SUCCESS (0)`, the third
+returns `VK_ERROR_OUT_OF_DEVICE_MEMORY (-2)`.
+
+## 3. SM throttle on `vkQueueSubmit`
+
+Image: any Vulkan compute workload that loops `vkQueueSubmit` continuously
+(e.g. `vkcube --headless` loop, or custom compute shader pinging GPU).
+Pod spec: add a `nvidia.com/gpucores: "30"` resource limit.
+
+**Pass criteria:** `nvidia-smi dmon -s u` on the host reports GPU compute
+utilization averaging near 30% (allowing for token-bucket refill jitter of
+roughly ±120 ms), not 100%.
+
+## 4. Mixed CUDA + Vulkan shared budget
+
+Image containing both a CUDA `cudaMalloc(512 MiB)` loop and Vulkan
+`vkAllocateMemory(512 MiB)` loop.
+Pod spec: `gpumem: 1024` + `hami.io/vulkan: "true"`.
+
+**Pass criteria:**
+- Sum of successful allocations across CUDA + Vulkan does **not** exceed
+  1024 MiB.
+- Either path may be the one that starts failing depending on scheduling;
+  both `VK_ERROR_OUT_OF_DEVICE_MEMORY` and `cudaErrorMemoryAllocation` are
+  valid end states.
+
+## 5. Pods without the annotation are unaffected (CUDA-only)
+
+Deploy a pod with `nvidia.com/gpumem` but **no** `hami.io/vulkan` annotation.
+
+**Pass criteria:**
+- `env | grep NVIDIA_DRIVER_CAPABILITIES` inside the container is unchanged
+  from the image default (`compute,utility` unless image overrides).
+- `env | grep HAMI_VULKAN_ENABLE` is empty.
+- CUDA workloads continue to be throttled/clamped as before.
+
+## Results log
+
+Record cluster name, node GPU model, HAMi image tag, HAMi-core image tag,
+and pass/fail for each of the 5 checks in a dated entry below.
+
+| Date | Cluster | GPU | HAMi tag | libvgpu tag | 1 | 2 | 3 | 4 | 5 |
+|------|---------|-----|----------|-------------|---|---|---|---|---|
+| _pending_ | - | - | - | - | - | - | - | - | - |
diff --git a/docs/vulkan-vgpu-support.md b/docs/vulkan-vgpu-support.md
new file mode 100644
index 000000000..af49f39e5
--- /dev/null
+++ b/docs/vulkan-vgpu-support.md
@@ -0,0 +1,41 @@
+# Vulkan vGPU Support
+
+HAMi partitions NVIDIA GPUs for Vulkan workloads by injecting a Vulkan implicit
+layer (`VK_LAYER_HAMI_vgpu`) that shares the same VRAM and SM budgets used by
+the existing CUDA hooks.
+
+## Enabling Vulkan partitioning
+
+Add the `hami.io/vulkan: "true"` annotation to any pod that uses HAMi NVIDIA
+resources. The webhook will:
+
+- Union `graphics` into `NVIDIA_DRIVER_CAPABILITIES` so the NVIDIA Container
+  Toolkit mounts the Vulkan ICD and graphics libraries.
+- Set `HAMI_VULKAN_ENABLE=1` which activates the HAMi Vulkan layer via its
+  `enable_environment` clause in the implicit layer manifest.
+
+Example: `examples/nvidia/vulkan_example.yaml`.
+
+## What gets limited
+
+- `nvidia.com/gpumem` enforces VRAM allocation across **both** CUDA and Vulkan
+  in the container, sharing a single budget.
+- `nvidia.com/gpucores` throttles Vulkan `vkQueueSubmit[2]` using the same
+  token-bucket rate limiter as `cuLaunchKernel`.
+- `vkGetPhysicalDeviceMemoryProperties[2]` clamps the device-local heap size
+  to the pod budget so apps that size allocations from this value self-limit.
+
+## What is not limited (yet)
+
+- Vulkan Video (`VK_KHR_video_queue`) submissions.
+- Frame-pacing jitter introduced by throttling on graphics queues (documented
+  behavior; strict/cooperative modes are a future option).
+
+## Troubleshooting
+
+| Symptom | Check |
+|---------|-------|
+| Container has no `vulkan` CLI / libs | Annotation absent or `NVIDIA_DRIVER_CAPABILITIES` already frozen to `compute` by image. |
+| `vkAllocateMemory` always succeeds | Layer did not activate — ensure `HAMI_VULKAN_ENABLE=1` set and `/etc/vulkan/implicit_layer.d/hami.json` exists. |
+| `vulkaninfo` still shows full VRAM heap | Layer manifest not loaded; run `VK_LOADER_DEBUG=all vulkaninfo` to see layer scan. |
+| Nothing gets throttled | `rate_limiter` no-ops when SM limit is 0, >=100, or HAMi's utilization switch is disabled. Confirm `nvidia.com/gpucores` was requested on the pod. |
diff --git a/docs/vulkan-vgpu-support_cn.md b/docs/vulkan-vgpu-support_cn.md
new file mode 100644
index 000000000..c4b4aa042
--- /dev/null
+++ b/docs/vulkan-vgpu-support_cn.md
@@ -0,0 +1,32 @@
+# Vulkan vGPU 支持
+
+HAMi 通过注入 Vulkan 隐式层（`VK_LAYER_HAMI_vgpu`）对 NVIDIA GPU 进行 Vulkan 工作负载的切分。该层与已有的 CUDA 钩子共享同一套 VRAM 与 SM 预算。
+
+## 启用方式
+
+在使用 HAMi NVIDIA 资源的 Pod 上添加 annotation `hami.io/vulkan: "true"`。Webhook 会：
+
+- 将 `graphics` 合并进 `NVIDIA_DRIVER_CAPABILITIES`，以便 NVIDIA Container Toolkit 挂载 Vulkan ICD 与图形库。
+- 设置 `HAMI_VULKAN_ENABLE=1`，通过隐式层 manifest 的 `enable_environment` 激活 HAMi Vulkan 层。
+
+示例：`examples/nvidia/vulkan_example.yaml`。
+
+## 生效范围
+
+- `nvidia.com/gpumem` 对容器内 CUDA 与 Vulkan 的 VRAM 分配**共享同一预算**。
+- `nvidia.com/gpucores` 通过与 `cuLaunchKernel` 相同的 token-bucket 限速器对 `vkQueueSubmit[2]` 进行限速。
+- `vkGetPhysicalDeviceMemoryProperties[2]` 将 device-local 堆大小裁剪为 Pod 预算。
+
+## 未涵盖项（未来工作）
+
+- Vulkan Video（`VK_KHR_video_queue`）提交。
+- 图形队列限速导致的帧抖动（已记录，未来提供 strict/cooperative 模式）。
+
+## 故障排查
+
+| 现象 | 检查 |
+|------|------|
+| 容器没有 Vulkan 库 | annotation 缺失，或镜像已冻结 `NVIDIA_DRIVER_CAPABILITIES=compute`。 |
+| `vkAllocateMemory` 总是成功 | 层未激活 — 确认 `HAMI_VULKAN_ENABLE=1` 与 `/etc/vulkan/implicit_layer.d/hami.json` 存在。 |
+| `vulkaninfo` 仍报告全量 VRAM | Manifest 未加载；可 `VK_LOADER_DEBUG=all vulkaninfo` 查看扫描日志。 |
+| 限速未生效 | `rate_limiter` 在 SM 限额为 0、>=100 或 HAMi 利用率开关关闭时不工作。确认 Pod 已请求 `nvidia.com/gpucores`。 |
diff --git a/examples/nvidia/vulkan_example.yaml b/examples/nvidia/vulkan_example.yaml
new file mode 100644
index 000000000..da7f01368
--- /dev/null
+++ b/examples/nvidia/vulkan_example.yaml
@@ -0,0 +1,17 @@
+apiVersion: v1
+kind: Pod
+metadata:
+  name: hami-vulkan-example
+  annotations:
+    hami.io/vulkan: "true"
+spec:
+  restartPolicy: Never
+  containers:
+    - name: vulkaninfo
+      image: khronosgroup/vulkan-samples:latest
+      command: ["vulkaninfo"]
+      resources:
+        limits:
+          nvidia.com/gpu: "1"
+          nvidia.com/gpumem: "1024"   # 1 GiB VRAM budget (shared with CUDA)
+          nvidia.com/gpucores: "30"   # 30% SM throttle (shared with CUDA)
diff --git a/libvgpu b/libvgpu
index 8c32de630..8733ec48b 160000
--- a/libvgpu
+++ b/libvgpu
@@ -1 +1 @@
-Subproject commit 8c32de630b24f5f7d6355fbeb0034845d3bdafb7
+Subproject commit 8733ec48b8486657ff4a4e725e520dae00c943a1
diff --git a/pkg/device-plugin/nvidiadevice/nvinternal/plugin/server.go b/pkg/device-plugin/nvidiadevice/nvinternal/plugin/server.go
index 4c10d7c71..810e23d43 100644
--- a/pkg/device-plugin/nvidiadevice/nvinternal/plugin/server.go
+++ b/pkg/device-plugin/nvidiadevice/nvinternal/plugin/server.go
@@ -615,6 +615,7 @@ func (plugin *NvidiaDevicePlugin) Allocate(ctx context.Context, reqs *kubeletdev
 				podAllocationFailed(nodename, current, NodeLockNvidia)
 				return nil, fmt.Errorf("failed to get allocate response: %v", err)
 			}
+			response.Mounts = appendVulkanManifestMount(response.Mounts, hostHookPath)
 			responses.ContainerResponses = append(responses.ContainerResponses, response)
 		} else {
 			currentCtr, devreq, err := GetNextDeviceRequest(nvidia.NvidiaGPUDevice, *current)
@@ -699,6 +700,7 @@ func (plugin *NvidiaDevicePlugin) Allocate(ctx context.Context, reqs *kubeletdev
 						ReadOnly: true},
 					)
 				}
+				response.Mounts = appendVulkanManifestMount(response.Mounts, hostHookPath)
 				_, err = os.Stat(fmt.Sprintf("%s/vgpu/license", hostHookPath))
 				if err == nil {
 					response.Mounts = append(response.Mounts, &kubeletdevicepluginv1beta1.Mount{
diff --git a/pkg/device-plugin/nvidiadevice/nvinternal/plugin/vulkan.go b/pkg/device-plugin/nvidiadevice/nvinternal/plugin/vulkan.go
new file mode 100644
index 000000000..edca8361a
--- /dev/null
+++ b/pkg/device-plugin/nvidiadevice/nvinternal/plugin/vulkan.go
@@ -0,0 +1,39 @@
+/*
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * The HAMi Contributors require contributions made to
+ * this file be licensed under the Apache-2.0 license or a
+ * compatible open source license.
+ */
+
+package plugin
+
+import (
+	"os"
+
+	kubeletdevicepluginv1beta1 "k8s.io/kubelet/pkg/apis/deviceplugin/v1beta1"
+)
+
+// appendVulkanManifestMount appends a bind-mount for the HAMi Vulkan implicit
+// layer manifest when present on the host. The manifest is placed under
+// hostHookPath/vgpu/vulkan/implicit_layer.d/hami.json by vgpu-init.sh as part
+// of the standard lib distribution.
+//
+// The manifest's enable_environment guard means the Vulkan layer activates
+// only when the pod sets HAMI_VULKAN_ENABLE=1 (injected by the admission
+// webhook for pods that carry the hami.io/vulkan="true" annotation), so the
+// mount is safe to append unconditionally for both vGPU and MIG paths.
+//
+// Returns the input slice unchanged when os.Stat on the host file fails for
+// any reason (not just absence), so such nodes do not block pod startup.
+func appendVulkanManifestMount(mounts []*kubeletdevicepluginv1beta1.Mount, hostHookPath string) []*kubeletdevicepluginv1beta1.Mount {
+	manifestHost := hostHookPath + "/vgpu/vulkan/implicit_layer.d/hami.json"
+	if _, err := os.Stat(manifestHost); err != nil {
+		return mounts
+	}
+	return append(mounts, &kubeletdevicepluginv1beta1.Mount{
+		ContainerPath: "/etc/vulkan/implicit_layer.d/hami.json",
+		HostPath:      manifestHost,
+		ReadOnly:      true,
+	})
+}
diff --git a/pkg/device-plugin/nvidiadevice/nvinternal/plugin/vulkan_test.go b/pkg/device-plugin/nvidiadevice/nvinternal/plugin/vulkan_test.go
new file mode 100644
index 000000000..4397b1fef
--- /dev/null
+++ b/pkg/device-plugin/nvidiadevice/nvinternal/plugin/vulkan_test.go
@@ -0,0 +1,95 @@
+/*
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * The HAMi Contributors require contributions made to
+ * this file be licensed under the Apache-2.0 license or a
+ * compatible open source license.
+ */
+
+package plugin
+
+import (
+	"os"
+	"path/filepath"
+	"testing"
+
+	kubeletdevicepluginv1beta1 "k8s.io/kubelet/pkg/apis/deviceplugin/v1beta1"
+)
+
+// appendVulkanManifestMount must return the input slice untouched when the
+// Vulkan implicit-layer manifest is absent on the host. This is the path
+// taken on nodes where vgpu-init.sh has not (or cannot) place the manifest,
+// and Pod startup must not block on a missing optional file.
+func TestAppendVulkanManifestMount_Absent(t *testing.T) {
+	dir := t.TempDir() // no vgpu/vulkan/implicit_layer.d/hami.json under here
+	in := []*kubeletdevicepluginv1beta1.Mount{
+		{ContainerPath: "/already/there", HostPath: "/already/there"},
+	}
+	out := appendVulkanManifestMount(in, dir)
+	if len(out) != len(in) {
+		t.Fatalf("expected mounts unchanged when manifest absent, got %d mounts (want %d)", len(out), len(in))
+	}
+	for i := range in {
+		if out[i] != in[i] {
+			t.Fatalf("mount[%d] mutated: got %+v, want %+v", i, out[i], in[i])
+		}
+	}
+}
+
+// When the Vulkan implicit-layer manifest is present on the host, the helper
+// must append a single bind-mount at the well-known container path so the
+// Vulkan loader picks the layer up via the enable_environment guard.
+func TestAppendVulkanManifestMount_Present(t *testing.T) {
+	dir := t.TempDir()
+	manifestRel := "vgpu/vulkan/implicit_layer.d/hami.json"
+	if err := os.MkdirAll(filepath.Dir(filepath.Join(dir, manifestRel)), 0o755); err != nil {
+		t.Fatalf("setup mkdir: %v", err)
+	}
+	manifestHost := filepath.Join(dir, manifestRel)
+	if err := os.WriteFile(manifestHost, []byte("{}"), 0o644); err != nil {
+		t.Fatalf("setup writefile: %v", err)
+	}
+
+	in := []*kubeletdevicepluginv1beta1.Mount{}
+	out := appendVulkanManifestMount(in, dir)
+	if len(out) != 1 {
+		t.Fatalf("expected exactly one mount appended, got %d", len(out))
+	}
+	m := out[0]
+	if m.ContainerPath != "/etc/vulkan/implicit_layer.d/hami.json" {
+		t.Errorf("ContainerPath = %q, want /etc/vulkan/implicit_layer.d/hami.json", m.ContainerPath)
+	}
+	if m.HostPath != manifestHost {
+		t.Errorf("HostPath = %q, want %q", m.HostPath, manifestHost)
+	}
+	if !m.ReadOnly {
+		t.Errorf("ReadOnly = false, want true (manifest must not be writable from container)")
+	}
+}
+
+// Helper must preserve the order and identity of preceding mounts when it
+// appends. Regression guard for the MIG / non-MIG callers in server.go that
+// rely on positional ordering.
+func TestAppendVulkanManifestMount_PreservesPriorMounts(t *testing.T) {
+	dir := t.TempDir()
+	manifestRel := "vgpu/vulkan/implicit_layer.d/hami.json"
+	if err := os.MkdirAll(filepath.Dir(filepath.Join(dir, manifestRel)), 0o755); err != nil {
+		t.Fatalf("setup mkdir: %v", err)
+	}
+	if err := os.WriteFile(filepath.Join(dir, manifestRel), []byte("{}"), 0o644); err != nil {
+		t.Fatalf("setup writefile: %v", err)
+	}
+
+	first := &kubeletdevicepluginv1beta1.Mount{ContainerPath: "/a", HostPath: "/a"}
+	second := &kubeletdevicepluginv1beta1.Mount{ContainerPath: "/b", HostPath: "/b"}
+	out := appendVulkanManifestMount([]*kubeletdevicepluginv1beta1.Mount{first, second}, dir)
+	if len(out) != 3 {
+		t.Fatalf("expected 3 mounts, got %d", len(out))
+	}
+	if out[0] != first || out[1] != second {
+		t.Fatalf("prior mounts reordered or replaced")
+	}
+	if out[2].ContainerPath != "/etc/vulkan/implicit_layer.d/hami.json" {
+		t.Errorf("appended mount has wrong ContainerPath: %q", out[2].ContainerPath)
+	}
+}
diff --git a/pkg/device/nvidia/device.go b/pkg/device/nvidia/device.go
index 9deb08cdd..0b5611be9 100644
--- a/pkg/device/nvidia/device.go
+++ b/pkg/device/nvidia/device.go
@@ -56,6 +56,34 @@ const (
 	MpsMode      = "mps"
 )
 
+const (
+	VulkanEnableAnno       = "hami.io/vulkan"
+	VulkanLayerName        = "VK_LAYER_HAMI_vgpu"
+	NvidiaDriverCapsEnvVar = "NVIDIA_DRIVER_CAPABILITIES"
+	HamiVulkanEnvVar       = "HAMI_VULKAN_ENABLE"
+
+	// VulkanManifestVolumeName is the pod-level volume that exposes the
+	// host's hami.json implicit-layer manifest into the container at the
+	// standard Vulkan loader search path.
+	VulkanManifestVolumeName = "hami-vulkan-manifest"
+	// VulkanManifestHostPath is the host file the webhook mounts into the
+	// container. Only this single file is mounted (not the whole directory)
+	// so the image's /etc/vulkan/implicit_layer.d/nvidia_layers.json keeps
+	// working. NOTE(review): the Step D spec has the installer DaemonSet
+	// writing /usr/local/vgpu/vulkan/implicit_layer.d/hami.json — confirm.
+	VulkanManifestHostPath      = "/etc/vulkan/implicit_layer.d/hami.json"
+	VulkanManifestContainerPath = "/etc/vulkan/implicit_layer.d/hami.json"
+	// VulkanLibSoVolumeName exposes libvgpu_vk.so (Step C split) into
+	// the container at the path the manifest's library_path references.
+	// volcano-vgpu-device-plugin already mounts /usr/local/vgpu/libvgpu.so
+	// for every GPU pod, but it does NOT mount libvgpu_vk.so — that file
+	// is only ever needed by Vulkan-opt-in pods, so the webhook handles
+	// it on a per-pod basis.
+	VulkanLibSoVolumeName    = "hami-vulkan-lib-so"
+	VulkanLibSoHostPath      = "/usr/local/vgpu/libvgpu_vk.so"
+	VulkanLibSoContainerPath = "/usr/local/vgpu/libvgpu_vk.so"
+)
+
 var (
 	NodeName          string
 	RuntimeSocketFlag string
@@ -370,6 +398,7 @@ func (dev *NvidiaGPUDevices) MutateAdmission(ctr *corev1.Container, p *corev1.Po
 		if p.Spec.RuntimeClassName == nil && dev.config.RuntimeClassName != "" {
 			p.Spec.RuntimeClassName = &dev.config.RuntimeClassName
 		}
+		applyVulkanAnnotation(ctr, p)
 	}
 
 	if !hasResource && dev.config.OverwriteEnv {
@@ -381,6 +410,116 @@ func (dev *NvidiaGPUDevices) MutateAdmission(ctr *corev1.Container, p *corev1.Po
 	return hasResource, nil
 }
 
+// mergeGraphicsCap returns existing NVIDIA_DRIVER_CAPABILITIES with "graphics"
+// added. A value already containing "all" or "graphics" is returned verbatim;
+// otherwise tokens are trimmed, de-duplicated and re-joined. An empty (or
+// whitespace/comma-only) value becomes "compute,utility,graphics".
+func mergeGraphicsCap(existing string) string {
+	if strings.TrimSpace(existing) == "" {
+		return "compute,utility,graphics"
+	}
+	tokens := strings.Split(existing, ",")
+	cleaned := make([]string, 0, len(tokens)+1)
+	seen := make(map[string]struct{}, len(tokens)+1)
+	for _, t := range tokens {
+		t = strings.TrimSpace(t)
+		if t == "" {
+			continue
+		}
+		if t == "all" {
+			return existing
+		}
+		if _, ok := seen[t]; ok {
+			continue
+		}
+		seen[t] = struct{}{}
+		cleaned = append(cleaned, t)
+	}
+	if len(cleaned) == 0 {
+		return "compute,utility,graphics"
+	}
+	if _, ok := seen["graphics"]; ok {
+		return existing
+	}
+	cleaned = append(cleaned, "graphics")
+	return strings.Join(cleaned, ",")
+}
+
+// applyVulkanAnnotation mutates the container and pod spec when the pod
+// carries hami.io/vulkan="true"; otherwise it is a no-op. It (1) merges
+// "graphics" into NVIDIA_DRIVER_CAPABILITIES, (2) appends HAMI_VULKAN_ENABLE=1
+// unless the container already defines that variable (an existing value is
+// kept as-is), and (3) adds read-only HostPathFile volumes and mounts for
+// the hami.json implicit-layer manifest and for libvgpu_vk.so.
+func applyVulkanAnnotation(ctr *corev1.Container, pod *corev1.Pod) {
+	if pod == nil || pod.Annotations[VulkanEnableAnno] != "true" {
+		return
+	}
+
+	capsIdx := -1
+	hasEnable := false
+	for i, e := range ctr.Env {
+		switch e.Name {
+		case NvidiaDriverCapsEnvVar:
+			capsIdx = i
+		case HamiVulkanEnvVar:
+			hasEnable = true
+		}
+	}
+
+	if capsIdx >= 0 {
+		ctr.Env[capsIdx].Value = mergeGraphicsCap(ctr.Env[capsIdx].Value)
+	} else {
+		ctr.Env = append(ctr.Env, corev1.EnvVar{Name: NvidiaDriverCapsEnvVar, Value: mergeGraphicsCap("")})
+	}
+
+	if !hasEnable {
+		ctr.Env = append(ctr.Env, corev1.EnvVar{Name: HamiVulkanEnvVar, Value: "1"})
+	}
+
+	ensureHostPathFileVolume(pod, VulkanManifestVolumeName, VulkanManifestHostPath)
+	ensureHostPathFileVolumeMount(ctr, VulkanManifestVolumeName, VulkanManifestContainerPath)
+	ensureHostPathFileVolume(pod, VulkanLibSoVolumeName, VulkanLibSoHostPath)
+	ensureHostPathFileVolumeMount(ctr, VulkanLibSoVolumeName, VulkanLibSoContainerPath)
+}
+
+// ensureHostPathFileVolume appends a HostPathFile volume for hostPath unless
+// the pod already has a volume with this name (any such volume is kept as-is,
+// whatever its source). This keeps the call idempotent when the opt-in
+// trigger fires once per container of a multi-container pod.
+func ensureHostPathFileVolume(pod *corev1.Pod, name, hostPath string) {
+	for _, v := range pod.Spec.Volumes {
+		if v.Name == name {
+			return
+		}
+	}
+	fileType := corev1.HostPathFile
+	pod.Spec.Volumes = append(pod.Spec.Volumes, corev1.Volume{
+		Name: name,
+		VolumeSource: corev1.VolumeSource{
+			HostPath: &corev1.HostPathVolumeSource{
+				Path: hostPath,
+				Type: &fileType,
+			},
+		},
+	})
+}
+
+// ensureHostPathFileVolumeMount appends a read-only mount of the named volume
+// at mountPath unless ctr already mounts that volume (matched by name only).
+func ensureHostPathFileVolumeMount(ctr *corev1.Container, name, mountPath string) {
+	for _, m := range ctr.VolumeMounts {
+		if m.Name == name {
+			return
+		}
+	}
+	ctr.VolumeMounts = append(ctr.VolumeMounts, corev1.VolumeMount{
+		Name:      name,
+		MountPath: mountPath,
+		ReadOnly:  true,
+	})
+}
+
 func (dev *NvidiaGPUDevices) mutateContainerResource(ctr *corev1.Container) bool {
 	_, resourceNameOK := ctr.Resources.Limits[corev1.ResourceName(dev.config.ResourceCountName)]
 	if resourceNameOK {
diff --git a/pkg/device/nvidia/device_test.go b/pkg/device/nvidia/device_test.go
index 493fec3c5..f6d1a5046 100644
--- a/pkg/device/nvidia/device_test.go
+++ b/pkg/device/nvidia/device_test.go
@@ -2617,3 +2617,303 @@ func TestFit_TopologyBestCombination(t *testing.T) {
 	assert.Assert(t, uuids["dev-0"])
 	assert.Assert(t, uuids["dev-2"])
 }
+
+func TestMutateAdmission_VulkanAnno_AddsGraphicsCap(t *testing.T) {
+	dev := &NvidiaGPUDevices{
+		config: NvidiaConfig{
+			ResourceCountName:            "nvidia.com/gpu",
+			ResourceMemoryName:           "nvidia.com/gpumem",
+			ResourceCoreName:             "nvidia.com/gpucores",
+			ResourceMemoryPercentageName: "nvidia.com/gpumem-percentage",
+		},
+	}
+	ctr := &corev1.Container{
+		Resources: corev1.ResourceRequirements{
+			Limits: corev1.ResourceList{
+				"nvidia.com/gpu": *resource.NewQuantity(1, resource.BinarySI),
+			},
+		},
+	}
+	pod := &corev1.Pod{
+		ObjectMeta: metav1.ObjectMeta{
+			Annotations: map[string]string{VulkanEnableAnno: "true"},
+		},
+	}
+	_, err := dev.MutateAdmission(ctr, pod)
+	assert.NilError(t, err)
+	// Pick out the two env vars that the assertions below check.
+	var caps, enable string
+	for _, e := range ctr.Env {
+		if e.Name == NvidiaDriverCapsEnvVar {
+			caps = e.Value
+		}
+		if e.Name == HamiVulkanEnvVar {
+			enable = e.Value
+		}
+	}
+	assert.Assert(t, strings.Contains(caps, "graphics"), "expected graphics in caps, got %q", caps)
+	assert.Equal(t, enable, "1")
+}
+
+func TestMutateAdmission_VulkanAnno_MergesExistingCaps(t *testing.T) {
+	// The container already pins a capability list; the opt-in must extend it, not replace it.
+	dev := &NvidiaGPUDevices{
+		config: NvidiaConfig{
+			ResourceCountName:            "nvidia.com/gpu",
+			ResourceMemoryName:           "nvidia.com/gpumem",
+			ResourceCoreName:             "nvidia.com/gpucores",
+			ResourceMemoryPercentageName: "nvidia.com/gpumem-percentage",
+		},
+	}
+	ctr := &corev1.Container{
+		Env: []corev1.EnvVar{{Name: NvidiaDriverCapsEnvVar, Value: "compute,utility"}},
+		Resources: corev1.ResourceRequirements{
+			Limits: corev1.ResourceList{
+				"nvidia.com/gpu": *resource.NewQuantity(1, resource.BinarySI),
+			},
+		},
+	}
+	pod := &corev1.Pod{ObjectMeta: metav1.ObjectMeta{Annotations: map[string]string{VulkanEnableAnno: "true"}}}
+	_, err := dev.MutateAdmission(ctr, pod)
+	assert.NilError(t, err)
+
+	var caps string
+	for _, e := range ctr.Env {
+		if e.Name == NvidiaDriverCapsEnvVar {
+			caps = e.Value
+		}
+	}
+	assert.Assert(t, strings.Contains(caps, "compute"))
+	assert.Assert(t, strings.Contains(caps, "utility"))
+	assert.Assert(t, strings.Contains(caps, "graphics"))
+}
+
+func TestMutateAdmission_VulkanAnno_AllCaps_NoChange(t *testing.T) {
+	dev := &NvidiaGPUDevices{config: NvidiaConfig{ResourceCountName: "nvidia.com/gpu"}}
+	ctr := &corev1.Container{
+		Env: []corev1.EnvVar{{Name: NvidiaDriverCapsEnvVar, Value: "all"}},
+		Resources: corev1.ResourceRequirements{
+			Limits: corev1.ResourceList{
+				"nvidia.com/gpu": *resource.NewQuantity(1, resource.BinarySI),
+			},
+		},
+	}
+	pod := &corev1.Pod{ObjectMeta: metav1.ObjectMeta{Annotations: map[string]string{VulkanEnableAnno: "true"}}}
+	_, _ = dev.MutateAdmission(ctr, pod)
+
+	// Track that the variable was seen: the loop alone passes vacuously if it was dropped.
+	found := false
+	for _, e := range ctr.Env {
+		if e.Name == NvidiaDriverCapsEnvVar {
+			found = true
+			assert.Equal(t, e.Value, "all")
+		}
+	}
+	assert.Assert(t, found, "expected %q to stay on the container", NvidiaDriverCapsEnvVar)
+}
+
+func TestMutateAdmission_NoVulkanAnno_NoChange(t *testing.T) {
+	// The pod carries no annotations at all, so this covers a GPU container
+	// for which the Vulkan opt-in was never requested.
+	gpuLimits := corev1.ResourceList{
+		"nvidia.com/gpu": *resource.NewQuantity(1, resource.BinarySI),
+	}
+	ctr := &corev1.Container{Resources: corev1.ResourceRequirements{Limits: gpuLimits}}
+	pod := &corev1.Pod{}
+	dev := &NvidiaGPUDevices{config: NvidiaConfig{ResourceCountName: "nvidia.com/gpu"}}
+	_, _ = dev.MutateAdmission(ctr, pod)
+
+	// Neither Vulkan-related env var may have been added.
+	for i := range ctr.Env {
+		name := ctr.Env[i].Name
+		assert.Assert(t, name != NvidiaDriverCapsEnvVar, "unexpected caps env")
+		assert.Assert(t, name != HamiVulkanEnvVar, "unexpected enable env")
+	}
+}
+
+func TestMutateAdmission_VulkanAnno_NoGPUResource(t *testing.T) {
+	// The annotation is present, but the container requests no GPU resource.
+	cfg := NvidiaConfig{
+		ResourceCountName:            "nvidia.com/gpu",
+		ResourceMemoryName:           "nvidia.com/gpumem",
+		ResourceCoreName:             "nvidia.com/gpucores",
+		ResourceMemoryPercentageName: "nvidia.com/gpumem-percentage",
+	}
+	dev := &NvidiaGPUDevices{config: cfg}
+	annotations := map[string]string{VulkanEnableAnno: "true"}
+	pod := &corev1.Pod{ObjectMeta: metav1.ObjectMeta{Annotations: annotations}}
+	ctr := &corev1.Container{Resources: corev1.ResourceRequirements{Limits: corev1.ResourceList{}}}
+	_, _ = dev.MutateAdmission(ctr, pod)
+
+	for i := range ctr.Env {
+		assert.Assert(t, ctr.Env[i].Name != HamiVulkanEnvVar, "no Vulkan env on non-GPU pod")
+	}
+}
+
+func TestMutateAdmission_VulkanAnno_IdempotentHamiEnable(t *testing.T) {
+	// The container already sets HamiVulkanEnvVar itself before admission runs.
+	ctr := &corev1.Container{
+		Env: []corev1.EnvVar{{Name: HamiVulkanEnvVar, Value: "1"}},
+		Resources: corev1.ResourceRequirements{
+			Limits: corev1.ResourceList{
+				"nvidia.com/gpu": *resource.NewQuantity(1, resource.BinarySI),
+			},
+		},
+	}
+	annotations := map[string]string{VulkanEnableAnno: "true"}
+	pod := &corev1.Pod{ObjectMeta: metav1.ObjectMeta{Annotations: annotations}}
+	dev := &NvidiaGPUDevices{config: NvidiaConfig{ResourceCountName: "nvidia.com/gpu"}}
+	_, _ = dev.MutateAdmission(ctr, pod)
+
+	// Exactly one entry must be present afterwards, i.e. no duplicate was appended.
+	var occurrences int
+	for i := range ctr.Env {
+		if ctr.Env[i].Name == HamiVulkanEnvVar {
+			occurrences++
+		}
+	}
+	assert.Equal(t, occurrences, 1)
+}
+
+func TestMutateAdmission_VulkanAnno_InjectsManifestVolumeMount(t *testing.T) {
+	dev := &NvidiaGPUDevices{config: NvidiaConfig{ResourceCountName: "nvidia.com/gpu"}}
+	ctr := &corev1.Container{
+		Resources: corev1.ResourceRequirements{
+			Limits: corev1.ResourceList{
+				"nvidia.com/gpu": *resource.NewQuantity(1, resource.BinarySI),
+			},
+		},
+	}
+	pod := &corev1.Pod{ObjectMeta: metav1.ObjectMeta{Annotations: map[string]string{VulkanEnableAnno: "true"}}}
+	_, _ = dev.MutateAdmission(ctr, pod)
+	// The opt-in injects two host files (manifest and library); check volume and mount of each.
+	for _, want := range []struct{ name, hostPath, mountPath string }{
+		{VulkanManifestVolumeName, VulkanManifestHostPath, VulkanManifestContainerPath},
+		{VulkanLibSoVolumeName, VulkanLibSoHostPath, VulkanLibSoContainerPath},
+	} {
+		var volume *corev1.Volume
+		for i := range pod.Spec.Volumes {
+			if pod.Spec.Volumes[i].Name == want.name {
+				volume = &pod.Spec.Volumes[i]
+			}
+		}
+		assert.Assert(t, volume != nil, "expected volume %q on pod", want.name)
+		assert.Assert(t, volume.HostPath != nil, "expected HostPath source")
+		assert.Equal(t, volume.HostPath.Path, want.hostPath)
+		assert.Assert(t, volume.HostPath.Type != nil)
+		assert.Equal(t, *volume.HostPath.Type, corev1.HostPathFile)
+		var mount *corev1.VolumeMount
+		for i := range ctr.VolumeMounts {
+			if ctr.VolumeMounts[i].Name == want.name {
+				mount = &ctr.VolumeMounts[i]
+			}
+		}
+		assert.Assert(t, mount != nil, "expected volumeMount %q on container", want.name)
+		assert.Equal(t, mount.MountPath, want.mountPath)
+		assert.Equal(t, mount.ReadOnly, true)
+	}
+}
+
+func TestMutateAdmission_VulkanAnno_VolumeIdempotent(t *testing.T) {
+	dev := &NvidiaGPUDevices{
+		config: NvidiaConfig{ResourceCountName: "nvidia.com/gpu"},
+	}
+	ctr1 := &corev1.Container{
+		Resources: corev1.ResourceRequirements{
+			Limits: corev1.ResourceList{
+				"nvidia.com/gpu": *resource.NewQuantity(1, resource.BinarySI),
+			},
+		},
+	}
+	ctr2 := &corev1.Container{
+		Resources: corev1.ResourceRequirements{
+			Limits: corev1.ResourceList{
+				"nvidia.com/gpu": *resource.NewQuantity(1, resource.BinarySI),
+			},
+		},
+	}
+	pod := &corev1.Pod{
+		ObjectMeta: metav1.ObjectMeta{Annotations: map[string]string{VulkanEnableAnno: "true"}},
+	}
+	_, _ = dev.MutateAdmission(ctr1, pod)
+	_, _ = dev.MutateAdmission(ctr2, pod)
+	// Both calls receive the same pod, so the pod-level volume must be listed only once.
+	count := 0
+	for _, v := range pod.Spec.Volumes {
+		if v.Name == VulkanManifestVolumeName {
+			count++
+		}
+	}
+	assert.Equal(t, count, 1, "volume should be added once even when multiple containers opt in")
+	// Each container must still carry exactly one mount of that volume.
+	mountCount1 := 0
+	for _, m := range ctr1.VolumeMounts {
+		if m.Name == VulkanManifestVolumeName {
+			mountCount1++
+		}
+	}
+	mountCount2 := 0
+	for _, m := range ctr2.VolumeMounts {
+		if m.Name == VulkanManifestVolumeName {
+			mountCount2++
+		}
+	}
+	assert.Equal(t, mountCount1, 1)
+	assert.Equal(t, mountCount2, 1)
+}
+
+func TestMutateAdmission_NoVulkanAnno_NoVolume(t *testing.T) {
+	// The pod has no annotations, so a GPU container on its own must not
+	// cause the manifest volume or its mount to be injected.
+	gpuLimits := corev1.ResourceList{
+		"nvidia.com/gpu": *resource.NewQuantity(1, resource.BinarySI),
+	}
+	ctr := &corev1.Container{Resources: corev1.ResourceRequirements{Limits: gpuLimits}}
+	pod := &corev1.Pod{}
+	dev := &NvidiaGPUDevices{config: NvidiaConfig{ResourceCountName: "nvidia.com/gpu"}}
+	_, _ = dev.MutateAdmission(ctr, pod)
+
+	for i := range pod.Spec.Volumes {
+		name := pod.Spec.Volumes[i].Name
+		assert.Assert(t, name != VulkanManifestVolumeName, "no manifest volume without annotation")
+	}
+	for i := range ctr.VolumeMounts {
+		name := ctr.VolumeMounts[i].Name
+		assert.Assert(t, name != VulkanManifestVolumeName, "no manifest mount without annotation")
+	}
+}
+
+func TestMergeGraphicsCap_Empty(t *testing.T) {
+	assert.Equal(t, mergeGraphicsCap(""), "compute,utility,graphics") // empty input: full default list expected
+}
+
+func TestMergeGraphicsCap_WhitespaceOnly(t *testing.T) {
+	assert.Equal(t, mergeGraphicsCap("   "), "compute,utility,graphics") // blanks only: same result as empty input
+}
+
+func TestMergeGraphicsCap_CommasOnly(t *testing.T) {
+	// Every token is empty once trimmed, so the same result as for empty input is expected.
+	assert.Equal(t, mergeGraphicsCap(", , ,"), "compute,utility,graphics")
+}
+
+func TestMergeGraphicsCap_All(t *testing.T) {
+	// "all" implies every capability, alone or inside a longer list; the input must come back unchanged.
+	assert.Equal(t, mergeGraphicsCap("all"), "all")
+	assert.Equal(t, mergeGraphicsCap("compute,all,utility"), "compute,all,utility")
+}
+
+func TestMergeGraphicsCap_AlreadyHasGraphics(t *testing.T) {
+	// graphics is already listed: the input must come back untouched, original order included.
+	assert.Equal(t, mergeGraphicsCap("compute,graphics,utility"), "compute,graphics,utility")
+}
+
+func TestMergeGraphicsCap_DuplicatesAndPadding(t *testing.T) {
+	// Expect repeated tokens collapsed, padding around tokens trimmed, and graphics appended last.
+	out := mergeGraphicsCap("compute, compute , utility")
+	assert.Equal(t, out, "compute,utility,graphics")
+}
+
+func TestMergeGraphicsCap_AppendGraphics(t *testing.T) {
+	assert.Equal(t, mergeGraphicsCap("compute"), "compute,graphics")                 // single existing token
+	assert.Equal(t, mergeGraphicsCap("compute,utility"), "compute,utility,graphics") // several existing tokens
+}