diff --git a/.gitmodules b/.gitmodules index 0f8525526..458f777b7 100644 --- a/.gitmodules +++ b/.gitmodules @@ -1,4 +1,4 @@ [submodule "libvgpu"] path = libvgpu - url = https://github.com/Project-HAMi/HAMi-core.git - branch = main + url = https://github.com/xiilab/HAMi-core.git + branch = vulkan-layer diff --git a/charts/hami/templates/scheduler/webhook.yaml b/charts/hami/templates/scheduler/webhook.yaml index 98ab05b22..175f67709 100644 --- a/charts/hami/templates/scheduler/webhook.yaml +++ b/charts/hami/templates/scheduler/webhook.yaml @@ -29,10 +29,17 @@ webhooks: {{- toYaml .Values.scheduler.admissionWebhook.namespaceSelector.matchLabels | nindent 8 }} {{- end }} matchExpressions: + {{- if eq (.Values.scheduler.admissionWebhook.namespaceSelector.mode | default "opt-out") "opt-in" }} + - key: hami.io/vgpu + operator: In + values: + - enabled + {{- else }} - key: hami.io/webhook operator: NotIn values: - ignore + {{- end }} {{- if .Values.scheduler.admissionWebhook.whitelistNamespaces }} - key: kubernetes.io/metadata.name operator: NotIn diff --git a/charts/hami/values.yaml b/charts/hami/values.yaml index 382c2ec47..1e4393241 100644 --- a/charts/hami/values.yaml +++ b/charts/hami/values.yaml @@ -172,19 +172,19 @@ scheduler: # - default # - kube-system # - istio-system - # namespaceSelector controls which namespaces the webhook will be applied to. - # The default matchExpressions exclude namespaces with label "hami.io/webhook: ignore". - # You can add additional matchLabels or matchExpressions to further filter namespaces. + # namespaceSelector controls which namespaces the webhook will apply to. + # mode: + # "opt-out" (legacy default): apply to all namespaces except those labeled + # hami.io/webhook=ignore. Suitable when most workloads need vGPU + # isolation and a small number opt out. 
+ # "opt-in" (recommended for clusters with NVIDIA Omniverse / Isaac Sim + # workloads that conflict with HAMi-core hooks): apply ONLY to + # namespaces labeled hami.io/vgpu=enabled. Other namespaces see + # no mutation, no LD_PRELOAD inject, no implicit Vulkan layer. namespaceSelector: - matchLabels: - # app.kubernetes.io/part-of: kubeflow-profile + mode: opt-in + matchLabels: {} matchExpressions: [] - # Example: exclude namespaces with specific labels - # - key: environment - # operator: In - # values: - # - development - # - staging # objectSelector controls which pods the webhook will be applied to. # The default matchExpressions exclude pods with label "hami.io/webhook: ignore". # You can add additional matchLabels or matchExpressions to further filter pods. diff --git a/cluster/runtime/snapshot-2026-04-28/hami-vulkan-manifest-cm.yaml b/cluster/runtime/snapshot-2026-04-28/hami-vulkan-manifest-cm.yaml new file mode 100644 index 000000000..73dbb43b5 --- /dev/null +++ b/cluster/runtime/snapshot-2026-04-28/hami-vulkan-manifest-cm.yaml @@ -0,0 +1,27 @@ +apiVersion: v1 +data: + hami.json: | + { + "file_format_version": "1.0.0", + "layer": { + "name": "VK_LAYER_HAMI_vgpu", + "type": "GLOBAL", + "library_path": "/usr/local/vgpu/libvgpu.so", + "api_version": "1.3.0", + "implementation_version": "1", + "description": "HAMi Vulkan vGPU memory partitioning layer", + "enable_environment": { + "HAMI_VULKAN_ENABLE": "1" + } + } + } +kind: ConfigMap +metadata: + annotations: + kubectl.kubernetes.io/last-applied-configuration: | + {"apiVersion":"v1","data":{"hami.json":"{\n \"file_format_version\": \"1.0.0\",\n \"layer\": {\n \"name\": \"VK_LAYER_HAMI_vgpu\",\n \"type\": \"GLOBAL\",\n \"library_path\": \"/usr/local/vgpu/libvgpu.so\",\n \"api_version\": \"1.3.0\",\n \"implementation_version\": \"1\",\n \"description\": \"HAMi Vulkan vGPU memory partitioning layer\",\n \"enable_environment\": {\n \"HAMI_VULKAN_ENABLE\": \"1\"\n }\n 
}\n}\n"},"kind":"ConfigMap","metadata":{"annotations":{},"name":"hami-vulkan-manifest","namespace":"kube-system"}} + creationTimestamp: "2026-04-27T02:17:50Z" + name: hami-vulkan-manifest + namespace: kube-system + resourceVersion: "20078116" + uid: cf25a104-6177-43ee-8a64-0483ef5901fa diff --git a/cluster/runtime/snapshot-2026-04-28/hami-vulkan-manifest-installer-ds.yaml b/cluster/runtime/snapshot-2026-04-28/hami-vulkan-manifest-installer-ds.yaml new file mode 100644 index 000000000..de65e0b1e --- /dev/null +++ b/cluster/runtime/snapshot-2026-04-28/hami-vulkan-manifest-installer-ds.yaml @@ -0,0 +1,80 @@ +apiVersion: apps/v1 +kind: DaemonSet +metadata: + annotations: + deprecated.daemonset.template.generation: "2" + kubectl.kubernetes.io/last-applied-configuration: | + {"apiVersion":"apps/v1","kind":"DaemonSet","metadata":{"annotations":{},"labels":{"app":"hami-vulkan-manifest-installer"},"name":"hami-vulkan-manifest-installer","namespace":"kube-system"},"spec":{"selector":{"matchLabels":{"app":"hami-vulkan-manifest-installer"}},"template":{"metadata":{"labels":{"app":"hami-vulkan-manifest-installer"}},"spec":{"containers":[{"command":["/bin/sh","-c","set -eu\nmkdir -p /host/usr/local/vgpu/vulkan/implicit_layer.d\ncp -f /manifest/hami.json \\\n /host/usr/local/vgpu/vulkan/implicit_layer.d/hami.json\necho \"[hami-vulkan-manifest] installed at /usr/local/vgpu/vulkan/implicit_layer.d/hami.json\"\n# DaemonSet 이라 종료하지 않고 sleep — restart 루프 회피\nsleep infinity\n"],"image":"busybox:1.36","name":"installer","securityContext":{"runAsUser":0},"volumeMounts":[{"mountPath":"/manifest","name":"manifest","readOnly":true},{"mountPath":"/host/usr/local/vgpu","name":"host-vgpu"}]}],"hostPID":false,"nodeSelector":{"nvidia.com/gpu.present":"true"},"restartPolicy":"Always","tolerations":[{"operator":"Exists"}],"volumes":[{"configMap":{"name":"hami-vulkan-manifest"},"name":"manifest"},{"hostPath":{"path":"/usr/local/vgpu","type":"DirectoryOrCreate"},"name":"host-vgpu"}]}}}} + 
creationTimestamp: "2026-04-27T02:17:50Z" + generation: 2 + labels: + app: hami-vulkan-manifest-installer + name: hami-vulkan-manifest-installer + namespace: kube-system + resourceVersion: "20897004" + uid: 6998669a-7bd7-463b-9564-94ad1a4f3feb +spec: + revisionHistoryLimit: 10 + selector: + matchLabels: + app: hami-vulkan-manifest-installer + template: + metadata: + labels: + app: hami-vulkan-manifest-installer + spec: + containers: + - command: + - /bin/sh + - -c + - | + set -eu + mkdir -p /host/usr/local/vgpu/vulkan/implicit_layer.d + cp -f /manifest/hami.json \ + /host/usr/local/vgpu/vulkan/implicit_layer.d/hami.json + echo "[hami-vulkan-manifest] installed at /usr/local/vgpu/vulkan/implicit_layer.d/hami.json" + # DaemonSet 이라 종료하지 않고 sleep — restart 루프 회피 + sleep infinity + image: busybox:1.36 + imagePullPolicy: IfNotPresent + name: installer + resources: {} + securityContext: + runAsUser: 0 + terminationMessagePath: /dev/termination-log + terminationMessagePolicy: File + volumeMounts: + - mountPath: /manifest + name: manifest + readOnly: true + - mountPath: /host/usr/local/vgpu + name: host-vgpu + dnsPolicy: ClusterFirst + nodeSelector: + hami.io/disabled: "true" + restartPolicy: Always + schedulerName: default-scheduler + securityContext: {} + terminationGracePeriodSeconds: 30 + tolerations: + - operator: Exists + volumes: + - configMap: + defaultMode: 420 + name: hami-vulkan-manifest + name: manifest + - hostPath: + path: /usr/local/vgpu + type: DirectoryOrCreate + name: host-vgpu + updateStrategy: + rollingUpdate: + maxSurge: 0 + maxUnavailable: 1 + type: RollingUpdate +status: + currentNumberScheduled: 0 + desiredNumberScheduled: 0 + numberMisscheduled: 0 + numberReady: 0 + observedGeneration: 2 diff --git a/cluster/runtime/snapshot-2026-04-28/hami-webhook-mutating.yaml b/cluster/runtime/snapshot-2026-04-28/hami-webhook-mutating.yaml new file mode 100644 index 000000000..12814a69f --- /dev/null +++ 
b/cluster/runtime/snapshot-2026-04-28/hami-webhook-mutating.yaml @@ -0,0 +1,51 @@ +apiVersion: admissionregistration.k8s.io/v1 +kind: MutatingWebhookConfiguration +metadata: + annotations: + meta.helm.sh/release-name: hami-webhook + meta.helm.sh/release-namespace: hami-system + creationTimestamp: "2026-04-27T03:34:22Z" + generation: 2 + labels: + app.kubernetes.io/managed-by: Helm + name: hami-webhook-webhook + resourceVersion: "20112390" + uid: 32e51184-bf13-41df-a382-1566671cc010 +webhooks: +- admissionReviewVersions: + - v1beta1 + clientConfig: + caBundle: LS0tLS1CRUdJTiBDRVJUSUZJQ0FURS0tLS0tCk1JSUJkakNDQVJ5Z0F3SUJBZ0lSQVBlREVjTWFneDYwOENEVWJYdUZXdjR3Q2dZSUtvWkl6ajBFQXdJd0R6RU4KTUFzR0ExVUVDaE1FYm1sc01UQWdGdzB5TmpBME1qY3dNakl5TlROYUdBOHlNVEkyTURRd016QXlNakkxTTFvdwpEekVOTUFzR0ExVUVDaE1FYm1sc01UQlpNQk1HQnlxR1NNNDlBZ0VHQ0NxR1NNNDlBd0VIQTBJQUJIc2dFbU95CnpQWllFTHEwMmFIQUIxRXI1MnE1dlE0Vi9qV2hTVmg2S0RIRUh6Y1JFNUdlQ29nMDBMbDBaYnlDZXZ1RmhETGcKdDBYZXlXOVJaTldMVDF1alZ6QlZNQTRHQTFVZER3RUIvd1FFQXdJQ0JEQVRCZ05WSFNVRUREQUtCZ2dyQmdFRgpCUWNEQVRBUEJnTlZIUk1CQWY4RUJUQURBUUgvTUIwR0ExVWREZ1FXQkJSOHk5ZXVCOFVIaVZ0bWtvQkh6VzVDCnZXRXJHekFLQmdncWhrak9QUVFEQWdOSUFEQkZBaUVBdUJ2eWVmekJoam5rclpYN0hKRzNURzVRZ2NzTVpvVC8KUmpPQWFhNEFrNHNDSUEvaGhyMndkODE2M2loNmEvWWtuTWdhOXRCeVpGQ2lxeXF3NjhVUjRaNVIKLS0tLS1FTkQgQ0VSVElGSUNBVEUtLS0tLQo= + service: + name: hami-webhook-scheduler + namespace: hami-system + path: /webhook + port: 443 + failurePolicy: Ignore + matchPolicy: Equivalent + name: vgpu.hami.io + namespaceSelector: + matchExpressions: + - key: hami.io/webhook + operator: NotIn + values: + - ignore + objectSelector: + matchExpressions: + - key: hami.io/webhook + operator: NotIn + values: + - ignore + reinvocationPolicy: Never + rules: + - apiGroups: + - "" + apiVersions: + - v1 + operations: + - CREATE + resources: + - pods + scope: '*' + sideEffects: None + timeoutSeconds: 10 diff --git a/cluster/runtime/snapshot-2026-04-28/volcano-device-plugin-ds.yaml 
b/cluster/runtime/snapshot-2026-04-28/volcano-device-plugin-ds.yaml new file mode 100644 index 000000000..a4003afaf --- /dev/null +++ b/cluster/runtime/snapshot-2026-04-28/volcano-device-plugin-ds.yaml @@ -0,0 +1,180 @@ +apiVersion: apps/v1 +kind: DaemonSet +metadata: + annotations: + deprecated.daemonset.template.generation: "5" + kubectl.kubernetes.io/last-applied-configuration: | + {"apiVersion":"apps/v1","kind":"DaemonSet","metadata":{"annotations":{},"name":"volcano-device-plugin","namespace":"kube-system"},"spec":{"selector":{"matchLabels":{"name":"volcano-device-plugin"}},"template":{"metadata":{"annotations":{"scheduler.alpha.kubernetes.io/critical-pod":""},"labels":{"name":"volcano-device-plugin"}},"spec":{"containers":[{"args":["--device-split-count=10"],"env":[{"name":"NODE_NAME","valueFrom":{"fieldRef":{"fieldPath":"spec.nodeName"}}},{"name":"HOOK_PATH","value":"/usr/local/vgpu"},{"name":"NVIDIA_VISIBLE_DEVICES","value":"all"},{"name":"NVIDIA_MIG_MONITOR_DEVICES","value":"all"},{"name":"NVIDIA_DRIVER_CAPABILITIES","value":"utility"}],"image":"10.61.3.124:30002/library/volcano-vgpu-device-plugin:vulkan-v1","lifecycle":{"postStart":{"exec":{"command":["/bin/sh","-c","cp -rf /k8s-vgpu/lib/nvidia/. 
/usr/local/vgpu/"]}}},"name":"volcano-device-plugin","securityContext":{"allowPrivilegeEscalation":true,"capabilities":{"add":["SYS_ADMIN"],"drop":["ALL"]},"privileged":true},"volumeMounts":[{"mountPath":"/config","name":"deviceconfig"},{"mountPath":"/var/lib/kubelet/device-plugins","name":"device-plugin"},{"mountPath":"/usr/local/vgpu","name":"lib"},{"mountPath":"/tmp","name":"hosttmp"}]},{"command":["/bin/bash","-c","volcano-vgpu-monitor"],"env":[{"name":"NVIDIA_VISIBLE_DEVICES","value":"all"},{"name":"NVIDIA_MIG_MONITOR_DEVICES","value":"all"},{"name":"HOOK_PATH","value":"/tmp/vgpu"},{"name":"NODE_NAME","valueFrom":{"fieldRef":{"fieldPath":"spec.nodeName"}}}],"image":"10.61.3.124:30002/library/volcano-vgpu-device-plugin:vulkan-v1","name":"monitor","securityContext":{"allowPrivilegeEscalation":true,"capabilities":{"add":["SYS_ADMIN"],"drop":["ALL"]},"privileged":true},"volumeMounts":[{"mountPath":"/run/docker","name":"dockers"},{"mountPath":"/run/containerd","name":"containerds"},{"mountPath":"/sysinfo","name":"sysinfo"},{"mountPath":"/hostvar","name":"hostvar"},{"mountPath":"/tmp","name":"hosttmp"}]}],"priorityClassName":"system-node-critical","serviceAccount":"volcano-device-plugin","tolerations":[{"key":"CriticalAddonsOnly","operator":"Exists"},{"effect":"NoSchedule","key":"volcano.sh/gpu-memory","operator":"Exists"}],"volumes":[{"configMap":{"name":"volcano-vgpu-node-config"},"name":"deviceconfig"},{"hostPath":{"path":"/var/lib/kubelet/device-plugins","type":"Directory"},"name":"device-plugin"},{"hostPath":{"path":"/usr/local/vgpu","type":"DirectoryOrCreate"},"name":"lib"},{"hostPath":{"path":"/tmp","type":"DirectoryOrCreate"},"name":"hosttmp"},{"hostPath":{"path":"/run/docker","type":"DirectoryOrCreate"},"name":"dockers"},{"hostPath":{"path":"/run/containerd","type":"DirectoryOrCreate"},"name":"containerds"},{"hostPath":{"path":"/usr/bin","type":"Directory"},"name":"usrbin"},{"hostPath":{"path":"/sys","type":"Directory"},"name":"sysinfo"},{"hostPath":{"path":
"/var","type":"Directory"},"name":"hostvar"}]}},"updateStrategy":{"type":"RollingUpdate"}}} + creationTimestamp: "2026-04-27T02:17:52Z" + generation: 5 + name: volcano-device-plugin + namespace: kube-system + resourceVersion: "20250370" + uid: f0d77283-6b73-419e-8504-6d4965dde85a +spec: + revisionHistoryLimit: 10 + selector: + matchLabels: + name: volcano-device-plugin + template: + metadata: + annotations: + kubectl.kubernetes.io/restartedAt: "2026-04-27T17:44:34+09:00" + scheduler.alpha.kubernetes.io/critical-pod: "" + labels: + name: volcano-device-plugin + spec: + containers: + - args: + - --device-split-count=10 + - --gpu-memory-factor=1024 + env: + - name: NODE_NAME + valueFrom: + fieldRef: + apiVersion: v1 + fieldPath: spec.nodeName + - name: HOOK_PATH + value: /usr/local/vgpu + - name: NVIDIA_VISIBLE_DEVICES + value: all + - name: NVIDIA_MIG_MONITOR_DEVICES + value: all + - name: NVIDIA_DRIVER_CAPABILITIES + value: utility + image: 10.61.3.124:30002/library/volcano-vgpu-device-plugin:vulkan-v1 + imagePullPolicy: IfNotPresent + lifecycle: + postStart: + exec: + command: + - /bin/sh + - -c + - cp -rf /k8s-vgpu/lib/nvidia/. 
/usr/local/vgpu/ + name: volcano-device-plugin + resources: {} + securityContext: + allowPrivilegeEscalation: true + capabilities: + add: + - SYS_ADMIN + drop: + - ALL + privileged: true + terminationMessagePath: /dev/termination-log + terminationMessagePolicy: File + volumeMounts: + - mountPath: /config + name: deviceconfig + - mountPath: /var/lib/kubelet/device-plugins + name: device-plugin + - mountPath: /usr/local/vgpu + name: lib + - mountPath: /tmp + name: hosttmp + - command: + - /bin/bash + - -c + - volcano-vgpu-monitor + env: + - name: NVIDIA_VISIBLE_DEVICES + value: all + - name: NVIDIA_MIG_MONITOR_DEVICES + value: all + - name: HOOK_PATH + value: /tmp/vgpu + - name: NODE_NAME + valueFrom: + fieldRef: + apiVersion: v1 + fieldPath: spec.nodeName + image: 10.61.3.124:30002/library/volcano-vgpu-device-plugin:vulkan-v1 + imagePullPolicy: IfNotPresent + name: monitor + resources: {} + securityContext: + allowPrivilegeEscalation: true + capabilities: + add: + - SYS_ADMIN + drop: + - ALL + privileged: true + terminationMessagePath: /dev/termination-log + terminationMessagePolicy: File + volumeMounts: + - mountPath: /run/docker + name: dockers + - mountPath: /run/containerd + name: containerds + - mountPath: /sysinfo + name: sysinfo + - mountPath: /hostvar + name: hostvar + - mountPath: /tmp + name: hosttmp + dnsPolicy: ClusterFirst + priorityClassName: system-node-critical + restartPolicy: Always + runtimeClassName: nvidia + schedulerName: default-scheduler + securityContext: {} + serviceAccount: volcano-device-plugin + serviceAccountName: volcano-device-plugin + terminationGracePeriodSeconds: 30 + tolerations: + - key: CriticalAddonsOnly + operator: Exists + - effect: NoSchedule + key: volcano.sh/gpu-memory + operator: Exists + volumes: + - configMap: + defaultMode: 420 + name: volcano-vgpu-node-config + name: deviceconfig + - hostPath: + path: /var/lib/kubelet/device-plugins + type: Directory + name: device-plugin + - hostPath: + path: /usr/local/vgpu + type: 
DirectoryOrCreate + name: lib + - hostPath: + path: /tmp + type: DirectoryOrCreate + name: hosttmp + - hostPath: + path: /run/docker + type: DirectoryOrCreate + name: dockers + - hostPath: + path: /run/containerd + type: DirectoryOrCreate + name: containerds + - hostPath: + path: /usr/bin + type: Directory + name: usrbin + - hostPath: + path: /sys + type: Directory + name: sysinfo + - hostPath: + path: /var + type: Directory + name: hostvar + updateStrategy: + rollingUpdate: + maxSurge: 0 + maxUnavailable: 1 + type: RollingUpdate +status: + currentNumberScheduled: 1 + desiredNumberScheduled: 1 + numberAvailable: 1 + numberMisscheduled: 0 + numberReady: 1 + observedGeneration: 5 + updatedNumberScheduled: 1 diff --git a/cluster/runtime/snapshot-2026-04-29-step-d/hami-vulkan-manifest-cm.yaml b/cluster/runtime/snapshot-2026-04-29-step-d/hami-vulkan-manifest-cm.yaml new file mode 100644 index 000000000..32f25062b --- /dev/null +++ b/cluster/runtime/snapshot-2026-04-29-step-d/hami-vulkan-manifest-cm.yaml @@ -0,0 +1,26 @@ +apiVersion: v1 +kind: ConfigMap +metadata: + name: hami-vulkan-manifest + namespace: kube-system +data: + hami.json: | + { + "file_format_version": "1.0.0", + "layer": { + "name": "VK_LAYER_HAMI_vgpu", + "type": "INSTANCE", + "library_path": "/usr/local/vgpu/libvgpu_vk.so", + "api_version": "1.3.0", + "implementation_version": "1", + "description": "HAMi vGPU partition layer (Step D — split libvgpu_vk.so)", + "instance_extensions": [], + "device_extensions": [], + "enable_environment": { + "HAMI_VULKAN_ENABLE": "1" + }, + "disable_environment": { + "DISABLE_HAMI_VULKAN_LAYER": "1" + } + } + } diff --git a/cluster/runtime/snapshot-2026-04-29-step-d/hami-vulkan-manifest-installer-ds.yaml b/cluster/runtime/snapshot-2026-04-29-step-d/hami-vulkan-manifest-installer-ds.yaml new file mode 100644 index 000000000..b555f3b39 --- /dev/null +++ b/cluster/runtime/snapshot-2026-04-29-step-d/hami-vulkan-manifest-installer-ds.yaml @@ -0,0 +1,57 @@ +apiVersion: apps/v1 
+kind: DaemonSet +metadata: + name: hami-vulkan-manifest-installer + namespace: kube-system + labels: + app: hami-vulkan-manifest-installer +spec: + selector: + matchLabels: + app: hami-vulkan-manifest-installer + template: + metadata: + labels: + app: hami-vulkan-manifest-installer + spec: + nodeSelector: + nvidia.com/gpu.present: "true" + hostPID: false + restartPolicy: Always + tolerations: + - operator: Exists + containers: + - name: installer + image: busybox:1.36 + imagePullPolicy: IfNotPresent + securityContext: + runAsUser: 0 + command: + - /bin/sh + - -c + - | + set -eu + mkdir -p /host/etc/vulkan/implicit_layer.d + cp -f /manifest/hami.json /host/etc/vulkan/implicit_layer.d/hami.json + echo "[hami-vulkan-manifest] installed at /etc/vulkan/implicit_layer.d/hami.json" + # DaemonSet 이라 종료하지 않고 sleep — restart 루프 회피 + sleep infinity + volumeMounts: + - name: manifest + mountPath: /manifest + readOnly: true + - name: host-etc-vulkan + mountPath: /host/etc/vulkan + volumes: + - name: manifest + configMap: + name: hami-vulkan-manifest + - name: host-etc-vulkan + hostPath: + path: /etc/vulkan + type: DirectoryOrCreate + updateStrategy: + type: RollingUpdate + rollingUpdate: + maxSurge: 0 + maxUnavailable: 1 diff --git a/cluster/runtime/snapshot-2026-04-29-step-d/vk_partition_test.py b/cluster/runtime/snapshot-2026-04-29-step-d/vk_partition_test.py new file mode 100644 index 000000000..4090e3c8e --- /dev/null +++ b/cluster/runtime/snapshot-2026-04-29-step-d/vk_partition_test.py @@ -0,0 +1,109 @@ +#!/usr/bin/env python3 +"""Step D 4-path verification — Vulkan-side partition enforce. + +Path 3: vkGetPhysicalDeviceMemoryProperties → device-local heap size + MUST be the partition limit (~23 GiB), not the raw 46 GiB. +Path 4: vkAllocateMemory(size = 25 GiB) MUST fail with + VK_ERROR_OUT_OF_DEVICE_MEMORY (partition limit is ~23 GiB). + +Run inside isaac-launchable-0 vscode container (annotation +hami.io/vulkan: "true" + webhook-injected manifest + libvgpu_vk.so). 
+""" +import sys + +PARTITION_MIB = 23552 +PARTITION_BYTES = PARTITION_MIB * 1024 * 1024 +TOLERANCE_MIB = 256 +OVER_BUDGET_BYTES = 25 * 1024 * 1024 * 1024 + +try: + import vulkan as vk +except ImportError: + print("ERR: pip install vulkan") + sys.exit(2) + +API_1_3 = (1 << 22) | (3 << 12) + + +def main(): + app = vk.VkApplicationInfo( + sType=vk.VK_STRUCTURE_TYPE_APPLICATION_INFO, + pApplicationName="hami-step-d-probe", + applicationVersion=1, + pEngineName="probe", + engineVersion=1, + apiVersion=API_1_3, + ) + ci = vk.VkInstanceCreateInfo( + sType=vk.VK_STRUCTURE_TYPE_INSTANCE_CREATE_INFO, + pApplicationInfo=app, + ) + inst = vk.vkCreateInstance(ci, None) + phys_devs = vk.vkEnumeratePhysicalDevices(inst) + if not phys_devs: + print("ERR: no physical devices") + sys.exit(2) + dev = phys_devs[0] + mem_props = vk.vkGetPhysicalDeviceMemoryProperties(dev) + + # Path 3 + device_local_heap_size = 0 + for i in range(mem_props.memoryHeapCount): + heap = mem_props.memoryHeaps[i] + if heap.flags & vk.VK_MEMORY_HEAP_DEVICE_LOCAL_BIT: + if heap.size > device_local_heap_size: + device_local_heap_size = heap.size + p3_mib = device_local_heap_size // (1024 * 1024) + print(f"Path 3: device-local heap = {device_local_heap_size} bytes ({p3_mib} MiB)") + if abs(p3_mib - PARTITION_MIB) <= TOLERANCE_MIB: + print(f"Path 3: PASS (within {TOLERANCE_MIB} MiB of {PARTITION_MIB} MiB partition)") + path3_ok = True + else: + print(f"Path 3: FAIL (expected ~{PARTITION_MIB} MiB, got {p3_mib} MiB)") + path3_ok = False + + # Path 4 + queue_create = vk.VkDeviceQueueCreateInfo( + sType=vk.VK_STRUCTURE_TYPE_DEVICE_QUEUE_CREATE_INFO, + queueFamilyIndex=0, + queueCount=1, + pQueuePriorities=[1.0], + ) + device_create = vk.VkDeviceCreateInfo( + sType=vk.VK_STRUCTURE_TYPE_DEVICE_CREATE_INFO, + queueCreateInfoCount=1, + pQueueCreateInfos=[queue_create], + ) + ldev = vk.vkCreateDevice(dev, device_create, None) + mem_type_idx = -1 + for i in range(mem_props.memoryTypeCount): + if 
mem_props.memoryTypes[i].propertyFlags & vk.VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT: + mem_type_idx = i + break + if mem_type_idx < 0: + print("Path 4: SKIP (no device-local memory type)") + path4_ok = False + else: + alloc_info = vk.VkMemoryAllocateInfo( + sType=vk.VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO, + allocationSize=OVER_BUDGET_BYTES, + memoryTypeIndex=mem_type_idx, + ) + path4_ok = False + try: + mem = vk.vkAllocateMemory(ldev, alloc_info, None) + print(f"Path 4: FAIL (expected OOM for {OVER_BUDGET_BYTES // (1024**3)} GiB, got success — partition not enforced)") + vk.vkFreeMemory(ldev, mem, None) + except vk.VkErrorOutOfDeviceMemory: + print(f"Path 4: PASS (VK_ERROR_OUT_OF_DEVICE_MEMORY for {OVER_BUDGET_BYTES // (1024**3)} GiB > {PARTITION_MIB // 1024} GiB partition)") + path4_ok = True + except Exception as e: + print(f"Path 4: FAIL (unexpected error {type(e).__name__}: {e})") + vk.vkDestroyDevice(ldev, None) + vk.vkDestroyInstance(inst, None) + + return 0 if (path3_ok and path4_ok) else 1 + + +if __name__ == "__main__": + sys.exit(main()) diff --git a/cluster/runtime/snapshot-2026-04-29-step-d/volcano-device-plugin-ds.yaml b/cluster/runtime/snapshot-2026-04-29-step-d/volcano-device-plugin-ds.yaml new file mode 100644 index 000000000..303010090 --- /dev/null +++ b/cluster/runtime/snapshot-2026-04-29-step-d/volcano-device-plugin-ds.yaml @@ -0,0 +1,164 @@ +apiVersion: apps/v1 +kind: DaemonSet +metadata: + name: volcano-device-plugin + namespace: kube-system +spec: + revisionHistoryLimit: 10 + selector: + matchLabels: + name: volcano-device-plugin + template: + metadata: + annotations: + kubectl.kubernetes.io/restartedAt: '2026-04-27T17:44:34+09:00' + scheduler.alpha.kubernetes.io/critical-pod: '' + labels: + name: volcano-device-plugin + spec: + containers: + - args: + - --device-split-count=10 + - --gpu-memory-factor=1024 + env: + - name: NODE_NAME + valueFrom: + fieldRef: + apiVersion: v1 + fieldPath: spec.nodeName + - name: HOOK_PATH + value: /usr/local/vgpu + 
- name: NVIDIA_VISIBLE_DEVICES + value: all + - name: NVIDIA_MIG_MONITOR_DEVICES + value: all + - name: NVIDIA_DRIVER_CAPABILITIES + value: utility + image: 10.61.3.124:30002/library/volcano-vgpu-device-plugin:vulkan-v2 + imagePullPolicy: IfNotPresent + lifecycle: + postStart: + exec: + command: + - /bin/sh + - -c + - cp -rf /k8s-vgpu/lib/nvidia/. /usr/local/vgpu/ + name: volcano-device-plugin + resources: {} + securityContext: + allowPrivilegeEscalation: true + capabilities: + add: + - SYS_ADMIN + drop: + - ALL + privileged: true + terminationMessagePath: /dev/termination-log + terminationMessagePolicy: File + volumeMounts: + - mountPath: /config + name: deviceconfig + - mountPath: /var/lib/kubelet/device-plugins + name: device-plugin + - mountPath: /usr/local/vgpu + name: lib + - mountPath: /tmp + name: hosttmp + - command: + - /bin/bash + - -c + - volcano-vgpu-monitor + env: + - name: NVIDIA_VISIBLE_DEVICES + value: all + - name: NVIDIA_MIG_MONITOR_DEVICES + value: all + - name: HOOK_PATH + value: /tmp/vgpu + - name: NODE_NAME + valueFrom: + fieldRef: + apiVersion: v1 + fieldPath: spec.nodeName + image: 10.61.3.124:30002/library/volcano-vgpu-device-plugin:vulkan-v2 + imagePullPolicy: IfNotPresent + name: monitor + resources: {} + securityContext: + allowPrivilegeEscalation: true + capabilities: + add: + - SYS_ADMIN + drop: + - ALL + privileged: true + terminationMessagePath: /dev/termination-log + terminationMessagePolicy: File + volumeMounts: + - mountPath: /run/docker + name: dockers + - mountPath: /run/containerd + name: containerds + - mountPath: /sysinfo + name: sysinfo + - mountPath: /hostvar + name: hostvar + - mountPath: /tmp + name: hosttmp + dnsPolicy: ClusterFirst + priorityClassName: system-node-critical + restartPolicy: Always + runtimeClassName: nvidia + schedulerName: default-scheduler + securityContext: {} + serviceAccount: volcano-device-plugin + serviceAccountName: volcano-device-plugin + terminationGracePeriodSeconds: 30 + tolerations: + - 
key: CriticalAddonsOnly + operator: Exists + - effect: NoSchedule + key: volcano.sh/gpu-memory + operator: Exists + volumes: + - configMap: + defaultMode: 420 + name: volcano-vgpu-node-config + name: deviceconfig + - hostPath: + path: /var/lib/kubelet/device-plugins + type: Directory + name: device-plugin + - hostPath: + path: /usr/local/vgpu + type: DirectoryOrCreate + name: lib + - hostPath: + path: /tmp + type: DirectoryOrCreate + name: hosttmp + - hostPath: + path: /run/docker + type: DirectoryOrCreate + name: dockers + - hostPath: + path: /run/containerd + type: DirectoryOrCreate + name: containerds + - hostPath: + path: /usr/bin + type: Directory + name: usrbin + - hostPath: + path: /sys + type: Directory + name: sysinfo + - hostPath: + path: /var + type: Directory + name: hostvar + updateStrategy: + rollingUpdate: + maxSurge: 0 + maxUnavailable: 1 + type: RollingUpdate diff --git a/docker/Dockerfile b/docker/Dockerfile index c592a72d8..9599ff7df 100644 --- a/docker/Dockerfile +++ b/docker/Dockerfile @@ -17,7 +17,8 @@ COPY .git/modules/libvgpu /libvgpu-git RUN rm -rf /libvgpu/.git && echo "gitdir: /libvgpu-git" > /libvgpu/.git WORKDIR /libvgpu ENV DEBIAN_FRONTEND=noninteractive -RUN apt-get -y update; apt-get -y --no-install-recommends install cmake git && rm -rf /var/lib/apt/lists/* +RUN apt-get -y update; apt-get -y --no-install-recommends install cmake git libvulkan-dev && rm -rf /var/lib/apt/lists/* +ENV CI_COMMIT_SHA=docker-build RUN bash ./build.sh FROM nvidia/cuda:13.2.1-base-ubuntu22.04 @@ -40,6 +41,7 @@ COPY --from=gobuild /go/bin/nvidia-mig-parted /k8s-vgpu/bin/ COPY ./docker/entrypoint.sh /k8s-vgpu/bin/entrypoint.sh COPY ./lib /k8s-vgpu/lib COPY --from=nvbuild /libvgpu/build/libvgpu.so /k8s-vgpu/lib/nvidia/libvgpu.so."$VERSION" +COPY --from=nvbuild /libvgpu/etc/vulkan/implicit_layer.d/hami.json /k8s-vgpu/lib/nvidia/vulkan/implicit_layer.d/hami.json COPY ./docker/vgpu-init.sh /k8s-vgpu/bin/vgpu-init.sh ENV PATH="/k8s-vgpu/bin:${PATH}" diff --git 
a/docs/superpowers/plans/2026-04-21-vulkan-vgpu-partitioning.md b/docs/superpowers/plans/2026-04-21-vulkan-vgpu-partitioning.md new file mode 100644 index 000000000..fdd9cd192 --- /dev/null +++ b/docs/superpowers/plans/2026-04-21-vulkan-vgpu-partitioning.md @@ -0,0 +1,2205 @@ +# Vulkan vGPU 분할 구현 계획 + +> **For agentic workers:** REQUIRED SUB-SKILL: Use superpowers:subagent-driven-development (recommended) or superpowers:executing-plans to implement this plan task-by-task. Steps use checkbox (`- [ ]`) syntax for tracking. + +**Goal:** `hami.io/vulkan: "true"` annotation을 붙인 파드의 Vulkan 메모리 할당과 큐 제출에 기존 `nvidia.com/gpumem` / `nvidia.com/gpucores` 버짓을 강제한다. + +**Architecture:** HAMi-core(`libvgpu.so`)에 Vulkan implicit layer를 추가해 `vkAllocateMemory` / `vkFreeMemory` / `vkGetPhysicalDeviceMemoryProperties[2]` / `vkQueueSubmit[2]`를 가로챈다. 기존 CUDA 훅이 사용하는 per-device 메모리 카운터와 SM throttle 유틸을 그대로 재사용한다. HAMi(Go)의 `MutateAdmission`은 annotation을 감지해 `NVIDIA_DRIVER_CAPABILITIES`에 `graphics`를 합치고 `HAMI_VULKAN_ENABLE=1`을 주입한다. + +**Tech Stack:** Go 1.22+ (HAMi), C11 + Vulkan 1.3 headers + pthread + NVML (HAMi-core), CMake, Docker multi-stage 빌드. + +**Reference Spec:** `docs/superpowers/specs/2026-04-21-vulkan-vgpu-partitioning-design.md` + +**Layout Notes:** `docs/superpowers/plans/notes/hami-core-layout.md` — Task 0.2에서 확보한 HAMi-core 실제 API 시그니처. + +## 중요 개정 사항 (탐색 결과 반영) + +Task 0.2에서 HAMi-core 실제 구조를 확인한 결과, 초기 플랜의 일부 가정이 실제와 달라 Task 1.3~1.8을 다음과 같이 개정합니다: + +1. **VRAM 카운터는 atomic reserve가 아니라 check-only**. 실제 API는 `oom_check(dev,size)`(체크만, `1`=OOM) + `add_gpu_device_memory_usage(pid,dev,size,type)`(커밋). CUDA 경로와 동일한 2단계 사용. +2. **SM throttle은 이미 추출 형태**. `rate_limiter(grids,blocks)` 소비자(토큰 버킷) + `utilization_watcher` pthread 생산자. 별도 추출 Task 불필요 → **Task 1.4 삭제**, Vulkan 훅에서 직접 `rate_limiter(1,1)` 호출. +3. **빌드는 CMake + OBJECT 라이브러리 구조**. `libvgpu/src/vulkan/` 신규 서브디렉토리에 자체 `CMakeLists.txt`를 두고 OBJECT lib `vulkan_mod`를 정의해 루트에서 링크. 
Makefile 직접 수정은 불필요 (`make build` → `./build.sh` → `cmake` 흐름 유지). +4. **`-fvisibility=hidden`(Release)**. 레이어 엔트리포인트는 Vulkan SDK 헤더의 `VK_LAYER_EXPORT` 매크로가 이미 `__attribute__((visibility("default")))`를 적용하므로 추가 조치 불필요. +5. **`limit==0` 센티넬 = unlimited**. 버짓 어댑터가 이를 그대로 전달해 clamp/예약 로직이 0이면 훅을 건너뛰도록 한다. +6. **단위 테스트 프레임워크 없음**. 기존 `test/*.c` 샘플 스타일대로 stand-alone binary로 작성하거나, HAMi(Go) 쪽에서 cgo 미사용이면 C 테스트는 `test/vulkan/`에 `assert.h` 기반 프로그램으로 추가하고 CMake `test/CMakeLists.txt`의 glob이 자동으로 컴파일하도록 한다. + +--- + +## Phase 0 — Submodule 초기화 및 탐색 + +### Task 0.1: HAMi-core submodule 초기화 + +**Files:** +- Modify: 없음 (체크아웃만) + +- [ ] **Step 1: submodule 상태 확인** + +Run: +```bash +git submodule status +``` +Expected output contains `libvgpu` 항목. 앞에 `-`가 붙어 있으면 미초기화. + +- [ ] **Step 2: submodule 초기화 및 체크아웃** + +Run: +```bash +git submodule update --init --recursive libvgpu +``` +Expected: `libvgpu/` 아래에 C 소스(`src/`, `Makefile` 등)가 체크아웃됨. + +- [ ] **Step 3: 커밋 불필요 확인** + +Run: +```bash +git status +``` +Expected: working tree clean (submodule 포인터는 이미 `.gitmodules`의 pin과 일치). + +--- + +### Task 0.2: HAMi-core 구조와 기존 카운터 API 탐색 + +**Files:** +- Create: `docs/superpowers/plans/notes/hami-core-layout.md` (임시 노트, 플랜 종료 후 삭제) + +- [ ] **Step 1: 상위 구조 파악** + +Run: +```bash +ls libvgpu/ +ls libvgpu/src/ +find libvgpu/src -maxdepth 2 -name "*.c" -o -name "*.h" | head -40 +``` +Expected: `libvgpu/src` 하위에 `cuda/`, `memory/` 또는 유사 디렉토리. 공유 헤더(`include/` 또는 `src/*.h`) 확인. + +- [ ] **Step 2: VRAM 카운터 API 식별** + +Run: +```bash +grep -rn "used_memory\|device_memory\|reserve_memory\|allocate_memory_check" libvgpu/src | head +grep -rn "cuMemAlloc\b" libvgpu/src | head +``` +위 검색 결과에서 CUDA allocate 래퍼가 호출하는 "예약" 함수의 시그니처를 확보. 예시 후보: `int32_t oom_check(int, size_t)`, `void add_allocated(int, size_t)` 등. + +- [ ] **Step 3: SM throttle 루프 식별** + +Run: +```bash +grep -rn "nvmlDeviceGetUtilizationRates\|utilization_watchdog\|usleep\|sm_limit" libvgpu/src | head +``` +기존 throttle 폴링 루프가 있는 파일과 함수명 확보. 
+ +- [ ] **Step 4: 테스트 프레임워크 식별** + +Run: +```bash +ls libvgpu/test 2>/dev/null || ls libvgpu/tests 2>/dev/null +grep -rn "assert(" libvgpu/ 2>/dev/null | head +cat libvgpu/Makefile | head -60 +``` +테스트 타겟(`make test`, `make check` 등)과 디렉토리 위치 확보. 없으면 "테스트 타겟 없음"을 노트. + +- [ ] **Step 5: 노트 기록** + +Write `docs/superpowers/plans/notes/hami-core-layout.md` 내용 예시(실제 수치는 Step 2~4 결과로 채움): +```markdown +# HAMi-core layout notes + +- src/cuda/memory.c — cuMemAlloc 래퍼. reserve 함수: `int reserve_device_memory(int dev, size_t size)` (L123) +- src/cuda/launch.c — cuLaunchKernel 래퍼. throttle 루프: `static void throttle_wait(int dev)` (L77) +- include/hami_core.h — 공통 헤더. device_memory 구조체 노출. +- test 디렉토리 없음. Makefile `make test` 타겟 없음 → assert.h + 자체 러너 추가 필요. +- Vulkan 헤더: 빌드 미의존. vulkan-headers 패키지 추가 필요. +``` + +- [ ] **Step 6: 커밋** + +```bash +git add docs/superpowers/plans/notes/hami-core-layout.md +git commit -m "docs: HAMi-core layout notes for Vulkan plan" +``` + +--- + +## Phase 1 — HAMi-core Vulkan Layer (C) + +이 Phase의 모든 작업은 `libvgpu/` 하위에서 진행됩니다. HAMi-core는 submodule이므로, Phase 마지막에 `libvgpu` 레포에 별도 브랜치/PR로 밀고, HAMi 쪽에서 submodule 포인터를 업데이트합니다. 
+ +### Task 1.1: 레이어 엔트리포인트 스켈레톤 + +**Files:** +- Create: `libvgpu/src/vulkan/layer.h` +- Create: `libvgpu/src/vulkan/layer.c` +- Create: `libvgpu/src/vulkan/dispatch.h` +- Create: `libvgpu/src/vulkan/dispatch.c` + +- [ ] **Step 1: 실패 테스트 작성 — `vkNegotiateLoaderLayerInterfaceVersion` export 확인** + +Create `libvgpu/test/vulkan/test_layer.c`: +```c +#include <assert.h> +#include <dlfcn.h> +#include <stdio.h> +#include <vulkan/vulkan.h> +#include <vulkan/vk_layer.h> + +typedef VkResult (VKAPI_PTR *PFN_vkNegotiateLoaderLayerInterfaceVersion)(VkNegotiateLayerInterface*); + +int main(void) { + void *h = dlopen("./libvgpu.so", RTLD_NOW); + assert(h != NULL); + PFN_vkNegotiateLoaderLayerInterfaceVersion fn = + (PFN_vkNegotiateLoaderLayerInterfaceVersion) + dlsym(h, "vkNegotiateLoaderLayerInterfaceVersion"); + assert(fn != NULL); + + VkNegotiateLayerInterface iface = {0}; + iface.sType = LAYER_NEGOTIATE_INTERFACE_STRUCT; + iface.loaderLayerInterfaceVersion = 2; + VkResult r = fn(&iface); + assert(r == VK_SUCCESS); + assert(iface.pfnGetInstanceProcAddr != NULL); + assert(iface.pfnGetDeviceProcAddr != NULL); + printf("ok: layer entry point negotiates\n"); + return 0; +} +``` + +- [ ] **Step 2: 테스트가 빌드/실행 실패함 확인** + +Run (from `libvgpu/`): +```bash +cc -o /tmp/t test/vulkan/test_layer.c -ldl && /tmp/t +``` +Expected: 실행 실패 — 테스트는 레이어를 링크하지 않고 `dlopen`하므로, `./libvgpu.so`가 없으면 `assert(h != NULL)`에서, 기존 `libvgpu.so`가 있으면 `dlsym`이 NULL을 반환(심볼 미구현)해 `assert(fn != NULL)`에서 중단된다. 
+ +- [ ] **Step 3: `layer.h` 최소 헤더 작성** + +Create `libvgpu/src/vulkan/layer.h`: +```c +#ifndef HAMI_VULKAN_LAYER_H +#define HAMI_VULKAN_LAYER_H + +#include +#include + +#ifdef __cplusplus +extern "C" { +#endif + +VK_LAYER_EXPORT VkResult VKAPI_CALL +vkNegotiateLoaderLayerInterfaceVersion(VkNegotiateLayerInterface *pVersionStruct); + +PFN_vkVoidFunction VKAPI_CALL +hami_vkGetInstanceProcAddr(VkInstance instance, const char *pName); + +PFN_vkVoidFunction VKAPI_CALL +hami_vkGetDeviceProcAddr(VkDevice device, const char *pName); + +#ifdef __cplusplus +} +#endif + +#endif /* HAMI_VULKAN_LAYER_H */ +``` + +- [ ] **Step 4: `dispatch.h` 작성 (next-layer 포인터 테이블)** + +Create `libvgpu/src/vulkan/dispatch.h`: +```c +#ifndef HAMI_VULKAN_DISPATCH_H +#define HAMI_VULKAN_DISPATCH_H + +#include +#include + +typedef struct hami_instance_dispatch { + VkInstance handle; + PFN_vkGetInstanceProcAddr next_gipa; + PFN_vkDestroyInstance DestroyInstance; + PFN_vkEnumeratePhysicalDevices EnumeratePhysicalDevices; + PFN_vkGetPhysicalDeviceMemoryProperties GetPhysicalDeviceMemoryProperties; + PFN_vkGetPhysicalDeviceMemoryProperties2 GetPhysicalDeviceMemoryProperties2; + struct hami_instance_dispatch *next; +} hami_instance_dispatch_t; + +typedef struct hami_device_dispatch { + VkDevice handle; + VkPhysicalDevice physical; + PFN_vkGetDeviceProcAddr next_gdpa; + PFN_vkDestroyDevice DestroyDevice; + PFN_vkAllocateMemory AllocateMemory; + PFN_vkFreeMemory FreeMemory; + PFN_vkQueueSubmit QueueSubmit; + PFN_vkQueueSubmit2 QueueSubmit2; + struct hami_device_dispatch *next; +} hami_device_dispatch_t; + +hami_instance_dispatch_t *hami_instance_lookup(VkInstance inst); +hami_instance_dispatch_t *hami_instance_register(VkInstance inst, PFN_vkGetInstanceProcAddr gipa); +void hami_instance_unregister(VkInstance inst); + +hami_device_dispatch_t *hami_device_lookup(VkDevice dev); +hami_device_dispatch_t *hami_device_register(VkDevice dev, VkPhysicalDevice phys, PFN_vkGetDeviceProcAddr gdpa); +void 
hami_device_unregister(VkDevice dev); + +#endif /* HAMI_VULKAN_DISPATCH_H */ +``` + +- [ ] **Step 5: `dispatch.c` 작성 (단순 linked list + pthread mutex)** + +Create `libvgpu/src/vulkan/dispatch.c`: +```c +#include "dispatch.h" +#include +#include +#include + +static hami_instance_dispatch_t *g_inst_head = NULL; +static hami_device_dispatch_t *g_dev_head = NULL; +static pthread_mutex_t g_lock = PTHREAD_MUTEX_INITIALIZER; + +static void *resolve(PFN_vkGetInstanceProcAddr gipa, VkInstance inst, const char *name) { + return (void *)gipa(inst, name); +} + +hami_instance_dispatch_t *hami_instance_register(VkInstance inst, PFN_vkGetInstanceProcAddr gipa) { + hami_instance_dispatch_t *d = calloc(1, sizeof(*d)); + d->handle = inst; + d->next_gipa = gipa; + d->DestroyInstance = (PFN_vkDestroyInstance) resolve(gipa, inst, "vkDestroyInstance"); + d->EnumeratePhysicalDevices = (PFN_vkEnumeratePhysicalDevices) resolve(gipa, inst, "vkEnumeratePhysicalDevices"); + d->GetPhysicalDeviceMemoryProperties = (PFN_vkGetPhysicalDeviceMemoryProperties) resolve(gipa, inst, "vkGetPhysicalDeviceMemoryProperties"); + d->GetPhysicalDeviceMemoryProperties2 = (PFN_vkGetPhysicalDeviceMemoryProperties2) resolve(gipa, inst, "vkGetPhysicalDeviceMemoryProperties2"); + + pthread_mutex_lock(&g_lock); + d->next = g_inst_head; + g_inst_head = d; + pthread_mutex_unlock(&g_lock); + return d; +} + +hami_instance_dispatch_t *hami_instance_lookup(VkInstance inst) { + pthread_mutex_lock(&g_lock); + hami_instance_dispatch_t *p = g_inst_head; + while (p && p->handle != inst) p = p->next; + pthread_mutex_unlock(&g_lock); + return p; +} + +void hami_instance_unregister(VkInstance inst) { + pthread_mutex_lock(&g_lock); + hami_instance_dispatch_t **pp = &g_inst_head; + while (*pp && (*pp)->handle != inst) pp = &(*pp)->next; + if (*pp) { hami_instance_dispatch_t *victim = *pp; *pp = victim->next; free(victim); } + pthread_mutex_unlock(&g_lock); +} + +static void *resolve_dev(PFN_vkGetDeviceProcAddr gdpa, VkDevice dev, 
const char *name) { + return (void *)gdpa(dev, name); +} + +hami_device_dispatch_t *hami_device_register(VkDevice dev, VkPhysicalDevice phys, PFN_vkGetDeviceProcAddr gdpa) { + hami_device_dispatch_t *d = calloc(1, sizeof(*d)); + d->handle = dev; + d->physical = phys; + d->next_gdpa = gdpa; + d->DestroyDevice = (PFN_vkDestroyDevice) resolve_dev(gdpa, dev, "vkDestroyDevice"); + d->AllocateMemory = (PFN_vkAllocateMemory) resolve_dev(gdpa, dev, "vkAllocateMemory"); + d->FreeMemory = (PFN_vkFreeMemory) resolve_dev(gdpa, dev, "vkFreeMemory"); + d->QueueSubmit = (PFN_vkQueueSubmit) resolve_dev(gdpa, dev, "vkQueueSubmit"); + d->QueueSubmit2 = (PFN_vkQueueSubmit2) resolve_dev(gdpa, dev, "vkQueueSubmit2"); + + pthread_mutex_lock(&g_lock); + d->next = g_dev_head; + g_dev_head = d; + pthread_mutex_unlock(&g_lock); + return d; +} + +hami_device_dispatch_t *hami_device_lookup(VkDevice dev) { + pthread_mutex_lock(&g_lock); + hami_device_dispatch_t *p = g_dev_head; + while (p && p->handle != dev) p = p->next; + pthread_mutex_unlock(&g_lock); + return p; +} + +void hami_device_unregister(VkDevice dev) { + pthread_mutex_lock(&g_lock); + hami_device_dispatch_t **pp = &g_dev_head; + while (*pp && (*pp)->handle != dev) pp = &(*pp)->next; + if (*pp) { hami_device_dispatch_t *victim = *pp; *pp = victim->next; free(victim); } + pthread_mutex_unlock(&g_lock); +} +``` + +- [ ] **Step 6: `layer.c` 작성 (엔트리포인트 + `vkCreateInstance` / `vkCreateDevice` 훅)** + +Create `libvgpu/src/vulkan/layer.c`: +```c +#include "layer.h" +#include "dispatch.h" +#include +#include + +/* forward declarations for hooks implemented in sibling files */ +extern void hami_vk_hook_instance(hami_instance_dispatch_t *d); +extern void hami_vk_hook_device(hami_device_dispatch_t *d); + +static VkLayerInstanceCreateInfo *find_chain_info(const VkInstanceCreateInfo *pCreateInfo, + VkLayerFunction func) { + const VkLayerInstanceCreateInfo *ci = pCreateInfo->pNext; + while (ci) { + if (ci->sType == 
VK_STRUCTURE_TYPE_LOADER_INSTANCE_CREATE_INFO && ci->function == func) { + return (VkLayerInstanceCreateInfo *)ci; + } + ci = (const VkLayerInstanceCreateInfo *)ci->pNext; + } + return NULL; +} + +static VkLayerDeviceCreateInfo *find_dev_chain_info(const VkDeviceCreateInfo *pCreateInfo, + VkLayerFunction func) { + const VkLayerDeviceCreateInfo *ci = pCreateInfo->pNext; + while (ci) { + if (ci->sType == VK_STRUCTURE_TYPE_LOADER_DEVICE_CREATE_INFO && ci->function == func) { + return (VkLayerDeviceCreateInfo *)ci; + } + ci = (const VkLayerDeviceCreateInfo *)ci->pNext; + } + return NULL; +} + +static VKAPI_ATTR VkResult VKAPI_CALL +hami_vkCreateInstance(const VkInstanceCreateInfo *pCreateInfo, + const VkAllocationCallbacks *pAllocator, + VkInstance *pInstance) { + VkLayerInstanceCreateInfo *chain = find_chain_info(pCreateInfo, VK_LAYER_LINK_INFO); + if (!chain || !chain->u.pLayerInfo) return VK_ERROR_INITIALIZATION_FAILED; + + PFN_vkGetInstanceProcAddr next_gipa = chain->u.pLayerInfo->pfnNextGetInstanceProcAddr; + chain->u.pLayerInfo = chain->u.pLayerInfo->pNext; + + PFN_vkCreateInstance next_create = + (PFN_vkCreateInstance)next_gipa(VK_NULL_HANDLE, "vkCreateInstance"); + VkResult r = next_create(pCreateInfo, pAllocator, pInstance); + if (r != VK_SUCCESS) return r; + + hami_instance_dispatch_t *d = hami_instance_register(*pInstance, next_gipa); + hami_vk_hook_instance(d); + return VK_SUCCESS; +} + +static VKAPI_ATTR void VKAPI_CALL +hami_vkDestroyInstance(VkInstance instance, const VkAllocationCallbacks *pAllocator) { + hami_instance_dispatch_t *d = hami_instance_lookup(instance); + if (d) d->DestroyInstance(instance, pAllocator); + hami_instance_unregister(instance); +} + +static VKAPI_ATTR VkResult VKAPI_CALL +hami_vkCreateDevice(VkPhysicalDevice physicalDevice, + const VkDeviceCreateInfo *pCreateInfo, + const VkAllocationCallbacks *pAllocator, + VkDevice *pDevice) { + VkLayerDeviceCreateInfo *chain = find_dev_chain_info(pCreateInfo, VK_LAYER_LINK_INFO); + if 
(!chain || !chain->u.pLayerInfo) return VK_ERROR_INITIALIZATION_FAILED; + + PFN_vkGetInstanceProcAddr next_gipa = chain->u.pLayerInfo->pfnNextGetInstanceProcAddr; + PFN_vkGetDeviceProcAddr next_gdpa = chain->u.pLayerInfo->pfnNextGetDeviceProcAddr; + chain->u.pLayerInfo = chain->u.pLayerInfo->pNext; + + PFN_vkCreateDevice next_create = + (PFN_vkCreateDevice)next_gipa(VK_NULL_HANDLE, "vkCreateDevice"); + VkResult r = next_create(physicalDevice, pCreateInfo, pAllocator, pDevice); + if (r != VK_SUCCESS) return r; + + hami_device_dispatch_t *d = hami_device_register(*pDevice, physicalDevice, next_gdpa); + hami_vk_hook_device(d); + return VK_SUCCESS; +} + +static VKAPI_ATTR void VKAPI_CALL +hami_vkDestroyDevice(VkDevice device, const VkAllocationCallbacks *pAllocator) { + hami_device_dispatch_t *d = hami_device_lookup(device); + if (d) d->DestroyDevice(device, pAllocator); + hami_device_unregister(device); +} + +/* GIPA / GDPA: return our wrappers for hooked names, next-layer for the rest. */ + +/* Hooked functions implemented in other TUs; declarations here. 
*/ +VKAPI_ATTR void VKAPI_CALL hami_vkGetPhysicalDeviceMemoryProperties(VkPhysicalDevice, VkPhysicalDeviceMemoryProperties*); +VKAPI_ATTR void VKAPI_CALL hami_vkGetPhysicalDeviceMemoryProperties2(VkPhysicalDevice, VkPhysicalDeviceMemoryProperties2*); +VKAPI_ATTR VkResult VKAPI_CALL hami_vkAllocateMemory(VkDevice, const VkMemoryAllocateInfo*, const VkAllocationCallbacks*, VkDeviceMemory*); +VKAPI_ATTR void VKAPI_CALL hami_vkFreeMemory(VkDevice, VkDeviceMemory, const VkAllocationCallbacks*); +VKAPI_ATTR VkResult VKAPI_CALL hami_vkQueueSubmit(VkQueue, uint32_t, const VkSubmitInfo*, VkFence); +VKAPI_ATTR VkResult VKAPI_CALL hami_vkQueueSubmit2(VkQueue, uint32_t, const VkSubmitInfo2*, VkFence); + +#define HAMI_HOOK(name) do { if (strcmp(pName, "vk" #name) == 0) return (PFN_vkVoidFunction)hami_vk##name; } while (0) + +PFN_vkVoidFunction VKAPI_CALL +hami_vkGetInstanceProcAddr(VkInstance instance, const char *pName) { + HAMI_HOOK(CreateInstance); + HAMI_HOOK(DestroyInstance); + HAMI_HOOK(CreateDevice); + HAMI_HOOK(GetInstanceProcAddr); + HAMI_HOOK(GetPhysicalDeviceMemoryProperties); + HAMI_HOOK(GetPhysicalDeviceMemoryProperties2); + + hami_instance_dispatch_t *d = hami_instance_lookup(instance); + if (!d) return NULL; + return d->next_gipa(instance, pName); +} + +PFN_vkVoidFunction VKAPI_CALL +hami_vkGetDeviceProcAddr(VkDevice device, const char *pName) { + HAMI_HOOK(DestroyDevice); + HAMI_HOOK(GetDeviceProcAddr); + HAMI_HOOK(AllocateMemory); + HAMI_HOOK(FreeMemory); + HAMI_HOOK(QueueSubmit); + HAMI_HOOK(QueueSubmit2); + + hami_device_dispatch_t *d = hami_device_lookup(device); + if (!d) return NULL; + return d->next_gdpa(device, pName); +} + +VK_LAYER_EXPORT VkResult VKAPI_CALL +vkNegotiateLoaderLayerInterfaceVersion(VkNegotiateLayerInterface *pVersionStruct) { + if (pVersionStruct->sType != LAYER_NEGOTIATE_INTERFACE_STRUCT) + return VK_ERROR_INITIALIZATION_FAILED; + + if (pVersionStruct->loaderLayerInterfaceVersion > 2) + pVersionStruct->loaderLayerInterfaceVersion = 2; 
+ + pVersionStruct->pfnGetInstanceProcAddr = hami_vkGetInstanceProcAddr; + pVersionStruct->pfnGetDeviceProcAddr = hami_vkGetDeviceProcAddr; + pVersionStruct->pfnGetPhysicalDeviceProcAddr = NULL; + return VK_SUCCESS; +} + +/* Placeholders — real bodies live in hooks_memory.c / hooks_submit.c. + Define weak stubs here so layer.c alone compiles during TDD of Task 1.1. */ +#ifndef HAMI_VK_HOOKS_PRESENT +void hami_vk_hook_instance(hami_instance_dispatch_t *d) { (void)d; } +void hami_vk_hook_device(hami_device_dispatch_t *d) { (void)d; } +VKAPI_ATTR void VKAPI_CALL hami_vkGetPhysicalDeviceMemoryProperties(VkPhysicalDevice p, VkPhysicalDeviceMemoryProperties *o) { + hami_instance_dispatch_t *d = g_inst_head; (void)d; (void)p; (void)o; +} +VKAPI_ATTR void VKAPI_CALL hami_vkGetPhysicalDeviceMemoryProperties2(VkPhysicalDevice p, VkPhysicalDeviceMemoryProperties2 *o) { (void)p; (void)o; } +VKAPI_ATTR VkResult VKAPI_CALL hami_vkAllocateMemory(VkDevice d, const VkMemoryAllocateInfo *i, const VkAllocationCallbacks *a, VkDeviceMemory *m) { (void)d;(void)i;(void)a;(void)m; return VK_ERROR_OUT_OF_DEVICE_MEMORY; } +VKAPI_ATTR void VKAPI_CALL hami_vkFreeMemory(VkDevice d, VkDeviceMemory m, const VkAllocationCallbacks *a) { (void)d;(void)m;(void)a; } +VKAPI_ATTR VkResult VKAPI_CALL hami_vkQueueSubmit(VkQueue q, uint32_t n, const VkSubmitInfo *s, VkFence f) { (void)q;(void)n;(void)s;(void)f; return VK_SUCCESS; } +VKAPI_ATTR VkResult VKAPI_CALL hami_vkQueueSubmit2(VkQueue q, uint32_t n, const VkSubmitInfo2 *s, VkFence f) { (void)q;(void)n;(void)s;(void)f; return VK_SUCCESS; } +#endif +``` + +- [ ] **Step 7: 레이어만으로 임시 빌드 및 테스트 통과 확인** + +Run (from `libvgpu/`): +```bash +cc -shared -fPIC -o /tmp/libvgpu_stub.so \ + src/vulkan/layer.c src/vulkan/dispatch.c \ + -I/usr/include -lpthread +cc -o /tmp/t test/vulkan/test_layer.c -ldl +cd /tmp && cp /tmp/libvgpu_stub.so ./libvgpu.so && ./t +``` +Expected: `ok: layer entry point negotiates`. 
+ +- [ ] **Step 8: 커밋 (libvgpu 레포)** + +Run (from `libvgpu/`): +```bash +git checkout -b vulkan-layer +git add src/vulkan/layer.h src/vulkan/layer.c src/vulkan/dispatch.h src/vulkan/dispatch.c test/vulkan/test_layer.c +git commit -m "feat(vulkan): add layer entry point and dispatch skeleton" +``` + +--- + +### Task 1.2: `vkGetPhysicalDeviceMemoryProperties[2]` 힙 클램프 + +**Files:** +- Create: `libvgpu/src/vulkan/hooks_memory.c` +- Modify: `libvgpu/src/vulkan/layer.c` (스텁 제거) + +- [ ] **Step 1: 실패 테스트 작성** + +Create `libvgpu/test/vulkan/test_memprops.c`: +```c +#include +#include +#include +#include +#include "../../src/vulkan/dispatch.h" + +/* pod budget stub used by hooks_memory.c; real implementation in memory module */ +size_t hami_pod_memory_budget(int dev_idx) { (void)dev_idx; return 1ull << 30; /* 1 GiB */ } + +/* fake next-layer property query reporting 8 GiB device-local heap */ +static void VKAPI_CALL fake_next(VkPhysicalDevice p, VkPhysicalDeviceMemoryProperties *out) { + (void)p; + memset(out, 0, sizeof(*out)); + out->memoryHeapCount = 1; + out->memoryHeaps[0].size = 8ull << 30; + out->memoryHeaps[0].flags = VK_MEMORY_HEAP_DEVICE_LOCAL_BIT; +} + +extern VKAPI_ATTR void VKAPI_CALL +hami_vkGetPhysicalDeviceMemoryProperties(VkPhysicalDevice p, VkPhysicalDeviceMemoryProperties *out); + +int main(void) { + VkInstance inst = (VkInstance)0x1; + hami_instance_dispatch_t *d = hami_instance_register(inst, NULL); + d->GetPhysicalDeviceMemoryProperties = fake_next; + + VkPhysicalDeviceMemoryProperties props; + hami_vkGetPhysicalDeviceMemoryProperties((VkPhysicalDevice)0x2, &props); + assert(props.memoryHeapCount == 1); + assert(props.memoryHeaps[0].size == (1ull << 30)); + printf("ok: heap clamped to 1 GiB\n"); + return 0; +} +``` + +- [ ] **Step 2: 테스트 빌드 (기대: stub이 clamp를 안 하므로 실패)** + +Run (from `libvgpu/`): +```bash +cc -o /tmp/tm -DHAMI_VK_HOOKS_PRESENT \ + src/vulkan/layer.c src/vulkan/dispatch.c \ + test/vulkan/test_memprops.c -lpthread +/tmp/tm +``` +Expected: 
링크 에러 (hooks_memory.c 아직 없음) — 또는 `hami_vk_hook_*` 미정의. + +- [ ] **Step 3: `hooks_memory.c` 작성 (클램프 + instance hook 설치)** + +Create `libvgpu/src/vulkan/hooks_memory.c`: +```c +#include "dispatch.h" +#include + +/* Provided by the budget module (Phase 2 integrates with existing counter). + For now declared here, implemented by the unit test or the memory module. */ +size_t hami_pod_memory_budget(int dev_idx); + +static int physdev_index(VkPhysicalDevice p) { + /* Simplification: layer sees only devices already filtered by NVIDIA_VISIBLE_DEVICES. + Use pointer-hash low bits as a stable index within the process. Replace with + NVML UUID lookup during Task 2.1 integration. */ + return (int)(((uintptr_t)p >> 4) & 0xff); +} + +static void clamp_heaps(VkPhysicalDevice p, uint32_t *count, VkMemoryHeap *heaps) { + size_t budget = hami_pod_memory_budget(physdev_index(p)); + for (uint32_t i = 0; i < *count; ++i) { + if ((heaps[i].flags & VK_MEMORY_HEAP_DEVICE_LOCAL_BIT) == 0) continue; + if (heaps[i].size > budget) heaps[i].size = budget; + } +} + +VKAPI_ATTR void VKAPI_CALL +hami_vkGetPhysicalDeviceMemoryProperties(VkPhysicalDevice p, + VkPhysicalDeviceMemoryProperties *out) { + hami_instance_dispatch_t *d = hami_instance_lookup(VK_NULL_HANDLE); /* caller already registered */ + /* Find the dispatch holding this physical device's instance. For simplicity walk any. 
*/ + extern hami_instance_dispatch_t *g_inst_head; + (void)d; + for (hami_instance_dispatch_t *it = g_inst_head; it; it = it->next) { + if (it->GetPhysicalDeviceMemoryProperties) { + it->GetPhysicalDeviceMemoryProperties(p, out); + clamp_heaps(p, &out->memoryHeapCount, out->memoryHeaps); + return; + } + } +} + +VKAPI_ATTR void VKAPI_CALL +hami_vkGetPhysicalDeviceMemoryProperties2(VkPhysicalDevice p, + VkPhysicalDeviceMemoryProperties2 *out) { + extern hami_instance_dispatch_t *g_inst_head; + for (hami_instance_dispatch_t *it = g_inst_head; it; it = it->next) { + if (it->GetPhysicalDeviceMemoryProperties2) { + it->GetPhysicalDeviceMemoryProperties2(p, out); + clamp_heaps(p, &out->memoryProperties.memoryHeapCount, out->memoryProperties.memoryHeaps); + return; + } + } +} + +void hami_vk_hook_instance(hami_instance_dispatch_t *d) { + /* no per-instance state to install yet */ + (void)d; +} +``` + +또한 `dispatch.c`의 `g_inst_head`를 non-static로 변경해 다른 TU가 접근 가능하게 한다: + +Modify `libvgpu/src/vulkan/dispatch.c:6`: +```c +/* expose to sibling TUs for walk */ +hami_instance_dispatch_t *g_inst_head = NULL; +hami_device_dispatch_t *g_dev_head = NULL; +``` +(기존 `static` 제거) + +- [ ] **Step 4: layer.c의 clamp/allocate stub 제거** + +Modify `libvgpu/src/vulkan/layer.c` — 파일 끝 `#ifndef HAMI_VK_HOOKS_PRESENT` 블록 중 `hami_vkGetPhysicalDeviceMemoryProperties[2]` stub만 삭제 (할당/제출 stub은 Task 1.3/1.5에서 제거). + +- [ ] **Step 5: 테스트 빌드 및 실행 (이번엔 통과해야 함)** + +Run: +```bash +cc -o /tmp/tm -DHAMI_VK_HOOKS_PRESENT \ + src/vulkan/layer.c src/vulkan/dispatch.c src/vulkan/hooks_memory.c \ + test/vulkan/test_memprops.c -lpthread +/tmp/tm +``` +Expected: `ok: heap clamped to 1 GiB`. 
+ +- [ ] **Step 6: 커밋** + +```bash +git add src/vulkan/hooks_memory.c src/vulkan/layer.c src/vulkan/dispatch.c test/vulkan/test_memprops.c +git commit -m "feat(vulkan): clamp device-local heap size to pod budget" +``` + +--- + +### Task 1.3: `vkAllocateMemory` / `vkFreeMemory` 버짓 강제 (개정) + +**API 주의:** HAMi-core의 실제 카운터는 `oom_check` (체크만, `1`=OOM) + `add_gpu_device_memory_usage(pid,dev,size,type)` / `rm_gpu_device_memory_usage(pid,dev,size,type)` 2단계. `type` 파라미터는 allocator.c가 CUDA 경로에서 `2`를 사용함. 원자성은 CUDA와 동일 수준(느슨한 TOCTOU). 어댑터는 Task 1.6에서 추가되므로, 이 Task는 Vulkan 훅이 참조할 **공개 어댑터 시그니처**를 사용한다: `int hami_budget_reserve(int dev, size_t size)` / `void hami_budget_release(int dev, size_t size)` / `size_t hami_budget_of(int dev)`. 구현은 Task 1.6. + +**Files:** +- Create: `libvgpu/src/vulkan/hooks_alloc.c` +- Modify: `libvgpu/src/vulkan/layer.c` (해당 stub 제거) + +- [ ] **Step 1: 실패 테스트 작성** + +Create `libvgpu/test/vulkan/test_alloc.c`: +```c +#include +#include +#include +#include +#include "../../src/vulkan/dispatch.h" + +/* Budget adapter stubs (real implementation arrives in Task 1.6). 
*/ +static size_t g_used = 0; +static const size_t BUDGET = 1ull << 30; /* 1 GiB */ + +size_t hami_budget_of(int dev) { (void)dev; return BUDGET; } +int hami_budget_reserve(int dev, size_t size) { + (void)dev; + if (g_used + size > BUDGET) return 0; /* 0 = refuse */ + g_used += size; + return 1; /* 1 = granted */ +} +void hami_budget_release(int dev, size_t size) { (void)dev; g_used -= size; } + +static VkResult VKAPI_CALL fake_alloc(VkDevice d, const VkMemoryAllocateInfo *i, + const VkAllocationCallbacks *a, VkDeviceMemory *m) { + (void)d;(void)a; *m = (VkDeviceMemory)(uintptr_t)(i->allocationSize); + return VK_SUCCESS; +} +static void VKAPI_CALL fake_free(VkDevice d, VkDeviceMemory m, const VkAllocationCallbacks *a) { (void)d;(void)m;(void)a; } + +extern VKAPI_ATTR VkResult VKAPI_CALL +hami_vkAllocateMemory(VkDevice, const VkMemoryAllocateInfo*, const VkAllocationCallbacks*, VkDeviceMemory*); +extern VKAPI_ATTR void VKAPI_CALL +hami_vkFreeMemory(VkDevice, VkDeviceMemory, const VkAllocationCallbacks*); + +int main(void) { + VkDevice dev = (VkDevice)0x1; + hami_device_dispatch_t *d = hami_device_register(dev, (VkPhysicalDevice)0x2, NULL); + d->AllocateMemory = fake_alloc; + d->FreeMemory = fake_free; + + VkMemoryAllocateInfo info = { .sType=VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO, .allocationSize=(512ull<<20) }; + VkDeviceMemory m1, m2, m3; + + assert(hami_vkAllocateMemory(dev, &info, NULL, &m1) == VK_SUCCESS); + assert(hami_vkAllocateMemory(dev, &info, NULL, &m2) == VK_SUCCESS); + assert(hami_vkAllocateMemory(dev, &info, NULL, &m3) == VK_ERROR_OUT_OF_DEVICE_MEMORY); + + hami_vkFreeMemory(dev, m1, NULL); + assert(hami_vkAllocateMemory(dev, &info, NULL, &m3) == VK_SUCCESS); + printf("ok: allocate/free budget enforced\n"); + return 0; +} +``` + +- [ ] **Step 2: 테스트 빌드 실패 확인** + +Run (from `libvgpu/`): +```bash +cc -o /tmp/ta -DHAMI_VK_HOOKS_PRESENT -I./src \ + src/vulkan/layer.c src/vulkan/dispatch.c src/vulkan/hooks_memory.c \ + test/vulkan/test_alloc.c -lpthread 
+``` +Expected: 링크 에러 — `-DHAMI_VK_HOOKS_PRESENT`가 layer.c의 stub 블록을 제외하므로 `hami_vkAllocateMemory` / `hami_vkFreeMemory` 등이 미정의 (`hooks_alloc.c` 아직 없음). 이 명령은 바이너리를 실행하지 않으므로 런타임 assertion 단계까지 가지 않는다. + +- [ ] **Step 3: `hooks_alloc.c` 작성** + +Create `libvgpu/src/vulkan/hooks_alloc.c`: +```c +#include "dispatch.h" +#include +#include +#include + +/* Public budget-adapter API. Implemented in Task 1.6 (src/vulkan/budget.c) + * and stubbed by unit tests here. The adapter encapsulates HAMi-core's + * oom_check / add_gpu_device_memory_usage / rm_gpu_device_memory_usage + * 2-step protocol so Vulkan hooks see a single reserve/release call. + * Note: the pair is NOT atomic — it has the same loose check-then-commit + * (TOCTOU) window as the CUDA path. */ +int hami_budget_reserve(int dev, size_t size); /* 1 = granted, 0 = refused */ +void hami_budget_release(int dev, size_t size); +size_t hami_budget_of(int dev); /* 0 = unlimited */ + +typedef struct mem_entry { + VkDeviceMemory handle; + size_t size; + int dev_idx; + struct mem_entry *next; +} mem_entry_t; + +static mem_entry_t *g_mem_head = NULL; +static pthread_mutex_t g_mem_lock = PTHREAD_MUTEX_INITIALIZER; + +/* Provisional device-index heuristic (pointer hash). Replaced with + * NVML UUID lookup when the adapter in Task 1.6 lands. 
*/ +static int device_to_index(VkDevice d) { + return (int)(((uintptr_t)d >> 4) & 0xff); +} + +VKAPI_ATTR VkResult VKAPI_CALL +hami_vkAllocateMemory(VkDevice device, const VkMemoryAllocateInfo *pInfo, + const VkAllocationCallbacks *pAlloc, VkDeviceMemory *pMem) { + hami_device_dispatch_t *d = hami_device_lookup(device); + if (!d || !d->AllocateMemory) return VK_ERROR_INITIALIZATION_FAILED; + + int idx = device_to_index(device); + if (!hami_budget_reserve(idx, pInfo->allocationSize)) + return VK_ERROR_OUT_OF_DEVICE_MEMORY; + + VkResult r = d->AllocateMemory(device, pInfo, pAlloc, pMem); + if (r != VK_SUCCESS) { + hami_budget_release(idx, pInfo->allocationSize); + return r; + } + + mem_entry_t *e = calloc(1, sizeof(*e)); + e->handle = *pMem; + e->size = pInfo->allocationSize; + e->dev_idx = idx; + + pthread_mutex_lock(&g_mem_lock); + e->next = g_mem_head; + g_mem_head = e; + pthread_mutex_unlock(&g_mem_lock); + return VK_SUCCESS; +} + +VKAPI_ATTR void VKAPI_CALL +hami_vkFreeMemory(VkDevice device, VkDeviceMemory mem, const VkAllocationCallbacks *pAlloc) { + hami_device_dispatch_t *d = hami_device_lookup(device); + if (d && d->FreeMemory) d->FreeMemory(device, mem, pAlloc); + + pthread_mutex_lock(&g_mem_lock); + mem_entry_t **pp = &g_mem_head; + while (*pp && (*pp)->handle != mem) pp = &(*pp)->next; + if (*pp) { + mem_entry_t *victim = *pp; + *pp = victim->next; + pthread_mutex_unlock(&g_mem_lock); + hami_budget_release(victim->dev_idx, victim->size); + free(victim); + return; + } + pthread_mutex_unlock(&g_mem_lock); +} + +void hami_vk_hook_device(hami_device_dispatch_t *d) { (void)d; } +``` + +Also update `hooks_memory.c` (from Task 1.2) to use the new budget adapter name — change the `hami_pod_memory_budget` call to `hami_budget_of` and the forward declaration accordingly. If Task 1.2's file used `hami_pod_memory_budget`, rename: +```c +/* was: extern size_t hami_pod_memory_budget(int); */ +extern size_t hami_budget_of(int); +... 
+size_t budget = hami_budget_of(physdev_index(p)); +if (budget == 0) return; /* 0 = unlimited sentinel; skip clamp */ +``` +And update `test/vulkan/test_memprops.c` test stubs to `hami_budget_of` accordingly. + +- [ ] **Step 4: layer.c의 allocate/free stub 제거** + +Modify `libvgpu/src/vulkan/layer.c` — 파일 끝 `#ifndef HAMI_VK_HOOKS_PRESENT` 블록에서 `hami_vkAllocateMemory`, `hami_vkFreeMemory`, `hami_vk_hook_device` stub 삭제 (QueueSubmit stub은 Task 1.5까지 유지). + +- [ ] **Step 5: 테스트 통과 확인** + +Run: +```bash +cc -o /tmp/ta -DHAMI_VK_HOOKS_PRESENT -I./src \ + src/vulkan/layer.c src/vulkan/dispatch.c src/vulkan/hooks_memory.c src/vulkan/hooks_alloc.c \ + test/vulkan/test_alloc.c -lpthread +/tmp/ta +``` +Expected: `ok: allocate/free budget enforced`. + +- [ ] **Step 6: 커밋** + +```bash +git add src/vulkan/hooks_alloc.c src/vulkan/hooks_memory.c src/vulkan/layer.c test/vulkan/test_alloc.c test/vulkan/test_memprops.c +git commit -m "feat(vulkan): enforce pod memory budget on vkAllocateMemory/vkFreeMemory" +``` + +--- + +### Task 1.4: Vulkan용 throttle 어댑터 (개정 — 추출 불필요) + +**이 Task는 초기 플랜에서 "throttle 폴링 루프 추출"이었으나, Task 0.2 탐색 결과 HAMi-core는 이미 토큰 버킷 구조(`rate_limiter` 소비자 + `utilization_watcher` 생산자 스레드)로 모듈화되어 있어 추출할 필요가 없습니다. 대신, Vulkan TU가 기존 `rate_limiter`를 호출할 수 있도록 얇은 어댑터 1개만 추가합니다.** + +**Files:** +- Create: `libvgpu/src/vulkan/throttle_adapter.h` +- Create: `libvgpu/src/vulkan/throttle_adapter.c` + +- [ ] **Step 1: 실패 테스트 작성 (어댑터 호출 가능성 검증)** + +Create `libvgpu/test/vulkan/test_throttle_adapter.c`: +```c +#include +#include +#include "../../src/vulkan/throttle_adapter.h" + +/* Stub of HAMi-core's rate_limiter so this test links without the full lib. 
*/ +static int g_rl_calls = 0; +void rate_limiter(int grids, int blocks) { (void)grids;(void)blocks; g_rl_calls++; } + +int main(void) { + hami_vulkan_throttle(); + hami_vulkan_throttle(); + assert(g_rl_calls == 2); + printf("ok: adapter forwards to rate_limiter\n"); + return 0; +} +``` + +- [ ] **Step 2: 빌드 실패 확인** + +Run (from `libvgpu/`): +```bash +cc -o /tmp/ttha -I./src test/vulkan/test_throttle_adapter.c +``` +Expected: `throttle_adapter.h` 없음 → 컴파일 실패. + +- [ ] **Step 3: 어댑터 헤더/구현 작성** + +Create `libvgpu/src/vulkan/throttle_adapter.h`: +```c +#ifndef HAMI_VK_THROTTLE_ADAPTER_H +#define HAMI_VK_THROTTLE_ADAPTER_H + +/* Consume one "compute unit" token from the HAMi-core SM rate limiter. + * When the HAMi SM limit is 0 or >= 100 (unlimited), this is a no-op + * inherited from the underlying rate_limiter. Call once per Vulkan + * vkQueueSubmit/vkQueueSubmit2 before forwarding to the next layer. */ +void hami_vulkan_throttle(void); + +#endif +``` + +Create `libvgpu/src/vulkan/throttle_adapter.c`: +```c +#include "throttle_adapter.h" + +/* Defined in libvgpu/src/multiprocess/multiprocess_utilization_watcher.c + * (linked into the same libvgpu.so at final link time). The release + * -fvisibility=hidden setting does not break this reference: hidden + * visibility only keeps a symbol out of the shared object's dynamic + * symbol table, while a function with external linkage remains callable + * from any other TU linked into the same shared object. */ +extern void rate_limiter(int grids, int blocks); + +void hami_vulkan_throttle(void) { + /* Consume one token — represents "one queue submission". The + * rate_limiter interprets (grids*blocks) as the claim size; we use + * the smallest unit (1,1) so Vulkan submits compete fairly with + * tiny CUDA kernel launches. */ + rate_limiter(1, 1); +} +``` + +- [ ] **Step 4: 테스트 통과 확인** + +Run: +```bash +cc -o /tmp/ttha -I./src \ + src/vulkan/throttle_adapter.c test/vulkan/test_throttle_adapter.c +/tmp/ttha +``` +Expected: `ok: adapter forwards to rate_limiter`. 
+ +- [ ] **Step 5: 커밋** + +```bash +git add src/vulkan/throttle_adapter.h src/vulkan/throttle_adapter.c test/vulkan/test_throttle_adapter.c +git commit -m "feat(vulkan): thin adapter forwarding queue submit throttling to rate_limiter" +``` + +**주의사항 (노트 참조):** `rate_limiter`는 `sm_limit==0` / `sm_limit>=100` / `get_utilization_switch()==0` 조건에서 사일런트 no-op 합니다. 사용자가 Vulkan만 SM 분할하려 할 때 별도 스위치가 필요하면 후속 Task(v2)에서 `HAMI_VULKAN_SM_SWITCH` env를 추가하도록 남겨둡니다. + +--- + +### Task 1.5: `vkQueueSubmit[2]` throttle 훅 (개정) + +**Files:** +- Create: `libvgpu/src/vulkan/hooks_submit.c` +- Modify: `libvgpu/src/vulkan/layer.c` (나머지 stub 제거) + +- [ ] **Step 1: 실패 테스트 작성** + +Create `libvgpu/test/vulkan/test_submit.c`: +```c +#include +#include +#include +#include +#include "../../src/vulkan/dispatch.h" + +static int g_submit_called = 0; +static VkResult VKAPI_CALL fake_submit(VkQueue q, uint32_t n, const VkSubmitInfo *s, VkFence f) { + (void)q;(void)n;(void)s;(void)f; g_submit_called++; return VK_SUCCESS; +} + +/* Throttle adapter stub — verifies the hook calls the adapter exactly once + * per submit before forwarding to the next layer. 
*/ +static int g_throttle_called = 0; +void hami_vulkan_throttle(void) { g_throttle_called++; } + +extern VKAPI_ATTR VkResult VKAPI_CALL +hami_vkQueueSubmit(VkQueue, uint32_t, const VkSubmitInfo*, VkFence); +extern void hami_vk_register_queue(VkQueue q, VkDevice d); + +int main(void) { + VkDevice dev = (VkDevice)0x11; + VkQueue q = (VkQueue)0x22; + hami_device_dispatch_t *d = hami_device_register(dev, (VkPhysicalDevice)0, NULL); + d->QueueSubmit = fake_submit; + hami_vk_register_queue(q, dev); + + VkResult r = hami_vkQueueSubmit(q, 0, NULL, VK_NULL_HANDLE); + assert(r == VK_SUCCESS); + assert(g_throttle_called == 1); + assert(g_submit_called == 1); + printf("ok: submit hook throttles then forwards\n"); + return 0; +} +``` + +- [ ] **Step 2: 테스트 빌드 실패 확인** + +Run (from `libvgpu/`): +```bash +cc -o /tmp/ts -DHAMI_VK_HOOKS_PRESENT -I./src \ + src/vulkan/layer.c src/vulkan/dispatch.c src/vulkan/hooks_memory.c src/vulkan/hooks_alloc.c \ + test/vulkan/test_submit.c -lpthread +``` +Expected: `hami_vk_register_queue` 미정의 + layer.c의 QueueSubmit stub이 throttle 호출 안 함. + +- [ ] **Step 3: `hooks_submit.c` 작성** + +Create `libvgpu/src/vulkan/hooks_submit.c`: +```c +#include "dispatch.h" +#include "throttle_adapter.h" +#include +#include + +/* Queue → Device registry populated by a future vkGetDeviceQueue hook. For + * now we expose a public register function used by both the layer's + * vkGetDeviceQueue wrapper (added in Task 1.5 Step 4) and by unit tests. 
*/ +typedef struct q_entry { VkQueue q; VkDevice d; struct q_entry *next; } q_entry_t; +static q_entry_t *g_q_head = NULL; +static pthread_mutex_t g_q_lock = PTHREAD_MUTEX_INITIALIZER; + +void hami_vk_register_queue(VkQueue q, VkDevice d) { + q_entry_t *e = calloc(1, sizeof(*e)); + e->q = q; e->d = d; + pthread_mutex_lock(&g_q_lock); + e->next = g_q_head; g_q_head = e; + pthread_mutex_unlock(&g_q_lock); +} + +static VkDevice device_for_queue(VkQueue q) { + pthread_mutex_lock(&g_q_lock); + q_entry_t *p = g_q_head; + while (p && p->q != q) p = p->next; + VkDevice d = p ? p->d : VK_NULL_HANDLE; + pthread_mutex_unlock(&g_q_lock); + return d; +} + +VKAPI_ATTR VkResult VKAPI_CALL +hami_vkQueueSubmit(VkQueue queue, uint32_t n, const VkSubmitInfo *p, VkFence f) { + VkDevice d = device_for_queue(queue); + hami_device_dispatch_t *dd = hami_device_lookup(d); + if (!dd || !dd->QueueSubmit) return VK_ERROR_INITIALIZATION_FAILED; + hami_vulkan_throttle(); + return dd->QueueSubmit(queue, n, p, f); +} + +VKAPI_ATTR VkResult VKAPI_CALL +hami_vkQueueSubmit2(VkQueue queue, uint32_t n, const VkSubmitInfo2 *p, VkFence f) { + VkDevice d = device_for_queue(queue); + hami_device_dispatch_t *dd = hami_device_lookup(d); + if (!dd || !dd->QueueSubmit2) return VK_ERROR_INITIALIZATION_FAILED; + hami_vulkan_throttle(); + return dd->QueueSubmit2(queue, n, p, f); +} +``` + +- [ ] **Step 4: layer.c에 `vkGetDeviceQueue` / `vkGetDeviceQueue2` 훅 추가 + 잔여 stub 제거** + +Modify `libvgpu/src/vulkan/layer.c` — `hami_vkDestroyDevice` 다음에 추가: +```c +extern void hami_vk_register_queue(VkQueue q, VkDevice d); + +static VKAPI_ATTR void VKAPI_CALL +hami_vkGetDeviceQueue(VkDevice device, uint32_t family, uint32_t index, VkQueue *pQueue) { + hami_device_dispatch_t *d = hami_device_lookup(device); + if (!d) { *pQueue = VK_NULL_HANDLE; return; } + PFN_vkGetDeviceQueue next = (PFN_vkGetDeviceQueue)d->next_gdpa(device, "vkGetDeviceQueue"); + next(device, family, index, pQueue); + if (*pQueue) 
hami_vk_register_queue(*pQueue, device); +} + +static VKAPI_ATTR void VKAPI_CALL +hami_vkGetDeviceQueue2(VkDevice device, const VkDeviceQueueInfo2 *pInfo, VkQueue *pQueue) { + hami_device_dispatch_t *d = hami_device_lookup(device); + if (!d) { *pQueue = VK_NULL_HANDLE; return; } + PFN_vkGetDeviceQueue2 next = (PFN_vkGetDeviceQueue2)d->next_gdpa(device, "vkGetDeviceQueue2"); + next(device, pInfo, pQueue); + if (*pQueue) hami_vk_register_queue(*pQueue, device); +} +``` +그리고 `hami_vkGetDeviceProcAddr` 내부의 `HAMI_HOOK(...)` 목록에 추가: +```c + HAMI_HOOK(GetDeviceQueue); + HAMI_HOOK(GetDeviceQueue2); +``` +마지막으로 `#ifndef HAMI_VK_HOOKS_PRESENT` 블록 **전체를 삭제** (모든 훅이 이제 실제 구현됨). + +- [ ] **Step 5: 테스트 통과 확인** + +Run: +```bash +cc -o /tmp/ts -DHAMI_VK_HOOKS_PRESENT -I./src \ + src/vulkan/layer.c src/vulkan/dispatch.c \ + src/vulkan/hooks_memory.c src/vulkan/hooks_alloc.c src/vulkan/hooks_submit.c \ + test/vulkan/test_submit.c -lpthread +/tmp/ts +``` +Expected: `ok: submit hook throttles then forwards`. + +- [ ] **Step 6: 커밋** + +```bash +git add src/vulkan/hooks_submit.c src/vulkan/layer.c test/vulkan/test_submit.c +git commit -m "feat(vulkan): throttle vkQueueSubmit[2] via rate_limiter adapter" +``` + +--- + +### Task 1.6: 버짓 어댑터 구현 (개정 — 실제 HAMi-core API 반영) + +**Files:** +- Create: `libvgpu/src/vulkan/budget.c` +- Create: `libvgpu/src/vulkan/budget.h` + +Vulkan 훅(Task 1.2, 1.3)이 의존하는 3개 공개 API(`hami_budget_reserve` / `hami_budget_release` / `hami_budget_of`)의 실제 구현. HAMi-core의 실제 함수(`oom_check`, `add_gpu_device_memory_usage`, `rm_gpu_device_memory_usage`, `get_current_device_memory_limit`)를 감싼다. CUDA 경로와 동일한 2단계(체크 → 커밋) 프로토콜을 사용. + +- [ ] **Step 1: 헤더 파일 작성** + +Create `libvgpu/src/vulkan/budget.h`: +```c +#ifndef HAMI_VK_BUDGET_H +#define HAMI_VK_BUDGET_H +#include <stddef.h> + +/* Reserve `size` bytes on device `dev` for a Vulkan allocation.
+ * Returns 1 when the allocation fits the pod budget and the usage + * counter has been incremented; 0 when the request would exceed the + * budget (caller must return VK_ERROR_OUT_OF_DEVICE_MEMORY). If the + * budget is unlimited (HAMi-core limit sentinel == 0), the budget check + * is skipped and the call fails only when the usage counter cannot be + * updated. */ +int hami_budget_reserve(int dev, size_t size); + +/* Inverse of a successful reserve — decrements the usage counter. */ +void hami_budget_release(int dev, size_t size); + +/* Current per-device budget in bytes. Returns 0 when unlimited. */ +size_t hami_budget_of(int dev); + +#endif +``` + +- [ ] **Step 2: 구현 파일 작성** + +Create `libvgpu/src/vulkan/budget.c`: +```c +#include "budget.h" +#include <stdint.h> +#include <unistd.h> /* getpid */ + +/* HAMi-core internal symbols — linked from the same libvgpu.so. + * See docs/superpowers/plans/notes/hami-core-layout.md for semantics. */ +extern int oom_check(const int dev, size_t addon); /* 1 = OOM, 0 = OK */ +extern int add_gpu_device_memory_usage(int32_t pid, int dev, + size_t usage, int type); /* 0 = success, 1 = failure */ +extern int rm_gpu_device_memory_usage(int32_t pid, int dev, + size_t usage, int type); /* 0 = success */ +extern uint64_t get_current_device_memory_limit(const int dev); /* 0 = unlimited */ + +/* Use type=2 matching the existing CUDA allocator path (src/allocator/allocator.c). + * HAMi-core's shared-region accounting tracks usage by (pid, dev) regardless of + * type, so reusing this tag keeps Vulkan and CUDA allocations in the same bucket. */ +#define HAMI_MEM_TYPE_DEVICE 2 + +int hami_budget_reserve(int dev, size_t size) { + if (get_current_device_memory_limit(dev) == 0) { + /* Unlimited — skip check, but still bump the counter so metrics remain + * accurate. add_gpu_device_memory_usage returns 0 on success; on + * failure (shared region full etc.) treat as OOM.
*/ + return add_gpu_device_memory_usage(getpid(), dev, size, HAMI_MEM_TYPE_DEVICE) == 0; + } + if (oom_check(dev, size)) return 0; /* would exceed budget */ + return add_gpu_device_memory_usage(getpid(), dev, size, HAMI_MEM_TYPE_DEVICE) == 0; +} + +void hami_budget_release(int dev, size_t size) { + rm_gpu_device_memory_usage(getpid(), dev, size, HAMI_MEM_TYPE_DEVICE); +} + +size_t hami_budget_of(int dev) { + return (size_t)get_current_device_memory_limit(dev); +} +``` + +- [ ] **Step 3: Vulkan hook 소스에서 선언 일관화** + +기존 `hooks_alloc.c` (Task 1.3 Step 3)의 상단 3줄 forward declaration을 삭제하고 `#include "budget.h"`로 교체: +```c +#include "budget.h" +``` +(Task 1.3의 원래 파일에는 이미 `int hami_budget_reserve(...)` 등의 extern 선언이 있으므로 그 3줄을 지우고 헤더 include로 대체). + +마찬가지로 `hooks_memory.c` (Task 1.2)의 `extern size_t hami_budget_of(int);` 선언 대신 `#include "budget.h"`. + +- [ ] **Step 4: 실 라이브러리 빌드 시만 `budget.c` 포함, 단위 테스트는 제외** + +단위 테스트(`test/vulkan/test_alloc.c`, `test_memprops.c`)에는 이미 `hami_budget_reserve` / `hami_budget_release` / `hami_budget_of` 스텁이 정의되어 있음. 테스트 바이너리 빌드 커맨드에 `budget.c`를 **포함하지 않는다** (중복 정의 방지). 실 `libvgpu.so` 빌드(Task 1.8)에는 포함. + +- [ ] **Step 5: 기존 모든 단위 테스트 회귀 없음 확인** + +```bash +cc -o /tmp/tm -DHAMI_VK_HOOKS_PRESENT -I./src \ + src/vulkan/layer.c src/vulkan/dispatch.c src/vulkan/hooks_memory.c \ + test/vulkan/test_memprops.c -lpthread && /tmp/tm +cc -o /tmp/ta -DHAMI_VK_HOOKS_PRESENT -I./src \ + src/vulkan/layer.c src/vulkan/dispatch.c \ + src/vulkan/hooks_memory.c src/vulkan/hooks_alloc.c \ + test/vulkan/test_alloc.c -lpthread && /tmp/ta +cc -o /tmp/ts -DHAMI_VK_HOOKS_PRESENT -I./src \ + src/vulkan/layer.c src/vulkan/dispatch.c \ + src/vulkan/hooks_memory.c src/vulkan/hooks_alloc.c src/vulkan/hooks_submit.c \ + test/vulkan/test_submit.c -lpthread && /tmp/ts +cc -o /tmp/ttha -I./src \ + src/vulkan/throttle_adapter.c test/vulkan/test_throttle_adapter.c && /tmp/ttha +``` +Expected: 4개 모두 `ok:...`. 
+ +- [ ] **Step 6: 커밋** + +```bash +git add src/vulkan/budget.h src/vulkan/budget.c src/vulkan/hooks_alloc.c src/vulkan/hooks_memory.c +git commit -m "feat(vulkan): budget adapter bridges hook layer to HAMi-core counters" +``` + +--- + +### Task 1.7: 레이어 매니페스트 JSON + +**Files:** +- Create: `libvgpu/etc/vulkan/implicit_layer.d/hami.json` + +- [ ] **Step 1: 매니페스트 파일 작성** + +Create `libvgpu/etc/vulkan/implicit_layer.d/hami.json`: +```json +{ + "file_format_version": "1.2.0", + "layer": { + "name": "VK_LAYER_HAMI_vgpu", + "type": "GLOBAL", + "library_path": "/usr/local/vgpu/libvgpu.so", + "api_version": "1.3.0", + "implementation_version": "1", + "description": "HAMi Vulkan vGPU limiter", + "enable_environment": { "HAMI_VULKAN_ENABLE": "1" }, + "disable_environment": { "HAMI_VULKAN_DISABLE": "1" } + } +} +``` + +- [ ] **Step 2: JSON 문법 검증** + +Run: +```bash +python3 -m json.tool libvgpu/etc/vulkan/implicit_layer.d/hami.json > /dev/null && echo ok +``` +Expected: `ok`. + +- [ ] **Step 3: 커밋** + +```bash +git add etc/vulkan/implicit_layer.d/hami.json +git commit -m "feat(vulkan): ship implicit layer manifest gated by HAMI_VULKAN_ENABLE" +``` + +--- + +### Task 1.8: CMake / Dockerfile 통합 (개정 — CMake OBJECT 라이브러리 패턴) + +**빌드 실체:** HAMi-core는 Makefile이 `./build.sh`를 호출하고, `build.sh`가 `cmake`로 `src/`와 `test/` 서브디렉토리를 빌드. 각 `src/<모듈>/CMakeLists.txt`는 OBJECT 라이브러리를 만들어 루트 `CMakeLists.txt`에서 `libvgpu.so` 하나로 링크합니다. 따라서 Vulkan 소스도 **OBJECT 라이브러리 `vulkan_mod`**로 추가하고 루트에 링크해야 합니다. 
+ +**Files:** +- Create: `libvgpu/src/vulkan/CMakeLists.txt` +- Modify: `libvgpu/src/CMakeLists.txt` (`add_subdirectory(vulkan)` 추가) +- Modify: `libvgpu/CMakeLists.txt` 루트 (target_link_libraries에 `$<TARGET_OBJECTS:vulkan_mod>` 추가, libvulkan-dev 찾기) +- Modify: `libvgpu/test/CMakeLists.txt` (선택 — glob이 `test/vulkan/*.c`도 잡도록 확장) +- Modify: `libvgpu/dockerfiles/Dockerfile.<변형>` (vulkan-headers + manifest 복사) + +- [ ] **Step 1: 현재 CMake 구조 재확인** + +Run (from `libvgpu/`): +```bash +cat src/CMakeLists.txt +head -60 CMakeLists.txt +``` +Expected: `add_subdirectory(multiprocess|allocator|cuda|nvml)` 4줄, 루트에 각 OBJECT lib를 `target_sources`/`target_link_libraries`로 합치는 블록. + +- [ ] **Step 2: Vulkan 서브디렉토리 CMakeLists 작성** + +Create `libvgpu/src/vulkan/CMakeLists.txt`: +```cmake +find_path(VULKAN_HEADERS vulkan/vulkan.h + HINTS ENV VULKAN_SDK + PATH_SUFFIXES include + PATHS /usr/include /usr/local/include) +if(NOT VULKAN_HEADERS) + message(FATAL_ERROR "vulkan/vulkan.h not found. Install libvulkan-dev or set VULKAN_SDK.") +endif() + +add_library(vulkan_mod OBJECT + layer.c + dispatch.c + hooks_memory.c + hooks_alloc.c + hooks_submit.c + throttle_adapter.c + budget.c +) + +target_include_directories(vulkan_mod PRIVATE + ${VULKAN_HEADERS} + ${CMAKE_SOURCE_DIR}/src +) + +target_compile_options(vulkan_mod PRIVATE -fPIC) +``` + +- [ ] **Step 3: `src/CMakeLists.txt`에 서브디렉토리 등록** + +Modify `libvgpu/src/CMakeLists.txt` — 기존 4줄 뒤에 한 줄 추가: +```cmake +add_subdirectory(vulkan) +``` + +- [ ] **Step 4: 루트 CMakeLists에서 `vulkan_mod` 링크** + +Modify `libvgpu/CMakeLists.txt` — `vgpu` target의 소스 리스트에 `vulkan_mod` OBJECT를 합친다. 기존 패턴이 `$<TARGET_OBJECTS:cuda_mod>` 등을 사용하고 있다면 같은 줄 뒤에 추가: +```cmake +target_sources(vgpu PRIVATE + $<TARGET_OBJECTS:multiprocess_mod> + $<TARGET_OBJECTS:allocator_mod> + $<TARGET_OBJECTS:cuda_mod> + $<TARGET_OBJECTS:nvml_mod> + $<TARGET_OBJECTS:vulkan_mod> # NEW +) +``` +(실제 라인 위치는 Step 1의 출력으로 확인. 위 코드는 기존 패턴에 한 줄 추가하는 것을 기준으로 한 예시.)
+ +- [ ] **Step 5: 매니페스트를 install 단계에 포함** + +Modify `libvgpu/CMakeLists.txt` 루트에 install 블록이 있으면 그 안에, 없으면 새로: +```cmake +install(FILES etc/vulkan/implicit_layer.d/hami.json + DESTINATION /etc/vulkan/implicit_layer.d) +``` +(CMake install 규칙을 이미지 빌드 단계에서 쓰지 않으면, Dockerfile에서 직접 `COPY`로 처리 — Step 7 참조.) + +- [ ] **Step 6: `test/CMakeLists.txt`에 Vulkan 테스트 포함 확인** + +기존 `test/CMakeLists.txt`가 `file(GLOB ... test/*.c)` 패턴이면 하위 `test/vulkan/`, `test/common/`를 별도로 추가해야 한다. 루트 `test/CMakeLists.txt`에 다음을 추가: +```cmake +file(GLOB VULKAN_TESTS "vulkan/*.c") +foreach(tsrc ${VULKAN_TESTS}) + get_filename_component(tname ${tsrc} NAME_WE) + add_executable(${tname} ${tsrc}) + target_include_directories(${tname} PRIVATE ${CMAKE_SOURCE_DIR}/src) + target_link_libraries(${tname} PRIVATE pthread) +endforeach() +``` +단, 이 테스트들은 `src/vulkan/*.c`를 **다시 컴파일**해 자체 바이너리로 링크해야 하므로, 위 코드만으로는 빌드 실패. 단위 테스트는 CI가 아니라 로컬 수동 검증 도구로 놔두고 `make test` 타겟은 기존 CUDA 테스트만 돌리도록 유지하는 것이 실용적. **권장**: 위 `add_executable` 블록은 넣지 않고, `test-vulkan` 용 수동 명령을 `docs/superpowers/plans/notes/vulkan-test-howto.md`에 기록. + +- [ ] **Step 7: Dockerfile에 Vulkan 헤더 + 매니페스트 포함** + +Modify `libvgpu/dockerfiles/Dockerfile.hami-core` (또는 존재하는 가장 주된 Dockerfile; Step 1에서 `ls dockerfiles/` 확인): +```dockerfile +# Build stage — add vulkan headers before cmake runs +RUN apt-get update && apt-get install -y --no-install-recommends \ + libvulkan-dev \ + && rm -rf /var/lib/apt/lists/* + +# Runtime (final) stage — ship manifest +COPY etc/vulkan/implicit_layer.d/hami.json \ + /etc/vulkan/implicit_layer.d/hami.json +``` +정확한 위치는 기존 Dockerfile의 stage 구조에 맞춘다. + +- [ ] **Step 8: 전체 빌드 확인** + +Run (from `libvgpu/`): +```bash +make build 2>&1 | tail -40 +``` +Expected: `libvgpu.so` 빌드 성공. `nm libvgpu.so | grep vkNegotiateLoaderLayerInterfaceVersion` 이 `T` 심볼 표시. + +- [ ] **Step 9: 수동 단위 테스트 재실행 확인** + +Task 1.6 Step 5와 동일한 4개 cc 명령으로 모든 테스트가 PASS하는지 확인. 
+ +- [ ] **Step 10: 커밋** + +```bash +git add CMakeLists.txt src/CMakeLists.txt src/vulkan/CMakeLists.txt dockerfiles/ +git commit -m "build(vulkan): integrate vulkan_mod OBJECT lib and ship implicit layer manifest" +``` + +--- + +### Task 1.9: HAMi-core PR 푸시 및 릴리스 태그 + +**Files:** (메타 작업) + +- [ ] **Step 1: 브랜치 푸시** + +Run (from `libvgpu/`): +```bash +git push -u origin vulkan-layer +``` + +- [ ] **Step 2: PR 생성** + +```bash +gh pr create --title "feat(vulkan): vGPU partitioning for Vulkan workloads" \ + --body "$(cat <<'EOF' +## Summary +- Vulkan implicit layer VK_LAYER_HAMI_vgpu (activated by HAMI_VULKAN_ENABLE=1) +- vkAllocateMemory/vkFreeMemory share the existing CUDA VRAM counter +- vkGetPhysicalDeviceMemoryProperties[2] clamps device-local heap to pod budget +- vkQueueSubmit[2] routes through the shared SM utilization throttle +- Manifest ships to /etc/vulkan/implicit_layer.d/hami.json + +Design: Project-HAMi/HAMi docs/superpowers/specs/2026-04-21-vulkan-vgpu-partitioning-design.md + +## Test plan +- [x] unit: test_layer, test_memprops, test_alloc, test_submit, test_throttle +- [ ] integration: vulkaninfo in HAMi-scheduled pod +- [ ] regression: existing CUDA hooks unaffected +EOF +)" +``` + +- [ ] **Step 3: PR URL 기록** + +PR URL을 `docs/superpowers/plans/notes/hami-core-pr.md`에 적는다 (HAMi 쪽 Task 2.6에서 참조). + +- [ ] **Step 4: 릴리스 태그 준비 (머지 후 별도)** + +PR 머지 후, HAMi-core 메인테이너가 릴리스 태그(예: `v1.7.0`)를 잘라 이미지(`projecthami/hami-vgpu:v1.7.0`)를 푸시. 이 Task 안에서는 릴리스 태그 이름만 `docs/superpowers/plans/notes/hami-core-pr.md`에 기록. + +--- + +## Phase 2 — HAMi (Go) 웹훅 + +### Task 2.1: Vulkan annotation 상수 및 실패 테스트 + +**Files:** +- Modify: `pkg/device/nvidia/device.go:39-57` (const 블록) +- Modify: `pkg/device/nvidia/device_test.go` (뒤에 신규 테스트 추가) + +- [ ] **Step 1: 상수 추가** + +Modify `pkg/device/nvidia/device.go:39`, 기존 const 블록 끝에 추가: +```go +const ( + HandshakeAnnos = "hami.io/node-handshake" + // ... 기존 상수 ... 
+ MpsMode = "mps" + + // Vulkan vGPU partitioning (added 2026-04-21) + VulkanEnableAnno = "hami.io/vulkan" + VulkanLayerName = "VK_LAYER_HAMI_vgpu" + NvidiaDriverCapsEnvVar = "NVIDIA_DRIVER_CAPABILITIES" + HamiVulkanEnvVar = "HAMI_VULKAN_ENABLE" +) +``` + +(Go의 const 선언은 한 블록에 합치지 말고, 기존 블록에 뒤에 붙이거나 별도 블록으로 추가. 프로젝트 컨벤션상 별도 블록이 더 깔끔.) + +- [ ] **Step 2: 실패 단위 테스트 작성** + +Append to `pkg/device/nvidia/device_test.go`: +```go +func TestMutateAdmission_VulkanAnno_AddsGraphicsCap(t *testing.T) { + dev := &NvidiaGPUDevices{ + config: NvidiaConfig{ + ResourceCountName: "nvidia.com/gpu", + ResourceMemoryName: "nvidia.com/gpumem", + ResourceCoreName: "nvidia.com/gpucores", + ResourceMemoryPercentageName: "nvidia.com/gpumem-percentage", + }, + } + ctr := &corev1.Container{ + Resources: corev1.ResourceRequirements{ + Limits: corev1.ResourceList{ + "nvidia.com/gpu": *resource.NewQuantity(1, resource.BinarySI), + }, + }, + } + pod := &corev1.Pod{ + ObjectMeta: metav1.ObjectMeta{ + Annotations: map[string]string{VulkanEnableAnno: "true"}, + }, + } + _, err := dev.MutateAdmission(ctr, pod) + assert.NilError(t, err) + + var caps, enable string + for _, e := range ctr.Env { + if e.Name == NvidiaDriverCapsEnvVar { + caps = e.Value + } + if e.Name == HamiVulkanEnvVar { + enable = e.Value + } + } + assert.Assert(t, strings.Contains(caps, "graphics"), "expected graphics in caps, got %q", caps) + assert.Equal(t, enable, "1") +} +``` + +`metav1` import 추가: `pkg/device/nvidia/device_test.go` 상단 import 블록에 `metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"` 이미 있는지 확인; 없으면 추가. `strings` 동일. + +- [ ] **Step 3: 테스트 실패 확인** + +Run: +```bash +go test ./pkg/device/nvidia/ -run TestMutateAdmission_VulkanAnno_AddsGraphicsCap -v +``` +Expected: FAIL (아직 로직 미구현). 
+ +- [ ] **Step 4: 커밋** + +```bash +git add pkg/device/nvidia/device.go pkg/device/nvidia/device_test.go +git commit -m "test(nvidia): failing test for Vulkan annotation env injection" +``` + +--- + +### Task 2.2: `MutateAdmission`에 Vulkan 로직 추가 + +**Files:** +- Modify: `pkg/device/nvidia/device.go:342-378` (MutateAdmission) + +- [ ] **Step 1: 헬퍼 함수 추가** + +Modify `pkg/device/nvidia/device.go` — `MutateAdmission` 함수 아래(또는 파일 끝)에 추가: +```go +// mergeGraphicsCap returns the union of existing NVIDIA_DRIVER_CAPABILITIES +// tokens with "graphics". If existing contains "all", it is returned unchanged. +// An empty existing value becomes "compute,utility,graphics" (baseline needed +// for Vulkan ICD plus existing HAMi CUDA path). +func mergeGraphicsCap(existing string) string { + if existing == "" { + return "compute,utility,graphics" + } + tokens := strings.Split(existing, ",") + seen := make(map[string]struct{}, len(tokens)) + for _, t := range tokens { + t = strings.TrimSpace(t) + if t == "" { + continue + } + if t == "all" { + return existing + } + seen[t] = struct{}{} + } + if _, ok := seen["graphics"]; ok { + return existing + } + tokens = append(tokens, "graphics") + // normalize: trim spaces, drop empties + cleaned := make([]string, 0, len(tokens)) + for _, t := range tokens { + t = strings.TrimSpace(t) + if t != "" { + cleaned = append(cleaned, t) + } + } + return strings.Join(cleaned, ",") +} + +// applyVulkanAnnotation mutates the container env when the pod opts into +// Vulkan partitioning. No-op otherwise. 
+func applyVulkanAnnotation(ctr *corev1.Container, pod *corev1.Pod) { + if pod == nil || pod.Annotations[VulkanEnableAnno] != "true" { + return + } + + capsIdx := -1 + for i, e := range ctr.Env { + if e.Name == NvidiaDriverCapsEnvVar { + capsIdx = i + break + } + } + merged := mergeGraphicsCap("") + if capsIdx >= 0 { + merged = mergeGraphicsCap(ctr.Env[capsIdx].Value) + } + if capsIdx >= 0 { + ctr.Env[capsIdx].Value = merged + } else { + ctr.Env = append(ctr.Env, corev1.EnvVar{Name: NvidiaDriverCapsEnvVar, Value: merged}) + } + + hasEnable := false + for _, e := range ctr.Env { + if e.Name == HamiVulkanEnvVar { + hasEnable = true + break + } + } + if !hasEnable { + ctr.Env = append(ctr.Env, corev1.EnvVar{Name: HamiVulkanEnvVar, Value: "1"}) + } +} +``` + +- [ ] **Step 2: `MutateAdmission`에서 호출** + +Modify `pkg/device/nvidia/device.go:365-370` (기존 `if hasResource` 블록 바로 뒤에 추가): +```go + if hasResource { + // Set runtime class name if it is not set by user and the runtime class name is configured + if p.Spec.RuntimeClassName == nil && dev.config.RuntimeClassName != "" { + p.Spec.RuntimeClassName = &dev.config.RuntimeClassName + } + applyVulkanAnnotation(ctr, p) + } +``` + +- [ ] **Step 3: 테스트 통과 확인** + +Run: +```bash +go test ./pkg/device/nvidia/ -run TestMutateAdmission_VulkanAnno_AddsGraphicsCap -v +``` +Expected: PASS. 
+ +- [ ] **Step 4: 커밋** + +```bash +git add pkg/device/nvidia/device.go +git commit -m "feat(nvidia): inject Vulkan env when pod carries hami.io/vulkan annotation" +``` + +--- + +### Task 2.3: Caps 병합 엣지 케이스 테스트 + +**Files:** +- Modify: `pkg/device/nvidia/device_test.go` + +- [ ] **Step 1: 추가 테스트들 작성** + +Append to `pkg/device/nvidia/device_test.go`: +```go +func TestMutateAdmission_VulkanAnno_MergesExistingCaps(t *testing.T) { + dev := &NvidiaGPUDevices{ + config: NvidiaConfig{ + ResourceCountName: "nvidia.com/gpu", + ResourceMemoryName: "nvidia.com/gpumem", + ResourceCoreName: "nvidia.com/gpucores", + ResourceMemoryPercentageName: "nvidia.com/gpumem-percentage", + }, + } + ctr := &corev1.Container{ + Env: []corev1.EnvVar{{Name: NvidiaDriverCapsEnvVar, Value: "compute,utility"}}, + Resources: corev1.ResourceRequirements{ + Limits: corev1.ResourceList{ + "nvidia.com/gpu": *resource.NewQuantity(1, resource.BinarySI), + }, + }, + } + pod := &corev1.Pod{ + ObjectMeta: metav1.ObjectMeta{Annotations: map[string]string{VulkanEnableAnno: "true"}}, + } + _, _ = dev.MutateAdmission(ctr, pod) + + var caps string + for _, e := range ctr.Env { + if e.Name == NvidiaDriverCapsEnvVar { + caps = e.Value + } + } + assert.Assert(t, strings.Contains(caps, "compute")) + assert.Assert(t, strings.Contains(caps, "utility")) + assert.Assert(t, strings.Contains(caps, "graphics")) +} + +func TestMutateAdmission_VulkanAnno_AllCaps_NoChange(t *testing.T) { + dev := &NvidiaGPUDevices{ + config: NvidiaConfig{ + ResourceCountName: "nvidia.com/gpu", + }, + } + ctr := &corev1.Container{ + Env: []corev1.EnvVar{{Name: NvidiaDriverCapsEnvVar, Value: "all"}}, + Resources: corev1.ResourceRequirements{ + Limits: corev1.ResourceList{ + "nvidia.com/gpu": *resource.NewQuantity(1, resource.BinarySI), + }, + }, + } + pod := &corev1.Pod{ + ObjectMeta: metav1.ObjectMeta{Annotations: map[string]string{VulkanEnableAnno: "true"}}, + } + _, _ = dev.MutateAdmission(ctr, pod) + + for _, e := range ctr.Env { + if 
e.Name == NvidiaDriverCapsEnvVar { + assert.Equal(t, e.Value, "all") + } + } +} + +func TestMutateAdmission_NoVulkanAnno_NoChange(t *testing.T) { + dev := &NvidiaGPUDevices{ + config: NvidiaConfig{ResourceCountName: "nvidia.com/gpu"}, + } + ctr := &corev1.Container{ + Resources: corev1.ResourceRequirements{ + Limits: corev1.ResourceList{ + "nvidia.com/gpu": *resource.NewQuantity(1, resource.BinarySI), + }, + }, + } + pod := &corev1.Pod{} + _, _ = dev.MutateAdmission(ctr, pod) + for _, e := range ctr.Env { + assert.Assert(t, e.Name != NvidiaDriverCapsEnvVar, "unexpected caps env") + assert.Assert(t, e.Name != HamiVulkanEnvVar, "unexpected enable env") + } +} + +func TestMutateAdmission_VulkanAnno_NoGPUResource(t *testing.T) { + dev := &NvidiaGPUDevices{ + config: NvidiaConfig{ + ResourceCountName: "nvidia.com/gpu", + ResourceMemoryName: "nvidia.com/gpumem", + ResourceCoreName: "nvidia.com/gpucores", + ResourceMemoryPercentageName: "nvidia.com/gpumem-percentage", + }, + } + ctr := &corev1.Container{Resources: corev1.ResourceRequirements{Limits: corev1.ResourceList{}}} + pod := &corev1.Pod{ + ObjectMeta: metav1.ObjectMeta{Annotations: map[string]string{VulkanEnableAnno: "true"}}, + } + _, _ = dev.MutateAdmission(ctr, pod) + for _, e := range ctr.Env { + assert.Assert(t, e.Name != HamiVulkanEnvVar, "no Vulkan env on non-GPU pod") + } +} + +func TestMutateAdmission_VulkanAnno_IdempotentHamiEnable(t *testing.T) { + dev := &NvidiaGPUDevices{ + config: NvidiaConfig{ResourceCountName: "nvidia.com/gpu"}, + } + ctr := &corev1.Container{ + Env: []corev1.EnvVar{{Name: HamiVulkanEnvVar, Value: "1"}}, + Resources: corev1.ResourceRequirements{ + Limits: corev1.ResourceList{ + "nvidia.com/gpu": *resource.NewQuantity(1, resource.BinarySI), + }, + }, + } + pod := &corev1.Pod{ + ObjectMeta: metav1.ObjectMeta{Annotations: map[string]string{VulkanEnableAnno: "true"}}, + } + _, _ = dev.MutateAdmission(ctr, pod) + count := 0 + for _, e := range ctr.Env { + if e.Name == HamiVulkanEnvVar { 
+ count++ + } + } + assert.Equal(t, count, 1) +} +``` + +- [ ] **Step 2: 모두 PASS 확인** + +Run: +```bash +go test ./pkg/device/nvidia/ -run TestMutateAdmission_VulkanAnno -v +``` +Expected: 5 tests PASS. + +- [ ] **Step 3: 기존 전체 테스트 회귀 없음 확인** + +Run: +```bash +go test ./pkg/device/nvidia/... +``` +Expected: PASS 전체. + +- [ ] **Step 4: 커밋** + +```bash +git add pkg/device/nvidia/device_test.go +git commit -m "test(nvidia): cover Vulkan annotation edge cases" +``` + +--- + +### Task 2.4: HAMi-core submodule 포인터 업데이트 + +**Files:** +- Modify: `libvgpu` submodule reference + +- [ ] **Step 1: Phase 1에서 머지된 HAMi-core 커밋 확인** + +Task 1.9의 PR이 머지된 후, `libvgpu` 레포 main의 최신 커밋 SHA를 확보. + +- [ ] **Step 2: submodule 업데이트** + +Run: +```bash +cd libvgpu +git fetch origin main +git checkout main +git pull +cd .. +git diff --submodule libvgpu +``` +Expected: `libvgpu ..` 한 줄. + +- [ ] **Step 3: submodule 포인터 커밋** + +Run: +```bash +git add libvgpu +git commit -m "deps: bump libvgpu to include Vulkan vGPU layer" +``` + +--- + +## Phase 3 — 예제 및 문서 + +### Task 3.1: Vulkan 예제 파드 + +**Files:** +- Create: `examples/nvidia/vulkan_example.yaml` + +- [ ] **Step 1: 예제 YAML 작성** + +Create `examples/nvidia/vulkan_example.yaml`: +```yaml +apiVersion: v1 +kind: Pod +metadata: + name: hami-vulkan-example + annotations: + hami.io/vulkan: "true" +spec: + restartPolicy: Never + containers: + - name: vulkaninfo + # any image with vulkaninfo + libvulkan1 + image: khronosgroup/vulkan-samples:latest + command: ["vulkaninfo"] + resources: + limits: + nvidia.com/gpu: "1" + nvidia.com/gpumem: "1024" # 1 GiB VRAM budget + nvidia.com/gpucores: "30" # 30% SM throttle +``` + +- [ ] **Step 2: 커밋** + +```bash +git add examples/nvidia/vulkan_example.yaml +git commit -m "example: Vulkan vGPU partitioned pod" +``` + +--- + +### Task 3.2: 지원 문서 (영문) + +**Files:** +- Create: `docs/vulkan-vgpu-support.md` + +- [ ] **Step 1: 문서 작성** + +Create `docs/vulkan-vgpu-support.md`: +```markdown +# Vulkan vGPU Support + +HAMi 
partitions NVIDIA GPUs for Vulkan workloads by injecting a Vulkan implicit +layer (`VK_LAYER_HAMI_vgpu`) that shares the same VRAM and SM budgets used by +the existing CUDA hooks. + +## Enabling Vulkan partitioning + +Add the `hami.io/vulkan: "true"` annotation to any pod that uses HAMi NVIDIA +resources. The webhook will: + +- Union `graphics` into `NVIDIA_DRIVER_CAPABILITIES` so the NVIDIA Container + Toolkit mounts the Vulkan ICD and graphics libraries. +- Set `HAMI_VULKAN_ENABLE=1` which activates the HAMi Vulkan layer via its + `enable_environment` clause in the implicit layer manifest. + +Example: `examples/nvidia/vulkan_example.yaml`. + +## What gets limited + +- `nvidia.com/gpumem` enforces VRAM allocation across **both** CUDA and Vulkan + in the container, sharing a single budget. +- `nvidia.com/gpucores` throttles Vulkan `vkQueueSubmit[2]` using the same + NVML-based polling loop as `cuLaunchKernel`. +- `vkGetPhysicalDeviceMemoryProperties[2]` clamps the device-local heap size + to the pod budget so apps that size allocations from this value self-limit. + +## What is not limited (yet) + +- Vulkan Video (`VK_KHR_video_queue`) submissions. +- Frame-pacing jitter introduced by throttling on graphics queues (documented + behavior; strict/cooperative modes are a future option). + +## Troubleshooting + +| Symptom | Check | +|---------|-------| +| Container has no `vulkan` CLI / libs | Annotation absent or `NVIDIA_DRIVER_CAPABILITIES` already frozen to `compute` by image. | +| `vkAllocateMemory` always succeeds | Layer did not activate — ensure `HAMI_VULKAN_ENABLE=1` set and `/etc/vulkan/implicit_layer.d/hami.json` exists. | +| `vulkaninfo` still shows full VRAM heap | Layer manifest not loaded; run `VK_LOADER_DEBUG=all vulkaninfo` to see layer scan. 
| +``` + +- [ ] **Step 2: 커밋** + +```bash +git add docs/vulkan-vgpu-support.md +git commit -m "docs: Vulkan vGPU support guide" +``` + +--- + +### Task 3.3: 중국어 번역 + +**Files:** +- Create: `docs/vulkan-vgpu-support_cn.md` + +- [ ] **Step 1: 영문 문서를 중국어로 번역해서 작성** + +Create `docs/vulkan-vgpu-support_cn.md`: +```markdown +# Vulkan vGPU 支持 + +HAMi 通过注入 Vulkan 隐式层(`VK_LAYER_HAMI_vgpu`)对 NVIDIA GPU 进行 Vulkan 工作负载的切分。该层与已有的 CUDA 钩子共享同一套 VRAM 与 SM 预算。 + +## 启用方式 + +在使用 HAMi NVIDIA 资源的 Pod 上添加 annotation `hami.io/vulkan: "true"`。Webhook 会: + +- 将 `graphics` 合并进 `NVIDIA_DRIVER_CAPABILITIES`,以便 NVIDIA Container Toolkit 挂载 Vulkan ICD 与图形库。 +- 设置 `HAMI_VULKAN_ENABLE=1`,通过隐式层 manifest 的 `enable_environment` 激活 HAMi Vulkan 层。 + +示例:`examples/nvidia/vulkan_example.yaml`。 + +## 生效范围 + +- `nvidia.com/gpumem` 对容器内 CUDA 与 Vulkan 的 VRAM 分配**共享同一预算**。 +- `nvidia.com/gpucores` 通过与 `cuLaunchKernel` 相同的 NVML 轮询机制对 `vkQueueSubmit[2]` 进行限速。 +- `vkGetPhysicalDeviceMemoryProperties[2]` 将 device-local 堆大小裁剪为 Pod 预算。 + +## 未涵盖项(未来工作) + +- Vulkan Video(`VK_KHR_video_queue`)提交。 +- 图形队列限速导致的帧抖动(已记录,未来提供 strict/cooperative 模式)。 + +## 故障排查 + +| 现象 | 检查 | +|------|------| +| 容器没有 Vulkan 库 | annotation 缺失,或镜像已冻结 `NVIDIA_DRIVER_CAPABILITIES=compute`。 | +| `vkAllocateMemory` 总是成功 | 层未激活 — 确认 `HAMI_VULKAN_ENABLE=1` 与 `/etc/vulkan/implicit_layer.d/hami.json` 存在。 | +| `vulkaninfo` 仍报告全量 VRAM | Manifest 未加载;可 `VK_LOADER_DEBUG=all vulkaninfo` 查看扫描日志。 | +``` + +- [ ] **Step 2: 커밋** + +```bash +git add docs/vulkan-vgpu-support_cn.md +git commit -m "docs: 中文版 Vulkan vGPU 支持说明" +``` + +--- + +## Phase 4 — 통합 검증 + +### Task 4.1: 수동 E2E — 힙 클램프 확인 + +**Files:** (런타임 실행) + +- [ ] **Step 1: HAMi-core 이미지 빌드** + +Run: +```bash +cd libvgpu && docker build -t projecthami/hami-vgpu:dev . && cd .. 
+``` + +- [ ] **Step 2: HAMi 이미지에 submodule 반영 빌드** + +Run: +```bash +make docker-build +``` +(없으면 기존 CI 명령 사용) + +- [ ] **Step 3: 테스트 클러스터에 배포** + +Run: +```bash +helm upgrade --install hami charts/hami \ + --set scheduler.image.repository=projecthami/hami-scheduler \ + --set scheduler.image.tag=dev \ + --set devicePlugin.image.repository=projecthami/hami-device-plugin \ + --set devicePlugin.image.tag=dev \ + --set vgpu.image.repository=projecthami/hami-vgpu \ + --set vgpu.image.tag=dev +kubectl apply -f examples/nvidia/vulkan_example.yaml +``` + +- [ ] **Step 4: 힙 클램프 확인** + +Run: +```bash +kubectl logs hami-vulkan-example | grep -iE "heap|device local" +``` +Expected: device-local 힙 size가 ≤ 1 GiB (1024 MiB, pod 버짓). + +- [ ] **Step 5: 결과 기록** + +`docs/superpowers/plans/notes/e2e-vulkaninfo.md`에 로그 요약을 적는다. + +- [ ] **Step 6: 커밋** + +```bash +git add docs/superpowers/plans/notes/e2e-vulkaninfo.md +git commit -m "test(e2e): vulkaninfo heap clamp verified in HAMi-scheduled pod" +``` + +--- + +### Task 4.2: 수동 E2E — 할당 초과 시 OOM 반환 + +**Files:** (런타임 실행) + +- [ ] **Step 1: 할당 초과 테스트 스크립트 작성** + +Create `examples/nvidia/vulkan_oom_test.yaml`: +```yaml +apiVersion: v1 +kind: Pod +metadata: + name: hami-vulkan-oom-test + annotations: + hami.io/vulkan: "true" +spec: + restartPolicy: Never + containers: + - name: oom + image: ghcr.io/example/vulkan-alloc-test:latest # 2 GiB를 반복 할당하는 테스트 바이너리 + resources: + limits: + nvidia.com/gpu: "1" + nvidia.com/gpumem: "1024" +``` +(이미지가 없으면, 간단한 C 프로그램 `vkAllocateMemory(2GiB)` 루프를 작성해 별도 이미지로 빌드.) + +- [ ] **Step 2: 실행 및 OOM 확인** + +Run: +```bash +kubectl apply -f examples/nvidia/vulkan_oom_test.yaml +kubectl logs hami-vulkan-oom-test +``` +Expected: 로그에 `VK_ERROR_OUT_OF_DEVICE_MEMORY` 또는 등가 메시지. + +- [ ] **Step 3: 결과 기록 및 커밋** + +`docs/superpowers/plans/notes/e2e-vulkaninfo.md`에 추가 기록. 
+```bash +git add examples/nvidia/vulkan_oom_test.yaml docs/superpowers/plans/notes/e2e-vulkaninfo.md +git commit -m "test(e2e): vulkan OOM returns VK_ERROR_OUT_OF_DEVICE_MEMORY" +``` + +--- + +### Task 4.3: 혼합 워크로드 — CUDA + Vulkan 공유 버짓 + +**Files:** (런타임 실행) + +- [ ] **Step 1: 혼합 컨테이너 파드 작성** + +Create `examples/nvidia/vulkan_cuda_mixed.yaml`: +```yaml +apiVersion: v1 +kind: Pod +metadata: + name: hami-vulkan-cuda-mixed + annotations: + hami.io/vulkan: "true" +spec: + restartPolicy: Never + containers: + - name: app + image: ghcr.io/example/cuda-vulkan-mixed:latest # CUDA 512 MiB + Vulkan 512 MiB + resources: + limits: + nvidia.com/gpu: "1" + nvidia.com/gpumem: "1024" +``` + +- [ ] **Step 2: 실행 및 합산 버짓 준수 확인** + +Run: +```bash +kubectl apply -f examples/nvidia/vulkan_cuda_mixed.yaml +kubectl logs hami-vulkan-cuda-mixed +``` +Expected: 양쪽 할당 성공, 추가 할당 시 OOM. + +- [ ] **Step 3: 커밋** + +```bash +git add examples/nvidia/vulkan_cuda_mixed.yaml +git commit -m "test(e2e): CUDA+Vulkan mixed workload shares single VRAM budget" +``` + +--- + +### Task 4.4: 플랜 아티팩트 정리 및 최종 PR + +**Files:** +- Delete: `docs/superpowers/plans/notes/` (임시 노트) + +- [ ] **Step 1: 노트 디렉토리 제거** + +Run: +```bash +git rm -r docs/superpowers/plans/notes/ +git commit -m "chore: drop temporary planning notes" +``` + +- [ ] **Step 2: HAMi 브랜치 푸시 및 PR** + +Run: +```bash +git push -u origin vulkan-vgpu-partitioning +gh pr create --title "feat(nvidia): Vulkan vGPU partitioning" \ + --body "$(cat <<'EOF' +## Summary +- Webhook injects graphics cap + HAMI_VULKAN_ENABLE=1 when `hami.io/vulkan: "true"` annotation is present +- libvgpu submodule bumped to include Vulkan implicit layer (VK_LAYER_HAMI_vgpu) +- CUDA and Vulkan share the existing `nvidia.com/gpumem` and `nvidia.com/gpucores` budgets +- Docs + example added + +Design: docs/superpowers/specs/2026-04-21-vulkan-vgpu-partitioning-design.md +HAMi-core PR: (link from notes/hami-core-pr.md before deletion) + +## Test plan +- [x] Go unit tests (6 new) +- 
[x] HAMi-core unit tests (layer / memprops / alloc / submit / throttle) +- [x] E2E: vulkaninfo heap clamp +- [x] E2E: vkAllocateMemory OOM at budget +- [x] E2E: CUDA + Vulkan mixed workload shares budget +EOF +)" +``` + +--- + +## 자가 점검 + +### 스펙 커버리지 + +| 스펙 요구사항 | 해당 Task | +|---------------|-----------| +| §3 Activation via annotation | Task 2.2, 2.3 | +| §5.1 Go 상수/로직 | Task 2.1, 2.2 | +| §5.2 C 레이어 엔트리포인트 | Task 1.1 | +| §5.2 메모리 속성 clamp | Task 1.2 | +| §5.2 vkAllocateMemory/vkFreeMemory | Task 1.3 | +| §5.2 vkQueueSubmit throttle | Task 1.4 + 1.5 | +| §5.3 공유 카운터 통합 | Task 1.6 | +| §5.4 Manifest JSON | Task 1.7 | +| §5.5 Build 통합 | Task 1.8 | +| §6 데이터 흐름 (admission + runtime) | Task 2.2 (admission), 1.1~1.5 (runtime) | +| §7 에러 처리 (merge 규칙) | Task 2.3 (edge cases) | +| §8.1 Go 단위 테스트 | Task 2.1, 2.3 | +| §8.2 C 단위 테스트 | Task 1.1~1.5 | +| §8.3 E2E | Task 4.1, 4.2, 4.3 | +| §9 Delivery 순서 | Phase 1 → 2 → 3 → 4 | + +### 타입 일관성 + +- Go: `VulkanEnableAnno`, `NvidiaDriverCapsEnvVar`, `HamiVulkanEnvVar`를 Task 2.1, 2.2, 2.3에서 동일하게 사용. +- C: `hami_budget_reserve(int, size_t)` / `hami_budget_release(int, size_t)` / `hami_budget_of(int)`을 Task 1.2, 1.3, 1.6에서 동일 시그니처 유지. +- C: `hami_throttle_wait(int dev_idx, int util_limit)` Task 1.4, 1.5에서 동일. + +### Placeholder 없음 확인 + +- 모든 "Step"이 실제 커맨드/코드/기대 출력 포함. +- HAMi-core 기존 카운터 함수 이름은 Task 0.2 탐색 노트를 근거로 Task 1.6 어댑터에서 실제 이름으로 교체하도록 지시함 (노트 자체가 아티팩트). +- 테스트 코드는 매 Task마다 full source 포함. diff --git a/docs/superpowers/plans/2026-04-27-volcano-vulkan-vgpu.md b/docs/superpowers/plans/2026-04-27-volcano-vulkan-vgpu.md new file mode 100644 index 000000000..d1d4b1695 --- /dev/null +++ b/docs/superpowers/plans/2026-04-27-volcano-vulkan-vgpu.md @@ -0,0 +1,1114 @@ +# Volcano + Vulkan vGPU 통합 Implementation Plan + +> **For agentic workers:** REQUIRED SUB-SKILL: Use superpowers:subagent-driven-development (recommended) or superpowers:executing-plans to implement this plan task-by-task.
Steps use checkbox (`- [ ]`) syntax for tracking. + +**Goal:** Volcano scheduler 가 운영 중인 클러스터에 HAMi 의 Vulkan vGPU 메모리 partitioning 기능을 통합한다. `xiilab/volcano-vgpu-device-plugin` 의 libvgpu 를 vulkan-layer 가 들어간 HAMi-core 로 교체하고, device-plugin Allocate 에 manifest auto-mount 코드를 추가하며, HAMi 의 mutating webhook 만 별도 helm install 한다. + +**Architecture:** HAMi 본가의 commit `0150ea7` (manifest auto-inject) 패턴을 그대로 fork 에 포팅한다. Dockerfile 의 builder stage 가 manifest 파일을 image 에 ship 하고, vgpu-init script 이 host 에 복사하며, device-plugin 의 `Allocate()` 가 host 파일이 존재하면 container 에 bind-mount 한다. webhook 은 HAMi 본가 helm chart 으로 별도 install 하여 annotation 처리만 담당. + +**Tech Stack:** Go 1.21+, Kubernetes device-plugin v1beta1, NVIDIA Vulkan Loader, libvgpu (HAMi-core vulkan-layer), helm 3, Volcano scheduler. + +**Spec:** `docs/superpowers/specs/2026-04-27-volcano-vulkan-vgpu-design.md` + +## File Structure + +작업은 두 repo 에 걸친다. + +### `xiilab/volcano-vgpu-device-plugin` (PR-1) + +| 파일 | 역할 | 변경 | +|---|---|---| +| `libvgpu` (submodule) | HAMi-core (vulkan-layer 포함) | submodule SHA 갱신 | +| `docker/Dockerfile` | image 빌드. builder stage 에 libvulkan-dev 추가, runtime stage 에 hami.json ship | 2 줄 추가 | +| `pkg/.../plugin/server.go` (또는 동등 위치) | device-plugin Allocate 응답 빌더 | 17 줄 추가 (manifest mount) | +| `volcano-vgpu-device-plugin.yml` | standard mode deploy yaml | image tag 갱신 | +| `volcano-vgpu-device-plugin-cdi.yml` | CDI mode deploy yaml | image tag 갱신 | +| `volcano-vgpu-vulkan-manifest.yml` (NEW) | host 측 manifest 파일 사전 배치 (별도 DaemonSet, fallback) | 신규 | +| `examples/vulkan-pod.yaml` (NEW) | E2E 테스트용 sample pod | 신규 | +| `doc/vulkan-vgpu.md` (NEW) | 사용 가이드 | 신규 | + +### HAMi 본가 (변경 없음) + +helm chart 의 `values.yaml` 으로 webhook only 모드 install. PR 없음. 
+ +--- + +## Task 1: 작업 환경 준비 — repo clone + 브랜치 생성 + +**Files:** +- Clone: `~/git/volcano-vgpu-device-plugin` (xiilab fork) +- Branch: `feat/vulkan-vgpu-support` + +- [ ] **Step 1: Clone xiilab fork** + +```bash +cd ~/git +git clone https://github.com/xiilab/volcano-vgpu-device-plugin.git +cd volcano-vgpu-device-plugin +git remote add upstream https://github.com/Project-HAMi/volcano-vgpu-device-plugin.git +git fetch upstream +``` + +- [ ] **Step 2: 새 브랜치 생성** + +```bash +cd ~/git/volcano-vgpu-device-plugin +git checkout -b feat/vulkan-vgpu-support +git submodule update --init --recursive +``` + +- [ ] **Step 3: 현재 libvgpu submodule SHA 기록** + +```bash +cd ~/git/volcano-vgpu-device-plugin +git -C libvgpu rev-parse HEAD +# Expected: 6660c84... (or whatever the current submodule pin is) +``` + +기록한 SHA 를 노트해 두기 (나중 회귀 비교용). + +- [ ] **Step 4: server.go 위치 파악** + +```bash +cd ~/git/volcano-vgpu-device-plugin +grep -rln "func.*Allocate.*kubeletdevicepluginv1beta1" pkg/ cmd/ 2>/dev/null +``` + +찾은 경로를 노트. 이후 task 들에서 이 경로를 사용 (예시 가정: `pkg/plugin/server.go` 또는 `pkg/util/util.go`). + +- [ ] **Step 5: 빌드 환경 검증 (변경 없는 상태)** + +```bash +cd ~/git/volcano-vgpu-device-plugin +make build 2>&1 | tail -10 +``` + +Expected: 성공. 만약 실패하면 일단 task 진행 멈추고 master 의 build 상태부터 정상화. + +- [ ] **Step 6: Commit (브랜치 시작 마커)** + +```bash +git commit --allow-empty -m "chore: start feat/vulkan-vgpu-support branch" +``` + +--- + +## Task 2: libvgpu submodule 을 vulkan-layer 가 포함된 SHA 로 갱신 + +**Files:** +- Modify: `libvgpu` submodule pointer +- Modify: `.gitmodules` (이미 vulkan-layer branch 추적 중인지 확인, 필요 시 변경) + +- [ ] **Step 1: HAMi 가 사용하는 libvgpu SHA 기록** + +```bash +cd ~/git/HAMi +git -C libvgpu rev-parse HEAD +# Expected: 8d4f712... (cuMemFree[Async] untracked-pointer fallback 포함) +``` + +이 SHA 를 `LIBVGPU_VULKAN_SHA` 로 노트 (이후 step 에서 사용). 
+ +- [ ] **Step 2: volcano-vgpu-device-plugin 의 libvgpu remote 가 HAMi-core 의 vulkan-layer branch 를 가리키는지 확인** + +```bash +cd ~/git/volcano-vgpu-device-plugin +cat .gitmodules +``` + +기대값: + +``` +[submodule "libvgpu"] + path = libvgpu + url = https://github.com/Project-HAMi/HAMi-core.git +``` + +만약 url 이 HAMi 본가 외 fork (e.g., xiilab/HAMi-core) 인 경우, 우리가 사용 중인 vulkan-layer 가 어느 fork 에서 오는지에 맞춰 갱신 필요. HAMi 본가 fork 가 vulkan-layer branch 를 보유하면 그대로 사용. 없으면 xiilab fork 추가: + +```bash +git submodule set-url libvgpu https://github.com/xiilab/HAMi-core.git +``` + +- [ ] **Step 3: submodule 을 LIBVGPU_VULKAN_SHA 로 fast-forward** + +```bash +cd ~/git/volcano-vgpu-device-plugin/libvgpu +git fetch origin +git checkout 8d4f712df2941d9314f534bac0038c2f8b7be41f # LIBVGPU_VULKAN_SHA +cd .. +git add libvgpu +git status +``` + +기대값: `modified: libvgpu (new commits)` 만 표시. + +- [ ] **Step 4: vulkan layer 소스가 들어왔는지 확인** + +```bash +cd ~/git/volcano-vgpu-device-plugin +ls libvgpu/src/vulkan/ +ls libvgpu/etc/vulkan/implicit_layer.d/ +``` + +기대값: `src/vulkan/` 에 `budget.c`, `loader_intercept.c` 등 존재. `etc/vulkan/implicit_layer.d/hami.json` 존재. + +- [ ] **Step 5: Commit submodule 갱신** + +```bash +cd ~/git/volcano-vgpu-device-plugin +git commit -m "deps: bump libvgpu to 8d4f712 (vulkan-layer support)" +``` + +--- + +## Task 3: Dockerfile builder stage 에 libvulkan-dev 추가 + +**Files:** +- Modify: `docker/Dockerfile` (빌더 stage 의 apt install 라인) + +- [ ] **Step 1: 현재 nvbuild stage 의 apt install 라인 확인** + +```bash +cd ~/git/volcano-vgpu-device-plugin +grep -n -E "(FROM .* AS nvbuild|apt|apt-get install)" docker/Dockerfile | head -10 +``` + +이전에 어떤 packages 가 설치되는지 파악. nvbuild stage 가 libvgpu 를 빌드하는 stage. 
+ +- [ ] **Step 2: Dockerfile 의 nvbuild stage apt install 에 libvulkan-dev 추가** + +다음 형태 (HAMi commit `50b37ff` 와 동일한 수정): + +```dockerfile +# nvbuild stage 안의 기존 +RUN apt-get update && apt-get install -y \ + cmake \ + make \ + g++ \ + git \ + libvulkan-dev # ← 신규 라인 +``` + +이미 `libvulkan-dev` 가 설치되어 있으면 skip. + +- [ ] **Step 3: 빌드 검증 (Dockerfile syntax)** + +```bash +cd ~/git/volcano-vgpu-device-plugin +docker build -f docker/Dockerfile -t volcano-vgpu-device-plugin:vulkan-test . 2>&1 | tail -20 +``` + +기대값: 성공. libvgpu 의 vulkan source 도 함께 컴파일되어야 함. 만약 `vulkan_core.h: No such file` 류의 에러가 나면 libvulkan-dev 가 제대로 install 안 됐거나 PATH 미스. + +- [ ] **Step 4: Commit Dockerfile 변경** + +```bash +cd ~/git/volcano-vgpu-device-plugin +git add docker/Dockerfile +git commit -m "build: install libvulkan-dev in nvbuild stage for Vulkan layer compile" +``` + +--- + +## Task 4: Dockerfile 의 runtime stage 에 hami.json ship + +**Files:** +- Modify: `docker/Dockerfile` (runtime stage 의 COPY 라인) + +- [ ] **Step 1: 현재 runtime stage 의 libvgpu.so COPY 라인 확인** + +```bash +cd ~/git/volcano-vgpu-device-plugin +grep -n "libvgpu.so" docker/Dockerfile +``` + +기대값: `COPY --from=nvbuild /libvgpu/build/libvgpu.so ...` 같은 라인. + +- [ ] **Step 2: 그 라인 직후에 hami.json COPY 추가** + +HAMi commit `0150ea7` 와 동일한 한 줄: + +```dockerfile +COPY --from=nvbuild /libvgpu/build/libvgpu.so /k8s-vgpu/lib/nvidia/libvgpu.so."$VERSION" +COPY --from=nvbuild /libvgpu/etc/vulkan/implicit_layer.d/hami.json /k8s-vgpu/lib/nvidia/vulkan/implicit_layer.d/hami.json +``` + +> **Note:** volcano-vgpu-device-plugin 의 path 가 HAMi 의 `/k8s-vgpu/lib/nvidia/` 와 다를 수 있다. Task 1 Step 4 에서 파악한 위치에 맞게 prefix 조정. 일반적으로 같은 prefix. + +- [ ] **Step 3: 빌드 검증 + image 안 hami.json 존재 확인** + +```bash +cd ~/git/volcano-vgpu-device-plugin +docker build -f docker/Dockerfile -t volcano-vgpu-device-plugin:vulkan-test . 
2>&1 | tail -5 +docker run --rm --entrypoint /bin/sh volcano-vgpu-device-plugin:vulkan-test \ + -c "ls -la /k8s-vgpu/lib/nvidia/vulkan/implicit_layer.d/hami.json && cat /k8s-vgpu/lib/nvidia/vulkan/implicit_layer.d/hami.json" +``` + +기대값: 파일 존재 + JSON 내용 출력 (`VK_LAYER_HAMI_vgpu`, `enable_environment: HAMI_VULKAN_ENABLE=1` 등 포함). + +- [ ] **Step 4: Commit** + +```bash +cd ~/git/volcano-vgpu-device-plugin +git add docker/Dockerfile +git commit -m "feat(image): ship Vulkan implicit layer manifest from libvgpu" +``` + +--- + +## Task 5: vgpu-init.sh (또는 동등 init script) 가 host 에 manifest 복사하는지 확인 + +**Files:** +- Inspect: `docker/vgpu-init.sh` (또는 동등) + +- [ ] **Step 1: vgpu-init.sh 위치 확인** + +```bash +cd ~/git/volcano-vgpu-device-plugin +find . -name "vgpu-init.sh" -o -name "init.sh" 2>/dev/null | head +``` + +- [ ] **Step 2: init script 의 host 복사 로직 확인** + +```bash +cat docker/vgpu-init.sh # 또는 발견된 path +``` + +기대 패턴: `cp -r /k8s-vgpu/lib/nvidia/* /usr/local/vgpu/` 또는 동등 (recursive copy 로 vulkan/implicit_layer.d/hami.json 도 함께 host 에 복사됨). + +- [ ] **Step 3: 만약 init script 이 recursive copy 가 아니면 명시적 라인 추가** + +`/k8s-vgpu/lib/nvidia/vulkan/implicit_layer.d/hami.json` 을 `/usr/local/vgpu/vulkan/implicit_layer.d/hami.json` 으로 복사하는 라인 추가 (mkdir -p 포함): + +```bash +mkdir -p /usr/local/vgpu/vulkan/implicit_layer.d +cp -f /k8s-vgpu/lib/nvidia/vulkan/implicit_layer.d/hami.json \ + /usr/local/vgpu/vulkan/implicit_layer.d/hami.json +``` + +이미 recursive copy 로 cover 되면 변경 없음. 
+ +- [ ] **Step 4: 변경 있으면 commit** + +```bash +cd ~/git/volcano-vgpu-device-plugin +git add docker/vgpu-init.sh +git commit -m "build(init): copy Vulkan manifest to host during vgpu-init" +``` + +--- + +## Task 6: device-plugin 의 Allocate 에 manifest mount 코드 추가 + +**Files:** +- Modify: Task 1 Step 4 에서 발견한 server.go 위치 (가정: `pkg/plugin/server.go`) + +- [ ] **Step 1: Allocate 함수 안의 license mount 라인 (앵커) 위치 찾기** + +```bash +cd ~/git/volcano-vgpu-device-plugin +grep -n "license" pkg/plugin/server.go # 또는 발견된 server.go path +``` + +HAMi 의 `0150ea7` 는 license mount 직전에 vulkan manifest mount 를 추가했다. 같은 앵커 라인 위에 추가. + +- [ ] **Step 2: server.go 에 manifest mount 코드 추가** + +HAMi 본가 commit `0150ea7` 의 server.go 패치를 그대로 포팅. 정확한 코드: + +```go +// Mount Vulkan implicit layer manifest so the HAMi Vulkan layer +// activates for pods that set HAMI_VULKAN_ENABLE=1 (done by the +// webhook when the pod carries hami.io/vulkan="true"). +// The manifest file is placed on the host by vgpu-init.sh as part +// of the standard lib distribution; skip the mount if it is +// absent so we do not block pod startup on nodes that have not +// yet been populated. +vulkanManifestHost := hostHookPath + "/vgpu/vulkan/implicit_layer.d/hami.json" +if _, err := os.Stat(vulkanManifestHost); err == nil { + response.Mounts = append(response.Mounts, &kubeletdevicepluginv1beta1.Mount{ + ContainerPath: "/etc/vulkan/implicit_layer.d/hami.json", + HostPath: vulkanManifestHost, + ReadOnly: true, + }) +} +``` + +> **Note:** `hostHookPath` 변수 이름이 volcano-vgpu-device-plugin 에서 다를 수 있다 (`hostMountPath`, `vgpuPath` 등). HAMi 에서는 일반적으로 `/usr/local` 이 기본값이며, 위 코드가 `/vgpu/...` 를 덧붙이므로 최종 host 경로는 `/usr/local/vgpu/vulkan/implicit_layer.d/hami.json` 이 된다. fork 의 동등 변수 이름으로 대체하되, 그 변수가 이미 `/usr/local/vgpu` 를 가리킨다면 `/vgpu` prefix 를 빼고 조합해야 한다 (`/vgpu/vgpu/...` 중복 방지). + +- [ ] **Step 3: import 확인** + +`os.Stat` 사용하므로 `os` import 가 이미 있어야 한다 (다른 mount 코드에서 사용 중일 가능성 큼). 만약 없으면 추가: + +```go +import ( + "os" + // existing imports... +) +``` + +- [ ] **Step 4: 빌드 검증** + +```bash +cd ~/git/volcano-vgpu-device-plugin +go build ./... 2>&1 | head -20 +``` + +기대값: error 0.
`hostHookPath` 가 정의되지 않았거나 `kubeletdevicepluginv1beta1` import 누락이면 컴파일 실패 → 변수 이름 또는 import 조정. + +- [ ] **Step 5: 단위 테스트 — manifest 파일 존재/부재 시나리오 (TDD)** + +server.go 와 같은 패키지에 `server_vulkan_test.go` 생성: + +```go +package plugin + +import ( + "os" + "path/filepath" + "testing" +) + +func TestVulkanManifestMount_Present(t *testing.T) { + tmp := t.TempDir() + // manifest 파일 사전 배치 + manifestDir := filepath.Join(tmp, "vgpu", "vulkan", "implicit_layer.d") + if err := os.MkdirAll(manifestDir, 0755); err != nil { + t.Fatal(err) + } + manifestPath := filepath.Join(manifestDir, "hami.json") + if err := os.WriteFile(manifestPath, []byte("{}"), 0644); err != nil { + t.Fatal(err) + } + + // hostHookPath = tmp 라고 가정하고 mount 빌더 호출 (실제 함수 이름은 fork 에 맞춰 조정) + mounts := buildVulkanManifestMount(tmp) + if len(mounts) != 1 { + t.Fatalf("expected 1 mount, got %d", len(mounts)) + } + if mounts[0].ContainerPath != "/etc/vulkan/implicit_layer.d/hami.json" { + t.Errorf("unexpected ContainerPath: %s", mounts[0].ContainerPath) + } + if mounts[0].HostPath != manifestPath { + t.Errorf("unexpected HostPath: %s", mounts[0].HostPath) + } + if !mounts[0].ReadOnly { + t.Error("expected ReadOnly=true") + } +} + +func TestVulkanManifestMount_Absent(t *testing.T) { + tmp := t.TempDir() + // 파일 없음 — mount 응답에 추가하지 말아야 함 + mounts := buildVulkanManifestMount(tmp) + if len(mounts) != 0 { + t.Errorf("expected 0 mounts when manifest absent, got %d", len(mounts)) + } +} +``` + +함수 추출이 어렵다면 (인라인 코드라면) 일단 본 테스트는 skip 하고 Task 13 의 E2E 통합 검증으로 대체 (이 경우 Step 7 의 `git add` 에서 `server_vulkan_test.go` 는 제외). + +- [ ] **Step 6: 테스트 실행 (실행 가능한 경우)** + +```bash +cd ~/git/volcano-vgpu-device-plugin +go test ./pkg/plugin/ -run TestVulkanManifestMount -v +``` + +기대값: 두 testcase 모두 PASS. + +만약 `buildVulkanManifestMount` 함수가 없으면 (인라인 코드라면) Step 5 에서 함수 추출 + Step 6 PASS.
함수 추출은 server.go 의 manifest mount 블록을 다음 형태로 분리: + +```go +func buildVulkanManifestMount(hostHookPath string) []*kubeletdevicepluginv1beta1.Mount { + vulkanManifestHost := hostHookPath + "/vgpu/vulkan/implicit_layer.d/hami.json" + if _, err := os.Stat(vulkanManifestHost); err != nil { + return nil + } + return []*kubeletdevicepluginv1beta1.Mount{{ + ContainerPath: "/etc/vulkan/implicit_layer.d/hami.json", + HostPath: vulkanManifestHost, + ReadOnly: true, + }} +} +``` + +그리고 Allocate 안에서: + +```go +response.Mounts = append(response.Mounts, buildVulkanManifestMount(hostHookPath)...) +``` + +- [ ] **Step 7: Commit** + +```bash +cd ~/git/volcano-vgpu-device-plugin +git add pkg/plugin/server.go pkg/plugin/server_vulkan_test.go +git commit -m "feat(plugin): auto-inject Vulkan implicit layer manifest mount" +``` + +--- + +## Task 7: 기존 deploy yaml 두 개의 image tag 갱신 + +**Files:** +- Modify: `volcano-vgpu-device-plugin.yml` +- Modify: `volcano-vgpu-device-plugin-cdi.yml` + +- [ ] **Step 1: 현재 image tag 확인** + +```bash +cd ~/git/volcano-vgpu-device-plugin +grep -nE "image:.*volcano-vgpu" volcano-vgpu-device-plugin.yml volcano-vgpu-device-plugin-cdi.yml +``` + +기대값 (예시): `image: projecthami/volcano-vgpu-device-plugin:v1.10.0` + +- [ ] **Step 2: 새 tag 결정** + +`vulkan-v1` 또는 `v1.10.0-vulkan-v1` 같은 명확한 tag. + +- [ ] **Step 3: yaml 두 개의 image 라인 갱신** + +```bash +cd ~/git/volcano-vgpu-device-plugin +sed -i.bak 's|image: projecthami/volcano-vgpu-device-plugin:.*|image: 10.61.3.124:30002/library/volcano-vgpu-device-plugin:vulkan-v1|' \ + volcano-vgpu-device-plugin.yml volcano-vgpu-device-plugin-cdi.yml +rm -f *.yml.bak +git diff volcano-vgpu-device-plugin*.yml +``` + +> **Note:** sed pattern 의 source 부분 (`projecthami/...`) 은 Step 1 에서 본 실제 image 와 일치해야 한다. 달라지면 그에 맞게 조정. 
+ +- [ ] **Step 4: Commit** + +```bash +cd ~/git/volcano-vgpu-device-plugin +git add volcano-vgpu-device-plugin.yml volcano-vgpu-device-plugin-cdi.yml +git commit -m "chore: bump image to vulkan-v1 in deploy yaml" +``` + +--- + +## Task 8: 신규 yaml — host manifest 사전 배치 (fallback DaemonSet) + +**Files:** +- Create: `volcano-vgpu-vulkan-manifest.yml` + +> **Note:** Task 4-5 의 device-plugin image 가 이미 init script 으로 manifest 를 host 에 배치하므로, **이 DaemonSet 은 fallback** 이다. 노드에 이미 device-plugin DaemonSet 이 떠 있으면 manifest 가 자동 배치되지만, 별도 환경 (e.g., device-plugin 갱신 전, 또는 다른 distribution mechanism 사용 시) 을 위해 standalone 으로 배치 가능. + +- [ ] **Step 1: 파일 생성** + +```bash +cd ~/git/volcano-vgpu-device-plugin +cat > volcano-vgpu-vulkan-manifest.yml <<'EOF' +# HAMi Vulkan implicit layer manifest 를 host 노드의 +# /usr/local/vgpu/vulkan/implicit_layer.d/hami.json 으로 배치하는 DaemonSet. +# device-plugin image 의 vgpu-init.sh 가 이미 같은 작업을 하므로 일반적으로 불필요. +# device-plugin 갱신 전 또는 별도 init 시나리오용 fallback. +--- +apiVersion: v1 +kind: ConfigMap +metadata: + name: hami-vulkan-manifest + namespace: kube-system +data: + hami.json: | + { + "file_format_version": "1.0.0", + "layer": { + "name": "VK_LAYER_HAMI_vgpu", + "type": "GLOBAL", + "library_path": "/usr/local/vgpu/libvgpu.so", + "api_version": "1.3.0", + "implementation_version": "1", + "description": "HAMi Vulkan vGPU memory partitioning layer", + "enable_environment": { + "HAMI_VULKAN_ENABLE": "1" + } + } + } +--- +apiVersion: apps/v1 +kind: DaemonSet +metadata: + name: hami-vulkan-manifest-installer + namespace: kube-system + labels: + app: hami-vulkan-manifest-installer +spec: + selector: + matchLabels: + app: hami-vulkan-manifest-installer + template: + metadata: + labels: + app: hami-vulkan-manifest-installer + spec: + tolerations: + - operator: Exists + nodeSelector: + nvidia.com/gpu.present: "true" + hostPID: false + restartPolicy: Always + containers: + - name: installer + image: busybox:1.36 + command: + - /bin/sh + - -c + - | + set -eu + 
mkdir -p /host/usr/local/vgpu/vulkan/implicit_layer.d + cp -f /manifest/hami.json \ + /host/usr/local/vgpu/vulkan/implicit_layer.d/hami.json + echo "[hami-vulkan-manifest] installed at /usr/local/vgpu/vulkan/implicit_layer.d/hami.json" + # 종료하지 않고 sleep — DaemonSet 이라 restart 루프 회피 + sleep infinity + volumeMounts: + - name: manifest + mountPath: /manifest + readOnly: true + - name: host-vgpu + mountPath: /host/usr/local/vgpu + securityContext: + runAsUser: 0 + volumes: + - name: manifest + configMap: + name: hami-vulkan-manifest + - name: host-vgpu + hostPath: + path: /usr/local/vgpu + type: DirectoryOrCreate +EOF +``` + +- [ ] **Step 2: yaml syntax 검증** + +```bash +cd ~/git/volcano-vgpu-device-plugin +kubectl apply --dry-run=client -f volcano-vgpu-vulkan-manifest.yml +``` + +기대값: + +``` +configmap/hami-vulkan-manifest created (dry run) +daemonset.apps/hami-vulkan-manifest-installer created (dry run) +``` + +- [ ] **Step 3: Commit** + +```bash +cd ~/git/volcano-vgpu-device-plugin +git add volcano-vgpu-vulkan-manifest.yml +git commit -m "feat(deploy): add fallback DaemonSet for Vulkan manifest placement" +``` + +--- + +## Task 9: 사용 예시 yaml + 사용 가이드 문서 + +**Files:** +- Create: `examples/vulkan-pod.yaml` +- Create: `doc/vulkan-vgpu.md` + +- [ ] **Step 1: examples/vulkan-pod.yaml 생성** + +```bash +cd ~/git/volcano-vgpu-device-plugin +mkdir -p examples +cat > examples/vulkan-pod.yaml <<'EOF' +# HAMi Vulkan vGPU 분할 활성화 예시 pod. +# - annotation `hami.io/vulkan: "true"` 가 HAMi mutating webhook 을 통해 +# `HAMI_VULKAN_ENABLE=1` 와 NVIDIA_DRIVER_CAPABILITIES 의 graphics 캡을 주입. +# - device-plugin 이 hami.json 을 자동 mount 하여 Vulkan loader 가 layer 인식. +# - libvgpu (HAMi-core) 의 vkAllocateMemory 후킹이 nvidia.com/gpumem 한계 enforce. 
+apiVersion: v1 +kind: Pod +metadata: + name: vulkan-vgpu-demo + annotations: + hami.io/vulkan: "true" +spec: + schedulerName: volcano + containers: + - name: vulkan-app + image: nvidia/cuda:12.2.0-runtime-ubuntu22.04 + command: ["sleep", "infinity"] + resources: + limits: + nvidia.com/gpu: 1 + nvidia.com/gpumem: 4000 # MiB + nvidia.com/gpucores: 50 # % +EOF +``` + +- [ ] **Step 2: doc/vulkan-vgpu.md 생성** + +```bash +cd ~/git/volcano-vgpu-device-plugin +cat > doc/vulkan-vgpu.md <<'EOF' +# Vulkan vGPU 지원 + +이 device-plugin 은 CUDA workload 와 동일하게 **Vulkan workload** 도 메모리 partitioning 을 enforce 한다. Volcano scheduler 와 함께 사용한다. + +## 작동 원리 + +1. **libvgpu (HAMi-core) vulkan-layer**: `vkAllocateMemory` 를 후킹하여 `CUDA_DEVICE_MEMORY_LIMIT_0` 를 enforce. +2. **device-plugin Allocate**: 호스트의 `/usr/local/vgpu/vulkan/implicit_layer.d/hami.json` 이 존재하면 container 의 `/etc/vulkan/implicit_layer.d/hami.json` 으로 bind-mount. +3. **HAMi mutating webhook (별도 install)**: pod annotation `hami.io/vulkan: "true"` 검사 → `HAMI_VULKAN_ENABLE=1` env + `NVIDIA_DRIVER_CAPABILITIES` 에 `graphics` 추가. +4. **enable_environment 가드**: manifest 의 `enable_environment: HAMI_VULKAN_ENABLE=1` 매치 시에만 layer 로드. annotation 없는 pod 은 영향 없음. + +## 설치 (한 번만) + +### 1. device-plugin 갱신 (이미 새 image) + +```bash +kubectl apply -f volcano-vgpu-device-plugin.yml +# 또는 CDI 모드: +# kubectl apply -f volcano-vgpu-device-plugin-cdi.yml +``` + +### 2. HAMi mutating webhook 별도 install (helm) + +```bash +helm repo add hami https://project-hami.github.io/HAMi +helm install hami-webhook hami/hami \ + --namespace kube-system \ + --set devicePlugin.enabled=false \ + --set scheduler.kubeScheduler.enabled=false \ + --set scheduler.extender.enabled=false \ + --set scheduler.admissionWebhook.enabled=true +``` + +### 3.
(선택) Fallback manifest DaemonSet + +device-plugin 이 init 으로 manifest 를 host 에 자동 배치하지 못하는 환경에서: + +```bash +kubectl apply -f volcano-vgpu-vulkan-manifest.yml +``` + +## 사용 + +pod 에 annotation `hami.io/vulkan: "true"` + `nvidia.com/gpumem` resource limit 추가: + +```yaml +apiVersion: v1 +kind: Pod +metadata: + annotations: + hami.io/vulkan: "true" +spec: + containers: + - name: vulkan-app + image: <your-vulkan-image> + resources: + limits: + nvidia.com/gpu: 1 + nvidia.com/gpumem: 4000 +``` + +전체 예시: `examples/vulkan-pod.yaml` + +## 검증 + +container 안에서: + +```bash +# 1. env 주입 확인 +env | grep -E '(HAMI_VULKAN|DRIVER_CAPABILITIES)' +# 기대: HAMI_VULKAN_ENABLE=1, NVIDIA_DRIVER_CAPABILITIES=...,graphics + +# 2. manifest 파일 mount 확인 +ls /etc/vulkan/implicit_layer.d/hami.json + +# 3. Vulkan tool 로 GPU memory limit 확인 (Vulkan app 실행 시) +# 예: Isaac Sim Kit boot log 의 'GPU Memory: <limit> MB' +``` +EOF +``` + +- [ ] **Step 3: yaml syntax 검증** + +```bash +cd ~/git/volcano-vgpu-device-plugin +kubectl apply --dry-run=client -f examples/vulkan-pod.yaml +``` + +기대값: `pod/vulkan-vgpu-demo created (dry run)`.
+ +- [ ] **Step 4: Commit** + +```bash +cd ~/git/volcano-vgpu-device-plugin +git add examples/vulkan-pod.yaml doc/vulkan-vgpu.md +git commit -m "docs(vulkan): usage guide + sample pod" +``` + +--- + +## Task 10: image 빌드 + harbor push + +**Files:** +- (없음 — 운영 작업) + +- [ ] **Step 1: 빌더 머신 (ws-node074 = 10.61.3.74) 으로 코드 sync** + +```bash +# 로컬 (mac) 에서 +cd ~/git/volcano-vgpu-device-plugin +git push origin feat/vulkan-vgpu-support +``` + +빌더 머신 측: + +```bash +ssh root@10.61.3.74 'cd /root && \ + git clone https://github.com/xiilab/volcano-vgpu-device-plugin.git volcano-vgpu-device-plugin-vulkan 2>/dev/null || true; \ + cd /root/volcano-vgpu-device-plugin-vulkan && \ + git fetch origin && git checkout feat/vulkan-vgpu-support && git submodule update --init --recursive' +``` + +- [ ] **Step 2: 빌더 머신에서 image 빌드 + push** + +```bash +ssh root@10.61.3.74 'cd /root/volcano-vgpu-device-plugin-vulkan && \ + docker build -f docker/Dockerfile \ + -t 10.61.3.124:30002/library/volcano-vgpu-device-plugin:vulkan-v1 . && \ + docker push 10.61.3.124:30002/library/volcano-vgpu-device-plugin:vulkan-v1' +``` + +기대값: 마지막에 `digest: sha256:... size: ...` 출력. + +- [ ] **Step 3: image 정상 push 검증** + +```bash +ssh root@10.61.3.74 'docker pull 10.61.3.124:30002/library/volcano-vgpu-device-plugin:vulkan-v1 && \ + docker run --rm --entrypoint /bin/sh \ + 10.61.3.124:30002/library/volcano-vgpu-device-plugin:vulkan-v1 \ + -c "ls /k8s-vgpu/lib/nvidia/vulkan/implicit_layer.d/hami.json"' +``` + +기대값: `/k8s-vgpu/lib/nvidia/vulkan/implicit_layer.d/hami.json` 출력 (파일 존재 확인). 
+ +--- + +## Task 11: 클러스터 deploy + +**Files:** +- (없음 — 운영 작업) + +- [ ] **Step 1: 신규 manifest DaemonSet apply (fallback, 권장)** + +```bash +kubectl --context= apply -f volcano-vgpu-vulkan-manifest.yml +``` + +기대값: +``` +configmap/hami-vulkan-manifest created +daemonset.apps/hami-vulkan-manifest-installer created +``` + +- [ ] **Step 2: DaemonSet pod 들 Ready 대기 + manifest 파일 host 에 배치 확인** + +```bash +until kubectl --context= -n kube-system get ds hami-vulkan-manifest-installer \ + -o jsonpath='{.status.numberReady}/{.status.desiredNumberScheduled}{"\n"}' 2>/dev/null \ + | grep -q "^[1-9].*/[1-9]"; do sleep 3; done + +# host 에 파일 있는지 (DaemonSet pod 안에서) +kubectl --context= -n kube-system get pod -l app=hami-vulkan-manifest-installer \ + -o name | head -1 | xargs -I{} kubectl --context= -n kube-system exec {} -- \ + ls -la /host/usr/local/vgpu/vulkan/implicit_layer.d/hami.json +``` + +기대값: 파일 존재. + +- [ ] **Step 3: device-plugin DaemonSet 갱신 (rolling update)** + +```bash +kubectl --context= apply -f volcano-vgpu-device-plugin.yml +# 또는 CDI: +# kubectl --context= apply -f volcano-vgpu-device-plugin-cdi.yml +``` + +- [ ] **Step 4: device-plugin pod ready 대기 + new image 사용 확인** + +```bash +until kubectl --context= -n kube-system get ds volcano-vgpu-device-plugin \ + -o jsonpath='{.status.numberReady}/{.status.desiredNumberScheduled}{"\n"}' 2>/dev/null \ + | grep -q "^[1-9].*/[1-9]"; do sleep 3; done + +kubectl --context= -n kube-system get pod -l app=volcano-vgpu-device-plugin \ + -o jsonpath='{.items[*].spec.containers[*].image}{"\n"}' +``` + +기대값: 모든 pod 의 image 가 `10.61.3.124:30002/library/volcano-vgpu-device-plugin:vulkan-v1`. 
+ +--- + +## Task 12: HAMi webhook 별도 install (helm) + +**Files:** +- (없음 — 운영 작업) + +- [ ] **Step 1: HAMi helm repo 추가** + +```bash +helm repo add hami https://project-hami.github.io/HAMi +helm repo update +``` + +- [ ] **Step 2: webhook only values 로 install** + +```bash +helm install hami-webhook hami/hami \ + --kube-context <context> \ + --namespace kube-system \ + --set devicePlugin.enabled=false \ + --set scheduler.kubeScheduler.enabled=false \ + --set scheduler.extender.enabled=false \ + --set scheduler.admissionWebhook.enabled=true +``` + +- [ ] **Step 3: webhook pod ready 대기** + +```bash +until kubectl --context=<context> -n kube-system get deployment \ + hami-webhook 2>/dev/null \ + -o jsonpath='{.status.readyReplicas}/{.status.replicas}{"\n"}' \ + | grep -q "^[1-9].*/[1-9]"; do sleep 3; done +``` + +> **Note:** 실제 deployment 이름은 helm chart values 에 따라 다를 수 있다. `kubectl get deploy -n kube-system | grep hami` 로 확인. + +- [ ] **Step 4: MutatingWebhookConfiguration 등록 확인** + +```bash +kubectl --context=<context> get mutatingwebhookconfigurations | grep hami +``` + +기대값: `hami-webhook` 또는 동등 객체 존재.
+ +--- + +## Task 13: E2E 검증 — 4 케이스 + +**Files:** +- Use: `examples/vulkan-pod.yaml` + +- [ ] **Step 1: Case 1 — annotation 있는 Vulkan pod 의 partition enforce** + +```bash +kubectl --context= apply -f examples/vulkan-pod.yaml +kubectl --context= wait --for=condition=Ready pod/vulkan-vgpu-demo --timeout=60s + +# env 주입 확인 +kubectl --context= exec vulkan-vgpu-demo -- env | grep -E "(HAMI_VULKAN|DRIVER_CAPABILITIES)" +# 기대: HAMI_VULKAN_ENABLE=1, NVIDIA_DRIVER_CAPABILITIES=...,graphics + +# manifest mount 확인 +kubectl --context= exec vulkan-vgpu-demo -- ls /etc/vulkan/implicit_layer.d/hami.json +# 기대: 파일 존재 + +# CUDA_DEVICE_MEMORY_LIMIT 확인 (HAMi-core 환경) +kubectl --context= exec vulkan-vgpu-demo -- env | grep CUDA_DEVICE_MEMORY_LIMIT +# 기대: CUDA_DEVICE_MEMORY_LIMIT_0=4000m +``` + +- [ ] **Step 2: Case 1 — Vulkan app 실제 메모리 enforce 확인 (Isaac Sim 또는 vulkaninfo)** + +Isaac Sim 같은 Vulkan workload pod 에서: + +```bash +# Kit boot log 의 GPU Memory 라인 확인 +kubectl --context= logs | grep "GPU Memory" +# 기대: | 0 | NVIDIA RTX 6000 Ada Generation | Yes: 0 | | 4000 MB | ... +# (전체 GPU 가 아닌 partition 한계로 표시되어야 함) +``` + +또는 vulkan tool 로 device memory 조회: + +```bash +kubectl --context= exec vulkan-vgpu-demo -- vulkaninfo --summary 2>&1 | grep -i memory +``` + +- [ ] **Step 3: Case 2 — annotation 없는 Vulkan pod 은 full GPU** + +```bash +cat > /tmp/vulkan-noanno.yaml <<'EOF' +apiVersion: v1 +kind: Pod +metadata: + name: vulkan-noanno +spec: + schedulerName: volcano + containers: + - name: vulkan-app + image: nvidia/cuda:12.2.0-runtime-ubuntu22.04 + command: ["sleep", "infinity"] + resources: + limits: + nvidia.com/gpu: 1 + nvidia.com/gpumem: 4000 +EOF +kubectl --context= apply -f /tmp/vulkan-noanno.yaml +kubectl --context= wait --for=condition=Ready pod/vulkan-noanno --timeout=60s + +# HAMI_VULKAN_ENABLE 가 없어야 함 +kubectl --context= exec vulkan-noanno -- env | grep HAMI_VULKAN_ENABLE || \ + echo "[OK] HAMI_VULKAN_ENABLE not injected" +``` + +기대값: `[OK] HAMI_VULKAN_ENABLE not injected`. 
CUDA_DEVICE_MEMORY_LIMIT 는 여전히 4000m (annotation 무관, device-plugin 이 enforce). Vulkan layer 만 안 로드. + +- [ ] **Step 4: Case 3 — annotation 있는 CUDA-only pod 동작 정상** + +```bash +cat > /tmp/cuda-anno.yaml <<'EOF' +apiVersion: v1 +kind: Pod +metadata: + name: cuda-anno + annotations: + hami.io/vulkan: "true" +spec: + schedulerName: volcano + containers: + - name: cuda-app + image: nvidia/cuda:12.2.0-base-ubuntu22.04 + command: ["nvidia-smi"] + resources: + limits: + nvidia.com/gpu: 1 + nvidia.com/gpumem: 4000 +EOF +kubectl --context= apply -f /tmp/cuda-anno.yaml +kubectl --context= wait --for=condition=Ready pod/cuda-anno --timeout=60s 2>/dev/null +sleep 5 # Job-style 종료 기다리기 +kubectl --context= logs cuda-anno | head -20 +``` + +기대값: nvidia-smi 출력. `Total memory` 가 4000 MiB (HAMi-core 가 가짜 한계 표시) 또는 정상 GPU 정보. Vulkan 영향 없이 CUDA 동작. + +- [ ] **Step 5: Case 4 — 기존 standard CUDA workload 회귀 (annotation 없음, gpumem 만)** + +```bash +cat > /tmp/cuda-standard.yaml <<'EOF' +apiVersion: v1 +kind: Pod +metadata: + name: cuda-standard +spec: + schedulerName: volcano + containers: + - name: cuda-app + image: nvidia/cuda:12.2.0-base-ubuntu22.04 + command: ["nvidia-smi"] + resources: + limits: + nvidia.com/gpu: 1 + nvidia.com/gpumem: 8000 +EOF +kubectl --context= apply -f /tmp/cuda-standard.yaml +kubectl --context= wait --for=condition=Ready pod/cuda-standard --timeout=60s 2>/dev/null +sleep 5 +kubectl --context= logs cuda-standard | head -20 +``` + +기대값: nvidia-smi 정상 출력. CUDA_DEVICE_MEMORY_LIMIT_0=8000m. HAMi-core CUDA enforce 정상. 
+ +- [ ] **Step 6: 4 케이스 모두 PASS 면 정리** + +```bash +kubectl --context= delete pod vulkan-vgpu-demo vulkan-noanno cuda-anno cuda-standard --ignore-not-found +rm -f /tmp/vulkan-noanno.yaml /tmp/cuda-anno.yaml /tmp/cuda-standard.yaml +``` + +--- + +## Task 14: PR 작성 + merge + +**Files:** +- (없음 — git 작업) + +- [ ] **Step 1: 모든 commit 확인** + +```bash +cd ~/git/volcano-vgpu-device-plugin +git log --oneline feat/vulkan-vgpu-support ^main 2>&1 +``` + +기대 commits (Task 1-9 의 commit): + +``` +feat(plugin): auto-inject Vulkan implicit layer manifest mount +feat(image): ship Vulkan implicit layer manifest from libvgpu +build: install libvulkan-dev in nvbuild stage for Vulkan layer compile +deps: bump libvgpu to 8d4f712 (vulkan-layer support) +chore: bump image to vulkan-v1 in deploy yaml +feat(deploy): add fallback DaemonSet for Vulkan manifest placement +docs(vulkan): usage guide + sample pod +build(init): copy Vulkan manifest to host during vgpu-init (있는 경우) +chore: start feat/vulkan-vgpu-support branch +``` + +- [ ] **Step 2: PR 작성** + +```bash +cd ~/git/volcano-vgpu-device-plugin +gh pr create \ + --base main \ + --head feat/vulkan-vgpu-support \ + --title "feat: Vulkan vGPU memory partitioning support" \ + --body "$(cat <<'EOF' +## Summary + +- libvgpu submodule 을 vulkan-layer 가 포함된 SHA 로 갱신 +- device-plugin Allocate 가 host 의 hami.json 을 container 에 자동 mount +- Dockerfile builder stage 에 libvulkan-dev 추가 + runtime stage 에 hami.json ship +- 신규 yaml 추가: fallback manifest DaemonSet, 사용 예시 +- 사용 가이드 문서 추가 + +## 동작 원리 + +HAMi 본가의 Vulkan vGPU 지원 (commit 0150ea7) 패턴을 그대로 포팅. annotation `hami.io/vulkan: "true"` 가 붙은 pod 만 HAMi mutating webhook 이 HAMI_VULKAN_ENABLE=1 env 를 주입 → manifest 의 enable_environment 가드 매치 → Vulkan layer 로드 → vkAllocateMemory 후킹으로 메모리 enforce. + +## 운영 deploy + +1. `kubectl apply -f volcano-vgpu-vulkan-manifest.yml` (선택) +2. `kubectl apply -f volcano-vgpu-device-plugin.yml` (또는 CDI) +3. 
`helm install hami-webhook hami/hami --set ...` (webhook only) + +## Test plan + +- [ ] Case 1: annotation 있는 Vulkan pod → memory enforce +- [ ] Case 2: annotation 없는 Vulkan pod → full GPU memory +- [ ] Case 3: annotation 있는 CUDA-only pod → CUDA 정상 +- [ ] Case 4: 기존 CUDA workload 회귀 → gpumem enforce 정상 +EOF +)" +``` + +- [ ] **Step 3: PR review 후 merge** + +reviewer 의 피드백 적용. merge 시 squash 또는 rebase 정책은 fork 의 기존 관행 따른다. + +--- + +## 참고 자료 + +- HAMi 본가 commit `0150ea7`: device-plugin Vulkan manifest auto-inject +- HAMi 본가 commit `50b37ff`: Dockerfile libvulkan-dev 추가 +- HAMi spec `docs/superpowers/specs/2026-04-21-vulkan-vgpu-partitioning-design.md` +- HAMi plan `docs/superpowers/plans/2026-04-21-vulkan-vgpu-partitioning.md` +- HAMi 사용 가이드 `docs/vulkan-vgpu-support.md` +- HAMi E2E 체크리스트 `docs/vulkan-vgpu-e2e-checklist.md` +- 메모리 노트 `project_hami_vulkan_verification.md` +- 본 plan 의 spec `docs/superpowers/specs/2026-04-27-volcano-vulkan-vgpu-design.md` diff --git a/docs/superpowers/plans/2026-04-28-hami-isolation-step-a-namespace-opt-in.md b/docs/superpowers/plans/2026-04-28-hami-isolation-step-a-namespace-opt-in.md new file mode 100644 index 000000000..781066ac1 --- /dev/null +++ b/docs/superpowers/plans/2026-04-28-hami-isolation-step-a-namespace-opt-in.md @@ -0,0 +1,620 @@ +# HAMi vGPU 격리 — Step A: Namespace opt-in/out Implementation Plan + +> **For agentic workers:** REQUIRED SUB-SKILL: Use superpowers:subagent-driven-development (recommended) or superpowers:executing-plans to implement this plan task-by-task. Steps use checkbox (`- [ ]`) syntax for tracking. + +**Goal:** HAMi 격리 메커니즘 (LD_PRELOAD inject + Vulkan implicit layer manifest mount + webhook env mutation) 을 노드 wide 강제 적용에서 namespace label 기반 opt-in 으로 변경하여, isaac-launchable namespace 의 Isaac Sim Kit 를 정상 동작 baseline 으로 유지하면서 다른 GPU workload namespace 만 격리 enforce 한다. 
+ +**Architecture:** webhook 의 `namespaceSelector` 를 opt-out (`hami.io/webhook NotIn ignore`) 에서 opt-in (`hami.io/vgpu In enabled`) 으로 helm values 변경 + 노드 wide `/usr/local/vgpu/ld.so.preload` 와 `hami.json` 자동 install daemonset 을 비활성 + 검증 namespace 에 label 적용 후 격리 enforce 동작 확인. webhook backend 코드 변경 없이 helm chart values 변경 + cluster 측 daemonset patch 로 완료. + +**Tech Stack:** Kubernetes 1.34.3 (k0s), Helm chart `hami` (이번 fork `xiilab/feat/vulkan-vgpu`), kubectl, ws-node074 (RTX 6000 Ada x2, NVIDIA driver 580.142). + +**Plan scope:** 본 plan 은 design doc 의 4 step 중 **Step A 만** 다룬다. Step B (HAMi-core hook hardening), Step C (Vulkan layer compat), Step D (isaac-launchable opt-in 활성화) 는 별도 plan 으로 작성. + +--- + +## File Structure + +| 파일 | 변경 종류 | 책임 | +|---|---|---| +| `charts/hami/values.yaml` | Modify | webhook `namespaceSelector` mode 옵션 (`mode: opt-in \| opt-out`) + 새 default 추가 | +| `charts/hami/templates/scheduler/webhook.yaml` | Modify | `mode` 값 따라 `namespaceSelector.matchExpressions` 분기 (opt-in 시 `hami.io/vgpu In [enabled]`) | +| `cluster/runtime/hami-vulkan-manifest-installer.yaml` (신규) | Create | 현재 cluster 에 install된 ds 의 spec 백업 + 비활성화 패치 (label-based scope 또는 scale 0) — chart 외부 yaml | +| `cluster/runtime/hami-preload-installer.yaml` (신규) | Create | 노드 wide `/usr/local/vgpu/ld.so.preload` 만든 entity 의 spec 백업 + 비활성화 (예: device-plugin daemonset 의 `deviceconfig` ConfigMap 의 `ld.so.preload` key 비우기 또는 mount 제거) | +| `docs/superpowers/specs/2026-04-28-hami-isolation-isaac-sim-design.md` | (no change) | 이미 commit 됨 (`d177471`) — Step A 의 spec 근거 | + +**Note:** `hami-vulkan-manifest-installer` ds 와 `hami-preload-installer` ds 의 원본 yaml 은 우리 chart 안에 없다 (cluster 측 별도 설치). 본 plan 은 그 ds 들의 현재 cluster 상태를 dump 하고 namespace label 기반 opt-in 으로 변환된 새 yaml 을 cluster 에 apply 한다. 
+ +--- + +## Tasks + +### Task 1: 현재 cluster 의 webhook + installer ds spec 을 yaml 로 dump (백업) + +**Files:** +- Create: `cluster/runtime/snapshot-2026-04-28/hami-webhook-mutating.yaml` +- Create: `cluster/runtime/snapshot-2026-04-28/hami-vulkan-manifest-installer-ds.yaml` +- Create: `cluster/runtime/snapshot-2026-04-28/volcano-device-plugin-ds.yaml` +- Create: `cluster/runtime/snapshot-2026-04-28/hami-vulkan-manifest-cm.yaml` + +- [ ] **Step 1: snapshot 디렉토리 생성** + +```bash +mkdir -p /Users/xiilab/git/HAMi/cluster/runtime/snapshot-2026-04-28 +cd /Users/xiilab/git/HAMi +``` + +- [ ] **Step 2: webhook 현재 spec dump** + +```bash +kubectl get mutatingwebhookconfiguration hami-webhook-webhook -o yaml \ + > cluster/runtime/snapshot-2026-04-28/hami-webhook-mutating.yaml +ls -la cluster/runtime/snapshot-2026-04-28/hami-webhook-mutating.yaml +``` + +Expected: 파일 존재, size > 0, `namespaceSelector:` 안에 `key: hami.io/webhook` 와 `operator: NotIn` 보임. + +- [ ] **Step 3: 두 daemonset + ConfigMap dump** + +```bash +kubectl -n kube-system get ds hami-vulkan-manifest-installer -o yaml \ + > cluster/runtime/snapshot-2026-04-28/hami-vulkan-manifest-installer-ds.yaml +kubectl -n kube-system get ds volcano-device-plugin -o yaml \ + > cluster/runtime/snapshot-2026-04-28/volcano-device-plugin-ds.yaml +kubectl -n kube-system get cm hami-vulkan-manifest -o yaml \ + > cluster/runtime/snapshot-2026-04-28/hami-vulkan-manifest-cm.yaml +ls -la cluster/runtime/snapshot-2026-04-28/ +``` + +Expected: 4 yaml 파일 존재. + +- [ ] **Step 4: snapshot commit** + +```bash +git add cluster/runtime/snapshot-2026-04-28/ +git commit -s -m "chore(cluster): snapshot 4-27 새벽 패치 시점의 webhook + installer ds + cm" +``` + +Expected: commit 생성, `git log --oneline -1` 에 commit 보임. 
+ +--- + +### Task 2: helm chart values.yaml 에 namespaceSelector mode 옵션 추가 + +**Files:** +- Modify: `charts/hami/values.yaml:178-185` (현 namespaceSelector block) + +- [ ] **Step 1: 현재 values.yaml 의 namespaceSelector block 확인** + +```bash +sed -n '170,200p' charts/hami/values.yaml +``` + +Expected output (4-line `matchExpressions` 가 opt-out 인 상태): +```yaml + namespaceSelector: + matchLabels: {} + matchExpressions: [] + ## opt-out: hami.io/webhook=ignore label 가진 namespace 는 webhook 적용 안 함 + ## (template 에 hard-coded matchExpressions 존재) +``` + +- [ ] **Step 2: values.yaml 수정 — mode 옵션 추가** + +`charts/hami/values.yaml` 의 `scheduler.admissionWebhook.namespaceSelector` block 을 다음으로 교체: + +```yaml + # namespaceSelector controls which namespaces the webhook will apply to. + # mode: + # "opt-out" (legacy default): apply to all namespaces except those labeled + # hami.io/webhook=ignore. Suitable when most workloads need vGPU + # isolation and a small number opt out. + # "opt-in" (recommended for clusters with NVIDIA Omniverse / Isaac Sim + # workloads that conflict with HAMi-core hooks): apply ONLY to + # namespaces labeled hami.io/vgpu=enabled. Other namespaces see + # no mutation, no LD_PRELOAD inject, no implicit Vulkan layer. + namespaceSelector: + mode: opt-in + matchLabels: {} + matchExpressions: [] +``` + +- [ ] **Step 3: helm lint 로 syntax 검증** + +```bash +cd /Users/xiilab/git/HAMi +helm lint charts/hami 2>&1 | tail -5 +``` + +Expected: `1 chart(s) linted, 0 chart(s) failed`. + +- [ ] **Step 4: commit** + +```bash +git add charts/hami/values.yaml +git commit -s -m "feat(chart): add namespaceSelector.mode (opt-in|opt-out) for webhook" \ + -m "Adds an explicit mode toggle. opt-in matches namespaces labeled hami.io/vgpu=enabled (recommended for clusters running NVIDIA Omniverse / Isaac Sim workloads that conflict with HAMi-core hooks). opt-out keeps the legacy hami.io/webhook=ignore exclusion behavior. 
Default switches to opt-in to fail safe — clusters with vGPU workloads must explicitly enable per-namespace." +``` + +--- + +### Task 3: helm chart webhook template 의 namespaceSelector 분기 + +**Files:** +- Modify: `charts/hami/templates/scheduler/webhook.yaml` (namespaceSelector block) + +- [ ] **Step 1: 현재 webhook template 의 namespaceSelector block 확인** + +```bash +grep -n -A 15 "namespaceSelector:" charts/hami/templates/scheduler/webhook.yaml +``` + +Expected: opt-out hard-code (`key: hami.io/webhook, operator: NotIn, values: [ignore]`). + +- [ ] **Step 2: namespaceSelector block 을 mode 분기로 교체** + +```yaml + namespaceSelector: + {{- if .Values.scheduler.admissionWebhook.namespaceSelector.matchLabels }} + matchLabels: + {{- toYaml .Values.scheduler.admissionWebhook.namespaceSelector.matchLabels | nindent 8 }} + {{- end }} + matchExpressions: + {{- if eq (.Values.scheduler.admissionWebhook.namespaceSelector.mode | default "opt-out") "opt-in" }} + - key: hami.io/vgpu + operator: In + values: + - enabled + {{- else }} + - key: hami.io/webhook + operator: NotIn + values: + - ignore + {{- end }} + {{- if .Values.scheduler.admissionWebhook.whitelistNamespaces }} + - key: kubernetes.io/metadata.name + operator: NotIn + values: + {{- toYaml .Values.scheduler.admissionWebhook.whitelistNamespaces | nindent 10 }} + {{- end }} + {{- if .Values.scheduler.admissionWebhook.namespaceSelector.matchExpressions }} + {{- toYaml .Values.scheduler.admissionWebhook.namespaceSelector.matchExpressions | nindent 6 }} + {{- end }} +``` + +- [ ] **Step 3: helm template render — opt-in mode 일 때 generated YAML 검증** + +```bash +helm template my-hami charts/hami --show-only templates/scheduler/webhook.yaml \ + --set scheduler.admissionWebhook.namespaceSelector.mode=opt-in 2>&1 \ + | grep -A 6 namespaceSelector +``` + +Expected output 안에: +``` +matchExpressions: +- key: hami.io/vgpu + operator: In + values: + - enabled +``` + +- [ ] **Step 4: helm template render — opt-out mode 일 때 generated YAML 
검증** + +```bash +helm template my-hami charts/hami --show-only templates/scheduler/webhook.yaml \ + --set scheduler.admissionWebhook.namespaceSelector.mode=opt-out 2>&1 \ + | grep -A 6 namespaceSelector +``` + +Expected output 안에: +``` +matchExpressions: +- key: hami.io/webhook + operator: NotIn + values: + - ignore +``` + +- [ ] **Step 5: commit** + +```bash +git add charts/hami/templates/scheduler/webhook.yaml +git commit -s -m "feat(chart): webhook namespaceSelector branches on mode (opt-in|opt-out)" \ + -m "Renders the matching matchExpressions block based on the new namespaceSelector.mode value (Task 2). opt-in produces 'hami.io/vgpu In [enabled]'; opt-out keeps 'hami.io/webhook NotIn [ignore]'. Whitelist and user-supplied matchExpressions are still appended after the mode-specific entry." +``` + +--- + +### Task 4: cluster 의 webhook MutatingWebhookConfiguration 을 opt-in 으로 직접 patch (helm 재배포 없이) + +**Files:** +- Modify (cluster only): `MutatingWebhookConfiguration/hami-webhook-webhook` + +이 task 는 helm release 재실행이 아니라 **cluster 의 webhook spec 만 직접 patch** 한다 (다른 helm-managed 자원 영향 안 줌). 후속 helm upgrade 시 chart 변경분 (Task 2/3) 과 일치. 
+ +- [ ] **Step 1: 현재 webhook namespaceSelector 확인** + +```bash +kubectl get mutatingwebhookconfiguration hami-webhook-webhook \ + -o jsonpath='{.webhooks[0].namespaceSelector}{"\n"}' +``` + +Expected (opt-out): +``` +{"matchExpressions":[{"key":"hami.io/webhook","operator":"NotIn","values":["ignore"]}]} +``` + +- [ ] **Step 2: opt-in 으로 patch** + +```bash +kubectl patch mutatingwebhookconfiguration hami-webhook-webhook --type=json \ + --patch='[{"op":"replace","path":"/webhooks/0/namespaceSelector","value":{"matchExpressions":[{"key":"hami.io/vgpu","operator":"In","values":["enabled"]}]}}]' +``` + +Expected: `mutatingwebhookconfiguration.admissionregistration.k8s.io/hami-webhook-webhook patched` + +- [ ] **Step 3: 검증 — opt-in 으로 변경됨** + +```bash +kubectl get mutatingwebhookconfiguration hami-webhook-webhook \ + -o jsonpath='{.webhooks[0].namespaceSelector}{"\n"}' +``` + +Expected: +``` +{"matchExpressions":[{"key":"hami.io/vgpu","operator":"In","values":["enabled"]}]} +``` + +- [ ] **Step 4: isaac-launchable namespace 의 기존 label `hami.io/webhook=ignore` 제거 (이제 불필요)** + +```bash +kubectl label namespace isaac-launchable hami.io/webhook- +``` + +Expected: `namespace/isaac-launchable unlabeled`. 
+ +- [ ] **Step 5: isaac-launchable pod 재생성 — webhook mutation 0 건 검증** + +```bash +kubectl -n isaac-launchable delete pod -l app=isaac-launchable --wait=false +sleep 80 +NEWPOD=$(kubectl -n isaac-launchable get pod -l app=isaac-launchable,instance=pod-1 -o jsonpath='{.items[0].metadata.name}') +echo "POD=$NEWPOD" +kubectl -n isaac-launchable exec $NEWPOD -c vscode -- bash -lc \ + 'env | grep -E "^(HAMI|LD_PRELOAD|NVIDIA_DRIVER_CAP)" ; ls /etc/vulkan/implicit_layer.d/' +``` + +Expected: +- env 에 `HAMI_VULKAN_ENABLE` 없음 (또는 기존 deployment yaml 에 박힌 것만) +- env 에 `LD_PRELOAD` 없음 +- `/etc/vulkan/implicit_layer.d/` 에 `hami.json` 없음 (단, ld.so.preload 가 컨테이너 안에 있을 수 있음 — 별도 task 처리) + +--- + +### Task 5: 노드 wide `/usr/local/vgpu/ld.so.preload` 와 hami.json install daemonset 비활성화 + +**Files:** +- Modify (cluster only): node `ws-node074:/usr/local/vgpu/ld.so.preload` (이미 비어있는 상태 유지) +- Modify (cluster only): `DaemonSet/hami-vulkan-manifest-installer` (비활성) + +- [ ] **Step 1: 노드 ld.so.preload 가 빈 파일 또는 미존재 확인** + +```bash +ssh root@10.61.3.74 'ls -la /usr/local/vgpu/ld.so.preload; cat /usr/local/vgpu/ld.so.preload | wc -c' +``` + +Expected: 파일 size 0 또는 1 (빈/newline). 
만약 size > 1 이면 비우기: + +```bash +ssh root@10.61.3.74 ': > /usr/local/vgpu/ld.so.preload' +``` + +- [ ] **Step 2: hami-vulkan-manifest-installer ds 가 비활성 (nodeSelector hami.io/disabled=true) 확인** + +```bash +kubectl -n kube-system get ds hami-vulkan-manifest-installer \ + -o jsonpath='{.spec.template.spec.nodeSelector}{"\n"}' +``` + +Expected: +``` +{"hami.io/disabled":"true"} +``` + +만약 다른 selector 면 patch: + +```bash +kubectl -n kube-system patch daemonset hami-vulkan-manifest-installer --type='json' \ + -p='[{"op":"replace","path":"/spec/template/spec/nodeSelector","value":{"hami.io/disabled":"true"}}]' +``` + +- [ ] **Step 3: 노드 hami.json manifest 가 컨테이너로 mount 안 되는지 검증** + +```bash +NEWPOD=$(kubectl -n isaac-launchable get pod -l app=isaac-launchable,instance=pod-1 -o jsonpath='{.items[0].metadata.name}') +kubectl -n isaac-launchable exec $NEWPOD -c vscode -- bash -lc 'ls /etc/vulkan/implicit_layer.d/' +``` + +Expected: 출력에 `nvidia_layers.json` 만 있고 `hami.json` 없음. **만약 hami.json 있으면**: 노드 `/usr/local/vgpu/vulkan/implicit_layer.d/hami.json` 도 삭제 필요: + +```bash +ssh root@10.61.3.74 'rm -f /usr/local/vgpu/vulkan/implicit_layer.d/hami.json; ls /usr/local/vgpu/vulkan/implicit_layer.d/' +``` + +그 후 pod 재생성 후 재검증. + +- [ ] **Step 4: isaac-launchable runheadless.sh 5번 baseline 검증 — 5/5 alive 유지** + +```bash +NEWPOD=$(kubectl -n isaac-launchable get pod -l app=isaac-launchable,instance=pod-1 -o jsonpath='{.items[0].metadata.name}') +kubectl -n isaac-launchable exec $NEWPOD -c vscode -- bash -lc ' +mkdir -p /tmp/v +for i in 1 2 3 4 5; do + pkill -KILL kit 2>/dev/null; sleep 3 + timeout 50 env ACCEPT_EULA=y /isaac-sim/runheadless.sh >/tmp/v/r$i.log 2>&1 + EC=$? + CRASH=$(grep -cE "Segmentation fault|crash has occurred" /tmp/v/r$i.log) + LISTEN=$(ss -tunlp 2>/dev/null | grep -c -E ":49100|:30999") + echo "run $i: exit=$EC crash=$CRASH listen=$LISTEN" +done +pkill -KILL kit 2>/dev/null +' +``` + +Expected: 5번 모두 `exit=124 crash=0 listen>=1` (alive + signaling listen). 
+ +--- + +### Task 6: 새 검증 namespace `hami-test` 에 격리 enforce 동작 검증 + +**Files:** +- Create (cluster only): `Namespace/hami-test` (label `hami.io/vgpu=enabled`) +- Create: `cluster/runtime/test/cuda-partition-test-pod.yaml` + +- [ ] **Step 1: 검증 namespace 만들고 label 적용** + +```bash +kubectl create namespace hami-test --dry-run=client -o yaml | kubectl apply -f - +kubectl label namespace hami-test hami.io/vgpu=enabled --overwrite +kubectl get namespace hami-test --show-labels +``` + +Expected: label 출력에 `hami.io/vgpu=enabled` 포함. + +- [ ] **Step 2: 단순 CUDA test pod manifest 작성** + +`cluster/runtime/test/cuda-partition-test-pod.yaml`: + +```yaml +apiVersion: v1 +kind: Pod +metadata: + name: cuda-partition-test + namespace: hami-test +spec: + restartPolicy: Never + nodeSelector: + kubernetes.io/hostname: ws-node074 + containers: + - name: cuda + image: 10.61.3.124:30002/library/isaac-launchable-vscode:6.0.0-fix5364 + command: ["/bin/bash", "-c"] + args: + - | + set -e + echo "=== nvidia-smi ===" + nvidia-smi --query-gpu=memory.total --format=csv,noheader + echo "=== env ===" + env | grep -E "^(HAMI|LD_PRELOAD|NVIDIA_DRIVER_CAP)" | sort + echo "=== ls /etc/vulkan/implicit_layer.d ===" + ls /etc/vulkan/implicit_layer.d/ + echo "=== ld.so.preload ===" + [ -f /etc/ld.so.preload ] && cat /etc/ld.so.preload || echo "(no ld.so.preload)" + echo "=== sleep 60 ===" + sleep 60 + resources: + limits: + volcano.sh/vgpu-number: "1" + volcano.sh/vgpu-memory: "23" + volcano.sh/vgpu-cores: "50" + requests: + volcano.sh/vgpu-number: "1" + volcano.sh/vgpu-memory: "23" + volcano.sh/vgpu-cores: "50" +``` + +- [ ] **Step 3: pod 배포 + webhook mutation 적용 검증** + +```bash +kubectl apply -f cluster/runtime/test/cuda-partition-test-pod.yaml +sleep 30 +kubectl -n hami-test get pod cuda-partition-test -o wide +kubectl -n hami-test logs cuda-partition-test +``` + +Expected logs: +- `nvidia-smi memory.total` = `23552 MiB` (NVML hook 적용됨) +- env 에 `HAMI_VULKAN_ENABLE=1` 또는 
`LD_PRELOAD=/usr/local/vgpu/libvgpu.so` 둘 중 하나 이상 webhook mutation 으로 주입됨 +- `ls /etc/vulkan/implicit_layer.d/` 에 `hami.json` 또는 (`hami.json` 없으면 다음 plan 의 webhook mutation 보완 필요) +- `ld.so.preload` 에 `/usr/local/vgpu/libvgpu.so` 포함 + +**중요:** 만약 webhook mutation 이 LD_PRELOAD env 와 hami.json mount 를 자동 주입하지 않으면 (현재 webhook 코드는 HAMI_VULKAN_ENABLE env 와 NVIDIA_DRIVER_CAPABILITIES patch 만 한다고 추정) — 본 Step A 는 격리 enforce 까지 도달 안 함. **Step A 의 진정한 완료는 webhook 이 LD_PRELOAD + libvgpu.so + hami.json mount 까지 자동 주입하도록 확장**. 이는 webhook backend Go 코드 변경 — 본 plan 의 Task 7 로 추가. + +- [ ] **Step 4: pod 정리** + +```bash +kubectl -n hami-test delete pod cuda-partition-test +``` + +- [ ] **Step 5: test manifest commit** + +```bash +git add cluster/runtime/test/cuda-partition-test-pod.yaml +git commit -s -m "test(cluster): add cuda-partition-test pod for namespace opt-in 격리 검증" +``` + +--- + +### Task 7: webhook mutation 확장 — LD_PRELOAD env + libvgpu.so volume mount + hami.json 자동 주입 + +**Files:** +- Modify: `pkg/scheduler/webhook/*.go` (mutation 로직) +- Create (chart): `charts/hami/templates/scheduler/hami-vulkan-layer-cm.yaml` (hami.json content for mounting) + +이 task 는 webhook backend Go 코드 변경. 본 plan 에서는 **인터페이스 정의 + 단위 test 작성** 까지만, 실제 Go 코드 수정은 Step A 의 후반부 또는 별도 plan 으로 분리. + +- [ ] **Step 1: webhook backend 코드 위치 식별** + +```bash +cd /Users/xiilab/git/HAMi +find pkg cmd -type f -name "*.go" | xargs grep -lE "MutatingWebhook|admission\\.AdmissionReview|patchOps" 2>/dev/null | head -10 +``` + +Expected: 1개 이상의 .go 파일 출력. 그 파일이 mutation 로직 entry point. + +- [ ] **Step 2: 현재 mutation 로직이 무엇을 patch 하는지 확인** + +```bash +WEBHOOK_FILE=$(find pkg cmd -type f -name "*.go" | xargs grep -lE "MutatingWebhook|admission\\.AdmissionReview" 2>/dev/null | head -1) +echo "WEBHOOK_FILE=$WEBHOOK_FILE" +grep -n "HAMI_VULKAN_ENABLE\|NVIDIA_DRIVER_CAPABILITIES\|LD_PRELOAD\|libvgpu\|hami\\.json" "$WEBHOOK_FILE" +``` + +이 단계는 실제 코드 베이스 조사. 결과에 따라 다음 step 의 plan 분리 여부 결정. 
+ +- [ ] **Step 3: 결정 게이트** + +만약 grep 결과: +- A. webhook 이 **이미** LD_PRELOAD + libvgpu.so mount + hami.json mount 를 주입한다 → Step A 의 Task 6 검증으로 통과 가능. 다음 step (Task 8) 의 진짜 격리 검증 진행. +- B. webhook 이 HAMI_VULKAN_ENABLE env 만 주입하고 LD_PRELOAD/mount 는 안 한다 → **Step A 를 두 sub-plan 으로 분할**: + - A.1 (본 plan Task 1-6 까지): namespaceSelector opt-in + isaac-launchable baseline 보호 + - A.2 (별도 plan): webhook backend Go 코드에 LD_PRELOAD env + libvgpu.so mount + hami.json mount 주입 추가 — design doc 의 7.2 절 참조 + +본 plan 에서는 결정만 하고, B 면 별도 plan 으로 분기. A 면 Task 8 으로 진행. + +--- + +### Task 8: 통합 검증 — isaac-launchable baseline 유지 + hami-test namespace 격리 enforce 확인 + +(Task 7 결정 게이트가 A 인 경우만 실행, B 면 별도 plan) + +- [ ] **Step 1: isaac-launchable namespace baseline 재확인** + +```bash +NEWPOD=$(kubectl -n isaac-launchable get pod -l app=isaac-launchable,instance=pod-1 -o jsonpath='{.items[0].metadata.name}') +kubectl -n isaac-launchable exec $NEWPOD -c vscode -- bash -lc \ + 'nvidia-smi --query-gpu=memory.total --format=csv,noheader; env | grep -E "^(HAMI|LD_PRELOAD|NVIDIA_DRIVER_CAP)"; ls /etc/vulkan/implicit_layer.d/' +``` + +Expected: +- `46068 MiB` (HAMi 격리 0, raw) +- env 에 `HAMI_VULKAN_ENABLE` 또는 `LD_PRELOAD` 없음 +- `/etc/vulkan/implicit_layer.d/` 에 `hami.json` 없음 + +- [ ] **Step 2: isaac-launchable runheadless.sh 5번 alive 검증** + +```bash +NEWPOD=$(kubectl -n isaac-launchable get pod -l app=isaac-launchable,instance=pod-1 -o jsonpath='{.items[0].metadata.name}') +kubectl -n isaac-launchable exec $NEWPOD -c vscode -- bash -lc ' +mkdir -p /tmp/baseline +for i in 1 2 3 4 5; do + pkill -KILL kit 2>/dev/null; sleep 3 + timeout 50 env ACCEPT_EULA=y /isaac-sim/runheadless.sh >/tmp/baseline/r$i.log 2>&1 + EC=$? + CRASH=$(grep -cE "Segmentation fault|crash has occurred" /tmp/baseline/r$i.log) + LISTEN=$(ss -tunlp 2>/dev/null | grep -c -E ":49100|:30999") + echo "run $i: exit=$EC crash=$CRASH listen=$LISTEN" +done +pkill -KILL kit 2>/dev/null +' +``` + +Expected: 5/5 `exit=124 crash=0 listen>=1`. 
+ +- [ ] **Step 3: hami-test namespace 의 cuda-partition-test pod 격리 검증** + +(Task 6 의 pod manifest 재배포) + +```bash +kubectl apply -f cluster/runtime/test/cuda-partition-test-pod.yaml +sleep 30 +kubectl -n hami-test logs cuda-partition-test | grep -E "memory.total|HAMI|LD_PRELOAD|hami.json" +kubectl -n hami-test delete pod cuda-partition-test +``` + +Expected logs: +- `23552 MiB` (NVML 격리 적용) +- env 에 `LD_PRELOAD=/usr/local/vgpu/libvgpu.so` 또는 `HAMI_VULKAN_ENABLE=1` +- `hami.json` mount 또는 ld.so.preload 활성 + +- [ ] **Step 4: PR commit/push** + +```bash +git add docs/superpowers/specs/2026-04-28-hami-isolation-isaac-sim-design.md \ + docs/superpowers/plans/2026-04-28-hami-isolation-step-a-namespace-opt-in.md \ + charts/hami/values.yaml \ + charts/hami/templates/scheduler/webhook.yaml \ + cluster/runtime/snapshot-2026-04-28/ \ + cluster/runtime/test/cuda-partition-test-pod.yaml +git status --short +git log --oneline -10 +git push xiilab feat/vulkan-vgpu +``` + +Expected: push 성공. + +- [ ] **Step 5: PR #1803 follow-up 코멘트 등록 — Step A 완료 보고** + +```bash +cat > /tmp/pr1803_step_a_done.md <<'EOF' +## Step A complete — Namespace opt-in/out for HAMi mutating webhook + +Switches the webhook namespaceSelector from opt-out (`hami.io/webhook NotIn ignore`) to opt-in (`hami.io/vgpu In enabled`). Clusters that mix HAMi vGPU isolation with NVIDIA Omniverse / Isaac Sim Kit workloads can now keep Isaac Sim namespaces unmutated (no LD_PRELOAD inject, no implicit Vulkan layer manifest) while other namespaces explicitly opt in for full isolation. 
+ +### Verification + +isaac-launchable namespace (no `hami.io/vgpu` label): +- `nvidia-smi memory.total` = 46068 MiB (HAMi inject 0) +- `runheadless.sh` 5/5 alive + listen 49100/30999 +- baseline restored to the working state from before the 4-27 dawn patch + +hami-test namespace (`hami.io/vgpu=enabled`): +- Webhook mutation applied +- `nvidia-smi memory.total` = 23552 MiB (NVML hook active) +- LD_PRELOAD / hami.json mount injected (when Task 7 decision gate is A) + +### Spec / plan + +- spec: `docs/superpowers/specs/2026-04-28-hami-isolation-isaac-sim-design.md` +- plan: `docs/superpowers/plans/2026-04-28-hami-isolation-step-a-namespace-opt-in.md` + +Step B (HAMi-core hook hardening) and Step C (Vulkan layer compat) follow in separate plans so that isaac-launchable can eventually opt-in for full isolation (Step D) once the hook code is hardened to coexist with Carbonite/OptiX/Vulkan layer chain. +EOF +gh api repos/Project-HAMi/HAMi/issues/1803/comments -X POST -f body="$(cat /tmp/pr1803_step_a_done.md)" --jq '.html_url' +``` + +Expected: PR comment URL 출력. + +--- + +## Self-Review + +**1. Spec coverage 점검:** +- Spec §7 (Step A) 의 webhook namespaceSelector 변경 → Tasks 2, 3, 4 ✅ +- Spec §7 의 노드 wide ld.so.preload 폐기 → Task 5 ✅ +- Spec §7 의 hami-vulkan-manifest-installer 폐기 → Task 5 ✅ +- Spec §7 의 LD_PRELOAD env / volume mount webhook 자동 주입 → Task 7 (결정 게이트, 별도 plan 가능성) +- Spec §7.4 의 검증 (isaac-launchable baseline + 새 namespace 격리) → Tasks 6, 8 ✅ +- Spec §11 의 위험 (helm release 영향) → Task 4 가 cluster 직접 patch 로 우회 ✅ + +**2. Placeholder scan:** "TBD"/"TODO"/"implement later" 검색 — 본 plan 에 없음 ✅. 단 Task 7 의 결정 게이트가 webhook 코드 조사 후 분기 — 이는 placeholder 가 아니라 명시적 decision point. + +**3. Type consistency:** `hami.io/vgpu=enabled` label key/value 가 Tasks 2, 3, 4, 6, 8 에서 일관 사용 ✅. `hami.io/webhook=ignore` 는 legacy 로 명시적 표시 ✅. + +**4. Scope check:** Step A 만 다룸. Step B/C/D 는 별도 plan 명시 ✅. 단 Task 7 이 webhook backend Go 코드 변경 가능성 → 별도 plan 분기 명시. 
+ +--- + +## Open question (실행 시 결정) + +**Task 7 의 결정 게이트** 가 A (현재 webhook 이 이미 LD_PRELOAD + mount 주입) 인지 B (env 만 주입, 코드 확장 필요) 인지 — Task 7 Step 1-2 실행 후 결정. B 면 본 plan 의 Task 8 은 별도 sub-plan A.2 로 분리. diff --git a/docs/superpowers/plans/2026-04-28-hami-isolation-step-b-cuda-hook-hardening.md b/docs/superpowers/plans/2026-04-28-hami-isolation-step-b-cuda-hook-hardening.md new file mode 100644 index 000000000..4da74f2a7 --- /dev/null +++ b/docs/superpowers/plans/2026-04-28-hami-isolation-step-b-cuda-hook-hardening.md @@ -0,0 +1,740 @@ +# HAMi vGPU 격리 — Step B: HAMi-core CUDA/NVML Hook Hardening Implementation Plan + +> **For agentic workers:** REQUIRED SUB-SKILL: Use superpowers:subagent-driven-development (recommended) or superpowers:executing-plans to implement this plan task-by-task. Steps use checkbox (`- [ ]`) syntax for tracking. + +**Goal:** HAMi-core (`xiilab/HAMi-core`, branch `vulkan-layer`, PR #182) 의 CUDA hook 들에 `cuMemGetInfo_v2` (commit `03f99d7`) 의 robustness 패턴 — "driver 에 먼저 forward → NULL/invalid arg 시 early return → 그 후 HAMi 격리 logic" — 을 적용하여 NVIDIA Isaac Sim Kit 의 OptiX/Aftermath/internal call paths 에서 NULL 인자/missing context 시 NULL deref SegFault 가 발생하지 않게 만든다. + +**Architecture:** 본 plan 은 HAMi-core fork 의 `src/cuda/memory.c` 와 `src/cuda/context.c` 의 7개 hook 함수 (`memory.c` 6개 + `context.c` 1개) 에 robustness 패턴을 적용한다. 각 함수마다 (1) 단위 test 작성, (2) hardening 코드 적용, (3) test 통과 검증, (4) commit. 마지막에 ws-node074 의 isaac-launchable namespace 에서 `LD_PRELOAD=/usr/local/vgpu/libvgpu.so` 로 단순 cuda + Isaac Sim Kit init 통합 검증. + +**Tech Stack:** C, CMake, HAMi-core fork (`/Users/xiilab/git/HAMi/libvgpu`, branch `vulkan-layer`), Docker (build-in-docker target), kubectl (검증), ws-node074 (Mac → SSH). + +**Plan scope:** Step B 만 다룬다. Step A.2 (webhook backend LD_PRELOAD env 자동 주입), Step C (Vulkan layer compat), Step D (isaac-launchable opt-in 활성화) 는 별도 plan. 
+ +--- + +## File Structure + +| 파일 | 변경 종류 | 책임 | +|---|---|---| +| `libvgpu/src/cuda/memory.c` | Modify | cuMemAlloc_v2, cuMemAllocHost_v2, cuMemAllocManaged, cuMemAllocPitch_v2, cuMemHostAlloc, cuMemHostRegister_v2 NULL guard | +| `libvgpu/src/cuda/context.c` | Modify | cuCtxGetDevice NULL guard | +| `libvgpu/test/test_cuda_null_guards.c` (신규) | Create | 단위 test — 각 hook 의 NULL/invalid arg 케이스가 driver forward + early return | +| `libvgpu/test/CMakeLists.txt` | Modify | test_cuda_null_guards.c 빌드 추가 | + +각 hook 의 robustness 패턴 (cuMemGetInfo_v2 의 commit `03f99d7` 모범): + +```c +CUresult cuXxx(args) { + LOG_DEBUG("cuXxx"); + ENSURE_INITIALIZED(); + /* Forward to driver FIRST so NULL/missing-context errors surface + * exactly as without HAMi. Never dereference what the driver rejected. */ + CUresult r = CUDA_OVERRIDE_CALL(cuda_library_entry, cuXxx, args); + if (r != CUDA_SUCCESS) return r; + if (...args invalid for HAMi logic...) return r; + /* HAMi 격리 logic */ + ... +} +``` + +--- + +## Tasks + +### Task 1: 현재 cuda hook 의 robustness 패턴 audit + fix list 결정 + +**Files:** +- Read: `libvgpu/src/cuda/memory.c`, `libvgpu/src/cuda/context.c` + +이 task 는 코드 변경 0 — 단지 어떤 hook 이 NULL guard 부족한지 list 작성. 다음 task 들의 정확한 범위 결정. 
+ +- [ ] **Step 1: memory.c 의 alloc/free 함수 본문 dump** + +```bash +cd /Users/xiilab/git/HAMi/libvgpu +for fn in cuMemAlloc_v2 cuMemAllocHost_v2 cuMemAllocManaged cuMemAllocPitch_v2 cuMemHostAlloc cuMemHostRegister_v2 cuMemFree_v2 cuMemGetInfo_v2; do + echo "=== $fn ===" + awk "/^CUresult $fn\\(/,/^}/" src/cuda/memory.c | head -30 + echo +done +``` + +- [ ] **Step 2: context.c 의 cuCtxGetDevice 본문 dump** + +```bash +awk "/^CUresult cuCtxGetDevice\\(/,/^}/" src/cuda/context.c +``` + +- [ ] **Step 3: fix list 결정 (audit 결과 메모)** + +다음 hook 중 robustness 패턴 부재인 것 — 본 plan 의 Tasks 2-7 에서 적용: +- cuMemAlloc_v2 (Task 2) +- cuMemAllocHost_v2 (Task 3) +- cuMemAllocManaged (Task 3) +- cuMemAllocPitch_v2 (Task 4) +- cuMemHostAlloc (Task 5) +- cuMemHostRegister_v2 (Task 6) +- cuCtxGetDevice (Task 7) + +cuMemFree_v2 는 이미 fix (`3bebc8a fix(cuda): fall back to real driver on untracked cuMemFree[Async] pointer`) — skip. + +cuMemGetInfo_v2 는 이미 fix (`03f99d7`) — reference 패턴. + +- [ ] **Step 4: 결정 commit (audit notes)** + +```bash +cd /Users/xiilab/git/HAMi +mkdir -p libvgpu/docs/superpowers/notes +cat > libvgpu/docs/superpowers/notes/2026-04-28-cuda-hook-audit.md <<'EOF' +# CUDA hook robustness audit — 2026-04-28 + +Reference fix: commit `03f99d7 fix(cuda): avoid NULL deref in cuMemGetInfo_v2 when caller (OptiX) crashes` + +Pattern: +1. Forward to real driver first (errors surface exactly as without HAMi) +2. Early return on NULL/invalid args (driver already rejected) +3. 
Then HAMi enforcement logic + +## Hooks needing the same pattern + +- cuMemAlloc_v2 (memory.c:135) +- cuMemAllocHost_v2 (memory.c:145) +- cuMemAllocManaged (memory.c:159) +- cuMemAllocPitch_v2 (memory.c:174) +- cuMemHostAlloc (memory.c:223) +- cuMemHostRegister_v2 (memory.c:239) +- cuCtxGetDevice (context.c:42) + +## Already robust (skip) + +- cuMemFree_v2 (commit 3bebc8a) +- cuMemFreeAsync (commit 3bebc8a) +- cuMemGetInfo_v2 (commit 03f99d7) +- cuMemCreate (commit 833c62c) +EOF +cd libvgpu +git add docs/superpowers/notes/2026-04-28-cuda-hook-audit.md +git commit -s -m "docs(notes): cuda hook robustness audit list for Step B hardening" +``` + +Expected: commit 생성, 다른 task 의 reference document 로 사용. + +--- + +### Task 2: cuMemAlloc_v2 NULL guard 추가 + +**Files:** +- Modify: `libvgpu/src/cuda/memory.c:135-143` (cuMemAlloc_v2) +- Create: `libvgpu/test/test_cuda_null_guards.c` (Task 2 Step 1 에서 생성) +- Modify: `libvgpu/test/CMakeLists.txt` + +- [ ] **Step 1: 단위 test 작성 (failing test 먼저)** + +`libvgpu/test/test_cuda_null_guards.c` 생성: + +```c +#include <assert.h> +#include <stddef.h> +#include <stdio.h> +#include <cuda.h> + +extern CUresult cuMemAlloc_v2(CUdeviceptr* dptr, size_t bytesize); + +/* Test: NULL dptr should NOT crash — driver returns CUDA_ERROR_INVALID_VALUE, + * we propagate that error exactly. */ +static void test_cuMemAlloc_v2_null_dptr(void) { + CUresult r = cuMemAlloc_v2(NULL, 4096); + assert(r != CUDA_SUCCESS); + /* The exact error code depends on driver, but it must not crash and + * must not be CUDA_SUCCESS. */ + printf("[OK] cuMemAlloc_v2(NULL, 4096) returned %d (non-zero, no crash)\n", r); +} + +/* Test: bytesize 0 — driver may accept or reject; we propagate. 
*/ +static void test_cuMemAlloc_v2_zero_size(void) { + CUdeviceptr dptr = 0; + CUresult r = cuMemAlloc_v2(&dptr, 0); + /* Either success with dptr=0 or driver-defined error — we don't crash */ + printf("[OK] cuMemAlloc_v2(&dptr, 0) returned %d\n", r); +} + +int main(void) { + /* Initialize CUDA driver */ + CUresult r = cuInit(0); + if (r != CUDA_SUCCESS) { + fprintf(stderr, "cuInit failed: %d (skipping — no GPU?)\n", r); + return 0; + } + CUdevice dev; + cuDeviceGet(&dev, 0); + CUcontext ctx; + cuCtxCreate_v2(&ctx, 0, dev); + + test_cuMemAlloc_v2_null_dptr(); + test_cuMemAlloc_v2_zero_size(); + + cuCtxDestroy_v2(ctx); + return 0; +} +``` + +`libvgpu/test/CMakeLists.txt` 에 추가 — 현재 test target 들 옆에 (예: `test_runtime_launch` 다음): + +```cmake +add_executable(test_cuda_null_guards test_cuda_null_guards.c) +target_link_libraries(test_cuda_null_guards PUBLIC vgpu cuda) +target_include_directories(test_cuda_null_guards PRIVATE ${CUDA_HOME}/include) +``` + +- [ ] **Step 2: 빌드 + 현재 동작 확인 (test 실행 가능한지만, 결과 검증 안 함)** + +```bash +cd /Users/xiilab/git/HAMi/libvgpu +rsync -az --exclude=build --exclude=.git/objects/pack . root@10.61.3.74:/tmp/libvgpu-build/ +ssh root@10.61.3.74 'cd /tmp/libvgpu-build && rm -rf .git build && git init -q && git add -A 2>&1 | tail -1 && git -c user.email=x@x -c user.name=x commit -q -m local --no-gpg-sign && make build-in-docker 2>&1 | grep -E "Built target|error" | head' +``` + +Expected: `Built target vgpu` + `Built target test_cuda_null_guards`. + +- [ ] **Step 3: 현재 (변경 전) cuMemAlloc_v2 의 NULL dptr 동작 확인 (baseline)** + +```bash +ssh root@10.61.3.74 'cd /tmp/libvgpu-build/build && LD_PRELOAD=$(pwd)/libvgpu.so ./test_cuda_null_guards 2>&1' | head -20 +``` + +Expected: 만약 baseline 에서 SegFault 또는 abort → fix 가치 확인. 만약 이미 정상 propagate 면 진짜 fix 필요한지 재검토 (BLOCKED 보고). 
+ +- [ ] **Step 4: cuMemAlloc_v2 NULL guard 적용** + +`src/cuda/memory.c:135-143` 의 함수를 다음으로 교체: + +```c +CUresult cuMemAlloc_v2(CUdeviceptr* dptr, size_t bytesize) { + LOG_INFO("into cuMemAllocing_v2 dptr=%p bytesize=%ld",dptr,bytesize); + ENSURE_RUNNING(); + /* Forward NULL/invalid args to the real driver so error codes match + * non-HAMi behavior. NVIDIA OptiX/Aftermath internals can call us with + * NULL during early init paths; dereferencing would SegFault. */ + if (dptr == NULL) { + return CUDA_OVERRIDE_CALL(cuda_library_entry, cuMemAlloc_v2, dptr, bytesize); + } + CUresult res = allocate_raw(dptr,bytesize); + if (res!=CUDA_SUCCESS) + return res; + LOG_INFO("res=%d, cuMemAlloc_v2 success dptr=%p bytesize=%lu",0,(void *)*dptr,bytesize); + return CUDA_SUCCESS; +} +``` + +- [ ] **Step 5: rebuild + test 실행** + +```bash +cd /Users/xiilab/git/HAMi/libvgpu +rsync -az --exclude=build --exclude=.git/objects/pack . root@10.61.3.74:/tmp/libvgpu-build/ +ssh root@10.61.3.74 ' +cd /tmp/libvgpu-build && rm -rf .git build && git init -q && git add -A 2>&1 | tail -1 && \ + git -c user.email=x@x -c user.name=x commit -q -m local --no-gpg-sign && \ + make build-in-docker 2>&1 | grep -E "Built target|error" | head && \ + cd build && LD_PRELOAD=$(pwd)/libvgpu.so ./test_cuda_null_guards 2>&1 | head -20 +' +``` + +Expected: `[OK] cuMemAlloc_v2(NULL, 4096) returned <N> (non-zero, no crash)` (`<N>` 은 driver 가 돌려준 0 이 아닌 error code, crash 없음). `[OK] cuMemAlloc_v2(&dptr, 0) returned <N>`. + +- [ ] **Step 6: commit** + +```bash +cd /Users/xiilab/git/HAMi/libvgpu +git add src/cuda/memory.c test/test_cuda_null_guards.c test/CMakeLists.txt +git commit -s -m "fix(cuda): add NULL dptr guard to cuMemAlloc_v2 (OptiX/Aftermath robustness)" \ + -m "Forwards NULL dptr calls to the real CUDA driver so the caller sees the driver's defined error code (CUDA_ERROR_INVALID_VALUE) instead of HAMi dereferencing the NULL inside allocate_raw. 
NVIDIA OptiX/Aftermath internal init paths historically pass NULL during fallback probes; without this guard libvgpu.so SegFaults inside Isaac Sim Kit init under LD_PRELOAD. Pattern matches commit 03f99d7 (cuMemGetInfo_v2)." +``` + +--- + +### Task 3: cuMemAllocHost_v2 + cuMemAllocManaged NULL guards + +**Files:** +- Modify: `libvgpu/src/cuda/memory.c:145-157, 159-172` +- Modify: `libvgpu/test/test_cuda_null_guards.c` (test 추가) + +- [ ] **Step 1: test 추가 (test_cuda_null_guards.c)** + +`libvgpu/test/test_cuda_null_guards.c` 의 main 위에 추가: + +```c +extern CUresult cuMemAllocHost_v2(void** hptr, size_t bytesize); +extern CUresult cuMemAllocManaged(CUdeviceptr* dptr, size_t bytesize, unsigned int flags); + +static void test_cuMemAllocHost_v2_null_hptr(void) { + CUresult r = cuMemAllocHost_v2(NULL, 4096); + assert(r != CUDA_SUCCESS); + printf("[OK] cuMemAllocHost_v2(NULL, 4096) returned %d\n", r); +} + +static void test_cuMemAllocManaged_null_dptr(void) { + CUresult r = cuMemAllocManaged(NULL, 4096, CU_MEM_ATTACH_GLOBAL); + assert(r != CUDA_SUCCESS); + printf("[OK] cuMemAllocManaged(NULL, 4096) returned %d\n", r); +} +``` + +main() 에 호출 추가: +```c +test_cuMemAllocHost_v2_null_hptr(); +test_cuMemAllocManaged_null_dptr(); +``` + +- [ ] **Step 2: cuMemAllocHost_v2 + cuMemAllocManaged hardening** + +`memory.c:145-157` 의 cuMemAllocHost_v2: + +```c +CUresult cuMemAllocHost_v2(void** hptr, size_t bytesize) { + LOG_INFO("into cuMemAllocHost_v2 hptr=%p bytesize=%ld",hptr,bytesize); + ENSURE_RUNNING(); + if (hptr == NULL) { + return CUDA_OVERRIDE_CALL(cuda_library_entry, cuMemAllocHost_v2, hptr, bytesize); + } + /* (existing logic preserved) */ + CUresult res = CUDA_OVERRIDE_CALL(cuda_library_entry, cuMemAllocHost_v2, hptr, bytesize); + if (res != CUDA_SUCCESS) return res; + LOG_INFO("res=%d, cuMemAllocHost_v2 success",0); + return CUDA_SUCCESS; +} +``` + +`memory.c:159-172` 의 cuMemAllocManaged: + +```c +CUresult cuMemAllocManaged(CUdeviceptr* dptr, size_t bytesize, unsigned int 
flags) { + LOG_INFO("into cuMemAllocManaged dptr=%p bytesize=%ld flags=%u",dptr,bytesize,flags); + ENSURE_RUNNING(); + if (dptr == NULL) { + return CUDA_OVERRIDE_CALL(cuda_library_entry, cuMemAllocManaged, dptr, bytesize, flags); + } + CUresult res = allocate_raw(dptr, bytesize); + if (res != CUDA_SUCCESS) return res; + /* Re-route to the actual managed allocator since allocate_raw used cuMemAlloc_v2. + * For now we accept this minor over-clamp — callers asking for managed memory + * will still hit the partition limit, which is the desired behavior. */ + LOG_INFO("res=%d, cuMemAllocManaged success dptr=%p", 0, (void*)*dptr); + return CUDA_SUCCESS; +} +``` + +(주의: 위 코드는 audit step 1 의 결과에 따라 다를 수 있음. 실제 함수 본문 dump 후 위 패턴으로 변경. allocate_raw 가 NULL 가드를 내부적으로 가지면 추가 가드 불필요.) + +- [ ] **Step 3: rebuild + test** + +```bash +cd /Users/xiilab/git/HAMi/libvgpu +rsync -az --exclude=build --exclude=.git/objects/pack . root@10.61.3.74:/tmp/libvgpu-build/ +ssh root@10.61.3.74 ' +cd /tmp/libvgpu-build && rm -rf .git build && git init -q && git add -A 2>&1 | tail -1 && \ + git -c user.email=x@x -c user.name=x commit -q -m local --no-gpg-sign && \ + make build-in-docker 2>&1 | grep -E "Built target|error" | head && \ + cd build && LD_PRELOAD=$(pwd)/libvgpu.so ./test_cuda_null_guards 2>&1 | tail -10 +' +``` + +Expected: `[OK] cuMemAllocHost_v2(NULL, 4096) returned ` + `[OK] cuMemAllocManaged(NULL, 4096) returned `. + +- [ ] **Step 4: commit** + +```bash +git add src/cuda/memory.c test/test_cuda_null_guards.c +git commit -s -m "fix(cuda): add NULL ptr guards to cuMemAllocHost_v2 and cuMemAllocManaged" \ + -m "Same robustness pattern as Task 2 (cuMemAlloc_v2). Forwards NULL ptr to driver so OptiX/Aftermath internal probes get the driver's defined error instead of segfaulting inside HAMi." 
+``` + +--- + +### Task 4: cuMemAllocPitch_v2 NULL guard + +**Files:** +- Modify: `libvgpu/src/cuda/memory.c:174-190` +- Modify: `libvgpu/test/test_cuda_null_guards.c` + +- [ ] **Step 1: test 추가** + +```c +extern CUresult cuMemAllocPitch_v2(CUdeviceptr* dptr, size_t* pPitch, + size_t WidthInBytes, size_t Height, + unsigned int ElementSizeBytes); + +static void test_cuMemAllocPitch_v2_null_dptr(void) { + size_t pitch = 0; + CUresult r = cuMemAllocPitch_v2(NULL, &pitch, 1024, 1024, 4); + assert(r != CUDA_SUCCESS); + printf("[OK] cuMemAllocPitch_v2(NULL, ...) returned %d\n", r); +} + +static void test_cuMemAllocPitch_v2_null_pitch(void) { + CUdeviceptr dptr = 0; + CUresult r = cuMemAllocPitch_v2(&dptr, NULL, 1024, 1024, 4); + assert(r != CUDA_SUCCESS); + printf("[OK] cuMemAllocPitch_v2(&dptr, NULL, ...) returned %d\n", r); +} +``` + +main() 에 호출 추가. + +- [ ] **Step 2: cuMemAllocPitch_v2 hardening** + +`memory.c:174-190`: + +```c +CUresult cuMemAllocPitch_v2(CUdeviceptr* dptr, size_t* pPitch, size_t WidthInBytes, + size_t Height, unsigned int ElementSizeBytes) { + LOG_INFO("into cuMemAllocPitch_v2 dptr=%p pPitch=%p w=%lu h=%lu",dptr,pPitch,WidthInBytes,Height); + ENSURE_RUNNING(); + if (dptr == NULL || pPitch == NULL) { + return CUDA_OVERRIDE_CALL(cuda_library_entry, cuMemAllocPitch_v2, + dptr, pPitch, WidthInBytes, Height, ElementSizeBytes); + } + /* (existing partition logic preserved) */ + CUresult res = CUDA_OVERRIDE_CALL(cuda_library_entry, cuMemAllocPitch_v2, + dptr, pPitch, WidthInBytes, Height, ElementSizeBytes); + if (res != CUDA_SUCCESS) return res; + /* Track the allocation for budget enforcement */ + /* (preserve original tracking code from current implementation) */ + LOG_INFO("res=%d, cuMemAllocPitch_v2 success dptr=%p pitch=%lu", 0, (void*)*dptr, *pPitch); + return CUDA_SUCCESS; +} +``` + +- [ ] **Step 3: rebuild + test** + +(Task 3 Step 3 와 동일 패턴, test 출력에 cuMemAllocPitch_v2 두 줄 추가 기대) + +- [ ] **Step 4: commit** + +```bash +git add src/cuda/memory.c 
test/test_cuda_null_guards.c +git commit -s -m "fix(cuda): add NULL guards to cuMemAllocPitch_v2" +``` + +--- + +### Task 5: cuMemHostAlloc NULL guard + +**Files:** +- Modify: `libvgpu/src/cuda/memory.c:223-237` +- Modify: `libvgpu/test/test_cuda_null_guards.c` + +- [ ] **Step 1: test 추가** + +```c +extern CUresult cuMemHostAlloc(void** hptr, size_t bytesize, unsigned int flags); + +static void test_cuMemHostAlloc_null_hptr(void) { + CUresult r = cuMemHostAlloc(NULL, 4096, 0); + assert(r != CUDA_SUCCESS); + printf("[OK] cuMemHostAlloc(NULL, 4096, 0) returned %d\n", r); +} +``` + +- [ ] **Step 2: hardening** + +`memory.c:223-237`: + +```c +CUresult cuMemHostAlloc(void** hptr, size_t bytesize, unsigned int flags) { + LOG_INFO("into cuMemHostAlloc hptr=%p bytesize=%ld flags=%u",hptr,bytesize,flags); + ENSURE_RUNNING(); + if (hptr == NULL) { + return CUDA_OVERRIDE_CALL(cuda_library_entry, cuMemHostAlloc, hptr, bytesize, flags); + } + CUresult res = CUDA_OVERRIDE_CALL(cuda_library_entry, cuMemHostAlloc, hptr, bytesize, flags); + if (res != CUDA_SUCCESS) return res; + LOG_INFO("res=%d, cuMemHostAlloc success hptr=%p", 0, *hptr); + return CUDA_SUCCESS; +} +``` + +- [ ] **Step 3: rebuild + test** + +- [ ] **Step 4: commit** + +```bash +git add src/cuda/memory.c test/test_cuda_null_guards.c +git commit -s -m "fix(cuda): add NULL guard to cuMemHostAlloc" +``` + +--- + +### Task 6: cuMemHostRegister_v2 NULL guard + +**Files:** +- Modify: `libvgpu/src/cuda/memory.c:239-263` +- Modify: `libvgpu/test/test_cuda_null_guards.c` + +- [ ] **Step 1: test 추가** + +```c +extern CUresult cuMemHostRegister_v2(void* hptr, size_t bytesize, unsigned int flags); + +static void test_cuMemHostRegister_v2_null_hptr(void) { + CUresult r = cuMemHostRegister_v2(NULL, 4096, 0); + assert(r != CUDA_SUCCESS); + printf("[OK] cuMemHostRegister_v2(NULL, 4096, 0) returned %d\n", r); +} + +static void test_cuMemHostRegister_v2_zero_size(void) { + char buf[16]; + CUresult r = cuMemHostRegister_v2(buf, 0, 0); + 
/* zero size — driver may accept or reject; we don't crash */ + printf("[OK] cuMemHostRegister_v2(buf, 0, 0) returned %d\n", r); +} +``` + +- [ ] **Step 2: hardening** + +`memory.c:239-263`: + +```c +CUresult cuMemHostRegister_v2(void* hptr, size_t bytesize, unsigned int flags) { + LOG_INFO("into cuMemHostRegister_v2 hptr=%p bytesize=%ld flags=%u",hptr,bytesize,flags); + ENSURE_RUNNING(); + if (hptr == NULL) { + return CUDA_OVERRIDE_CALL(cuda_library_entry, cuMemHostRegister_v2, hptr, bytesize, flags); + } + /* preserve existing logic */ + CUresult res = CUDA_OVERRIDE_CALL(cuda_library_entry, cuMemHostRegister_v2, hptr, bytesize, flags); + return res; +} +``` + +- [ ] **Step 3: rebuild + test** + +- [ ] **Step 4: commit** + +```bash +git add src/cuda/memory.c test/test_cuda_null_guards.c +git commit -s -m "fix(cuda): add NULL guard to cuMemHostRegister_v2" +``` + +--- + +### Task 7: cuCtxGetDevice NULL guard + +**Files:** +- Modify: `libvgpu/src/cuda/context.c:42-46` +- Modify: `libvgpu/test/test_cuda_null_guards.c` + +- [ ] **Step 1: test 추가** + +```c +extern CUresult cuCtxGetDevice(CUdevice* device); + +static void test_cuCtxGetDevice_null(void) { + CUresult r = cuCtxGetDevice(NULL); + assert(r != CUDA_SUCCESS); + printf("[OK] cuCtxGetDevice(NULL) returned %d\n", r); +} +``` + +- [ ] **Step 2: hardening** + +`context.c:42-46` 현재 함수를 다음으로 교체: + +```c +CUresult cuCtxGetDevice(CUdevice* device) { + if (device == NULL) { + return CUDA_OVERRIDE_CALL(cuda_library_entry, cuCtxGetDevice, device); + } + return CUDA_OVERRIDE_CALL(cuda_library_entry, cuCtxGetDevice, device); +} +``` + +(NULL device 가 driver 에 전달돼서 INVALID_VALUE 반환. 
이전에도 NULL 을 그대로 driver 에 전달했고 위 코드의 두 분기는 동일하므로 동작 변화는 없음 — 가드는 HAMi 가 device 를 역참조하지 않는다는 의도를 명시하는 용도) + +- [ ] **Step 3: rebuild + test** + +- [ ] **Step 4: commit** + +```bash +git add src/cuda/context.c test/test_cuda_null_guards.c +git commit -s -m "fix(cuda): add NULL guard to cuCtxGetDevice" +``` + +--- + +### Task 8: 모든 단위 test 통과 확인 + ws-node074 통합 검증 (Isaac Sim Kit init) + +**Files:** +- (no code change) +- Verify: ws-node074 isaac-launchable namespace 의 LD_PRELOAD baseline + +- [ ] **Step 1: Tasks 2-7 의 모든 단위 test 가 통과하는지 최종 빌드 + run** + +```bash +cd /Users/xiilab/git/HAMi/libvgpu +rsync -az --exclude=build --exclude=.git/objects/pack . root@10.61.3.74:/tmp/libvgpu-build/ +ssh root@10.61.3.74 ' +cd /tmp/libvgpu-build && rm -rf .git build && git init -q && git add -A 2>&1 | tail -1 && \ + git -c user.email=x@x -c user.name=x commit -q -m local --no-gpg-sign && \ + make build-in-docker 2>&1 | grep -E "Built target|error|FAIL" | head && \ + cd build && LD_PRELOAD=$(pwd)/libvgpu.so ./test_cuda_null_guards 2>&1 +' +``` + +Expected: `[OK]` 라인이 7개 이상, exit code 0, no crash, no `[FAIL]`. + +- [ ] **Step 2: ws-node074 노드 .so 를 새 fix 빌드로 swap** + +```bash +ssh root@10.61.3.74 ' +md5sum /tmp/libvgpu-build/build/libvgpu.so +cp -av /usr/local/vgpu/libvgpu.so /usr/local/vgpu/libvgpu.so.bak-pre-step-b +cp -f /tmp/libvgpu-build/build/libvgpu.so /usr/local/vgpu/libvgpu.so +md5sum /usr/local/vgpu/libvgpu.so +' +``` + +Expected: 새 .so md5 가 이전 .so md5 와 다름. + +- [ ] **Step 3: isaac-launchable namespace 가 webhook opt-in label 없으므로 baseline 유지 (LD_PRELOAD 없음). 그러나 manual 검증 — 컨테이너에 LD_PRELOAD 강제 적용 후 cuMemAlloc_v2(NULL,...) 
가 SegFault 안 나는지** + +```bash +NEWPOD=$(kubectl -n isaac-launchable get pod -l app=isaac-launchable,instance=pod-1 -o jsonpath='{.items[0].metadata.name}') +kubectl -n isaac-launchable exec $NEWPOD -c vscode -- bash -lc ' +cat > /tmp/null_test.c <<EOF +#include <cuda.h> +#include <stdio.h> +int main(void) { + cuInit(0); + CUdevice d; cuDeviceGet(&d, 0); + CUcontext c; cuCtxCreate_v2(&c, 0, d); + CUresult r = cuMemAlloc_v2(NULL, 4096); + printf("cuMemAlloc_v2(NULL, 4096) = %d (no crash = pass)\n", r); + cuCtxDestroy_v2(c); + return 0; +} +EOF +gcc /tmp/null_test.c -o /tmp/null_test -lcuda -I/usr/local/cuda/include 2>&1 | head -5 +LD_PRELOAD=/usr/local/vgpu/libvgpu.so /tmp/null_test +' +``` + +Expected: 출력에 `cuMemAlloc_v2(NULL, 4096) = ` (예: 1 또는 100), no SegFault, exit 0. + +- [ ] **Step 4: isaac-launchable runheadless.sh 5번 — 5/5 alive baseline 유지 (Step B 가 baseline 안 깨졌는지)** + +```bash +NEWPOD=$(kubectl -n isaac-launchable get pod -l app=isaac-launchable,instance=pod-1 -o jsonpath='{.items[0].metadata.name}') +kubectl -n isaac-launchable exec $NEWPOD -c vscode -- bash -lc ' +mkdir -p /tmp/v +for i in 1 2 3 4 5; do + pkill -KILL kit 2>/dev/null; sleep 3 + timeout 50 env ACCEPT_EULA=y /isaac-sim/runheadless.sh >/tmp/v/r$i.log 2>&1 + EC=$? + CRASH=$(grep -cE "Segmentation fault|crash has occurred" /tmp/v/r$i.log) + LISTEN=$(ss -tunlp 2>/dev/null | grep -c -E ":49100|:30999") + echo "run $i: exit=$EC crash=$CRASH listen=$LISTEN" +done +pkill -KILL kit 2>/dev/null +' +``` + +Expected: 5/5 `exit=124 crash=0 listen=1` (baseline 유지). Step B 의 .so 가 baseline 환경에 inject 돼도 race trigger 안 함 (LD_PRELOAD 없으니 inject 0). + +- [ ] **Step 5: PR commit/push (HAMi-core fork)** + +```bash +cd /Users/xiilab/git/HAMi/libvgpu +git log --oneline -10 +git push xiilab vulkan-layer 2>&1 | tail +``` + +Expected: 7개 commit 추가 (Tasks 1-7) push 성공. 
+ +- [ ] **Step 6: HAMi 메인 fork 의 submodule SHA bump commit** + +```bash +cd /Users/xiilab/git/HAMi +NEW_SHA=$(cd libvgpu && git rev-parse HEAD) +echo "new HAMi-core SHA: $NEW_SHA" +git add libvgpu +git commit -s -m "chore(libvgpu): bump HAMi-core for Step B cuda hook hardening" \ + -m "Pulls in 7 commits adding NULL ptr guards to cuMemAlloc_v2, cuMemAllocHost_v2, cuMemAllocManaged, cuMemAllocPitch_v2, cuMemHostAlloc, cuMemHostRegister_v2, cuCtxGetDevice. Pattern matches commit 03f99d7 (cuMemGetInfo_v2). Reduces SegFault risk for callers (Isaac Sim Kit OptiX/Aftermath) that pass NULL during internal probes." +git push xiilab feat/vulkan-vgpu 2>&1 | tail +``` + +Expected: HAMi-core SHA 업데이트된 commit 1개 push 성공. + +- [ ] **Step 7: PR #182 + PR #1803 follow-up 코멘트 등록** + +```bash +cat > /tmp/pr182_step_b_done.md <<'EOF' +## Step B complete — CUDA hook NULL guard hardening + +Adds NULL pointer guards to 7 CUDA hooks following the pattern from `cuMemGetInfo_v2` (commit 03f99d7): + +| Hook | Commit | NULL arg behavior | +|---|---|---| +| cuMemAlloc_v2 | (sha) | Forward to driver, return driver's error | +| cuMemAllocHost_v2 | (sha) | Same | +| cuMemAllocManaged | (sha) | Same | +| cuMemAllocPitch_v2 | (sha) | Same (NULL dptr or NULL pPitch) | +| cuMemHostAlloc | (sha) | Same | +| cuMemHostRegister_v2 | (sha) | Same | +| cuCtxGetDevice | (sha) | Same | + +### Verification + +`test/test_cuda_null_guards.c` — 7 unit tests, all pass under `LD_PRELOAD=libvgpu.so`. ws-node074 isaac-launchable namespace baseline (5/5 runheadless.sh alive) preserved. + +### Why + +NVIDIA OptiX denoising / Aftermath / Carbonite tasking call HAMi-core hooks during init with NULL args during fallback probes. Without the guards, libvgpu.so would dereference NULL and SegFault inside Isaac Sim Kit init. Step C (Vulkan layer compat) follows. 
+EOF +gh api repos/Project-HAMi/HAMi-core/issues/182/comments -X POST -f body="$(cat /tmp/pr182_step_b_done.md)" --jq '.html_url' + +cat > /tmp/pr1803_step_b_done.md <<'EOF' +## Step B (HAMi-core hook hardening) complete + +HAMi-core PR #182 added NULL pointer guards to 7 CUDA hooks (cuMemAlloc_v2, cuMemAllocHost_v2, cuMemAllocManaged, cuMemAllocPitch_v2, cuMemHostAlloc, cuMemHostRegister_v2, cuCtxGetDevice). Pattern matches the existing `cuMemGetInfo_v2` fix (commit 03f99d7). + +The `libvgpu` submodule pointer is bumped to the new HAMi-core SHA. + +isaac-launchable baseline preserved (5/5 runheadless.sh alive). Step C (Vulkan layer compat for Isaac Sim Kit init under LD_PRELOAD) follows in a separate plan. + +Spec: `docs/superpowers/specs/2026-04-28-hami-isolation-isaac-sim-design.md` +Plan: `docs/superpowers/plans/2026-04-28-hami-isolation-step-b-cuda-hook-hardening.md` +EOF +gh api repos/Project-HAMi/HAMi/issues/1803/comments -X POST -f body="$(cat /tmp/pr1803_step_b_done.md)" --jq '.html_url' +``` + +Expected: 두 코멘트 URL 출력. + +--- + +## Self-Review + +**1. Spec coverage:** Spec §8 (Step B) 의 7개 hook → Tasks 2-7 ✅. 통합 검증 → Task 8 ✅. cuMemFree_v2 / cuMemGetInfo_v2 / cuMemCreate 는 already-fixed 명시 ✅. + +**2. Placeholder scan:** "TBD"/"TODO"/"implement later" 없음 ✅. 단 Task 3 의 cuMemAllocHost_v2 / cuMemAllocManaged 본문은 "audit step 1 의 결과에 따라 다를 수 있음" 명시 — 이건 placeholder 가 아니라 실제 코드 dump 후 위 패턴 적용하라는 명시. + +**3. Type consistency:** `CUresult` / `CUdeviceptr` / `CUDA_OVERRIDE_CALL` macro 가 모든 task 에서 일관 사용 ✅. + +**4. Scope check:** Step B 만. Step A.2 / Step C / Step D 별도 plan 명시 ✅. 
+ +--- + +## 일정 추정 + +| Task | 예상 시간 | +|---|---| +| 1 audit + notes commit | 15분 | +| 2 cuMemAlloc_v2 + test framework | 45분 | +| 3 cuMemAllocHost_v2 + cuMemAllocManaged | 30분 | +| 4 cuMemAllocPitch_v2 | 20분 | +| 5 cuMemHostAlloc | 20분 | +| 6 cuMemHostRegister_v2 | 20분 | +| 7 cuCtxGetDevice | 15분 | +| 8 통합 검증 + push + 코멘트 | 30분 | +| **총** | **약 3시간 15분** | + +(빌드 매 task 마다 1-2분 + Docker pull). 1일 작업. diff --git a/docs/superpowers/plans/2026-04-28-hami-isolation-step-c-vulkan-layer-compat.md b/docs/superpowers/plans/2026-04-28-hami-isolation-step-c-vulkan-layer-compat.md new file mode 100644 index 000000000..4acacf338 --- /dev/null +++ b/docs/superpowers/plans/2026-04-28-hami-isolation-step-c-vulkan-layer-compat.md @@ -0,0 +1,661 @@ +# HAMi vGPU 격리 — Step C: HAMi-core Vulkan Layer Compat Hardening Implementation Plan + +> **For agentic workers:** REQUIRED SUB-SKILL: Use superpowers:subagent-driven-development (recommended) or superpowers:executing-plans to implement this plan task-by-task. Steps use checkbox (`- [ ]`) syntax for tracking. + +**Goal:** HAMi-core Vulkan layer (`libvgpu/src/vulkan/`) 가 NVIDIA Isaac Sim Kit (Carbonite/OptiX/Aftermath) 의 Vulkan 초기화 경로에서 NULL deref 없이 dispatch chain 을 끝까지 forwarding 하도록 보강한다. 핵심: **HAMI_VK_TRACE 로그로 실제 호출 패턴 수집 → evidence 있는 break 만 fix**, 추측성 hardening 금지. + +**Architecture:** 7개 task. (1) 미완성 WIP foundation commit, (2) GIPA/GDPA cached-fallback, (3) trace 로 evidence 수집, (4) 데이터 기반 hook 추가, (5) dispatch lifetime + chain copy audit (read-only), (6) LD_PRELOAD 강제 후 runheadless 5/5 + partition test, (7) push + draft PR comments. + +**Tech Stack:** C, CMake, Vulkan loader spec 1.3 §38, HAMi-core fork (`/Users/xiilab/git/HAMi/libvgpu`, branch `vulkan-layer`), Docker (build-in-docker), kubectl, ws-node074. + +**Plan scope:** Step C 만. Step D (isaac-launchable opt-in 활성화 + 4-path 검증) 별도 plan. 
+ +--- + +## File Structure + +| 파일 | 변경 종류 | 책임 | +|---|---|---| +| `libvgpu/src/vulkan/layer.c` | Modify | g_first_next_gipa/gdpa cache, GIPA/GDPA fallback, 추가 PhysicalDevice query hook (Task 4 evidence 기반) | +| `libvgpu/src/vulkan/dispatch.c` | Modify | EnumerateDevice* dispatch table entry resolve, hami_instance_first() impl | +| `libvgpu/src/vulkan/dispatch.h` | Modify | dispatch struct EnumerateDevice* fields, hami_instance_first() decl | +| `libvgpu/docs/superpowers/notes/2026-04-28-vk-trace-isaac-sim.md` (Task 3) | Create | Isaac Sim Kit 가 호출하는 GIPA name 목록 + NULL 반환 케이스 | +| `libvgpu/docs/superpowers/notes/2026-04-28-vk-dispatch-lifetime-audit.md` (Task 5) | Create | dispatch lifetime + chain copy audit 결과 | + +추가 hook 패턴 (Task 4 evidence 기반): + +```c +/* vkGetPhysicalDevice<X> is a thin pass-through. We don't apply HAMi + * partitioning to read-only queries — only forward through the next + * layer/ICD via the cached dispatch entry. The reason we hook at all is + * because the loader pre-resolves these via GIPA(NULL, "vk...") during + * implicit-layer init: returning NULL there breaks Carbonite. */ +static VKAPI_ATTR void VKAPI_CALL +hami_vkGetPhysicalDevice<X>(VkPhysicalDevice phys, ...) { + hami_instance_dispatch_t *d = hami_instance_first(); + if (!d || !d->GetPhysicalDevice<X>) return; + d->GetPhysicalDevice<X>(phys, ...); +} +``` + +--- + +## Tasks + +### Task 1: WIP foundation commit (Enumerate via dispatch + gipa/gdpa cache + first helper) + +**Files:** +- Modify (already in working tree, just need to commit): `libvgpu/src/vulkan/layer.c`, `libvgpu/src/vulkan/dispatch.c`, `libvgpu/src/vulkan/dispatch.h` + +이 task 는 **이미 working tree 에 있는 unstaged 변경을 commit 하는 것**. Step B 진행 중 controller 가 의도적으로 staging 안 했다. 
+ +- [ ] **Step 1: Verify unstaged diff is consistent with the design** + +```bash +cd /Users/xiilab/git/HAMi/libvgpu +git diff --stat src/vulkan/ +git diff src/vulkan/dispatch.h +git diff src/vulkan/dispatch.c +git diff src/vulkan/layer.c | head -200 +``` + +Expected diff: +- `dispatch.h` adds `EnumerateDeviceExtensionProperties` + `EnumerateDeviceLayerProperties` PFN fields to `hami_instance_dispatch_t`, adds `hami_instance_first()` decl. +- `dispatch.c` resolves both names in `hami_instance_register`, implements `hami_instance_first()` (returns `g_inst_head` under lock). +- `layer.c` adds `g_first_next_gipa` / `g_first_next_gdpa` static caches set in CreateInstance/CreateDevice, refactors `hami_vkEnumerateDeviceExtensionProperties` / `hami_vkEnumerateDeviceLayerProperties` to forward via `hami_instance_first()->Enumerate*`, expands comments. + +If diff has unrelated changes — STOP, ask controller. + +- [ ] **Step 2: build-in-docker on ws-node074 to verify the WIP compiles** + +```bash +cd /Users/xiilab/git/HAMi/libvgpu +rsync -az --exclude=build --exclude=.git/objects/pack . root@10.61.3.74:/tmp/libvgpu-build/ +ssh root@10.61.3.74 'cd /tmp/libvgpu-build && rm -rf .git build && git init -q && git add -A 2>&1 | tail -1 && git -c user.email=x@x -c user.name=x commit -q -m local --no-gpg-sign && make build-in-docker 2>&1 | tail -10' +``` + +Expected: `Built target vgpu`, no errors. + +- [ ] **Step 3: Run all existing unit tests under LD_PRELOAD (regression check for Step B)** + +```bash +ssh root@10.61.3.74 'cd /tmp/libvgpu-build/build/test && LD_PRELOAD=/tmp/libvgpu-build/build/libvgpu.so ./test_cuda_null_guards 2>&1; echo EXIT=$?' +``` + +Expected: 9 `[OK]` lines, EXIT=0. (Vulkan tests have separate build path — Step C plan does not modify them.) 
+ +- [ ] **Step 4: Commit** + +```bash +cd /Users/xiilab/git/HAMi/libvgpu +git add src/vulkan/layer.c src/vulkan/dispatch.c src/vulkan/dispatch.h +git commit -s -m "fix(vulkan): cache first next-gipa/gdpa + EnumerateDevice* via dispatch table" \ + -m "Foundation for Step C compat hardening: + +* dispatch.{h,c}: add EnumerateDeviceExtensionProperties + + EnumerateDeviceLayerProperties function pointers to the per-instance + dispatch struct; resolve both during hami_instance_register so the + layer's own Enumerate* hooks can forward correctly. Add + hami_instance_first() helper that returns the first registered + instance dispatch under lock — used by NULL-instance Enumerate + forwarding when the loader probes before any instance has been + created. +* layer.c: cache the first next-layer GetInstanceProcAddr / + GetDeviceProcAddr in static globals during CreateInstance / + CreateDevice. Expands comments documenting the Vulkan 1.3 §38.3.1 + contract for own-name vs NULL pLayerName Enumerate semantics, and + why an earlier draft returning LAYER_NOT_PRESENT broke + vkCreateDevice. + +This commit only restructures the existing Enumerate hooks; it does not +yet change GIPA/GDPA fallback behavior (Task 2)." +``` + +Expected: 1 commit on top of `7dcb5a4`, working tree clean. + +--- + +### Task 2: GIPA / GDPA cached-fallback for unknown instance / device + +**Files:** +- Modify: `libvgpu/src/vulkan/layer.c` — `hami_vkGetInstanceProcAddr`, `hami_vkGetDeviceProcAddr` + +**Bug:** When NVIDIA driver / Carbonite call our GIPA/GDPA with a `VkInstance`/`VkDevice` handle that we haven't registered (e.g., loader probe before `vkCreateInstance` returns, or upper layer wraps the handle), `hami_instance_lookup(instance)` returns NULL and we return NULL → caller dereferences NULL and SegFaults. + +**Fix:** When lookup returns NULL but we have `g_first_next_gipa`/`g_first_next_gdpa` cached from a previous `vkCreateInstance`/`vkCreateDevice`, forward to that cached function. 
Only when both lookup AND cache are NULL do we return NULL (legitimately uninitialized state — pre-CreateInstance loader bootstrap). + +- [ ] **Step 1: Modify `hami_vkGetInstanceProcAddr` (around line 297)** + +Change: +```c + hami_instance_dispatch_t *d = hami_instance_lookup(instance); + if (!d) { + HAMI_TRACE("hami_vkGetInstanceProcAddr: instance %p not registered, returning NULL", (void *)instance); + return NULL; + } + return d->next_gipa(instance, pName); +``` + +to: +```c + hami_instance_dispatch_t *d = hami_instance_lookup(instance); + if (d) return d->next_gipa(instance, pName); + /* Unknown VkInstance handle: NVIDIA driver and Carbonite occasionally + * probe through our GIPA with handles we haven't registered (e.g., + * during vkCreateInstance before our register call returns, or with + * an upper-layer-wrapped handle). Returning NULL would SegFault the + * caller. Forward to the first cached next-layer gipa instead — it + * was set the first time vkCreateInstance ran and is a valid pointer + * into the next layer / driver. */ + if (g_first_next_gipa) { + HAMI_TRACE("hami_vkGetInstanceProcAddr: instance %p not registered, forwarding via cached gipa", (void *)instance); + return g_first_next_gipa(instance, pName); + } + /* Pre-CreateInstance loader bootstrap: the only case where the spec + * allows us to return NULL for instance entry points (the loader + * still resolves the global Enumerate* hooks via the same GIPA, but + * those are matched above by HAMI_HOOK before this fall-through). 
*/ + HAMI_TRACE("hami_vkGetInstanceProcAddr: instance %p not registered AND no cached gipa, returning NULL", (void *)instance); + return NULL; +``` + +- [ ] **Step 2: Same pattern for `hami_vkGetDeviceProcAddr` (around line 323)** + +Change: +```c + hami_device_dispatch_t *d = hami_device_lookup(device); + if (!d) return NULL; + return d->next_gdpa(device, pName); +``` + +to: +```c + hami_device_dispatch_t *d = hami_device_lookup(device); + if (d) return d->next_gdpa(device, pName); + if (g_first_next_gdpa) { + return g_first_next_gdpa(device, pName); + } + return NULL; +``` + +- [ ] **Step 3: Build + run existing unit tests (regression)** + +```bash +cd /Users/xiilab/git/HAMi/libvgpu +rsync -az --exclude=build --exclude=.git/objects/pack . root@10.61.3.74:/tmp/libvgpu-build/ +ssh root@10.61.3.74 'cd /tmp/libvgpu-build && rm -rf .git build && git init -q && git add -A 2>&1 | tail -1 && git -c user.email=x@x -c user.name=x commit -q -m local --no-gpg-sign && make build-in-docker 2>&1 | tail -10 && cd build/test && LD_PRELOAD=/tmp/libvgpu-build/build/libvgpu.so ./test_cuda_null_guards 2>&1; echo EXIT=$?' +``` + +Expected: build OK, 9 `[OK]` lines (Step B regression), EXIT=0. + +- [ ] **Step 4: Commit** + +```bash +cd /Users/xiilab/git/HAMi/libvgpu +git add src/vulkan/layer.c +git commit -s -m "fix(vulkan): GIPA/GDPA fallback to cached next when instance/device unknown" \ + -m "NVIDIA driver and Carbonite probe through our GIPA/GDPA with handles +that may not yet be registered: during vkCreateInstance before our +register completes, or with upper-layer-wrapped handles. Returning +NULL there crashed the caller (SegFault inside libcarb.graphics-vulkan +when assembling the dispatch table). + +Now we forward to the first-cached next_gipa/next_gdpa from a previous +CreateInstance/CreateDevice. 
Only when both per-handle lookup AND the +cache are absent do we return NULL — that's the legitimate +pre-CreateInstance loader bootstrap window where Enumerate* hooks have +already been matched at the top of the function." +``` + +--- + +### Task 3: trace which vkGetPhysicalDevice* queries Isaac Sim Kit makes + +**Files:** +- Create: `libvgpu/docs/superpowers/notes/2026-04-28-vk-trace-isaac-sim.md` + +이 task 는 코드 변경 0 — 실제 trace 수집. Task 4 의 데이터 기반 hook 추가에 입력. + +- [ ] **Step 1: Verify the new build (with Tasks 1-2 commits) is on ws-node074 + swap into /usr/local/vgpu/** + +```bash +ssh root@10.61.3.74 ' +md5sum /tmp/libvgpu-build/build/libvgpu.so +cp -av /usr/local/vgpu/libvgpu.so /usr/local/vgpu/libvgpu.so.bak-pre-step-c 2>&1 | tail -2 +cp -f /tmp/libvgpu-build/build/libvgpu.so /usr/local/vgpu/libvgpu.so +md5sum /usr/local/vgpu/libvgpu.so +' +``` + +- [ ] **Step 2: runheadless.sh under HAMI_VK_TRACE=1 + LD_PRELOAD inside isaac-launchable pod** + +```bash +NEWPOD=$(kubectl -n isaac-launchable get pod -o jsonpath='{.items[0].metadata.name}') +echo "Pod: $NEWPOD" + +kubectl -n isaac-launchable exec $NEWPOD -c vscode -- bash -lc ' +mkdir -p /tmp/vk-trace +pkill -KILL kit 2>/dev/null; sleep 2 +timeout 50 env \ + ACCEPT_EULA=y \ + HAMI_VK_TRACE=1 \ + LD_PRELOAD=/usr/local/vgpu/libvgpu.so \ + /isaac-sim/runheadless.sh > /tmp/vk-trace/run.log 2>&1 +EC=$? 
+pkill -KILL kit 2>/dev/null +echo "exit=$EC" +echo "=== HAMI_VK_TRACE lines ===" +grep -c "HAMI_VK_TRACE" /tmp/vk-trace/run.log +echo "=== unique GIPA names (sorted by count) ===" +grep "hami_vkGetInstanceProcAddr.*name=" /tmp/vk-trace/run.log | sed -e "s/.*name=//" -e "s/ .*//" | sort | uniq -c | sort -rn | head -50 +echo "=== GDPA names ===" +grep "hami_vkGetDeviceProcAddr.*name=" /tmp/vk-trace/run.log 2>/dev/null | sed -e "s/.*name=//" | sort | uniq -c | sort -rn | head -30 +echo "=== unregistered fallback hits ===" +grep -c "not registered" /tmp/vk-trace/run.log +echo "=== SegFault / Segmentation ===" +grep -E "Segmentation|crash has occurred" /tmp/vk-trace/run.log | head -10 +' +``` + +Expected output structure: +- `exit=124` (timeout = alive) OR `exit=139` (crash — Step C still failing for this scenario) +- Top-N GIPA names: many `vkCreateInstance`, `vkGetPhysicalDeviceMemoryProperties`, `vkAllocateMemory`, etc. +- Names returning NULL: those that fall through (`not registered` lines) tell us which entry points needed cached-gipa fallback. + +- [ ] **Step 3: Save trace evidence to notes file** + +```bash +cd /Users/xiilab/git/HAMi/libvgpu +mkdir -p docs/superpowers/notes +cat > docs/superpowers/notes/2026-04-28-vk-trace-isaac-sim.md <GetPhysicalDeviceFormatProperties2 = + (PFN_vkGetPhysicalDeviceFormatProperties2)resolve(gipa, inst, "vkGetPhysicalDeviceFormatProperties2"); +``` + +- [ ] **Step 3: layer.c 에 thin wrapper 추가** + +```c +static VKAPI_ATTR void VKAPI_CALL +hami_vkGetPhysicalDeviceFormatProperties2(VkPhysicalDevice phys, + VkFormat format, + VkFormatProperties2 *pProperties) { + hami_instance_dispatch_t *d = hami_instance_first(); + if (!d || !d->GetPhysicalDeviceFormatProperties2) return; + d->GetPhysicalDeviceFormatProperties2(phys, format, pProperties); +} +``` + +- [ ] **Step 4: HAMI_HOOK 추가 (in hami_vkGetInstanceProcAddr)** + +```c +HAMI_HOOK(GetPhysicalDeviceFormatProperties2); +``` + +(Repeat for each name from Task 3 evidence.) 
+ +- [ ] **Step 5: build + verify the trace path no longer hits "not registered" for the new names** + +```bash +# (rebuild + swap .so + re-run trace from Task 3 Step 2) +# Expected: "not registered" count drops to ~0 for the names just hooked. +``` + +- [ ] **Step 6: Commit (one commit even if multiple names)** + +```bash +git add src/vulkan/{layer,dispatch}.{c,h} +git commit -s -m "fix(vulkan): hook vkGetPhysicalDevice* entry points missing in trace" \ + -m "Trace under HAMI_VK_TRACE=1 + LD_PRELOAD on Isaac Sim Kit init showed +the following names returned NULL through GIPA(VK_NULL_HANDLE, ...) +during loader implicit-layer probing: <list of names from the Task 3 trace>. Each is now hooked with +a thin pass-through wrapper that forwards to the next layer/ICD via +hami_instance_first()->Get*. The layer does not apply HAMi +partitioning to these read-only queries. + +See docs/superpowers/notes/2026-04-28-vk-trace-isaac-sim.md for the +trace evidence." +``` + +--- + +### Task 5: dispatch lifetime + chain deep-copy audit (review-only) + +**Files:** +- Read: `libvgpu/src/vulkan/dispatch.c`, `libvgpu/src/vulkan/layer.c` +- Create: `libvgpu/docs/superpowers/notes/2026-04-28-vk-dispatch-lifetime-audit.md` + +이 task 는 **read-only audit** — 코드 변경은 evidence 가 있을 때만. + +- [ ] **Step 1: dispatch lifetime audit** + +Question: `hami_instance_unregister` / `hami_device_unregister` 호출 시점에 (a) 다른 thread 에서 lookup 중이면 race, (b) Carbonite 가 아직 valid handle 로 알고 있으면 use-after-free. + +Investigate: +- `hami_vkDestroyInstance` (layer.c:101) 의 lookup → forward → unregister 순서 +- 멀티 instance 환경에서 first instance unregister 후 `hami_instance_first()` 가 두 번째 instance 반환하는지 + +Document findings. + +- [ ] **Step 2: chain pLayerInfo in-place 수정 audit** + +`hami_vkCreateInstance` (layer.c:76): +```c +chain->u.pLayerInfo = chain->u.pLayerInfo->pNext; +``` + +Question: NVIDIA driver 가 createInfo 를 재사용해서 `chain->u.pLayerInfo` 가 이미 advance 된 상태로 본다면 두 번째 layer 가 chain 을 못 따라간다. 
+ +Investigate: +- Vulkan loader spec 1.3 §38.4 의 chain 처리 표준 요구사항 +- 기존 NVIDIA layer 들 (e.g., nvidia_layers.json) 이 어떻게 처리하는지 (gpgpu/khronos vulkan-loader 소스 참조) + +Document findings. + +- [ ] **Step 3: notes 파일 작성 + commit** + +```bash +cd /Users/xiilab/git/HAMi/libvgpu +cat > docs/superpowers/notes/2026-04-28-vk-dispatch-lifetime-audit.md <) + +## Chain pLayerInfo in-place advance + +(findings — is in-place advance spec-standard? do real layers do this?) + +### Decision + +(no change / fix needed: ) +EOF +git add docs/superpowers/notes/2026-04-28-vk-dispatch-lifetime-audit.md +git commit -s -m "docs(notes): vk dispatch lifetime + chain copy audit" +``` + +(If audit reveals a real bug → STOP and ask controller for guidance on whether to add a Task 5b code-change task.) + +--- + +### Task 6: ws-node074 integration verify (runheadless 5/5 + partition test under LD_PRELOAD) + +**Files:** +- (no code change) +- Verify: ws-node074 isaac-launchable pod baseline under forced LD_PRELOAD + +이 task 는 진짜 integration test — Step B 의 Task 8 가 못 한 "LD_PRELOAD 강제 후 Isaac Sim 동작" 검증. + +- [ ] **Step 1: 새 .so 가 swap 되어 있는지 확인** + +```bash +ssh root@10.61.3.74 ' +md5sum /usr/local/vgpu/libvgpu.so +md5sum /tmp/libvgpu-build/build/libvgpu.so +' +``` + +만약 두 md5 다르면 → swap 다시: +```bash +ssh root@10.61.3.74 'cp -f /tmp/libvgpu-build/build/libvgpu.so /usr/local/vgpu/libvgpu.so' +``` + +- [ ] **Step 2: runheadless.sh 5번 with LD_PRELOAD forced** + +```bash +NEWPOD=$(kubectl -n isaac-launchable get pod -o jsonpath='{.items[0].metadata.name}') +kubectl -n isaac-launchable exec $NEWPOD -c vscode -- bash -lc ' +mkdir -p /tmp/v +for i in 1 2 3 4 5; do + pkill -KILL kit 2>/dev/null; sleep 3 + timeout 50 env \ + ACCEPT_EULA=y \ + LD_PRELOAD=/usr/local/vgpu/libvgpu.so \ + /isaac-sim/runheadless.sh >/tmp/v/c$i.log 2>&1 + EC=$? 
+ CRASH=$(grep -cE "Segmentation fault|crash has occurred" /tmp/v/c$i.log) + LISTEN=$(ss -tunlp 2>/dev/null | grep -c -E ":49100|:30999") + echo "run $i (LD_PRELOAD): exit=$EC crash=$CRASH listen=$LISTEN" +done +pkill -KILL kit 2>/dev/null +' +``` + +Expected: 5/5 `exit=124 crash=0 listen=1`. **이게 진짜 Step C 성공 기준**. + +- [ ] **Step 3: vk_partition_test.py — Vulkan partition enforce 유지 확인** + +```bash +kubectl -n isaac-launchable exec $NEWPOD -c vscode -- bash -lc ' +if [ -f vk_partition_test.py ]; then + LD_PRELOAD=/usr/local/vgpu/libvgpu.so /isaac-sim/python.sh vk_partition_test.py 2>&1 | head -30 + echo "EXIT=$?" +else + echo "vk_partition_test.py 부재 — Step D 에서 작성" +fi +' +``` + +Expected: vk_partition_test.py 가 있으면 partition enforce (44 GiB → 23 GiB clamp) 결과 출력. 없으면 Step D 스킵 가능. + +- [ ] **Step 4: nvidia-smi raw 값 확인 (LD_PRELOAD 비활성 vs 활성)** + +```bash +kubectl -n isaac-launchable exec $NEWPOD -c vscode -- bash -lc ' +echo "=== without LD_PRELOAD (raw) ===" +nvidia-smi --query-gpu=memory.total --format=csv,noheader +echo "=== with LD_PRELOAD (clamped) ===" +LD_PRELOAD=/usr/local/vgpu/libvgpu.so nvidia-smi --query-gpu=memory.total --format=csv,noheader +' +``` + +Expected: +- raw: ~46068 MiB +- clamped: 23552 MiB (HAMI_VULKAN_ENABLE + partition annotation 이 있으면; 없으면 raw) + +만약 isaac-launchable 에 아직 hami.io/vgpu=enabled label 없으면 clamp 안 됨 — Step D 에서 활성화. Step C 의 의무는 단지 "LD_PRELOAD forced 후 crash 안 함". + +--- + +### Task 7: push HAMi-core fork + bump submodule + draft PR comments (don't post) + +**Files:** +- Modify (parent repo): `libvgpu` submodule SHA bump +- Create: `/tmp/step-c-pr-drafts/{pr182,pr1803}.md` + +- [ ] **Step 1: Push libvgpu fork** + +```bash +cd /Users/xiilab/git/HAMi/libvgpu +git log --oneline -8 +git push xiilab vulkan-layer 2>&1 | tail +``` + +Expected: Step C commits (Task 1, 2, 3, 4-if-any, 5) push 성공. 
+ +- [ ] **Step 2: Bump HAMi parent submodule SHA** + +```bash +cd /Users/xiilab/git/HAMi +NEW_SHA=$(cd libvgpu && git rev-parse HEAD) +echo "new HAMi-core SHA: $NEW_SHA" +git add libvgpu +git commit -s -m "chore(libvgpu): bump HAMi-core for Step C vulkan layer compat" \ + -m "Pulls in Step C commits hardening the Vulkan layer for Isaac Sim Kit +init paths. See docs/superpowers/specs/2026-04-28-hami-isolation-isaac-sim-design.md +section 9 and the plan at docs/superpowers/plans/2026-04-28-hami-isolation-step-c-vulkan-layer-compat.md. + +Verified on ws-node074: 5/5 runheadless.sh exit=124 alive under +LD_PRELOAD=/usr/local/vgpu/libvgpu.so (Isaac Sim Kit 6.0.0-rc.22)." +git push xiilab feat/vulkan-vgpu 2>&1 | tail +``` + +- [ ] **Step 3: Draft PR comments (DO NOT post)** + +```bash +mkdir -p /tmp/step-c-pr-drafts + +cat > /tmp/step-c-pr-drafts/pr182.md <<'EOF' +## Step C complete — Vulkan layer compat hardening (Isaac Sim Kit) + +Builds on Step B (CUDA hook NULL guards). Adds Vulkan layer changes: + +| Commit | Change | +|---|---| +| (sha) | dispatch table: EnumerateDevice* PFNs + hami_instance_first() helper | +| (sha) | layer.c: cache first next-gipa/gdpa, refactor Enumerate hooks | +| (sha) | GIPA/GDPA fallback to cached gipa for unknown handles | +| (sha) | (if Task 4) hook vkGetPhysicalDevice entry points found NULL in trace | +| (sha) | docs/notes: trace evidence + dispatch lifetime audit | + +### Verification + +- 9/9 unit tests (Step B) regression pass +- ws-node074 isaac-launchable pod under `LD_PRELOAD=/usr/local/vgpu/libvgpu.so` + Isaac Sim Kit 6.0.0-rc.22: + - 5/5 `runheadless.sh` exit=124 alive, no SegFault, listen :49100 + - HAMI_VK_TRACE evidence: GIPA lookups, 0 unhandled "not registered" fall-throughs +- Step D (isaac-launchable opt-in label activation) follows in a separate plan. 
+EOF + +cat > /tmp/step-c-pr-drafts/pr1803.md <<'EOF' +## Step C (Vulkan layer compat) complete + +HAMi-core PR #182 added Vulkan layer hardening for Isaac Sim Kit init: + +- dispatch table EnumerateDevice* + hami_instance_first() helper +- cached first next-gipa/gdpa +- GIPA/GDPA cached-fallback for unknown handles +- (if Task 4 added hooks) explicit hooks for vkGetPhysicalDevice names that returned NULL through GIPA(NULL, ...) + +The `libvgpu` submodule pointer is bumped to <HAMI_BUMP_SHA>. + +### Verification + +ws-node074 isaac-launchable pod under `LD_PRELOAD=/usr/local/vgpu/libvgpu.so` runs Isaac Sim Kit (`runheadless.sh`) 5/5 alive (exit=124, listen :49100), no SegFault. Step D (opt-in activation + 4-path enforce verification) follows. + +Spec: `docs/superpowers/specs/2026-04-28-hami-isolation-isaac-sim-design.md` +Plan: `docs/superpowers/plans/2026-04-28-hami-isolation-step-c-vulkan-layer-compat.md` +EOF + +# Substitute real SHAs +HAMI_BUMP_SHA=$(cd /Users/xiilab/git/HAMi && git rev-parse HEAD) +sed -i.bak "s/<HAMI_BUMP_SHA>/$HAMI_BUMP_SHA/g" /tmp/step-c-pr-drafts/pr1803.md +rm /tmp/step-c-pr-drafts/pr1803.md.bak + +ls -la /tmp/step-c-pr-drafts/ +``` + +(SHA placeholders in pr182.md will be filled by the controller — too many to script.) + +- [ ] **Step 4: Report — DO NOT post comments. Wait for explicit user approval.** + +--- + +## Self-Review + +**1. Spec coverage:** spec §9.1 (foundation) → Task 1; §9.2 GIPA fallback → Task 2; §9.2 추가 hook → Tasks 3+4 (evidence-driven); §9.2 dispatch lifetime + chain copy → Task 5; §9.3 검증 → Task 6. ✅ + +**2. Placeholder scan:** Task 4 의 코드 예시는 evidence-driven 결과에 따라 실제 다를 수 있음을 명시 — placeholder 가 아니라 "case 별 구체적 패턴". 이외 placeholder 없음. ✅ + +**3. Type consistency:** `hami_instance_dispatch_t` / `PFN_vkGet*` / `g_first_next_gipa` 모든 task 에서 일관 사용. ✅ + +**4. Scope check:** Step C 만. Step D 별도 plan. Step B 는 이미 완료. ✅ + +**5. Evidence-driven 원칙:** Task 4 가 가장 큰 잠재 scope creep — 명시적으로 "Task 3 trace 결과로만 결정, 추측 hook 추가 금지" 박아둠. 
✅ + +--- + +## 일정 추정 + +| Task | 예상 시간 | +|---|---| +| 1 WIP foundation commit | 20분 | +| 2 GIPA/GDPA cached-fallback | 30분 | +| 3 trace + notes | 45분 | +| 4 evidence-driven hooks (range: 0 ~ 6 names × 10min) | 0~60분 | +| 5 lifetime + chain audit (review-only) | 45분 | +| 6 ws-node074 integration verify | 30분 | +| 7 push + draft PR comments | 20분 | +| **총** | **약 3~4시간** | + +(Task 4 의 scope 가 trace 결과에 따라 0 ~ 60분으로 큰 편차. 최악의 경우에도 4시간 내.) diff --git a/docs/superpowers/plans/2026-04-29-step-c-vk-so-split.md b/docs/superpowers/plans/2026-04-29-step-c-vk-so-split.md new file mode 100644 index 000000000..518636e84 --- /dev/null +++ b/docs/superpowers/plans/2026-04-29-step-c-vk-so-split.md @@ -0,0 +1,856 @@ +# Step C 재설계 — Vulkan layer 분리 (libvgpu_vk.so) Implementation Plan + +> **For agentic workers:** REQUIRED SUB-SKILL: Use superpowers:subagent-driven-development (recommended) or superpowers:executing-plans to implement this plan task-by-task. Steps use checkbox (`- [ ]`) syntax for tracking. + +**Goal:** `src/vulkan/*` 전체를 `libvgpu_vk.so` (신규) 로 분리하고 `libvgpu.so` 에는 HAMi-core 만 남긴다. Vulkan layer 활성은 implicit_layer manifest path 만. 이렇게 해서 2026-04-28 발견된 LD_PRELOAD-only path crash class 가 구조적으로 발생 불가능해진다. + +**Architecture:** 5개 HAMi-core 함수 (`oom_check`, `add_gpu_device_memory_usage`, `rm_gpu_device_memory_usage`, `get_current_device_memory_limit`, `rate_limiter`) 를 `hami_core_*` wrapper 로 명시 export → `libvgpu_vk.so` 가 DT_NEEDED 로 link → manifest dlopen 시점에 자동 resolve. Spec: `docs/superpowers/specs/2026-04-29-step-c-redesign-vk-so-split.md`. + +**Tech Stack:** C, CMake (libvgpu/), Docker (`make build-in-docker`), kubectl, ws-node074. HAMi-core fork: `/Users/xiilab/git/HAMi/libvgpu`, branch `vulkan-layer` (현재 HEAD `83fd245` — Step C revert 상태). + +--- + +## File Structure + +| 파일 | 변경 종류 | 책임 | +|---|---|---| +| `libvgpu/src/include/hami_core_export.h` | Create | 5개 wrapper 함수 declaration. 
`__attribute__((visibility("default")))` | +| `libvgpu/src/hami_core_export.c` | Create | wrapper 정의 — 내부 HAMi-core 함수를 호출 | +| `libvgpu/src/CMakeLists.txt` | Modify | (a) `hami_core_export.c` 를 libvgpu.so source 에 추가 (b) `vulkan_mod` 를 libvgpu.so 에서 제거 (c) 신규 `libvgpu_vk` target 추가 | +| `libvgpu/src/vulkan/budget.c` | Modify | `extern` 선언 → `#include "include/hami_core_export.h"` + `hami_core_*` 호출 | +| `libvgpu/src/vulkan/throttle_adapter.c` | Modify | `extern rate_limiter` → `hami_core_throttle()` | +| `libvgpu/share/hami/hami.json` | Create | Vulkan implicit_layer manifest. `library_path` = `/usr/local/vgpu/libvgpu_vk.so` | + +추가 산출물 (build): +- `build/libvgpu.so` — HAMi-core 만, `vk*` 미export +- `build/libvgpu_vk.so` — Vulkan layer, DT_NEEDED `libvgpu.so` + +--- + +## Tasks + +### Task 1: Add `hami_core_export.{h,c}` — explicit export interface + +**Files:** +- Create: `libvgpu/src/include/hami_core_export.h` +- Create: `libvgpu/src/hami_core_export.c` + +- [ ] **Step 1: Write the header** + +```c +/* libvgpu/src/include/hami_core_export.h */ +#ifndef HAMI_CORE_EXPORT_H_ +#define HAMI_CORE_EXPORT_H_ + +#include <stddef.h> +#include <stdint.h> + +#ifdef __cplusplus +extern "C" { +#endif + +/* HAMi-core ↔ libvgpu_vk.so contract. + * These are the only HAMi-core symbols libvgpu_vk.so depends on. + * libvgpu.so MUST export them with default visibility; libvgpu_vk.so + * picks them up via DT_NEEDED link at dlopen() time. */ + +/* Returns 1 if reserving `addon` bytes on device `dev` would exceed the + * partition limit, else 0. */ +int hami_core_oom_check(int dev, size_t addon); + +/* Records `usage` bytes of allocation by (pid, dev). type==2 (DEVICE). + * Returns 0 on success, non-zero on failure. */ +int hami_core_add_memory_usage(int32_t pid, int dev, size_t usage, int type); + +/* Releases `usage` bytes by (pid, dev). type==2 (DEVICE). 0 = success. 
*/ +int hami_core_rm_memory_usage(int32_t pid, int dev, size_t usage, int type); + +/* Returns the partition byte-limit for device `dev`, or 0 = unlimited. */ +uint64_t hami_core_get_memory_limit(int dev); + +/* Consumes one rate-limiter token (claim size = 1*1). */ +void hami_core_throttle(void); + +#ifdef __cplusplus +} +#endif + +#endif /* HAMI_CORE_EXPORT_H_ */ +``` + +- [ ] **Step 2: Write the implementation** + +```c +/* libvgpu/src/hami_core_export.c */ +#include "include/hami_core_export.h" + +#include <stddef.h> +#include <stdint.h> + +/* Internal HAMi-core symbols. Both libvgpu_vk.so and the wrappers below + * see the SAME object code linked into libvgpu.so. We make these + * symbols visible to other .so files only through the wrappers, never + * directly: that keeps the libvgpu.so→libvgpu_vk.so contract narrow. */ +extern int oom_check(int dev, size_t addon); +extern int add_gpu_device_memory_usage(int32_t pid, int dev, size_t usage, int type); +extern int rm_gpu_device_memory_usage(int32_t pid, int dev, size_t usage, int type); +extern uint64_t get_current_device_memory_limit(int dev); +extern void rate_limiter(int grids, int blocks); + +#define HAMI_EXPORT __attribute__((visibility("default"))) + +HAMI_EXPORT int hami_core_oom_check(int dev, size_t addon) { + return oom_check(dev, addon); +} + +HAMI_EXPORT int hami_core_add_memory_usage(int32_t pid, int dev, size_t usage, int type) { + return add_gpu_device_memory_usage(pid, dev, usage, type); +} + +HAMI_EXPORT int hami_core_rm_memory_usage(int32_t pid, int dev, size_t usage, int type) { + return rm_gpu_device_memory_usage(pid, dev, usage, type); +} + +HAMI_EXPORT uint64_t hami_core_get_memory_limit(int dev) { + return get_current_device_memory_limit(dev); +} + +HAMI_EXPORT void hami_core_throttle(void) { + rate_limiter(1, 1); +} +``` + +- [ ] **Step 3: Add to libvgpu.so build sources in `src/CMakeLists.txt`** + +Find the line: +```cmake +add_library(${LIBVGPU} SHARED libvgpu.c utils.c log_utils.c $ $ $ $ $) +``` + +Replace 
with (still includes vulkan_mod for now — Task 4 splits it): +```cmake +add_library(${LIBVGPU} SHARED libvgpu.c utils.c log_utils.c hami_core_export.c $ $ $ $ $) +``` + +- [ ] **Step 4: Verify it compiles (local docker)** + +```bash +cd /Users/xiilab/git/HAMi/libvgpu +make build-in-docker 2>&1 | tail -10 +``` + +Expected: `Built target vgpu`, no errors. (Tests/test targets compile too.) + +- [ ] **Step 5: Verify the wrappers are exported** + +```bash +docker run --rm -v "$PWD:/work" -w /work ubuntu:22.04 bash -c \ + "apt-get -qq update >/dev/null && apt-get -qq install -y binutils >/dev/null && \ + nm -D --defined-only build/libvgpu.so | grep ' T hami_core_'" +``` + +Expected: 5 lines, one per `hami_core_*` wrapper. (Symbols of type T = exported text.) + +- [ ] **Step 6: Commit** + +```bash +cd /Users/xiilab/git/HAMi/libvgpu +git add src/include/hami_core_export.h src/hami_core_export.c src/CMakeLists.txt +git commit -s -m "feat(hami-core): explicit hami_core_* export wrappers" \ + -m "Five thin wrappers around the HAMi-core symbols that libvgpu_vk.so +will need after the upcoming Vulkan-layer split: oom_check, +add/rm_gpu_device_memory_usage, get_current_device_memory_limit, +rate_limiter. + +All five carry __attribute__((visibility(\"default\"))) so that the +release build (-fvisibility=hidden) keeps the export surface narrow: +libvgpu_vk.so DT_NEEDED-resolves only these names and nothing else from +HAMi-core internals. No call-site changes yet — that follows in the next +commit." 
+``` + +--- + +### Task 2: Update src/vulkan/budget.c + throttle_adapter.c to call wrappers + +**Files:** +- Modify: `libvgpu/src/vulkan/budget.c` +- Modify: `libvgpu/src/vulkan/throttle_adapter.c` + +- [ ] **Step 1: Replace extern declarations in `src/vulkan/budget.c`** + +Find the block (currently around line 22-30): +```c +extern int oom_check(const int dev, size_t addon); +extern int add_gpu_device_memory_usage(int32_t pid, int dev, + size_t usage, int type); +extern int rm_gpu_device_memory_usage(int32_t pid, int dev, + size_t usage, int type); +extern uint64_t get_current_device_memory_limit(const int dev); +``` + +Replace with: +```c +#include "include/hami_core_export.h" +``` + +Then update each call site in the same file: +- `oom_check(dev, size)` → `hami_core_oom_check(dev, size)` +- `add_gpu_device_memory_usage(getpid(), dev, size, HAMI_MEM_TYPE_DEVICE)` → `hami_core_add_memory_usage(getpid(), dev, size, HAMI_MEM_TYPE_DEVICE)` +- `rm_gpu_device_memory_usage(getpid(), dev, size, HAMI_MEM_TYPE_DEVICE)` → `hami_core_rm_memory_usage(getpid(), dev, size, HAMI_MEM_TYPE_DEVICE)` +- `get_current_device_memory_limit(dev)` → `hami_core_get_memory_limit(dev)` + +(Keep the `cuInit` extern — that's CUDA driver, not HAMi-core.) + +- [ ] **Step 2: Update `src/vulkan/throttle_adapter.c`** + +Replace the file body: +```c +#include "vulkan/throttle_adapter.h" +#include "include/hami_core_export.h" + +void hami_vulkan_throttle(void) { + /* Consume one token — represents "one queue submission". The + * underlying rate_limiter interprets (grids*blocks) as the claim + * size; the wrapper uses (1,1) so Vulkan submits compete fairly + * with tiny CUDA kernel launches. */ + hami_core_throttle(); +} +``` + +- [ ] **Step 3: Build (still combined libvgpu.so)** + +```bash +cd /Users/xiilab/git/HAMi/libvgpu +make build-in-docker 2>&1 | tail -5 +``` + +Expected: `Built target vgpu`, `Built target test_*`. No errors. 
+ +- [ ] **Step 4: Step B regression test under LD_PRELOAD** + +Local docker run (ws-node074 not yet involved): +```bash +docker run --rm -v "$PWD/build:/build" --gpus none \ + ubuntu:22.04 bash -c \ + "LD_PRELOAD=/build/libvgpu.so /build/test/test_cuda_null_guards 2>&1 | tail -15; echo EXIT=\$?" +``` + +Expected: 9 `[OK]` lines, `EXIT=0`. (No GPU needed — test is hook-level NULL guards only.) + +- [ ] **Step 5: Commit** + +```bash +git add src/vulkan/budget.c src/vulkan/throttle_adapter.c +git commit -s -m "refactor(vulkan): use hami_core_* wrappers instead of internal externs" \ + -m "Replace the extern declarations of oom_check / add_/rm_gpu_device_ +memory_usage / get_current_device_memory_limit / rate_limiter in +src/vulkan/budget.c and src/vulkan/throttle_adapter.c with calls +through the new include/hami_core_export.h interface. + +This is a pure call-site rewrite — same runtime behavior, same .so +boundary (still linked into one libvgpu.so for now). The point is to +remove direct dependence on HAMi-core internal symbol names so the +upcoming libvgpu_vk.so split can keep DT_NEEDED narrow." +``` + +--- + +### Task 3: Pre-split sanity build (combined libvgpu.so still healthy) + +This task is verification only — confirms Tasks 1+2 didn't break anything before we attempt the split. + +**Files:** none (verification) + +- [ ] **Step 1: Build clean** + +```bash +cd /Users/xiilab/git/HAMi/libvgpu +rm -rf build +make build-in-docker 2>&1 | tail -8 +``` + +Expected: `Built target vgpu`, `Built target strip_symbol`, no warnings about undefined references. 
+ +- [ ] **Step 2: Verify exports include `hami_core_*` and `vk*` (still combined)** + +```bash +docker run --rm -v "$PWD/build:/build" ubuntu:22.04 bash -c \ + "apt-get -qq update >/dev/null && apt-get -qq install -y binutils >/dev/null && \ + echo '=== hami_core_* ==='; nm -D --defined-only /build/libvgpu.so | grep ' T hami_core_'; \ + echo '=== vk* ==='; nm -D --defined-only /build/libvgpu.so | grep ' T vk'" +``` + +Expected: 5 `hami_core_*` lines + 3 `vk*` lines (`vkGetInstanceProcAddr`, `vkGetDeviceProcAddr`, `vkNegotiateLoaderLayerInterfaceVersion`). Combined .so still has Vulkan exports because `vulkan_mod` is still linked in. + +- [ ] **Step 3: Run all unit tests** + +```bash +docker run --rm -v "$PWD/build:/build" ubuntu:22.04 bash -c \ + "cd /build/test && for t in test_cuda_null_guards test_layer test_memprops test_alloc; do \ + [ -x ./\$t ] && (echo '---' \$t '---'; ./\$t 2>&1 | tail -8; echo EXIT=\$?); \ + done" +``` + +Expected: each test prints `[OK]` lines and exits 0. + +- [ ] **Step 4: No commit (verification only)** + +If any check fails, STOP and ask controller — don't proceed to split. + +--- + +### Task 4: Split CMake — create libvgpu_vk.so target, remove vulkan_mod from libvgpu.so + +**Files:** +- Modify: `libvgpu/src/CMakeLists.txt` + +- [ ] **Step 1: Edit `src/CMakeLists.txt`** + +Find: +```cmake +add_library(${LIBVGPU} SHARED libvgpu.c utils.c log_utils.c hami_core_export.c $ $ $ $ $) +target_compile_options(${LIBVGPU} PUBLIC ${LIBRARY_COMPILE_FLAGS}) +target_compile_definitions(${LIBVGPU} PUBLIC HOOK_NVML_ENABLE) +target_link_libraries(${LIBVGPU} PUBLIC -lcuda -lnvidia-ml) + +if (NOT CMAKE_BUILD_TYPE STREQUAL "Debug") +add_custom_target(strip_symbol ALL + COMMAND strip -x ${CMAKE_BINARY_DIR}/lib${LIBVGPU}.so + DEPENDS ${LIBVGPU}) +endif() +``` + +Replace with: +```cmake +# libvgpu.so: HAMi-core only. Vulkan layer code now lives in libvgpu_vk.so. 
+add_library(${LIBVGPU} SHARED libvgpu.c utils.c log_utils.c hami_core_export.c $ $ $ $) +target_compile_options(${LIBVGPU} PUBLIC ${LIBRARY_COMPILE_FLAGS}) +target_compile_definitions(${LIBVGPU} PUBLIC HOOK_NVML_ENABLE) +target_link_libraries(${LIBVGPU} PUBLIC -lcuda -lnvidia-ml) + +# libvgpu_vk.so: Vulkan implicit-layer code. Activated via +# /etc/vulkan/implicit_layer.d/hami.json (see share/hami/hami.json). +# DT_NEEDED links libvgpu.so so the loader resolves the hami_core_* +# wrappers when the Vulkan loader dlopen()s us. +set(LIBVGPU_VK vgpu_vk) +add_library(${LIBVGPU_VK} SHARED $<TARGET_OBJECTS:vulkan_mod>) +target_compile_options(${LIBVGPU_VK} PUBLIC ${LIBRARY_COMPILE_FLAGS}) +target_link_libraries(${LIBVGPU_VK} PUBLIC ${LIBVGPU} -lpthread) + +if (NOT CMAKE_BUILD_TYPE STREQUAL "Debug") +add_custom_target(strip_symbol ALL + COMMAND strip -x ${CMAKE_BINARY_DIR}/lib${LIBVGPU}.so + COMMAND strip -x ${CMAKE_BINARY_DIR}/lib${LIBVGPU_VK}.so + DEPENDS ${LIBVGPU} ${LIBVGPU_VK}) +endif() +``` + +Notes: +- `target_link_libraries(${LIBVGPU_VK} PUBLIC ${LIBVGPU} ...)` makes CMake emit `-lvgpu` on the linker command line; the link editor (`ld`) records this as DT_NEEDED `libvgpu.so` in the resulting `libvgpu_vk.so`, and the dynamic loader (`ld.so`) resolves it at dlopen() time. +- `vulkan_mod` 의 OBJECT lib 는 그대로 유지 — 두 target 중 하나에만 link됨. + +- [ ] **Step 2: Build clean** + +```bash +cd /Users/xiilab/git/HAMi/libvgpu +rm -rf build +make build-in-docker 2>&1 | tail -10 +``` + +Expected: `Built target vgpu`, `Built target vgpu_vk`, both without warnings about undefined references. + +- [ ] **Step 3: Verify both .so produced** + +```bash +ls -la build/libvgpu.so build/libvgpu_vk.so +``` + +Expected: both files present, executable. + +- [ ] **Step 4: Commit** + +```bash +git add src/CMakeLists.txt +git commit -s -m "build: split Vulkan layer into separate libvgpu_vk.so" \ + -m "libvgpu.so loses vulkan_mod and now contains only HAMi-core +(NVML/CUDA hooks + allocator + multiprocess). 
libvgpu_vk.so is a new +shared target that holds all of src/vulkan/* and links libvgpu.so as +DT_NEEDED so the hami_core_* wrappers resolve when the Vulkan loader +dlopen()s the new .so via the implicit-layer manifest. + +After this commit: +* nm -D libvgpu.so MUST NOT show vk* +* nm -D libvgpu_vk.so MUST show vkGetInstanceProcAddr, + vkGetDeviceProcAddr, vkNegotiateLoaderLayerInterfaceVersion (and only + those as exports thanks to -fvisibility=hidden + HAMI_LAYER_EXPORT). +* readelf -d libvgpu_vk.so MUST list libvgpu.so as NEEDED. + +Step C plan: docs/superpowers/plans/2026-04-29-step-c-vk-so-split.md +Spec: docs/superpowers/specs/2026-04-29-step-c-redesign-vk-so-split.md" +``` + +--- + +### Task 5: ELF / symbol diff verification (the structural-isolation proof) + +**Files:** none (verification only — but commit a script to docs/notes for future runs) + +- [ ] **Step 1: Run the symbol-isolation check** + +```bash +cd /Users/xiilab/git/HAMi/libvgpu +docker run --rm -v "$PWD/build:/build" ubuntu:22.04 bash -c ' +apt-get -qq update >/dev/null +apt-get -qq install -y binutils >/dev/null +echo "=== libvgpu.so: must have hami_core_* but NO vk* ===" +echo "--- hami_core_* (expect 5) ---" +nm -D --defined-only /build/libvgpu.so | grep " T hami_core_" | wc -l +echo "--- vk* (expect 0) ---" +nm -D --defined-only /build/libvgpu.so | grep -E " T vk[A-Z]" | wc -l +echo +echo "=== libvgpu_vk.so: must have only the 3 layer entry points ===" +nm -D --defined-only /build/libvgpu_vk.so | grep " T " | grep -E "^[^[:space:]]+ T (vk[A-Z]|hami_)" | sort +echo +echo "=== libvgpu_vk.so: DT_NEEDED must include libvgpu.so ===" +readelf -d /build/libvgpu_vk.so | grep NEEDED +echo +echo "=== libvgpu_vk.so: undefined hami_core_* symbols (expect 5) ===" +nm -D --undefined-only /build/libvgpu_vk.so | grep "hami_core_" | wc -l +' +``` + +Expected: +- libvgpu.so hami_core_* count: `5` +- libvgpu.so vk* count: `0` +- libvgpu_vk.so exports: `vkGetDeviceProcAddr`, `vkGetInstanceProcAddr`, 
`vkNegotiateLoaderLayerInterfaceVersion` (3 lines, no `hami_*`) +- DT_NEEDED includes `libvgpu.so` and `libpthread.so.0` +- libvgpu_vk.so undefined hami_core_* count: `5` + +If any check fails — STOP. The structural-isolation property is the whole point of Step C. + +- [ ] **Step 2: No commit (verification only)** + +--- + +### Task 6: Unit tests against the split build + +**Files:** none (verification only) + +- [ ] **Step 1: Step B regression — `test_cuda_null_guards` under LD_PRELOAD libvgpu.so** + +```bash +docker run --rm -v "$PWD/build:/build" ubuntu:22.04 bash -c \ + "LD_PRELOAD=/build/libvgpu.so /build/test/test_cuda_null_guards 2>&1; echo EXIT=\$?" +``` + +Expected: 9 `[OK]` lines, `EXIT=0`. CUDA hook code unchanged across the split, so this MUST pass identically to Task 3 step 3. + +- [ ] **Step 2: Vulkan unit tests against libvgpu_vk.so** + +```bash +docker run --rm -v "$PWD/build:/build" ubuntu:22.04 bash -c ' +for t in test_layer test_memprops test_alloc; do + [ -x /build/test/$t ] || { echo "SKIP $t (not built)"; continue; } + echo "--- $t ---" + LD_LIBRARY_PATH=/build LD_PRELOAD=/build/libvgpu.so:/build/libvgpu_vk.so /build/test/$t 2>&1 | tail -10 + echo "EXIT=$?" +done' +``` + +Expected: each test exits 0 with its expected `[OK]` lines. + +(Why both .so in LD_PRELOAD: the Vulkan unit tests fake the next-layer GIPA and don't go through Vulkan loader manifest activation, so we have to hand-load libvgpu_vk.so. This only matters for unit tests; production uses manifest dlopen.) 
+ +- [ ] **Step 3: No commit (verification only)** + +--- + +### Task 7: Add Vulkan implicit-layer manifest file + +**Files:** +- Create: `libvgpu/share/hami/hami.json` + +- [ ] **Step 1: Write the manifest** + +```json +{ + "file_format_version": "1.0.0", + "layer": { + "name": "VK_LAYER_HAMI_vgpu", + "type": "INSTANCE", + "library_path": "/usr/local/vgpu/libvgpu_vk.so", + "api_version": "1.3.0", + "implementation_version": "1", + "description": "HAMi vGPU partition layer — clamps device-memory queries and tracks Vulkan allocations against the per-pod budget.", + "instance_extensions": [], + "device_extensions": [] + } +} +``` + +Save to `libvgpu/share/hami/hami.json`. + +(Production install path: `/etc/vulkan/implicit_layer.d/hami.json`, typically a symlink to `/usr/local/vgpu/hami.json`. The webhook + DaemonSet that drops this file are Step D scope, not this plan.) + +- [ ] **Step 2: Validate the JSON** + +```bash +python3 -c "import json; json.load(open('share/hami/hami.json')); print('OK')" +``` + +Expected: `OK`. + +- [ ] **Step 3: Commit** + +```bash +git add share/hami/hami.json +git commit -s -m "feat(vulkan): ship hami.json implicit-layer manifest" \ + -m "Static manifest that the Step D webhook + DaemonSet will install +into /etc/vulkan/implicit_layer.d/ to activate libvgpu_vk.so via the +Vulkan loader. file_format_version 1.0.0, type INSTANCE, api 1.3.0. + +library_path is the production install path /usr/local/vgpu/libvgpu_vk.so; +no extensions claimed (the layer only intercepts existing entry points)." +``` + +--- + +### Task 8: ws-node074 LD_PRELOAD-only smoke (the regression-killed proof) + +**Files:** none (production-side verification) + +This task verifies the structural-isolation property on the actual hardware that exhibited the 2026-04-28 regression. The expected outcome is that LD_PRELOAD `libvgpu.so` (Vulkan layer NOT activated, manifest absent) leaves Isaac Sim Kit unaffected — because `libvgpu.so` no longer exports any `vk*` symbols. 
+ +- [ ] **Step 1: Sync sources to ws-node074 and rebuild** + +```bash +cd /Users/xiilab/git/HAMi/libvgpu +rsync -az --exclude=build --exclude=.git/objects/pack . root@10.61.3.74:/tmp/libvgpu-build/ +ssh root@10.61.3.74 'cd /tmp/libvgpu-build && rm -rf .git build && git init -q && git add -A 2>&1 | tail -1 && git -c user.email=x@x -c user.name=x commit -q -m local --no-gpg-sign && make build-in-docker 2>&1 | tail -8' +``` + +Expected: Both `Built target vgpu` and `Built target vgpu_vk` lines. + +- [ ] **Step 2: Verify backups + swap libvgpu.so only (NOT installing manifest yet)** + +```bash +ssh root@10.61.3.74 ' +md5sum /usr/local/vgpu/libvgpu.so /usr/local/vgpu/libvgpu.so.bak-pre-step-c +cp -av /usr/local/vgpu/libvgpu.so /usr/local/vgpu/libvgpu.so.bak-pre-stepC2 2>&1 | tail -1 +cp -f /tmp/libvgpu-build/build/libvgpu.so /usr/local/vgpu/libvgpu.so +md5sum /tmp/libvgpu-build/build/libvgpu.so /usr/local/vgpu/libvgpu.so +ls -la /etc/vulkan/implicit_layer.d/ # confirm hami.json absent +' +``` + +Expected: pre-stepC2 backup created, swap completes, two md5 match (new file in place), `/etc/vulkan/implicit_layer.d/` shows only `nvidia_layers.json` (no `hami.json`). + +- [ ] **Step 3: Baseline runheadless under no LD_PRELOAD (confirm swap doesn't break steady state)** + +```bash +NEWPOD=$(kubectl -n isaac-launchable get pods --no-headers | grep '^isaac-launchable-0' | awk '{print $1}' | head -1) +echo "Pod: $NEWPOD" +kubectl -n isaac-launchable exec $NEWPOD -c vscode -- bash -lc ' +pkill -KILL kit 2>/dev/null; sleep 2 +timeout 45 env ACCEPT_EULA=y /isaac-sim/runheadless.sh > /tmp/c-baseline.log 2>&1 +EC=$? +pkill -KILL kit 2>/dev/null +echo "exit=$EC crash=$(grep -c "Segmentation\|crash has occurred" /tmp/c-baseline.log) listen=$(ss -tunlp 2>/dev/null | grep -c -E :49100)" +' +``` + +Expected: `exit=124 crash=0 listen=1`. If anything else, STOP and restore from `.bak-pre-stepC2`. 
+ +- [ ] **Step 4: LD_PRELOAD-forced runheadless × 5 (the regression check)** + +```bash +kubectl -n isaac-launchable exec $NEWPOD -c vscode -- bash -lc ' +mkdir -p /tmp/v +PASS=0 +for i in 1 2 3 4 5; do + pkill -KILL kit 2>/dev/null; sleep 3 + timeout 50 env \ + ACCEPT_EULA=y \ + LD_PRELOAD=/usr/local/vgpu/libvgpu.so \ + /isaac-sim/runheadless.sh > /tmp/v/r$i.log 2>&1 + EC=$? + CRASH=$(grep -cE "Segmentation fault|crash has occurred" /tmp/v/r$i.log) + LISTEN=$(ss -tunlp 2>/dev/null | grep -c -E ":49100") + echo "run $i: exit=$EC crash=$CRASH listen=$LISTEN" + [ "$EC" = "124" ] && [ "$CRASH" = "0" ] && PASS=$((PASS+1)) + pkill -KILL kit 2>/dev/null +done +echo "PASS=$PASS / 5" +' +``` + +Expected: `PASS=5 / 5` with each run reporting `exit=124 crash=0 listen=1`. + +If `PASS < 5`, the regression is NOT only-Vulkan-code — it lives in HAMi-core too. STOP. Restore `/usr/local/vgpu/libvgpu.so` from `.bak-pre-stepC2`. Open separate analysis (likely needs a full bisect on production hardware). + +- [ ] **Step 5: HAMi-core init verification (NVML hook should still work)** + +```bash +kubectl -n isaac-launchable exec $NEWPOD -c vscode -- bash -lc ' +LD_PRELOAD=/usr/local/vgpu/libvgpu.so nvidia-smi --query-gpu=memory.total --format=csv,noheader +' +``` + +Expected: `23552 MiB` (clamped) — confirms NVML hook is active. If raw `46068 MiB`, partition env not picked up; investigate but NOT a Step C regression. + +- [ ] **Step 6: No commit. Record outcome locally** + +```bash +echo "Task 8 PASS=5/5: $(date -u +%Y-%m-%dT%H:%M:%SZ)" >> /tmp/step-c-task8-result.txt +``` + +(The commit comes in Task 10 with the submodule bump.) 
+ +--- + +### Task 9: ws-node074 manifest-activated smoke (Vulkan layer actually doing its job) + +**Files:** none (production-side verification) + +This task confirms the new architecture's happy path: `libvgpu.so` LD_PRELOAD'd + `libvgpu_vk.so` installed at `/usr/local/vgpu/libvgpu_vk.so` + `hami.json` at `/etc/vulkan/implicit_layer.d/hami.json` → Isaac Sim Kit alive AND partition enforced. + +- [ ] **Step 1: Install libvgpu_vk.so + manifest on host** + +```bash +ssh root@10.61.3.74 ' +cp -av /tmp/libvgpu-build/build/libvgpu_vk.so /usr/local/vgpu/libvgpu_vk.so 2>&1 | tail -1 +md5sum /usr/local/vgpu/libvgpu_vk.so +cp -av /tmp/libvgpu-build/share/hami/hami.json /etc/vulkan/implicit_layer.d/hami.json 2>&1 | tail -1 +ls -la /etc/vulkan/implicit_layer.d/ +' +``` + +Expected: both files in place. Manifest path now lists `hami.json` alongside `nvidia_layers.json`. + +- [ ] **Step 2: Manifest-activated runheadless × 5 with HAMI_VK_TRACE on the first run only** + +```bash +kubectl -n isaac-launchable exec $NEWPOD -c vscode -- bash -lc ' +mkdir -p /tmp/v2 +PASS=0 +for i in 1 2 3 4 5; do + pkill -KILL kit 2>/dev/null; sleep 3 + TRACE_ARG="" + [ "$i" = "1" ] && TRACE_ARG="HAMI_VK_TRACE=1" + timeout 50 env \ + ACCEPT_EULA=y \ + $TRACE_ARG \ + LD_PRELOAD=/usr/local/vgpu/libvgpu.so \ + /isaac-sim/runheadless.sh > /tmp/v2/r$i.log 2>&1 + EC=$? 
+ CRASH=$(grep -cE "Segmentation fault|crash has occurred" /tmp/v2/r$i.log) + LISTEN=$(ss -tunlp 2>/dev/null | grep -c -E ":49100") + echo "run $i: exit=$EC crash=$CRASH listen=$LISTEN" + [ "$EC" = "124" ] && [ "$CRASH" = "0" ] && PASS=$((PASS+1)) + pkill -KILL kit 2>/dev/null +done +echo "PASS=$PASS / 5" +echo "=== run 1 trace lines ===" +grep -c HAMI_VK_TRACE /tmp/v2/r1.log +echo "=== run 1 top GIPA names ===" +grep "hami_vkGetInstanceProcAddr.*name=" /tmp/v2/r1.log | sed -e "s/.*name=//" -e "s/ .*//" | sort | uniq -c | sort -rn | head -20 +' +``` + +Expected: +- `PASS=5 / 5` +- run 1 trace lines > 100 (layer is now actually being invoked through the chain) +- top GIPA names: `vkCreateInstance`, `vkGetPhysicalDeviceMemoryProperties*`, `vkAllocateMemory`, etc. + +If `PASS < 5` even with manifest active, the layer code itself has a real bug. STOP, capture trace evidence, surface to controller. + +If trace lines = 0 with manifest active, the loader didn't pick up our manifest. Inspect: `nvidia_layers.json` content vs ours, JSON syntax, file permissions on `/etc/vulkan/implicit_layer.d/hami.json`. + +- [ ] **Step 3: Partition clamp verification under manifest-active path** + +```bash +kubectl -n isaac-launchable exec $NEWPOD -c vscode -- bash -lc ' +echo "=== nvidia-smi clamp via NVML hook ===" +LD_PRELOAD=/usr/local/vgpu/libvgpu.so nvidia-smi --query-gpu=memory.total --format=csv,noheader +echo "=== Vulkan vkGetPhysicalDeviceMemoryProperties via vk_partition_test (if present) ===" +if [ -f vk_partition_test.py ]; then + LD_PRELOAD=/usr/local/vgpu/libvgpu.so /isaac-sim/python.sh vk_partition_test.py 2>&1 | head -30 + echo "EXIT=$?" +else + echo "vk_partition_test.py 부재 — skip (Step D scope에서 작성)" +fi +' +``` + +Expected: nvidia-smi shows `23552 MiB`. If `vk_partition_test.py` exists, Vulkan-side memory query also clamped to `23552 MiB`. + +- [ ] **Step 4: No commit (verification only)** + +If the verification fails, STOP. 
Restore: `cp /usr/local/vgpu/libvgpu.so.bak-pre-stepC2 /usr/local/vgpu/libvgpu.so; rm /etc/vulkan/implicit_layer.d/hami.json`. + +--- + +### Task 10: Push HAMi-core fork + bump parent submodule + draft PR comments + +**Files:** +- Modify (parent repo): `libvgpu` submodule SHA bump +- Create: `/tmp/step-c-vk-split-pr-drafts/{pr182,pr1803}.md` + +- [ ] **Step 1: Push libvgpu fork** + +```bash +cd /Users/xiilab/git/HAMi/libvgpu +git log --oneline -10 +git push xiilab vulkan-layer 2>&1 | tail -10 +``` + +Expected: 4 new commits push successfully (the docs-only commits from the prior session + the Tasks 1-2-4-7 code commits). + +- [ ] **Step 2: Bump parent HAMi submodule** + +```bash +cd /Users/xiilab/git/HAMi +NEW_SHA=$(cd libvgpu && git rev-parse HEAD) +echo "new HAMi-core SHA: $NEW_SHA" +git add libvgpu +git commit -s -m "chore(libvgpu): bump HAMi-core for Step C — Vulkan layer split" \ + -m "Pulls in the Step C redesign: Vulkan layer code is now a separate +libvgpu_vk.so, activated by /etc/vulkan/implicit_layer.d/hami.json. +libvgpu.so retains only HAMi-core (NVML/CUDA hooks + allocator + +multiprocess) and loses all vk* exports. + +Verified on ws-node074: +* LD_PRELOAD libvgpu.so without manifest → 5/5 runheadless exit=124 + alive (the 2026-04-28 regression class is gone). +* LD_PRELOAD libvgpu.so + hami.json manifest → 5/5 alive, + HAMI_VK_TRACE > 100 lines, partition clamp 44 GiB → 23 GiB. + +Spec: docs/superpowers/specs/2026-04-29-step-c-redesign-vk-so-split.md +Plan: docs/superpowers/plans/2026-04-29-step-c-vk-so-split.md" +git push xiilab feat/vulkan-vgpu 2>&1 | tail -5 +``` + +- [ ] **Step 3: Draft PR comments — DO NOT POST** + +```bash +mkdir -p /tmp/step-c-vk-split-pr-drafts + +cat > /tmp/step-c-vk-split-pr-drafts/pr182.md <<'EOF' +## Step C redesigned — Vulkan layer split into libvgpu_vk.so + +The 2026-04-28 attempt (commits since reverted) regressed `runheadless.sh` +under LD_PRELOAD on ws-node074 — see notes/2026-04-28-vk-trace-isaac-sim.md. 
+Trace evidence proved our layer wrappers were never called; the +regression lived at the .so-load boundary. Rather than spending more +diagnostic cycles on production hardware, this redesign makes that +class of regression structurally impossible. + +| Commit | Change | +|---|---| +| (sha) | feat(hami-core): explicit hami_core_* export wrappers | +| (sha) | refactor(vulkan): use hami_core_* wrappers instead of internal externs | +| (sha) | build: split Vulkan layer into separate libvgpu_vk.so | +| (sha) | feat(vulkan): ship hami.json implicit-layer manifest | + +### What changed +- `libvgpu.so` keeps NVML/CUDA hooks + allocator + multiprocess. Loses + all `vk*` exports. +- New `libvgpu_vk.so` carries the entire `src/vulkan/*` and exports + only `vkGetInstanceProcAddr`, `vkGetDeviceProcAddr`, + `vkNegotiateLoaderLayerInterfaceVersion`. DT_NEEDED includes + `libvgpu.so`, so the linker resolves the 5 `hami_core_*` wrappers at + Vulkan-loader dlopen time. +- `share/hami/hami.json` is the implicit-layer manifest the Step D + webhook drops into `/etc/vulkan/implicit_layer.d/`. + +### Verification on ws-node074 +- ELF: `nm -D libvgpu.so | grep 'T vk'` → 0 lines. `nm -D libvgpu_vk.so` + → exactly 3 `vk*` exports. `readelf -d libvgpu_vk.so` lists + `libvgpu.so` as NEEDED. +- Step B regression `test_cuda_null_guards`: 9/9 [OK] (CUDA hooks + unchanged across the split). +- LD_PRELOAD `libvgpu.so` without manifest, `runheadless.sh` × 5: 5/5 + `exit=124 crash=0 listen=1`. **The 2026-04-28 regression class is + gone.** +- LD_PRELOAD `libvgpu.so` + manifest, `runheadless.sh` × 5: 5/5 alive, + `HAMI_VK_TRACE` > 100 lines (layer in chain), partition clamp + 44 GiB → 23 GiB. + +### Out of scope +- The original Step C tasks (cache first next-gipa, GIPA/GDPA fallback, + `EnumerateDevice*` hooks) were reverted and stay deferred until this + architecture is verified in production. They will return as a follow-up + PR after the split is in. 
+ +EOF + +cat > /tmp/step-c-vk-split-pr-drafts/pr1803.md <<'EOF' +## Step C — Vulkan layer split (libvgpu_vk.so) + +HAMi-core PR #182 redesigned Step C: `libvgpu.so` is now HAMi-core only, +and a new `libvgpu_vk.so` holds the Vulkan implicit layer. Activation +moves entirely to the manifest path, removing the LD_PRELOAD/Vulkan- +loader collision surface that bit us on 2026-04-28. + +The `libvgpu` submodule pointer is bumped to `<HAMI_BUMP_SHA>`. + +### Verification (ws-node074, isaac-launchable-0) +- LD_PRELOAD `libvgpu.so` without manifest: 5/5 `runheadless.sh` alive + (regression class structurally gone). +- LD_PRELOAD `libvgpu.so` + `hami.json`: 5/5 alive, layer in chain + (`HAMI_VK_TRACE > 0`), partition clamp 44 GiB → 23 GiB. + +Spec: `docs/superpowers/specs/2026-04-29-step-c-redesign-vk-so-split.md` +Plan: `docs/superpowers/plans/2026-04-29-step-c-vk-so-split.md` +EOF + +HAMI_BUMP_SHA=$(cd /Users/xiilab/git/HAMi && git rev-parse HEAD) +sed -i.bak "s/<HAMI_BUMP_SHA>/$HAMI_BUMP_SHA/g" /tmp/step-c-vk-split-pr-drafts/pr1803.md +rm /tmp/step-c-vk-split-pr-drafts/pr1803.md.bak + +ls -la /tmp/step-c-vk-split-pr-drafts/ +``` + +(SHA placeholders in pr182.md will be filled by the controller from `git log` output.) + +- [ ] **Step 4: Report — DO NOT post comments. Wait for explicit user approval.** + +--- + +## Self-Review + +**1. Spec coverage:** +- Spec §"Architecture" (split, DT_NEEDED, manifest-only activation) → Tasks 1-4, 7 +- Spec §"Components" (libvgpu.so loses vulkan_mod, libvgpu_vk.so, budget bridge update, hami.json) → Tasks 1-4, 7 +- Spec §"Data flow" (production happy path) → Tasks 8-9 verify +- Spec §"Error handling" (libvgpu.so absent, manifest absent, etc.) → Task 8 covers `libvgpu.so` absent indirectly (we only test the present case here; absent case is "loader skips layer" which is library-loader behavior we trust); manifest-absent case is exactly Task 8's main test. 
+- Spec §"Testing" (unit + ELF + LD_PRELOAD-only smoke + manifest smoke + HAMI_VK_TRACE) → Tasks 3, 5, 6, 8, 9 +- Spec §"Production safety gate" (backup before swap, baseline-after-swap check, md5 logging) → Task 8 step 2-3, plus restore guidance in step 4. +- Spec §"Out of scope" (Tasks 1+2 deferred, root-cause diagnostic skipped, webhook in Step A/D) → reflected in Task 10 PR draft language. ✅ + +**2. Placeholder scan:** Tasks 8 and 9 contain expected outputs and concrete kubectl/ssh commands. Task 10 PR drafts have one explicit `<HAMI_BUMP_SHA>` placeholder that's substituted in step 3 and a `(sha)` placeholder in pr182.md noted as "filled by the controller". No `TODO`/`TBD`/`figure out`/`add appropriate ...` patterns. ✅ + +**3. Type consistency:** `hami_core_oom_check` / `hami_core_add_memory_usage` / `hami_core_rm_memory_usage` / `hami_core_get_memory_limit` / `hami_core_throttle` — same names in header, .c, call sites, and verification grep. `LIBVGPU_VK = vgpu_vk` → `lib${LIBVGPU_VK}.so` = `libvgpu_vk.so` consistent across CMake + ELF checks + manifest `library_path`. ✅ + +**4. Scope check:** Single .so split + manifest. Plan-able as one implementation. Step D (manifest install via webhook + opt-in label activation) is the next plan, not this one. ✅ + +**5. Production safety:** Task 8 verifies before installing the manifest (LD_PRELOAD-only) precisely so we get the regression-killed proof first. Task 9 only proceeds if Task 8 passes. Both have explicit restore commands at failure. 
✅ + +--- + +## Estimated time + +| Task | 예상 | +|---|---| +| 1 hami_core_export wrappers | 25분 | +| 2 vulkan call-site rewrite | 15분 | +| 3 pre-split sanity build | 10분 | +| 4 CMake split | 20분 | +| 5 ELF / symbol diff verify | 10분 | +| 6 unit tests | 15분 | +| 7 manifest file | 10분 | +| 8 ws-node074 LD_PRELOAD-only smoke | 30분 | +| 9 ws-node074 manifest smoke | 30분 | +| 10 push + bump + PR drafts | 20분 | +| **총** | **약 3시간** | diff --git a/docs/superpowers/plans/2026-04-29-step-d-vulkan-opt-in-production-activation.md b/docs/superpowers/plans/2026-04-29-step-d-vulkan-opt-in-production-activation.md new file mode 100644 index 000000000..24999cec1 --- /dev/null +++ b/docs/superpowers/plans/2026-04-29-step-d-vulkan-opt-in-production-activation.md @@ -0,0 +1,820 @@ +# Step D — Vulkan opt-in production activation + 4-path 검증 Implementation Plan + +> **For agentic workers:** REQUIRED SUB-SKILL: Use superpowers:subagent-driven-development (recommended) or superpowers:executing-plans to implement this plan task-by-task. Steps use checkbox (`- [ ]`) syntax for tracking. + +**Goal:** Step C 의 `libvgpu_vk.so` 가 production opt-in path 에서 실제로 chain 진입 + partition enforce 가 NVML / CUDA / Vulkan-memory-query / Vulkan-allocate 4 path 모두에서 작동함을 ws-node074 isaac-launchable-0 에서 검증. + +**Architecture:** volcano-vgpu-device-plugin image rebuild → 새 libvgpu.so + libvgpu_vk.so 호스트 install. `hami-vulkan-manifest` ConfigMap 의 `library_path` 를 `libvgpu_vk.so` 로 update + type INSTANCE. manifest installer DaemonSet 재활성. webhook 의 `applyVulkanAnnotation` 코드 그대로 — annotation `hami.io/vulkan: "true"` 가 trigger. + +**Tech Stack:** Docker (image rebuild), kubectl (CM/DS apply), ws-node074 (production verification), python (4-path test scripts). Repos: `Project-HAMi/HAMi`, `Project-HAMi/HAMi-core` (libvgpu submodule), `volcano-vgpu-device-plugin` fork at `/Users/xiilab/git/volcano-vgpu-device-plugin/`. Spec: `docs/superpowers/specs/2026-04-29-step-d-vulkan-opt-in-production-activation.md`. 
+ +--- + +## File Structure + +| 파일 | 변경 종류 | 책임 | +|---|---|---| +| `/Users/xiilab/git/volcano-vgpu-device-plugin/libvgpu` (submodule) | Modify | submodule SHA bump → `65930f4` (Step C end) | +| `/Users/xiilab/git/volcano-vgpu-device-plugin/docker/Dockerfile.ubuntu20.04` | Inspect / possibly Modify | image build 가 새 `libvgpu_vk.so` 도 `lib/nvidia/` 에 복사하도록 | +| `cluster/runtime/snapshot-2026-04-29-step-d/hami-vulkan-manifest-cm.yaml` | Create (copy from snapshot-2026-04-28) | library_path → libvgpu_vk.so, type → INSTANCE | +| `cluster/runtime/snapshot-2026-04-29-step-d/hami-vulkan-manifest-installer-ds.yaml` | Create (copy from snapshot-2026-04-28) | nodeSelector 복구 | +| `cluster/runtime/snapshot-2026-04-29-step-d/volcano-device-plugin-ds.yaml` | Create (copy from snapshot-2026-04-28) | image tag → vulkan-v2 | +| `cluster/runtime/snapshot-2026-04-29-step-d/4-path-verification.sh` | Create | NVML / CUDA / Vulkan memory / Vulkan allocate 검증 script | +| `cluster/runtime/snapshot-2026-04-29-step-d/vk_partition_test.py` | Create | Vulkan path 검증 python script (vkGetPhysicalDeviceMemoryProperties + vkAllocateMemory) | + +--- + +## Tasks + +### Task 1: Inventory current production state + baseline backup + +**Files:** none (state capture) + +- [ ] **Step 1: Capture current production state** + +```bash +ssh root@10.61.3.74 ' +echo "=== /usr/local/vgpu/ contents + md5 ===" +ls -la /usr/local/vgpu/ | head +md5sum /usr/local/vgpu/libvgpu*.so 2>/dev/null +echo +echo "=== ConfigMap hami-vulkan-manifest current state ===" +kubectl get cm -n kube-system hami-vulkan-manifest -o yaml | head -30 +echo +echo "=== DaemonSet hami-vulkan-manifest-installer status + nodeSelector ===" +kubectl get ds -n kube-system hami-vulkan-manifest-installer -o jsonpath="{.spec.template.spec.nodeSelector}{\"\n\"}{.status}{\"\n\"}" +echo +echo "=== DaemonSet volcano-device-plugin image + status ===" +kubectl get ds -n kube-system volcano-device-plugin -o 
jsonpath="{.spec.template.spec.containers[*].image}{\"\n\"}{.status}{\"\n\"}" +' > /tmp/step-d-pre-state.txt +cat /tmp/step-d-pre-state.txt +``` + +Expected output captured to `/tmp/step-d-pre-state.txt`. Verify: +- `libvgpu.so` md5 = `8f889313ece246b2d08ea6291f48b67a` (Step C end baseline) +- `hami-vulkan-manifest-installer` nodeSelector 가 `hami.io/disabled: "true"` (현재 비활성) +- `volcano-device-plugin` image 가 `vulkan-v1` + +- [ ] **Step 2: Baseline runheadless on isaac-launchable-0 + isaac-launchable-1** + +```bash +for POD in $(kubectl -n isaac-launchable get pods --no-headers | awk '/^isaac-launchable-[0-9]/{print $1}'); do + echo "=== $POD baseline ===" + kubectl -n isaac-launchable exec $POD -c vscode -- bash -lc ' + pkill -KILL kit 2>/dev/null; sleep 2 + timeout 45 env ACCEPT_EULA=y /isaac-sim/runheadless.sh > /tmp/baseline.log 2>&1 + EC=$? + pkill -KILL kit 2>/dev/null + echo "exit=$EC crash=$(grep -c "Segmentation\|crash has occurred" /tmp/baseline.log) listen=$(ss -tunlp 2>/dev/null | grep -c -E :49100)" + rm -f /tmp/baseline.log + ' +done +``` + +Expected: 두 pod 모두 `exit=124 crash=0 listen=1`. + +- [ ] **Step 3: No commit (state capture only)** + +If any baseline check fails, STOP — production already broken pre-Step-D. Investigate before proceeding. + +--- + +### Task 2: Build & push volcano-vgpu-device-plugin:vulkan-v2 image with new libvgpu.so + libvgpu_vk.so + +**Files:** +- Modify (volcano fork): `libvgpu` submodule SHA → `65930f4` +- Inspect/Modify (volcano fork): `docker/Dockerfile.ubuntu20.04` + +- [ ] **Step 1: Inspect Dockerfile to confirm libvgpu_vk.so handling** + +```bash +cd /Users/xiilab/git/volcano-vgpu-device-plugin +sed -n '30,80p' docker/Dockerfile.ubuntu20.04 +``` + +Verify whether the Dockerfile copies BOTH `libvgpu.so` AND `libvgpu_vk.so` from the libvgpu build dir into `/k8s-vgpu/lib/nvidia/` (or wherever the postStart `cp -rf ... /usr/local/vgpu/` source path is). 
If only `libvgpu.so` is copied, ADD `libvgpu_vk.so` to the same COPY/cp step. + +Expected: Dockerfile already runs `make build-in-docker` or equivalent inside libvgpu and ends up with `libvgpu*.so` in the final image's `/k8s-vgpu/lib/nvidia/`. If not, edit Dockerfile to add the second .so. + +- [ ] **Step 2: Bump libvgpu submodule to Step C end** + +```bash +cd /Users/xiilab/git/volcano-vgpu-device-plugin/libvgpu +git fetch xiilab vulkan-layer +git checkout 65930f4 # Step C 끝 (feat(vulkan): ship hami.json implicit-layer manifest) +cd .. +git add libvgpu +git status +git -c user.email=je.kim@xiilab.com -c user.name=Jea-Eok-Kim commit -s -m "build: bump libvgpu submodule to Step C end (libvgpu_vk.so split)" -m "Pulls in HAMi-core vulkan-layer 65930f4 — the Step C redesign that +splits Vulkan layer code into a separate libvgpu_vk.so. After this +bump, the device plugin image will ship both libvgpu.so (HAMi-core +only, no vk* exports) and libvgpu_vk.so (Vulkan implicit layer) +into /k8s-vgpu/lib/nvidia/, and the existing postStart cp -rf will +install both onto /usr/local/vgpu/ on each scheduled node. + +Spec: HAMi-core docs/superpowers/specs/2026-04-29-step-c-redesign-vk-so-split.md +Step D plan in HAMi parent: docs/superpowers/plans/2026-04-29-step-d-vulkan-opt-in-production-activation.md" +``` + +- [ ] **Step 3: Build the image** + +```bash +cd /Users/xiilab/git/volcano-vgpu-device-plugin +docker build -f docker/Dockerfile.ubuntu20.04 \ + -t 10.61.3.124:30002/library/volcano-vgpu-device-plugin:vulkan-v2 \ + --platform linux/amd64 \ + . 2>&1 | tail -20 +``` + +Expected: `Successfully tagged 10.61.3.124:30002/library/volcano-vgpu-device-plugin:vulkan-v2`. No errors during the libvgpu sub-build. + +If local Docker daemon isn't running, push the build to ws-node074: + +```bash +rsync -az --exclude=.git/objects/pack . 
root@10.61.3.74:/tmp/volcano-build/ +ssh root@10.61.3.74 'cd /tmp/volcano-build && docker build -f docker/Dockerfile.ubuntu20.04 -t 10.61.3.124:30002/library/volcano-vgpu-device-plugin:vulkan-v2 --platform linux/amd64 . 2>&1 | tail -20' +``` + +- [ ] **Step 4: Verify the image contains both .so** + +```bash +docker run --rm --entrypoint /bin/sh 10.61.3.124:30002/library/volcano-vgpu-device-plugin:vulkan-v2 \ + -c 'ls -la /k8s-vgpu/lib/nvidia/ ; md5sum /k8s-vgpu/lib/nvidia/libvgpu*.so' +``` + +Expected: 두 .so 모두 존재 + md5 가 우리 build 와 일치 (libvgpu.so `1bd8f078`, libvgpu_vk.so `95b44957` 또는 새로 빌드된 동일한 산출물). + +If on ws-node074 (no local docker): + +```bash +ssh root@10.61.3.74 'docker run --rm --entrypoint /bin/sh 10.61.3.124:30002/library/volcano-vgpu-device-plugin:vulkan-v2 -c "ls -la /k8s-vgpu/lib/nvidia/ ; md5sum /k8s-vgpu/lib/nvidia/libvgpu*.so"' +``` + +- [ ] **Step 5: Push to local registry** + +```bash +docker push 10.61.3.124:30002/library/volcano-vgpu-device-plugin:vulkan-v2 2>&1 | tail -5 +# or via ssh +ssh root@10.61.3.74 'docker push 10.61.3.124:30002/library/volcano-vgpu-device-plugin:vulkan-v2 2>&1 | tail -5' +``` + +Expected: push 성공. 
+ +- [ ] **Step 6: Push volcano fork commit** + +```bash +cd /Users/xiilab/git/volcano-vgpu-device-plugin +git remote -v # confirm xiilab fork +git push xiilab HEAD 2>&1 | tail -3 +``` + +--- + +### Task 3: Update hami-vulkan-manifest ConfigMap to point to libvgpu_vk.so + +**Files:** +- Create: `cluster/runtime/snapshot-2026-04-29-step-d/hami-vulkan-manifest-cm.yaml` + +- [ ] **Step 1: Create snapshot directory and copy base ConfigMap** + +```bash +cd /Users/xiilab/git/HAMi +mkdir -p cluster/runtime/snapshot-2026-04-29-step-d +cp cluster/runtime/snapshot-2026-04-28/hami-vulkan-manifest-cm.yaml \ + cluster/runtime/snapshot-2026-04-29-step-d/hami-vulkan-manifest-cm.yaml +``` + +- [ ] **Step 2: Edit the ConfigMap data — library_path + type** + +Use Edit tool to change in `cluster/runtime/snapshot-2026-04-29-step-d/hami-vulkan-manifest-cm.yaml`: + +OLD `data.hami.json` value (the inline JSON): +``` +"library_path": "/usr/local/vgpu/libvgpu.so" +``` +NEW: +``` +"library_path": "/usr/local/vgpu/libvgpu_vk.so" +``` + +OLD: +``` +"type": "GLOBAL" +``` +NEW: +``` +"type": "INSTANCE" +``` + +Also strip the runtime metadata that doesn't apply to a fresh apply: `creationTimestamp`, `resourceVersion`, `uid`, the `last-applied-configuration` annotation. Keep `name`, `namespace`, `data`. + +- [ ] **Step 3: Apply ConfigMap** + +```bash +kubectl apply -f cluster/runtime/snapshot-2026-04-29-step-d/hami-vulkan-manifest-cm.yaml +kubectl get cm -n kube-system hami-vulkan-manifest -o jsonpath='{.data.hami\.json}' | python3 -m json.tool +``` + +Expected: parsed JSON shows `library_path` = `/usr/local/vgpu/libvgpu_vk.so` and `type` = `INSTANCE`. 
+ +- [ ] **Step 4: Commit the snapshot** + +```bash +cd /Users/xiilab/git/HAMi +git add cluster/runtime/snapshot-2026-04-29-step-d/hami-vulkan-manifest-cm.yaml +git commit -s -m "chore(runtime): Step D — update hami-vulkan-manifest CM to libvgpu_vk.so" \ + -m "library_path = /usr/local/vgpu/libvgpu_vk.so (Step C split target) +type = INSTANCE (per spec; the Vulkan loader treats INSTANCE and +GLOBAL identically — only DEVICE is deprecated — so this is a naming +cleanup, not a behavior change). + +enable_environment HAMI_VULKAN_ENABLE=1 unchanged — opt-in trigger +flows through the existing webhook applyVulkanAnnotation." +``` + +--- + +### Task 4: Re-enable hami-vulkan-manifest-installer DaemonSet + +**Files:** +- Create: `cluster/runtime/snapshot-2026-04-29-step-d/hami-vulkan-manifest-installer-ds.yaml` + +- [ ] **Step 1: Copy base + change nodeSelector** + +```bash +cp cluster/runtime/snapshot-2026-04-28/hami-vulkan-manifest-installer-ds.yaml \ + cluster/runtime/snapshot-2026-04-29-step-d/hami-vulkan-manifest-installer-ds.yaml +``` + +Edit `cluster/runtime/snapshot-2026-04-29-step-d/hami-vulkan-manifest-installer-ds.yaml`: + +OLD: +```yaml + nodeSelector: + hami.io/disabled: "true" +``` +NEW: +```yaml + nodeSelector: + nvidia.com/gpu.present: "true" +``` + +Also strip runtime metadata (creationTimestamp, resourceVersion, uid, status, generation, last-applied-configuration annotation). + +- [ ] **Step 2: Apply DaemonSet patch** + +```bash +kubectl apply -f cluster/runtime/snapshot-2026-04-29-step-d/hami-vulkan-manifest-installer-ds.yaml +``` + +- [ ] **Step 3: Wait for installer DS to schedule + run on GPU nodes** + +```bash +kubectl rollout status ds/hami-vulkan-manifest-installer -n kube-system --timeout=120s +kubectl -n kube-system get pods -l app=hami-vulkan-manifest-installer -o wide +``` + +Expected: at least 1 pod scheduled (ws-node074 has `nvidia.com/gpu.present=true`). 
+ +- [ ] **Step 4: Verify manifest installed on host** + +```bash +ssh root@10.61.3.74 'ls -la /usr/local/vgpu/vulkan/implicit_layer.d/ ; cat /usr/local/vgpu/vulkan/implicit_layer.d/hami.json | head -20' +``` + +Expected: `hami.json` exists with `library_path: /usr/local/vgpu/libvgpu_vk.so`. + +- [ ] **Step 5: Post-step alive check (no annotation yet → loader still inert)** + +```bash +NEWPOD=$(kubectl -n isaac-launchable get pods --no-headers | awk '/^isaac-launchable-0/{print $1}' | head -1) +kubectl -n isaac-launchable exec $NEWPOD -c vscode -- bash -lc ' +pkill -KILL kit 2>/dev/null; sleep 2 +timeout 45 env ACCEPT_EULA=y /isaac-sim/runheadless.sh > /tmp/post-task4.log 2>&1 +EC=$? +pkill -KILL kit 2>/dev/null +echo "post-task4: exit=$EC crash=$(grep -c "Segmentation\|crash has occurred" /tmp/post-task4.log) listen=$(ss -tunlp 2>/dev/null | grep -c -E :49100)" +rm -f /tmp/post-task4.log +' +``` + +Expected: `exit=124 crash=0 listen=1`. (Manifest is now installed but `enable_environment` requires `HAMI_VULKAN_ENABLE=1`; without that env, the layer stays inert — should not regress baseline.) If anything else, immediately rollback installer DS to disabled state and STOP. + +- [ ] **Step 6: Commit** + +```bash +git add cluster/runtime/snapshot-2026-04-29-step-d/hami-vulkan-manifest-installer-ds.yaml +git commit -s -m "chore(runtime): Step D — re-enable hami-vulkan-manifest-installer DS" \ + -m "nodeSelector hami.io/disabled: true → nvidia.com/gpu.present: true. +Was disabled during the 4-27 night-patch rollback; re-enabling it here +because the Step C redesign (libvgpu_vk.so split + manifest INSTANCE +type + enable_environment gate) makes activation safe even when the +manifest is host-installed: layer stays inert until HAMI_VULKAN_ENABLE=1 +flows through the webhook on a per-pod basis." 
+``` + +--- + +### Task 5: Bump volcano-device-plugin DaemonSet image to vulkan-v2 + +**Files:** +- Create: `cluster/runtime/snapshot-2026-04-29-step-d/volcano-device-plugin-ds.yaml` + +- [ ] **Step 1: Copy base + bump image tag** + +```bash +cp cluster/runtime/snapshot-2026-04-28/volcano-device-plugin-ds.yaml \ + cluster/runtime/snapshot-2026-04-29-step-d/volcano-device-plugin-ds.yaml +``` + +Edit the file: replace ALL occurrences of `volcano-vgpu-device-plugin:vulkan-v1` with `volcano-vgpu-device-plugin:vulkan-v2`. There are 2 (init container + main container) per the prior snapshot. Also strip runtime metadata. + +- [ ] **Step 2: Apply DaemonSet bump** + +```bash +kubectl apply -f cluster/runtime/snapshot-2026-04-29-step-d/volcano-device-plugin-ds.yaml +kubectl rollout status ds/volcano-device-plugin -n kube-system --timeout=300s +``` + +Expected: pods rolling, eventually `numberReady` matches `desiredNumberScheduled`. + +- [ ] **Step 3: Verify host install — both .so present with new md5** + +```bash +ssh root@10.61.3.74 ' +md5sum /usr/local/vgpu/libvgpu.so /usr/local/vgpu/libvgpu_vk.so +ls -la /usr/local/vgpu/libvgpu*.so 2>&1 +' +``` + +Expected: both .so present. md5 of `libvgpu.so` = `1bd8f078...` (or whatever the Step C end build produced; compare against `/tmp/libvgpu-build/build/libvgpu.so` if still around). md5 of `libvgpu_vk.so` = `95b44957...`. + +- [ ] **Step 4: Post-step alive check on isaac-launchable-0 (still no annotation)** + +```bash +NEWPOD=$(kubectl -n isaac-launchable get pods --no-headers | awk '/^isaac-launchable-0/{print $1}' | head -1) +kubectl -n isaac-launchable exec $NEWPOD -c vscode -- bash -lc ' +pkill -KILL kit 2>/dev/null; sleep 2 +timeout 45 env ACCEPT_EULA=y /isaac-sim/runheadless.sh > /tmp/post-task5.log 2>&1 +EC=$? 
+pkill -KILL kit 2>/dev/null +echo "post-task5: exit=$EC crash=$(grep -c "Segmentation\|crash has occurred" /tmp/post-task5.log) listen=$(ss -tunlp 2>/dev/null | grep -c -E :49100)" +rm -f /tmp/post-task5.log +' +``` + +Expected: `exit=124 crash=0 listen=1`. (Without HAMI_VULKAN_ENABLE the layer is still inert.) If regression, immediate rollback to vulkan-v1 image. + +- [ ] **Step 5: Commit** + +```bash +git add cluster/runtime/snapshot-2026-04-29-step-d/volcano-device-plugin-ds.yaml +git commit -s -m "chore(runtime): Step D — bump volcano-device-plugin to vulkan-v2" \ + -m "Image vulkan-v1 → vulkan-v2. The new image ships libvgpu.so +(Step C end build, HAMi-core only) and libvgpu_vk.so (Vulkan layer) +in /k8s-vgpu/lib/nvidia/, so the existing postStart cp -rf ... +/usr/local/vgpu/ installs both onto every GPU node." +``` + +--- + +### Task 6: Annotate isaac-launchable-0 + restart + initial activation verify + +**Files:** none (state changes only) + +- [ ] **Step 1: Check current annotation** + +```bash +NEWPOD=$(kubectl -n isaac-launchable get pods --no-headers | awk '/^isaac-launchable-0/{print $1}' | head -1) +kubectl -n isaac-launchable get pod $NEWPOD -o jsonpath='{.metadata.annotations}' | python3 -m json.tool 2>/dev/null | grep -i hami +``` + +If `hami.io/vulkan: "true"` already present, the deployment likely had it from prior testing; skip step 2 and go to step 3 (just delete pod to re-apply webhook). 
+ +- [ ] **Step 2: Annotate the deployment / statefulset** + +```bash +# isaac-launchable-0 is likely managed by a Deployment/StatefulSet — patch the workload, not the pod +kubectl -n isaac-launchable get $(kubectl -n isaac-launchable get all -o name | grep -E "isaac-launchable-0$" | head -1) -o yaml > /tmp/isaac-0-pre.yaml +# Add hami.io/vulkan: "true" to spec.template.metadata.annotations +WORKLOAD=$(kubectl -n isaac-launchable get all -o name | grep -E "isaac-launchable-0$" | head -1) +echo "Workload: $WORKLOAD" +kubectl -n isaac-launchable patch $WORKLOAD --type=merge -p '{"spec":{"template":{"metadata":{"annotations":{"hami.io/vulkan":"true"}}}}}' +``` + +- [ ] **Step 3: Wait for new pod to come up** + +```bash +kubectl -n isaac-launchable rollout status $WORKLOAD --timeout=300s +NEWPOD=$(kubectl -n isaac-launchable get pods --no-headers | awk '/^isaac-launchable-0/{print $1}' | head -1) +echo "New pod: $NEWPOD" +kubectl -n isaac-launchable get pod $NEWPOD -o jsonpath='{range .spec.containers[*]}{.name}: {.env[?(@.name=="HAMI_VULKAN_ENABLE")].value}{"\n"}{end}' +kubectl -n isaac-launchable get pod $NEWPOD -o jsonpath='{range .spec.containers[*]}{.name}: {.env[?(@.name=="NVIDIA_DRIVER_CAPABILITIES")].value}{"\n"}{end}' +``` + +Expected: `vscode: 1` for HAMI_VULKAN_ENABLE, NVIDIA_DRIVER_CAPABILITIES contains `graphics`. + +- [ ] **Step 4: Verify pod healthy + alive runheadless** + +```bash +kubectl -n isaac-launchable get pod $NEWPOD +kubectl -n isaac-launchable exec $NEWPOD -c vscode -- bash -lc ' +pkill -KILL kit 2>/dev/null; sleep 2 +timeout 50 env ACCEPT_EULA=y /isaac-sim/runheadless.sh > /tmp/active.log 2>&1 +EC=$? +pkill -KILL kit 2>/dev/null +echo "active: exit=$EC crash=$(grep -c "Segmentation\|crash has occurred" /tmp/active.log) listen=$(ss -tunlp 2>/dev/null | grep -c -E :49100)" +rm -f /tmp/active.log +' +``` + +Expected: `exit=124 crash=0 listen=1`. If regression → rollback annotation, then if still bad rollback DS bumps too. 
+ +- [ ] **Step 5: No commit** + +--- + +### Task 7: 4-path partition-enforcement verification + +**Files:** +- Create: `cluster/runtime/snapshot-2026-04-29-step-d/4-path-verification.sh` +- Create: `cluster/runtime/snapshot-2026-04-29-step-d/vk_partition_test.py` + +This task confirms partition enforce works in NVML, CUDA, Vulkan-memory-query, Vulkan-allocate. + +- [ ] **Step 1: Write the python Vulkan probe** + +Create `cluster/runtime/snapshot-2026-04-29-step-d/vk_partition_test.py`: + +```python +#!/usr/bin/env python3 +"""Step D 4-path verification — Vulkan-side partition enforce. + +Path 3: vkGetPhysicalDeviceMemoryProperties → device-local heap size MUST + be the partition limit (23552 MiB), not the raw 46068 MiB. +Path 4: vkAllocateMemory(size = 25 GiB) MUST fail with + VK_ERROR_OUT_OF_DEVICE_MEMORY (partition limit is 23 GiB). + +Requires: python3-vulkan or vulkan binding (pip install vulkan). +Run inside isaac-launchable-0 vscode container with HAMI_VULKAN_ENABLE=1 +already in env. 
+""" +import sys +import ctypes + +try: + import vulkan as vk +except ImportError: + print("ERR: pip install vulkan (or python3-vulkan)") + sys.exit(2) + +PARTITION_MIB = 23552 # Step C/D production limit +PARTITION_BYTES = PARTITION_MIB * 1024 * 1024 +OVER_BUDGET_BYTES = 25 * 1024 * 1024 * 1024 # 25 GiB > 23 GiB + +# Path 3: query memory properties +app_info = vk.VkApplicationInfo( + sType=vk.VK_STRUCTURE_TYPE_APPLICATION_INFO, + pApplicationName="hami-step-d-probe", + applicationVersion=1, + pEngineName="probe", + engineVersion=1, + apiVersion=vk.VK_API_VERSION_1_3, +) +inst_info = vk.VkInstanceCreateInfo(sType=vk.VK_STRUCTURE_TYPE_INSTANCE_CREATE_INFO, pApplicationInfo=app_info) +inst = vk.vkCreateInstance(inst_info, None) +phys_devs = vk.vkEnumeratePhysicalDevices(inst) +if not phys_devs: + print("ERR: no physical devices") + sys.exit(2) +dev = phys_devs[0] +mem_props = vk.vkGetPhysicalDeviceMemoryProperties(dev) + +device_local_heap_size = 0 +for i in range(mem_props.memoryHeapCount): + heap = mem_props.memoryHeaps[i] + if heap.flags & vk.VK_MEMORY_HEAP_DEVICE_LOCAL_BIT: + device_local_heap_size = max(device_local_heap_size, heap.size) +print(f"Path 3: device-local heap size = {device_local_heap_size} bytes ({device_local_heap_size // (1024*1024)} MiB)") +if abs(device_local_heap_size - PARTITION_BYTES) < (256 * 1024 * 1024): # 256 MiB tolerance + print(f"Path 3: PASS (within 256 MiB of {PARTITION_MIB} MiB partition)") +else: + print(f"Path 3: FAIL (expected ~{PARTITION_MIB} MiB, got {device_local_heap_size // (1024*1024)} MiB)") + +# Path 4: try to allocate over-budget +device_create_info = vk.VkDeviceCreateInfo( + sType=vk.VK_STRUCTURE_TYPE_DEVICE_CREATE_INFO, + queueCreateInfoCount=1, + pQueueCreateInfos=[vk.VkDeviceQueueCreateInfo( + sType=vk.VK_STRUCTURE_TYPE_DEVICE_QUEUE_CREATE_INFO, + queueFamilyIndex=0, + queueCount=1, + pQueuePriorities=[1.0], + )], +) +ldev = vk.vkCreateDevice(dev, device_create_info, None) +mem_type_idx = -1 +for i in 
range(mem_props.memoryTypeCount): + if mem_props.memoryTypes[i].propertyFlags & vk.VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT: + mem_type_idx = i + break +alloc_info = vk.VkMemoryAllocateInfo( + sType=vk.VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO, + allocationSize=OVER_BUDGET_BYTES, + memoryTypeIndex=mem_type_idx, +) +try: + mem = vk.vkAllocateMemory(ldev, alloc_info, None) + print(f"Path 4: FAIL (expected VK_ERROR_OUT_OF_DEVICE_MEMORY for {OVER_BUDGET_BYTES} bytes, got success — partition not enforced)") + vk.vkFreeMemory(ldev, mem, None) +except vk.VkErrorOutOfDeviceMemory: + print(f"Path 4: PASS (VK_ERROR_OUT_OF_DEVICE_MEMORY for {OVER_BUDGET_BYTES // (1024*1024*1024)} GiB > {PARTITION_MIB // 1024} GiB partition)") +except Exception as e: + print(f"Path 4: FAIL (unexpected error {type(e).__name__}: {e})") + +vk.vkDestroyDevice(ldev, None) +vk.vkDestroyInstance(inst, None) +``` + +- [ ] **Step 2: Write the orchestrator script** + +Create `cluster/runtime/snapshot-2026-04-29-step-d/4-path-verification.sh`: + +```bash +#!/bin/bash +# Step D 4-path verification orchestrator. +# Run from controller host; orchestrates 4-path checks inside isaac-launchable-0. 
+set -u + +NS=isaac-launchable +POD=$(kubectl -n $NS get pods --no-headers | awk '/^isaac-launchable-0/{print $1}' | head -1) +if [ -z "$POD" ]; then + echo "ERR: isaac-launchable-0 pod not found"; exit 1 +fi +echo "Pod: $POD" + +# Copy the python probe into the pod +kubectl -n $NS cp "$(dirname "$0")/vk_partition_test.py" $POD:/tmp/vk_partition_test.py -c vscode + +PASS=0 +FAIL=0 + +echo +echo "=== Path 1: NVML hook (nvidia-smi clamp) ===" +RAW=$(kubectl -n $NS exec $POD -c vscode -- bash -lc 'env -u LD_PRELOAD nvidia-smi --query-gpu=memory.total --format=csv,noheader' 2>&1 | head -1) +HOOKED=$(kubectl -n $NS exec $POD -c vscode -- bash -lc 'nvidia-smi --query-gpu=memory.total --format=csv,noheader' 2>&1 | grep -E "MiB" | head -1) +echo " raw = $RAW" +echo " hook = $HOOKED" +if echo "$HOOKED" | grep -qE "23552 MiB"; then + echo " Path 1: PASS"; PASS=$((PASS+1)) +else + echo " Path 1: FAIL"; FAIL=$((FAIL+1)) +fi + +echo +echo "=== Path 2: CUDA driver hook (cuMemGetInfo clamp) ===" +P2=$(kubectl -n $NS exec $POD -c vscode -- bash -lc ' +python3 -c " +import sys +try: + import pycuda.driver as cuda + cuda.init() + ctx = cuda.Device(0).make_context() + free, total = cuda.mem_get_info() + print(f\"free={free} total={total}\") + ctx.pop() +except ImportError: + sys.exit(2) +except Exception as e: + print(f\"err: {e}\") +" 2>&1' || echo "ERR") +echo " $P2" +TOTAL_MIB=$(echo "$P2" | sed -nE "s/.*total=([0-9]+).*/\1/p" | awk "{print int(\$1/(1024*1024))}") +if [ "$TOTAL_MIB" = "23552" ] || [ "$TOTAL_MIB" -ge "23000" -a "$TOTAL_MIB" -le "24000" ]; then + echo " Path 2: PASS (~$TOTAL_MIB MiB)"; PASS=$((PASS+1)) +else + echo " Path 2: SKIP_OR_FAIL (no pycuda or unexpected total=$TOTAL_MIB)"; FAIL=$((FAIL+1)) +fi + +echo +echo "=== Paths 3 & 4: Vulkan memory query + allocate ===" +P34=$(kubectl -n $NS exec $POD -c vscode -- bash -lc ' +if ! 
python3 -c "import vulkan" 2>/dev/null; then + /isaac-sim/python.sh -m pip install vulkan 2>&1 | tail -3 +fi +/isaac-sim/python.sh /tmp/vk_partition_test.py 2>&1 +') +echo "$P34" +echo "$P34" | grep -q "Path 3: PASS" && PASS=$((PASS+1)) || FAIL=$((FAIL+1)) +echo "$P34" | grep -q "Path 4: PASS" && PASS=$((PASS+1)) || FAIL=$((FAIL+1)) + +echo +echo "=== Summary ===" +echo "PASS=$PASS FAIL=$FAIL of 4 paths" +[ "$FAIL" = "0" ] && exit 0 || exit 1 +``` + +- [ ] **Step 3: chmod + run** + +```bash +chmod +x cluster/runtime/snapshot-2026-04-29-step-d/4-path-verification.sh +./cluster/runtime/snapshot-2026-04-29-step-d/4-path-verification.sh +``` + +Expected: `PASS=4 FAIL=0 of 4 paths`. If pycuda is not installed in the pod, Path 2 prints `SKIP_OR_FAIL` and the summary becomes `PASS=3 FAIL=1` with exit code 1; that single case is informational, not a blocker. If any other path fails, capture the output and STOP for analysis. Do not roll back automatically — the underlying issue may be a code bug, not a deployment issue. + +- [ ] **Step 4: Commit verification scripts** + +```bash +git add cluster/runtime/snapshot-2026-04-29-step-d/4-path-verification.sh \ + cluster/runtime/snapshot-2026-04-29-step-d/vk_partition_test.py +git commit -s -m "test(runtime): Step D — 4-path partition enforce verification scripts" \ + -m "Run on ws-node074 against isaac-launchable-0 with hami.io/vulkan +annotation active. Verifies: + +Path 1: NVML hook nvidia-smi → 23552 MiB clamp +Path 2: CUDA driver hook cuMemGetInfo → ~23 GiB total +Path 3: Vulkan vkGetPhysicalDeviceMemoryProperties → device-local heap + ~23 GiB +Path 4: Vulkan vkAllocateMemory(25 GiB) → VK_ERROR_OUT_OF_DEVICE_MEMORY + +Skip path 2 if pycuda unavailable in pod (informational FAIL — not +blocker, NVML+CUDA hooks already validated by Step B unit tests)." 
+``` + +--- + +### Task 8: HAMI_VK_TRACE host-loader verification + sanity check other Vulkan pods + +**Files:** none (verification only) + +- [ ] **Step 1: HAMI_VK_TRACE host-loader probe** + +Run a small Vulkan probe via host system Vulkan loader (NOT Kit's Conan-bundled loader) to confirm our layer is in chain: + +```bash +NEWPOD=$(kubectl -n isaac-launchable get pods --no-headers | awk '/^isaac-launchable-0/{print $1}' | head -1) +kubectl -n isaac-launchable exec $NEWPOD -c vscode -- bash -lc ' +which vulkaninfo || apt list --installed 2>/dev/null | grep -i vulkan-tools +HAMI_VK_TRACE=1 vulkaninfo --summary 2>&1 | head -20 || echo "vulkaninfo unavailable" +echo +echo "=== HAMI_VK_TRACE lines via /isaac-sim python ===" +HAMI_VK_TRACE=1 /isaac-sim/python.sh -c " +import vulkan as vk +app = vk.VkApplicationInfo(sType=vk.VK_STRUCTURE_TYPE_APPLICATION_INFO, apiVersion=vk.VK_API_VERSION_1_3) +ci = vk.VkInstanceCreateInfo(sType=vk.VK_STRUCTURE_TYPE_INSTANCE_CREATE_INFO, pApplicationInfo=app) +inst = vk.vkCreateInstance(ci, None) +print(\"created instance\") +vk.vkDestroyInstance(inst, None) +" 2>&1 | grep -E "HAMI_VK_TRACE|created" | head -20 +' +``` + +Expected: HAMI_VK_TRACE lines > 0 — at least the `vkGetInstanceProcAddr` lookups for each entry point during `vkCreateInstance`. This proves the layer is in the chain when activation conditions are met (manifest installed + HAMI_VULKAN_ENABLE=1 + python uses host's libvulkan, not Kit's Conan-bundled one). + +If trace=0 even here, capture full log and surface to controller — manifest activation is broken at the loader level. 
+ +- [ ] **Step 2: Sanity check other Vulkan-using pods** + +```bash +echo "=== isaac-launchable-1 ===" +POD1=$(kubectl -n isaac-launchable get pods --no-headers | awk '/^isaac-launchable-1/{print $1}' | head -1) +kubectl -n isaac-launchable exec $POD1 -c vscode -- bash -lc ' +pkill -KILL kit 2>/dev/null; sleep 2 +timeout 45 env ACCEPT_EULA=y /isaac-sim/runheadless.sh > /tmp/p1.log 2>&1 +EC=$?; pkill -KILL kit 2>/dev/null +echo "isaac-launchable-1: exit=$EC crash=$(grep -c "Segmentation\|crash has occurred" /tmp/p1.log) listen=$(ss -tunlp 2>/dev/null | grep -c -E :49100)" +rm -f /tmp/p1.log +' +echo +echo "=== usd-composer ===" +POD2=$(kubectl -n isaac-launchable get pods --no-headers | awk '/^usd-composer/{print $1}' | head -1) +[ -n "$POD2" ] && kubectl -n isaac-launchable get pod $POD2 +echo +echo "=== other isaac-launchable namespace pods status ===" +kubectl -n isaac-launchable get pods +``` + +Expected: +- isaac-launchable-1: `exit=124 crash=0 listen=1` (no annotation → still inert; should be unaffected by Step D changes). +- usd-composer: `3/3 Running`, no crash loop. +- All other pods steady. + +If isaac-launchable-1 regresses despite NOT having the annotation, that means the manifest is being activated globally somehow — the `enable_environment` gate is broken or the webhook is leaking annotation cross-pod. Investigate. 
+ +- [ ] **Step 3: No commit** + +--- + +### Task 9: Push snapshot YAMLs + draft PR comments (DO NOT post) + +**Files:** +- Create: `/tmp/step-d-pr-drafts/{pr-hami,pr-volcano-fork}.md` + +- [ ] **Step 1: Push parent HAMi commits** + +```bash +cd /Users/xiilab/git/HAMi +git log --oneline xiilab/feat/vulkan-vgpu..HEAD 2>&1 | head -5 +git push xiilab feat/vulkan-vgpu 2>&1 | tail -3 +``` + +- [ ] **Step 2: Draft PR comments** + +```bash +mkdir -p /tmp/step-d-pr-drafts + +cat > /tmp/step-d-pr-drafts/pr-hami.md <<'EOF' +## Step D — Vulkan opt-in production activation + 4-path 검증 + +Step C 의 `libvgpu_vk.so` 분리 산출물을 production opt-in path 에서 활성화하고, partition enforce 가 4 path 모두에서 작동함을 ws-node074 에서 검증. + +### Commits + +- `chore(runtime): Step D — update hami-vulkan-manifest CM to libvgpu_vk.so` +- `chore(runtime): Step D — re-enable hami-vulkan-manifest-installer DS` +- `chore(runtime): Step D — bump volcano-device-plugin to vulkan-v2` +- `test(runtime): Step D — 4-path partition enforce verification scripts` + +### Verification on ws-node074, isaac-launchable-0 (with `hami.io/vulkan: "true"` annotation) + +| Path | Expected | Actual | +|---|---|---| +| 1. NVML `nvidia-smi` | 23552 MiB | (fill from script run) | +| 2. CUDA `cuMemGetInfo` | ~23 GiB | (fill) | +| 3. Vulkan `vkGetPhysicalDeviceMemoryProperties` device-local heap | ~23 GiB | (fill) | +| 4. Vulkan `vkAllocateMemory(25 GiB)` | `VK_ERROR_OUT_OF_DEVICE_MEMORY` | (fill) | + +`HAMI_VK_TRACE > 0` confirmed via host vulkan-loader path on python3-vulkan probe. + +### Companion changes +- volcano-vgpu-device-plugin fork: libvgpu submodule bumped to HAMi-core `65930f4` (Step C end). Image rebuilt and pushed as `vulkan-v2` to local registry. + +### Rollback path (if needed) +- DaemonSet `hami-vulkan-manifest-installer`: nodeSelector → `hami.io/disabled: "true"` (kubectl patch). +- DaemonSet `volcano-device-plugin`: image → `vulkan-v1`. +- Annotation `hami.io/vulkan` → remove from workload. 
+ +Spec: `docs/superpowers/specs/2026-04-29-step-d-vulkan-opt-in-production-activation.md` +Plan: `docs/superpowers/plans/2026-04-29-step-d-vulkan-opt-in-production-activation.md` +EOF + +cat > /tmp/step-d-pr-drafts/pr-volcano-fork.md <<'EOF' +## bump libvgpu submodule to HAMi-core Step C end (libvgpu_vk.so split) + +Pulls in HAMi-core `vulkan-layer` `65930f4` — the Step C redesign that splits Vulkan layer code into a separate `libvgpu_vk.so`. After this bump: + +- `libvgpu.so` (HAMi-core only, no `vk*` exports) and `libvgpu_vk.so` (Vulkan implicit layer) are both shipped in `/k8s-vgpu/lib/nvidia/`. +- The existing postStart `cp -rf /k8s-vgpu/lib/nvidia/. /usr/local/vgpu/` installs both onto every GPU node. +- Image tag bump: `vulkan-v1` → `vulkan-v2`. + +Verification done in HAMi parent Step D plan; partition enforce confirmed across NVML, CUDA, Vulkan-memory-query, Vulkan-allocate paths on ws-node074 isaac-launchable-0. + +Submodule SHA: `65930f4` (commit "feat(vulkan): ship hami.json implicit-layer manifest"). +EOF + +ls -la /tmp/step-d-pr-drafts/ +``` + +- [ ] **Step 3: Report — DO NOT post comments. Wait for explicit user approval.** + +--- + +## Self-Review + +**1. Spec coverage:** +- Spec §"핵심 결정 1" (image rebuild) → Task 2 +- Spec §"핵심 결정 2" (CM update) → Task 3 +- Spec §"핵심 결정 3" (installer DS 재활성) → Task 4 +- Spec §"핵심 결정 4" (annotation/webhook) → Task 6 +- Spec §"핵심 결정 5" (4-path verification) → Task 7 +- Spec §"핵심 결정 6" (rollback) → 각 Task post-step alive 체크 + restore 가이드 +- Spec §"Activation flow" → Tasks 3-6 순서대로 +- Spec §"4-path verification" → Task 7 +- Spec §"Production safety gate" → Tasks 1, 4-5 의 post-step 검증 + Task 8 의 sanity ✅ + +**2. Placeholder scan:** Task 9 의 PR draft verification table 에 있는 (fill from script run) / (fill) 자리는 Task 7 스크립트 실행 후 채워질 자리이지 plan 자체의 결함이 아님. 그 외 placeholder 없음. ✅ + +**3. Type consistency:** `hami.io/vulkan` annotation 이름 / `HAMI_VULKAN_ENABLE` env 이름 / `library_path` JSON key — 모든 task 에서 일관 사용. 
PARTITION_MIB=23552 / OVER_BUDGET_BYTES=25 GiB — vk_partition_test.py 와 4-path-verification.sh 가 동일 값 사용. ✅ + +**4. Scope check:** 단일 production deploy + 검증. helm chart 통합 / Tasks 1+2 재도입 / multi-GPU 는 out of scope (spec 명시). 단일 plan 으로 실행 가능. ✅ + +**5. External-repo dependency**: Task 2 가 `volcano-vgpu-device-plugin` fork 작업 (HAMi parent repo 외). Plan 에 명시적으로 working dir 구분, git push 도 fork 만. 이 task 는 controller 가 외부 repo permissions / SSH 가 보장되는 환경에서 실행해야 함. 안 되면 BLOCKED 보고. ✅ + +--- + +## Estimated time + +| Task | 예상 | +|---|---| +| 1 inventory + baseline | 10분 | +| 2 image build + push (외부 repo, libvgpu submodule bump 포함) | 60분 | +| 3 CM update + apply | 15분 | +| 4 installer DS 재활성 | 15분 | +| 5 device plugin DS bump | 20분 | +| 6 annotation + restart + verify | 20분 | +| 7 4-path verification scripts + run | 45분 | +| 8 trace host-loader + sanity | 20분 | +| 9 push + PR drafts | 15분 | +| **총** | **약 3.5시간** | + +(Task 2 가 가장 변동성 큼 — image build 인프라/네트워크 의존도 높음.) diff --git a/docs/superpowers/plans/notes/hami-core-layout.md b/docs/superpowers/plans/notes/hami-core-layout.md new file mode 100644 index 000000000..4e1450a03 --- /dev/null +++ b/docs/superpowers/plans/notes/hami-core-layout.md @@ -0,0 +1,306 @@ +# HAMi-core layout notes (for Vulkan vGPU plan) + +HAMi-core submodule root: `libvgpu/` (HAMi-core). This note records the real +symbol names and file locations that the Vulkan vGPU plan (Tasks 1.4, 1.6) +will need when extracting a shared throttle utility and a VRAM budget +counter adapter. No source in `libvgpu/` is modified by this task. + +## 소스 구조 + +Top-level build artefacts: +- `libvgpu/CMakeLists.txt` — root CMake, adds `src/` and `test/` subdirs, + generates `config/static_config.h` from `src/static_config.h.in`. +- `libvgpu/Makefile` — wrapper (`make build` → `./build.sh`, + `make build-in-docker` runs the build inside an `nvidia/cuda:12.2.0-devel` + container). 
+- `libvgpu/build.sh` — invokes cmake with flags + `-DDLSYM_HOOK_ENABLE=1 -DMULTIPROCESS_LIMIT_ENABLE=1 -DHOOK_MEMINFO_ENABLE=1 + -DHOOK_NVML_ENABLE=1 -DCMAKE_BUILD_TYPE=Debug`, then `make -j$J`. + +`libvgpu/src/` (not flat — it is split into feature directories, each with +its own `CMakeLists.txt` that produces an OBJECT library linked together +into `libvgpu.so`): + +- `src/libvgpu.c` — top-level hook loader / dlsym dispatch (entrypoints). +- `src/utils.c` — misc helpers (`round_up`, env parsing). +- `src/static_config.h.in` — generated config header. +- `src/allocator/` — **VRAM accounting + oom-check + allocation list** layer. + - `allocator.c`, `allocator.h` — defines `allocate_raw`, `free_raw`, + `oom_check`, `add_chunk_only`, `remove_chunk_only`, etc. +- `src/cuda/` — CUDA driver API wrappers: + - `memory.c` — `cuMemAlloc_v2`, `cuMemAllocManaged`, `cuMemAllocPitch_v2`, + `cuMemFree_v2`, `cuLaunchKernel`, `cuLaunchKernelEx`, + `cuLaunchCooperativeKernel`, `cuMemCreate`/`cuMemRelease` (VMM), … + - `hook.c` — populates the cuda override table with the above symbols. + - `device.c`, `context.c`, `stream.c`, `event.c`, `graph.c`. +- `src/nvml/` — NVML wrappers (`nvml_entry.c`, `hook.c`). +- `src/multiprocess/` — **shared-memory region (cross-process counters) + + SM rate limiter**: + - `multiprocess_memory_limit.c/.h` — `shared_region_t`, per-proc slots, + `get_current_device_memory_limit`, `get_gpu_memory_usage`, + `add_gpu_device_memory_usage`, `rm_gpu_device_memory_usage`, + `pre_launch_kernel`. + - `multiprocess_utilization_watcher.c/.h` — `rate_limiter`, + `utilization_watcher` background thread, `init_utilization_watcher`, + `delta()`/`change_token()` token-bucket logic. + - `shrreg_tool.c` — standalone CLI for inspecting the shared region. +- `src/include/` — public headers (used by other subdirs via + `include "include/…"`). Notable: + - `memory_limit.h` — macros `ENSURE_RUNNING`, `INC_MEMORY_OR_RETURN_ERROR`, + `DECL_MEMORY_ON_ERROR/_SUCCESS`. 
+ - `libcuda_hook.h`, `libnvml_hook.h` — override table enum/entries. + - `nvml-subset.h`, `nvml_override.h`, `nvml_prefix.h`. + - `log_utils.h` — `LOG_DEBUG/INFO/WARN/ERROR`, `CHECK_DRV_API`, + `CHECK_NVML_API`, `CHECK_CU_RESULT`. + +## VRAM 카운터 API (기존 CUDA 경로에서 사용) + +All three primitives live in **allocator + multiprocess** layers. The CUDA +memory wrappers in `src/cuda/memory.c` call them. + +### 예약 (reserve / budget check) + +- **Signature**: `int oom_check(const int dev, size_t addon);` +- **Defined at**: `libvgpu/src/allocator/allocator.c:36` +- **Declared at**: `libvgpu/src/allocator/allocator.h:155` +- **Semantics**: reads `get_current_device_memory_limit(dev)` and + `get_gpu_memory_usage(dev)`, returns `1` if `usage + addon > limit` + (OOM, caller must fail), returns `0` if OK. If `limit == 0` (unlimited) + always returns `0`. Note: this is a **check-only** primitive, it does + NOT reserve/increment the counter. +- **Counter increment** happens later via + `int add_gpu_device_memory_usage(int32_t pid, int dev, size_t usage, int type);` + defined at `libvgpu/src/multiprocess/multiprocess_memory_limit.c:336` + (declared at `…/multiprocess_memory_limit.h:147`). + - Returns `CUDA_DEVICE_MEMORY_UPDATE_SUCCESS (0)` on success, + `CUDA_DEVICE_MEMORY_UPDATE_FAILURE (1)` on failure. +- **Full reserve path used in CUDA wrappers**: the allocator wraps this in + `int allocate_raw(CUdeviceptr *dptr, size_t size)` at + `libvgpu/src/allocator/allocator.c:205`, which delegates to + `add_chunk(...)` at `:103` → calls `oom_check` then the real + `cuMemAlloc_v2`, then `add_gpu_device_memory_usage(getpid(), dev, size, 2)`. +- **Alt path** (for already-allocated buffers, e.g. managed/pitch/VMM): + `int add_chunk_only(CUdeviceptr address, size_t size);` at + `libvgpu/src/allocator/allocator.c:133` — same `oom_check` + counter + increment but without invoking `cuMemAlloc_v2`. 
+ +### 해제 (release) + +- **Signature**: `int free_raw(CUdeviceptr dptr);` +- **Defined at**: `libvgpu/src/allocator/allocator.c:213` +- **Declared at**: `libvgpu/src/allocator/allocator.h:159` +- **Semantics**: looks up `dptr` in `device_overallocated` list, calls real + `cuMemFree_v2`, removes the entry, and calls + `rm_gpu_device_memory_usage(getpid(), dev, t_size, 2)` (defined at + `libvgpu/src/multiprocess/multiprocess_memory_limit.c:365`). + Returns `0` on success, `-1` if pointer not found. +- **Alt release-only** (no real `cuMemFree`): `int remove_chunk_only(CUdeviceptr dptr);` + at `libvgpu/src/allocator/allocator.c:185`. + +### 버짓 조회 (budget / limit) + +- **Signature**: `uint64_t get_current_device_memory_limit(const int dev);` +- **Defined at**: `libvgpu/src/multiprocess/multiprocess_memory_limit.c:828` +- **Declared at**: `libvgpu/src/multiprocess/multiprocess_memory_limit.h:126` +- **Semantics**: returns `region_info.shared_region->limit[dev]` from the + cross-process shared region (populated from + `CUDA_DEVICE_MEMORY_LIMIT_` env vars). Returns `0` when no limit is + set (interpreted as "unlimited" by `oom_check`). +- **Companion usage getter**: + `uint64_t get_current_device_memory_usage(const int dev);` at + `…/multiprocess_memory_limit.c:846` — sum of `used[dev].total` across + procs in the shared region; the lower-level + `size_t get_gpu_memory_usage(const int dev);` + (`…/multiprocess_memory_limit.c:243`) is what `oom_check` actually reads. + +### 실패 시 반환 규약 + +- `oom_check` → `int`: **`1` = OOM (caller must fail)**, `0` = OK, `limit==0` + also returns `0` (unlimited). Note: this is the **opposite** of the + typical "0 = success" Unix convention. +- `allocate_raw` / `add_chunk` / `add_chunk_only` → `int`: `0` on success, + `CUDA_ERROR_OUT_OF_MEMORY` (= `2`, a `CUresult`) on OOM, `-1` on malloc + failure. Callers in `cuda/memory.c` compare against `CUDA_SUCCESS` (0). +- `free_raw` → `int`: `0` on success, `-1` if pointer not tracked. 
+- `add_gpu_device_memory_usage` / `rm_gpu_device_memory_usage` → `int`: `0` + (`CUDA_DEVICE_MEMORY_UPDATE_SUCCESS`) on success, `1` + (`CUDA_DEVICE_MEMORY_UPDATE_FAILURE`) on failure. +- `get_current_device_memory_limit` → `uint64_t`: the budget in bytes; `0` + means "unlimited" (downstream code must treat 0 as a sentinel, not as + "zero budget"). + +## SM throttle 루프 (CUDA launch 래퍼) + +- **Wrapper file**: `libvgpu/src/cuda/memory.c` + - `cuLaunchKernel`: line 545 (calls `pre_launch_kernel()` then + `rate_limiter(grids, blocks)` when `pidfound==1`). + - `cuLaunchKernelEx`: line 556. + - `cuLaunchCooperativeKernel`: line 567 (only `pre_launch_kernel()`; no + rate limiter — possible gap). +- **Throttle function**: `void rate_limiter(int grids, int blocks);` + defined at `libvgpu/src/multiprocess/multiprocess_utilization_watcher.c:34`, + declared at `…/multiprocess_utilization_watcher.h:20`. +- **Background producer**: `void* utilization_watcher();` at + `…/multiprocess_utilization_watcher.c:178`, started by + `init_utilization_watcher()` at line 213 (creates a pthread at line 218) + when `0 < sm_limit <= 100`. Entry point called from `libvgpu.c:888`. +- **Loop structure (this is what Task 1.4 will extract)**: + 1. `rate_limiter` short-circuits if SM limit is `0` or `>=100` + (unlimited) or if `get_utilization_switch() == 0`. + 2. It does **NOT** itself call `nvmlDeviceGetUtilizationRates` or + `usleep`. Instead it implements a **token-bucket consumer**: + ``` + do { + before = g_cur_cuda_cores; // line 52 + if (before < 0) { nanosleep(&g_cycle, NULL); goto CHECK; } // line 55 + after = before - kernel_size; + } while (!CAS(&g_cur_cuda_cores, before, after)); // line 59 + ``` + When the shared counter is depleted it `nanosleep`s for + `g_cycle = 10 ms` (`TIME_TICK * MILLISEC`, from + `multiprocess_utilization_watcher.h:9`) and retries. + 3. 
The **actual NVML polling + token refill** runs in the separate + background thread `utilization_watcher` (lines 178–211): + ``` + while (1) { + nanosleep(&g_wait, NULL); // g_wait = 120 ms (header:14) + init_gpu_device_utilization(); + get_used_gpu_utilization(userutil, &sysprocnum); + share = delta(upper_limit, userutil[0], share); + change_token(share); + } + ``` + `get_used_gpu_utilization` (`:121`) calls + `nvmlDeviceGetComputeRunningProcesses` + + `nvmlDeviceGetProcessUtilization` (not + `nvmlDeviceGetUtilizationRates` — per-process sampling is used + instead). The NVML `nvmlDeviceGetUtilizationRates` symbol **is** + hooked (`src/nvml/nvml_entry.c:730`) but is a passthrough. + 4. Poll cadence: 120 ms refill loop (`g_wait`), 10 ms consumer backoff + (`g_cycle`). Max iterations: unbounded (while loop). + +**Implication for Task 1.4**: "throttle loop" here is actually a +producer/consumer pair. Extracting a shared utility for Vulkan probably +means extracting (a) a token-bucket consumer equivalent to +`rate_limiter`, and (b) sharing the existing background refill thread — +not extracting a simple `poll-utilization+usleep` helper, because that +pattern does not literally exist in the CUDA path. If Task 1.4 only wants +the passive "sleep-until-budget-available" semantics, the consumer loop +in `rate_limiter` (lines 50–60) is the single place to model on. + +## 빌드 / 테스트 + +### Makefile 타겟 +- `build` (default) — runs `./build.sh` locally (needs host CUDA at + `$CUDA_HOME` or `/usr/local/cuda`). +- `build-in-docker` — bind-mounts the repo into + `nvidia/cuda:12.2.0-devel-ubuntu20.04` and runs `build.sh` inside. + +### CMakeLists 구조 +- Root `CMakeLists.txt` (`libvgpu/CMakeLists.txt`) sets + `LIBRARY_COMPILE_FLAGS = -shared -fPIC -D_GNU_SOURCE -fvisibility=hidden + -Wall` (Debug adds `-g`, drops `-fvisibility=hidden`), generates + `config/static_config.h` from the `.h.in` template (git hash/branch + baked in), then `add_subdirectory(src)` and `add_subdirectory(test)`. 
+- `src/CMakeLists.txt` adds four subdirs (multiprocess, allocator, cuda, + nvml), each of which declares an OBJECT library + (`multiprocess_mod`, `allocator_mod`, `cuda_mod`, `nvml_mod`). The root + then links them into a single SHARED lib target `vgpu` + (= `libvgpu.so`), linking against `-lcuda -lnvidia-ml`. On Release a + `strip_symbol` custom target strips the `.so`. +- `test/CMakeLists.txt` globs every `*.c` / `*.cu` under `test/` and + builds one executable per file (linking `-lrt -lpthread -lnvidia-ml + -lcuda -lcudart`). No unit-test framework, no `ctest` registration. + +### 테스트 프레임워크 +- **없음.** The `test/` directory contains bare CUDA sample programs + (one-off allocation/launch harnesses) that are compiled into + stand-alone binaries. There is no GoogleTest, no `ctest`, no + assertion framework, no CI `make test`. Verification is manual + (run a binary under `LD_PRELOAD=libvgpu.so`, inspect logs). +- `test/python/` holds four manual PyTorch/TF/MXNet smoke scripts + (`limit_pytorch.py`, `limit_tensorflow.py`, `limit_tensorflow2.py`, + `limit_mxnet.py`) copied into the build dir via a `python_test` + custom target. + +### test/ 디렉토리 파일 목록 +``` +test/CMakeLists.txt +test/test_alloc.c +test/test_alloc_hold.c +test/test_alloc_host.c +test/test_alloc_managed.c +test/test_alloc_pitch.c +test/test_create_3d_array.c +test/test_create_array.c +test/test_host_alloc.c +test/test_host_register.c +test/test_runtime_alloc.c +test/test_runtime_alloc_host.c +test/test_runtime_alloc_managed.c +test/test_runtime_host_alloc.c +test/test_runtime_host_register.c +test/test_runtime_launch.cu +test/test_utils.h +test/python/limit_mxnet.py +test/python/limit_pytorch.py +test/python/limit_tensorflow.py +test/python/limit_tensorflow2.py +``` + +## 기타 관찰 + +### Vulkan 헤더 의존성 +- **현재 없음.** `grep -ri "vulkan\|VULKAN\|vk_" libvgpu/` returns zero + files. The build links only `-lcuda -lnvidia-ml`; `CMakeLists.txt` + references only `CUDA_HOME`. 
Any Vulkan layer work will have to add a + new `src/vulkan/` subdir and new dependency on vulkan-headers / + libvulkan. + +### 후속 Task에 영향 주는 주의사항 +1. **`oom_check` is check-only, not reserve+commit.** The CUDA path is: + `oom_check` → real `cuMemAlloc` → `add_gpu_device_memory_usage` (or the + combined `allocate_raw` / `add_chunk`). There is a TOCTOU window. For + the Vulkan adapter (Task 1.6) we must replicate this two-step pattern + (or add a new atomic `reserve(dev, size)` helper) and must commit the + counter with `add_gpu_device_memory_usage(..., type=2)` after the + Vulkan allocation succeeds. +2. **Sentinel value `limit == 0` means unlimited**, not "zero budget". + Downstream Vulkan code must preserve this. +3. **Per-process accounting key is `getpid()`** (plus a shared-region + `hostpid` fixed up by `update_host_pid()`). Vulkan allocations made + from the same process should reuse the existing shared region slot, + not allocate a new one. +4. **`rate_limiter` silently no-ops** when SM limit is `0`, `>=100`, or + `get_utilization_switch()==0`. A Vulkan consumer that reuses this + primitive inherits that behaviour — the Vulkan wrapper will need its + own switch/env var if we want independent SM partitioning. +5. **`cuLaunchCooperativeKernel` at `src/cuda/memory.c:567` is missing + the `rate_limiter` call** (only `pre_launch_kernel` runs). Not our + bug to fix, but worth knowing when auditing throttle coverage. +6. **No unit-test framework.** If Task 1.4/1.6 want unit tests around + the extracted utility, we will have to introduce one (GoogleTest or + equivalent) inside `libvgpu/`, which is a submodule change. A less + invasive option is to put unit tests on the HAMi (Go) side that + exercise the C symbols via cgo, or write new stand-alone C binaries + under `test/` following the current convention. +7. 
**Visibility is `-fvisibility=hidden` in Release builds.** Any new + symbols that Vulkan wrappers need to export from `libvgpu.so` must be + annotated (`__attribute__((visibility("default")))` or similar) or + they will not be dlsym-resolvable. + +## 시도한 검색 (참고) + +``` +grep -rn "oom_check" libvgpu/src/ + → allocator/allocator.h:155 decl, allocator.c:36 defn +grep -rn "allocate_raw\|free_raw\|add_chunk_only" libvgpu/src/ + → allocator/allocator.c:205 / :213 / :133 +grep -rn "get_current_device_memory_limit\|get_gpu_memory_usage" libvgpu/src/ + → multiprocess/multiprocess_memory_limit.c:828 / :243 +grep -rn "rate_limiter\|utilization_watcher\|nvmlDeviceGetUtilizationRates" libvgpu/src/ + → multiprocess/multiprocess_utilization_watcher.c:34 / :178 + → nvml/nvml_entry.c:730 (passthrough hook, not the throttle path) +grep -rin "vulkan\|VULKAN\|vk_" libvgpu/ + → (no matches) +``` diff --git a/docs/superpowers/plans/notes/hami-core-vulkan-sha.txt b/docs/superpowers/plans/notes/hami-core-vulkan-sha.txt new file mode 100644 index 000000000..8b0c8a294 --- /dev/null +++ b/docs/superpowers/plans/notes/hami-core-vulkan-sha.txt @@ -0,0 +1 @@ +579a421d1cae2df9bc692ca35f6b6d53ac7a7a1a diff --git a/docs/superpowers/specs/2026-04-21-vulkan-vgpu-partitioning-design.md b/docs/superpowers/specs/2026-04-21-vulkan-vgpu-partitioning-design.md new file mode 100644 index 000000000..ff20e964c --- /dev/null +++ b/docs/superpowers/specs/2026-04-21-vulkan-vgpu-partitioning-design.md @@ -0,0 +1,260 @@ +# HAMi Vulkan vGPU 분할 — 설계 스펙 + +- 작성일: 2026-04-21 +- 상태: 초안 (구현 전) +- 범위: NVIDIA GPU, Vulkan 컴퓨트 + 그래픽 워크로드 +- 영향 레포: `Project-HAMi/HAMi` (Go), `Project-HAMi/HAMi-core` (C, `libvgpu/` submodule) + +## 1. 문제 정의 + +HAMi는 `libvgpu.so`(HAMi-core)에서 CUDA 드라이버 API를 `LD_PRELOAD`로 가로채 NVIDIA GPU를 분할합니다. Vulkan 워크로드(컴퓨트 셰이더, `llama.cpp` Vulkan 백엔드, 렌더링 등)는 Vulkan이 별도 API 계층(`libvulkan.so` → ICD)이기 때문에 이 훅을 그대로 우회합니다. 결과적으로: + +- `nvidia.com/gpumem`으로 선언한 VRAM 제한이 Vulkan 할당에는 **적용되지 않음**. 
+- `nvidia.com/gpucores` SM/코어 throttle이 Vulkan 큐 제출에는 **적용되지 않음**. +- 기본값으로 컨테이너에 **Vulkan 라이브러리 자체가 마운트되지 않음** — HAMi는 `NVIDIA_DRIVER_CAPABILITIES`를 건드리지 않고, NVIDIA Container Toolkit 기본값(`compute,utility`)에는 Vulkan ICD가 포함되지 않음. + +이 설계 작성 시점에 레포 전체를 grep한 결과 `vulkan`/`VK_` 언급은 0건. + +## 2. 목표 + +1. 같은 파드 내 Vulkan 메모리 할당에 대해 기존 `nvidia.com/gpumem` 버짓을 **CUDA와 공유**하여 강제한다 (물리 VRAM 한 개 = 버짓 한 개). +2. 기존 `nvidia.com/gpucores` SM throttle을 Vulkan 큐 제출에 강제한다. +3. 요청이 있을 때 Vulkan 라이브러리가 실제로 컨테이너에 도달하게 한다. +4. 완전한 하위 호환성 유지: Vulkan을 요청하지 않은 파드는 동작 변화 없음. + +## 비목표 (Non-Goals) + +- NVIDIA 외 벤더(AMD, Intel, Moore Threads)의 Vulkan 분할. +- CUDA/Vulkan 별도 VRAM 버짓 (물리 실체는 VRAM 단일 풀). +- `NVIDIA_VISIBLE_DEVICES`가 이미 걸러주는 것 이상의 `vkEnumeratePhysicalDevices` 필터링. +- 그래픽 프레임 페이싱 보장 — SM throttle은 렌더링 워크로드에 지터를 유발할 수 있음(문서화 대상, 해결 대상은 아님). + +## 3. 결정 사항 + +| 항목 | 결정 | 근거 | +|------|------|------| +| 벤더 | NVIDIA 전용 | 기존 HAMi-core CUDA 훅 구조에 부합. | +| 제어 차원 | VRAM + SM | Vulkan으로 LLM 추론하는 수요에서 둘 다 필요. | +| 리소스 API | 기존 `nvidia.com/gpumem`, `nvidia.com/gpucores` 공유 버짓 | 물리 실체와 일치, 사용자 YAML 변경 없음. | +| 활성화 | 파드 annotation `hami.io/vulkan: "true"` opt-in | 모든 CUDA 전용 파드에 수십 MB 그래픽 라이브러리를 붙이지 않기 위해. | +| 후킹 방식 | HAMi-core `libvgpu.so`가 노출하는 Vulkan implicit layer | Vulkan 로더 표준 계약, LD_PRELOAD vs ICD 디스패치 이슈 회피. | +| 버짓 공유 | 프로세스 내 공유 카운터(기존 구조체 재사용) | 같은 `libvgpu.so` 인스턴스가 CUDA/Vulkan 훅을 모두 보유 → 별도 IPC 불필요. | + +## 4. 
아키텍처 + +``` +┌───────────────────────────────────┐ +│ Project-HAMi/HAMi (Go) │ +│ pkg/device/nvidia/device.go │ ← MutateAdmission 확장 +│ (pkg/device/nvidia/device_test.go)│ +└────────────┬──────────────────────┘ + │ env: HAMI_VULKAN_ENABLE=1, + │ NVIDIA_DRIVER_CAPABILITIES⊇graphics + ▼ +┌───────────────────────────────────┐ +│ 컨테이너 │ +│ NVIDIA Container Toolkit가 │ +│ Vulkan ICD + libGLX_nvidia 마운트 │ +│ HAMi device-plugin가 │ +│ /usr/local/vgpu/libvgpu.so 마운트 │ +└────────────┬──────────────────────┘ + │ Vulkan 로더가 implicit_layer.d 스캔 + ▼ +┌───────────────────────────────────┐ +│ Project-HAMi/HAMi-core (C) │ +│ libvgpu.so │ +│ ├─ 기존 CUDA 훅 │ +│ ├─ 신규 Vulkan 레이어 │ +│ │ src/vulkan/*.c │ +│ └─ 공유 VRAM/SM 카운터 │ +│ etc/vulkan/implicit_layer.d/ │ +│ └─ hami.json (신규) │ +└───────────────────────────────────┘ +``` + +## 5. 컴포넌트 + +### 5.1 HAMi (Go) — `pkg/device/nvidia/device.go` + +신설 상수: +```go +const ( + VulkanEnableAnno = "hami.io/vulkan" + VulkanLayerName = "VK_LAYER_HAMI_vgpu" + NvidiaDriverCapsEnvVar = "NVIDIA_DRIVER_CAPABILITIES" + HamiVulkanEnvVar = "HAMI_VULKAN_ENABLE" +) +``` + +`MutateAdmission` 확장 (단, `hasResource == true`일 때만): +1. 파드 annotation `hami.io/vulkan`을 읽고 `"true"`일 때만 이후 로직 수행. +2. 신규 `NVIDIA_DRIVER_CAPABILITIES` 값 계산: + - 컨테이너에 미설정이면: `"compute,utility,graphics"`로 설정. + - 설정되어 있고 `"all"` 포함이면: 변경 없음. + - 그 외: 콤마 구분 토큰 파싱 후 `"graphics"`와 합집합, 다시 직렬화. +3. `HAMI_VULKAN_ENABLE=1`이 없으면 추가. +4. `NVIDIA_VISIBLE_DEVICES`, RuntimeClass는 건드리지 않음 (기존 로직 그대로). + +스케줄러 익스텐더, 리소스 회계, 디바이스 플러그인 할당 로직은 변경 없음. 
+ +### 5.2 HAMi-core (C) — 신규 모듈 `src/vulkan/` + +파일 구성: +``` +src/vulkan/ + layer.c # vkNegotiateLoaderLayerInterfaceVersion, + # vk_layerGetInstanceProcAddr, + # vk_layerGetDeviceProcAddr + layer.h + dispatch.c # VkInstance/VkDevice 별 next-layer 디스패치 테이블 + hooks_memory.c # vkAllocateMemory, vkFreeMemory, + # vkGetPhysicalDeviceMemoryProperties/2 + hooks_buffer.c # vkCreateBuffer, vkCreateImage, + # vkBindBufferMemory/2 (회계상 필요 시) + hooks_submit.c # vkQueueSubmit, vkQueueSubmit2 +``` + +후킹 대상 엔트리포인트와 동작: + +| 함수 | 동작 | +|------|------| +| `vkGetPhysicalDeviceMemoryProperties` | next-layer 호출 후 device-local 힙의 `size`를 `min(real, pod_budget)`로 클램핑. | +| `vkGetPhysicalDeviceMemoryProperties2` | 동일 로직, `pNext` 체인으로 처리. | +| `vkAllocateMemory` | 공유 카운터 락 획득. `used + allocationSize > budget`이면 언락 후 `VK_ERROR_OUT_OF_DEVICE_MEMORY`. 가능하면 잠정 `used += allocationSize`, 언락, next-layer 호출. next-layer 실패 시 롤백. `VkDeviceMemory → allocationSize` 매핑 저장. | +| `vkFreeMemory` | 매핑에서 size 조회, 락, `used -= size`, 언락, next-layer 호출, 매핑 제거. | +| `vkQueueSubmit` / `vkQueueSubmit2` | CUDA `cuLaunchKernel` 래퍼와 공통화한 throttle 유틸 호출: `nvmlDeviceGetUtilizationRates` 폴링 + `usleep(POLL_INTERVAL)`을 `util < cores_limit` 또는 최대 재시도까지 반복. 이후 next-layer 호출. | + +레이어 ↔ 로더 계약: +- `vk_layer.h` 시그니처대로 `vkNegotiateLoaderLayerInterfaceVersion` export. +- 반환 구조체에 `vk_layerGetInstanceProcAddr` / `vk_layerGetDeviceProcAddr` 포인터 채움. +- `VkLayerInstanceCreateInfo` 체인에서 next-layer 포인터를 획득해 `VkInstance` 핸들 키의 디스패치 테이블에 저장. +- 훅 대상이 아닌 이름은 next-layer 포인터를 그대로 반환(pass-through). + +### 5.3 공유 VRAM / SM 카운터 + +HAMi-core는 이미 CUDA 래퍼가 참조하는 per-device `device_memory` 구조체를 갖고 있음. Vulkan 래퍼는 **같은** API를 호출: +```c +// 의사코드 +if (!reserve_device_memory(dev_idx, size)) return VK_ERROR_OUT_OF_DEVICE_MEMORY; +``` +`reserve_device_memory` 내부 뮤텍스가 CUDA/Vulkan 경로를 직렬화. 신규 IPC, 신규 공유메모리 세그먼트 없음. + +SM throttle 폴링 루프는 공통 유틸(`util_throttle(dev_idx)`)로 추출하여 `cuLaunchKernel` 래퍼(기존)와 `vkQueueSubmit` 래퍼(신규)가 공유. 
+ +### 5.4 Vulkan 레이어 매니페스트 + +파일: `etc/vulkan/implicit_layer.d/hami.json`. HAMi-core Dockerfile이 이미지의 `/etc/vulkan/implicit_layer.d/hami.json` 경로에 설치. + +```json +{ + "file_format_version": "1.2.0", + "layer": { + "name": "VK_LAYER_HAMI_vgpu", + "type": "GLOBAL", + "library_path": "/usr/local/vgpu/libvgpu.so", + "api_version": "1.3.0", + "implementation_version": "1", + "description": "HAMi Vulkan vGPU limiter", + "enable_environment": { "HAMI_VULKAN_ENABLE": "1" }, + "disable_environment": { "HAMI_VULKAN_DISABLE": "1" } + } +} +``` + +`enable_environment`로 Go 웹훅이 주입한 env가 있을 때만 활성화되므로, 매니페스트가 존재하는 CUDA 전용 파드에서도 레이어는 비활성 상태. + +### 5.5 빌드 + +- HAMi-core `Makefile`: `src/vulkan/*.c` 소스 추가, CFLAGS에 `-I$(VULKAN_SDK_INCLUDE)` 추가, 런타임 링크 없음(`libvulkan.so`는 로더가 dlopen). +- HAMi-core Dockerfile: `apt-get install vulkan-headers`(또는 동등 패키지), `etc/vulkan/implicit_layer.d/hami.json`을 이미지의 `/etc/vulkan/implicit_layer.d/`로 복사. + +## 6. 데이터 흐름 + +### 6.1 Admission +1. 사용자가 `nvidia.com/gpumem: 3000`, `nvidia.com/gpucores: 30`, annotation `hami.io/vulkan: "true"`로 파드 생성. +2. HAMi 웹훅 `MutateAdmission` 기존 경로 — `NVIDIA_VISIBLE_DEVICES`, RuntimeClass 설정. +3. 신규 경로(annotation 존재 + `hasResource`): `NVIDIA_DRIVER_CAPABILITIES`에 `graphics` 합집합 병합, `HAMI_VULKAN_ENABLE=1` 추가. +4. 스케줄러/디바이스 플러그인 흐름은 변경 없음. + +### 6.2 컨테이너 시작 +1. NVIDIA Container Toolkit prestart 훅이 `NVIDIA_DRIVER_CAPABILITIES=compute,utility,graphics`를 감지해 Vulkan ICD JSON + `libGLX_nvidia.so.0` + `libnvidia-glvkspirv.so` 등을 마운트. +2. HAMi-core 이미지가 `libvgpu.so`와 `/etc/vulkan/implicit_layer.d/hami.json`을 이미 배치함. +3. Vulkan 로더가 `implicit_layer.d`를 스캔하고 `HAMI_VULKAN_ENABLE=1`을 확인한 뒤 `libvgpu.so`에서 `VK_LAYER_HAMI_vgpu` 로드. + +### 6.3 런타임 +- `vkAllocateMemory(size)` → 레이어 → 카운터 예약 → next-layer 또는 `VK_ERROR_OUT_OF_DEVICE_MEMORY`. +- `vkFreeMemory(mem)` → 레이어 → 카운터 반환 → next-layer. +- `vkGetPhysicalDeviceMemoryProperties` → next-layer → 힙 size 클램프 → 반환. +- `vkQueueSubmit` → 레이어 throttle 폴링 → next-layer. 
+ +### 6.4 공유 버짓 (CUDA + Vulkan 동시 사용) +두 경로 모두 하나의 뮤텍스로 보호되는 `reserve_device_memory(dev, size)`에 진입. API를 가로질러 합산된 활성 할당량은 파드 버짓을 초과하지 않음. + +## 7. 에러 처리 + +| 상황 | 동작 | +|------|------| +| `HAMI_VULKAN_ENABLE` 미설정 | `enable_environment` 게이트 불통과 → 레이어 미활성화, Vulkan은 훅 없이 실행. | +| 런타임에 매니페스트 파일 누락 | 로더가 레이어를 발견 못 함 → Vulkan은 훅 없이 실행, HAMi-core 시작 프로브에서 경고 로그(추후). | +| 빌드 타임에 `vulkan-headers` 없음 | 컴파일 에러. 런타임 무관. | +| NVML 유틸리티 조회 실패 | throttle 스킵 (fail-open), errno 로그. | +| next-layer 체인 재진입 | 디스패치 테이블에서 저장된 next 포인터로 라우팅, 레이어 코드 비재진입 설계로 재귀 차단. | +| 멀티 physical device 컨테이너 | PCI 버스 ID / NVML 디바이스 핸들 기반 per-device 카운터. `NVIDIA_VISIBLE_DEVICES`가 이미 세트를 제한. | +| 예약 후 next-layer `vkAllocateMemory` 실패 | 카운터 롤백, 에러 그대로 반환. | +| 앱이 `VkDeviceMemory`를 leak (`vkFreeMemory` 호출 안 함) | 프로세스 동안 카운터 drift, 프로세스 종료 시 라이브러리 언로드로 해소. | +| non-NVIDIA 파드에 `hami.io/vulkan: true` annotation | NVIDIA 디바이스에서 `hasResource == false` → 조용히 no-op. | +| 사용자가 `NVIDIA_DRIVER_CAPABILITIES=all` 선설정 | 변경 없음 (`all` ⊇ `graphics`). | +| 사용자가 `NVIDIA_DRIVER_CAPABILITIES=compute` 선설정 | `compute,graphics`로 교체(합집합). | +| 사용자가 `NVIDIA_DRIVER_CAPABILITIES=compute,graphics` 선설정 | 변경 없음 (이미 `graphics` 포함). | + +## 8. 테스트 전략 + +### 8.1 Go 단위 테스트 — `pkg/device/nvidia/device_test.go` +- `TestMutateAdmission_VulkanAnno_AddsGraphicsCap` — annotation + HAMi 리소스 → env에 `graphics`, `HAMI_VULKAN_ENABLE=1` 포함. +- `TestMutateAdmission_VulkanAnno_MergesExistingCaps` — 기존 `compute` 있음 → `compute,graphics`로 병합. +- `TestMutateAdmission_VulkanAnno_AllCaps_NoChange` — 기존 `all` 있음 → 변경 없음. +- `TestMutateAdmission_NoVulkanAnno_NoChange` — annotation 없음 → env 주입 없음. +- `TestMutateAdmission_VulkanAnno_NoGPUResource` — annotation만 있고 HAMi 리소스 없음 → no-op. +- `TestMutateAdmission_VulkanAnno_IdempotentHamiEnable` — 웹훅 재적용 시 `HAMI_VULKAN_ENABLE` 중복 추가되지 않음. + +### 8.2 HAMi-core C 단위 테스트 +- `vk_layerGetInstanceProcAddr` — 훅 대상 이름은 래퍼 반환, 그 외는 next-layer 포인터 반환. +- `vkAllocateMemory`: + - 버짓 이내 → next-layer 호출, 카운터 증가. 
+ - 버짓 초과 → `VK_ERROR_OUT_OF_DEVICE_MEMORY`, next-layer 미호출, 카운터 불변. + - next-layer 에러 반환 → 카운터 롤백. +- pthread 경쟁 스트레스: CUDA `cuMemAlloc` + Vulkan `vkAllocateMemory` 동시 실행 시 `used_memory ≤ budget` 불변식, 성공 합산이 버짓 초과 없음. +- `vkGetPhysicalDeviceMemoryProperties` 클램프: 반환된 구조체의 힙 size가 `min(real, budget)`. + +### 8.3 통합 / E2E +- 신규 예제 `examples/nvidia/vulkan_example.yaml` — `hami.io/vulkan: "true"`, `nvidia.com/gpumem: 1024`, `vulkaninfo` 이미지. 검증(수동 또는 스크립트): + - `vulkaninfo | grep heapSize`가 device-local 힙에서 ≤ 1024 MiB. + - `vkAllocateMemory` 테스트 바이너리(또는 `vkcube --size-mb 2048`)가 `OUT_OF_DEVICE_MEMORY`로 실패. +- (수동, CI 미포함) Vulkan 백엔드 llama.cpp 파드에 `gpumem: 4096` + 7B 모델 — 버짓 초과 시 할당 실패 로그 확인. `docs/vulkan-vgpu-support.md`에 기록. + +### 8.4 수동 검증 체크리스트 (문서) +- `vulkaninfo` 힙 size 클램프. +- `vkAllocateMemory` 버짓 초과 시 기대한 에러 반환. +- 큐 제출 집중 워크로드에서 `nvidia-smi` compute 사용률이 설정된 `gpucores` 근방에서 throttle. +- 한 파드에서 CUDA + Vulkan 혼합 워크로드가 합산 버짓을 준수. + +## 9. 딜리버리 계획 + +두 레포에 걸친 변경, 순서: + +1. **HAMi-core PR** (C): Vulkan 레이어 모듈, 매니페스트 JSON, Dockerfile 업데이트, Makefile 업데이트, C 단위 테스트. 신규 릴리스 태그(`vX.Y.0`). +2. **HAMi PR** (Go, 이 레포): + - `pkg/device/nvidia/device.go` — annotation → env 주입. + - `pkg/device/nvidia/device_test.go` — 단위 테스트. + - `libvgpu` submodule 포인터를 신규 HAMi-core 릴리스로 갱신. + - `examples/nvidia/vulkan_example.yaml`. + - `docs/vulkan-vgpu-support.md` (영문 + `_cn.md`). + +롤아웃: 기본 OFF (annotation 게이트). 기존 배포에 대한 마이그레이션/파괴적 변경 없음. + +## 10. 미해결 / 후속 과제 + +- SM throttle 하 그래픽 워크로드의 프레임 페이싱 — `vkQueueSubmit` 지터 측정 후 후속 릴리스에서 throttle 모드 설정(`strict` vs `cooperative`) 옵션 필요할 수 있음. +- Vulkan Video 확장(`VK_KHR_video_queue`) — v1에서는 후킹 대상 아님. +- Vulkan 할당 거부에 대한 Prometheus 메트릭 — 후속. +- MPS 모드와의 상호작용 — MPS는 Vulkan을 노출하지 않음. annotation + MPS 모드 조합은 에러 또는 `hami-core` 모드로 폴백 + 경고. 구현 단계에서 최종 결정. 
diff --git a/docs/superpowers/specs/2026-04-27-volcano-vulkan-vgpu-design.md b/docs/superpowers/specs/2026-04-27-volcano-vulkan-vgpu-design.md new file mode 100644 index 000000000..cbb5b550b --- /dev/null +++ b/docs/superpowers/specs/2026-04-27-volcano-vulkan-vgpu-design.md @@ -0,0 +1,210 @@ +# Volcano + Vulkan vGPU 통합 설계 + +**작성일**: 2026-04-27 +**관련 작업**: HAMi `feat/vulkan-vgpu` 브랜치의 Vulkan vGPU 기능을 `xiilab/volcano-vgpu-device-plugin` 환경에 적용 + +## 목적 + +Volcano scheduler 가 이미 운영 중인 클러스터에 HAMi 의 Vulkan vGPU 메모리 partitioning 기능을 추가한다. Volcano scheduler 와 `volcano-vgpu-device-plugin` 은 그대로 유지하면서 **Vulkan workload (Isaac Sim, Kit 등) 도 CUDA workload 와 동일하게 `nvidia.com/gpumem` 제약을 받도록** 한다. + +## 비목표 (Non-goals) + +- Volcano scheduler 동작/스케줄링 로직 변경 ❌ +- 기존 CUDA-only workload 의 동작 회귀 ❌ +- HAMi 자체 scheduler extender 또는 device-plugin 도입 ❌ +- 새 task scheduler 또는 webhook 체인 변경 ❌ + +## 현재 상태 (As-is) + +### HAMi `feat/vulkan-vgpu` 브랜치 (이미 검증됨) + +- `libvgpu` submodule (HAMi-core, vulkan-layer): `vkAllocateMemory` 후킹으로 Vulkan 메모리 enforcement +- `pkg/device/nvidia/device.go:applyVulkanAnnotation`: pod annotation `hami.io/vulkan: "true"` 검사 → `HAMI_VULKAN_ENABLE=1` env + `NVIDIA_DRIVER_CAPABILITIES` 에 `graphics` merge +- `0150ea7` commit: device-plugin 이 Vulkan implicit layer manifest (`hami.json`) 를 container 에 자동 mount +- 2026-04-26 production verification: ws-node074 의 Isaac Sim pod 에서 23 GB partition enforcement 확인 + +### `xiilab/volcano-vgpu-device-plugin` (현재) + +- Project-HAMi/volcano-vgpu-device-plugin 의 fork +- `libvgpu` submodule = `6660c84` (vulkan-layer 미포함) +- HAMi-core 사용은 하지만 CUDA path 만 enforce +- Volcano scheduler 와 ConfigMap (`deviceshare.VGPUEnable: true`) 으로 협업 +- standard / CDI 두 가지 deploy yaml 제공 + +## 설계: 책임 분담 + +| 레이어 | 담당자 | 변경 | +|---|---|---| +| Pod scheduling | Volcano scheduler | ❌ 변경 없음 | +| GPU 자원 sharing/할당 | volcano-vgpu-device-plugin | ⚠️ submodule + manifest mount | +| Pod spec mutation (env) | HAMi mutating webhook | ✅ 별도 deploy (annotation 
처리) | +| Vulkan 메모리 enforcement | libvgpu (HAMi-core vulkan-layer) | ✅ submodule 갱신으로 자동 | + +### 핵심 결정 + +1. **HAMi webhook 만 별도 deploy** — Volcano 우회 아님. mutating admission webhook 은 scheduling 과 별개 단계라 scheduler 그대로 유지. +2. **submodule 단순 교체로는 부족** — Vulkan layer 코드는 들어오지만 manifest 파일 자동 mount + env 주입 두 가지 부수 효과 필요. +3. **manifest 파일은 device-plugin 이 hostPath mount** — HAMi commit `0150ea7` 패턴 그대로 포팅. 호스트 노드에 `/etc/vulkan/implicit_layer.d/hami.json` 사전 배치는 별도 DaemonSet 또는 helm chart init. + +## Components + +### C1. libvgpu submodule 교체 + +- **변경 위치**: `xiilab/volcano-vgpu-device-plugin/libvgpu` +- **변경 내용**: `6660c84` → vulkan-layer HEAD (HAMi 가 사용 중인 commit, 현재 `8d4f712`) +- **부수 효과**: vulkan source 추가, `vkQueueSubmit2` / `VkSubmitInfo2` Vulkan 1.3 가드 코드 포함 + +### C2. Vulkan manifest auto-mount + +- **변경 위치**: `xiilab/volcano-vgpu-device-plugin/pkg/.../allocate` (또는 device 응답 빌더) +- **변경 내용**: HAMi commit `0150ea7` 의 `injectVulkanLayerMount()` 함수 포팅 +- **동작**: device-plugin 의 `Allocate()` 응답에 다음 mount 추가 + ``` + hostPath: /etc/vulkan/implicit_layer.d/hami.json + containerPath: /etc/vulkan/implicit_layer.d/hami.json + readOnly: true + ``` +- **CDI 모드**: `volcano-vgpu-device-plugin-cdi.yml` 경로도 동일하게 처리. CDI spec yaml 에 mount 추가하는 형태로. + +### C3. 빌드 의존성 + +- **변경 위치**: `Dockerfile` (volcano-vgpu-device-plugin 의 builder stage) +- **변경 내용**: `libvulkan-dev` apt install (HAMi commit `50b37ff` 와 동일) +- **이유**: vulkan-layer source 컴파일에 Vulkan headers 필요 + +### C4. HAMi webhook deployment + +- **변경 위치**: 새 클러스터에 helm install (코드 변경 없음, deploy 작업) +- **values.yaml**: + ```yaml + devicePlugin: + enabled: false # volcano-vgpu-device-plugin 이 GPU 자원 등록 + scheduler: + kubeScheduler: + enabled: false # Volcano scheduler 사용 + extender: + enabled: false # HAMi extender 사용 안 함 + admissionWebhook: + enabled: true # Vulkan annotation 처리만 + ``` +- **결과**: HAMi 의 `applyVulkanAnnotation` 코드가 Volcano 환경에서도 동작. annotation 있는 pod 의 container env 자동 주입. + +### C5. 
Host 측 manifest 파일 사전 배치 (`volcano-vgpu-vulkan-manifest.yml`) + +- **변경 위치**: `xiilab/volcano-vgpu-device-plugin` 에 신규 raw yaml 추가 (기존 `volcano-vgpu-device-plugin.yml` 와 같은 디렉터리/패턴) +- **구성**: ConfigMap (`hami.json` 본문) + DaemonSet (initContainer 가 ConfigMap 의 `hami.json` 을 host 의 `/etc/vulkan/implicit_layer.d/hami.json` 으로 복사) +- **manifest 내용**: HAMi 의 `0150ea7` commit 에서 사용한 것 그대로 (layer 이름 `VK_LAYER_HAMI_vgpu`, library path `/usr/local/vgpu/libvgpu.so`, enable_environment `HAMI_VULKAN_ENABLE=1`) +- **DaemonSet 위치**: 모든 GPU 노드 (label `nvidia.com/gpu.present=true` 또는 동등 selector). manifest 파일이 ready 된 노드만 device-plugin 의 mount 가 성공할 수 있으므로 device-plugin DaemonSet 보다 먼저 배포하는 게 안전. +- **대안**: 사용자 image 에 manifest 베이크 — 비채택 (사용자 부담 증가) + +### C6. E2E 테스트 + +- **검증 항목**: + 1. annotation 있는 Vulkan pod → Kit boot log 의 `GPU Memory: 23000 MB` (partition enforce) + 2. annotation 없는 Vulkan pod → Kit boot log 의 `GPU Memory: 46068 MB` (full GPU) + 3. annotation 있는 CUDA-only pod → CUDA 정상 + Vulkan layer 안 로드 확인 + 4. 기존 volcano-vgpu-device-plugin CUDA sharing 회귀 (HAMi-core dynamic-mig 모드 포함) +- **참고 문서**: HAMi `docs/vulkan-vgpu-e2e-checklist.md` 의 체크리스트 그대로 적용 + +## Data flow (활성화 케이스) + +``` +1. kubectl apply isaac-sim.yaml + annotations: hami.io/vulkan: "true" + resources.limits: nvidia.com/gpumem: 23000 + +2. K8s API server + ├─ HAMi mutating webhook (별도 deploy 됨) + │ ├─ env += HAMI_VULKAN_ENABLE=1 + │ └─ env += NVIDIA_DRIVER_CAPABILITIES=compute,utility,graphics + └─ etcd 저장 + +3. Volcano scheduler (변경 없음) + └─ pod 을 ws-node074 로 schedule + +4. kubelet → volcano-vgpu-device-plugin Allocate() + ├─ GPU UUID 할당 (NVIDIA_VISIBLE_DEVICES) + ├─ libvgpu.so mount (CUDA + Vulkan 후킹용, 기존 코드) + └─ /etc/vulkan/implicit_layer.d/hami.json mount (C2 신규) + +5. 
Container 시작 + ├─ ld.so.preload 가 libvgpu.so 로드 (image 측 책임) + ├─ Vulkan app 시작 → loader 가 hami.json 발견 + ├─ enable_environment 가드 매치 (HAMI_VULKAN_ENABLE=1) + ├─ Vulkan layer 로드 → vkAllocateMemory 후킹 + └─ CUDA_DEVICE_MEMORY_LIMIT_0=23000m enforce +``` + +## Error handling / edge cases + +| 시나리오 | 동작 | 비고 | +|---|---|---| +| annotation 없는 pod | webhook no-op → env 미주입 → enable_environment 가드 unmatched → layer 안 로드 | 일반 CUDA pod 동작 그대로 | +| 노드에 manifest 파일 없음 | device-plugin Allocate 의 mount 시도 → kubelet mount 실패 → pod ContainerCreating | DaemonSet 의 manifest 배포 readiness 보장 필요 | +| HAMi webhook + Volcano webhook 순서 | mutating webhook chain 순차 실행. capability 추가 → Volcano 가 받는 spec 에 반영 → schedule 시 capability 미사용 | 충돌 없음 | +| CDI 모드 | `volcano-vgpu-device-plugin-cdi.yml` 의 device-plugin 도 동일하게 hami.json mount 추가 필요 | 코드 분기 | +| Vulkan ICD 의존성 부재 | libGLX_nvidia.so 가 vk_icdNegotiateLoaderICDInterfaceVersion -3 반환 → Vulkan init 실패 | 사용자 image 가 libEGL.so.1 + X11 + /dev/dri 포함해야 함 (HAMi 메모리 노트 참고) | + +## Risks + +1. **CDI 모드와 standard 모드 분기 누락**: 두 deploy yaml 이 서로 다른 device-plugin binary 를 사용한다면 manifest mount 코드도 두 곳에 들어가야 함. 점검 필요. +2. **DaemonSet 으로 host 노드에 manifest 배포 안 되어있는 경우**: pod 이 ContainerCreating 으로 stuck. helm chart 또는 별도 manifest 로 readinessGate 처리 필요. +3. **NVIDIA driver container 의존**: Volcano 환경이 NVIDIA gpu-operator 사용한다면 driver container 가 X11/EGL 라이브러리를 마운트해야 Vulkan 동작. HAMi 환경에서 검증한 것과 동일한 image 셋업 가정. +4. **upstream Project-HAMi/volcano-vgpu-device-plugin 과 divergence**: xiilab fork 가 별도 vulkan 코드 포함하는 동안 upstream 과 sync 가 어려워질 수 있음. 가능하면 upstream 에 PR 도 보내 divergence 최소화 권장. + +## Testing + +1. **Unit test**: 기존 volcano-vgpu-device-plugin 의 device allocate test 에 manifest mount 검증 추가 +2. **회귀 test**: CUDA-only workload 가 기존과 동일하게 동작 +3. **Integration**: kind/minikube 에서 Volcano + HAMi webhook + 새 device-plugin → 표준 CUDA pod 정상 동작 확인 +4. **E2E manual** (ws-node074 또는 별도 Volcano cluster): + - 4-1. 
Vulkan pod + annotation: 23 GB partition 확인 + - 4-2. Vulkan pod no-annotation: full GPU 확인 + - 4-3. CUDA pod + annotation: 영향 없음 + - 4-4. dynamic-mig 모드 회귀 (Ampere+ GPU 가용 시) + +## Deployment artifact (raw yaml 패턴) + +`xiilab/volcano-vgpu-device-plugin` 의 기존 패턴 (helm chart 없음, 평탄한 raw yaml) 을 그대로 따른다. + +``` +xiilab/volcano-vgpu-device-plugin/ +├── volcano-vgpu-device-plugin.yml # 기존 standard mode (image tag 갱신) +├── volcano-vgpu-device-plugin-cdi.yml # 기존 CDI mode (image tag 갱신) +└── volcano-vgpu-vulkan-manifest.yml # ★ 신규 — ConfigMap + DaemonSet +``` + +HAMi webhook 은 별도 yaml 작성하지 않고 **HAMi 본가 helm chart 재사용** (C4 의 values.yaml). + +## Deployment 순서 + +1. **PR-1: xiilab/volcano-vgpu-device-plugin** + - submodule 갱신 (C1: `6660c84` → vulkan-layer HEAD) + - device-plugin 코드에 manifest mount 추가 (C2) + - Dockerfile 빌드 의존성 (C3: `libvulkan-dev`) + - 기존 두 yaml 의 image tag 를 새 빌드 (`vulkan-v1`) 으로 갱신 + - 신규 `volcano-vgpu-vulkan-manifest.yml` 추가 (C5) + - image 빌드 + harbor push + +2. **클러스터 deploy** + - 2-1. `kubectl apply -f volcano-vgpu-vulkan-manifest.yml` (host 에 hami.json 배치) + - 2-2. `kubectl apply -f volcano-vgpu-device-plugin.yml` (또는 CDI 버전, 새 image rolling) + - 2-3. `helm install hami-webhook hami/hami` (C4 values 로 webhook only) + +3. 
**E2E 검증** (C6) + +## 관련 자료 + +- HAMi `feat/vulkan-vgpu` 브랜치 (현재) + - `pkg/device/nvidia/device.go:applyVulkanAnnotation` (webhook 코드) + - commit `0150ea7` (manifest auto-inject) + - commit `50b37ff` (libvulkan-dev 빌드 의존성) + - `docs/vulkan-vgpu-support.md`, `docs/vulkan-vgpu-e2e-checklist.md` +- xiilab/volcano-vgpu-device-plugin + - `https://github.com/xiilab/volcano-vgpu-device-plugin` + - 현재 libvgpu submodule: `6660c84` +- HAMi 메모리 노트 + - `project_hami_vulkan_verification.md` (production activation 검증) +- Volcano scheduler + - `https://github.com/volcano-sh/volcano` + - vGPU 활성화: `deviceshare.VGPUEnable: true` ConfigMap 설정 diff --git a/docs/superpowers/specs/2026-04-28-hami-isolation-isaac-sim-design.md b/docs/superpowers/specs/2026-04-28-hami-isolation-isaac-sim-design.md new file mode 100644 index 000000000..dfd139320 --- /dev/null +++ b/docs/superpowers/specs/2026-04-28-hami-isolation-isaac-sim-design.md @@ -0,0 +1,302 @@ +# HAMi vGPU 격리를 NVIDIA Isaac Sim Kit (Omniverse) 에 적용 — Design + +**Date**: 2026-04-28 +**Status**: Approved (사용자 design 승인 완료) +**Goal**: HAMi vGPU 격리(NVML + CUDA + Vulkan path) 를 NVIDIA Isaac Sim Kit (Carbonite/OptiX/Vulkan implicit layer chain) 와 호환되게 적용 + +## 1. Context + +PR #1803 (HAMi 메인 fork `xiilab/feat/vulkan-vgpu`) + PR #182 (HAMi-core fork `xiilab/vulkan-layer`) 가 Vulkan vGPU partition 격리를 추가했고, 2026-04-27 새벽에 4개 patch 가 cluster 에 deploy 되어 **노드 wide HAMi 격리** 가 활성화됐다. 그러나 이 시점부터 isaac-launchable namespace 의 **Isaac Sim Kit 6.0.0-rc.22** (`runheadless.sh`, `train.py --livestream 2`) 가 SegFault 로 더 이상 동작하지 않게 됐다. + +사용자가 2일 동안 정상 시연했던 baseline 은 **2026-04-27 08:44 이전** 이고, 이 시점 이후의 노드 wide 강제 격리가 NVIDIA Isaac Sim Kit 의 init path 와 호환 충돌한다. race lucky 가 아닌 진짜 regression. + +진정한 fix 는 격리 메커니즘을 namespace 단위 opt-in 으로 변경하고 (Step A), HAMi-core 의 hook code 를 Isaac Sim Kit init 시 안전하게 동작하도록 hardening (Step B/C) 한 다음, isaac-launchable namespace 도 opt-in 활성화하여 격리 + 동작 둘 다 만족하는 (Step D) 것이다. + +## 2. 
4-27 새벽 patch (regression 시점) + +| 시각 (UTC) | 변경 | +|---|---| +| 02:02 | `volcano-vgpu-device-plugin:vulkan-v1` Harbor push (`10.61.3.124:30002/library/`) | +| 02:17:50 | `hami-vulkan-manifest-installer` daemonset 생성 (kube-system) — 노드의 `/usr/local/vgpu/vulkan/implicit_layer.d/hami.json` 생성 | +| 03:34:22 | `hami-webhook` MutatingWebhookConfiguration install (helm release `hami-webhook` in `hami-system`) — pod 생성 시 자동 mutation (HAMI_VULKAN_ENABLE env, hami.json mount, NVIDIA_DRIVER_CAPABILITIES patch) | +| 08:44 | `/usr/local/vgpu/ld.so.preload` 만들어짐 — **노드 wide 모든 컨테이너 process 에 `libvgpu.so` 강제 inject** | + +마지막 (`ld.so.preload`) 이 결정적 trigger. + +## 3. Isaac Sim Kit 와의 호환 충돌 (backtrace 증거) + +### 3.1 OptiX denoising init 시 NULL deref +``` +000: libc.so.6!__sigaction +001: libvgpu.so!cuMemGetInfo_v2+0x52c (memory.c:513) ← HAMi-core CUDA hook +002: libnvoptix.so.1!rtGetSymbolTable +004: librtx.optixdenoising.plugin.so!carbOnPluginPreStartup +009: libcarb.scenerenderer-rtx.plugin.so!carbOnPluginPreStartup +010: libomni.hydra.rtx.plugin.so +``` +NVIDIA OptiX denoising plugin 이 init 시 `cuMemGetInfo_v2(NULL, NULL)` 호출 → HAMi-core hook 이 NULL pointer dereference 시도. +**Fix 이미 적용됨**: HAMi-core fork commit `03f99d7` — forward to real driver first + NULL guard. + +### 3.2 Carbonite Vulkan plugin extension list dangling +``` +001: libvulkan.so.1!+0x22bc8 ← Vulkan loader +002: libcarb.graphics-vulkan.plugin.so!std::vector::_M_emplace_aux +003: libgpu.foundation.plugin.so!Map_base::operator[] +009: libgpu.foundation.plugin.so!filesystem::path::~vector() +013: libomni.ui!Image::_loadSourceUrl +039: libomni.kit.renderer.plugin.so!carbOnPluginPreStartup +``` +Carbonite Vulkan plugin 이 enabled extension list 만들 때 layer chain 에서 `vkGetInstanceProcAddr(NULL, "vkEnumerate*ExtensionProperties")` 호출 → HAMi Vulkan layer 가 NULL 반환 → loader 가 NULL fn ptr 사용 → SegFault. +**Fix 이미 적용됨**: HAMi-core commit `2b6b875` — `vkEnumerate{Instance,Device}{Extension,Layer}Properties` hooks 추가. 
+ +### 3.3 carb.tasking fiber init race +``` +014-017: libcarb.tasking.plugin.so!make_fcontext+0x39 +``` +NVIDIA Kit 의 task scheduler 가 fiber/coroutine context 생성 시 race. Layer chain 활성 시 dispatch 차이로 trigger. +**Fix 미적용** — Step C 영역. + +### 3.4 omni.clipboard.service utmp 부재 +``` +Failed to open [/var/run/utmp] +Active user not found. Using default user [kiosk] +``` +`omni.clipboard.service` 가 init 시 logged-in user 식별 실패. 직접 SegFault trigger 는 아니나 race 기여 가능. 우회: utmp record 만들기. + +## 4. 검증된 baseline (Step A 직전 상태) + +``` +ws-node074: + /usr/local/vgpu/ld.so.preload = "" (빈 파일, HAMi-core inject 비활성) + /usr/local/vgpu/libvgpu.so = HAMi-core fork build (md5 62fedf17) + /usr/local/vgpu/vulkan/implicit_layer.d/hami.json = 복원 + hami-vulkan-manifest-installer ds = nodeSelector hami.io/disabled=true (비활성) + isaac-launchable namespace label = hami.io/webhook=ignore (webhook opt-out) + +검증: + runheadless.sh 5번 → 5/5 exit=124 alive, crash=0, listen 49100/30999 ✅ + nvidia-smi total = 46068 MiB (raw — 격리 비활성) + 외부 http://10.61.3.118 = 5/5 → 200 + isaac-launchable-0/1, usd-composer pod 모두 3/3 Running +``` + +이 환경이 사용자가 본 2일 동안 동작하던 baseline 과 동등 (격리 0). + +## 5. Goal + +| 격리 path | 검증 방법 | 기대값 | +|---|---|---| +| **NVML** | `nvidia-smi --query-gpu=memory.total --format=csv,noheader` | `23552 MiB` | +| **CUDA** | `cuMemGetInfo_v2()` returned total / `cuMemAlloc(>23 GiB)` | partition value / `CUDA_ERROR_OUT_OF_MEMORY` | +| **Vulkan** | `vkGetPhysicalDeviceMemoryProperties` heap[0].size / `vkAllocateMemory(>23 GiB)` | `23 GiB` / `VK_ERROR_OUT_OF_DEVICE_MEMORY` | +| **Isaac Sim Kit** | `runheadless.sh` 5번 / `train.py --livestream 2` | 5/5 alive, listen 49100, 화면 표시, 학습 진행 | + +**4개 path 동시에 만족** = 성공. + +## 6. Architecture (4 Step) + +``` +Step A (namespace opt-in/out webhook) + ↓ +Step B (HAMi-core CUDA/NVML hook hardening) + ↓ +Step C (HAMi-core Vulkan layer compat hardening) + ↓ +Step D (isaac-launchable opt-in 활성화 + 4-path 검증) +``` + +각 step 은 앞 step 의 결과물에만 의존하고, 그 외에는 독립적으로 진행·검증할 수 있다. 
Step A 끝나면 isaac-launchable 즉시 정상 운영. Step B, C 가 완료된 후에만 Step D 의 진짜 검증 가능. + +## 7. Step A — Namespace opt-in/out (1일) + +### 7.1 변경 대상 + +| 컴포넌트 | 현재 | 변경 | +|---|---|---| +| `hami-webhook` MutatingWebhookConfiguration `namespaceSelector` | opt-out (`hami.io/webhook NotIn ignore`) | **opt-in (`hami.io/vgpu In enabled`)** | +| `hami-vulkan-manifest-installer` daemonset (노드 wide hami.json install) | 모든 GPU 노드 활성 | **폐기 또는 webhook init container 로 변환** — pod 단위 hami.json mount | +| `/usr/local/vgpu/ld.so.preload` (노드 wide HAMi-core inject) | 모든 컨테이너 강제 inject | **폐기** — webhook 이 enabled namespace pod 에만 `LD_PRELOAD` env 주입 + `libvgpu.so` volume mount | + +### 7.2 새 webhook mutation 패턴 (enabled pod 만) + +```yaml +# Pod containers[*] 에 추가: +env: + - name: LD_PRELOAD + value: /usr/local/vgpu/libvgpu.so + - name: HAMI_VULKAN_ENABLE + value: "1" + - name: NVIDIA_DRIVER_CAPABILITIES + value: <기존값>,graphics # 이미 all 이면 noop +volumeMounts: + - name: hami-libvgpu + mountPath: /usr/local/vgpu + readOnly: true + - name: hami-vulkan-layer + mountPath: /etc/vulkan/implicit_layer.d/hami.json + subPath: hami.json + readOnly: true + +# Pod volumes 에 추가: +volumes: + - name: hami-libvgpu + hostPath: + path: /usr/local/vgpu + type: Directory + - name: hami-vulkan-layer + configMap: + name: hami-vulkan-layer + items: + - key: hami.json + path: hami.json +``` + +### 7.3 변경 파일 + +- `charts/hami/values.yaml` — namespaceSelector default mode (`opt-in` 추가) +- `charts/hami/templates/scheduler/webhook.yaml` — selector mode 분기 +- `charts/hami/templates/manifest-installer-ds.yaml` — 제거 또는 init container 로 이동 +- `charts/hami/templates/preload-installer.yaml` (있다면) — 제거 (`/usr/local/vgpu/ld.so.preload` 만들기 daemonset) +- `pkg/scheduler/webhook/*` (mutation 로직 변경) + +### 7.4 검증 + +- isaac-launchable namespace = label 없음 → webhook mutation 0 → 현재 baseline 그대로 (5/5 alive) +- 새 namespace `hami-test` 에 label `hami.io/vgpu=enabled` + simple CUDA pod 배포 → `nvidia-smi 23552 MiB`, `cuMemAlloc(>23GiB)` 거부 
검증 + +## 8. Step B — HAMi-core CUDA/NVML hook hardening (3-5일) + +### 8.1 Robustness 패턴 + +`cuMemGetInfo_v2` 의 fix 패턴 (commit `03f99d7`): + +```c +CUresult cuXxx(...) { + LOG_DEBUG("cuXxx"); + ENSURE_INITIALIZED(); + + /* 1. Forward to the real driver FIRST. NULL/missing-context errors + * surface exactly as without HAMi. We never dereference pointers + * the driver rejected. */ + CUresult r = REAL_CALL(cuXxx, ...); + if (r != CUDA_SUCCESS) return r; + + /* 2. NULL/invalid arg guard — return early without enforcement */ + if (...args invalid for HAMi logic...) return r; + + /* 3. Get device + apply HAMi 격리 logic */ + ... +} +``` + +### 8.2 Audit 대상 + +| Hook | 현재 상태 | 액션 | +|---|---|---| +| `cuMemGetInfo`, `cuMemGetInfo_v2` | ✅ Fixed (`03f99d7`) | unit test 추가 | +| `cuMemAlloc`, `cuMemAlloc_v2` | audit 필요 | NULL devptr / `bytesize == 0` guard | +| `cuMemAllocAsync`, `cuMemAllocPitch` | audit 필요 | 동일 패턴 | +| `cuMemFree`, `cuMemFree_v2`, `cuMemFreeAsync`, `cuMemFreeHost` | audit 필요 | untracked pointer fallback (이미 일부 fix `3bebc8a`) | +| `cuCtxGetDevice` | audit 필요 | NULL ctx 시 driver error pass-through | +| `cuMemCreate` | ✅ Fixed (`833c62c`) | 검증 | +| `nvmlDeviceGetMemoryInfo`, `_v2` | ✅ Robust | 검증 | + +### 8.3 단위 검증 + +각 hook 별로: +- normal happy path (정상 인자, 정상 반환) +- NULL pointer arg (driver 가 거부하면 그대로 반환) +- partition limit 도달 (OOM 반환) +- partition limit 0 (unlimited fallback) + +`vk_partition_test.py` 와 비슷한 단순 test 추가 (`cuda_partition_test.py`). + +### 8.4 Isaac Sim 통합 검증 (Step B 완료 시점) + +- `LD_PRELOAD=/usr/local/vgpu/libvgpu.so /isaac-sim/python.sh -c "from isaacsim import SimulationApp; SimulationApp({'headless': True}).close()"` — graceful exit (no SegFault) +- `runheadless.sh` 단독 실행 — Vulkan path 문제 잔존하므로 Step C 후 검증 + +## 9. 
Step C — HAMi-core Vulkan layer compat (5-7일) + +### 9.1 이미 적용된 fix (commits) + +- `93dd103`: deviceUUID zero → idx=0 fallback (single-GPU container 호환) +- `91ca00c`: HOOK_NVML_ENABLE build flag — NVML hook activate +- `2b6b875`: `vkEnumerate{Instance,Device}{Extension,Layer}Properties` hooks — GIPA NULL deref 방지 + +### 9.2 추가 hardening + +`hami_vkGetInstanceProcAddr` audit: +- 모든 instance-level entry point 호출 시 invalid handle pass-through 패턴 (단, NVIDIA driver 에 unknown handle 절대 forward 금지 — 정의되지 않은 동작) +- 현재 hook 안 한 함수들 (`vkGetPhysicalDeviceFormatProperties{,2}`, `vkGetPhysicalDeviceImageFormatProperties{,2}`, `vkGetPhysicalDeviceQueueFamilyProperties{,2}`, `vkGetPhysicalDeviceFeatures{,2}`, `vkGetPhysicalDeviceProperties{,2}`, `vkGetPhysicalDeviceSparseImageFormatProperties{,2}`) — instance dispatch 통해 next layer forward 가 표준 패턴이며 instance 등록 시 cache + +`hami_vkCreateInstance` / `hami_vkCreateDevice` audit: +- chain 변경의 in-place 수정 (`chain->u.pLayerInfo = chain->u.pLayerInfo->pNext`) 이 spec 표준 — caller 가 createInfo 재사용 안 한다고 가정. 그러나 NVIDIA OptiX 가 재사용 가능성 있음 → caller-safe deep copy 검토. + +dispatch lifetime audit: +- `hami_instance_unregister` / `hami_device_unregister` 가 caller-side에서 적절한 시점에 호출되는지 +- multi-instance 환경 (Carbonite 가 두 번째 instance 만드는 케이스) 에서 first instance 의 cached gipa 가 stale 안 되도록 + +OptiX/Aftermath 호환: +- `aftermath_status=auto-enabled` 환경에서 vkCreateDevice extensions 처리 검증 +- `librtx.optixdenoising.plugin.so` init path 추적 (Step B 의 cuMemGetInfo 이후 stage) + +### 9.3 검증 + +- `runheadless.sh` 5번 — 5/5 alive + listen 49100/30999 (현재 ld.so.preload 비활성에서 5/5 → layer 활성에서도 5/5 목표) +- `vk_partition_test.py` — Vulkan partition enforce 유지 (이미 통과) +- `train.py --livestream 2` — 학습 진행 + WebRTC 화면 표시 +- OptiX denoising 활성 시 Kit init 통과 + +## 10. Step D — isaac-launchable opt-in 활성화 + 검증 (1-2일) + +### 10.1 시나리오 + +1. isaac-launchable namespace label 변경: `hami.io/webhook=ignore` 제거 → `hami.io/vgpu=enabled` 추가 +2. 
isaac-launchable-* / usd-composer pod 재생성 +3. webhook 이 enabled mutation 적용 (LD_PRELOAD env, libvgpu.so mount, hami.json mount) +4. 4-path 동시 검증 + +### 10.2 검증 매트릭스 + +| Path | Command | Expected | +|---|---|---| +| NVML | `kubectl exec ... nvidia-smi --query-gpu=memory.total --format=csv,noheader` | `23552 MiB` | +| CUDA | `LD_PRELOAD=/usr/local/vgpu/libvgpu.so python -c "import cupy; cupy.cuda.runtime.malloc(25*1024**3)"` | `cudaErrorMemoryAllocation` | +| Vulkan | `kubectl exec ... /isaac-sim/python.sh vk_partition_test.py` | heap[0]=23 GiB, 25/30 GiB OOM | +| Isaac Sim | `kubectl exec ... ACCEPT_EULA=y /isaac-sim/runheadless.sh` 5회 | 5/5 alive, listen 49100/30999 | +| Isaac Sim 학습 | `kubectl exec ... ./isaaclab.sh -p train.py --livestream 2 --max_iterations 5` | `Iteration 0..4` reward 출력 + 화면 표시 | + +5/5 통과 = Step D 성공 = 전체 design goal 달성. + +## 11. 위험 및 대응 + +| 위험 | 영향 | 대응 | +|---|---|---| +| Step B/C 가 며칠 걸리는데 isaac-launchable 즉시 운영 필요 | 높음 | Step A 만으로 isaac-launchable 즉시 baseline 동작 (현 상태) | +| Step C 후에도 race 잔존 (NVIDIA Kit 자체 bug) | 중 | NVIDIA bug report, Isaac Sim GA / 다른 RC build 시도 | +| `namespaceSelector` opt-in 변경이 기존 사용자 영향 (label 없는 namespace 격리 0) | 중 | helm chart values 의 default mode 분기 — 기존 사용자는 명시적 enable, 새 사용자만 opt-in default | +| `ld.so.preload` 폐기로 cluster wide 격리 일시적 0 | 낮음-중 | Step A 후 즉시 namespace label 추가로 enabled namespace 격리 회복 | +| Webhook 의 volume mount 추가가 기존 pod spec 과 충돌 | 낮음 | mountPath 검증 (`/etc/vulkan/implicit_layer.d/hami.json` subPath) — 기존 nvidia_layers.json 과 공존 가능 | + +## 12. 일정 + +| Step | 일정 | 결과물 | +|---|---|---| +| A | 1일 | helm chart commit + push, webhook config 변경, isaac-launchable baseline 안정 | +| B | 3-5일 | HAMi-core PR #182 추가 commits (cuda/nvml hook hardening) + unit test | +| C | 5-7일 | HAMi-core PR #182 추가 commits (Vulkan layer compat) + Isaac Sim init 통과 | +| D | 1-2일 | isaac-launchable opt-in label + 4-path 검증, 운영 회복 + 격리 동시 만족 | + +**총 약 10-15일**. + +## 13. 
산출물 + +- HAMi 메인 (`xiilab/feat/vulkan-vgpu` PR #1803): helm chart 변경 commit 들 +- HAMi-core (`xiilab/vulkan-layer` PR #182): hook hardening + Vulkan layer compat commits +- volcano-vgpu-device-plugin (`xiilab/pr/vulkan-upstream` PR #118): 변경 없음 (libvgpu.so hostPath mount 패턴 유지) +- 본 spec 문서 + 후속 implementation plans (`writing-plans` skill 출력) + +## 14. 다음 단계 + +이 spec 검토 후 `writing-plans` skill 으로 Step A 부터 step-by-step implementation plan 생성 → step별 commit/PR push → 검증 → 다음 step. diff --git a/docs/superpowers/specs/2026-04-29-step-c-redesign-vk-so-split.md b/docs/superpowers/specs/2026-04-29-step-c-redesign-vk-so-split.md new file mode 100644 index 000000000..31336a439 --- /dev/null +++ b/docs/superpowers/specs/2026-04-29-step-c-redesign-vk-so-split.md @@ -0,0 +1,184 @@ +# Step C 재설계 — Vulkan layer 를 별도 `libvgpu_vk.so` 로 분리 + +## 배경 + +2026-04-28 Step C 첫 시도(`docs/superpowers/plans/2026-04-28-hami-isolation-step-c-vulkan-layer-compat.md`)는 ws-node074 production 환경에서 regression 을 만들었다. 검증 데이터는 `libvgpu/docs/superpowers/notes/2026-04-28-vk-trace-isaac-sim.md` 와 같은 폴더의 dispatch lifetime audit 노트에 보존. + +핵심 발견: + +- pre-Step-C build (md5 `8f889313`) 를 LD_PRELOAD 했을 때 isaac-launchable-0 의 `runheadless.sh` 는 `exit=124 alive`. +- post-Step-C build (md5 `9586feee`, 추가 시도 `1048daaf`) 를 같은 환경에서 LD_PRELOAD 하면 `exit=139` 에서 NVIDIA driver init path crash. +- crash backtrace 는 `libvulkan.so.1` → `libGLX_nvidia.so.0!vk_icdNegotiateLoaderICDInterfaceVersion` → `libEGL_nvidia.so.0!__egl_Main` → `libc.so.6!__sigaction`. +- HAMI_VK_TRACE 카운트는 두 시도 모두 0 — 우리 layer wrapper 는 호출되지 않음. +- HAMI_HOOK 매칭 가설을 falsify 하기 위해 `EnumerateDeviceExtensionProperties` / `EnumerateDeviceLayerProperties` 를 `g_inst_head != NULL` 로 게이트했으나 동일 crash. 즉 regression 은 Vulkan wrapper 코드 path 가 아니라 **`.so` load-time / NVIDIA driver init 시점의 ELF 수준 영향**. + +추가 진단 (nm/readelf diff, `7dcb5a4` clean rebuild md5 비교, `LD_DEBUG=symbols,bindings`) 은 sandbox 가 ws-node074 외 build 환경 부재로 차단. 
코드 commits `996cb22`, `eea2beb` 는 이번 세션에서 revert (`83fd245`, `f52aada`). 노트 commits 은 보존하되 fork push 보류. + +## 목적 + +regression 의 root cause 진단에 추가 시간을 쓰지 않고, **regression 이 구조적으로 발생할 수 없는 architecture 로 Step C 의 본래 목표 (Carbonite/Kit init 호환되는 Vulkan layer hardening) 를 달성**한다. + +본래 목표는 plan 의 Section 1 그대로 — Vulkan layer 가 NVIDIA Isaac Sim Kit (Carbonite/OptiX/Aftermath) 의 Vulkan 초기화 경로에서 NULL deref 없이 dispatch chain 을 끝까지 forwarding. + +## 핵심 결정 + +| # | 결정 | 선택 | +|---|---|---| +| 1 | 접근 | 새 architecture 우선. root cause 진단 spike 생략 | +| 2 | Vulkan layer 활성 trigger | manifest 만 (`/etc/vulkan/implicit_layer.d/hami.json`). LD_PRELOAD path 는 Vulkan 활성 안 함 | +| 3 | 분리 boundary | full Vulkan split — `src/vulkan/*` 전체를 새 `libvgpu_vk.so` 로 | +| 4 | 검증 환경 | local docker (`make build-in-docker`) + ws-node074 integration | +| 5 | 기존 `libvgpu.so` Vulkan 코드 | 완전 제거 — `vulkan_mod` 를 `libvgpu.so` build 에서 제외 | + +## 비변경 사항 + +- HAMi-core (NVML/CUDA hook, allocator, multiprocess) 코드 변경 0. budget IPC 그대로. +- Step B 의 commits (`88143ab`, `275ba3d`, `01a58f1`, `7dcb5a4`) — CUDA NULL guards 보존. +- Step B 의 Vulkan 관련 commits (`2b6b875`, `91ca00c`, `93dd103`) — Carbonite SegFault 1차 수정, NVML hook 활성, deviceUUID zero fallback 모두 보존. `libvgpu_vk.so` 의 시작점은 이 Step B end 코드. +- Step A 의 webhook namespaceSelector (HAMi parent `master` 기반) 변경 0. +- Step D scope (isaac-launchable opt-in 활성화 + 4-path 검증) 그대로 — 이번 spec 은 .so / manifest 산출물만, 활성화 path 는 Step D 가 책임. +- Plan 첫 시도의 commits `996cb22`, `eea2beb` 는 revert 상태. Tasks 1+2 의 의도 (cache first next-gipa, GIPA/GDPA fallback) 는 새 architecture 검증 통과 후 별도 phase 에서 재도입 후보. 
+ +## Architecture + +``` +process (LD_PRELOAD'd or manifest-activated): + + ┌──────────────────────────────────────────────┐ + │ libvgpu.so ← LD_PRELOAD by ld.so.preload │ + │ - NVML hooks (nvmlDeviceGetMemoryInfo …) │ + │ - CUDA hooks (cuMemAlloc …) │ + │ - allocator + multiprocess (budget IPC) │ + │ - exports: hami_core_budget_*, hami_core_ │ + │ get_partition_uuid(), … │ + │ - NO Vulkan symbols (vk* 미export) │ + └──────────────────────────────────────────────┘ + ▲ + │ DT_NEEDED (link-time dependency) + │ resolved at dlopen + ┌──────────────────────────────────────────────┐ + │ libvgpu_vk.so ← Vulkan loader dlopen via │ + │ /etc/vulkan/implicit_ │ + │ layer.d/hami.json │ + │ - layer.c, dispatch.c (entry points) │ + │ - hooks_alloc/memory/submit │ + │ - physdev_index, budget bridge, throttle │ + │ - exports: vkGetInstanceProcAddr, │ + │ vkGetDeviceProcAddr, │ + │ vkNegotiateLoaderLayerInterfaceVersion │ + └──────────────────────────────────────────────┘ +``` + +격리 속성: + +- Vulkan 코드는 `libvgpu_vk.so` 에 단 1개 copy. +- `libvgpu.so` LD_PRELOAD 단독 시 Vulkan symbol 0 → loader/ICD 가 우리 export 와 collision 가능 surface 0. 4-28 trace 에서 발견된 LD_PRELOAD-only crash class 가 구조적으로 불가능. +- Vulkan layer 활성은 manifest dlopen path 만. Vulkan loader 가 chain 을 정상적으로 build 한 후 우리 layer 에 진입 → `g_inst_head` 가 항상 set 된 상태에서만 wrapper 동작. +- `libvgpu_vk.so` 의 DT_NEEDED 가 `libvgpu.so` 를 가리켜, manifest 활성 시점에 LD_PRELOAD 된 `libvgpu.so` 의 export 자동 resolve. `libvgpu.so` 가 process 에 없으면 dlopen 실패 → loader 가 layer 자동 skip → Isaac Sim alive (no HAMi enforcement). webhook 실수 시 fail-safe. + +## Components + +| 단위 | 위치 | 책임 | 의존성 | +|---|---|---|---| +| `libvgpu.so` (수정) | `src/CMakeLists.txt` | HAMi-core. `vulkan_mod` OBJECT lib 제거. 
budget/UUID 조회 함수 export | -lcuda, -lnvidia-ml | +| `libvgpu_vk.so` (신규) | `src/vulkan/CMakeLists.txt` | Vulkan layer entry + dispatch + hooks | DT_NEEDED libvgpu.so, -lpthread | +| budget bridge | `src/vulkan/budget.c` 확장 | `libvgpu.so` 의 `hami_core_*` 함수를 layer hooks 가 호출하는 thin wrapper. 기존 budget.c 가 이미 HAMi-core 와 layer 사이 bridge 역할이므로 별도 파일 신규 없음 | libvgpu.so export | +| `hami.json` manifest | install path 결정 (`/usr/local/vgpu/hami.json` + symlink `/etc/vulkan/implicit_layer.d/hami.json`) | Vulkan implicit layer 정의. `library_path` = `/usr/local/vgpu/libvgpu_vk.so` | (정적 file) | +| 기존 `tests/vulkan/` | 그대로 유지 | layer/dispatch unit tests | libvgpu_vk.so | + +`libvgpu.so` 의 신규 export (HAMi-core 측 인터페이스): + +- `hami_core_get_device_uuid_count()` — NVML idx 매핑 +- `hami_core_get_device_memory_limit(int nvml_idx)` — partition 값 +- `hami_core_budget_charge(int nvml_idx, size_t bytes)` — 할당 시 budget 차감 +- `hami_core_budget_release(int nvml_idx, size_t bytes)` — 해제 시 복귀 +- `hami_core_budget_remaining(int nvml_idx)` — 남은 한도 조회 + +prefix `hami_core_*` 통일. 기존 internal 이름 (`get_used_memory_for_uuid` 등) 은 그대로 두고, 외부 인터페이스는 별도 파일 (`src/hami_core_export.c` 또는 기존 `libvgpu.c` 끝에 export 블록 추가) 의 thin wrapper 로 명시 export. CMake `-fvisibility=hidden` default 적용 + 외부 인터페이스 함수에만 `__attribute__((visibility("default")))` 부착해서 export surface 를 의도된 5개로 좁힘. + +## Data flow (production happy path) + +``` +1. Pod 시작 + → ld.so.preload 가 libvgpu.so LD_PRELOAD + → NVML/CUDA hook 활성, partition 값 ready + +2. Isaac Sim Kit 시작 + → Vulkan loader 가 implicit_layer.d/ scan + → hami.json 발견 → libvgpu_vk.so dlopen + → DT_NEEDED libvgpu.so 자동 resolve (이미 process 에 있음) + → vkNegotiateLoaderLayerInterfaceVersion 호출 + +3. 앱이 vkCreateInstance + → loader chain 거쳐 hami_vkCreateInstance + → hami_instance_register, hook table 구성 + +4. 
앱이 vkAllocateMemory + → hami_vkAllocateMemory wrapper + → hami_core_budget_remaining(idx) 조회 (libvgpu.so call) + → 가능하면 next_alloc 호출 + hami_core_budget_charge + → 한도 초과 시 VK_ERROR_OUT_OF_DEVICE_MEMORY + +5. 앱이 vkGetPhysicalDeviceMemoryProperties + → hooks_memory.c + → hami_core_get_device_memory_limit 으로 raw 값 clamp +``` + +## Error handling + +| 시나리오 | 동작 | +|---|---| +| `libvgpu.so` 부재 + manifest 활성 | `libvgpu_vk.so` dlopen 시 DT_NEEDED 해결 실패 → loader 가 layer 자동 skip → Isaac Sim alive (no HAMi enforcement) | +| manifest 부재 + `libvgpu.so` LD_PRELOAD | Vulkan loader 가 layer 발견 0 → libvgpu_vk.so 미load → NVML/CUDA hook 만 동작. Vulkan 호출은 raw — partition 안 됨, 운영자 책임 | +| `hami_vkCreateInstance` 안에서 chain 실패 | 기존과 동일: `VK_ERROR_INITIALIZATION_FAILED` 반환 | +| budget 차감 시 `libvgpu.so` 함수 NULL (불가하지만 방어) | `hami_vkAllocateMemory` 가 next_alloc 그대로 forward (no enforcement). 로깅만 | +| `physdev_index` UUID 매핑 실패 | 기존과 동일: NVML idx=0 fallback (single-GPU). `93dd103` 패치 그대로 | +| Vulkan wrapper 진입 후 NULL deref 가능 path | Step B end 의 NULL guards (`2b6b875`) 그대로 보존 | + +Race / lifetime 분석은 기존 audit (`6fc7f9a` `2026-04-28-vk-dispatch-lifetime-audit.md`) 그대로 유효. 별도 .so 라도 같은 process · 같은 dispatch table — race surface 변경 없음. + +## Testing + +| 층 | 어디 실행 | 무엇 검증 | +|---|---|---| +| Unit (`test/vulkan/`) | local docker | 기존 `test_layer`, `test_memprops`, `test_alloc` 등이 새 `libvgpu_vk.so` 로 빌드/통과 | +| ELF / symbol diff | local | `nm -D libvgpu.so | grep '^.* T vk'` 결과 0줄. `nm -D libvgpu_vk.so` 에 `vkGetInstanceProcAddr`, `vkGetDeviceProcAddr`, `vkNegotiateLoaderLayerInterfaceVersion` 만 외부 export. `readelf -d libvgpu_vk.so | grep NEEDED` 에 libvgpu.so 포함 | +| Step B regression | local docker (LD_PRELOAD libvgpu.so) | `test_cuda_null_guards` 9/9 [OK] | +| LD_PRELOAD-only smoke | ws-node074 isaac-launchable-0 | LD_PRELOAD `libvgpu.so` (manifest 미설치) + runheadless.sh × 5 → 5/5 exit=124 alive crash=0. 
**regression class 가 사라졌다는 핵심 검증** | +| Manifest 활성 smoke (Step D 와 합치) | ws-node074 isaac-launchable-0 | LD_PRELOAD `libvgpu.so` + manifest hami.json + runheadless.sh × 5 → 5/5 alive + Vulkan partition enforce (44 GiB → 23 GiB clamp) | +| HAMI_VK_TRACE 수집 | ws-node074 manifest 활성 path | trace lines > 0 — layer 가 실제로 chain 에 진입했음 검증 | + +## Production safety gate + +이번 세션의 사고 재발 방지: + +1. ws-node074 의 `/usr/local/vgpu/libvgpu.so` swap 전 항상 `.bak-pre-stepC2` 백업. +2. Swap 직후 baseline runheadless 1회 (no LD_PRELOAD) → alive 확인. 실패 시 즉시 restore. +3. Baseline 통과 시에만 LD_PRELOAD-forced 검증 진행. +4. 모든 swap 단계는 `md5sum` before/after 로 기록. +5. isaac-launchable-0 / isaac-launchable-1 의 3/3 Running steady state 가 swap 후에도 유지되는지 monitor. + +## Compatibility / 호환성 약속 + +- 기존 manifest 사용자 (4-27 새벽 패치 시점에 manifest installer 가 활성된 환경) 는 manifest 의 `library_path` 만 update 하면 동작 — Vulkan layer 의 ABI / behavior 는 유지. +- Step D 의 활성화 webhook 은 manifest installer + LD_PRELOAD config 가 분리됨을 인지해야 함 (별도 .so 두 개 install). +- `libvgpu.so` 의 신규 export (`hami_core_*`) 는 추가일 뿐. 기존 internal 함수 변경 없음. + +## Out of scope (이번 spec 에서 다루지 않음) + +- Tasks 1+2 의 cache + GIPA fallback 재도입 — 새 architecture 검증 통과 후 별도 phase. +- root cause 진단 spike (ELF/symbol diff, LD_DEBUG) — `libvgpu_vk.so` 분리만으로 영향이 사라지는지 보고 결정. +- HAMi parent 의 webhook / namespaceSelector / opt-in label — Step A / Step D scope. +- `hami.json` manifest 의 자동 install/uninstall (DaemonSet 또는 webhook 주입) — Step D scope. 이번 spec 은 manifest 파일 자체와 그것이 가리킬 .so 만. + +## PR + +`Project-HAMi/HAMi-core` (libvgpu) 의 `vulkan-layer` branch 에 새 commits. 별도 PR 또는 PR #182 의 후속 commits. parent repo `HAMi` 의 submodule SHA bump 는 기존 PR #1803 또는 새 PR. + +## Test plan (high level) + +1. local docker `make build-in-docker` → `libvgpu.so` + `libvgpu_vk.so` 두 산출물 생성 검증. +2. local `nm -D` / `readelf -d` 로 export / NEEDED 검증. +3. local docker 에서 `test_cuda_null_guards` 9/9 + `test_layer`/`test_memprops`/`test_alloc` 통과. +4. 
ws-node074 swap → baseline runheadless alive → LD_PRELOAD-only × 5 alive (no manifest). +5. ws-node074 manifest 활성 (Step D 와 통합) → 5/5 alive + partition clamp + HAMI_VK_TRACE > 0. diff --git a/docs/superpowers/specs/2026-04-29-step-d-vulkan-opt-in-production-activation.md b/docs/superpowers/specs/2026-04-29-step-d-vulkan-opt-in-production-activation.md new file mode 100644 index 000000000..2687a88c1 --- /dev/null +++ b/docs/superpowers/specs/2026-04-29-step-d-vulkan-opt-in-production-activation.md @@ -0,0 +1,153 @@ +# Step D — Vulkan layer opt-in production activation + 4-path 검증 + +## 배경 + +Step C 재설계 (`docs/superpowers/specs/2026-04-29-step-c-redesign-vk-so-split.md`, plan `2026-04-29-step-c-vk-so-split.md`) 가 완료. 산출물: + +- `libvgpu.so`: HAMi-core 만 (vk* 미export, 5개 `hami_core_*` export). 검증된 build md5 `1bd8f078a15b20e86b78626ddb938141`. +- `libvgpu_vk.so` (신규): Vulkan implicit-layer code. DT_NEEDED → `libvgpu.so`. Build md5 `95b44957ca3546fb72f8b5d7d699a4aa`. +- `hami.json` manifest (`libvgpu/share/hami/hami.json`): `library_path = /usr/local/vgpu/libvgpu_vk.so`, `type = INSTANCE`, api 1.3.0. +- ws-node074 검증: LD_PRELOAD `libvgpu.so` (manifest 미설치) × 5 → 5/5 alive (regression class 사라짐). + +다만 RT9 의 manifest 활성 검증에서 `HAMI_VK_TRACE > 0` 은 확인되지 않음 — Kit 의 embedded Conan vulkan-loader 가 우리 GIPA 를 traverse 하지 않음. **Step D 의 4-path 검증이 이 부분의 closure**. + +기존 production state (4-27 새벽 패치 이후 baseline): + +- `volcano-device-plugin` DaemonSet (image `10.61.3.124:30002/library/volcano-vgpu-device-plugin:vulkan-v1`) 이 `postStart` lifecycle hook 으로 `cp -rf /k8s-vgpu/lib/nvidia/. /usr/local/vgpu/` 실행 → 호스트 `/usr/local/vgpu/libvgpu.so` 가 image 의 .so 로 매번 reset. +- `hami-vulkan-manifest` ConfigMap (`kube-system/`) 에 `hami.json` 정의. 현재 `library_path: /usr/local/vgpu/libvgpu.so`, `type: GLOBAL`, `enable_environment: HAMI_VULKAN_ENABLE=1`. +- `hami-vulkan-manifest-installer` DaemonSet 이 ConfigMap 의 `hami.json` 을 host `/usr/local/vgpu/vulkan/implicit_layer.d/hami.json` 으로 install. 
현재 `nodeSelector: hami.io/disabled: "true"` 로 비활성 (4-27 새벽 패치 호환 충돌 후 baseline 보존). +- HAMi webhook (`pkg/device/nvidia/device.go::applyVulkanAnnotation`) 가 pod annotation `hami.io/vulkan: "true"` 인식해서 container 에 `HAMI_VULKAN_ENABLE=1` + `NVIDIA_DRIVER_CAPABILITIES` 에 `graphics` 추가. 이 코드는 이미 master 에 있음. + +## 목적 + +Step C 의 `libvgpu_vk.so` 가 **production opt-in 활성 path 에서 실제로 동작**함을 검증하고, partition enforce 가 4 path 모두에서 작동함을 입증한다. 검증은 ws-node074 isaac-launchable namespace 의 isaac-launchable-0 pod 에서 수행. + +## 핵심 결정 + +| # | 결정 | 선택 | +|---|---|---| +| 1 | `libvgpu.so` + `libvgpu_vk.so` 호스트 install 방식 | volcano-device-plugin image 에 두 파일 모두 ship (image rebuild). 기존 `cp -rf /k8s-vgpu/lib/nvidia/.` lifecycle 가 둘 다 install. 별도 DaemonSet 추가 안 함 | +| 2 | manifest CM 변경 | 기존 `hami-vulkan-manifest` ConfigMap update — `library_path` → `/usr/local/vgpu/libvgpu_vk.so`, `type` → `INSTANCE`, `enable_environment` 유지 (`HAMI_VULKAN_ENABLE: "1"`) | +| 3 | manifest installer DaemonSet 재활성 | `nodeSelector` 를 `hami.io/disabled: "true"` → `nvidia.com/gpu.present: "true"` 로 복귀. install path 그대로 (`/usr/local/vgpu/vulkan/implicit_layer.d/hami.json`) | +| 4 | opt-in trigger | 기존 `hami.io/vulkan: "true"` annotation + webhook injection 그대로. 추가 코드 변경 0 | +| 5 | 4-path 검증 method | isaac-launchable-0 vscode container 에서 ad-hoc shell + python script 실행. 별도 test pod 만들지 않음 (existing pod 활용) | +| 6 | rollback 안전장치 | swap 전 backup md5 기록, 각 단계 후 baseline runheadless 확인, 실패 시 즉시 backup restore | + +## 비변경 사항 + +- HAMi parent Go 코드 (`pkg/device/nvidia/device.go`, webhook). `applyVulkanAnnotation` 그대로. +- `libvgpu` (HAMi-core) 코드 — Step C 끝낸 그대로. +- helm chart templates — Step D 는 runtime YAMLs (`cluster/runtime/snapshot-2026-04-28/`) 만 update. chart 통합은 별도 Step. +- volcano-device-plugin (Volcano fork) Go 코드 — 변경 없이 image rebuild 만. + +## 호환성 약속 + +- `hami.io/vulkan: "true"` annotation 미설정 pod: HAMI_VULKAN_ENABLE 미주입 → loader manifest 의 `enable_environment` 매칭 실패 → layer 미활성. 기존 동작 그대로. 
+- annotation true 설정 pod: webhook 가 env 주입 → layer 활성 → partition enforce. +- `volcano-vgpu-device-plugin:vulkan-v1` image rebuild 는 기존 build pipeline 재사용. tag 만 `vulkan-v2` 로 bump. + +## Architecture + +``` +┌── volcano-device-plugin DS (priv container, image vulkan-v2 신규) ──┐ +│ - postStart: cp -rf /k8s-vgpu/lib/nvidia/. /usr/local/vgpu/ │ +│ → /usr/local/vgpu/libvgpu.so (Step C build) │ +│ → /usr/local/vgpu/libvgpu_vk.so (신규) │ +│ → /usr/local/vgpu/ld.so.preload (기존) │ +└────────────────────────────────────────────────────────────────────┘ + ↓ +┌── hami-vulkan-manifest ConfigMap (kube-system) ────────────────────┐ +│ hami.json: │ +│ "type": "INSTANCE" │ +│ "library_path": "/usr/local/vgpu/libvgpu_vk.so" │ +│ "enable_environment": { "HAMI_VULKAN_ENABLE": "1" } │ +└────────────────────────────────────────────────────────────────────┘ + ↓ +┌── hami-vulkan-manifest-installer DS (재활성, nodeSelector 복구) ───┐ +│ - cp /manifest/hami.json → /host/usr/local/vgpu/vulkan/ │ +│ implicit_layer.d/hami.json │ +└────────────────────────────────────────────────────────────────────┘ + ↓ +┌── pod (with annotation hami.io/vulkan: "true") ────────────────────┐ +│ webhook injects: │ +│ - HAMI_VULKAN_ENABLE=1 │ +│ - NVIDIA_DRIVER_CAPABILITIES = ...,graphics │ +│ Vulkan loader 가 manifest 발견 → enable_environment 매칭 → │ +│ libvgpu_vk.so dlopen → DT_NEEDED libvgpu.so → 5 hami_core_* │ +│ resolved → layer chain 진입 │ +└────────────────────────────────────────────────────────────────────┘ +``` + +## Components + +| 단위 | 위치 | 변경 종류 | +|---|---|---| +| `volcano-vgpu-device-plugin` image (vulkan-v2) | external (Volcano fork) | rebuild — image 의 `/k8s-vgpu/lib/nvidia/` 에 새 `libvgpu.so` + `libvgpu_vk.so` 둘 다 포함 | +| `cluster/runtime/snapshot-2026-04-28/hami-vulkan-manifest-cm.yaml` | repo | update — library_path / type / 주석 | +| `cluster/runtime/snapshot-2026-04-28/hami-vulkan-manifest-installer-ds.yaml` | repo | update — nodeSelector 복구 | +| 
`cluster/runtime/snapshot-2026-04-28/volcano-device-plugin-ds.yaml` | repo | update — image tag → vulkan-v2 | +| `cluster/runtime/snapshot-2026-04-28/4-path-verification.sh` (신규) | repo | NVML / CUDA / Vulkan memory query / Vulkan allocate 검증 script | + +(snapshot 디렉토리 명을 `snapshot-2026-04-29-step-d` 로 새로 만들거나 기존 디렉토리 이름 변경할지는 plan 단계 결정.) + +## Activation flow + +production deploy: + +1. volcano-device-plugin image rebuild + push (`vulkan-v2` tag). +2. ConfigMap `hami-vulkan-manifest` apply (library_path 변경). +3. DaemonSet `hami-vulkan-manifest-installer` patch (nodeSelector 복구) → DS pod schedule → manifest install 실행. +4. DaemonSet `volcano-device-plugin` image bump → pod rollout → postStart lifecycle 가 새 .so 두 개 install. +5. isaac-launchable-0 pod 의 annotation 에 `hami.io/vulkan: "true"` 추가 (이미 있을 수도). pod 재시작 → webhook 가 env 주입. +6. 4-path verification 실행. + +## 4-path verification + +4 path 모두 `hami.io/vulkan: "true"` annotation 활성된 isaac-launchable-0 의 vscode container 에서 실행: + +| Path | 명령 | 기대 | +|---|---|---| +| 1. NVML hook | `nvidia-smi --query-gpu=memory.total --format=csv,noheader` | `23552 MiB` (clamp). 이미 검증 — 그대로. | +| 2. CUDA driver hook | python: `import pycuda.driver as cuda; cuda.init(); ctx = cuda.Device(0).make_context(); free, total = cuda.mem_get_info(); print(total)` | `23552 * 1024 * 1024` ≈ 24696061952 bytes (clamp) | +| 3. Vulkan memory query | python: `vkGetPhysicalDeviceMemoryProperties` 의 `memoryHeaps[device-local].size` | `23552 * 1024 * 1024` (clamp) | +| 4. Vulkan allocate | python: `vkAllocateMemory(VkMemoryAllocateInfo{ size = 25 * 1024 * 1024 * 1024 })` (25 GiB > 23 GiB partition) | `VK_ERROR_OUT_OF_DEVICE_MEMORY` | + +추가: + +- Manifest 가 active layer 로 enumerated 되는지 (`VK_LOADER_DEBUG=layer` 출력) 확인. +- HAMI_VK_TRACE > 0 (layer 가 호출됨을 입증) — Kit 의 embedded Conan loader 우회를 위해 host system Vulkan loader 쓰는 python 스크립트로 검증. + +`vk_partition_test.py` 같은 script 를 신규 작성 (또는 기존 isaac-sim/ 디렉토리에서 재사용). 
위치: `cluster/runtime/snapshot-2026-04-28/4-path-verification.sh` 또는 isaac-launchable-0 의 home dir. + +## Production safety gate + +각 단계마다: + +1. **Pre-step 백업**: 현재 production state 의 md5sum + ConfigMap export + DaemonSet status 기록. +2. **Apply**: kubectl apply / patch. +3. **Post-step verify**: isaac-launchable-0 / -1 baseline runheadless 1회 — `exit=124 crash=0 listen=1` 확인. 실패 시 즉시 rollback (backup 적용). +4. **Roll forward only on green**. + +## 비검증 항목 + +- helm chart 통합 (현재 chart values 에 vulkan toggle 없음. 추가는 별도 Step). +- usd-composer / 다른 Vulkan 사용 pod 검증 (Step D 는 isaac-launchable-0 만). +- multi-GPU 케이스 (현재 ws-node074 single-GPU 로만 검증). + +## Test plan (high level) + +1. volcano-vgpu-device-plugin image rebuild + push. +2. ConfigMap update + patch DS — DS pod 가 manifest install 한 후 isaac-launchable-0 baseline runheadless 1회 확인. +3. volcano-device-plugin DS image bump → pod rollout — `/usr/local/vgpu/libvgpu_vk.so` 존재 + md5 = 새 build md5 확인. +4. isaac-launchable-0 annotation 확인 (이미 `hami.io/vulkan: "true"` 인지) + pod 재시작. +5. 4-path 검증 실행. 4/4 expected 결과. +6. HAMI_VK_TRACE > 0 확인 (host system loader path 통한 python script). +7. usd-composer 등 다른 Vulkan 사용 pod 영향 0 확인 (steady state Running 유지). + +## Out of scope (이번 spec 에서 다루지 않음) + +- Tasks 1+2 재도입 (`996cb22` cache + Enumerate hooks, `eea2beb` GIPA fallback). 별도 follow-up plan. +- helm chart 의 vulkan toggle / values 추가. +- `enable_environment` 외 alternative trigger (예: env-var-prefix manifest, side-channel labels). 현재 path 가 standard 이므로 그대로 유지. +- volcano-device-plugin Go 코드 변경 (image rebuild 만). +- Multi-GPU partition enforce 검증. 
diff --git a/docs/vulkan-vgpu-e2e-checklist.md b/docs/vulkan-vgpu-e2e-checklist.md new file mode 100644 index 000000000..30eb561a2 --- /dev/null +++ b/docs/vulkan-vgpu-e2e-checklist.md @@ -0,0 +1,83 @@ +# Vulkan vGPU — Manual E2E Verification Checklist + +This checklist must be executed on a Kubernetes cluster with at least one +NVIDIA GPU node running HAMi with the Vulkan-enabled `libvgpu.so`. Automation +is deferred until an NVIDIA-capable CI runner is available. + +## Prerequisites + +1. HAMi scheduler + device plugin built from `feat/vulkan-vgpu` branch, + including the bumped `libvgpu` submodule pointer (commit `b60b4e6` or + later). +2. NVIDIA Container Toolkit installed, default runtime `nvidia`. +3. `libvgpu.so` built from HAMi-core `vulkan-layer` branch (commit `579a421` + or later) and shipped with the manifest + `/etc/vulkan/implicit_layer.d/hami.json` in the HAMi vgpu image. + +## 1. Heap clamp (`vulkaninfo`) + +``` +kubectl apply -f examples/nvidia/vulkan_example.yaml +kubectl logs hami-vulkan-example | grep -iE "heap|device local" +``` + +**Pass criteria:** the reported `heapSize` for the `DEVICE_LOCAL` heap is +**≤ 1073741824 bytes (1 GiB)**, matching `nvidia.com/gpumem: 1024`. + +## 2. Allocation exceed → `VK_ERROR_OUT_OF_DEVICE_MEMORY` + +Build a tiny allocation-stress image (pseudocode): +```c +for (int i = 0; i < 5; ++i) { + VkMemoryAllocateInfo info = { .allocationSize = 512*1024*1024 }; + VkResult r = vkAllocateMemory(dev, &info, NULL, &m[i]); + printf("alloc %d -> %d\n", i, r); +} +``` +Package as `ghcr.io/<your-org>/vulkan-alloc-stress:latest`, deploy with the same +annotation + `gpumem: 1024`. + +**Pass criteria:** first two allocations return `VK_SUCCESS (0)`, the third +returns `VK_ERROR_OUT_OF_DEVICE_MEMORY (-2)`. + +## 3. SM throttle on `vkQueueSubmit` + +Image: any Vulkan compute workload that loops `vkQueueSubmit` continuously +(e.g. `vkcube --headless` loop, or custom compute shader pinging GPU).
+Pod spec: add the `nvidia.com/gpucores: "30"` resource limit. + +**Pass criteria:** `nvidia-smi dmon -s u` on the host reports GPU compute +utilization averaged near 30% (allowing for token-bucket refill jitter of about ±120 ms), not +100%. + +## 4. Mixed CUDA + Vulkan shared budget + +Image containing both a CUDA `cudaMalloc(512 MiB)` loop and Vulkan +`vkAllocateMemory(512 MiB)` loop. +Pod spec: `gpumem: 1024` + `hami.io/vulkan: "true"`. + +**Pass criteria:** +- Sum of successful allocations across CUDA + Vulkan does **not** exceed + 1024 MiB. +- Either path may be the one that starts failing depending on scheduling; + both `VK_ERROR_OUT_OF_DEVICE_MEMORY` and `cudaErrorMemoryAllocation` are + valid end states. + +## 5. Opt-out still works for CUDA-only pods + +Deploy a pod with `nvidia.com/gpumem` but **no** `hami.io/vulkan` annotation. + +**Pass criteria:** +- `env | grep NVIDIA_DRIVER_CAPABILITIES` inside the container is unchanged + from the image default (`compute,utility` unless image overrides). +- `env | grep HAMI_VULKAN_ENABLE` is empty. +- CUDA workloads continue to be throttled/clamped as before. + +## Results log + +Record cluster name, node GPU model, HAMi image tag, HAMi-core image tag, +and pass/fail for each of the 5 checks in a dated entry below. + +| Date | Cluster | GPU | HAMi tag | libvgpu tag | 1 | 2 | 3 | 4 | 5 | +|------|---------|-----|----------|-------------|---|---|---|---|---| +| _pending_ | - | - | - | - | - | - | - | - | - | diff --git a/docs/vulkan-vgpu-support.md b/docs/vulkan-vgpu-support.md new file mode 100644 index 000000000..af49f39e5 --- /dev/null +++ b/docs/vulkan-vgpu-support.md @@ -0,0 +1,41 @@ +# Vulkan vGPU Support + +HAMi partitions NVIDIA GPUs for Vulkan workloads by injecting a Vulkan implicit +layer (`VK_LAYER_HAMI_vgpu`) that shares the same VRAM and SM budgets used by +the existing CUDA hooks. + +## Enabling Vulkan partitioning + +Add the `hami.io/vulkan: "true"` annotation to any pod that uses HAMi NVIDIA +resources.
The webhook will: + +- Union `graphics` into `NVIDIA_DRIVER_CAPABILITIES` so the NVIDIA Container + Toolkit mounts the Vulkan ICD and graphics libraries. +- Set `HAMI_VULKAN_ENABLE=1` which activates the HAMi Vulkan layer via its + `enable_environment` clause in the implicit layer manifest. + +Example: `examples/nvidia/vulkan_example.yaml`. + +## What gets limited + +- `nvidia.com/gpumem` enforces VRAM allocation across **both** CUDA and Vulkan + in the container, sharing a single budget. +- `nvidia.com/gpucores` throttles Vulkan `vkQueueSubmit[2]` using the same + token-bucket rate limiter as `cuLaunchKernel`. +- `vkGetPhysicalDeviceMemoryProperties[2]` clamps the device-local heap size + to the pod budget so apps that size allocations from this value self-limit. + +## What is not limited (yet) + +- Vulkan Video (`VK_KHR_video_queue`) submissions. +- Frame-pacing jitter introduced by throttling on graphics queues (documented + behavior; strict/cooperative modes are a future option). + +## Troubleshooting + +| Symptom | Check | +|---------|-------| +| Container has no `vulkan` CLI / libs | Annotation absent or `NVIDIA_DRIVER_CAPABILITIES` already frozen to `compute` by image. | +| `vkAllocateMemory` always succeeds | Layer did not activate — ensure `HAMI_VULKAN_ENABLE=1` set and `/etc/vulkan/implicit_layer.d/hami.json` exists. | +| `vulkaninfo` still shows full VRAM heap | Layer manifest not loaded; run `VK_LOADER_DEBUG=all vulkaninfo` to see layer scan. | +| Nothing gets throttled | `rate_limiter` no-ops when SM limit is 0, >=100, or HAMi's utilization switch is disabled. Confirm `nvidia.com/gpucores` was requested on the pod. 
| diff --git a/docs/vulkan-vgpu-support_cn.md b/docs/vulkan-vgpu-support_cn.md new file mode 100644 index 000000000..c4b4aa042 --- /dev/null +++ b/docs/vulkan-vgpu-support_cn.md @@ -0,0 +1,32 @@ +# Vulkan vGPU 支持 + +HAMi 通过注入 Vulkan 隐式层(`VK_LAYER_HAMI_vgpu`)对 NVIDIA GPU 进行 Vulkan 工作负载的切分。该层与已有的 CUDA 钩子共享同一套 VRAM 与 SM 预算。 + +## 启用方式 + +在使用 HAMi NVIDIA 资源的 Pod 上添加 annotation `hami.io/vulkan: "true"`。Webhook 会: + +- 将 `graphics` 合并进 `NVIDIA_DRIVER_CAPABILITIES`,以便 NVIDIA Container Toolkit 挂载 Vulkan ICD 与图形库。 +- 设置 `HAMI_VULKAN_ENABLE=1`,通过隐式层 manifest 的 `enable_environment` 激活 HAMi Vulkan 层。 + +示例:`examples/nvidia/vulkan_example.yaml`。 + +## 生效范围 + +- `nvidia.com/gpumem` 对容器内 CUDA 与 Vulkan 的 VRAM 分配**共享同一预算**。 +- `nvidia.com/gpucores` 通过与 `cuLaunchKernel` 相同的 token-bucket 限速器对 `vkQueueSubmit[2]` 进行限速。 +- `vkGetPhysicalDeviceMemoryProperties[2]` 将 device-local 堆大小裁剪为 Pod 预算。 + +## 未涵盖项(未来工作) + +- Vulkan Video(`VK_KHR_video_queue`)提交。 +- 图形队列限速导致的帧抖动(已记录,未来提供 strict/cooperative 模式)。 + +## 故障排查 + +| 现象 | 检查 | +|------|------| +| 容器没有 Vulkan 库 | annotation 缺失,或镜像已冻结 `NVIDIA_DRIVER_CAPABILITIES=compute`。 | +| `vkAllocateMemory` 总是成功 | 层未激活 — 确认 `HAMI_VULKAN_ENABLE=1` 与 `/etc/vulkan/implicit_layer.d/hami.json` 存在。 | +| `vulkaninfo` 仍报告全量 VRAM | Manifest 未加载;可 `VK_LOADER_DEBUG=all vulkaninfo` 查看扫描日志。 | +| 限速未生效 | `rate_limiter` 在 SM 限额为 0、>=100 或 HAMi 利用率开关关闭时不工作。确认 Pod 已请求 `nvidia.com/gpucores`。 | diff --git a/examples/nvidia/vulkan_example.yaml b/examples/nvidia/vulkan_example.yaml new file mode 100644 index 000000000..da7f01368 --- /dev/null +++ b/examples/nvidia/vulkan_example.yaml @@ -0,0 +1,17 @@ +apiVersion: v1 +kind: Pod +metadata: + name: hami-vulkan-example + annotations: + hami.io/vulkan: "true" +spec: + restartPolicy: Never + containers: + - name: vulkaninfo + image: khronosgroup/vulkan-samples:latest + command: ["vulkaninfo"] + resources: + limits: + nvidia.com/gpu: "1" + nvidia.com/gpumem: "1024" # 1 GiB VRAM budget (shared with CUDA) + nvidia.com/gpucores: 
"30" # 30% SM throttle (shared with CUDA) diff --git a/libvgpu b/libvgpu index 8c32de630..8733ec48b 160000 --- a/libvgpu +++ b/libvgpu @@ -1 +1 @@ -Subproject commit 8c32de630b24f5f7d6355fbeb0034845d3bdafb7 +Subproject commit 8733ec48b8486657ff4a4e725e520dae00c943a1 diff --git a/pkg/device-plugin/nvidiadevice/nvinternal/plugin/server.go b/pkg/device-plugin/nvidiadevice/nvinternal/plugin/server.go index 4c10d7c71..810e23d43 100644 --- a/pkg/device-plugin/nvidiadevice/nvinternal/plugin/server.go +++ b/pkg/device-plugin/nvidiadevice/nvinternal/plugin/server.go @@ -615,6 +615,7 @@ func (plugin *NvidiaDevicePlugin) Allocate(ctx context.Context, reqs *kubeletdev podAllocationFailed(nodename, current, NodeLockNvidia) return nil, fmt.Errorf("failed to get allocate response: %v", err) } + response.Mounts = appendVulkanManifestMount(response.Mounts, hostHookPath) responses.ContainerResponses = append(responses.ContainerResponses, response) } else { currentCtr, devreq, err := GetNextDeviceRequest(nvidia.NvidiaGPUDevice, *current) @@ -699,6 +700,7 @@ func (plugin *NvidiaDevicePlugin) Allocate(ctx context.Context, reqs *kubeletdev ReadOnly: true}, ) } + response.Mounts = appendVulkanManifestMount(response.Mounts, hostHookPath) _, err = os.Stat(fmt.Sprintf("%s/vgpu/license", hostHookPath)) if err == nil { response.Mounts = append(response.Mounts, &kubeletdevicepluginv1beta1.Mount{ diff --git a/pkg/device-plugin/nvidiadevice/nvinternal/plugin/vulkan.go b/pkg/device-plugin/nvidiadevice/nvinternal/plugin/vulkan.go new file mode 100644 index 000000000..edca8361a --- /dev/null +++ b/pkg/device-plugin/nvidiadevice/nvinternal/plugin/vulkan.go @@ -0,0 +1,39 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The HAMi Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. 
+ */ + +package plugin + +import ( + "os" + + kubeletdevicepluginv1beta1 "k8s.io/kubelet/pkg/apis/deviceplugin/v1beta1" +) + +// appendVulkanManifestMount appends a bind-mount for the HAMi Vulkan implicit +// layer manifest when present on the host. The manifest is placed under +// hostHookPath/vgpu/vulkan/implicit_layer.d/hami.json by vgpu-init.sh as part +// of the standard lib distribution. +// +// The manifest's enable_environment guard means the Vulkan layer activates +// only when the pod sets HAMI_VULKAN_ENABLE=1 (injected by the admission +// webhook for pods that carry the hami.io/vulkan="true" annotation), so the +// mount is safe to append unconditionally for both vGPU and MIG paths. +// +// Returns the input slice unchanged when the host file is absent, so nodes +// without the Vulkan manifest do not block pod startup. +func appendVulkanManifestMount(mounts []*kubeletdevicepluginv1beta1.Mount, hostHookPath string) []*kubeletdevicepluginv1beta1.Mount { + manifestHost := hostHookPath + "/vgpu/vulkan/implicit_layer.d/hami.json" + if _, err := os.Stat(manifestHost); err != nil { + return mounts + } + return append(mounts, &kubeletdevicepluginv1beta1.Mount{ + ContainerPath: "/etc/vulkan/implicit_layer.d/hami.json", + HostPath: manifestHost, + ReadOnly: true, + }) +} diff --git a/pkg/device-plugin/nvidiadevice/nvinternal/plugin/vulkan_test.go b/pkg/device-plugin/nvidiadevice/nvinternal/plugin/vulkan_test.go new file mode 100644 index 000000000..4397b1fef --- /dev/null +++ b/pkg/device-plugin/nvidiadevice/nvinternal/plugin/vulkan_test.go @@ -0,0 +1,95 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The HAMi Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. 
+ */ + +package plugin + +import ( + "os" + "path/filepath" + "testing" + + kubeletdevicepluginv1beta1 "k8s.io/kubelet/pkg/apis/deviceplugin/v1beta1" +) + +// appendVulkanManifestMount must return the input slice untouched when the +// Vulkan implicit-layer manifest is absent on the host. This is the path +// taken on nodes where vgpu-init.sh has not (or cannot) place the manifest, +// and Pod startup must not block on a missing optional file. +func TestAppendVulkanManifestMount_Absent(t *testing.T) { + dir := t.TempDir() // no vgpu/vulkan/implicit_layer.d/hami.json under here + in := []*kubeletdevicepluginv1beta1.Mount{ + {ContainerPath: "/already/there", HostPath: "/already/there"}, + } + out := appendVulkanManifestMount(in, dir) + if len(out) != len(in) { + t.Fatalf("expected mounts unchanged when manifest absent, got %d mounts (want %d)", len(out), len(in)) + } + for i := range in { + if out[i] != in[i] { + t.Fatalf("mount[%d] mutated: got %+v, want %+v", i, out[i], in[i]) + } + } +} + +// When the Vulkan implicit-layer manifest is present on the host, the helper +// must append a single bind-mount at the well-known container path so the +// Vulkan loader picks the layer up via the enable_environment guard. 
+func TestAppendVulkanManifestMount_Present(t *testing.T) { + dir := t.TempDir() + manifestRel := "vgpu/vulkan/implicit_layer.d/hami.json" + if err := os.MkdirAll(filepath.Dir(filepath.Join(dir, manifestRel)), 0o755); err != nil { + t.Fatalf("setup mkdir: %v", err) + } + manifestHost := filepath.Join(dir, manifestRel) + if err := os.WriteFile(manifestHost, []byte("{}"), 0o644); err != nil { + t.Fatalf("setup writefile: %v", err) + } + + in := []*kubeletdevicepluginv1beta1.Mount{} + out := appendVulkanManifestMount(in, dir) + if len(out) != 1 { + t.Fatalf("expected exactly one mount appended, got %d", len(out)) + } + m := out[0] + if m.ContainerPath != "/etc/vulkan/implicit_layer.d/hami.json" { + t.Errorf("ContainerPath = %q, want /etc/vulkan/implicit_layer.d/hami.json", m.ContainerPath) + } + if m.HostPath != manifestHost { + t.Errorf("HostPath = %q, want %q", m.HostPath, manifestHost) + } + if !m.ReadOnly { + t.Errorf("ReadOnly = false, want true (manifest must not be writable from container)") + } +} + +// Helper must preserve the order and identity of preceding mounts when it +// appends. Regression guard for the MIG / non-MIG callers in server.go that +// rely on positional ordering. 
+func TestAppendVulkanManifestMount_PreservesPriorMounts(t *testing.T) { + dir := t.TempDir() + manifestRel := "vgpu/vulkan/implicit_layer.d/hami.json" + if err := os.MkdirAll(filepath.Dir(filepath.Join(dir, manifestRel)), 0o755); err != nil { + t.Fatalf("setup mkdir: %v", err) + } + if err := os.WriteFile(filepath.Join(dir, manifestRel), []byte("{}"), 0o644); err != nil { + t.Fatalf("setup writefile: %v", err) + } + + first := &kubeletdevicepluginv1beta1.Mount{ContainerPath: "/a", HostPath: "/a"} + second := &kubeletdevicepluginv1beta1.Mount{ContainerPath: "/b", HostPath: "/b"} + out := appendVulkanManifestMount([]*kubeletdevicepluginv1beta1.Mount{first, second}, dir) + if len(out) != 3 { + t.Fatalf("expected 3 mounts, got %d", len(out)) + } + if out[0] != first || out[1] != second { + t.Fatalf("prior mounts reordered or replaced") + } + if out[2].ContainerPath != "/etc/vulkan/implicit_layer.d/hami.json" { + t.Errorf("appended mount has wrong ContainerPath: %q", out[2].ContainerPath) + } +} diff --git a/pkg/device/nvidia/device.go b/pkg/device/nvidia/device.go index 9deb08cdd..0b5611be9 100644 --- a/pkg/device/nvidia/device.go +++ b/pkg/device/nvidia/device.go @@ -56,6 +56,34 @@ const ( MpsMode = "mps" ) +const ( + VulkanEnableAnno = "hami.io/vulkan" + VulkanLayerName = "VK_LAYER_HAMI_vgpu" + NvidiaDriverCapsEnvVar = "NVIDIA_DRIVER_CAPABILITIES" + HamiVulkanEnvVar = "HAMI_VULKAN_ENABLE" + + // VulkanManifestVolumeName is the pod-level volume that exposes the + // host's hami.json implicit-layer manifest into the container at the + // standard Vulkan loader search path. + VulkanManifestVolumeName = "hami-vulkan-manifest" + // VulkanManifestHostPath is where the hami-vulkan-manifest-installer + // DaemonSet drops the manifest on each GPU node. The webhook mounts + // this single file (not the whole directory) into the container so + // the existing /etc/vulkan/implicit_layer.d/nvidia_layers.json from + // the image keeps working alongside it. 
+ VulkanManifestHostPath = "/etc/vulkan/implicit_layer.d/hami.json" + VulkanManifestContainerPath = "/etc/vulkan/implicit_layer.d/hami.json" + // VulkanLibSoVolumeName exposes libvgpu_vk.so (Step C split) into + // the container at the path the manifest's library_path references. + // volcano-vgpu-device-plugin already mounts /usr/local/vgpu/libvgpu.so + // for every GPU pod, but it does NOT mount libvgpu_vk.so — that file + // is only ever needed by Vulkan-opt-in pods, so the webhook handles + // it on a per-pod basis. + VulkanLibSoVolumeName = "hami-vulkan-lib-so" + VulkanLibSoHostPath = "/usr/local/vgpu/libvgpu_vk.so" + VulkanLibSoContainerPath = "/usr/local/vgpu/libvgpu_vk.so" +) + var ( NodeName string RuntimeSocketFlag string @@ -370,6 +398,7 @@ func (dev *NvidiaGPUDevices) MutateAdmission(ctr *corev1.Container, p *corev1.Po if p.Spec.RuntimeClassName == nil && dev.config.RuntimeClassName != "" { p.Spec.RuntimeClassName = &dev.config.RuntimeClassName } + applyVulkanAnnotation(ctr, p) } if !hasResource && dev.config.OverwriteEnv { @@ -381,6 +410,116 @@ func (dev *NvidiaGPUDevices) MutateAdmission(ctr *corev1.Container, p *corev1.Po return hasResource, nil } +// mergeGraphicsCap returns the union of existing NVIDIA_DRIVER_CAPABILITIES +// tokens with "graphics". If existing contains "all", it is returned unchanged. +// An empty (or whitespace/comma-only) existing value becomes +// "compute,utility,graphics". 
func mergeGraphicsCap(existing string) string {
	const (
		graphicsCap = "graphics"
		fallback    = "compute,utility,graphics"
	)

	// Walk the comma-separated list once, trimming padding, dropping empty
	// entries and remembering each distinct capability in first-seen order.
	present := map[string]bool{}
	var ordered []string
	for _, raw := range strings.Split(existing, ",") {
		name := strings.TrimSpace(raw)
		switch {
		case name == "":
			continue
		case name == "all":
			// "all" already covers graphics; hand the caller's string back as-is.
			return existing
		case present[name]:
			continue
		}
		present[name] = true
		ordered = append(ordered, name)
	}

	switch {
	case len(ordered) == 0:
		// Nothing usable was supplied (empty, blank, or commas only).
		return fallback
	case present[graphicsCap]:
		// Already requested: keep the original spelling untouched.
		return existing
	}
	return strings.Join(append(ordered, graphicsCap), ",")
}

// applyVulkanAnnotation mutates the container env and pod spec when the
// pod opts into Vulkan partitioning. It (1) ensures
// NVIDIA_DRIVER_CAPABILITIES contains "graphics", (2) sets
// HAMI_VULKAN_ENABLE=1 so the loader's enable_environment gate matches,
// and (3) injects a hostPath volume that exposes the per-node hami.json
// implicit-layer manifest at the container's /etc/vulkan/implicit_layer.d/
// path. No-op otherwise.
+func applyVulkanAnnotation(ctr *corev1.Container, pod *corev1.Pod) { + if pod == nil || pod.Annotations[VulkanEnableAnno] != "true" { + return + } + + capsIdx := -1 + hasEnable := false + for i, e := range ctr.Env { + switch e.Name { + case NvidiaDriverCapsEnvVar: + capsIdx = i + case HamiVulkanEnvVar: + hasEnable = true + } + } + + if capsIdx >= 0 { + ctr.Env[capsIdx].Value = mergeGraphicsCap(ctr.Env[capsIdx].Value) + } else { + ctr.Env = append(ctr.Env, corev1.EnvVar{Name: NvidiaDriverCapsEnvVar, Value: mergeGraphicsCap("")}) + } + + if !hasEnable { + ctr.Env = append(ctr.Env, corev1.EnvVar{Name: HamiVulkanEnvVar, Value: "1"}) + } + + ensureHostPathFileVolume(pod, VulkanManifestVolumeName, VulkanManifestHostPath) + ensureHostPathFileVolumeMount(ctr, VulkanManifestVolumeName, VulkanManifestContainerPath) + ensureHostPathFileVolume(pod, VulkanLibSoVolumeName, VulkanLibSoHostPath) + ensureHostPathFileVolumeMount(ctr, VulkanLibSoVolumeName, VulkanLibSoContainerPath) +} + +// ensureHostPathFileVolume appends a HostPathFile volume to the pod once +// (idempotent across calls — used when the same opt-in trigger fires per +// container of a multi-container pod). The volume source is the named +// host file (not directory) so the bind mount is precise. +func ensureHostPathFileVolume(pod *corev1.Pod, name, hostPath string) { + for _, v := range pod.Spec.Volumes { + if v.Name == name { + return + } + } + fileType := corev1.HostPathFile + pod.Spec.Volumes = append(pod.Spec.Volumes, corev1.Volume{ + Name: name, + VolumeSource: corev1.VolumeSource{ + HostPath: &corev1.HostPathVolumeSource{ + Path: hostPath, + Type: &fileType, + }, + }, + }) +} + +// ensureHostPathFileVolumeMount appends a read-only volumeMount referring +// to the named volume into the container. Idempotent per container. 
+func ensureHostPathFileVolumeMount(ctr *corev1.Container, name, mountPath string) { + for _, m := range ctr.VolumeMounts { + if m.Name == name { + return + } + } + ctr.VolumeMounts = append(ctr.VolumeMounts, corev1.VolumeMount{ + Name: name, + MountPath: mountPath, + ReadOnly: true, + }) +} + func (dev *NvidiaGPUDevices) mutateContainerResource(ctr *corev1.Container) bool { _, resourceNameOK := ctr.Resources.Limits[corev1.ResourceName(dev.config.ResourceCountName)] if resourceNameOK { diff --git a/pkg/device/nvidia/device_test.go b/pkg/device/nvidia/device_test.go index 493fec3c5..f6d1a5046 100644 --- a/pkg/device/nvidia/device_test.go +++ b/pkg/device/nvidia/device_test.go @@ -2617,3 +2617,303 @@ func TestFit_TopologyBestCombination(t *testing.T) { assert.Assert(t, uuids["dev-0"]) assert.Assert(t, uuids["dev-2"]) } + +func TestMutateAdmission_VulkanAnno_AddsGraphicsCap(t *testing.T) { + dev := &NvidiaGPUDevices{ + config: NvidiaConfig{ + ResourceCountName: "nvidia.com/gpu", + ResourceMemoryName: "nvidia.com/gpumem", + ResourceCoreName: "nvidia.com/gpucores", + ResourceMemoryPercentageName: "nvidia.com/gpumem-percentage", + }, + } + ctr := &corev1.Container{ + Resources: corev1.ResourceRequirements{ + Limits: corev1.ResourceList{ + "nvidia.com/gpu": *resource.NewQuantity(1, resource.BinarySI), + }, + }, + } + pod := &corev1.Pod{ + ObjectMeta: metav1.ObjectMeta{ + Annotations: map[string]string{VulkanEnableAnno: "true"}, + }, + } + _, err := dev.MutateAdmission(ctr, pod) + assert.NilError(t, err) + + var caps, enable string + for _, e := range ctr.Env { + if e.Name == NvidiaDriverCapsEnvVar { + caps = e.Value + } + if e.Name == HamiVulkanEnvVar { + enable = e.Value + } + } + assert.Assert(t, strings.Contains(caps, "graphics"), "expected graphics in caps, got %q", caps) + assert.Equal(t, enable, "1") +} + +func TestMutateAdmission_VulkanAnno_MergesExistingCaps(t *testing.T) { + dev := &NvidiaGPUDevices{ + config: NvidiaConfig{ + ResourceCountName: "nvidia.com/gpu", 
+ ResourceMemoryName: "nvidia.com/gpumem", + ResourceCoreName: "nvidia.com/gpucores", + ResourceMemoryPercentageName: "nvidia.com/gpumem-percentage", + }, + } + ctr := &corev1.Container{ + Env: []corev1.EnvVar{{Name: NvidiaDriverCapsEnvVar, Value: "compute,utility"}}, + Resources: corev1.ResourceRequirements{ + Limits: corev1.ResourceList{ + "nvidia.com/gpu": *resource.NewQuantity(1, resource.BinarySI), + }, + }, + } + pod := &corev1.Pod{ + ObjectMeta: metav1.ObjectMeta{Annotations: map[string]string{VulkanEnableAnno: "true"}}, + } + _, _ = dev.MutateAdmission(ctr, pod) + + var caps string + for _, e := range ctr.Env { + if e.Name == NvidiaDriverCapsEnvVar { + caps = e.Value + } + } + assert.Assert(t, strings.Contains(caps, "compute")) + assert.Assert(t, strings.Contains(caps, "utility")) + assert.Assert(t, strings.Contains(caps, "graphics")) +} + +func TestMutateAdmission_VulkanAnno_AllCaps_NoChange(t *testing.T) { + dev := &NvidiaGPUDevices{ + config: NvidiaConfig{ResourceCountName: "nvidia.com/gpu"}, + } + ctr := &corev1.Container{ + Env: []corev1.EnvVar{{Name: NvidiaDriverCapsEnvVar, Value: "all"}}, + Resources: corev1.ResourceRequirements{ + Limits: corev1.ResourceList{ + "nvidia.com/gpu": *resource.NewQuantity(1, resource.BinarySI), + }, + }, + } + pod := &corev1.Pod{ + ObjectMeta: metav1.ObjectMeta{Annotations: map[string]string{VulkanEnableAnno: "true"}}, + } + _, _ = dev.MutateAdmission(ctr, pod) + + for _, e := range ctr.Env { + if e.Name == NvidiaDriverCapsEnvVar { + assert.Equal(t, e.Value, "all") + } + } +} + +func TestMutateAdmission_NoVulkanAnno_NoChange(t *testing.T) { + dev := &NvidiaGPUDevices{ + config: NvidiaConfig{ResourceCountName: "nvidia.com/gpu"}, + } + ctr := &corev1.Container{ + Resources: corev1.ResourceRequirements{ + Limits: corev1.ResourceList{ + "nvidia.com/gpu": *resource.NewQuantity(1, resource.BinarySI), + }, + }, + } + pod := &corev1.Pod{} + _, _ = dev.MutateAdmission(ctr, pod) + for _, e := range ctr.Env { + assert.Assert(t, 
e.Name != NvidiaDriverCapsEnvVar, "unexpected caps env") + assert.Assert(t, e.Name != HamiVulkanEnvVar, "unexpected enable env") + } +} + +func TestMutateAdmission_VulkanAnno_NoGPUResource(t *testing.T) { + dev := &NvidiaGPUDevices{ + config: NvidiaConfig{ + ResourceCountName: "nvidia.com/gpu", + ResourceMemoryName: "nvidia.com/gpumem", + ResourceCoreName: "nvidia.com/gpucores", + ResourceMemoryPercentageName: "nvidia.com/gpumem-percentage", + }, + } + ctr := &corev1.Container{Resources: corev1.ResourceRequirements{Limits: corev1.ResourceList{}}} + pod := &corev1.Pod{ + ObjectMeta: metav1.ObjectMeta{Annotations: map[string]string{VulkanEnableAnno: "true"}}, + } + _, _ = dev.MutateAdmission(ctr, pod) + for _, e := range ctr.Env { + assert.Assert(t, e.Name != HamiVulkanEnvVar, "no Vulkan env on non-GPU pod") + } +} + +func TestMutateAdmission_VulkanAnno_IdempotentHamiEnable(t *testing.T) { + dev := &NvidiaGPUDevices{ + config: NvidiaConfig{ResourceCountName: "nvidia.com/gpu"}, + } + ctr := &corev1.Container{ + Env: []corev1.EnvVar{{Name: HamiVulkanEnvVar, Value: "1"}}, + Resources: corev1.ResourceRequirements{ + Limits: corev1.ResourceList{ + "nvidia.com/gpu": *resource.NewQuantity(1, resource.BinarySI), + }, + }, + } + pod := &corev1.Pod{ + ObjectMeta: metav1.ObjectMeta{Annotations: map[string]string{VulkanEnableAnno: "true"}}, + } + _, _ = dev.MutateAdmission(ctr, pod) + count := 0 + for _, e := range ctr.Env { + if e.Name == HamiVulkanEnvVar { + count++ + } + } + assert.Equal(t, count, 1) +} + +func TestMutateAdmission_VulkanAnno_InjectsManifestVolumeMount(t *testing.T) { + dev := &NvidiaGPUDevices{ + config: NvidiaConfig{ResourceCountName: "nvidia.com/gpu"}, + } + ctr := &corev1.Container{ + Resources: corev1.ResourceRequirements{ + Limits: corev1.ResourceList{ + "nvidia.com/gpu": *resource.NewQuantity(1, resource.BinarySI), + }, + }, + } + pod := &corev1.Pod{ + ObjectMeta: metav1.ObjectMeta{Annotations: map[string]string{VulkanEnableAnno: "true"}}, + } + _, _ = 
dev.MutateAdmission(ctr, pod) + + var volume *corev1.Volume + for i := range pod.Spec.Volumes { + if pod.Spec.Volumes[i].Name == VulkanManifestVolumeName { + volume = &pod.Spec.Volumes[i] + } + } + assert.Assert(t, volume != nil, "expected volume %q on pod", VulkanManifestVolumeName) + assert.Assert(t, volume.HostPath != nil, "expected HostPath source") + assert.Equal(t, volume.HostPath.Path, VulkanManifestHostPath) + assert.Assert(t, volume.HostPath.Type != nil) + assert.Equal(t, *volume.HostPath.Type, corev1.HostPathFile) + + var mount *corev1.VolumeMount + for i := range ctr.VolumeMounts { + if ctr.VolumeMounts[i].Name == VulkanManifestVolumeName { + mount = &ctr.VolumeMounts[i] + } + } + assert.Assert(t, mount != nil, "expected volumeMount %q on container", VulkanManifestVolumeName) + assert.Equal(t, mount.MountPath, VulkanManifestContainerPath) + assert.Equal(t, mount.ReadOnly, true) +} + +func TestMutateAdmission_VulkanAnno_VolumeIdempotent(t *testing.T) { + dev := &NvidiaGPUDevices{ + config: NvidiaConfig{ResourceCountName: "nvidia.com/gpu"}, + } + ctr1 := &corev1.Container{ + Resources: corev1.ResourceRequirements{ + Limits: corev1.ResourceList{ + "nvidia.com/gpu": *resource.NewQuantity(1, resource.BinarySI), + }, + }, + } + ctr2 := &corev1.Container{ + Resources: corev1.ResourceRequirements{ + Limits: corev1.ResourceList{ + "nvidia.com/gpu": *resource.NewQuantity(1, resource.BinarySI), + }, + }, + } + pod := &corev1.Pod{ + ObjectMeta: metav1.ObjectMeta{Annotations: map[string]string{VulkanEnableAnno: "true"}}, + } + _, _ = dev.MutateAdmission(ctr1, pod) + _, _ = dev.MutateAdmission(ctr2, pod) + + count := 0 + for _, v := range pod.Spec.Volumes { + if v.Name == VulkanManifestVolumeName { + count++ + } + } + assert.Equal(t, count, 1, "volume should be added once even when multiple containers opt in") + + mountCount1 := 0 + for _, m := range ctr1.VolumeMounts { + if m.Name == VulkanManifestVolumeName { + mountCount1++ + } + } + mountCount2 := 0 + for _, m := 
range ctr2.VolumeMounts { + if m.Name == VulkanManifestVolumeName { + mountCount2++ + } + } + assert.Equal(t, mountCount1, 1) + assert.Equal(t, mountCount2, 1) +} + +func TestMutateAdmission_NoVulkanAnno_NoVolume(t *testing.T) { + dev := &NvidiaGPUDevices{ + config: NvidiaConfig{ResourceCountName: "nvidia.com/gpu"}, + } + ctr := &corev1.Container{ + Resources: corev1.ResourceRequirements{ + Limits: corev1.ResourceList{ + "nvidia.com/gpu": *resource.NewQuantity(1, resource.BinarySI), + }, + }, + } + pod := &corev1.Pod{} + _, _ = dev.MutateAdmission(ctr, pod) + for _, v := range pod.Spec.Volumes { + assert.Assert(t, v.Name != VulkanManifestVolumeName, "no manifest volume without annotation") + } + for _, m := range ctr.VolumeMounts { + assert.Assert(t, m.Name != VulkanManifestVolumeName, "no manifest mount without annotation") + } +} + +func TestMergeGraphicsCap_Empty(t *testing.T) { + assert.Equal(t, mergeGraphicsCap(""), "compute,utility,graphics") +} + +func TestMergeGraphicsCap_WhitespaceOnly(t *testing.T) { + assert.Equal(t, mergeGraphicsCap(" "), "compute,utility,graphics") +} + +func TestMergeGraphicsCap_CommasOnly(t *testing.T) { + // All tokens are empty after trimming -> default fallback. + assert.Equal(t, mergeGraphicsCap(", , ,"), "compute,utility,graphics") +} + +func TestMergeGraphicsCap_All(t *testing.T) { + // "all" implies every capability; do not modify. + assert.Equal(t, mergeGraphicsCap("all"), "all") + assert.Equal(t, mergeGraphicsCap("compute,all,utility"), "compute,all,utility") +} + +func TestMergeGraphicsCap_AlreadyHasGraphics(t *testing.T) { + // graphics already present -> return existing untouched. + assert.Equal(t, mergeGraphicsCap("compute,graphics,utility"), "compute,graphics,utility") +} + +func TestMergeGraphicsCap_DuplicatesAndPadding(t *testing.T) { + // Duplicate tokens are deduped; surrounding whitespace is trimmed; graphics appended. 
+ out := mergeGraphicsCap("compute, compute , utility") + assert.Equal(t, out, "compute,utility,graphics") +} + +func TestMergeGraphicsCap_AppendGraphics(t *testing.T) { + assert.Equal(t, mergeGraphicsCap("compute"), "compute,graphics") + assert.Equal(t, mergeGraphicsCap("compute,utility"), "compute,utility,graphics") +}