Skip to content
Open
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
103 changes: 103 additions & 0 deletions lib/hypervisor/cloudhypervisor/process.go
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,13 @@ package cloudhypervisor

import (
"context"
"errors"
"fmt"
"log/slog"
"net"
"os"
"path/filepath"
"strings"
"syscall"
"time"

Expand Down Expand Up @@ -55,6 +61,8 @@ func (s *Starter) GetVersion(p *paths.Paths) (string, error) {
// StartVM launches Cloud Hypervisor, configures the VM, and boots it.
// Returns the process ID and a Hypervisor client for subsequent operations.
func (s *Starter) StartVM(ctx context.Context, p *paths.Paths, version string, socketPath string, config hypervisor.VMConfig) (int, hypervisor.Hypervisor, error) {
log := logger.FromContext(ctx)

// Validate version
chVersion := vmm.CHVersion(version)
if !vmm.IsVersionSupported(chVersion) {
Expand Down Expand Up @@ -85,18 +93,22 @@ func (s *Starter) StartVM(ctx context.Context, p *paths.Paths, version string, s
vmConfig := ToVMConfig(config)
resp, err := hv.client.CreateVMWithResponse(ctx, vmConfig)
if err != nil {
logStartVMFailureDiagnostics(ctx, log, socketPath, pid, "create_vm", err, 0, "")
return 0, nil, fmt.Errorf("create vm: %w", err)
}
if resp.StatusCode() != 204 {
logStartVMFailureDiagnostics(ctx, log, socketPath, pid, "create_vm", nil, resp.StatusCode(), string(resp.Body))
return 0, nil, fmt.Errorf("create vm failed with status %d: %s", resp.StatusCode(), string(resp.Body))
}

// 4. Boot the VM via HTTP API
bootResp, err := hv.client.BootVMWithResponse(ctx)
if err != nil {
logStartVMFailureDiagnostics(ctx, log, socketPath, pid, "boot_vm", err, 0, "")
return 0, nil, fmt.Errorf("boot vm: %w", err)
}
if bootResp.StatusCode() != 204 {
logStartVMFailureDiagnostics(ctx, log, socketPath, pid, "boot_vm", nil, bootResp.StatusCode(), string(bootResp.Body))
return 0, nil, fmt.Errorf("boot vm failed with status %d: %s", bootResp.StatusCode(), string(bootResp.Body))
}

Expand Down Expand Up @@ -164,3 +176,94 @@ func (s *Starter) RestoreVM(ctx context.Context, p *paths.Paths, version string,
func ptr[T any](v T) *T {
return &v
}

func logStartVMFailureDiagnostics(ctx context.Context, log *slog.Logger, socketPath string, pid int, operation string, requestErr error, statusCode int, responseBody string) {
if log == nil {
return
}

socketExists := false
if _, err := os.Stat(socketPath); err == nil {
socketExists = true
}

socketDialable, socketDialErr := canDialUnixSocket(socketPath)
processRunning := false
if pid > 0 && syscall.Kill(pid, 0) == nil {
processRunning = true
}

ctxErr := ctx.Err()
ctxCause := context.Cause(ctx)
deadline, hasDeadline := ctx.Deadline()

attrs := []any{
"operation", operation,
"socket_path", socketPath,
"socket_exists", socketExists,
"socket_dialable", socketDialable,
"pid", pid,
"process_running", processRunning,
"ctx_err", errorString(ctxErr),
"ctx_cause", errorString(ctxCause),
"vmm_log_tail", tailFile(filepath.Join(filepath.Dir(socketPath), "logs", "vmm.log"), 4096),
}
if hasDeadline {
attrs = append(attrs,
"ctx_deadline", deadline.UTC().Format(time.RFC3339Nano),
"ctx_deadline_in_ms", time.Until(deadline).Milliseconds(),
)
}
if socketDialErr != nil {
attrs = append(attrs, "socket_dial_error", socketDialErr.Error())
}
if requestErr != nil {
attrs = append(attrs,
"request_error", requestErr.Error(),
"request_error_is_context_canceled", errors.Is(requestErr, context.Canceled),
"request_error_is_deadline_exceeded", errors.Is(requestErr, context.DeadlineExceeded),
)
}
if statusCode > 0 {
attrs = append(attrs, "response_status_code", statusCode)
}
if responseBody != "" {
attrs = append(attrs, "response_body", truncateString(responseBody, 1024))
}

log.ErrorContext(ctx, "cloud-hypervisor start_vm diagnostics", attrs...)
}

func canDialUnixSocket(socketPath string) (bool, error) {
conn, err := net.DialTimeout("unix", socketPath, 100*time.Millisecond)
if err != nil {
return false, err
}
_ = conn.Close()
return true, nil
}

func tailFile(path string, maxBytes int) string {
data, err := os.ReadFile(path)
if err != nil {
return fmt.Sprintf("unavailable: %v", err)
}
if len(data) > maxBytes {
data = data[len(data)-maxBytes:]
}
return truncateString(strings.TrimSpace(string(data)), maxBytes)
}

func truncateString(s string, max int) string {
if max <= 0 || len(s) <= max {
return s
}
return s[:max] + "...(truncated)"
}

func errorString(err error) string {
if err == nil {
return ""
}
return err.Error()
}
Loading