From 896f3647c5dbba08c989a959b6f84f7b5ce408cb Mon Sep 17 00:00:00 2001 From: Jakub Dobry Date: Mon, 23 Feb 2026 21:02:00 +0000 Subject: [PATCH 01/11] feat(orchestrator): enable per-build CPU/memory tracking for build sandboxes Add sandbox_type column to sandbox_host_stats and sandbox_metrics_gauge ClickHouse tables to explicitly distinguish build vs runtime metrics. Thread BuildID and SandboxType through RuntimeMetadata so build sandboxes are properly attributed to the overall build. Key changes: - Add cgroup setup to Factory.CreateSandbox (unified with ResumeSandbox) - Initialize HostStatsCollector in CreateSandbox (feature-flag-gated) - Enable envd metrics (MetricsWriteFlag) for CreateSandbox - Add build_id + sandbox_type OTel attributes for sandbox_metrics_gauge - Update materialized view to extract new attributes - Pass TeamID, BuildID, SandboxType from build layer executors --- ...3120000_add_sandbox_type_to_host_stats.sql | 19 +++++++ ...001_add_build_columns_to_metrics_gauge.sql | 51 +++++++++++++++++ packages/clickhouse/pkg/hoststats/delivery.go | 6 +- .../clickhouse/pkg/hoststats/hoststats.go | 2 + .../internal/metrics/sandboxes.go | 7 ++- .../internal/sandbox/fc/process.go | 3 +- .../internal/sandbox/hoststats.go | 6 ++ .../internal/sandbox/hoststats_collector.go | 2 + .../orchestrator/internal/sandbox/sandbox.go | 56 +++++++++++++++---- .../template/build/layer/create_sandbox.go | 3 + .../template/build/layer/resume_sandbox.go | 3 + 11 files changed, 142 insertions(+), 16 deletions(-) create mode 100644 packages/clickhouse/migrations/20260223120000_add_sandbox_type_to_host_stats.sql create mode 100644 packages/clickhouse/migrations/20260223120001_add_build_columns_to_metrics_gauge.sql diff --git a/packages/clickhouse/migrations/20260223120000_add_sandbox_type_to_host_stats.sql b/packages/clickhouse/migrations/20260223120000_add_sandbox_type_to_host_stats.sql new file mode 100644 index 0000000000..ae6b52a66c --- /dev/null +++ 
b/packages/clickhouse/migrations/20260223120000_add_sandbox_type_to_host_stats.sql @@ -0,0 +1,19 @@ +-- +goose Up +-- +goose StatementBegin +ALTER TABLE sandbox_host_stats_local + ADD COLUMN IF NOT EXISTS sandbox_type LowCardinality(String) DEFAULT 'sandbox' CODEC (ZSTD(1)); +-- +goose StatementEnd + +-- +goose StatementBegin +ALTER TABLE sandbox_host_stats + ADD COLUMN IF NOT EXISTS sandbox_type LowCardinality(String) DEFAULT 'sandbox' CODEC (ZSTD(1)); +-- +goose StatementEnd + +-- +goose Down +-- +goose StatementBegin +ALTER TABLE sandbox_host_stats_local DROP COLUMN IF EXISTS sandbox_type; +-- +goose StatementEnd + +-- +goose StatementBegin +ALTER TABLE sandbox_host_stats DROP COLUMN IF EXISTS sandbox_type; +-- +goose StatementEnd diff --git a/packages/clickhouse/migrations/20260223120001_add_build_columns_to_metrics_gauge.sql b/packages/clickhouse/migrations/20260223120001_add_build_columns_to_metrics_gauge.sql new file mode 100644 index 0000000000..bd6d2b6be9 --- /dev/null +++ b/packages/clickhouse/migrations/20260223120001_add_build_columns_to_metrics_gauge.sql @@ -0,0 +1,51 @@ +-- +goose Up +-- +goose StatementBegin +ALTER TABLE sandbox_metrics_gauge_local + ADD COLUMN IF NOT EXISTS build_id String DEFAULT '' CODEC (ZSTD(1)), + ADD COLUMN IF NOT EXISTS sandbox_type LowCardinality(String) DEFAULT 'sandbox' CODEC (ZSTD(1)); +-- +goose StatementEnd + +-- +goose StatementBegin +ALTER TABLE sandbox_metrics_gauge + ADD COLUMN IF NOT EXISTS build_id String DEFAULT '' CODEC (ZSTD(1)), + ADD COLUMN IF NOT EXISTS sandbox_type LowCardinality(String) DEFAULT 'sandbox' CODEC (ZSTD(1)); +-- +goose StatementEnd + +-- +goose StatementBegin +ALTER TABLE sandbox_metrics_gauge_mv MODIFY QUERY +SELECT + toDateTime64(TimeUnix, 9) AS timestamp, + Attributes['sandbox_id'] AS sandbox_id, + Attributes['team_id'] AS team_id, + Attributes['build_id'] AS build_id, + Attributes['sandbox_type'] AS sandbox_type, + MetricName AS metric_name, + Value AS value +FROM metrics_gauge +WHERE 
MetricName LIKE 'e2b.sandbox.%'; +-- +goose StatementEnd + +-- +goose Down +-- +goose StatementBegin +ALTER TABLE sandbox_metrics_gauge_mv MODIFY QUERY +SELECT + toDateTime64(TimeUnix, 9) AS timestamp, + Attributes['sandbox_id'] AS sandbox_id, + Attributes['team_id'] AS team_id, + MetricName AS metric_name, + Value AS value +FROM metrics_gauge +WHERE MetricName LIKE 'e2b.sandbox.%'; +-- +goose StatementEnd + +-- +goose StatementBegin +ALTER TABLE sandbox_metrics_gauge_local + DROP COLUMN IF EXISTS build_id, + DROP COLUMN IF EXISTS sandbox_type; +-- +goose StatementEnd + +-- +goose StatementBegin +ALTER TABLE sandbox_metrics_gauge + DROP COLUMN IF EXISTS build_id, + DROP COLUMN IF EXISTS sandbox_type; +-- +goose StatementEnd diff --git a/packages/clickhouse/pkg/hoststats/delivery.go b/packages/clickhouse/pkg/hoststats/delivery.go index a8833b0bcf..bd9697324a 100644 --- a/packages/clickhouse/pkg/hoststats/delivery.go +++ b/packages/clickhouse/pkg/hoststats/delivery.go @@ -31,9 +31,10 @@ const InsertSandboxHostStatQuery = `INSERT INTO sandbox_host_stats cgroup_cpu_user_usec, cgroup_cpu_system_usec, cgroup_memory_usage_bytes, - cgroup_memory_peak_bytes + cgroup_memory_peak_bytes, + sandbox_type ) -VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)` +VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)` type ClickhouseDelivery struct { batcher *batcher.Batcher[SandboxHostStat] @@ -123,6 +124,7 @@ func (c *ClickhouseDelivery) batchInserter(ctx context.Context, stats []SandboxH stat.CgroupCPUSystemUsec, stat.CgroupMemoryUsage, stat.CgroupMemoryPeak, + stat.SandboxType, ) if err != nil { return fmt.Errorf("error appending %d host stat to batch: %w", len(stats), err) diff --git a/packages/clickhouse/pkg/hoststats/hoststats.go b/packages/clickhouse/pkg/hoststats/hoststats.go index dcf0459097..693f1e88df 100644 --- a/packages/clickhouse/pkg/hoststats/hoststats.go +++ b/packages/clickhouse/pkg/hoststats/hoststats.go @@ -31,6 +31,8 @@ type SandboxHostStat 
struct { CgroupCPUSystemUsec uint64 `ch:"cgroup_cpu_system_usec"` // cumulative, microseconds CgroupMemoryUsage uint64 `ch:"cgroup_memory_usage_bytes"` // current, bytes CgroupMemoryPeak uint64 `ch:"cgroup_memory_peak_bytes"` // lifetime peak, bytes + + SandboxType string `ch:"sandbox_type"` // "sandbox" or "build" } // Delivery is the interface for delivering host stats to storage backend diff --git a/packages/orchestrator/internal/metrics/sandboxes.go b/packages/orchestrator/internal/metrics/sandboxes.go index 501e4dbe8c..702d685fda 100644 --- a/packages/orchestrator/internal/metrics/sandboxes.go +++ b/packages/orchestrator/internal/metrics/sandboxes.go @@ -179,7 +179,12 @@ func (so *SandboxObserver) startObserving() (metric.Registration, error) { return err } - attributes := metric.WithAttributes(attribute.String("sandbox_id", sbx.Runtime.SandboxID), attribute.String("team_id", sbx.Runtime.TeamID)) + sandboxType := sbx.Runtime.SandboxType + if sandboxType == "" { + sandboxType = sandbox.SandboxTypeSandbox + } + + attributes := metric.WithAttributes(attribute.String("sandbox_id", sbx.Runtime.SandboxID), attribute.String("team_id", sbx.Runtime.TeamID), attribute.String("build_id", sbx.Runtime.BuildID), attribute.String("sandbox_type", sandboxType)) ok, err = utils.IsGTEVersion(sbx.Config.Envd.Version, minEnvVersionForMetricsTimestamp) if err != nil { diff --git a/packages/orchestrator/internal/sandbox/fc/process.go b/packages/orchestrator/internal/sandbox/fc/process.go index d16fb92211..dd88f260fa 100644 --- a/packages/orchestrator/internal/sandbox/fc/process.go +++ b/packages/orchestrator/internal/sandbox/fc/process.go @@ -259,6 +259,7 @@ func (p *Process) Create( hugePages bool, options ProcessOptions, txRateLimit TxRateLimiterConfig, + cgroupFD int, ) error { ctx, childSpan := tracer.Start(ctx, "create-fc") defer childSpan.End() @@ -274,7 +275,7 @@ func (p *Process) Create( sbxMetadata, options.Stdout, options.Stderr, - cgroup.NoCgroupFD, + cgroupFD, ) if err 
!= nil { fcStopErr := p.Stop(ctx) diff --git a/packages/orchestrator/internal/sandbox/hoststats.go b/packages/orchestrator/internal/sandbox/hoststats.go index d122e1c1ff..3ae44ec0e8 100644 --- a/packages/orchestrator/internal/sandbox/hoststats.go +++ b/packages/orchestrator/internal/sandbox/hoststats.go @@ -42,6 +42,11 @@ func initializeHostStatsCollector( logger.L().Error(ctx, "error parsing team ID", logger.WithTeamID(runtime.TeamID), zap.Error(err)) } + sandboxType := runtime.SandboxType + if sandboxType == "" { + sandboxType = SandboxTypeSandbox + } + var cgroupStats CgroupStatsFunc if sbx.cgroupHandle != nil { cgroupStats = sbx.cgroupHandle.GetStats @@ -56,6 +61,7 @@ func initializeHostStatsCollector( TeamID: teamID, VCPUCount: config.Vcpu, MemoryMB: config.RamMB, + SandboxType: sandboxType, }, int32(firecrackerPID), hostStatsDelivery, diff --git a/packages/orchestrator/internal/sandbox/hoststats_collector.go b/packages/orchestrator/internal/sandbox/hoststats_collector.go index 62c8952573..139a33d927 100644 --- a/packages/orchestrator/internal/sandbox/hoststats_collector.go +++ b/packages/orchestrator/internal/sandbox/hoststats_collector.go @@ -39,6 +39,7 @@ type HostStatsMetadata struct { TeamID uuid.UUID VCPUCount int64 MemoryMB int64 + SandboxType string } func NewHostStatsCollector( @@ -96,6 +97,7 @@ func (h *HostStatsCollector) CollectSample(ctx context.Context) error { FirecrackerCPUSystemTime: times.System, // seconds FirecrackerMemoryRSS: memInfo.RSS, // bytes FirecrackerMemoryVMS: memInfo.VMS, // bytes + SandboxType: h.metadata.SandboxType, } if h.cgroupStats != nil { diff --git a/packages/orchestrator/internal/sandbox/sandbox.go b/packages/orchestrator/internal/sandbox/sandbox.go index 5cc2c35a80..d1e259ea89 100644 --- a/packages/orchestrator/internal/sandbox/sandbox.go +++ b/packages/orchestrator/internal/sandbox/sandbox.go @@ -95,6 +95,11 @@ type EnvdMetadata struct { Version string } +const ( + SandboxTypeSandbox = "sandbox" + SandboxTypeBuild = 
"build" +) + type RuntimeMetadata struct { TemplateID string SandboxID string @@ -102,6 +107,11 @@ type RuntimeMetadata struct { // TeamID optional, used only for logging TeamID string + + // BuildID is the overall build ID, set for build sandboxes + BuildID string + // SandboxType distinguishes build sandboxes from regular sandboxes ("sandbox" or "build") + SandboxType string } type Resources struct { @@ -311,6 +321,9 @@ func (f *Factory) CreateSandbox( return nil, err } + cgroupHandle, cgroupFD := createCgroup(ctx, f.cgroupManager, runtime.SandboxID, cleanup) + defer releaseCgroupFD(ctx, cgroupHandle, runtime.SandboxID) + fcHandle, err := fc.NewProcess( ctx, execCtx, @@ -344,7 +357,10 @@ func (f *Factory) CreateSandbox( Ops: fc.TokenBucketConfig(throttleConfig.Ops), Bandwidth: fc.TokenBucketConfig(throttleConfig.Bandwidth), }, + cgroupFD, ) + + releaseCgroupFD(ctx, cgroupHandle, runtime.SandboxID) if err != nil { return nil, fmt.Errorf("failed to create FC: %w", err) } @@ -371,8 +387,9 @@ func (f *Factory) CreateSandbox( sbx := &Sandbox{ LifecycleID: uuid.NewString(), - Resources: resources, - Metadata: metadata, + Resources: resources, + Metadata: metadata, + cgroupHandle: cgroupHandle, Template: template, config: f.config, @@ -386,11 +403,17 @@ func (f *Factory) CreateSandbox( exit: exit, } - sbx.Checks = NewChecks(sbx, false) + useClickhouseMetrics := f.featureFlags.BoolFlag(ctx, featureflags.MetricsWriteFlag) + sbx.Checks = NewChecks(sbx, useClickhouseMetrics) // Stop the sandbox first if it is still running, otherwise do nothing cleanup.AddPriority(ctx, sbx.Stop) + if f.featureFlags.BoolFlag(execCtx, featureflags.HostStatsEnabled) { + samplingInterval := time.Duration(f.featureFlags.IntFlag(execCtx, featureflags.HostStatsSamplingInterval)) * time.Millisecond + initializeHostStatsCollector(execCtx, sbx, fcHandle, runtime.BuildID, runtime, config, f.hostStatsDelivery, samplingInterval) + } + go func() { defer execSpan.End() @@ -594,6 +617,7 @@ func (f 
*Factory) ResumeSandbox( // Create cgroup for sandbox resource accounting cgroupHandle, cgroupFD := createCgroup(ctx, f.cgroupManager, runtime.SandboxID, cleanup) + defer releaseCgroupFD(ctx, cgroupHandle, runtime.SandboxID) fcHandle, fcErr := fc.NewProcess( ctx, @@ -656,14 +680,7 @@ func (f *Factory) ResumeSandbox( }, ) - // Release the cgroup directory FD — the kernel already used it during clone - if cgroupHandle != nil { - if releaseErr := cgroupHandle.ReleaseCgroupFD(); releaseErr != nil { - logger.L().Warn(ctx, "failed to release cgroup directory FD", - logger.WithSandboxID(runtime.SandboxID), - zap.Error(releaseErr)) - } - } + releaseCgroupFD(ctx, cgroupHandle, runtime.SandboxID) if fcStartErr != nil { return nil, fmt.Errorf("failed to start FC: %w", fcStartErr) @@ -733,7 +750,12 @@ func (f *Factory) ResumeSandbox( if f.featureFlags.BoolFlag(execCtx, featureflags.HostStatsEnabled) { samplingInterval := time.Duration(f.featureFlags.IntFlag(execCtx, featureflags.HostStatsSamplingInterval)) * time.Millisecond - initializeHostStatsCollector(execCtx, sbx, fcHandle, meta.Template.BuildID, runtime, config, f.hostStatsDelivery, samplingInterval) + buildID := meta.Template.BuildID + if runtime.BuildID != "" { + buildID = runtime.BuildID + } + + initializeHostStatsCollector(execCtx, sbx, fcHandle, buildID, runtime, config, f.hostStatsDelivery, samplingInterval) } go sbx.Checks.Start(execCtx) @@ -1247,6 +1269,16 @@ func (s *Sandbox) WaitForEnvd( return nil } +func releaseCgroupFD(ctx context.Context, cgroupHandle *cgroup.CgroupHandle, sandboxID string) { + if cgroupHandle != nil { + if releaseErr := cgroupHandle.ReleaseCgroupFD(); releaseErr != nil { + logger.L().Warn(ctx, "failed to release cgroup directory FD", + logger.WithSandboxID(sandboxID), + zap.Error(releaseErr)) + } + } +} + func (f *Factory) GetEnvdInitRequestTimeout(ctx context.Context) time.Duration { envdInitRequestTimeoutMs := f.featureFlags.IntFlag(ctx, featureflags.EnvdInitTimeoutMilliseconds) diff 
--git a/packages/orchestrator/internal/template/build/layer/create_sandbox.go b/packages/orchestrator/internal/template/build/layer/create_sandbox.go index 86a37dc556..c3f6be6a19 100644 --- a/packages/orchestrator/internal/template/build/layer/create_sandbox.go +++ b/packages/orchestrator/internal/template/build/layer/create_sandbox.go @@ -105,6 +105,9 @@ func (cs *CreateSandbox) Sandbox( TemplateID: layerExecutor.Config.TemplateID, SandboxID: config.InstanceBuildPrefix + id.Generate(), ExecutionID: uuid.NewString(), + TeamID: layerExecutor.Config.TeamID, + BuildID: layerExecutor.Template.BuildID, + SandboxType: sandbox.SandboxTypeBuild, }, template, cs.timeout, diff --git a/packages/orchestrator/internal/template/build/layer/resume_sandbox.go b/packages/orchestrator/internal/template/build/layer/resume_sandbox.go index 801300fe29..4f357e7361 100644 --- a/packages/orchestrator/internal/template/build/layer/resume_sandbox.go +++ b/packages/orchestrator/internal/template/build/layer/resume_sandbox.go @@ -39,6 +39,9 @@ func (rs *ResumeSandbox) Sandbox( TemplateID: layerExecutor.Config.TemplateID, SandboxID: config.InstanceBuildPrefix + id.Generate(), ExecutionID: uuid.NewString(), + TeamID: layerExecutor.Config.TeamID, + BuildID: layerExecutor.Template.BuildID, + SandboxType: sandbox.SandboxTypeBuild, }, time.Now(), time.Now().Add(rs.timeout), From c012f24ad67cfbfb7a1bd5bd457844f07658280e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Petr=20Van=C4=9Bk?= Date: Mon, 2 Mar 2026 22:14:41 +0100 Subject: [PATCH 02/11] fix(orchestrator): remove redundant cgroup FD release calls The cgroup FD is already released via a defer registered right after cgroup creation, making these explicit releaseCgroupFD calls redundant. 
--- packages/orchestrator/internal/sandbox/sandbox.go | 3 --- 1 file changed, 3 deletions(-) diff --git a/packages/orchestrator/internal/sandbox/sandbox.go b/packages/orchestrator/internal/sandbox/sandbox.go index d1e259ea89..73baa4279b 100644 --- a/packages/orchestrator/internal/sandbox/sandbox.go +++ b/packages/orchestrator/internal/sandbox/sandbox.go @@ -360,7 +360,6 @@ func (f *Factory) CreateSandbox( cgroupFD, ) - releaseCgroupFD(ctx, cgroupHandle, runtime.SandboxID) if err != nil { return nil, fmt.Errorf("failed to create FC: %w", err) } @@ -680,8 +679,6 @@ func (f *Factory) ResumeSandbox( }, ) - releaseCgroupFD(ctx, cgroupHandle, runtime.SandboxID) - if fcStartErr != nil { return nil, fmt.Errorf("failed to start FC: %w", fcStartErr) } From a1cc954bd16db0be34caff10b7e7171489d3ba30 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Petr=20Van=C4=9Bk?= Date: Mon, 2 Mar 2026 12:49:34 +0100 Subject: [PATCH 03/11] feat(orchestrator): use unique cgroup name per sandbox resume to avoid collisions Add SandboxCgroupName() to SandboxFiles that combines SandboxID with randomID, consistent with how other sandbox resource names are derived. Use it in CreateSandbox and ResumeSandbox to ensure cgroup names don't collide between paused and restored sandbox instances if cleanup fails.
--- packages/orchestrator/internal/sandbox/sandbox.go | 4 ++-- packages/shared/pkg/storage/sandbox.go | 4 ++++ 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/packages/orchestrator/internal/sandbox/sandbox.go b/packages/orchestrator/internal/sandbox/sandbox.go index 73baa4279b..a310f4d716 100644 --- a/packages/orchestrator/internal/sandbox/sandbox.go +++ b/packages/orchestrator/internal/sandbox/sandbox.go @@ -321,7 +321,7 @@ func (f *Factory) CreateSandbox( return nil, err } - cgroupHandle, cgroupFD := createCgroup(ctx, f.cgroupManager, runtime.SandboxID, cleanup) + cgroupHandle, cgroupFD := createCgroup(ctx, f.cgroupManager, sandboxFiles.SandboxCgroupName(), cleanup) defer releaseCgroupFD(ctx, cgroupHandle, runtime.SandboxID) fcHandle, err := fc.NewProcess( @@ -615,7 +615,7 @@ func (f *Factory) ResumeSandbox( } // Create cgroup for sandbox resource accounting - cgroupHandle, cgroupFD := createCgroup(ctx, f.cgroupManager, runtime.SandboxID, cleanup) + cgroupHandle, cgroupFD := createCgroup(ctx, f.cgroupManager, sandboxFiles.SandboxCgroupName(), cleanup) defer releaseCgroupFD(ctx, cgroupHandle, runtime.SandboxID) fcHandle, fcErr := fc.NewProcess( diff --git a/packages/shared/pkg/storage/sandbox.go b/packages/shared/pkg/storage/sandbox.go index 89693ba10e..cf58fdaa1c 100644 --- a/packages/shared/pkg/storage/sandbox.go +++ b/packages/shared/pkg/storage/sandbox.go @@ -62,3 +62,7 @@ func (s *SandboxFiles) SandboxCacheRootfsLinkPath(config Config) string { func (s *SandboxFiles) SandboxMetricsFifoPath() string { return filepath.Join(s.tmpDir, fmt.Sprintf("fc-metrics-%s-%s.fifo", s.SandboxID, s.randomID)) } + +func (s *SandboxFiles) SandboxCgroupName() string { + return fmt.Sprintf("%s-%s", s.SandboxID, s.randomID) +} From 64a50376a86afb887630b50149bd0a64eb6c4904 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Petr=20Van=C4=9Bk?= Date: Tue, 3 Mar 2026 13:33:56 +0100 Subject: [PATCH 04/11] fix(orchestrator): set missing TeamID, BuildID, and SandboxType in base 
builder RuntimeMetadata Without these fields, ClickHouse columns for team and build ID were empty for base builder sandboxes, and sandbox type was incorrectly reported as "sandbox" instead of "build". --- .../internal/template/build/phases/base/builder.go | 3 +++ 1 file changed, 3 insertions(+) diff --git a/packages/orchestrator/internal/template/build/phases/base/builder.go b/packages/orchestrator/internal/template/build/phases/base/builder.go index 391e4d93c4..d2442126a5 100644 --- a/packages/orchestrator/internal/template/build/phases/base/builder.go +++ b/packages/orchestrator/internal/template/build/phases/base/builder.go @@ -224,6 +224,9 @@ func (bb *BaseBuilder) buildLayerFromOCI( TemplateID: bb.Config.TemplateID, SandboxID: config.InstanceBuildPrefix + id.Generate(), ExecutionID: uuid.NewString(), + TeamID: bb.Config.TeamID, + BuildID: bb.Template.BuildID, + SandboxType: sandbox.SandboxTypeBuild, }, localTemplate, rootfsPath, From 16dc813dc463bc3ff9483b1bf6be460657f26b84 Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" Date: Tue, 3 Mar 2026 12:46:50 +0000 Subject: [PATCH 05/11] chore: auto-commit generated changes --- packages/orchestrator/internal/sandbox/sandbox.go | 1 - 1 file changed, 1 deletion(-) diff --git a/packages/orchestrator/internal/sandbox/sandbox.go b/packages/orchestrator/internal/sandbox/sandbox.go index a310f4d716..acc197163c 100644 --- a/packages/orchestrator/internal/sandbox/sandbox.go +++ b/packages/orchestrator/internal/sandbox/sandbox.go @@ -359,7 +359,6 @@ func (f *Factory) CreateSandbox( }, cgroupFD, ) - if err != nil { return nil, fmt.Errorf("failed to create FC: %w", err) } From f55721a53656070b03278c095708caa5f5b15d20 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Petr=20Van=C4=9Bk?= Date: Tue, 3 Mar 2026 16:36:19 +0100 Subject: [PATCH 06/11] fix(orchestrator): set SandboxType for regular sandboxes in Create and Checkpoint handlers The SandboxType field was never set when creating sandboxes via the gRPC Create handler or when 
resuming them via Checkpoint, causing it to default to an empty string. Build sandboxes were unaffected because all build paths already set SandboxType explicitly. Add warning logs in the fallback paths in hoststats and metrics so unexpected empty types are visible. --- packages/orchestrator/internal/metrics/sandboxes.go | 1 + packages/orchestrator/internal/sandbox/hoststats.go | 1 + packages/orchestrator/internal/server/sandboxes.go | 2 ++ 3 files changed, 4 insertions(+) diff --git a/packages/orchestrator/internal/metrics/sandboxes.go b/packages/orchestrator/internal/metrics/sandboxes.go index 702d685fda..56dabc4492 100644 --- a/packages/orchestrator/internal/metrics/sandboxes.go +++ b/packages/orchestrator/internal/metrics/sandboxes.go @@ -182,6 +182,7 @@ func (so *SandboxObserver) startObserving() (metric.Registration, error) { sandboxType := sbx.Runtime.SandboxType if sandboxType == "" { sandboxType = sandbox.SandboxTypeSandbox + logger.L().Warn(ctx, "unknown sandbox type", logger.WithSandboxID(sbx.Runtime.SandboxID)) } attributes := metric.WithAttributes(attribute.String("sandbox_id", sbx.Runtime.SandboxID), attribute.String("team_id", sbx.Runtime.TeamID), attribute.String("build_id", sbx.Runtime.BuildID), attribute.String("sandbox_type", sandboxType)) diff --git a/packages/orchestrator/internal/sandbox/hoststats.go b/packages/orchestrator/internal/sandbox/hoststats.go index 3ae44ec0e8..358fddc6c4 100644 --- a/packages/orchestrator/internal/sandbox/hoststats.go +++ b/packages/orchestrator/internal/sandbox/hoststats.go @@ -45,6 +45,7 @@ func initializeHostStatsCollector( sandboxType := runtime.SandboxType if sandboxType == "" { sandboxType = SandboxTypeSandbox + logger.L().Warn(ctx, "unknown sandbox type", logger.WithSandboxID(runtime.SandboxID)) } var cgroupStats CgroupStatsFunc diff --git a/packages/orchestrator/internal/server/sandboxes.go b/packages/orchestrator/internal/server/sandboxes.go index e2949395e0..ad53b3d957 100644 --- 
a/packages/orchestrator/internal/server/sandboxes.go +++ b/packages/orchestrator/internal/server/sandboxes.go @@ -164,6 +164,7 @@ func (s *Server) Create(ctx context.Context, req *orchestrator.SandboxCreateRequ SandboxID: req.GetSandbox().GetSandboxId(), ExecutionID: req.GetSandbox().GetExecutionId(), TeamID: req.GetSandbox().GetTeamId(), + SandboxType: sandbox.SandboxTypeSandbox, }, req.GetStartTime().AsTime(), req.GetEndTime().AsTime(), @@ -485,6 +486,7 @@ func (s *Server) Checkpoint(ctx context.Context, in *orchestrator.SandboxCheckpo SandboxID: sbx.Runtime.SandboxID, ExecutionID: sbx.Runtime.ExecutionID, TeamID: sbx.Runtime.TeamID, + SandboxType: sbx.Runtime.SandboxType, }, sbx.GetStartedAt(), sbx.GetEndAt(), From 16ddbd4230abf0551c34c52461eb899f828578be Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Petr=20Van=C4=9Bk?= Date: Tue, 3 Mar 2026 17:18:16 +0100 Subject: [PATCH 07/11] fix(orchestrator): set BuildID for regular sandboxes in Create and Checkpoint handlers The BuildID field was never copied from the gRPC request into RuntimeMetadata when creating regular sandboxes, even though the API sends it. Add BuildID to both the Create and Checkpoint handlers, add a warning log in the metrics observer for when BuildID is unexpectedly empty, and update the BuildID comment to reflect its use across all sandbox types. 
--- packages/orchestrator/internal/metrics/sandboxes.go | 4 ++++ packages/orchestrator/internal/sandbox/sandbox.go | 2 +- packages/orchestrator/internal/server/sandboxes.go | 2 ++ 3 files changed, 7 insertions(+), 1 deletion(-) diff --git a/packages/orchestrator/internal/metrics/sandboxes.go b/packages/orchestrator/internal/metrics/sandboxes.go index 56dabc4492..cc1b68005b 100644 --- a/packages/orchestrator/internal/metrics/sandboxes.go +++ b/packages/orchestrator/internal/metrics/sandboxes.go @@ -185,6 +185,10 @@ func (so *SandboxObserver) startObserving() (metric.Registration, error) { logger.L().Warn(ctx, "unknown sandbox type", logger.WithSandboxID(sbx.Runtime.SandboxID)) } + if sbx.Runtime.BuildID == "" { + logger.L().Warn(ctx, "unknown build id", logger.WithSandboxID(sbx.Runtime.SandboxID)) + } + attributes := metric.WithAttributes(attribute.String("sandbox_id", sbx.Runtime.SandboxID), attribute.String("team_id", sbx.Runtime.TeamID), attribute.String("build_id", sbx.Runtime.BuildID), attribute.String("sandbox_type", sandboxType)) ok, err = utils.IsGTEVersion(sbx.Config.Envd.Version, minEnvVersionForMetricsTimestamp) diff --git a/packages/orchestrator/internal/sandbox/sandbox.go b/packages/orchestrator/internal/sandbox/sandbox.go index acc197163c..b704c648da 100644 --- a/packages/orchestrator/internal/sandbox/sandbox.go +++ b/packages/orchestrator/internal/sandbox/sandbox.go @@ -108,7 +108,7 @@ type RuntimeMetadata struct { // TeamID optional, used only for logging TeamID string - // BuildID is the overall build ID, set for build sandboxes + // BuildID is the ID of the associated template build. 
BuildID string // SandboxType distinguishes build sandboxes from regular sandboxes ("sandbox" or "build") SandboxType string diff --git a/packages/orchestrator/internal/server/sandboxes.go b/packages/orchestrator/internal/server/sandboxes.go index ad53b3d957..aa749b333f 100644 --- a/packages/orchestrator/internal/server/sandboxes.go +++ b/packages/orchestrator/internal/server/sandboxes.go @@ -164,6 +164,7 @@ func (s *Server) Create(ctx context.Context, req *orchestrator.SandboxCreateRequ SandboxID: req.GetSandbox().GetSandboxId(), ExecutionID: req.GetSandbox().GetExecutionId(), TeamID: req.GetSandbox().GetTeamId(), + BuildID: req.GetSandbox().GetBuildId(), SandboxType: sandbox.SandboxTypeSandbox, }, req.GetStartTime().AsTime(), @@ -486,6 +487,7 @@ func (s *Server) Checkpoint(ctx context.Context, in *orchestrator.SandboxCheckpo SandboxID: sbx.Runtime.SandboxID, ExecutionID: sbx.Runtime.ExecutionID, TeamID: sbx.Runtime.TeamID, + BuildID: sbx.Runtime.BuildID, SandboxType: sbx.Runtime.SandboxType, }, sbx.GetStartedAt(), From f9aef2b0bcf35b1869155a1aa74254c55eb96852 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Petr=20Van=C4=9Bk?= Date: Wed, 4 Mar 2026 11:30:21 +0100 Subject: [PATCH 08/11] refactor(orchestrator): remove debug warning logs for empty SandboxType and BuildID The root cause (missing field assignments in Create and Checkpoint handlers) has been fixed. Replace the warning logs with silent fallbacks to avoid log spam in the metrics observer and host stats collector. 
--- packages/orchestrator/internal/metrics/sandboxes.go | 5 ----- packages/orchestrator/internal/sandbox/hoststats.go | 1 - 2 files changed, 6 deletions(-) diff --git a/packages/orchestrator/internal/metrics/sandboxes.go b/packages/orchestrator/internal/metrics/sandboxes.go index cc1b68005b..702d685fda 100644 --- a/packages/orchestrator/internal/metrics/sandboxes.go +++ b/packages/orchestrator/internal/metrics/sandboxes.go @@ -182,11 +182,6 @@ func (so *SandboxObserver) startObserving() (metric.Registration, error) { sandboxType := sbx.Runtime.SandboxType if sandboxType == "" { sandboxType = sandbox.SandboxTypeSandbox - logger.L().Warn(ctx, "unknown sandbox type", logger.WithSandboxID(sbx.Runtime.SandboxID)) - } - - if sbx.Runtime.BuildID == "" { - logger.L().Warn(ctx, "unknown build id", logger.WithSandboxID(sbx.Runtime.SandboxID)) } attributes := metric.WithAttributes(attribute.String("sandbox_id", sbx.Runtime.SandboxID), attribute.String("team_id", sbx.Runtime.TeamID), attribute.String("build_id", sbx.Runtime.BuildID), attribute.String("sandbox_type", sandboxType)) diff --git a/packages/orchestrator/internal/sandbox/hoststats.go b/packages/orchestrator/internal/sandbox/hoststats.go index 358fddc6c4..3ae44ec0e8 100644 --- a/packages/orchestrator/internal/sandbox/hoststats.go +++ b/packages/orchestrator/internal/sandbox/hoststats.go @@ -45,7 +45,6 @@ func initializeHostStatsCollector( sandboxType := runtime.SandboxType if sandboxType == "" { sandboxType = SandboxTypeSandbox - logger.L().Warn(ctx, "unknown sandbox type", logger.WithSandboxID(runtime.SandboxID)) } var cgroupStats CgroupStatsFunc From 9a75a1f700b40ce985b02cd954000126bda26872 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Petr=20Van=C4=9Bk?= Date: Wed, 4 Mar 2026 11:31:52 +0100 Subject: [PATCH 09/11] refactor(orchestrator): introduce SandboxType enum with default fallback Replace the raw string SandboxType field with a named SandboxType type and a String() method that defaults to "sandbox" when empty. 
This eliminates the scattered fallback checks in the metrics observer and host stats collector, ensuring a single source of truth for the default. --- .../internal/metrics/sandboxes.go | 7 +------ .../internal/sandbox/hoststats.go | 7 +------ .../internal/sandbox/hoststats_collector.go | 4 ++-- .../orchestrator/internal/sandbox/sandbox.go | 21 ++++++++++++++----- 4 files changed, 20 insertions(+), 19 deletions(-) diff --git a/packages/orchestrator/internal/metrics/sandboxes.go b/packages/orchestrator/internal/metrics/sandboxes.go index 702d685fda..82700f286a 100644 --- a/packages/orchestrator/internal/metrics/sandboxes.go +++ b/packages/orchestrator/internal/metrics/sandboxes.go @@ -179,12 +179,7 @@ func (so *SandboxObserver) startObserving() (metric.Registration, error) { return err } - sandboxType := sbx.Runtime.SandboxType - if sandboxType == "" { - sandboxType = sandbox.SandboxTypeSandbox - } - - attributes := metric.WithAttributes(attribute.String("sandbox_id", sbx.Runtime.SandboxID), attribute.String("team_id", sbx.Runtime.TeamID), attribute.String("build_id", sbx.Runtime.BuildID), attribute.String("sandbox_type", sandboxType)) + attributes := metric.WithAttributes(attribute.String("sandbox_id", sbx.Runtime.SandboxID), attribute.String("team_id", sbx.Runtime.TeamID), attribute.String("build_id", sbx.Runtime.BuildID), attribute.String("sandbox_type", sbx.Runtime.SandboxType.String())) ok, err = utils.IsGTEVersion(sbx.Config.Envd.Version, minEnvVersionForMetricsTimestamp) if err != nil { diff --git a/packages/orchestrator/internal/sandbox/hoststats.go b/packages/orchestrator/internal/sandbox/hoststats.go index 3ae44ec0e8..5020fcdbba 100644 --- a/packages/orchestrator/internal/sandbox/hoststats.go +++ b/packages/orchestrator/internal/sandbox/hoststats.go @@ -42,11 +42,6 @@ func initializeHostStatsCollector( logger.L().Error(ctx, "error parsing team ID", logger.WithTeamID(runtime.TeamID), zap.Error(err)) } - sandboxType := runtime.SandboxType - if sandboxType == 
"" { - sandboxType = SandboxTypeSandbox - } - var cgroupStats CgroupStatsFunc if sbx.cgroupHandle != nil { cgroupStats = sbx.cgroupHandle.GetStats @@ -61,7 +56,7 @@ func initializeHostStatsCollector( TeamID: teamID, VCPUCount: config.Vcpu, MemoryMB: config.RamMB, - SandboxType: sandboxType, + SandboxType: runtime.SandboxType, }, int32(firecrackerPID), hostStatsDelivery, diff --git a/packages/orchestrator/internal/sandbox/hoststats_collector.go b/packages/orchestrator/internal/sandbox/hoststats_collector.go index 139a33d927..fce3d71fcc 100644 --- a/packages/orchestrator/internal/sandbox/hoststats_collector.go +++ b/packages/orchestrator/internal/sandbox/hoststats_collector.go @@ -39,7 +39,7 @@ type HostStatsMetadata struct { TeamID uuid.UUID VCPUCount int64 MemoryMB int64 - SandboxType string + SandboxType SandboxType } func NewHostStatsCollector( @@ -97,7 +97,7 @@ func (h *HostStatsCollector) CollectSample(ctx context.Context) error { FirecrackerCPUSystemTime: times.System, // seconds FirecrackerMemoryRSS: memInfo.RSS, // bytes FirecrackerMemoryVMS: memInfo.VMS, // bytes - SandboxType: h.metadata.SandboxType, + SandboxType: h.metadata.SandboxType.String(), } if h.cgroupStats != nil { diff --git a/packages/orchestrator/internal/sandbox/sandbox.go b/packages/orchestrator/internal/sandbox/sandbox.go index b704c648da..be51661e6f 100644 --- a/packages/orchestrator/internal/sandbox/sandbox.go +++ b/packages/orchestrator/internal/sandbox/sandbox.go @@ -95,11 +95,23 @@ type EnvdMetadata struct { Version string } +// SandboxType distinguishes build sandboxes from regular sandboxes. +type SandboxType string + const ( - SandboxTypeSandbox = "sandbox" - SandboxTypeBuild = "build" + SandboxTypeSandbox SandboxType = "sandbox" + SandboxTypeBuild SandboxType = "build" ) +// String returns the sandbox type as a string, defaulting to "sandbox" if empty. 
+func (t SandboxType) String() string { + if t == "" { + return string(SandboxTypeSandbox) + } + + return string(t) +} + type RuntimeMetadata struct { TemplateID string SandboxID string @@ -109,9 +121,8 @@ type RuntimeMetadata struct { TeamID string // BuildID is the ID of the associated template build. - BuildID string - // SandboxType distinguishes build sandboxes from regular sandboxes ("sandbox" or "build") - SandboxType string + BuildID string + SandboxType SandboxType } type Resources struct { From a20957f86f33d1eba0cf86127b5b69c77b45d53a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Petr=20Van=C4=9Bk?= Date: Wed, 4 Mar 2026 11:33:18 +0100 Subject: [PATCH 10/11] refactor(orchestrator): remove separate buildID parameter from initializeHostStatsCollector Now that RuntimeMetadata.BuildID is always populated in both the Create and Checkpoint handlers, the separate buildID parameter and the fallback to meta.Template.BuildID in ResumeSandbox are unnecessary. Read BuildID directly from runtime metadata instead. 
--- packages/orchestrator/internal/sandbox/hoststats.go | 3 +-- packages/orchestrator/internal/sandbox/sandbox.go | 9 ++------- 2 files changed, 3 insertions(+), 9 deletions(-) diff --git a/packages/orchestrator/internal/sandbox/hoststats.go b/packages/orchestrator/internal/sandbox/hoststats.go index 5020fcdbba..e4ebf17688 100644 --- a/packages/orchestrator/internal/sandbox/hoststats.go +++ b/packages/orchestrator/internal/sandbox/hoststats.go @@ -18,7 +18,6 @@ func initializeHostStatsCollector( ctx context.Context, sbx *Sandbox, fcHandle *fc.Process, - buildID string, runtime RuntimeMetadata, config Config, hostStatsDelivery hoststats.Delivery, @@ -52,7 +51,7 @@ func initializeHostStatsCollector( SandboxID: runtime.SandboxID, ExecutionID: runtime.ExecutionID, TemplateID: runtime.TemplateID, - BuildID: buildID, + BuildID: runtime.BuildID, TeamID: teamID, VCPUCount: config.Vcpu, MemoryMB: config.RamMB, diff --git a/packages/orchestrator/internal/sandbox/sandbox.go b/packages/orchestrator/internal/sandbox/sandbox.go index be51661e6f..1917929495 100644 --- a/packages/orchestrator/internal/sandbox/sandbox.go +++ b/packages/orchestrator/internal/sandbox/sandbox.go @@ -420,7 +420,7 @@ func (f *Factory) CreateSandbox( if f.featureFlags.BoolFlag(execCtx, featureflags.HostStatsEnabled) { samplingInterval := time.Duration(f.featureFlags.IntFlag(execCtx, featureflags.HostStatsSamplingInterval)) * time.Millisecond - initializeHostStatsCollector(execCtx, sbx, fcHandle, runtime.BuildID, runtime, config, f.hostStatsDelivery, samplingInterval) + initializeHostStatsCollector(execCtx, sbx, fcHandle, runtime, config, f.hostStatsDelivery, samplingInterval) } go func() { @@ -757,12 +757,7 @@ func (f *Factory) ResumeSandbox( if f.featureFlags.BoolFlag(execCtx, featureflags.HostStatsEnabled) { samplingInterval := time.Duration(f.featureFlags.IntFlag(execCtx, featureflags.HostStatsSamplingInterval)) * time.Millisecond - buildID := meta.Template.BuildID - if runtime.BuildID != "" { - 
buildID = runtime.BuildID - } - - initializeHostStatsCollector(execCtx, sbx, fcHandle, buildID, runtime, config, f.hostStatsDelivery, samplingInterval) + initializeHostStatsCollector(execCtx, sbx, fcHandle, runtime, config, f.hostStatsDelivery, samplingInterval) } go sbx.Checks.Start(execCtx) From 1a6c1b4b6afcfcbdcd2623ba9303f1d97dd5edaa Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Petr=20Van=C4=9Bk?= Date: Wed, 4 Mar 2026 12:13:40 +0100 Subject: [PATCH 11/11] refactor(orchestrator): consolidate cgroup naming by moving sbx- prefix into SandboxCgroupName() The sbx- prefix was applied inside the cgroup package's sandboxCgroupPath(), separate from the name generated by SandboxCgroupName(). Move the prefix into SandboxCgroupName() so it is the single source of truth for cgroup naming, and simplify the cgroup package to just join the root path with the provided name. Rename sandboxID to cgroupName throughout the cgroup package to reflect that it receives a cgroup name, not a raw sandbox ID. --- .../internal/sandbox/cgroup/manager.go | 30 +++++++++---------- .../internal/sandbox/cgroup/manager_test.go | 6 ++-- .../orchestrator/internal/sandbox/sandbox.go | 8 ++--- packages/shared/pkg/storage/sandbox.go | 2 +- 4 files changed, 23 insertions(+), 23 deletions(-) diff --git a/packages/orchestrator/internal/sandbox/cgroup/manager.go b/packages/orchestrator/internal/sandbox/cgroup/manager.go index 0df5596817..819c3667c3 100644 --- a/packages/orchestrator/internal/sandbox/cgroup/manager.go +++ b/packages/orchestrator/internal/sandbox/cgroup/manager.go @@ -45,7 +45,7 @@ type Stats struct { // whether Start succeeded or failed). Remove() closes the memory.peak FD and // deletes the cgroup directory — it does not release the cgroup directory FD. 
type CgroupHandle struct { - sandboxID string + cgroupName string path string file *os.File // Open FD to the cgroup directory (nil after ReleaseCgroupFD) memoryPeakFile *os.File // Open FD to memory.peak for per-FD reset (nil after Remove or if not available) @@ -126,7 +126,7 @@ func (h *CgroupHandle) Remove(ctx context.Context) error { } logger.L().Debug(ctx, "removed cgroup for sandbox", - logger.WithSandboxID(h.sandboxID), + zap.String("cgroup_name", h.cgroupName), zap.String("path", h.path)) return nil @@ -141,13 +141,13 @@ func (h *CgroupHandle) Path() string { return h.path } -// SandboxID returns the sandbox ID this cgroup is for -func (h *CgroupHandle) SandboxID() string { +// CgroupName returns the name of the cgroup +func (h *CgroupHandle) CgroupName() string { if h == nil { return "" } - return h.sandboxID + return h.cgroupName } // Manager handles initialization and creation of cgroups @@ -157,10 +157,10 @@ type Manager interface { // Should be called once at orchestrator startup Initialize(ctx context.Context) error - // Create creates a cgroup for a sandbox and returns a handle + // Create creates a cgroup with the given name and returns a handle // The handle provides access to the cgroup's FD, stats, and cleanup // Returns error if cgroup creation fails - Create(ctx context.Context, sandboxID string) (*CgroupHandle, error) + Create(ctx context.Context, cgroupName string) (*CgroupHandle, error) } type managerImpl struct{} @@ -190,8 +190,8 @@ func (m *managerImpl) Initialize(ctx context.Context) error { return nil } -func (m *managerImpl) Create(ctx context.Context, sandboxID string) (*CgroupHandle, error) { - cgroupPath := m.sandboxCgroupPath(sandboxID) +func (m *managerImpl) Create(ctx context.Context, cgroupName string) (*CgroupHandle, error) { + cgroupPath := m.cgroupPath(cgroupName) if err := os.MkdirAll(cgroupPath, 0o755); err != nil { return nil, fmt.Errorf("failed to create cgroup directory: %w", err) @@ -211,14 +211,14 @@ func (m 
*managerImpl) Create(ctx context.Context, sandboxID string) (*CgroupHand if peakErr != nil { // Not fatal — memory.peak may not exist on older kernels logger.L().Debug(ctx, "failed to open memory.peak", - logger.WithSandboxID(sandboxID), + zap.String("cgroup_name", cgroupName), zap.String("path", memPeakPath), zap.Error(peakErr)) memoryPeakFile = nil } handle := &CgroupHandle{ - sandboxID: sandboxID, + cgroupName: cgroupName, path: cgroupPath, file: file, memoryPeakFile: memoryPeakFile, @@ -226,7 +226,7 @@ func (m *managerImpl) Create(ctx context.Context, sandboxID string) (*CgroupHand } logger.L().Debug(ctx, "created cgroup for sandbox", - logger.WithSandboxID(sandboxID), + zap.String("cgroup_name", cgroupName), zap.String("path", cgroupPath), zap.Int("fd", handle.GetFD())) @@ -304,7 +304,7 @@ func (m *managerImpl) readMemoryPeak(memoryPeakFile *os.File) (uint64, error) { return peakBytes, nil } -// sandboxCgroupPath returns the filesystem path for a sandbox's cgroup -func (m *managerImpl) sandboxCgroupPath(sandboxID string) string { - return filepath.Join(RootCgroupPath, fmt.Sprintf("sbx-%s", sandboxID)) +// cgroupPath returns the filesystem path for a sandbox's cgroup +func (m *managerImpl) cgroupPath(cgroupName string) string { + return filepath.Join(RootCgroupPath, cgroupName) } diff --git a/packages/orchestrator/internal/sandbox/cgroup/manager_test.go b/packages/orchestrator/internal/sandbox/cgroup/manager_test.go index e4c293f464..1fc28ddcc2 100644 --- a/packages/orchestrator/internal/sandbox/cgroup/manager_test.go +++ b/packages/orchestrator/internal/sandbox/cgroup/manager_test.go @@ -75,7 +75,7 @@ func TestCgroupHandleLifecycle(t *testing.T) { require.NotNil(t, handle) defer handle.Remove(ctx) - assert.Equal(t, testSandboxID, handle.SandboxID()) + assert.Equal(t, testSandboxID, handle.CgroupName()) assert.Contains(t, handle.Path(), testSandboxID) assert.Positive(t, handle.GetFD()) @@ -141,7 +141,7 @@ func TestCgroupHandleWithProcessCreation(t *testing.T) { 
procCgroupPath := fmt.Sprintf("/proc/%d/cgroup", cmd.Process.Pid) cgroupData, err := os.ReadFile(procCgroupPath) require.NoError(t, err) - assert.Contains(t, string(cgroupData), fmt.Sprintf("e2b/sbx-%s", testSandboxID)) + assert.Contains(t, string(cgroupData), fmt.Sprintf("e2b/%s", testSandboxID)) cmd.Process.Kill() cmd.Wait() @@ -292,7 +292,7 @@ func TestStatsParsing(t *testing.T) { t.Parallel() tmpDir := t.TempDir() - cgroupPath := filepath.Join(tmpDir, "sbx-test-parse-sandbox") + cgroupPath := filepath.Join(tmpDir, "test-parse-sandbox") err := os.MkdirAll(cgroupPath, 0o755) require.NoError(t, err) diff --git a/packages/orchestrator/internal/sandbox/sandbox.go b/packages/orchestrator/internal/sandbox/sandbox.go index 1917929495..24d72bdf1b 100644 --- a/packages/orchestrator/internal/sandbox/sandbox.go +++ b/packages/orchestrator/internal/sandbox/sandbox.go @@ -1111,9 +1111,9 @@ func pauseProcessRootfs( // // Returns the CgroupHandle and the cgroup directory FD to pass to the // Firecracker process. If cgroup accounting is disabled, returns (nil, cgroup.NoCgroupFD). 
-func createCgroup(ctx context.Context, cgroupManager cgroup.Manager, sandboxID string, cleanup *Cleanup) (*cgroup.CgroupHandle, int) { +func createCgroup(ctx context.Context, cgroupManager cgroup.Manager, cgroupName string, cleanup *Cleanup) (*cgroup.CgroupHandle, int) { ctx, span := tracer.Start(ctx, "sandbox-create-cgroup", trace.WithAttributes( - telemetry.WithSandboxID(sandboxID), + attribute.String("cgroup_name", cgroupName), )) defer span.End() @@ -1121,10 +1121,10 @@ func createCgroup(ctx context.Context, cgroupManager cgroup.Manager, sandboxID s return nil, cgroup.NoCgroupFD } - handle, err := cgroupManager.Create(ctx, sandboxID) + handle, err := cgroupManager.Create(ctx, cgroupName) if err != nil { logger.L().Warn(ctx, "failed to create cgroup, continuing without cgroup accounting", - logger.WithSandboxID(sandboxID), + zap.String("cgroup_name", cgroupName), zap.Error(err)) telemetry.ReportEvent(ctx, "cgroup creation failed, continuing without accounting") diff --git a/packages/shared/pkg/storage/sandbox.go b/packages/shared/pkg/storage/sandbox.go index cf58fdaa1c..047b0df2a1 100644 --- a/packages/shared/pkg/storage/sandbox.go +++ b/packages/shared/pkg/storage/sandbox.go @@ -64,5 +64,5 @@ func (s *SandboxFiles) SandboxMetricsFifoPath() string { } func (s *SandboxFiles) SandboxCgroupName() string { - return fmt.Sprintf("%s-%s", s.SandboxID, s.randomID) + return fmt.Sprintf("sbx-%s-%s", s.SandboxID, s.randomID) }