Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 3 additions & 4 deletions packages/api/internal/handlers/proxy_grpc.go
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@ import (
"github.com/e2b-dev/infra/packages/api/internal/utils"
dbtypes "github.com/e2b-dev/infra/packages/db/pkg/types"
"github.com/e2b-dev/infra/packages/shared/pkg/consts"
featureflags "github.com/e2b-dev/infra/packages/shared/pkg/feature-flags"
proxygrpc "github.com/e2b-dev/infra/packages/shared/pkg/grpc/proxy"
"github.com/e2b-dev/infra/packages/shared/pkg/logger"
sharedutils "github.com/e2b-dev/infra/packages/shared/pkg/utils"
Expand Down Expand Up @@ -98,10 +99,6 @@ func (s *SandboxService) ResumeSandbox(ctx context.Context, req *proxygrpc.Sandb

teamID := snap.Snapshot.TeamID

// Fixed 5 minutes for client-proxy initiated resume.
// This intentionally does not allow callers to override timeouts via gRPC.
timeout := 300 * time.Second

var autoResume *dbtypes.SandboxAutoResumeConfig
if snap.Snapshot.Config != nil {
autoResume = snap.Snapshot.Config.AutoResume
Expand All @@ -114,6 +111,8 @@ func (s *SandboxService) ResumeSandbox(ctx context.Context, req *proxygrpc.Sandb
if err != nil {
return nil, status.Errorf(codes.Internal, "failed to get team: %v", err)
}
minAutoResumeTimeout := time.Duration(s.api.featureFlags.IntFlag(ctx, featureflags.MinAutoResumeTimeoutSeconds)) * time.Second
timeout := calculateAutoResumeTimeout(autoResume, minAutoResumeTimeout, team)

autoPause := snap.Snapshot.AutoPause
nodeID := &snap.Snapshot.OriginNodeID
Expand Down
4 changes: 4 additions & 0 deletions packages/api/internal/handlers/sandbox_create.go
Original file line number Diff line number Diff line change
Expand Up @@ -144,6 +144,10 @@ func (a *APIStore) PostSandboxes(c *gin.Context) {

return
}
if autoResume != nil {
minAutoResumeTimeout := time.Duration(a.featureFlags.IntFlag(ctx, featureflags.MinAutoResumeTimeoutSeconds)) * time.Second
autoResume.Timeout = calculateTimeoutSeconds(timeout, minAutoResumeTimeout, teamInfo)
}

var envdAccessToken *string = nil
if body.Secure != nil && *body.Secure == true {
Expand Down
45 changes: 45 additions & 0 deletions packages/api/internal/handlers/timeout_helper.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
package handlers

import (
"time"

typesteam "github.com/e2b-dev/infra/packages/auth/pkg/types"
dbtypes "github.com/e2b-dev/infra/packages/db/pkg/types"
)

const (
// defaultProxyAutoResumeTimeout is the timeout calculateAutoResumeTimeout
// starts from when the auto-resume config is nil or carries no positive
// Timeout value. The result is still subject to the plan cap and minimum floor.
defaultProxyAutoResumeTimeout = 5 * time.Minute
)

// getTeamPlanLimit converts the team's MaxLengthHours limit into a
// time.Duration. It reports 0 when team is nil or has no Limits attached;
// clampAutoResumeTimeout treats a non-positive limit as "no cap".
func getTeamPlanLimit(team *typesteam.Team) time.Duration {
	if team != nil && team.Limits != nil {
		maxHours := team.Limits.MaxLengthHours

		return time.Duration(maxHours) * time.Hour
	}

	return 0
}

// clampAutoResumeTimeout bounds requestedTimeout in two steps:
//
//  1. If teamPlanLimit is positive, the timeout is cut down to it.
//  2. The outcome is then raised to minAutoResumeTimeout if it is smaller.
//
// Because the floor is applied last, it takes precedence over the plan cap
// when minAutoResumeTimeout exceeds teamPlanLimit.
func clampAutoResumeTimeout(requestedTimeout, teamPlanLimit, minAutoResumeTimeout time.Duration) time.Duration {
	capped := requestedTimeout
	if teamPlanLimit > 0 && teamPlanLimit < capped {
		capped = teamPlanLimit
	}

	if capped >= minAutoResumeTimeout {
		return capped
	}

	return minAutoResumeTimeout
}

// calculateTimeoutSeconds clamps requestedTimeout with clampAutoResumeTimeout,
// using the team's plan limit (getTeamPlanLimit) as the cap and
// minAutoResumeTimeout as the floor, and returns the result in whole seconds.
// Any sub-second remainder is truncated.
//
// A non-positive clamped duration yields 0. The conversion is done with
// integer arithmetic rather than via Duration.Seconds(): converting a negative
// float64 to uint64 is implementation-dependent in Go, and the float sum can
// round up by one second for very large durations.
func calculateTimeoutSeconds(requestedTimeout, minAutoResumeTimeout time.Duration, team *typesteam.Team) uint64 {
	clamped := clampAutoResumeTimeout(requestedTimeout, getTeamPlanLimit(team), minAutoResumeTimeout)
	if clamped <= 0 {
		// Only reachable when both the request and the configured minimum
		// are non-positive; never hand a negative value to the uint64 cast.
		return 0
	}

	return uint64(clamped / time.Second)
}

// calculateAutoResumeTimeout picks the timeout to use when resuming a sandbox.
// It starts from autoResume.Timeout (interpreted as seconds) when that value
// is positive, and from defaultProxyAutoResumeTimeout otherwise. The chosen
// value is then bounded by the team's plan limit and minAutoResumeTimeout via
// clampAutoResumeTimeout.
func calculateAutoResumeTimeout(autoResume *dbtypes.SandboxAutoResumeConfig, minAutoResumeTimeout time.Duration, team *typesteam.Team) time.Duration {
	requested := defaultProxyAutoResumeTimeout
	if autoResume != nil {
		if persistedSeconds := autoResume.Timeout; persistedSeconds > 0 {
			requested = time.Duration(persistedSeconds) * time.Second
		}
	}

	planLimit := getTeamPlanLimit(team)

	return clampAutoResumeTimeout(requested, planLimit, minAutoResumeTimeout)
}
67 changes: 67 additions & 0 deletions packages/api/internal/handlers/timeout_helper_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,67 @@
package handlers

import (
"testing"
"time"

"github.com/stretchr/testify/require"

typesteam "github.com/e2b-dev/infra/packages/auth/pkg/types"
dbtypes "github.com/e2b-dev/infra/packages/db/pkg/types"
)

// testTeamWithMaxLengthHours builds a Team fixture whose only populated field
// is Limits.MaxLengthHours, set to hours.
func testTeamWithMaxLengthHours(hours int64) *typesteam.Team {
	limits := &typesteam.TeamLimits{MaxLengthHours: hours}

	return &typesteam.Team{Limits: limits}
}

// TestCalculateTimeoutSeconds checks the create-time conversion to seconds
// against a team limited to one hour and a one-minute minimum: a zero or
// too-short request is raised to the minimum, and an over-long request is cut
// down to the team's maximum sandbox length.
func TestCalculateTimeoutSeconds(t *testing.T) {
	t.Parallel()

	const floor = time.Minute
	oneHourTeam := testTeamWithMaxLengthHours(1)

	cases := []struct {
		name      string
		requested time.Duration
		want      uint64
	}{
		{
			name:      "create without explicit timeout is floored to the anti-thrash minimum",
			requested: 0,
			want:      60,
		},
		{
			name:      "very short request is floored to the anti-thrash minimum",
			requested: 15 * time.Second,
			want:      60,
		},
		{
			name:      "very long request is capped by the team's maximum sandbox length",
			requested: 2 * time.Hour,
			want:      3600,
		},
	}

	for _, tc := range cases {
		got := calculateTimeoutSeconds(tc.requested, floor, oneHourTeam)
		require.Equal(t, tc.want, got, tc.name)
	}
}

// TestCalculateAutoResumeTimeout checks resume-time timeout selection against
// a team limited to one hour and a one-minute minimum: the fallback used when
// no config is persisted, the minimum floor for small persisted values, and
// the team cap for large ones.
func TestCalculateAutoResumeTimeout(t *testing.T) {
	t.Parallel()

	const floor = time.Minute
	oneHourTeam := testTeamWithMaxLengthHours(1)

	cases := []struct {
		name      string
		persisted *dbtypes.SandboxAutoResumeConfig
		want      time.Duration
	}{
		{
			name:      "older snapshot without persisted value uses the proxy fallback timeout",
			persisted: nil,
			want:      5 * time.Minute,
		},
		{
			name:      "persisted value below minimum is floored to the anti-thrash minimum",
			persisted: &dbtypes.SandboxAutoResumeConfig{Timeout: 20},
			want:      time.Minute,
		},
		{
			name:      "persisted value above plan limit is capped by the team limit",
			persisted: &dbtypes.SandboxAutoResumeConfig{Timeout: 7200},
			want:      time.Hour,
		},
	}

	for _, tc := range cases {
		got := calculateAutoResumeTimeout(tc.persisted, floor, oneHourTeam)
		require.Equal(t, tc.want, got, tc.name)
	}
}
4 changes: 2 additions & 2 deletions packages/api/internal/orchestrator/create_instance.go
Original file line number Diff line number Diff line change
Expand Up @@ -215,9 +215,9 @@ func (o *Orchestrator) CreateSandbox(

var orchAutoResume *orchestrator.SandboxAutoResumeConfig
if autoResume != nil {
policy := string(autoResume.Policy)
orchAutoResume = &orchestrator.SandboxAutoResumeConfig{
Policy: policy,
Policy: string(autoResume.Policy),
TimeoutSeconds: autoResume.Timeout,
}
}

Expand Down
7 changes: 4 additions & 3 deletions packages/api/internal/orchestrator/nodemanager/sandboxes.go
Original file line number Diff line number Diff line change
Expand Up @@ -80,9 +80,10 @@ func (n *Node) GetSandboxes(ctx context.Context) ([]sandbox.Sandbox, error) {

var autoResume *types.SandboxAutoResumeConfig
if autoResumeCfg := config.GetAutoResume(); autoResumeCfg != nil {
p := autoResumeCfg.GetPolicy()
policy := types.SandboxAutoResumePolicy(p)
autoResume = &types.SandboxAutoResumeConfig{Policy: policy}
autoResume = &types.SandboxAutoResumeConfig{
Policy: types.SandboxAutoResumePolicy(autoResumeCfg.GetPolicy()),
Timeout: autoResumeCfg.GetTimeoutSeconds(),
}
}

sandboxesInfo = append(
Expand Down
3 changes: 2 additions & 1 deletion packages/db/pkg/types/types.go
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,8 @@ const (
)

// SandboxAutoResumeConfig holds a sandbox's auto-resume settings in their
// JSON-serializable form.
type SandboxAutoResumeConfig struct {
Policy SandboxAutoResumePolicy `json:"policy"`
Policy SandboxAutoResumePolicy `json:"policy"`
// Timeout is the auto-resume timeout in seconds. It is omitted from the
// JSON encoding when zero.
Timeout uint64 `json:"timeout,omitempty"`
}

type PausedSandboxConfig struct {
Expand Down
2 changes: 2 additions & 0 deletions packages/orchestrator/orchestrator.proto
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,8 @@ message SandboxConfig {
message SandboxAutoResumeConfig {
// Policy values are owned by the API layer today (e.g. "off", "any").
string policy = 1;
// Auto-resume timeout in seconds, derived from the timeout requested on
// initial sandbox create. The API applies its minimum floor and team-limit
// cap before sending this value.
uint64 timeout_seconds = 2;
}

message SandboxVolumeMount {
Expand Down
4 changes: 4 additions & 0 deletions packages/shared/pkg/feature-flags/flags.go
Original file line number Diff line number Diff line change
Expand Up @@ -162,6 +162,10 @@ var (

// BuildBaseRootfsSizeLimitMB is the maximum size of the base rootfs filesystem created from the OCI image, in MB.
BuildBaseRootfsSizeLimitMB = newIntFlag("build-base-rootfs-size-limit-mb", 25000)

// MinAutoResumeTimeoutSeconds is the minimum auto-resume timeout in seconds.
// This prevents thrashing from very short timeouts.
MinAutoResumeTimeoutSeconds = newIntFlag("minimum-autoresume-timeout", 60)
)

type StringFlag struct {
Expand Down
Loading
Loading