From 43605ed4d22336a2d98d98496ba644f5fa29f9ae Mon Sep 17 00:00:00 2001 From: Mish Ushakov <10400064+mishushakov@users.noreply.github.com> Date: Wed, 28 Jan 2026 19:31:27 +0100 Subject: [PATCH 01/59] feat(envd): add multipart file upload API Add endpoints for chunked file uploads to support large files: - POST /files/upload/init - Initialize upload session - PUT /files/upload/{uploadId} - Upload individual parts - POST /files/upload/{uploadId}/complete - Assemble final file - DELETE /files/upload/{uploadId} - Abort and cleanup Parts are stored in temp directory and assembled sequentially on completion using copy_file_range for efficiency. Co-Authored-By: Claude Opus 4.5 --- packages/envd/internal/api/api.gen.go | 262 ++++++++++++++ packages/envd/internal/api/auth.go | 25 +- .../envd/internal/api/multipart_upload.go | 330 ++++++++++++++++++ .../internal/api/multipart_upload_test.go | 310 ++++++++++++++++ packages/envd/internal/api/store.go | 16 + packages/envd/spec/envd.yaml | 168 +++++++++ 6 files changed, 1104 insertions(+), 7 deletions(-) create mode 100644 packages/envd/internal/api/multipart_upload.go create mode 100644 packages/envd/internal/api/multipart_upload_test.go diff --git a/packages/envd/internal/api/api.gen.go b/packages/envd/internal/api/api.gen.go index c0c48a6a49..6aa7d12f99 100644 --- a/packages/envd/internal/api/api.gen.go +++ b/packages/envd/internal/api/api.gen.go @@ -74,6 +74,30 @@ type Metrics struct { Ts *int64 `json:"ts,omitempty"` } +// MultipartUploadComplete defines model for MultipartUploadComplete. +type MultipartUploadComplete struct { + // Path Path to the final assembled file + Path string `json:"path"` + + // Size Total size of the assembled file in bytes + Size int64 `json:"size"` +} + +// MultipartUploadInit defines model for MultipartUploadInit. 
+type MultipartUploadInit struct { + // UploadId Unique identifier for the upload session + UploadId string `json:"uploadId"` +} + +// MultipartUploadPart defines model for MultipartUploadPart. +type MultipartUploadPart struct { + // PartNumber The part number that was uploaded + PartNumber int `json:"partNumber"` + + // Size Size of the uploaded part in bytes + Size int64 `json:"size"` +} + // FilePath defines model for FilePath. type FilePath = string @@ -101,6 +125,9 @@ type InvalidUser = Error // NotEnoughDiskSpace defines model for NotEnoughDiskSpace. type NotEnoughDiskSpace = Error +// UploadNotFound defines model for UploadNotFound. +type UploadNotFound = Error + // UploadSuccess defines model for UploadSuccess. type UploadSuccess = []EntryInfo @@ -139,6 +166,30 @@ type PostFilesParams struct { SignatureExpiration *SignatureExpiration `form:"signature_expiration,omitempty" json:"signature_expiration,omitempty"` } +// PostFilesUploadInitJSONBody defines parameters for PostFilesUploadInit. +type PostFilesUploadInitJSONBody struct { + // Path Path to the file to upload + Path string `json:"path"` +} + +// PostFilesUploadInitParams defines parameters for PostFilesUploadInit. +type PostFilesUploadInitParams struct { + // Username User used for setting the owner, or resolving relative paths. + Username *User `form:"username,omitempty" json:"username,omitempty"` + + // Signature Signature used for file access permission verification. + Signature *Signature `form:"signature,omitempty" json:"signature,omitempty"` + + // SignatureExpiration Signature expiration used for defining the expiration time of the signature. + SignatureExpiration *SignatureExpiration `form:"signature_expiration,omitempty" json:"signature_expiration,omitempty"` +} + +// PutFilesUploadUploadIdParams defines parameters for PutFilesUploadUploadId. 
+type PutFilesUploadUploadIdParams struct { + // Part The part number (0-indexed) + Part int `form:"part" json:"part"` +} + // PostInitJSONBody defines parameters for PostInit. type PostInitJSONBody struct { // AccessToken Access token for secure access to envd service @@ -163,6 +214,9 @@ type PostInitJSONBody struct { // PostFilesMultipartRequestBody defines body for PostFiles for multipart/form-data ContentType. type PostFilesMultipartRequestBody PostFilesMultipartBody +// PostFilesUploadInitJSONRequestBody defines body for PostFilesUploadInit for application/json ContentType. +type PostFilesUploadInitJSONRequestBody PostFilesUploadInitJSONBody + // PostInitJSONRequestBody defines body for PostInit for application/json ContentType. type PostInitJSONRequestBody PostInitJSONBody @@ -177,6 +231,18 @@ type ServerInterface interface { // Upload a file and ensure the parent directories exist. If the file exists, it will be overwritten. // (POST /files) PostFiles(w http.ResponseWriter, r *http.Request, params PostFilesParams) + // Initialize a multipart file upload session + // (POST /files/upload/init) + PostFilesUploadInit(w http.ResponseWriter, r *http.Request, params PostFilesUploadInitParams) + // Abort a multipart file upload and clean up temporary files + // (DELETE /files/upload/{uploadId}) + DeleteFilesUploadUploadId(w http.ResponseWriter, r *http.Request, uploadId string) + // Upload a part of a multipart file upload + // (PUT /files/upload/{uploadId}) + PutFilesUploadUploadId(w http.ResponseWriter, r *http.Request, uploadId string, params PutFilesUploadUploadIdParams) + // Complete a multipart file upload and assemble the final file + // (POST /files/upload/{uploadId}/complete) + PostFilesUploadUploadIdComplete(w http.ResponseWriter, r *http.Request, uploadId string) // Check the health of the service // (GET /health) GetHealth(w http.ResponseWriter, r *http.Request) @@ -210,6 +276,30 @@ func (_ Unimplemented) PostFiles(w http.ResponseWriter, r *http.Request, 
params w.WriteHeader(http.StatusNotImplemented) } +// Initialize a multipart file upload session +// (POST /files/upload/init) +func (_ Unimplemented) PostFilesUploadInit(w http.ResponseWriter, r *http.Request, params PostFilesUploadInitParams) { + w.WriteHeader(http.StatusNotImplemented) +} + +// Abort a multipart file upload and clean up temporary files +// (DELETE /files/upload/{uploadId}) +func (_ Unimplemented) DeleteFilesUploadUploadId(w http.ResponseWriter, r *http.Request, uploadId string) { + w.WriteHeader(http.StatusNotImplemented) +} + +// Upload a part of a multipart file upload +// (PUT /files/upload/{uploadId}) +func (_ Unimplemented) PutFilesUploadUploadId(w http.ResponseWriter, r *http.Request, uploadId string, params PutFilesUploadUploadIdParams) { + w.WriteHeader(http.StatusNotImplemented) +} + +// Complete a multipart file upload and assemble the final file +// (POST /files/upload/{uploadId}/complete) +func (_ Unimplemented) PostFilesUploadUploadIdComplete(w http.ResponseWriter, r *http.Request, uploadId string) { + w.WriteHeader(http.StatusNotImplemented) +} + // Check the health of the service // (GET /health) func (_ Unimplemented) GetHealth(w http.ResponseWriter, r *http.Request) { @@ -371,6 +461,166 @@ func (siw *ServerInterfaceWrapper) PostFiles(w http.ResponseWriter, r *http.Requ handler.ServeHTTP(w, r) } +// PostFilesUploadInit operation middleware +func (siw *ServerInterfaceWrapper) PostFilesUploadInit(w http.ResponseWriter, r *http.Request) { + + var err error + + ctx := r.Context() + + ctx = context.WithValue(ctx, AccessTokenAuthScopes, []string{}) + + r = r.WithContext(ctx) + + // Parameter object where we will unmarshal all parameters from the context + var params PostFilesUploadInitParams + + // ------------- Optional query parameter "username" ------------- + + err = runtime.BindQueryParameter("form", true, false, "username", r.URL.Query(), &params.Username) + if err != nil { + siw.ErrorHandlerFunc(w, r,
&InvalidParamFormatError{ParamName: "username", Err: err}) + return + } + + // ------------- Optional query parameter "signature" ------------- + + err = runtime.BindQueryParameter("form", true, false, "signature", r.URL.Query(), &params.Signature) + if err != nil { + siw.ErrorHandlerFunc(w, r, &InvalidParamFormatError{ParamName: "signature", Err: err}) + return + } + + // ------------- Optional query parameter "signature_expiration" ------------- + + err = runtime.BindQueryParameter("form", true, false, "signature_expiration", r.URL.Query(), &params.SignatureExpiration) + if err != nil { + siw.ErrorHandlerFunc(w, r, &InvalidParamFormatError{ParamName: "signature_expiration", Err: err}) + return + } + + handler := http.Handler(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + siw.Handler.PostFilesUploadInit(w, r, params) + })) + + for _, middleware := range siw.HandlerMiddlewares { + handler = middleware(handler) + } + + handler.ServeHTTP(w, r) +} + +// DeleteFilesUploadUploadId operation middleware +func (siw *ServerInterfaceWrapper) DeleteFilesUploadUploadId(w http.ResponseWriter, r *http.Request) { + + var err error + + // ------------- Path parameter "uploadId" ------------- + var uploadId string + + err = runtime.BindStyledParameterWithOptions("simple", "uploadId", chi.URLParam(r, "uploadId"), &uploadId, runtime.BindStyledParameterOptions{ParamLocation: runtime.ParamLocationPath, Explode: false, Required: true}) + if err != nil { + siw.ErrorHandlerFunc(w, r, &InvalidParamFormatError{ParamName: "uploadId", Err: err}) + return + } + + ctx := r.Context() + + ctx = context.WithValue(ctx, AccessTokenAuthScopes, []string{}) + + r = r.WithContext(ctx) + + handler := http.Handler(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + siw.Handler.DeleteFilesUploadUploadId(w, r, uploadId) + })) + + for _, middleware := range siw.HandlerMiddlewares { + handler = middleware(handler) + } + + handler.ServeHTTP(w, r) +} + +// PutFilesUploadUploadId
operation middleware +func (siw *ServerInterfaceWrapper) PutFilesUploadUploadId(w http.ResponseWriter, r *http.Request) { + + var err error + + // ------------- Path parameter "uploadId" ------------- + var uploadId string + + err = runtime.BindStyledParameterWithOptions("simple", "uploadId", chi.URLParam(r, "uploadId"), &uploadId, runtime.BindStyledParameterOptions{ParamLocation: runtime.ParamLocationPath, Explode: false, Required: true}) + if err != nil { + siw.ErrorHandlerFunc(w, r, &InvalidParamFormatError{ParamName: "uploadId", Err: err}) + return + } + + ctx := r.Context() + + ctx = context.WithValue(ctx, AccessTokenAuthScopes, []string{}) + + r = r.WithContext(ctx) + + // Parameter object where we will unmarshal all parameters from the context + var params PutFilesUploadUploadIdParams + + // ------------- Required query parameter "part" ------------- + + if paramValue := r.URL.Query().Get("part"); paramValue != "" { + + } else { + siw.ErrorHandlerFunc(w, r, &RequiredParamError{ParamName: "part"}) + return + } + + err = runtime.BindQueryParameter("form", true, true, "part", r.URL.Query(), &params.Part) + if err != nil { + siw.ErrorHandlerFunc(w, r, &InvalidParamFormatError{ParamName: "part", Err: err}) + return + } + + handler := http.Handler(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + siw.Handler.PutFilesUploadUploadId(w, r, uploadId, params) + })) + + for _, middleware := range siw.HandlerMiddlewares { + handler = middleware(handler) + } + + handler.ServeHTTP(w, r) +} + +// PostFilesUploadUploadIdComplete operation middleware +func (siw *ServerInterfaceWrapper) PostFilesUploadUploadIdComplete(w http.ResponseWriter, r *http.Request) { + + var err error + + // ------------- Path parameter "uploadId" ------------- + var uploadId string + + err = runtime.BindStyledParameterWithOptions("simple", "uploadId", chi.URLParam(r, "uploadId"), &uploadId, runtime.BindStyledParameterOptions{ParamLocation: runtime.ParamLocationPath, Explode: false,
Required: true}) + if err != nil { + siw.ErrorHandlerFunc(w, r, &InvalidParamFormatError{ParamName: "uploadId", Err: err}) + return + } + + ctx := r.Context() + + ctx = context.WithValue(ctx, AccessTokenAuthScopes, []string{}) + + r = r.WithContext(ctx) + + handler := http.Handler(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + siw.Handler.PostFilesUploadUploadIdComplete(w, r, uploadId) + })) + + for _, middleware := range siw.HandlerMiddlewares { + handler = middleware(handler) + } + + handler.ServeHTTP(w, r) +} + // GetHealth operation middleware func (siw *ServerInterfaceWrapper) GetHealth(w http.ResponseWriter, r *http.Request) { @@ -547,6 +797,18 @@ func HandlerWithOptions(si ServerInterface, options ChiServerOptions) http.Handl r.Group(func(r chi.Router) { r.Post(options.BaseURL+"/files", wrapper.PostFiles) }) + r.Group(func(r chi.Router) { + r.Post(options.BaseURL+"/files/upload/init", wrapper.PostFilesUploadInit) + }) + r.Group(func(r chi.Router) { + r.Delete(options.BaseURL+"/files/upload/{uploadId}", wrapper.DeleteFilesUploadUploadId) + }) + r.Group(func(r chi.Router) { + r.Put(options.BaseURL+"/files/upload/{uploadId}", wrapper.PutFilesUploadUploadId) + }) + r.Group(func(r chi.Router) { + r.Post(options.BaseURL+"/files/upload/{uploadId}/complete", wrapper.PostFilesUploadUploadIdComplete) + }) r.Group(func(r chi.Router) { r.Get(options.BaseURL+"/health", wrapper.GetHealth) }) diff --git a/packages/envd/internal/api/auth.go b/packages/envd/internal/api/auth.go index 0fa69a9953..8cdeb09b22 100644 --- a/packages/envd/internal/api/auth.go +++ b/packages/envd/internal/api/auth.go @@ -4,8 +4,8 @@ import ( "errors" "fmt" "net/http" - "slices" "strconv" + "strings" "time" "github.com/e2b-dev/infra/packages/shared/pkg/keys" @@ -18,22 +18,33 @@ const ( accessTokenHeader = "X-Access-Token" ) -// paths that are always allowed without general authentication -var allowedPaths = []string{ +// allowedPathPrefixes are paths that bypass general 
authentication +// (e.g., health check, endpoints supporting signing) +// Uses prefix matching to support both exact paths and paths with dynamic segments +var allowedPathPrefixes = []string{ "GET/health", "GET/files", "POST/files", + "PUT/files/upload/", + "DELETE/files/upload/", +} + +func isAllowedPath(methodPath string) bool { + for _, prefix := range allowedPathPrefixes { + if strings.HasPrefix(methodPath, prefix) { + return true + } + } + return false } func (a *API) WithAuthorization(handler http.Handler) http.Handler { return http.HandlerFunc(func(w http.ResponseWriter, req *http.Request) { if a.accessToken != nil { authHeader := req.Header.Get(accessTokenHeader) + methodPath := req.Method + req.URL.Path - // check if this path is allowed without authentication (e.g., health check, endpoints supporting signing) - allowedPath := slices.Contains(allowedPaths, req.Method+req.URL.Path) - - if authHeader != *a.accessToken && !allowedPath { + if authHeader != *a.accessToken && !isAllowedPath(methodPath) { a.logger.Error().Msg("Trying to access secured envd without correct access token") err := fmt.Errorf("unauthorized access, please provide a valid access token or method signing if supported") diff --git a/packages/envd/internal/api/multipart_upload.go b/packages/envd/internal/api/multipart_upload.go new file mode 100644 index 0000000000..7e289069f3 --- /dev/null +++ b/packages/envd/internal/api/multipart_upload.go @@ -0,0 +1,330 @@ +package api + +import ( + "encoding/json" + "errors" + "fmt" + "net/http" + "os" + "os/user" + "path/filepath" + "sort" + "syscall" + + "github.com/google/uuid" + + "github.com/e2b-dev/infra/packages/envd/internal/execcontext" + "github.com/e2b-dev/infra/packages/envd/internal/logs" + "github.com/e2b-dev/infra/packages/envd/internal/permissions" +) + +const ( + multipartTempDir = "/tmp/envd-multipart" +) + +// PostFilesUploadInit initializes a multipart upload session +func (a *API) PostFilesUploadInit(w http.ResponseWriter, r 
*http.Request, params PostFilesUploadInitParams) { + defer r.Body.Close() + + operationID := logs.AssignOperationID() + + // Parse the request body + var body PostFilesUploadInitJSONRequestBody + if err := json.NewDecoder(r.Body).Decode(&body); err != nil { + a.logger.Error().Err(err).Str(string(logs.OperationIDKey), operationID).Msg("failed to decode request body") + jsonError(w, http.StatusBadRequest, fmt.Errorf("invalid request body: %w", err)) + return + } + + // Validate signing if needed + err := a.validateSigning(r, params.Signature, params.SignatureExpiration, params.Username, body.Path, SigningWriteOperation) + if err != nil { + a.logger.Error().Err(err).Str(string(logs.OperationIDKey), operationID).Msg("error during auth validation") + jsonError(w, http.StatusUnauthorized, err) + return + } + + // Resolve username + username, err := execcontext.ResolveDefaultUsername(params.Username, a.defaults.User) + if err != nil { + a.logger.Error().Err(err).Str(string(logs.OperationIDKey), operationID).Msg("no user specified") + jsonError(w, http.StatusBadRequest, err) + return + } + + // Lookup user + u, err := user.Lookup(username) + if err != nil { + a.logger.Error().Err(err).Str(string(logs.OperationIDKey), operationID).Str("username", username).Msg("error looking up user") + jsonError(w, http.StatusUnauthorized, fmt.Errorf("error looking up user '%s': %w", username, err)) + return + } + + uid, gid, err := permissions.GetUserIdInts(u) + if err != nil { + a.logger.Error().Err(err).Str(string(logs.OperationIDKey), operationID).Msg("error getting user ids") + jsonError(w, http.StatusInternalServerError, fmt.Errorf("error getting user ids: %w", err)) + return + } + + // Resolve the file path + filePath, err := permissions.ExpandAndResolve(body.Path, u, a.defaults.Workdir) + if err != nil { + a.logger.Error().Err(err).Str(string(logs.OperationIDKey), operationID).Msg("error resolving path") + jsonError(w, http.StatusBadRequest, fmt.Errorf("error resolving path: %w", 
err)) + return + } + + // Create upload ID + uploadID := uuid.New().String() + + // Create temp directory for this upload + tempDir := filepath.Join(multipartTempDir, uploadID) + if err := os.MkdirAll(tempDir, 0o755); err != nil { + a.logger.Error().Err(err).Str(string(logs.OperationIDKey), operationID).Msg("error creating temp directory") + jsonError(w, http.StatusInternalServerError, fmt.Errorf("error creating temp directory: %w", err)) + return + } + + // Store the session + session := &MultipartUploadSession{ + UploadID: uploadID, + FilePath: filePath, + TempDir: tempDir, + UID: uid, + GID: gid, + Parts: make(map[int]string), + } + + a.uploadsLock.Lock() + a.uploads[uploadID] = session + a.uploadsLock.Unlock() + + a.logger.Debug(). + Str(string(logs.OperationIDKey), operationID). + Str("uploadId", uploadID). + Str("filePath", filePath). + Msg("multipart upload initialized") + + w.Header().Set("Content-Type", "application/json") + w.WriteHeader(http.StatusOK) + json.NewEncoder(w).Encode(MultipartUploadInit{ + UploadId: uploadID, + }) +} + +// PutFilesUploadUploadId uploads a part of a multipart upload +func (a *API) PutFilesUploadUploadId(w http.ResponseWriter, r *http.Request, uploadId string, params PutFilesUploadUploadIdParams) { + defer r.Body.Close() + + operationID := logs.AssignOperationID() + + // Get the session + a.uploadsLock.RLock() + session, exists := a.uploads[uploadId] + a.uploadsLock.RUnlock() + + if !exists { + a.logger.Error().Str(string(logs.OperationIDKey), operationID).Str("uploadId", uploadId).Msg("upload session not found") + jsonError(w, http.StatusNotFound, fmt.Errorf("upload session not found: %s", uploadId)) + return + } + + partNumber := params.Part + + // Create the part file + partPath := filepath.Join(session.TempDir, fmt.Sprintf("part_%d", partNumber)) + + partFile, err := os.OpenFile(partPath, os.O_WRONLY|os.O_CREATE|os.O_TRUNC, 0o644) + if err != nil { + if errors.Is(err, syscall.ENOSPC) { + 
a.logger.Error().Err(err).Str(string(logs.OperationIDKey), operationID).Msg("not enough disk space") + jsonError(w, http.StatusInsufficientStorage, fmt.Errorf("not enough disk space")) + return + } + a.logger.Error().Err(err).Str(string(logs.OperationIDKey), operationID).Msg("error creating part file") + jsonError(w, http.StatusInternalServerError, fmt.Errorf("error creating part file: %w", err)) + return + } + + // Write the part data using ReadFrom for efficient copying + size, err := partFile.ReadFrom(r.Body) + partFile.Close() + + if err != nil { + os.Remove(partPath) + if errors.Is(err, syscall.ENOSPC) { + a.logger.Error().Err(err).Str(string(logs.OperationIDKey), operationID).Msg("not enough disk space") + jsonError(w, http.StatusInsufficientStorage, fmt.Errorf("not enough disk space")) + return + } + a.logger.Error().Err(err).Str(string(logs.OperationIDKey), operationID).Msg("error writing part data") + jsonError(w, http.StatusInternalServerError, fmt.Errorf("error writing part data: %w", err)) + return + } + + // Record the part + session.mu.Lock() + session.Parts[partNumber] = partPath + session.mu.Unlock() + + a.logger.Debug(). + Str(string(logs.OperationIDKey), operationID). + Str("uploadId", uploadId). + Int("partNumber", partNumber). + Int64("size", size). 
+ Msg("part uploaded") + + w.Header().Set("Content-Type", "application/json") + w.WriteHeader(http.StatusOK) + json.NewEncoder(w).Encode(MultipartUploadPart{ + PartNumber: partNumber, + Size: size, + }) +} + +// PostFilesUploadUploadIdComplete completes a multipart upload and assembles the file +func (a *API) PostFilesUploadUploadIdComplete(w http.ResponseWriter, r *http.Request, uploadId string) { + defer r.Body.Close() + + operationID := logs.AssignOperationID() + + // Get and remove the session + a.uploadsLock.Lock() + session, exists := a.uploads[uploadId] + if exists { + delete(a.uploads, uploadId) + } + a.uploadsLock.Unlock() + + if !exists { + a.logger.Error().Str(string(logs.OperationIDKey), operationID).Str("uploadId", uploadId).Msg("upload session not found") + jsonError(w, http.StatusNotFound, fmt.Errorf("upload session not found: %s", uploadId)) + return + } + + // Ensure cleanup happens + defer os.RemoveAll(session.TempDir) + + // Ensure parent directories exist + err := permissions.EnsureDirs(filepath.Dir(session.FilePath), session.UID, session.GID) + if err != nil { + a.logger.Error().Err(err).Str(string(logs.OperationIDKey), operationID).Msg("error ensuring directories") + jsonError(w, http.StatusInternalServerError, fmt.Errorf("error ensuring directories: %w", err)) + return + } + + // Create the destination file + destFile, err := os.OpenFile(session.FilePath, os.O_WRONLY|os.O_CREATE|os.O_TRUNC, 0o666) + if err != nil { + if errors.Is(err, syscall.ENOSPC) { + a.logger.Error().Err(err).Str(string(logs.OperationIDKey), operationID).Msg("not enough disk space") + jsonError(w, http.StatusInsufficientStorage, fmt.Errorf("not enough disk space")) + return + } + a.logger.Error().Err(err).Str(string(logs.OperationIDKey), operationID).Msg("error creating destination file") + jsonError(w, http.StatusInternalServerError, fmt.Errorf("error creating destination file: %w", err)) + return + } + defer destFile.Close() + + // Set ownership + if err := 
os.Chown(session.FilePath, session.UID, session.GID); err != nil { + a.logger.Error().Err(err).Str(string(logs.OperationIDKey), operationID).Msg("error changing file ownership") + jsonError(w, http.StatusInternalServerError, fmt.Errorf("error changing file ownership: %w", err)) + return + } + + // Get the part numbers in order + session.mu.Lock() + partNumbers := make([]int, 0, len(session.Parts)) + for num := range session.Parts { + partNumbers = append(partNumbers, num) + } + session.mu.Unlock() + sort.Ints(partNumbers) + + // Assemble the file by appending each part in ascending part-number order + var totalSize int64 + for _, partNum := range partNumbers { + partPath := session.Parts[partNum] + partFile, err := os.Open(partPath) + if err != nil { + a.logger.Error().Err(err).Str(string(logs.OperationIDKey), operationID).Int("partNumber", partNum).Msg("error opening part file") + jsonError(w, http.StatusInternalServerError, fmt.Errorf("error opening part %d: %w", partNum, err)) + return + } + + // Use ReadFrom which on Linux uses copy_file_range for zero-copy + written, err := destFile.ReadFrom(partFile) + partFile.Close() + + if err != nil { + if errors.Is(err, syscall.ENOSPC) { + a.logger.Error().Err(err).Str(string(logs.OperationIDKey), operationID).Msg("not enough disk space") + jsonError(w, http.StatusInsufficientStorage, fmt.Errorf("not enough disk space")) + return + } + a.logger.Error().Err(err).Str(string(logs.OperationIDKey), operationID).Int("partNumber", partNum).Msg("error copying part") + jsonError(w, http.StatusInternalServerError, fmt.Errorf("error copying part %d: %w", partNum, err)) + return + } + + totalSize += written + } + + // Sync to ensure all data is written + if err := destFile.Sync(); err != nil { + a.logger.Error().Err(err).Str(string(logs.OperationIDKey), operationID).Msg("error syncing file") + jsonError(w, http.StatusInternalServerError, fmt.Errorf("error syncing file: %w", err)) + return + } + + a.logger.Debug().
+ Str(string(logs.OperationIDKey), operationID). + Str("uploadId", uploadId). + Str("filePath", session.FilePath). + Int64("totalSize", totalSize). + Int("numParts", len(partNumbers)). + Msg("multipart upload completed") + + w.Header().Set("Content-Type", "application/json") + w.WriteHeader(http.StatusOK) + json.NewEncoder(w).Encode(MultipartUploadComplete{ + Path: session.FilePath, + Size: totalSize, + }) +} + +// DeleteFilesUploadUploadId aborts a multipart upload and cleans up temporary files +func (a *API) DeleteFilesUploadUploadId(w http.ResponseWriter, r *http.Request, uploadId string) { + defer r.Body.Close() + + operationID := logs.AssignOperationID() + + // Get and remove the session + a.uploadsLock.Lock() + session, exists := a.uploads[uploadId] + if exists { + delete(a.uploads, uploadId) + } + a.uploadsLock.Unlock() + + if !exists { + a.logger.Error().Str(string(logs.OperationIDKey), operationID).Str("uploadId", uploadId).Msg("upload session not found") + jsonError(w, http.StatusNotFound, fmt.Errorf("upload session not found: %s", uploadId)) + return + } + + // Clean up temp directory + if err := os.RemoveAll(session.TempDir); err != nil { + a.logger.Warn().Err(err).Str(string(logs.OperationIDKey), operationID).Str("uploadId", uploadId).Msg("error cleaning up temp directory") + } + + a.logger.Debug(). + Str(string(logs.OperationIDKey), operationID). + Str("uploadId", uploadId). 
+ Msg("multipart upload aborted") + + w.WriteHeader(http.StatusNoContent) +} diff --git a/packages/envd/internal/api/multipart_upload_test.go b/packages/envd/internal/api/multipart_upload_test.go new file mode 100644 index 0000000000..9e489cfdfd --- /dev/null +++ b/packages/envd/internal/api/multipart_upload_test.go @@ -0,0 +1,310 @@ +package api + +import ( + "bytes" + "encoding/json" + "net/http" + "net/http/httptest" + "os" + "path/filepath" + "testing" + + "github.com/go-chi/chi/v5" + "github.com/rs/zerolog" + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" + + "github.com/e2b-dev/infra/packages/envd/internal/execcontext" + "github.com/e2b-dev/infra/packages/envd/internal/utils" +) + +func newTestAPI(t *testing.T) *API { + t.Helper() + logger := zerolog.New(os.Stderr).Level(zerolog.Disabled) + defaults := &execcontext.Defaults{ + User: "root", + EnvVars: utils.NewMap[string, string](), + } + return New(&logger, defaults, nil, true) +} + +func TestMultipartUpload(t *testing.T) { + t.Parallel() + + // Skip if not running as root (needed for user lookup and chown) + if os.Geteuid() != 0 { + t.Skip("skipping multipart upload tests: requires root") + } + + t.Run("init upload", func(t *testing.T) { + t.Parallel() + api := newTestAPI(t) + tempDir := t.TempDir() + + body := PostFilesUploadInitJSONRequestBody{ + Path: filepath.Join(tempDir, "test-file.txt"), + } + bodyBytes, _ := json.Marshal(body) + + req := httptest.NewRequest(http.MethodPost, "/files/upload/init", bytes.NewReader(bodyBytes)) + req.Header.Set("Content-Type", "application/json") + w := httptest.NewRecorder() + + api.PostFilesUploadInit(w, req, PostFilesUploadInitParams{}) + + assert.Equal(t, http.StatusOK, w.Code) + + var resp MultipartUploadInit + err := json.Unmarshal(w.Body.Bytes(), &resp) + require.NoError(t, err) + assert.NotEmpty(t, resp.UploadId) + + // Clean up + api.uploadsLock.Lock() + session := api.uploads[resp.UploadId] + api.uploadsLock.Unlock() + if session 
!= nil { + os.RemoveAll(session.TempDir) + } + }) + + t.Run("complete multipart upload", func(t *testing.T) { + t.Parallel() + api := newTestAPI(t) + tempDir := t.TempDir() + destPath := filepath.Join(tempDir, "assembled-file.txt") + + // Initialize upload + initBody := PostFilesUploadInitJSONRequestBody{ + Path: destPath, + } + initBodyBytes, _ := json.Marshal(initBody) + + initReq := httptest.NewRequest(http.MethodPost, "/files/upload/init", bytes.NewReader(initBodyBytes)) + initReq.Header.Set("Content-Type", "application/json") + initW := httptest.NewRecorder() + + api.PostFilesUploadInit(initW, initReq, PostFilesUploadInitParams{}) + require.Equal(t, http.StatusOK, initW.Code) + + var initResp MultipartUploadInit + err := json.Unmarshal(initW.Body.Bytes(), &initResp) + require.NoError(t, err) + uploadId := initResp.UploadId + + // Upload part 0 + part0Content := []byte("Hello, ") + part0Req := httptest.NewRequest(http.MethodPut, "/files/upload/"+uploadId+"?part=0", bytes.NewReader(part0Content)) + part0Req.Header.Set("Content-Type", "application/octet-stream") + part0W := httptest.NewRecorder() + + api.PutFilesUploadUploadId(part0W, part0Req, uploadId, PutFilesUploadUploadIdParams{Part: 0}) + require.Equal(t, http.StatusOK, part0W.Code) + + var part0Resp MultipartUploadPart + err = json.Unmarshal(part0W.Body.Bytes(), &part0Resp) + require.NoError(t, err) + assert.Equal(t, 0, part0Resp.PartNumber) + assert.Equal(t, int64(len(part0Content)), part0Resp.Size) + + // Upload part 1 + part1Content := []byte("World!") + part1Req := httptest.NewRequest(http.MethodPut, "/files/upload/"+uploadId+"?part=1", bytes.NewReader(part1Content)) + part1Req.Header.Set("Content-Type", "application/octet-stream") + part1W := httptest.NewRecorder() + + api.PutFilesUploadUploadId(part1W, part1Req, uploadId, PutFilesUploadUploadIdParams{Part: 1}) + require.Equal(t, http.StatusOK, part1W.Code) + + // Complete upload + completeReq := httptest.NewRequest(http.MethodPost, 
"/files/upload/"+uploadId+"/complete", nil) + completeW := httptest.NewRecorder() + + api.PostFilesUploadUploadIdComplete(completeW, completeReq, uploadId) + require.Equal(t, http.StatusOK, completeW.Code) + + var completeResp MultipartUploadComplete + err = json.Unmarshal(completeW.Body.Bytes(), &completeResp) + require.NoError(t, err) + assert.Equal(t, destPath, completeResp.Path) + assert.Equal(t, int64(len(part0Content)+len(part1Content)), completeResp.Size) + + // Verify file contents + content, err := os.ReadFile(destPath) + require.NoError(t, err) + assert.Equal(t, "Hello, World!", string(content)) + }) + + t.Run("abort multipart upload", func(t *testing.T) { + t.Parallel() + api := newTestAPI(t) + tempDir := t.TempDir() + + // Initialize upload + initBody := PostFilesUploadInitJSONRequestBody{ + Path: filepath.Join(tempDir, "aborted-file.txt"), + } + initBodyBytes, _ := json.Marshal(initBody) + + initReq := httptest.NewRequest(http.MethodPost, "/files/upload/init", bytes.NewReader(initBodyBytes)) + initReq.Header.Set("Content-Type", "application/json") + initW := httptest.NewRecorder() + + api.PostFilesUploadInit(initW, initReq, PostFilesUploadInitParams{}) + require.Equal(t, http.StatusOK, initW.Code) + + var initResp MultipartUploadInit + err := json.Unmarshal(initW.Body.Bytes(), &initResp) + require.NoError(t, err) + uploadId := initResp.UploadId + + // Get temp dir before deletion + api.uploadsLock.RLock() + session := api.uploads[uploadId] + api.uploadsLock.RUnlock() + require.NotNil(t, session) + sessionTempDir := session.TempDir + + // Upload a part + partContent := []byte("test content") + partReq := httptest.NewRequest(http.MethodPut, "/files/upload/"+uploadId+"?part=0", bytes.NewReader(partContent)) + partReq.Header.Set("Content-Type", "application/octet-stream") + partW := httptest.NewRecorder() + + api.PutFilesUploadUploadId(partW, partReq, uploadId, PutFilesUploadUploadIdParams{Part: 0}) + require.Equal(t, http.StatusOK, partW.Code) + + // 
Abort upload + abortReq := httptest.NewRequest(http.MethodDelete, "/files/upload/"+uploadId, nil) + abortW := httptest.NewRecorder() + + api.DeleteFilesUploadUploadId(abortW, abortReq, uploadId) + assert.Equal(t, http.StatusNoContent, abortW.Code) + + // Verify session is removed + api.uploadsLock.RLock() + _, exists := api.uploads[uploadId] + api.uploadsLock.RUnlock() + assert.False(t, exists) + + // Verify temp dir is cleaned up + _, err = os.Stat(sessionTempDir) + assert.True(t, os.IsNotExist(err)) + }) + + t.Run("upload part to non-existent session", func(t *testing.T) { + t.Parallel() + api := newTestAPI(t) + + req := httptest.NewRequest(http.MethodPut, "/files/upload/non-existent?part=0", bytes.NewReader([]byte("test"))) + w := httptest.NewRecorder() + + api.PutFilesUploadUploadId(w, req, "non-existent", PutFilesUploadUploadIdParams{Part: 0}) + assert.Equal(t, http.StatusNotFound, w.Code) + }) + + t.Run("complete non-existent session", func(t *testing.T) { + t.Parallel() + api := newTestAPI(t) + + req := httptest.NewRequest(http.MethodPost, "/files/upload/non-existent/complete", nil) + w := httptest.NewRecorder() + + api.PostFilesUploadUploadIdComplete(w, req, "non-existent") + assert.Equal(t, http.StatusNotFound, w.Code) + }) + + t.Run("abort non-existent session", func(t *testing.T) { + t.Parallel() + api := newTestAPI(t) + + req := httptest.NewRequest(http.MethodDelete, "/files/upload/non-existent", nil) + w := httptest.NewRecorder() + + api.DeleteFilesUploadUploadId(w, req, "non-existent") + assert.Equal(t, http.StatusNotFound, w.Code) + }) + + t.Run("parts uploaded out of order", func(t *testing.T) { + t.Parallel() + api := newTestAPI(t) + tempDir := t.TempDir() + destPath := filepath.Join(tempDir, "out-of-order-file.txt") + + // Initialize upload + initBody := PostFilesUploadInitJSONRequestBody{ + Path: destPath, + } + initBodyBytes, _ := json.Marshal(initBody) + + initReq := httptest.NewRequest(http.MethodPost, "/files/upload/init", 
bytes.NewReader(initBodyBytes)) + initReq.Header.Set("Content-Type", "application/json") + initW := httptest.NewRecorder() + + api.PostFilesUploadInit(initW, initReq, PostFilesUploadInitParams{}) + require.Equal(t, http.StatusOK, initW.Code) + + var initResp MultipartUploadInit + err := json.Unmarshal(initW.Body.Bytes(), &initResp) + require.NoError(t, err) + uploadId := initResp.UploadId + + // Upload parts out of order (part 2 first, then 0, then 1) + parts := []struct { + num int + content string + }{ + {2, "C"}, + {0, "A"}, + {1, "B"}, + } + + for _, part := range parts { + partReq := httptest.NewRequest(http.MethodPut, "/files/upload/"+uploadId+"?part="+string(rune('0'+part.num)), bytes.NewReader([]byte(part.content))) + partReq.Header.Set("Content-Type", "application/octet-stream") + partW := httptest.NewRecorder() + + api.PutFilesUploadUploadId(partW, partReq, uploadId, PutFilesUploadUploadIdParams{Part: part.num}) + require.Equal(t, http.StatusOK, partW.Code) + } + + // Complete upload + completeReq := httptest.NewRequest(http.MethodPost, "/files/upload/"+uploadId+"/complete", nil) + completeW := httptest.NewRecorder() + + api.PostFilesUploadUploadIdComplete(completeW, completeReq, uploadId) + require.Equal(t, http.StatusOK, completeW.Code) + + // Verify file contents are assembled in order + content, err := os.ReadFile(destPath) + require.NoError(t, err) + assert.Equal(t, "ABC", string(content)) + }) +} + +func TestMultipartUploadRouting(t *testing.T) { + // Skip if not running as root + if os.Geteuid() != 0 { + t.Skip("skipping routing tests: requires root") + } + + api := newTestAPI(t) + router := chi.NewRouter() + HandlerFromMux(api, router) + + // Test that routes are registered + t.Run("init route exists", func(t *testing.T) { + body := PostFilesUploadInitJSONRequestBody{ + Path: "/tmp/test-file.txt", + } + bodyBytes, _ := json.Marshal(body) + + req := httptest.NewRequest(http.MethodPost, "/files/upload/init", bytes.NewReader(bodyBytes)) + 
req.Header.Set("Content-Type", "application/json") + w := httptest.NewRecorder() + + router.ServeHTTP(w, req) + // Should get 200 (success) not 404 (route not found) + assert.NotEqual(t, http.StatusNotFound, w.Code) + }) +} diff --git a/packages/envd/internal/api/store.go b/packages/envd/internal/api/store.go index 4a561e82a7..9ba424a8be 100644 --- a/packages/envd/internal/api/store.go +++ b/packages/envd/internal/api/store.go @@ -12,6 +12,17 @@ import ( "github.com/e2b-dev/infra/packages/envd/internal/utils" ) +// MultipartUploadSession tracks an in-progress multipart upload +type MultipartUploadSession struct { + UploadID string + FilePath string // Final destination path + TempDir string // Temp directory for parts + UID int + GID int + Parts map[int]string // partNumber -> temp file path + mu sync.Mutex +} + type API struct { isNotFC bool logger *zerolog.Logger @@ -23,6 +34,10 @@ type API struct { lastSetTime *utils.AtomicMax initLock sync.Mutex + + // Multipart upload sessions + uploads map[string]*MultipartUploadSession + uploadsLock sync.RWMutex } func New(l *zerolog.Logger, defaults *execcontext.Defaults, mmdsChan chan *host.MMDSOpts, isNotFC bool) *API { @@ -32,6 +47,7 @@ func New(l *zerolog.Logger, defaults *execcontext.Defaults, mmdsChan chan *host. 
mmdsChan: mmdsChan, isNotFC: isNotFC, lastSetTime: utils.NewAtomicMax(), + uploads: make(map[string]*MultipartUploadSession), } } diff --git a/packages/envd/spec/envd.yaml b/packages/envd/spec/envd.yaml index 29e26ea852..b7c353bef5 100644 --- a/packages/envd/spec/envd.yaml +++ b/packages/envd/spec/envd.yaml @@ -125,6 +125,134 @@ paths: "507": $ref: "#/components/responses/NotEnoughDiskSpace" + /files/upload/init: + post: + summary: Initialize a multipart file upload session + tags: [files] + security: + - AccessTokenAuth: [] + - {} + parameters: + - $ref: "#/components/parameters/User" + - $ref: "#/components/parameters/Signature" + - $ref: "#/components/parameters/SignatureExpiration" + requestBody: + required: true + content: + application/json: + schema: + type: object + required: + - path + properties: + path: + type: string + description: Path to the file to upload + responses: + "200": + description: Upload session initialized + content: + application/json: + schema: + $ref: "#/components/schemas/MultipartUploadInit" + "400": + $ref: "#/components/responses/InvalidPath" + "401": + $ref: "#/components/responses/InvalidUser" + "500": + $ref: "#/components/responses/InternalServerError" + + /files/upload/{uploadId}: + put: + summary: Upload a part of a multipart file upload + tags: [files] + security: + - AccessTokenAuth: [] + - {} + parameters: + - name: uploadId + in: path + required: true + description: The upload session ID + schema: + type: string + - name: part + in: query + required: true + description: The part number (0-indexed) + schema: + type: integer + requestBody: + required: true + content: + application/octet-stream: + schema: + type: string + format: binary + responses: + "200": + description: Part uploaded successfully + content: + application/json: + schema: + $ref: "#/components/schemas/MultipartUploadPart" + "400": + $ref: "#/components/responses/InvalidPath" + "404": + $ref: "#/components/responses/UploadNotFound" + "500": + $ref: 
"#/components/responses/InternalServerError" + "507": + $ref: "#/components/responses/NotEnoughDiskSpace" + delete: + summary: Abort a multipart file upload and clean up temporary files + tags: [files] + security: + - AccessTokenAuth: [] + - {} + parameters: + - name: uploadId + in: path + required: true + description: The upload session ID + schema: + type: string + responses: + "204": + description: Upload aborted and cleaned up successfully + "404": + $ref: "#/components/responses/UploadNotFound" + "500": + $ref: "#/components/responses/InternalServerError" + + /files/upload/{uploadId}/complete: + post: + summary: Complete a multipart file upload and assemble the final file + tags: [files] + security: + - AccessTokenAuth: [] + - {} + parameters: + - name: uploadId + in: path + required: true + description: The upload session ID + schema: + type: string + responses: + "200": + description: Upload completed successfully + content: + application/json: + schema: + $ref: "#/components/schemas/MultipartUploadComplete" + "404": + $ref: "#/components/responses/UploadNotFound" + "500": + $ref: "#/components/responses/InternalServerError" + "507": + $ref: "#/components/responses/NotEnoughDiskSpace" + components: securitySchemes: AccessTokenAuth: @@ -222,6 +350,12 @@ components: application/json: schema: $ref: "#/components/schemas/Error" + UploadNotFound: + description: Upload session not found + content: + application/json: + schema: + $ref: "#/components/schemas/Error" schemas: Error: @@ -284,3 +418,37 @@ components: disk_total: type: integer description: Total disk space in bytes + MultipartUploadInit: + type: object + required: + - uploadId + properties: + uploadId: + type: string + description: Unique identifier for the upload session + MultipartUploadPart: + type: object + required: + - partNumber + - size + properties: + partNumber: + type: integer + description: The part number that was uploaded + size: + type: integer + format: int64 + description: Size of the 
uploaded part in bytes + MultipartUploadComplete: + type: object + required: + - path + - size + properties: + path: + type: string + description: Path to the final assembled file + size: + type: integer + format: int64 + description: Total size of the assembled file in bytes From cd4dbeea93063231ff0248043b51dcae5770db81 Mon Sep 17 00:00:00 2001 From: Mish Ushakov <10400064+mishushakov@users.noreply.github.com> Date: Wed, 28 Jan 2026 19:41:19 +0100 Subject: [PATCH 02/59] perf(envd): skip fsync and async cleanup in multipart upload - Remove fsync in Complete (was ~40-100ms for 100MB) - Move temp directory cleanup to background goroutine For sandbox use cases, immediate durability is not critical. The kernel will flush data to disk eventually. Co-Authored-By: Claude Opus 4.5 --- packages/envd/internal/api/multipart_upload.go | 15 +++++++-------- 1 file changed, 7 insertions(+), 8 deletions(-) diff --git a/packages/envd/internal/api/multipart_upload.go b/packages/envd/internal/api/multipart_upload.go index 7e289069f3..108b2088c0 100644 --- a/packages/envd/internal/api/multipart_upload.go +++ b/packages/envd/internal/api/multipart_upload.go @@ -203,8 +203,11 @@ func (a *API) PostFilesUploadUploadIdComplete(w http.ResponseWriter, r *http.Req return } - // Ensure cleanup happens - defer os.RemoveAll(session.TempDir) + // Cleanup temp directory in background (don't block response) + tempDir := session.TempDir + defer func() { + go os.RemoveAll(tempDir) + }() // Ensure parent directories exist err := permissions.EnsureDirs(filepath.Dir(session.FilePath), session.UID, session.GID) @@ -273,12 +276,8 @@ func (a *API) PostFilesUploadUploadIdComplete(w http.ResponseWriter, r *http.Req totalSize += written } - // Sync to ensure all data is written - if err := destFile.Sync(); err != nil { - a.logger.Error().Err(err).Str(string(logs.OperationIDKey), operationID).Msg("error syncing file") - jsonError(w, http.StatusInternalServerError, fmt.Errorf("error syncing file: %w", err)) - 
return - } + // Note: We skip fsync here for performance. The kernel will flush data to disk + // eventually. For sandbox use cases, immediate durability is not critical. a.logger.Debug(). Str(string(logs.OperationIDKey), operationID). From e94a0e91d6ac90c0a74686efce7e5e4ddeb2d476 Mon Sep 17 00:00:00 2001 From: Mish Ushakov <10400064+mishushakov@users.noreply.github.com> Date: Wed, 28 Jan 2026 19:52:37 +0100 Subject: [PATCH 03/59] fix(envd): address multipart upload code review issues - Add POST/files/upload/ to allowed path prefixes for auth bypass - Add max session limit (100) to prevent resource exhaustion - Fix race condition in complete handler by copying parts under lock - Add logging for background cleanup errors - Add startup cleanup of stale temp directories - Fix test bug with part number string conversion - Add test for max sessions limit Co-Authored-By: Claude Opus 4.5 --- packages/envd/internal/api/auth.go | 1 + .../envd/internal/api/multipart_upload.go | 24 ++++++++-- .../internal/api/multipart_upload_test.go | 44 ++++++++++++++++++- packages/envd/internal/api/store.go | 6 +++ 4 files changed, 70 insertions(+), 5 deletions(-) diff --git a/packages/envd/internal/api/auth.go b/packages/envd/internal/api/auth.go index 8cdeb09b22..74a1d47dda 100644 --- a/packages/envd/internal/api/auth.go +++ b/packages/envd/internal/api/auth.go @@ -27,6 +27,7 @@ var allowedPathPrefixes = []string{ "POST/files", "PUT/files/upload/", "DELETE/files/upload/", + "POST/files/upload/", } func isAllowedPath(methodPath string) bool { diff --git a/packages/envd/internal/api/multipart_upload.go b/packages/envd/internal/api/multipart_upload.go index 108b2088c0..4aa59073d6 100644 --- a/packages/envd/internal/api/multipart_upload.go +++ b/packages/envd/internal/api/multipart_upload.go @@ -20,6 +20,8 @@ import ( const ( multipartTempDir = "/tmp/envd-multipart" + // maxUploadSessions limits concurrent upload sessions to prevent resource exhaustion + maxUploadSessions = 100 ) // 
PostFilesUploadInit initializes a multipart upload session @@ -97,6 +99,13 @@ func (a *API) PostFilesUploadInit(w http.ResponseWriter, r *http.Request, params } a.uploadsLock.Lock() + if len(a.uploads) >= maxUploadSessions { + a.uploadsLock.Unlock() + os.RemoveAll(tempDir) + a.logger.Error().Str(string(logs.OperationIDKey), operationID).Int("maxSessions", maxUploadSessions).Msg("too many concurrent upload sessions") + jsonError(w, http.StatusTooManyRequests, fmt.Errorf("too many concurrent upload sessions (max %d)", maxUploadSessions)) + return + } a.uploads[uploadID] = session a.uploadsLock.Unlock() @@ -205,8 +214,13 @@ func (a *API) PostFilesUploadUploadIdComplete(w http.ResponseWriter, r *http.Req // Cleanup temp directory in background (don't block response) tempDir := session.TempDir + logger := a.logger defer func() { - go os.RemoveAll(tempDir) + go func() { + if err := os.RemoveAll(tempDir); err != nil { + logger.Warn().Err(err).Str("tempDir", tempDir).Msg("failed to cleanup multipart temp directory") + } + }() }() // Ensure parent directories exist @@ -238,11 +252,13 @@ func (a *API) PostFilesUploadUploadIdComplete(w http.ResponseWriter, r *http.Req return } - // Get the part numbers in order + // Get the part numbers and paths in order (copy under lock to avoid race with concurrent uploads) session.mu.Lock() partNumbers := make([]int, 0, len(session.Parts)) - for num := range session.Parts { + partPaths := make(map[int]string, len(session.Parts)) + for num, path := range session.Parts { partNumbers = append(partNumbers, num) + partPaths[num] = path } session.mu.Unlock() sort.Ints(partNumbers) @@ -250,7 +266,7 @@ func (a *API) PostFilesUploadUploadIdComplete(w http.ResponseWriter, r *http.Req // Assemble the file using sendfile via io.Copy (which uses copy_file_range on Linux) var totalSize int64 for _, partNum := range partNumbers { - partPath := session.Parts[partNum] + partPath := partPaths[partNum] partFile, err := os.Open(partPath) if err != nil { 
a.logger.Error().Err(err).Str(string(logs.OperationIDKey), operationID).Int("partNumber", partNum).Msg("error opening part file") diff --git a/packages/envd/internal/api/multipart_upload_test.go b/packages/envd/internal/api/multipart_upload_test.go index 9e489cfdfd..af365a5ddd 100644 --- a/packages/envd/internal/api/multipart_upload_test.go +++ b/packages/envd/internal/api/multipart_upload_test.go @@ -3,6 +3,7 @@ package api import ( "bytes" "encoding/json" + "fmt" "net/http" "net/http/httptest" "os" @@ -225,6 +226,47 @@ func TestMultipartUpload(t *testing.T) { assert.Equal(t, http.StatusNotFound, w.Code) }) + t.Run("max sessions limit", func(t *testing.T) { + t.Parallel() + api := newTestAPI(t) + tempDir := t.TempDir() + + // Create maxUploadSessions sessions + for i := 0; i < maxUploadSessions; i++ { + body := PostFilesUploadInitJSONRequestBody{ + Path: filepath.Join(tempDir, fmt.Sprintf("file-%d.txt", i)), + } + bodyBytes, _ := json.Marshal(body) + + req := httptest.NewRequest(http.MethodPost, "/files/upload/init", bytes.NewReader(bodyBytes)) + req.Header.Set("Content-Type", "application/json") + w := httptest.NewRecorder() + + api.PostFilesUploadInit(w, req, PostFilesUploadInitParams{}) + require.Equal(t, http.StatusOK, w.Code, "session %d should succeed", i) + } + + // The next one should fail with 429 + body := PostFilesUploadInitJSONRequestBody{ + Path: filepath.Join(tempDir, "one-too-many.txt"), + } + bodyBytes, _ := json.Marshal(body) + + req := httptest.NewRequest(http.MethodPost, "/files/upload/init", bytes.NewReader(bodyBytes)) + req.Header.Set("Content-Type", "application/json") + w := httptest.NewRecorder() + + api.PostFilesUploadInit(w, req, PostFilesUploadInitParams{}) + assert.Equal(t, http.StatusTooManyRequests, w.Code) + + // Clean up all sessions + api.uploadsLock.Lock() + for _, session := range api.uploads { + os.RemoveAll(session.TempDir) + } + api.uploadsLock.Unlock() + }) + t.Run("parts uploaded out of order", func(t *testing.T) { 
t.Parallel() api := newTestAPI(t) @@ -260,7 +302,7 @@ func TestMultipartUpload(t *testing.T) { } for _, part := range parts { - partReq := httptest.NewRequest(http.MethodPut, "/files/upload/"+uploadId+"?part="+string(rune('0'+part.num)), bytes.NewReader([]byte(part.content))) + partReq := httptest.NewRequest(http.MethodPut, fmt.Sprintf("/files/upload/%s?part=%d", uploadId, part.num), bytes.NewReader([]byte(part.content))) partReq.Header.Set("Content-Type", "application/octet-stream") partW := httptest.NewRecorder() diff --git a/packages/envd/internal/api/store.go b/packages/envd/internal/api/store.go index 9ba424a8be..32ff375a8c 100644 --- a/packages/envd/internal/api/store.go +++ b/packages/envd/internal/api/store.go @@ -3,6 +3,7 @@ package api import ( "encoding/json" "net/http" + "os" "sync" "github.com/rs/zerolog" @@ -41,6 +42,11 @@ type API struct { } func New(l *zerolog.Logger, defaults *execcontext.Defaults, mmdsChan chan *host.MMDSOpts, isNotFC bool) *API { + // Clean up any stale multipart upload temp directories from previous runs + if err := os.RemoveAll(multipartTempDir); err != nil { + l.Warn().Err(err).Str("dir", multipartTempDir).Msg("failed to cleanup stale multipart temp directory") + } + return &API{ logger: l, defaults: defaults, From 38a70d6f4e78a8b6610180f66fbeb9501bded282 Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" Date: Wed, 28 Jan 2026 18:56:21 +0000 Subject: [PATCH 04/59] chore: auto-commit generated changes --- .../internal/envd/api/client.gen.go | 640 +++++++++++++++++- .../internal/envd/api/models.gen.go | 54 ++ 2 files changed, 688 insertions(+), 6 deletions(-) diff --git a/tests/integration/internal/envd/api/client.gen.go b/tests/integration/internal/envd/api/client.gen.go index 84ea35e0ea..efc5c22dfb 100644 --- a/tests/integration/internal/envd/api/client.gen.go +++ b/tests/integration/internal/envd/api/client.gen.go @@ -98,6 +98,20 @@ type ClientInterface interface { // PostFilesWithBody request with any body 
PostFilesWithBody(ctx context.Context, params *PostFilesParams, contentType string, body io.Reader, reqEditors ...RequestEditorFn) (*http.Response, error) + // PostFilesUploadInitWithBody request with any body + PostFilesUploadInitWithBody(ctx context.Context, params *PostFilesUploadInitParams, contentType string, body io.Reader, reqEditors ...RequestEditorFn) (*http.Response, error) + + PostFilesUploadInit(ctx context.Context, params *PostFilesUploadInitParams, body PostFilesUploadInitJSONRequestBody, reqEditors ...RequestEditorFn) (*http.Response, error) + + // DeleteFilesUploadUploadId request + DeleteFilesUploadUploadId(ctx context.Context, uploadId string, reqEditors ...RequestEditorFn) (*http.Response, error) + + // PutFilesUploadUploadIdWithBody request with any body + PutFilesUploadUploadIdWithBody(ctx context.Context, uploadId string, params *PutFilesUploadUploadIdParams, contentType string, body io.Reader, reqEditors ...RequestEditorFn) (*http.Response, error) + + // PostFilesUploadUploadIdComplete request + PostFilesUploadUploadIdComplete(ctx context.Context, uploadId string, reqEditors ...RequestEditorFn) (*http.Response, error) + // GetHealth request GetHealth(ctx context.Context, reqEditors ...RequestEditorFn) (*http.Response, error) @@ -146,6 +160,66 @@ func (c *Client) PostFilesWithBody(ctx context.Context, params *PostFilesParams, return c.Client.Do(req) } +func (c *Client) PostFilesUploadInitWithBody(ctx context.Context, params *PostFilesUploadInitParams, contentType string, body io.Reader, reqEditors ...RequestEditorFn) (*http.Response, error) { + req, err := NewPostFilesUploadInitRequestWithBody(c.Server, params, contentType, body) + if err != nil { + return nil, err + } + req = req.WithContext(ctx) + if err := c.applyEditors(ctx, req, reqEditors); err != nil { + return nil, err + } + return c.Client.Do(req) +} + +func (c *Client) PostFilesUploadInit(ctx context.Context, params *PostFilesUploadInitParams, body PostFilesUploadInitJSONRequestBody, 
reqEditors ...RequestEditorFn) (*http.Response, error) { + req, err := NewPostFilesUploadInitRequest(c.Server, params, body) + if err != nil { + return nil, err + } + req = req.WithContext(ctx) + if err := c.applyEditors(ctx, req, reqEditors); err != nil { + return nil, err + } + return c.Client.Do(req) +} + +func (c *Client) DeleteFilesUploadUploadId(ctx context.Context, uploadId string, reqEditors ...RequestEditorFn) (*http.Response, error) { + req, err := NewDeleteFilesUploadUploadIdRequest(c.Server, uploadId) + if err != nil { + return nil, err + } + req = req.WithContext(ctx) + if err := c.applyEditors(ctx, req, reqEditors); err != nil { + return nil, err + } + return c.Client.Do(req) +} + +func (c *Client) PutFilesUploadUploadIdWithBody(ctx context.Context, uploadId string, params *PutFilesUploadUploadIdParams, contentType string, body io.Reader, reqEditors ...RequestEditorFn) (*http.Response, error) { + req, err := NewPutFilesUploadUploadIdRequestWithBody(c.Server, uploadId, params, contentType, body) + if err != nil { + return nil, err + } + req = req.WithContext(ctx) + if err := c.applyEditors(ctx, req, reqEditors); err != nil { + return nil, err + } + return c.Client.Do(req) +} + +func (c *Client) PostFilesUploadUploadIdComplete(ctx context.Context, uploadId string, reqEditors ...RequestEditorFn) (*http.Response, error) { + req, err := NewPostFilesUploadUploadIdCompleteRequest(c.Server, uploadId) + if err != nil { + return nil, err + } + req = req.WithContext(ctx) + if err := c.applyEditors(ctx, req, reqEditors); err != nil { + return nil, err + } + return c.Client.Do(req) +} + func (c *Client) GetHealth(ctx context.Context, reqEditors ...RequestEditorFn) (*http.Response, error) { req, err := NewGetHealthRequest(c.Server) if err != nil { @@ -417,6 +491,222 @@ func NewPostFilesRequestWithBody(server string, params *PostFilesParams, content return req, nil } +// NewPostFilesUploadInitRequest calls the generic PostFilesUploadInit builder with 
application/json body +func NewPostFilesUploadInitRequest(server string, params *PostFilesUploadInitParams, body PostFilesUploadInitJSONRequestBody) (*http.Request, error) { + var bodyReader io.Reader + buf, err := json.Marshal(body) + if err != nil { + return nil, err + } + bodyReader = bytes.NewReader(buf) + return NewPostFilesUploadInitRequestWithBody(server, params, "application/json", bodyReader) +} + +// NewPostFilesUploadInitRequestWithBody generates requests for PostFilesUploadInit with any type of body +func NewPostFilesUploadInitRequestWithBody(server string, params *PostFilesUploadInitParams, contentType string, body io.Reader) (*http.Request, error) { + var err error + + serverURL, err := url.Parse(server) + if err != nil { + return nil, err + } + + operationPath := fmt.Sprintf("/files/upload/init") + if operationPath[0] == '/' { + operationPath = "." + operationPath + } + + queryURL, err := serverURL.Parse(operationPath) + if err != nil { + return nil, err + } + + if params != nil { + queryValues := queryURL.Query() + + if params.Username != nil { + + if queryFrag, err := runtime.StyleParamWithLocation("form", true, "username", runtime.ParamLocationQuery, *params.Username); err != nil { + return nil, err + } else if parsed, err := url.ParseQuery(queryFrag); err != nil { + return nil, err + } else { + for k, v := range parsed { + for _, v2 := range v { + queryValues.Add(k, v2) + } + } + } + + } + + if params.Signature != nil { + + if queryFrag, err := runtime.StyleParamWithLocation("form", true, "signature", runtime.ParamLocationQuery, *params.Signature); err != nil { + return nil, err + } else if parsed, err := url.ParseQuery(queryFrag); err != nil { + return nil, err + } else { + for k, v := range parsed { + for _, v2 := range v { + queryValues.Add(k, v2) + } + } + } + + } + + if params.SignatureExpiration != nil { + + if queryFrag, err := runtime.StyleParamWithLocation("form", true, "signature_expiration", runtime.ParamLocationQuery, 
*params.SignatureExpiration); err != nil { + return nil, err + } else if parsed, err := url.ParseQuery(queryFrag); err != nil { + return nil, err + } else { + for k, v := range parsed { + for _, v2 := range v { + queryValues.Add(k, v2) + } + } + } + + } + + queryURL.RawQuery = queryValues.Encode() + } + + req, err := http.NewRequest("POST", queryURL.String(), body) + if err != nil { + return nil, err + } + + req.Header.Add("Content-Type", contentType) + + return req, nil +} + +// NewDeleteFilesUploadUploadIdRequest generates requests for DeleteFilesUploadUploadId +func NewDeleteFilesUploadUploadIdRequest(server string, uploadId string) (*http.Request, error) { + var err error + + var pathParam0 string + + pathParam0, err = runtime.StyleParamWithLocation("simple", false, "uploadId", runtime.ParamLocationPath, uploadId) + if err != nil { + return nil, err + } + + serverURL, err := url.Parse(server) + if err != nil { + return nil, err + } + + operationPath := fmt.Sprintf("/files/upload/%s", pathParam0) + if operationPath[0] == '/' { + operationPath = "." 
+ operationPath + } + + queryURL, err := serverURL.Parse(operationPath) + if err != nil { + return nil, err + } + + req, err := http.NewRequest("DELETE", queryURL.String(), nil) + if err != nil { + return nil, err + } + + return req, nil +} + +// NewPutFilesUploadUploadIdRequestWithBody generates requests for PutFilesUploadUploadId with any type of body +func NewPutFilesUploadUploadIdRequestWithBody(server string, uploadId string, params *PutFilesUploadUploadIdParams, contentType string, body io.Reader) (*http.Request, error) { + var err error + + var pathParam0 string + + pathParam0, err = runtime.StyleParamWithLocation("simple", false, "uploadId", runtime.ParamLocationPath, uploadId) + if err != nil { + return nil, err + } + + serverURL, err := url.Parse(server) + if err != nil { + return nil, err + } + + operationPath := fmt.Sprintf("/files/upload/%s", pathParam0) + if operationPath[0] == '/' { + operationPath = "." + operationPath + } + + queryURL, err := serverURL.Parse(operationPath) + if err != nil { + return nil, err + } + + if params != nil { + queryValues := queryURL.Query() + + if queryFrag, err := runtime.StyleParamWithLocation("form", true, "part", runtime.ParamLocationQuery, params.Part); err != nil { + return nil, err + } else if parsed, err := url.ParseQuery(queryFrag); err != nil { + return nil, err + } else { + for k, v := range parsed { + for _, v2 := range v { + queryValues.Add(k, v2) + } + } + } + + queryURL.RawQuery = queryValues.Encode() + } + + req, err := http.NewRequest("PUT", queryURL.String(), body) + if err != nil { + return nil, err + } + + req.Header.Add("Content-Type", contentType) + + return req, nil +} + +// NewPostFilesUploadUploadIdCompleteRequest generates requests for PostFilesUploadUploadIdComplete +func NewPostFilesUploadUploadIdCompleteRequest(server string, uploadId string) (*http.Request, error) { + var err error + + var pathParam0 string + + pathParam0, err = runtime.StyleParamWithLocation("simple", false, "uploadId", 
runtime.ParamLocationPath, uploadId) + if err != nil { + return nil, err + } + + serverURL, err := url.Parse(server) + if err != nil { + return nil, err + } + + operationPath := fmt.Sprintf("/files/upload/%s/complete", pathParam0) + if operationPath[0] == '/' { + operationPath = "." + operationPath + } + + queryURL, err := serverURL.Parse(operationPath) + if err != nil { + return nil, err + } + + req, err := http.NewRequest("POST", queryURL.String(), nil) + if err != nil { + return nil, err + } + + return req, nil +} + // NewGetHealthRequest generates requests for GetHealth func NewGetHealthRequest(server string) (*http.Request, error) { var err error @@ -563,6 +853,20 @@ type ClientWithResponsesInterface interface { // PostFilesWithBodyWithResponse request with any body PostFilesWithBodyWithResponse(ctx context.Context, params *PostFilesParams, contentType string, body io.Reader, reqEditors ...RequestEditorFn) (*PostFilesResponse, error) + // PostFilesUploadInitWithBodyWithResponse request with any body + PostFilesUploadInitWithBodyWithResponse(ctx context.Context, params *PostFilesUploadInitParams, contentType string, body io.Reader, reqEditors ...RequestEditorFn) (*PostFilesUploadInitResponse, error) + + PostFilesUploadInitWithResponse(ctx context.Context, params *PostFilesUploadInitParams, body PostFilesUploadInitJSONRequestBody, reqEditors ...RequestEditorFn) (*PostFilesUploadInitResponse, error) + + // DeleteFilesUploadUploadIdWithResponse request + DeleteFilesUploadUploadIdWithResponse(ctx context.Context, uploadId string, reqEditors ...RequestEditorFn) (*DeleteFilesUploadUploadIdResponse, error) + + // PutFilesUploadUploadIdWithBodyWithResponse request with any body + PutFilesUploadUploadIdWithBodyWithResponse(ctx context.Context, uploadId string, params *PutFilesUploadUploadIdParams, contentType string, body io.Reader, reqEditors ...RequestEditorFn) (*PutFilesUploadUploadIdResponse, error) + + // PostFilesUploadUploadIdCompleteWithResponse request + 
PostFilesUploadUploadIdCompleteWithResponse(ctx context.Context, uploadId string, reqEditors ...RequestEditorFn) (*PostFilesUploadUploadIdCompleteResponse, error) + // GetHealthWithResponse request GetHealthWithResponse(ctx context.Context, reqEditors ...RequestEditorFn) (*GetHealthResponse, error) @@ -648,13 +952,17 @@ func (r PostFilesResponse) StatusCode() int { return 0 } -type GetHealthResponse struct { +type PostFilesUploadInitResponse struct { Body []byte HTTPResponse *http.Response + JSON200 *MultipartUploadInit + JSON400 *InvalidPath + JSON401 *InvalidUser + JSON500 *InternalServerError } // Status returns HTTPResponse.Status -func (r GetHealthResponse) Status() string { +func (r PostFilesUploadInitResponse) Status() string { if r.HTTPResponse != nil { return r.HTTPResponse.Status } @@ -662,20 +970,22 @@ func (r GetHealthResponse) Status() string { } // StatusCode returns HTTPResponse.StatusCode -func (r GetHealthResponse) StatusCode() int { +func (r PostFilesUploadInitResponse) StatusCode() int { if r.HTTPResponse != nil { return r.HTTPResponse.StatusCode } return 0 } -type PostInitResponse struct { +type DeleteFilesUploadUploadIdResponse struct { Body []byte HTTPResponse *http.Response + JSON404 *UploadNotFound + JSON500 *InternalServerError } // Status returns HTTPResponse.Status -func (r PostInitResponse) Status() string { +func (r DeleteFilesUploadUploadIdResponse) Status() string { if r.HTTPResponse != nil { return r.HTTPResponse.Status } @@ -683,7 +993,100 @@ func (r PostInitResponse) Status() string { } // StatusCode returns HTTPResponse.StatusCode -func (r PostInitResponse) StatusCode() int { +func (r DeleteFilesUploadUploadIdResponse) StatusCode() int { + if r.HTTPResponse != nil { + return r.HTTPResponse.StatusCode + } + return 0 +} + +type PutFilesUploadUploadIdResponse struct { + Body []byte + HTTPResponse *http.Response + JSON200 *MultipartUploadPart + JSON400 *InvalidPath + JSON404 *UploadNotFound + JSON500 *InternalServerError + JSON507 
*NotEnoughDiskSpace +} + +// Status returns HTTPResponse.Status +func (r PutFilesUploadUploadIdResponse) Status() string { + if r.HTTPResponse != nil { + return r.HTTPResponse.Status + } + return http.StatusText(0) +} + +// StatusCode returns HTTPResponse.StatusCode +func (r PutFilesUploadUploadIdResponse) StatusCode() int { + if r.HTTPResponse != nil { + return r.HTTPResponse.StatusCode + } + return 0 +} + +type PostFilesUploadUploadIdCompleteResponse struct { + Body []byte + HTTPResponse *http.Response + JSON200 *MultipartUploadComplete + JSON404 *UploadNotFound + JSON500 *InternalServerError + JSON507 *NotEnoughDiskSpace +} + +// Status returns HTTPResponse.Status +func (r PostFilesUploadUploadIdCompleteResponse) Status() string { + if r.HTTPResponse != nil { + return r.HTTPResponse.Status + } + return http.StatusText(0) +} + +// StatusCode returns HTTPResponse.StatusCode +func (r PostFilesUploadUploadIdCompleteResponse) StatusCode() int { + if r.HTTPResponse != nil { + return r.HTTPResponse.StatusCode + } + return 0 +} + +type GetHealthResponse struct { + Body []byte + HTTPResponse *http.Response +} + +// Status returns HTTPResponse.Status +func (r GetHealthResponse) Status() string { + if r.HTTPResponse != nil { + return r.HTTPResponse.Status + } + return http.StatusText(0) +} + +// StatusCode returns HTTPResponse.StatusCode +func (r GetHealthResponse) StatusCode() int { + if r.HTTPResponse != nil { + return r.HTTPResponse.StatusCode + } + return 0 +} + +type PostInitResponse struct { + Body []byte + HTTPResponse *http.Response +} + +// Status returns HTTPResponse.Status +func (r PostInitResponse) Status() string { + if r.HTTPResponse != nil { + return r.HTTPResponse.Status + } + return http.StatusText(0) +} + +// StatusCode returns HTTPResponse.StatusCode +func (r PostInitResponse) StatusCode() int { if r.HTTPResponse != nil { return r.HTTPResponse.StatusCode } @@ -739,6 +1142,50 @@ func (c *ClientWithResponses) PostFilesWithBodyWithResponse(ctx 
context.Context, return ParsePostFilesResponse(rsp) } +// PostFilesUploadInitWithBodyWithResponse request with arbitrary body returning *PostFilesUploadInitResponse +func (c *ClientWithResponses) PostFilesUploadInitWithBodyWithResponse(ctx context.Context, params *PostFilesUploadInitParams, contentType string, body io.Reader, reqEditors ...RequestEditorFn) (*PostFilesUploadInitResponse, error) { + rsp, err := c.PostFilesUploadInitWithBody(ctx, params, contentType, body, reqEditors...) + if err != nil { + return nil, err + } + return ParsePostFilesUploadInitResponse(rsp) +} + +func (c *ClientWithResponses) PostFilesUploadInitWithResponse(ctx context.Context, params *PostFilesUploadInitParams, body PostFilesUploadInitJSONRequestBody, reqEditors ...RequestEditorFn) (*PostFilesUploadInitResponse, error) { + rsp, err := c.PostFilesUploadInit(ctx, params, body, reqEditors...) + if err != nil { + return nil, err + } + return ParsePostFilesUploadInitResponse(rsp) +} + +// DeleteFilesUploadUploadIdWithResponse request returning *DeleteFilesUploadUploadIdResponse +func (c *ClientWithResponses) DeleteFilesUploadUploadIdWithResponse(ctx context.Context, uploadId string, reqEditors ...RequestEditorFn) (*DeleteFilesUploadUploadIdResponse, error) { + rsp, err := c.DeleteFilesUploadUploadId(ctx, uploadId, reqEditors...) + if err != nil { + return nil, err + } + return ParseDeleteFilesUploadUploadIdResponse(rsp) +} + +// PutFilesUploadUploadIdWithBodyWithResponse request with arbitrary body returning *PutFilesUploadUploadIdResponse +func (c *ClientWithResponses) PutFilesUploadUploadIdWithBodyWithResponse(ctx context.Context, uploadId string, params *PutFilesUploadUploadIdParams, contentType string, body io.Reader, reqEditors ...RequestEditorFn) (*PutFilesUploadUploadIdResponse, error) { + rsp, err := c.PutFilesUploadUploadIdWithBody(ctx, uploadId, params, contentType, body, reqEditors...) 
+ if err != nil { + return nil, err + } + return ParsePutFilesUploadUploadIdResponse(rsp) +} + +// PostFilesUploadUploadIdCompleteWithResponse request returning *PostFilesUploadUploadIdCompleteResponse +func (c *ClientWithResponses) PostFilesUploadUploadIdCompleteWithResponse(ctx context.Context, uploadId string, reqEditors ...RequestEditorFn) (*PostFilesUploadUploadIdCompleteResponse, error) { + rsp, err := c.PostFilesUploadUploadIdComplete(ctx, uploadId, reqEditors...) + if err != nil { + return nil, err + } + return ParsePostFilesUploadUploadIdCompleteResponse(rsp) +} + // GetHealthWithResponse request returning *GetHealthResponse func (c *ClientWithResponses) GetHealthWithResponse(ctx context.Context, reqEditors ...RequestEditorFn) (*GetHealthResponse, error) { rsp, err := c.GetHealth(ctx, reqEditors...) @@ -901,6 +1348,187 @@ func ParsePostFilesResponse(rsp *http.Response) (*PostFilesResponse, error) { return response, nil } +// ParsePostFilesUploadInitResponse parses an HTTP response from a PostFilesUploadInitWithResponse call +func ParsePostFilesUploadInitResponse(rsp *http.Response) (*PostFilesUploadInitResponse, error) { + bodyBytes, err := io.ReadAll(rsp.Body) + defer func() { _ = rsp.Body.Close() }() + if err != nil { + return nil, err + } + + response := &PostFilesUploadInitResponse{ + Body: bodyBytes, + HTTPResponse: rsp, + } + + switch { + case strings.Contains(rsp.Header.Get("Content-Type"), "json") && rsp.StatusCode == 200: + var dest MultipartUploadInit + if err := json.Unmarshal(bodyBytes, &dest); err != nil { + return nil, err + } + response.JSON200 = &dest + + case strings.Contains(rsp.Header.Get("Content-Type"), "json") && rsp.StatusCode == 400: + var dest InvalidPath + if err := json.Unmarshal(bodyBytes, &dest); err != nil { + return nil, err + } + response.JSON400 = &dest + + case strings.Contains(rsp.Header.Get("Content-Type"), "json") && rsp.StatusCode == 401: + var dest InvalidUser + if err := json.Unmarshal(bodyBytes, &dest); err != nil { 
+ return nil, err + } + response.JSON401 = &dest + + case strings.Contains(rsp.Header.Get("Content-Type"), "json") && rsp.StatusCode == 500: + var dest InternalServerError + if err := json.Unmarshal(bodyBytes, &dest); err != nil { + return nil, err + } + response.JSON500 = &dest + + } + + return response, nil +} + +// ParseDeleteFilesUploadUploadIdResponse parses an HTTP response from a DeleteFilesUploadUploadIdWithResponse call +func ParseDeleteFilesUploadUploadIdResponse(rsp *http.Response) (*DeleteFilesUploadUploadIdResponse, error) { + bodyBytes, err := io.ReadAll(rsp.Body) + defer func() { _ = rsp.Body.Close() }() + if err != nil { + return nil, err + } + + response := &DeleteFilesUploadUploadIdResponse{ + Body: bodyBytes, + HTTPResponse: rsp, + } + + switch { + case strings.Contains(rsp.Header.Get("Content-Type"), "json") && rsp.StatusCode == 404: + var dest UploadNotFound + if err := json.Unmarshal(bodyBytes, &dest); err != nil { + return nil, err + } + response.JSON404 = &dest + + case strings.Contains(rsp.Header.Get("Content-Type"), "json") && rsp.StatusCode == 500: + var dest InternalServerError + if err := json.Unmarshal(bodyBytes, &dest); err != nil { + return nil, err + } + response.JSON500 = &dest + + } + + return response, nil +} + +// ParsePutFilesUploadUploadIdResponse parses an HTTP response from a PutFilesUploadUploadIdWithResponse call +func ParsePutFilesUploadUploadIdResponse(rsp *http.Response) (*PutFilesUploadUploadIdResponse, error) { + bodyBytes, err := io.ReadAll(rsp.Body) + defer func() { _ = rsp.Body.Close() }() + if err != nil { + return nil, err + } + + response := &PutFilesUploadUploadIdResponse{ + Body: bodyBytes, + HTTPResponse: rsp, + } + + switch { + case strings.Contains(rsp.Header.Get("Content-Type"), "json") && rsp.StatusCode == 200: + var dest MultipartUploadPart + if err := json.Unmarshal(bodyBytes, &dest); err != nil { + return nil, err + } + response.JSON200 = &dest + + case strings.Contains(rsp.Header.Get("Content-Type"), 
"json") && rsp.StatusCode == 400: + var dest InvalidPath + if err := json.Unmarshal(bodyBytes, &dest); err != nil { + return nil, err + } + response.JSON400 = &dest + + case strings.Contains(rsp.Header.Get("Content-Type"), "json") && rsp.StatusCode == 404: + var dest UploadNotFound + if err := json.Unmarshal(bodyBytes, &dest); err != nil { + return nil, err + } + response.JSON404 = &dest + + case strings.Contains(rsp.Header.Get("Content-Type"), "json") && rsp.StatusCode == 500: + var dest InternalServerError + if err := json.Unmarshal(bodyBytes, &dest); err != nil { + return nil, err + } + response.JSON500 = &dest + + case strings.Contains(rsp.Header.Get("Content-Type"), "json") && rsp.StatusCode == 507: + var dest NotEnoughDiskSpace + if err := json.Unmarshal(bodyBytes, &dest); err != nil { + return nil, err + } + response.JSON507 = &dest + + } + + return response, nil +} + +// ParsePostFilesUploadUploadIdCompleteResponse parses an HTTP response from a PostFilesUploadUploadIdCompleteWithResponse call +func ParsePostFilesUploadUploadIdCompleteResponse(rsp *http.Response) (*PostFilesUploadUploadIdCompleteResponse, error) { + bodyBytes, err := io.ReadAll(rsp.Body) + defer func() { _ = rsp.Body.Close() }() + if err != nil { + return nil, err + } + + response := &PostFilesUploadUploadIdCompleteResponse{ + Body: bodyBytes, + HTTPResponse: rsp, + } + + switch { + case strings.Contains(rsp.Header.Get("Content-Type"), "json") && rsp.StatusCode == 200: + var dest MultipartUploadComplete + if err := json.Unmarshal(bodyBytes, &dest); err != nil { + return nil, err + } + response.JSON200 = &dest + + case strings.Contains(rsp.Header.Get("Content-Type"), "json") && rsp.StatusCode == 404: + var dest UploadNotFound + if err := json.Unmarshal(bodyBytes, &dest); err != nil { + return nil, err + } + response.JSON404 = &dest + + case strings.Contains(rsp.Header.Get("Content-Type"), "json") && rsp.StatusCode == 500: + var dest InternalServerError + if err := json.Unmarshal(bodyBytes, 
&dest); err != nil { + return nil, err + } + response.JSON500 = &dest + + case strings.Contains(rsp.Header.Get("Content-Type"), "json") && rsp.StatusCode == 507: + var dest NotEnoughDiskSpace + if err := json.Unmarshal(bodyBytes, &dest); err != nil { + return nil, err + } + response.JSON507 = &dest + + } + + return response, nil +} + // ParseGetHealthResponse parses an HTTP response from a GetHealthWithResponse call func ParseGetHealthResponse(rsp *http.Response) (*GetHealthResponse, error) { bodyBytes, err := io.ReadAll(rsp.Body) diff --git a/tests/integration/internal/envd/api/models.gen.go b/tests/integration/internal/envd/api/models.gen.go index c27a0945f9..1daef2ae3b 100644 --- a/tests/integration/internal/envd/api/models.gen.go +++ b/tests/integration/internal/envd/api/models.gen.go @@ -69,6 +69,30 @@ type Metrics struct { Ts *int64 `json:"ts,omitempty"` } +// MultipartUploadComplete defines model for MultipartUploadComplete. +type MultipartUploadComplete struct { + // Path Path to the final assembled file + Path string `json:"path"` + + // Size Total size of the assembled file in bytes + Size int64 `json:"size"` +} + +// MultipartUploadInit defines model for MultipartUploadInit. +type MultipartUploadInit struct { + // UploadId Unique identifier for the upload session + UploadId string `json:"uploadId"` +} + +// MultipartUploadPart defines model for MultipartUploadPart. +type MultipartUploadPart struct { + // PartNumber The part number that was uploaded + PartNumber int `json:"partNumber"` + + // Size Size of the uploaded part in bytes + Size int64 `json:"size"` +} + // FilePath defines model for FilePath. type FilePath = string @@ -96,6 +120,9 @@ type InvalidUser = Error // NotEnoughDiskSpace defines model for NotEnoughDiskSpace. type NotEnoughDiskSpace = Error +// UploadNotFound defines model for UploadNotFound. +type UploadNotFound = Error + // UploadSuccess defines model for UploadSuccess. 
type UploadSuccess = []EntryInfo @@ -134,6 +161,30 @@ type PostFilesParams struct { SignatureExpiration *SignatureExpiration `form:"signature_expiration,omitempty" json:"signature_expiration,omitempty"` } +// PostFilesUploadInitJSONBody defines parameters for PostFilesUploadInit. +type PostFilesUploadInitJSONBody struct { + // Path Path to the file to upload + Path string `json:"path"` +} + +// PostFilesUploadInitParams defines parameters for PostFilesUploadInit. +type PostFilesUploadInitParams struct { + // Username User used for setting the owner, or resolving relative paths. + Username *User `form:"username,omitempty" json:"username,omitempty"` + + // Signature Signature used for file access permission verification. + Signature *Signature `form:"signature,omitempty" json:"signature,omitempty"` + + // SignatureExpiration Signature expiration used for defining the expiration time of the signature. + SignatureExpiration *SignatureExpiration `form:"signature_expiration,omitempty" json:"signature_expiration,omitempty"` +} + +// PutFilesUploadUploadIdParams defines parameters for PutFilesUploadUploadId. +type PutFilesUploadUploadIdParams struct { + // Part The part number (0-indexed) + Part int `form:"part" json:"part"` +} + // PostInitJSONBody defines parameters for PostInit. type PostInitJSONBody struct { // AccessToken Access token for secure access to envd service @@ -158,5 +209,8 @@ type PostInitJSONBody struct { // PostFilesMultipartRequestBody defines body for PostFiles for multipart/form-data ContentType. type PostFilesMultipartRequestBody PostFilesMultipartBody +// PostFilesUploadInitJSONRequestBody defines body for PostFilesUploadInit for application/json ContentType. +type PostFilesUploadInitJSONRequestBody PostFilesUploadInitJSONBody + // PostInitJSONRequestBody defines body for PostInit for application/json ContentType. 
type PostInitJSONRequestBody PostInitJSONBody From eb1dada27088d677b676bfcccf91cd1f8b792e89 Mon Sep 17 00:00:00 2001 From: Mish Ushakov <10400064+mishushakov@users.noreply.github.com> Date: Wed, 28 Jan 2026 20:08:09 +0100 Subject: [PATCH 05/59] fix(envd): address additional multipart upload security and robustness issues Security fixes: - Validate uploadId is a valid UUID to prevent path traversal attacks - Add max part size limit (100MB) to prevent DoS via disk exhaustion - Validate part numbers are non-negative Robustness fixes: - Add completed flag to prevent race between part uploads and complete/abort - Clean up destination file on assembly errors - Validate parts are contiguous (0, 1, 2, ..., n-1) before assembly - Warn on duplicate part number uploads (last write wins) - Add session TTL (1 hour) with background cleanup goroutine - Explicit destFile.Close() before marking success Features: - Add ETag header (MD5 hash) for uploaded parts - Add CreatedAt timestamp to sessions for TTL tracking Tests: - Invalid upload ID format (path traversal) - Negative part numbers - Missing parts in sequence - Upload part after complete started (conflict) - Part size limit exceeded - ETag header verification Co-Authored-By: Claude Opus 4.5 --- .../envd/internal/api/multipart_upload.go | 139 ++++++++++++- .../internal/api/multipart_upload_test.go | 193 ++++++++++++++++++ packages/envd/internal/api/store.go | 50 ++++- 3 files changed, 364 insertions(+), 18 deletions(-) diff --git a/packages/envd/internal/api/multipart_upload.go b/packages/envd/internal/api/multipart_upload.go index 4aa59073d6..c458e2d2ce 100644 --- a/packages/envd/internal/api/multipart_upload.go +++ b/packages/envd/internal/api/multipart_upload.go @@ -1,15 +1,19 @@ package api import ( + "crypto/md5" + "encoding/hex" "encoding/json" "errors" "fmt" + "io" "net/http" "os" "os/user" "path/filepath" "sort" "syscall" + "time" "github.com/google/uuid" @@ -22,6 +26,12 @@ const ( multipartTempDir = 
"/tmp/envd-multipart" // maxUploadSessions limits concurrent upload sessions to prevent resource exhaustion maxUploadSessions = 100 + // maxPartSize limits individual part size to 100MB to prevent DoS + maxPartSize = 100 * 1024 * 1024 + // uploadSessionTTL is the maximum time an upload session can remain active + uploadSessionTTL = 1 * time.Hour + // uploadSessionCleanupInterval is how often to check for expired sessions + uploadSessionCleanupInterval = 5 * time.Minute ) // PostFilesUploadInit initializes a multipart upload session @@ -90,12 +100,13 @@ func (a *API) PostFilesUploadInit(w http.ResponseWriter, r *http.Request, params // Store the session session := &MultipartUploadSession{ - UploadID: uploadID, - FilePath: filePath, - TempDir: tempDir, - UID: uid, - GID: gid, - Parts: make(map[int]string), + UploadID: uploadID, + FilePath: filePath, + TempDir: tempDir, + UID: uid, + GID: gid, + Parts: make(map[int]string), + CreatedAt: time.Now(), } a.uploadsLock.Lock() @@ -128,6 +139,13 @@ func (a *API) PutFilesUploadUploadId(w http.ResponseWriter, r *http.Request, upl operationID := logs.AssignOperationID() + // Validate uploadId is a valid UUID to prevent path traversal + if _, err := uuid.Parse(uploadId); err != nil { + a.logger.Error().Err(err).Str(string(logs.OperationIDKey), operationID).Str("uploadId", uploadId).Msg("invalid upload ID format") + jsonError(w, http.StatusBadRequest, fmt.Errorf("invalid upload ID format: must be a valid UUID")) + return + } + // Get the session a.uploadsLock.RLock() session, exists := a.uploads[uploadId] @@ -139,8 +157,22 @@ func (a *API) PutFilesUploadUploadId(w http.ResponseWriter, r *http.Request, upl return } + // Check if session is already being completed/aborted + if session.completed.Load() { + a.logger.Error().Str(string(logs.OperationIDKey), operationID).Str("uploadId", uploadId).Msg("upload session is already completing") + jsonError(w, http.StatusConflict, fmt.Errorf("upload session is already completing or 
aborted")) + return + } + partNumber := params.Part + // Check for negative part numbers + if partNumber < 0 { + a.logger.Error().Str(string(logs.OperationIDKey), operationID).Int("partNumber", partNumber).Msg("invalid part number") + jsonError(w, http.StatusBadRequest, fmt.Errorf("part number must be non-negative")) + return + } + // Create the part file partPath := filepath.Join(session.TempDir, fmt.Sprintf("part_%d", partNumber)) @@ -156,8 +188,9 @@ func (a *API) PutFilesUploadUploadId(w http.ResponseWriter, r *http.Request, upl return } - // Write the part data using ReadFrom for efficient copying - size, err := partFile.ReadFrom(r.Body) + // Write the part data using ReadFrom with size limit to prevent DoS + limitedReader := io.LimitReader(r.Body, maxPartSize+1) + size, err := partFile.ReadFrom(limitedReader) partFile.Close() if err != nil { @@ -172,19 +205,47 @@ func (a *API) PutFilesUploadUploadId(w http.ResponseWriter, r *http.Request, upl return } - // Record the part + // Check if part exceeded size limit + if size > maxPartSize { + os.Remove(partPath) + a.logger.Error().Str(string(logs.OperationIDKey), operationID).Int64("size", size).Int64("maxSize", maxPartSize).Msg("part size exceeds limit") + jsonError(w, http.StatusRequestEntityTooLarge, fmt.Errorf("part size exceeds maximum allowed size of %d bytes", maxPartSize)) + return + } + + // Record the part (check for duplicates and warn) session.mu.Lock() + if existingPath, exists := session.Parts[partNumber]; exists { + a.logger.Warn(). + Str(string(logs.OperationIDKey), operationID). + Str("uploadId", uploadId). + Int("partNumber", partNumber). + Str("existingPath", existingPath). + Msg("overwriting existing part") + } session.Parts[partNumber] = partPath session.mu.Unlock() + // Calculate ETag (MD5 of part content) + partData, err := os.ReadFile(partPath) + var etag string + if err == nil { + hash := md5.Sum(partData) + etag = hex.EncodeToString(hash[:]) + } + a.logger.Debug(). 
Str(string(logs.OperationIDKey), operationID). Str("uploadId", uploadId). Int("partNumber", partNumber). Int64("size", size). + Str("etag", etag). Msg("part uploaded") w.Header().Set("Content-Type", "application/json") + if etag != "" { + w.Header().Set("ETag", fmt.Sprintf("\"%s\"", etag)) + } w.WriteHeader(http.StatusOK) json.NewEncoder(w).Encode(MultipartUploadPart{ PartNumber: partNumber, @@ -202,6 +263,14 @@ func (a *API) PostFilesUploadUploadIdComplete(w http.ResponseWriter, r *http.Req a.uploadsLock.Lock() session, exists := a.uploads[uploadId] if exists { + // Mark as completed to prevent new parts from being uploaded + if !session.completed.CompareAndSwap(false, true) { + // Already being completed by another request + a.uploadsLock.Unlock() + a.logger.Error().Str(string(logs.OperationIDKey), operationID).Str("uploadId", uploadId).Msg("upload session is already completing") + jsonError(w, http.StatusConflict, fmt.Errorf("upload session is already completing")) + return + } delete(a.uploads, uploadId) } a.uploadsLock.Unlock() @@ -223,6 +292,18 @@ func (a *API) PostFilesUploadUploadIdComplete(w http.ResponseWriter, r *http.Req }() }() + // Track if we need to clean up destination file on error + destFilePath := session.FilePath + destFileCreated := false + assemblySucceeded := false + defer func() { + if destFileCreated && !assemblySucceeded { + if err := os.Remove(destFilePath); err != nil && !os.IsNotExist(err) { + logger.Warn().Err(err).Str("path", destFilePath).Msg("failed to cleanup partial destination file") + } + } + }() + // Ensure parent directories exist err := permissions.EnsureDirs(filepath.Dir(session.FilePath), session.UID, session.GID) if err != nil { @@ -243,10 +324,11 @@ func (a *API) PostFilesUploadUploadIdComplete(w http.ResponseWriter, r *http.Req jsonError(w, http.StatusInternalServerError, fmt.Errorf("error creating destination file: %w", err)) return } - defer destFile.Close() + destFileCreated = true // Set ownership if err := 
os.Chown(session.FilePath, session.UID, session.GID); err != nil { + destFile.Close() a.logger.Error().Err(err).Str(string(logs.OperationIDKey), operationID).Msg("error changing file ownership") jsonError(w, http.StatusInternalServerError, fmt.Errorf("error changing file ownership: %w", err)) return @@ -263,12 +345,30 @@ func (a *API) PostFilesUploadUploadIdComplete(w http.ResponseWriter, r *http.Req session.mu.Unlock() sort.Ints(partNumbers) + // Validate that parts are contiguous (0, 1, 2, ..., n-1) + if len(partNumbers) > 0 { + for i, partNum := range partNumbers { + if partNum != i { + destFile.Close() + a.logger.Error(). + Str(string(logs.OperationIDKey), operationID). + Int("expected", i). + Int("got", partNum). + Ints("allParts", partNumbers). + Msg("missing part in upload sequence") + jsonError(w, http.StatusBadRequest, fmt.Errorf("missing part %d: parts must be contiguous starting from 0", i)) + return + } + } + } + // Assemble the file using sendfile via io.Copy (which uses copy_file_range on Linux) var totalSize int64 for _, partNum := range partNumbers { partPath := partPaths[partNum] partFile, err := os.Open(partPath) if err != nil { + destFile.Close() a.logger.Error().Err(err).Str(string(logs.OperationIDKey), operationID).Int("partNumber", partNum).Msg("error opening part file") jsonError(w, http.StatusInternalServerError, fmt.Errorf("error opening part %d: %w", partNum, err)) return @@ -279,6 +379,7 @@ func (a *API) PostFilesUploadUploadIdComplete(w http.ResponseWriter, r *http.Req partFile.Close() if err != nil { + destFile.Close() if errors.Is(err, syscall.ENOSPC) { a.logger.Error().Err(err).Str(string(logs.OperationIDKey), operationID).Msg("not enough disk space") jsonError(w, http.StatusInsufficientStorage, fmt.Errorf("not enough disk space")) @@ -292,6 +393,16 @@ func (a *API) PostFilesUploadUploadIdComplete(w http.ResponseWriter, r *http.Req totalSize += written } + // Close the file before marking success + if err := destFile.Close(); err != 
nil { + a.logger.Error().Err(err).Str(string(logs.OperationIDKey), operationID).Msg("error closing destination file") + jsonError(w, http.StatusInternalServerError, fmt.Errorf("error closing destination file: %w", err)) + return + } + + // Mark assembly as successful so we don't clean up the file + assemblySucceeded = true + // Note: We skip fsync here for performance. The kernel will flush data to disk // eventually. For sandbox use cases, immediate durability is not critical. @@ -321,6 +432,14 @@ func (a *API) DeleteFilesUploadUploadId(w http.ResponseWriter, r *http.Request, a.uploadsLock.Lock() session, exists := a.uploads[uploadId] if exists { + // Mark as completed to prevent new parts from being uploaded + if !session.completed.CompareAndSwap(false, true) { + // Already being completed/aborted by another request + a.uploadsLock.Unlock() + a.logger.Error().Str(string(logs.OperationIDKey), operationID).Str("uploadId", uploadId).Msg("upload session is already completing") + jsonError(w, http.StatusConflict, fmt.Errorf("upload session is already completing or aborted")) + return + } delete(a.uploads, uploadId) } a.uploadsLock.Unlock() diff --git a/packages/envd/internal/api/multipart_upload_test.go b/packages/envd/internal/api/multipart_upload_test.go index af365a5ddd..0819ea7ea1 100644 --- a/packages/envd/internal/api/multipart_upload_test.go +++ b/packages/envd/internal/api/multipart_upload_test.go @@ -108,6 +108,10 @@ func TestMultipartUpload(t *testing.T) { assert.Equal(t, 0, part0Resp.PartNumber) assert.Equal(t, int64(len(part0Content)), part0Resp.Size) + // Verify ETag is returned + etag := part0W.Header().Get("ETag") + assert.NotEmpty(t, etag, "ETag should be returned for uploaded part") + // Upload part 1 part1Content := []byte("World!") part1Req := httptest.NewRequest(http.MethodPut, "/files/upload/"+uploadId+"?part=1", bytes.NewReader(part1Content)) @@ -226,6 +230,195 @@ func TestMultipartUpload(t *testing.T) { assert.Equal(t, http.StatusNotFound, 
w.Code) }) + t.Run("invalid upload ID format", func(t *testing.T) { + t.Parallel() + api := newTestAPI(t) + + // Try to upload with an invalid UUID (path traversal attempt) + req := httptest.NewRequest(http.MethodPut, "/files/upload/../../../etc/passwd?part=0", bytes.NewReader([]byte("test"))) + w := httptest.NewRecorder() + + api.PutFilesUploadUploadId(w, req, "../../../etc/passwd", PutFilesUploadUploadIdParams{Part: 0}) + assert.Equal(t, http.StatusBadRequest, w.Code) + }) + + t.Run("negative part number", func(t *testing.T) { + t.Parallel() + api := newTestAPI(t) + tempDir := t.TempDir() + + // Initialize upload + body := PostFilesUploadInitJSONRequestBody{ + Path: filepath.Join(tempDir, "test-file.txt"), + } + bodyBytes, _ := json.Marshal(body) + + initReq := httptest.NewRequest(http.MethodPost, "/files/upload/init", bytes.NewReader(bodyBytes)) + initReq.Header.Set("Content-Type", "application/json") + initW := httptest.NewRecorder() + + api.PostFilesUploadInit(initW, initReq, PostFilesUploadInitParams{}) + require.Equal(t, http.StatusOK, initW.Code) + + var initResp MultipartUploadInit + err := json.Unmarshal(initW.Body.Bytes(), &initResp) + require.NoError(t, err) + uploadId := initResp.UploadId + + // Try to upload with negative part number + req := httptest.NewRequest(http.MethodPut, "/files/upload/"+uploadId+"?part=-1", bytes.NewReader([]byte("test"))) + w := httptest.NewRecorder() + + api.PutFilesUploadUploadId(w, req, uploadId, PutFilesUploadUploadIdParams{Part: -1}) + assert.Equal(t, http.StatusBadRequest, w.Code) + + // Clean up + api.uploadsLock.Lock() + session := api.uploads[uploadId] + if session != nil { + os.RemoveAll(session.TempDir) + } + delete(api.uploads, uploadId) + api.uploadsLock.Unlock() + }) + + t.Run("missing part in sequence", func(t *testing.T) { + t.Parallel() + api := newTestAPI(t) + tempDir := t.TempDir() + + // Initialize upload + body := PostFilesUploadInitJSONRequestBody{ + Path: filepath.Join(tempDir, "gap-file.txt"), + } + 
bodyBytes, _ := json.Marshal(body) + + initReq := httptest.NewRequest(http.MethodPost, "/files/upload/init", bytes.NewReader(bodyBytes)) + initReq.Header.Set("Content-Type", "application/json") + initW := httptest.NewRecorder() + + api.PostFilesUploadInit(initW, initReq, PostFilesUploadInitParams{}) + require.Equal(t, http.StatusOK, initW.Code) + + var initResp MultipartUploadInit + err := json.Unmarshal(initW.Body.Bytes(), &initResp) + require.NoError(t, err) + uploadId := initResp.UploadId + + // Upload parts 0 and 2, but skip part 1 + for _, partNum := range []int{0, 2} { + partReq := httptest.NewRequest(http.MethodPut, fmt.Sprintf("/files/upload/%s?part=%d", uploadId, partNum), bytes.NewReader([]byte("X"))) + partReq.Header.Set("Content-Type", "application/octet-stream") + partW := httptest.NewRecorder() + + api.PutFilesUploadUploadId(partW, partReq, uploadId, PutFilesUploadUploadIdParams{Part: partNum}) + require.Equal(t, http.StatusOK, partW.Code) + } + + // Complete should fail due to missing part 1 + completeReq := httptest.NewRequest(http.MethodPost, "/files/upload/"+uploadId+"/complete", nil) + completeW := httptest.NewRecorder() + + api.PostFilesUploadUploadIdComplete(completeW, completeReq, uploadId) + assert.Equal(t, http.StatusBadRequest, completeW.Code) + }) + + t.Run("upload part after complete started", func(t *testing.T) { + t.Parallel() + api := newTestAPI(t) + tempDir := t.TempDir() + + // Initialize upload + body := PostFilesUploadInitJSONRequestBody{ + Path: filepath.Join(tempDir, "race-file.txt"), + } + bodyBytes, _ := json.Marshal(body) + + initReq := httptest.NewRequest(http.MethodPost, "/files/upload/init", bytes.NewReader(bodyBytes)) + initReq.Header.Set("Content-Type", "application/json") + initW := httptest.NewRecorder() + + api.PostFilesUploadInit(initW, initReq, PostFilesUploadInitParams{}) + require.Equal(t, http.StatusOK, initW.Code) + + var initResp MultipartUploadInit + err := json.Unmarshal(initW.Body.Bytes(), &initResp) + 
require.NoError(t, err) + uploadId := initResp.UploadId + + // Upload part 0 + part0Req := httptest.NewRequest(http.MethodPut, "/files/upload/"+uploadId+"?part=0", bytes.NewReader([]byte("A"))) + part0Req.Header.Set("Content-Type", "application/octet-stream") + part0W := httptest.NewRecorder() + + api.PutFilesUploadUploadId(part0W, part0Req, uploadId, PutFilesUploadUploadIdParams{Part: 0}) + require.Equal(t, http.StatusOK, part0W.Code) + + // Mark the session as completing + api.uploadsLock.RLock() + session := api.uploads[uploadId] + api.uploadsLock.RUnlock() + require.NotNil(t, session) + session.completed.Store(true) + + // Try to upload another part - should fail with 409 Conflict + part1Req := httptest.NewRequest(http.MethodPut, "/files/upload/"+uploadId+"?part=1", bytes.NewReader([]byte("B"))) + part1Req.Header.Set("Content-Type", "application/octet-stream") + part1W := httptest.NewRecorder() + + api.PutFilesUploadUploadId(part1W, part1Req, uploadId, PutFilesUploadUploadIdParams{Part: 1}) + assert.Equal(t, http.StatusConflict, part1W.Code) + + // Clean up + api.uploadsLock.Lock() + delete(api.uploads, uploadId) + api.uploadsLock.Unlock() + os.RemoveAll(session.TempDir) + }) + + t.Run("part size limit", func(t *testing.T) { + t.Parallel() + api := newTestAPI(t) + tempDir := t.TempDir() + + // Initialize upload + body := PostFilesUploadInitJSONRequestBody{ + Path: filepath.Join(tempDir, "large-file.txt"), + } + bodyBytes, _ := json.Marshal(body) + + initReq := httptest.NewRequest(http.MethodPost, "/files/upload/init", bytes.NewReader(bodyBytes)) + initReq.Header.Set("Content-Type", "application/json") + initW := httptest.NewRecorder() + + api.PostFilesUploadInit(initW, initReq, PostFilesUploadInitParams{}) + require.Equal(t, http.StatusOK, initW.Code) + + var initResp MultipartUploadInit + err := json.Unmarshal(initW.Body.Bytes(), &initResp) + require.NoError(t, err) + uploadId := initResp.UploadId + + // Try to upload a part that exceeds the size limit + // We 
create content that's just over the limit + oversizedContent := make([]byte, maxPartSize+1) + partReq := httptest.NewRequest(http.MethodPut, "/files/upload/"+uploadId+"?part=0", bytes.NewReader(oversizedContent)) + partReq.Header.Set("Content-Type", "application/octet-stream") + partW := httptest.NewRecorder() + + api.PutFilesUploadUploadId(partW, partReq, uploadId, PutFilesUploadUploadIdParams{Part: 0}) + assert.Equal(t, http.StatusRequestEntityTooLarge, partW.Code) + + // Clean up + api.uploadsLock.Lock() + session := api.uploads[uploadId] + if session != nil { + os.RemoveAll(session.TempDir) + } + delete(api.uploads, uploadId) + api.uploadsLock.Unlock() + }) + t.Run("max sessions limit", func(t *testing.T) { t.Parallel() api := newTestAPI(t) diff --git a/packages/envd/internal/api/store.go b/packages/envd/internal/api/store.go index 32ff375a8c..aef46999db 100644 --- a/packages/envd/internal/api/store.go +++ b/packages/envd/internal/api/store.go @@ -5,6 +5,8 @@ import ( "net/http" "os" "sync" + "sync/atomic" + "time" "github.com/rs/zerolog" @@ -15,13 +17,15 @@ import ( // MultipartUploadSession tracks an in-progress multipart upload type MultipartUploadSession struct { - UploadID string - FilePath string // Final destination path - TempDir string // Temp directory for parts - UID int - GID int - Parts map[int]string // partNumber -> temp file path - mu sync.Mutex + UploadID string + FilePath string // Final destination path + TempDir string // Temp directory for parts + UID int + GID int + Parts map[int]string // partNumber -> temp file path + CreatedAt time.Time + completed atomic.Bool // Set to true when complete/abort starts to prevent new parts + mu sync.Mutex } type API struct { @@ -47,7 +51,7 @@ func New(l *zerolog.Logger, defaults *execcontext.Defaults, mmdsChan chan *host. 
l.Warn().Err(err).Str("dir", multipartTempDir).Msg("failed to cleanup stale multipart temp directory") } - return &API{ + api := &API{ logger: l, defaults: defaults, mmdsChan: mmdsChan, @@ -55,6 +59,36 @@ func New(l *zerolog.Logger, defaults *execcontext.Defaults, mmdsChan chan *host. lastSetTime: utils.NewAtomicMax(), uploads: make(map[string]*MultipartUploadSession), } + + // Start background cleanup for expired upload sessions + go api.cleanupExpiredUploads() + + return api +} + +// cleanupExpiredUploads periodically removes upload sessions that have exceeded their TTL +func (a *API) cleanupExpiredUploads() { + ticker := time.NewTicker(uploadSessionCleanupInterval) + defer ticker.Stop() + + for range ticker.C { + a.uploadsLock.Lock() + now := time.Now() + for uploadID, session := range a.uploads { + if now.Sub(session.CreatedAt) > uploadSessionTTL { + delete(a.uploads, uploadID) + // Clean up temp directory in background + tempDir := session.TempDir + go func() { + if err := os.RemoveAll(tempDir); err != nil { + a.logger.Warn().Err(err).Str("tempDir", tempDir).Msg("failed to cleanup expired upload temp directory") + } + }() + a.logger.Info().Str("uploadId", uploadID).Msg("cleaned up expired multipart upload session") + } + } + a.uploadsLock.Unlock() + } } func (a *API) GetHealth(w http.ResponseWriter, r *http.Request) { From 1f9745978a8e8fa3a6cab22da70046c08b86cebb Mon Sep 17 00:00:00 2001 From: Mish Ushakov <10400064+mishushakov@users.noreply.github.com> Date: Wed, 28 Jan 2026 20:12:03 +0100 Subject: [PATCH 06/59] refactor(envd): remove unused ETag computation from multipart upload ETag was being computed and returned but never validated or used. Remove the unnecessary MD5 hash computation and header to simplify the code and improve performance. 
Co-Authored-By: Claude Opus 4.5 --- packages/envd/internal/api/multipart_upload.go | 14 -------------- .../envd/internal/api/multipart_upload_test.go | 4 ---- 2 files changed, 18 deletions(-) diff --git a/packages/envd/internal/api/multipart_upload.go b/packages/envd/internal/api/multipart_upload.go index c458e2d2ce..32d2a97e51 100644 --- a/packages/envd/internal/api/multipart_upload.go +++ b/packages/envd/internal/api/multipart_upload.go @@ -1,8 +1,6 @@ package api import ( - "crypto/md5" - "encoding/hex" "encoding/json" "errors" "fmt" @@ -226,26 +224,14 @@ func (a *API) PutFilesUploadUploadId(w http.ResponseWriter, r *http.Request, upl session.Parts[partNumber] = partPath session.mu.Unlock() - // Calculate ETag (MD5 of part content) - partData, err := os.ReadFile(partPath) - var etag string - if err == nil { - hash := md5.Sum(partData) - etag = hex.EncodeToString(hash[:]) - } - a.logger.Debug(). Str(string(logs.OperationIDKey), operationID). Str("uploadId", uploadId). Int("partNumber", partNumber). Int64("size", size). - Str("etag", etag). 
Msg("part uploaded") w.Header().Set("Content-Type", "application/json") - if etag != "" { - w.Header().Set("ETag", fmt.Sprintf("\"%s\"", etag)) - } w.WriteHeader(http.StatusOK) json.NewEncoder(w).Encode(MultipartUploadPart{ PartNumber: partNumber, diff --git a/packages/envd/internal/api/multipart_upload_test.go b/packages/envd/internal/api/multipart_upload_test.go index 0819ea7ea1..491571d8c5 100644 --- a/packages/envd/internal/api/multipart_upload_test.go +++ b/packages/envd/internal/api/multipart_upload_test.go @@ -108,10 +108,6 @@ func TestMultipartUpload(t *testing.T) { assert.Equal(t, 0, part0Resp.PartNumber) assert.Equal(t, int64(len(part0Content)), part0Resp.Size) - // Verify ETag is returned - etag := part0W.Header().Get("ETag") - assert.NotEmpty(t, etag, "ETag should be returned for uploaded part") - // Upload part 1 part1Content := []byte("World!") part1Req := httptest.NewRequest(http.MethodPut, "/files/upload/"+uploadId+"?part=1", bytes.NewReader(part1Content)) From c028cb0e3ff4d4497dd0d1816ab8cee16b74c762 Mon Sep 17 00:00:00 2001 From: Mish Ushakov <10400064+mishushakov@users.noreply.github.com> Date: Wed, 28 Jan 2026 20:41:11 +0100 Subject: [PATCH 07/59] perf(envd): rewrite multipart upload with direct file writes Replace temp file based multipart upload with direct writes to destination: - Add totalSize and partSize to init request (breaking API change) - Create and preallocate destination file at init time - Write parts directly to file at computed offsets using WriteAt - Eliminate temp directory, assembly phase, and copy operations - Keep open file handle across session for concurrent part writes This removes multiple layers of I/O overhead: - No temp file creation per part - No reading temp files back during assembly - No sequential copy loop - Single file open/close cycle Co-Authored-By: Claude Opus 4.5 --- packages/envd/internal/api/api.gen.go | 6 + .../envd/internal/api/multipart_upload.go | 310 ++++++++---------- 
.../internal/api/multipart_upload_test.go | 193 ++++++----- packages/envd/internal/api/store.go | 47 +-- packages/envd/spec/envd.yaml | 10 + 5 files changed, 288 insertions(+), 278 deletions(-) diff --git a/packages/envd/internal/api/api.gen.go b/packages/envd/internal/api/api.gen.go index 6aa7d12f99..b47891be27 100644 --- a/packages/envd/internal/api/api.gen.go +++ b/packages/envd/internal/api/api.gen.go @@ -168,8 +168,14 @@ type PostFilesParams struct { // PostFilesUploadInitJSONBody defines parameters for PostFilesUploadInit. type PostFilesUploadInitJSONBody struct { + // PartSize Size of each part in bytes (last part may be smaller) + PartSize int64 `json:"partSize"` + // Path Path to the file to upload Path string `json:"path"` + + // TotalSize Total size of the file in bytes + TotalSize int64 `json:"totalSize"` } // PostFilesUploadInitParams defines parameters for PostFilesUploadInit. diff --git a/packages/envd/internal/api/multipart_upload.go b/packages/envd/internal/api/multipart_upload.go index 32d2a97e51..ae9cf935fd 100644 --- a/packages/envd/internal/api/multipart_upload.go +++ b/packages/envd/internal/api/multipart_upload.go @@ -9,7 +9,6 @@ import ( "os" "os/user" "path/filepath" - "sort" "syscall" "time" @@ -21,7 +20,6 @@ import ( ) const ( - multipartTempDir = "/tmp/envd-multipart" // maxUploadSessions limits concurrent upload sessions to prevent resource exhaustion maxUploadSessions = 100 // maxPartSize limits individual part size to 100MB to prevent DoS @@ -46,6 +44,20 @@ func (a *API) PostFilesUploadInit(w http.ResponseWriter, r *http.Request, params return } + // Validate totalSize and partSize + if body.TotalSize < 0 { + jsonError(w, http.StatusBadRequest, fmt.Errorf("totalSize must be non-negative")) + return + } + if body.PartSize <= 0 { + jsonError(w, http.StatusBadRequest, fmt.Errorf("partSize must be positive")) + return + } + if body.PartSize > maxPartSize { + jsonError(w, http.StatusBadRequest, fmt.Errorf("partSize exceeds maximum allowed 
size of %d bytes", maxPartSize)) + return + } + // Validate signing if needed err := a.validateSigning(r, params.Signature, params.SignatureExpiration, params.Username, body.Path, SigningWriteOperation) if err != nil { @@ -85,32 +97,79 @@ func (a *API) PostFilesUploadInit(w http.ResponseWriter, r *http.Request, params return } + // Ensure parent directories exist + if err := permissions.EnsureDirs(filepath.Dir(filePath), uid, gid); err != nil { + a.logger.Error().Err(err).Str(string(logs.OperationIDKey), operationID).Msg("error ensuring directories") + jsonError(w, http.StatusInternalServerError, fmt.Errorf("error ensuring directories: %w", err)) + return + } + + // Create and preallocate the destination file + destFile, err := os.OpenFile(filePath, os.O_RDWR|os.O_CREATE|os.O_TRUNC, 0o666) + if err != nil { + if errors.Is(err, syscall.ENOSPC) { + a.logger.Error().Err(err).Str(string(logs.OperationIDKey), operationID).Msg("not enough disk space") + jsonError(w, http.StatusInsufficientStorage, fmt.Errorf("not enough disk space")) + return + } + a.logger.Error().Err(err).Str(string(logs.OperationIDKey), operationID).Msg("error creating destination file") + jsonError(w, http.StatusInternalServerError, fmt.Errorf("error creating destination file: %w", err)) + return + } + + // Preallocate the file to the total size (creates sparse file) + if body.TotalSize > 0 { + if err := destFile.Truncate(body.TotalSize); err != nil { + destFile.Close() + os.Remove(filePath) + if errors.Is(err, syscall.ENOSPC) { + a.logger.Error().Err(err).Str(string(logs.OperationIDKey), operationID).Msg("not enough disk space") + jsonError(w, http.StatusInsufficientStorage, fmt.Errorf("not enough disk space")) + return + } + a.logger.Error().Err(err).Str(string(logs.OperationIDKey), operationID).Msg("error preallocating file") + jsonError(w, http.StatusInternalServerError, fmt.Errorf("error preallocating file: %w", err)) + return + } + } + + // Set ownership + if err := os.Chown(filePath, uid, 
gid); err != nil { + destFile.Close() + os.Remove(filePath) + a.logger.Error().Err(err).Str(string(logs.OperationIDKey), operationID).Msg("error changing file ownership") + jsonError(w, http.StatusInternalServerError, fmt.Errorf("error changing file ownership: %w", err)) + return + } + // Create upload ID uploadID := uuid.New().String() - // Create temp directory for this upload - tempDir := filepath.Join(multipartTempDir, uploadID) - if err := os.MkdirAll(tempDir, 0o755); err != nil { - a.logger.Error().Err(err).Str(string(logs.OperationIDKey), operationID).Msg("error creating temp directory") - jsonError(w, http.StatusInternalServerError, fmt.Errorf("error creating temp directory: %w", err)) - return + // Calculate number of parts + numParts := int((body.TotalSize + body.PartSize - 1) / body.PartSize) + if numParts == 0 && body.TotalSize == 0 { + numParts = 0 // Empty file, no parts needed } - // Store the session + // Store the session with the open file handle session := &MultipartUploadSession{ - UploadID: uploadID, - FilePath: filePath, - TempDir: tempDir, - UID: uid, - GID: gid, - Parts: make(map[int]string), - CreatedAt: time.Now(), + UploadID: uploadID, + FilePath: filePath, + DestFile: destFile, + TotalSize: body.TotalSize, + PartSize: body.PartSize, + NumParts: numParts, + UID: uid, + GID: gid, + PartsWritten: make(map[int]bool), + CreatedAt: time.Now(), } a.uploadsLock.Lock() if len(a.uploads) >= maxUploadSessions { a.uploadsLock.Unlock() - os.RemoveAll(tempDir) + destFile.Close() + os.Remove(filePath) a.logger.Error().Str(string(logs.OperationIDKey), operationID).Int("maxSessions", maxUploadSessions).Msg("too many concurrent upload sessions") jsonError(w, http.StatusTooManyRequests, fmt.Errorf("too many concurrent upload sessions (max %d)", maxUploadSessions)) return @@ -122,6 +181,9 @@ func (a *API) PostFilesUploadInit(w http.ResponseWriter, r *http.Request, params Str(string(logs.OperationIDKey), operationID). Str("uploadId", uploadID). 
Str("filePath", filePath). + Int64("totalSize", body.TotalSize). + Int64("partSize", body.PartSize). + Int("numParts", numParts). Msg("multipart upload initialized") w.Header().Set("Content-Type", "application/json") @@ -131,7 +193,7 @@ func (a *API) PostFilesUploadInit(w http.ResponseWriter, r *http.Request, params }) } -// PutFilesUploadUploadId uploads a part of a multipart upload +// PutFilesUploadUploadId uploads a part of a multipart upload directly to the destination file func (a *API) PutFilesUploadUploadId(w http.ResponseWriter, r *http.Request, uploadId string, params PutFilesUploadUploadIdParams) { defer r.Body.Close() @@ -171,28 +233,44 @@ func (a *API) PutFilesUploadUploadId(w http.ResponseWriter, r *http.Request, upl return } - // Create the part file - partPath := filepath.Join(session.TempDir, fmt.Sprintf("part_%d", partNumber)) + // Check part number is within range + if session.NumParts > 0 && partNumber >= session.NumParts { + a.logger.Error().Str(string(logs.OperationIDKey), operationID).Int("partNumber", partNumber).Int("numParts", session.NumParts).Msg("part number out of range") + jsonError(w, http.StatusBadRequest, fmt.Errorf("part number %d out of range (expected 0-%d)", partNumber, session.NumParts-1)) + return + } + + // Calculate offset and expected size for this part + offset := int64(partNumber) * session.PartSize + expectedSize := session.PartSize + if partNumber == session.NumParts-1 { + // Last part may be smaller + expectedSize = session.TotalSize - offset + } - partFile, err := os.OpenFile(partPath, os.O_WRONLY|os.O_CREATE|os.O_TRUNC, 0o644) + // Read the part data with size limit + limitedReader := io.LimitReader(r.Body, expectedSize+1) + data, err := io.ReadAll(limitedReader) if err != nil { - if errors.Is(err, syscall.ENOSPC) { - a.logger.Error().Err(err).Str(string(logs.OperationIDKey), operationID).Msg("not enough disk space") - jsonError(w, http.StatusInsufficientStorage, fmt.Errorf("not enough disk space")) - return - } - 
a.logger.Error().Err(err).Str(string(logs.OperationIDKey), operationID).Msg("error creating part file") - jsonError(w, http.StatusInternalServerError, fmt.Errorf("error creating part file: %w", err)) + a.logger.Error().Err(err).Str(string(logs.OperationIDKey), operationID).Msg("error reading part data") + jsonError(w, http.StatusInternalServerError, fmt.Errorf("error reading part data: %w", err)) return } - // Write the part data using ReadFrom with size limit to prevent DoS - limitedReader := io.LimitReader(r.Body, maxPartSize+1) - size, err := partFile.ReadFrom(limitedReader) - partFile.Close() + size := int64(len(data)) + // Check if part exceeded expected size + if size > expectedSize { + a.logger.Error().Str(string(logs.OperationIDKey), operationID).Int64("size", size).Int64("expectedSize", expectedSize).Msg("part size exceeds expected size") + jsonError(w, http.StatusBadRequest, fmt.Errorf("part size %d exceeds expected size %d", size, expectedSize)) + return + } + + // Write directly to the destination file at the correct offset + session.mu.Lock() + _, err = session.DestFile.WriteAt(data, offset) if err != nil { - os.Remove(partPath) + session.mu.Unlock() if errors.Is(err, syscall.ENOSPC) { a.logger.Error().Err(err).Str(string(logs.OperationIDKey), operationID).Msg("not enough disk space") jsonError(w, http.StatusInsufficientStorage, fmt.Errorf("not enough disk space")) @@ -203,25 +281,15 @@ func (a *API) PutFilesUploadUploadId(w http.ResponseWriter, r *http.Request, upl return } - // Check if part exceeded size limit - if size > maxPartSize { - os.Remove(partPath) - a.logger.Error().Str(string(logs.OperationIDKey), operationID).Int64("size", size).Int64("maxSize", maxPartSize).Msg("part size exceeds limit") - jsonError(w, http.StatusRequestEntityTooLarge, fmt.Errorf("part size exceeds maximum allowed size of %d bytes", maxPartSize)) - return - } - - // Record the part (check for duplicates and warn) - session.mu.Lock() - if existingPath, exists := 
session.Parts[partNumber]; exists { + // Mark part as written + if session.PartsWritten[partNumber] { a.logger.Warn(). Str(string(logs.OperationIDKey), operationID). Str("uploadId", uploadId). Int("partNumber", partNumber). - Str("existingPath", existingPath). Msg("overwriting existing part") } - session.Parts[partNumber] = partPath + session.PartsWritten[partNumber] = true session.mu.Unlock() a.logger.Debug(). @@ -229,6 +297,7 @@ func (a *API) PutFilesUploadUploadId(w http.ResponseWriter, r *http.Request, upl Str("uploadId", uploadId). Int("partNumber", partNumber). Int64("size", size). + Int64("offset", offset). Msg("part uploaded") w.Header().Set("Content-Type", "application/json") @@ -239,7 +308,7 @@ func (a *API) PutFilesUploadUploadId(w http.ResponseWriter, r *http.Request, upl }) } -// PostFilesUploadUploadIdComplete completes a multipart upload and assembles the file +// PostFilesUploadUploadIdComplete completes a multipart upload func (a *API) PostFilesUploadUploadIdComplete(w http.ResponseWriter, r *http.Request, uploadId string) { defer r.Body.Close() @@ -267,148 +336,52 @@ func (a *API) PostFilesUploadUploadIdComplete(w http.ResponseWriter, r *http.Req return } - // Cleanup temp directory in background (don't block response) - tempDir := session.TempDir - logger := a.logger - defer func() { - go func() { - if err := os.RemoveAll(tempDir); err != nil { - logger.Warn().Err(err).Str("tempDir", tempDir).Msg("failed to cleanup multipart temp directory") - } - }() - }() - - // Track if we need to clean up destination file on error - destFilePath := session.FilePath - destFileCreated := false - assemblySucceeded := false - defer func() { - if destFileCreated && !assemblySucceeded { - if err := os.Remove(destFilePath); err != nil && !os.IsNotExist(err) { - logger.Warn().Err(err).Str("path", destFilePath).Msg("failed to cleanup partial destination file") - } - } - }() - - // Ensure parent directories exist - err := 
permissions.EnsureDirs(filepath.Dir(session.FilePath), session.UID, session.GID) - if err != nil { - a.logger.Error().Err(err).Str(string(logs.OperationIDKey), operationID).Msg("error ensuring directories") - jsonError(w, http.StatusInternalServerError, fmt.Errorf("error ensuring directories: %w", err)) - return - } - - // Create the destination file - destFile, err := os.OpenFile(session.FilePath, os.O_WRONLY|os.O_CREATE|os.O_TRUNC, 0o666) - if err != nil { - if errors.Is(err, syscall.ENOSPC) { - a.logger.Error().Err(err).Str(string(logs.OperationIDKey), operationID).Msg("not enough disk space") - jsonError(w, http.StatusInsufficientStorage, fmt.Errorf("not enough disk space")) - return - } - a.logger.Error().Err(err).Str(string(logs.OperationIDKey), operationID).Msg("error creating destination file") - jsonError(w, http.StatusInternalServerError, fmt.Errorf("error creating destination file: %w", err)) - return - } - destFileCreated = true - - // Set ownership - if err := os.Chown(session.FilePath, session.UID, session.GID); err != nil { - destFile.Close() - a.logger.Error().Err(err).Str(string(logs.OperationIDKey), operationID).Msg("error changing file ownership") - jsonError(w, http.StatusInternalServerError, fmt.Errorf("error changing file ownership: %w", err)) - return - } - - // Get the part numbers and paths in order (copy under lock to avoid race with concurrent uploads) + // Verify all parts were uploaded session.mu.Lock() - partNumbers := make([]int, 0, len(session.Parts)) - partPaths := make(map[int]string, len(session.Parts)) - for num, path := range session.Parts { - partNumbers = append(partNumbers, num) - partPaths[num] = path - } - session.mu.Unlock() - sort.Ints(partNumbers) - - // Validate that parts are contiguous (0, 1, 2, ..., n-1) - if len(partNumbers) > 0 { - for i, partNum := range partNumbers { - if partNum != i { - destFile.Close() - a.logger.Error(). - Str(string(logs.OperationIDKey), operationID). - Int("expected", i). 
- Int("got", partNum). - Ints("allParts", partNumbers). - Msg("missing part in upload sequence") - jsonError(w, http.StatusBadRequest, fmt.Errorf("missing part %d: parts must be contiguous starting from 0", i)) - return - } + missingParts := []int{} + for i := 0; i < session.NumParts; i++ { + if !session.PartsWritten[i] { + missingParts = append(missingParts, i) } } + session.mu.Unlock() - // Assemble the file using sendfile via io.Copy (which uses copy_file_range on Linux) - var totalSize int64 - for _, partNum := range partNumbers { - partPath := partPaths[partNum] - partFile, err := os.Open(partPath) - if err != nil { - destFile.Close() - a.logger.Error().Err(err).Str(string(logs.OperationIDKey), operationID).Int("partNumber", partNum).Msg("error opening part file") - jsonError(w, http.StatusInternalServerError, fmt.Errorf("error opening part %d: %w", partNum, err)) - return - } - - // Use ReadFrom which on Linux uses copy_file_range for zero-copy - written, err := destFile.ReadFrom(partFile) - partFile.Close() - - if err != nil { - destFile.Close() - if errors.Is(err, syscall.ENOSPC) { - a.logger.Error().Err(err).Str(string(logs.OperationIDKey), operationID).Msg("not enough disk space") - jsonError(w, http.StatusInsufficientStorage, fmt.Errorf("not enough disk space")) - return - } - a.logger.Error().Err(err).Str(string(logs.OperationIDKey), operationID).Int("partNumber", partNum).Msg("error copying part") - jsonError(w, http.StatusInternalServerError, fmt.Errorf("error copying part %d: %w", partNum, err)) - return - } - - totalSize += written + if len(missingParts) > 0 { + session.DestFile.Close() + os.Remove(session.FilePath) + a.logger.Error(). + Str(string(logs.OperationIDKey), operationID). + Str("uploadId", uploadId). + Ints("missingParts", missingParts). 
+ Msg("missing parts in upload") + jsonError(w, http.StatusBadRequest, fmt.Errorf("missing parts: %v", missingParts)) + return } - // Close the file before marking success - if err := destFile.Close(); err != nil { + // Close the file + if err := session.DestFile.Close(); err != nil { a.logger.Error().Err(err).Str(string(logs.OperationIDKey), operationID).Msg("error closing destination file") jsonError(w, http.StatusInternalServerError, fmt.Errorf("error closing destination file: %w", err)) return } - // Mark assembly as successful so we don't clean up the file - assemblySucceeded = true - - // Note: We skip fsync here for performance. The kernel will flush data to disk - // eventually. For sandbox use cases, immediate durability is not critical. - a.logger.Debug(). Str(string(logs.OperationIDKey), operationID). Str("uploadId", uploadId). Str("filePath", session.FilePath). - Int64("totalSize", totalSize). - Int("numParts", len(partNumbers)). + Int64("totalSize", session.TotalSize). + Int("numParts", session.NumParts). 
Msg("multipart upload completed") w.Header().Set("Content-Type", "application/json") w.WriteHeader(http.StatusOK) json.NewEncoder(w).Encode(MultipartUploadComplete{ Path: session.FilePath, - Size: totalSize, + Size: session.TotalSize, }) } -// DeleteFilesUploadUploadId aborts a multipart upload and cleans up temporary files +// DeleteFilesUploadUploadId aborts a multipart upload and cleans up func (a *API) DeleteFilesUploadUploadId(w http.ResponseWriter, r *http.Request, uploadId string) { defer r.Body.Close() @@ -436,9 +409,10 @@ func (a *API) DeleteFilesUploadUploadId(w http.ResponseWriter, r *http.Request, return } - // Clean up temp directory - if err := os.RemoveAll(session.TempDir); err != nil { - a.logger.Warn().Err(err).Str(string(logs.OperationIDKey), operationID).Str("uploadId", uploadId).Msg("error cleaning up temp directory") + // Close and remove the file + session.DestFile.Close() + if err := os.Remove(session.FilePath); err != nil && !os.IsNotExist(err) { + a.logger.Warn().Err(err).Str(string(logs.OperationIDKey), operationID).Str("uploadId", uploadId).Msg("error removing file") } a.logger.Debug(). 
diff --git a/packages/envd/internal/api/multipart_upload_test.go b/packages/envd/internal/api/multipart_upload_test.go index 491571d8c5..036d72152d 100644 --- a/packages/envd/internal/api/multipart_upload_test.go +++ b/packages/envd/internal/api/multipart_upload_test.go @@ -43,7 +43,9 @@ func TestMultipartUpload(t *testing.T) { tempDir := t.TempDir() body := PostFilesUploadInitJSONRequestBody{ - Path: filepath.Join(tempDir, "test-file.txt"), + Path: filepath.Join(tempDir, "test-file.txt"), + TotalSize: 100, + PartSize: 50, } bodyBytes, _ := json.Marshal(body) @@ -63,10 +65,12 @@ func TestMultipartUpload(t *testing.T) { // Clean up api.uploadsLock.Lock() session := api.uploads[resp.UploadId] - api.uploadsLock.Unlock() if session != nil { - os.RemoveAll(session.TempDir) + session.DestFile.Close() + os.Remove(session.FilePath) } + delete(api.uploads, resp.UploadId) + api.uploadsLock.Unlock() }) t.Run("complete multipart upload", func(t *testing.T) { @@ -75,9 +79,16 @@ func TestMultipartUpload(t *testing.T) { tempDir := t.TempDir() destPath := filepath.Join(tempDir, "assembled-file.txt") + part0Content := []byte("Hello, ") + part1Content := []byte("World!") + totalSize := int64(len(part0Content) + len(part1Content)) + partSize := int64(len(part0Content)) + // Initialize upload initBody := PostFilesUploadInitJSONRequestBody{ - Path: destPath, + Path: destPath, + TotalSize: totalSize, + PartSize: partSize, } initBodyBytes, _ := json.Marshal(initBody) @@ -94,7 +105,6 @@ func TestMultipartUpload(t *testing.T) { uploadId := initResp.UploadId // Upload part 0 - part0Content := []byte("Hello, ") part0Req := httptest.NewRequest(http.MethodPut, "/files/upload/"+uploadId+"?part=0", bytes.NewReader(part0Content)) part0Req.Header.Set("Content-Type", "application/octet-stream") part0W := httptest.NewRecorder() @@ -109,7 +119,6 @@ func TestMultipartUpload(t *testing.T) { assert.Equal(t, int64(len(part0Content)), part0Resp.Size) // Upload part 1 - part1Content := []byte("World!") 
part1Req := httptest.NewRequest(http.MethodPut, "/files/upload/"+uploadId+"?part=1", bytes.NewReader(part1Content)) part1Req.Header.Set("Content-Type", "application/octet-stream") part1W := httptest.NewRecorder() @@ -128,7 +137,7 @@ func TestMultipartUpload(t *testing.T) { err = json.Unmarshal(completeW.Body.Bytes(), &completeResp) require.NoError(t, err) assert.Equal(t, destPath, completeResp.Path) - assert.Equal(t, int64(len(part0Content)+len(part1Content)), completeResp.Size) + assert.Equal(t, totalSize, completeResp.Size) // Verify file contents content, err := os.ReadFile(destPath) @@ -140,10 +149,13 @@ func TestMultipartUpload(t *testing.T) { t.Parallel() api := newTestAPI(t) tempDir := t.TempDir() + destPath := filepath.Join(tempDir, "aborted-file.txt") // Initialize upload initBody := PostFilesUploadInitJSONRequestBody{ - Path: filepath.Join(tempDir, "aborted-file.txt"), + Path: destPath, + TotalSize: 100, + PartSize: 50, } initBodyBytes, _ := json.Marshal(initBody) @@ -159,21 +171,9 @@ func TestMultipartUpload(t *testing.T) { require.NoError(t, err) uploadId := initResp.UploadId - // Get temp dir before deletion - api.uploadsLock.RLock() - session := api.uploads[uploadId] - api.uploadsLock.RUnlock() - require.NotNil(t, session) - sessionTempDir := session.TempDir - - // Upload a part - partContent := []byte("test content") - partReq := httptest.NewRequest(http.MethodPut, "/files/upload/"+uploadId+"?part=0", bytes.NewReader(partContent)) - partReq.Header.Set("Content-Type", "application/octet-stream") - partW := httptest.NewRecorder() - - api.PutFilesUploadUploadId(partW, partReq, uploadId, PutFilesUploadUploadIdParams{Part: 0}) - require.Equal(t, http.StatusOK, partW.Code) + // Verify file was created + _, err = os.Stat(destPath) + require.NoError(t, err, "destination file should exist after init") // Abort upload abortReq := httptest.NewRequest(http.MethodDelete, "/files/upload/"+uploadId, nil) @@ -188,8 +188,8 @@ func TestMultipartUpload(t *testing.T) { 
api.uploadsLock.RUnlock() assert.False(t, exists) - // Verify temp dir is cleaned up - _, err = os.Stat(sessionTempDir) + // Verify file is cleaned up + _, err = os.Stat(destPath) assert.True(t, os.IsNotExist(err)) }) @@ -245,7 +245,9 @@ func TestMultipartUpload(t *testing.T) { // Initialize upload body := PostFilesUploadInitJSONRequestBody{ - Path: filepath.Join(tempDir, "test-file.txt"), + Path: filepath.Join(tempDir, "test-file.txt"), + TotalSize: 100, + PartSize: 50, } bodyBytes, _ := json.Marshal(body) @@ -272,7 +274,8 @@ func TestMultipartUpload(t *testing.T) { api.uploadsLock.Lock() session := api.uploads[uploadId] if session != nil { - os.RemoveAll(session.TempDir) + session.DestFile.Close() + os.Remove(session.FilePath) } delete(api.uploads, uploadId) api.uploadsLock.Unlock() @@ -282,10 +285,13 @@ func TestMultipartUpload(t *testing.T) { t.Parallel() api := newTestAPI(t) tempDir := t.TempDir() + destPath := filepath.Join(tempDir, "gap-file.txt") - // Initialize upload + // Initialize upload with 3 parts body := PostFilesUploadInitJSONRequestBody{ - Path: filepath.Join(tempDir, "gap-file.txt"), + Path: destPath, + TotalSize: 30, + PartSize: 10, } bodyBytes, _ := json.Marshal(body) @@ -303,7 +309,8 @@ func TestMultipartUpload(t *testing.T) { // Upload parts 0 and 2, but skip part 1 for _, partNum := range []int{0, 2} { - partReq := httptest.NewRequest(http.MethodPut, fmt.Sprintf("/files/upload/%s?part=%d", uploadId, partNum), bytes.NewReader([]byte("X"))) + content := make([]byte, 10) + partReq := httptest.NewRequest(http.MethodPut, fmt.Sprintf("/files/upload/%s?part=%d", uploadId, partNum), bytes.NewReader(content)) partReq.Header.Set("Content-Type", "application/octet-stream") partW := httptest.NewRecorder() @@ -323,10 +330,13 @@ func TestMultipartUpload(t *testing.T) { t.Parallel() api := newTestAPI(t) tempDir := t.TempDir() + destPath := filepath.Join(tempDir, "race-file.txt") // Initialize upload body := PostFilesUploadInitJSONRequestBody{ - Path: 
filepath.Join(tempDir, "race-file.txt"), + Path: destPath, + TotalSize: 10, + PartSize: 10, } bodyBytes, _ := json.Marshal(body) @@ -343,7 +353,8 @@ func TestMultipartUpload(t *testing.T) { uploadId := initResp.UploadId // Upload part 0 - part0Req := httptest.NewRequest(http.MethodPut, "/files/upload/"+uploadId+"?part=0", bytes.NewReader([]byte("A"))) + part0Content := make([]byte, 10) + part0Req := httptest.NewRequest(http.MethodPut, "/files/upload/"+uploadId+"?part=0", bytes.NewReader(part0Content)) part0Req.Header.Set("Content-Type", "application/octet-stream") part0W := httptest.NewRecorder() @@ -358,61 +369,20 @@ func TestMultipartUpload(t *testing.T) { session.completed.Store(true) // Try to upload another part - should fail with 409 Conflict - part1Req := httptest.NewRequest(http.MethodPut, "/files/upload/"+uploadId+"?part=1", bytes.NewReader([]byte("B"))) + part1Content := make([]byte, 10) + part1Req := httptest.NewRequest(http.MethodPut, "/files/upload/"+uploadId+"?part=0", bytes.NewReader(part1Content)) part1Req.Header.Set("Content-Type", "application/octet-stream") part1W := httptest.NewRecorder() - api.PutFilesUploadUploadId(part1W, part1Req, uploadId, PutFilesUploadUploadIdParams{Part: 1}) + api.PutFilesUploadUploadId(part1W, part1Req, uploadId, PutFilesUploadUploadIdParams{Part: 0}) assert.Equal(t, http.StatusConflict, part1W.Code) // Clean up api.uploadsLock.Lock() delete(api.uploads, uploadId) api.uploadsLock.Unlock() - os.RemoveAll(session.TempDir) - }) - - t.Run("part size limit", func(t *testing.T) { - t.Parallel() - api := newTestAPI(t) - tempDir := t.TempDir() - - // Initialize upload - body := PostFilesUploadInitJSONRequestBody{ - Path: filepath.Join(tempDir, "large-file.txt"), - } - bodyBytes, _ := json.Marshal(body) - - initReq := httptest.NewRequest(http.MethodPost, "/files/upload/init", bytes.NewReader(bodyBytes)) - initReq.Header.Set("Content-Type", "application/json") - initW := httptest.NewRecorder() - - api.PostFilesUploadInit(initW, 
initReq, PostFilesUploadInitParams{}) - require.Equal(t, http.StatusOK, initW.Code) - - var initResp MultipartUploadInit - err := json.Unmarshal(initW.Body.Bytes(), &initResp) - require.NoError(t, err) - uploadId := initResp.UploadId - - // Try to upload a part that exceeds the size limit - // We create content that's just over the limit - oversizedContent := make([]byte, maxPartSize+1) - partReq := httptest.NewRequest(http.MethodPut, "/files/upload/"+uploadId+"?part=0", bytes.NewReader(oversizedContent)) - partReq.Header.Set("Content-Type", "application/octet-stream") - partW := httptest.NewRecorder() - - api.PutFilesUploadUploadId(partW, partReq, uploadId, PutFilesUploadUploadIdParams{Part: 0}) - assert.Equal(t, http.StatusRequestEntityTooLarge, partW.Code) - - // Clean up - api.uploadsLock.Lock() - session := api.uploads[uploadId] - if session != nil { - os.RemoveAll(session.TempDir) - } - delete(api.uploads, uploadId) - api.uploadsLock.Unlock() + session.DestFile.Close() + os.Remove(destPath) }) t.Run("max sessions limit", func(t *testing.T) { @@ -423,7 +393,9 @@ func TestMultipartUpload(t *testing.T) { // Create maxUploadSessions sessions for i := 0; i < maxUploadSessions; i++ { body := PostFilesUploadInitJSONRequestBody{ - Path: filepath.Join(tempDir, fmt.Sprintf("file-%d.txt", i)), + Path: filepath.Join(tempDir, fmt.Sprintf("file-%d.txt", i)), + TotalSize: 100, + PartSize: 50, } bodyBytes, _ := json.Marshal(body) @@ -437,7 +409,9 @@ func TestMultipartUpload(t *testing.T) { // The next one should fail with 429 body := PostFilesUploadInitJSONRequestBody{ - Path: filepath.Join(tempDir, "one-too-many.txt"), + Path: filepath.Join(tempDir, "one-too-many.txt"), + TotalSize: 100, + PartSize: 50, } bodyBytes, _ := json.Marshal(body) @@ -451,8 +425,10 @@ func TestMultipartUpload(t *testing.T) { // Clean up all sessions api.uploadsLock.Lock() for _, session := range api.uploads { - os.RemoveAll(session.TempDir) + session.DestFile.Close() + os.Remove(session.FilePath) } 
+ api.uploads = make(map[string]*MultipartUploadSession) api.uploadsLock.Unlock() }) @@ -462,13 +438,15 @@ func TestMultipartUpload(t *testing.T) { tempDir := t.TempDir() destPath := filepath.Join(tempDir, "out-of-order-file.txt") - // Initialize upload - initBody := PostFilesUploadInitJSONRequestBody{ - Path: destPath, + // Initialize upload with 3 parts of 1 byte each + body := PostFilesUploadInitJSONRequestBody{ + Path: destPath, + TotalSize: 3, + PartSize: 1, } - initBodyBytes, _ := json.Marshal(initBody) + bodyBytes, _ := json.Marshal(body) - initReq := httptest.NewRequest(http.MethodPost, "/files/upload/init", bytes.NewReader(initBodyBytes)) + initReq := httptest.NewRequest(http.MethodPost, "/files/upload/init", bytes.NewReader(bodyBytes)) initReq.Header.Set("Content-Type", "application/json") initW := httptest.NewRecorder() @@ -511,6 +489,45 @@ func TestMultipartUpload(t *testing.T) { require.NoError(t, err) assert.Equal(t, "ABC", string(content)) }) + + t.Run("empty file upload", func(t *testing.T) { + t.Parallel() + api := newTestAPI(t) + tempDir := t.TempDir() + destPath := filepath.Join(tempDir, "empty-file.txt") + + // Initialize upload with 0 size + body := PostFilesUploadInitJSONRequestBody{ + Path: destPath, + TotalSize: 0, + PartSize: 1024, + } + bodyBytes, _ := json.Marshal(body) + + initReq := httptest.NewRequest(http.MethodPost, "/files/upload/init", bytes.NewReader(bodyBytes)) + initReq.Header.Set("Content-Type", "application/json") + initW := httptest.NewRecorder() + + api.PostFilesUploadInit(initW, initReq, PostFilesUploadInitParams{}) + require.Equal(t, http.StatusOK, initW.Code) + + var initResp MultipartUploadInit + err := json.Unmarshal(initW.Body.Bytes(), &initResp) + require.NoError(t, err) + uploadId := initResp.UploadId + + // Complete upload (no parts needed) + completeReq := httptest.NewRequest(http.MethodPost, "/files/upload/"+uploadId+"/complete", nil) + completeW := httptest.NewRecorder() + + 
api.PostFilesUploadUploadIdComplete(completeW, completeReq, uploadId) + require.Equal(t, http.StatusOK, completeW.Code) + + // Verify file exists and is empty + content, err := os.ReadFile(destPath) + require.NoError(t, err) + assert.Equal(t, "", string(content)) + }) } func TestMultipartUploadRouting(t *testing.T) { @@ -526,7 +543,9 @@ func TestMultipartUploadRouting(t *testing.T) { // Test that routes are registered t.Run("init route exists", func(t *testing.T) { body := PostFilesUploadInitJSONRequestBody{ - Path: "/tmp/test-file.txt", + Path: "/tmp/test-file.txt", + TotalSize: 100, + PartSize: 50, } bodyBytes, _ := json.Marshal(body) diff --git a/packages/envd/internal/api/store.go b/packages/envd/internal/api/store.go index aef46999db..2b3ecb0f74 100644 --- a/packages/envd/internal/api/store.go +++ b/packages/envd/internal/api/store.go @@ -17,15 +17,18 @@ import ( // MultipartUploadSession tracks an in-progress multipart upload type MultipartUploadSession struct { - UploadID string - FilePath string // Final destination path - TempDir string // Temp directory for parts - UID int - GID int - Parts map[int]string // partNumber -> temp file path - CreatedAt time.Time - completed atomic.Bool // Set to true when complete/abort starts to prevent new parts - mu sync.Mutex + UploadID string + FilePath string // Final destination path + DestFile *os.File // Open file handle for direct writes + TotalSize int64 // Total expected file size + PartSize int64 // Size of each part (except possibly last) + NumParts int // Total number of expected parts + UID int + GID int + PartsWritten map[int]bool // partNumber -> whether it's been written + CreatedAt time.Time + completed atomic.Bool // Set to true when complete/abort starts to prevent new parts + mu sync.Mutex } type API struct { @@ -46,11 +49,6 @@ type API struct { } func New(l *zerolog.Logger, defaults *execcontext.Defaults, mmdsChan chan *host.MMDSOpts, isNotFC bool) *API { - // Clean up any stale multipart upload temp 
directories from previous runs - if err := os.RemoveAll(multipartTempDir); err != nil { - l.Warn().Err(err).Str("dir", multipartTempDir).Msg("failed to cleanup stale multipart temp directory") - } - api := &API{ logger: l, defaults: defaults, @@ -76,15 +74,18 @@ func (a *API) cleanupExpiredUploads() { now := time.Now() for uploadID, session := range a.uploads { if now.Sub(session.CreatedAt) > uploadSessionTTL { - delete(a.uploads, uploadID) - // Clean up temp directory in background - tempDir := session.TempDir - go func() { - if err := os.RemoveAll(tempDir); err != nil { - a.logger.Warn().Err(err).Str("tempDir", tempDir).Msg("failed to cleanup expired upload temp directory") - } - }() - a.logger.Info().Str("uploadId", uploadID).Msg("cleaned up expired multipart upload session") + // Mark as completed to prevent races + if session.completed.CompareAndSwap(false, true) { + delete(a.uploads, uploadID) + // Close file handle and remove file in background + go func(s *MultipartUploadSession) { + s.DestFile.Close() + if err := os.Remove(s.FilePath); err != nil && !os.IsNotExist(err) { + a.logger.Warn().Err(err).Str("filePath", s.FilePath).Msg("failed to cleanup expired upload file") + } + }(session) + a.logger.Info().Str("uploadId", uploadID).Msg("cleaned up expired multipart upload session") + } } } a.uploadsLock.Unlock() diff --git a/packages/envd/spec/envd.yaml b/packages/envd/spec/envd.yaml index b7c353bef5..8107a9e6c8 100644 --- a/packages/envd/spec/envd.yaml +++ b/packages/envd/spec/envd.yaml @@ -144,10 +144,20 @@ paths: type: object required: - path + - totalSize + - partSize properties: path: type: string description: Path to the file to upload + totalSize: + type: integer + format: int64 + description: Total size of the file in bytes + partSize: + type: integer + format: int64 + description: Size of each part in bytes (last part may be smaller) responses: "200": description: Upload session initialized From e3e771d2e8ef301c326d150f6008d026eeb8ea3f Mon Sep 17 
00:00:00 2001 From: "github-actions[bot]" Date: Wed, 28 Jan 2026 19:44:17 +0000 Subject: [PATCH 08/59] chore: auto-commit generated changes --- tests/integration/internal/envd/api/models.gen.go | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/tests/integration/internal/envd/api/models.gen.go b/tests/integration/internal/envd/api/models.gen.go index 1daef2ae3b..572d6253e0 100644 --- a/tests/integration/internal/envd/api/models.gen.go +++ b/tests/integration/internal/envd/api/models.gen.go @@ -163,8 +163,14 @@ type PostFilesParams struct { // PostFilesUploadInitJSONBody defines parameters for PostFilesUploadInit. type PostFilesUploadInitJSONBody struct { + // PartSize Size of each part in bytes (last part may be smaller) + PartSize int64 `json:"partSize"` + // Path Path to the file to upload Path string `json:"path"` + + // TotalSize Total size of the file in bytes + TotalSize int64 `json:"totalSize"` } // PostFilesUploadInitParams defines parameters for PostFilesUploadInit. From c382cf9db5737312300188d0144182f0ee926376 Mon Sep 17 00:00:00 2001 From: Mish Ushakov <10400064+mishushakov@users.noreply.github.com> Date: Wed, 28 Jan 2026 20:49:10 +0100 Subject: [PATCH 09/59] style(envd): fix linter issues in multipart upload code - Add blank lines before return statements (nlreturn) - Check json.Encode error return values (errchkjson) - Use integer range syntax for Go 1.22+ (intrange) - Add t.Parallel() to TestMultipartUploadRouting (paralleltest) - Use assert.Empty() instead of assert.Equal() (testifylint) - Fix import ordering (gci) Co-Authored-By: Claude Opus 4.5 --- packages/envd/internal/api/auth.go | 1 + .../envd/internal/api/multipart_upload.go | 51 ++++++++++++++++--- .../internal/api/multipart_upload_test.go | 8 ++- 3 files changed, 51 insertions(+), 9 deletions(-) diff --git a/packages/envd/internal/api/auth.go b/packages/envd/internal/api/auth.go index 74a1d47dda..fc774e512d 100644 --- a/packages/envd/internal/api/auth.go +++ 
b/packages/envd/internal/api/auth.go @@ -36,6 +36,7 @@ func isAllowedPath(methodPath string) bool { return true } } + return false } diff --git a/packages/envd/internal/api/multipart_upload.go b/packages/envd/internal/api/multipart_upload.go index ae9cf935fd..f98e6a81f0 100644 --- a/packages/envd/internal/api/multipart_upload.go +++ b/packages/envd/internal/api/multipart_upload.go @@ -41,20 +41,24 @@ func (a *API) PostFilesUploadInit(w http.ResponseWriter, r *http.Request, params if err := json.NewDecoder(r.Body).Decode(&body); err != nil { a.logger.Error().Err(err).Str(string(logs.OperationIDKey), operationID).Msg("failed to decode request body") jsonError(w, http.StatusBadRequest, fmt.Errorf("invalid request body: %w", err)) + return } // Validate totalSize and partSize if body.TotalSize < 0 { jsonError(w, http.StatusBadRequest, fmt.Errorf("totalSize must be non-negative")) + return } if body.PartSize <= 0 { jsonError(w, http.StatusBadRequest, fmt.Errorf("partSize must be positive")) + return } if body.PartSize > maxPartSize { jsonError(w, http.StatusBadRequest, fmt.Errorf("partSize exceeds maximum allowed size of %d bytes", maxPartSize)) + return } @@ -63,6 +67,7 @@ func (a *API) PostFilesUploadInit(w http.ResponseWriter, r *http.Request, params if err != nil { a.logger.Error().Err(err).Str(string(logs.OperationIDKey), operationID).Msg("error during auth validation") jsonError(w, http.StatusUnauthorized, err) + return } @@ -71,6 +76,7 @@ func (a *API) PostFilesUploadInit(w http.ResponseWriter, r *http.Request, params if err != nil { a.logger.Error().Err(err).Str(string(logs.OperationIDKey), operationID).Msg("no user specified") jsonError(w, http.StatusBadRequest, err) + return } @@ -79,6 +85,7 @@ func (a *API) PostFilesUploadInit(w http.ResponseWriter, r *http.Request, params if err != nil { a.logger.Error().Err(err).Str(string(logs.OperationIDKey), operationID).Str("username", username).Msg("error looking up user") jsonError(w, http.StatusUnauthorized, 
fmt.Errorf("error looking up user '%s': %w", username, err)) + return } @@ -86,6 +93,7 @@ func (a *API) PostFilesUploadInit(w http.ResponseWriter, r *http.Request, params if err != nil { a.logger.Error().Err(err).Str(string(logs.OperationIDKey), operationID).Msg("error getting user ids") jsonError(w, http.StatusInternalServerError, fmt.Errorf("error getting user ids: %w", err)) + return } @@ -94,6 +102,7 @@ func (a *API) PostFilesUploadInit(w http.ResponseWriter, r *http.Request, params if err != nil { a.logger.Error().Err(err).Str(string(logs.OperationIDKey), operationID).Msg("error resolving path") jsonError(w, http.StatusBadRequest, fmt.Errorf("error resolving path: %w", err)) + return } @@ -101,6 +110,7 @@ func (a *API) PostFilesUploadInit(w http.ResponseWriter, r *http.Request, params if err := permissions.EnsureDirs(filepath.Dir(filePath), uid, gid); err != nil { a.logger.Error().Err(err).Str(string(logs.OperationIDKey), operationID).Msg("error ensuring directories") jsonError(w, http.StatusInternalServerError, fmt.Errorf("error ensuring directories: %w", err)) + return } @@ -110,10 +120,12 @@ func (a *API) PostFilesUploadInit(w http.ResponseWriter, r *http.Request, params if errors.Is(err, syscall.ENOSPC) { a.logger.Error().Err(err).Str(string(logs.OperationIDKey), operationID).Msg("not enough disk space") jsonError(w, http.StatusInsufficientStorage, fmt.Errorf("not enough disk space")) + return } a.logger.Error().Err(err).Str(string(logs.OperationIDKey), operationID).Msg("error creating destination file") jsonError(w, http.StatusInternalServerError, fmt.Errorf("error creating destination file: %w", err)) + return } @@ -125,10 +137,12 @@ func (a *API) PostFilesUploadInit(w http.ResponseWriter, r *http.Request, params if errors.Is(err, syscall.ENOSPC) { a.logger.Error().Err(err).Str(string(logs.OperationIDKey), operationID).Msg("not enough disk space") jsonError(w, http.StatusInsufficientStorage, fmt.Errorf("not enough disk space")) + return } 
a.logger.Error().Err(err).Str(string(logs.OperationIDKey), operationID).Msg("error preallocating file") jsonError(w, http.StatusInternalServerError, fmt.Errorf("error preallocating file: %w", err)) + return } } @@ -139,6 +153,7 @@ func (a *API) PostFilesUploadInit(w http.ResponseWriter, r *http.Request, params os.Remove(filePath) a.logger.Error().Err(err).Str(string(logs.OperationIDKey), operationID).Msg("error changing file ownership") jsonError(w, http.StatusInternalServerError, fmt.Errorf("error changing file ownership: %w", err)) + return } @@ -172,6 +187,7 @@ func (a *API) PostFilesUploadInit(w http.ResponseWriter, r *http.Request, params os.Remove(filePath) a.logger.Error().Str(string(logs.OperationIDKey), operationID).Int("maxSessions", maxUploadSessions).Msg("too many concurrent upload sessions") jsonError(w, http.StatusTooManyRequests, fmt.Errorf("too many concurrent upload sessions (max %d)", maxUploadSessions)) + return } a.uploads[uploadID] = session @@ -188,9 +204,11 @@ func (a *API) PostFilesUploadInit(w http.ResponseWriter, r *http.Request, params w.Header().Set("Content-Type", "application/json") w.WriteHeader(http.StatusOK) - json.NewEncoder(w).Encode(MultipartUploadInit{ + if err := json.NewEncoder(w).Encode(MultipartUploadInit{ UploadId: uploadID, - }) + }); err != nil { + a.logger.Error().Err(err).Str(string(logs.OperationIDKey), operationID).Msg("failed to encode response") + } } // PutFilesUploadUploadId uploads a part of a multipart upload directly to the destination file @@ -203,6 +221,7 @@ func (a *API) PutFilesUploadUploadId(w http.ResponseWriter, r *http.Request, upl if _, err := uuid.Parse(uploadId); err != nil { a.logger.Error().Err(err).Str(string(logs.OperationIDKey), operationID).Str("uploadId", uploadId).Msg("invalid upload ID format") jsonError(w, http.StatusBadRequest, fmt.Errorf("invalid upload ID format: must be a valid UUID")) + return } @@ -214,6 +233,7 @@ func (a *API) PutFilesUploadUploadId(w http.ResponseWriter, r 
*http.Request, upl if !exists { a.logger.Error().Str(string(logs.OperationIDKey), operationID).Str("uploadId", uploadId).Msg("upload session not found") jsonError(w, http.StatusNotFound, fmt.Errorf("upload session not found: %s", uploadId)) + return } @@ -221,6 +241,7 @@ func (a *API) PutFilesUploadUploadId(w http.ResponseWriter, r *http.Request, upl if session.completed.Load() { a.logger.Error().Str(string(logs.OperationIDKey), operationID).Str("uploadId", uploadId).Msg("upload session is already completing") jsonError(w, http.StatusConflict, fmt.Errorf("upload session is already completing or aborted")) + return } @@ -230,6 +251,7 @@ func (a *API) PutFilesUploadUploadId(w http.ResponseWriter, r *http.Request, upl if partNumber < 0 { a.logger.Error().Str(string(logs.OperationIDKey), operationID).Int("partNumber", partNumber).Msg("invalid part number") jsonError(w, http.StatusBadRequest, fmt.Errorf("part number must be non-negative")) + return } @@ -237,6 +259,7 @@ func (a *API) PutFilesUploadUploadId(w http.ResponseWriter, r *http.Request, upl if session.NumParts > 0 && partNumber >= session.NumParts { a.logger.Error().Str(string(logs.OperationIDKey), operationID).Int("partNumber", partNumber).Int("numParts", session.NumParts).Msg("part number out of range") jsonError(w, http.StatusBadRequest, fmt.Errorf("part number %d out of range (expected 0-%d)", partNumber, session.NumParts-1)) + return } @@ -254,6 +277,7 @@ func (a *API) PutFilesUploadUploadId(w http.ResponseWriter, r *http.Request, upl if err != nil { a.logger.Error().Err(err).Str(string(logs.OperationIDKey), operationID).Msg("error reading part data") jsonError(w, http.StatusInternalServerError, fmt.Errorf("error reading part data: %w", err)) + return } @@ -263,6 +287,7 @@ func (a *API) PutFilesUploadUploadId(w http.ResponseWriter, r *http.Request, upl if size > expectedSize { a.logger.Error().Str(string(logs.OperationIDKey), operationID).Int64("size", size).Int64("expectedSize", expectedSize).Msg("part 
size exceeds expected size") jsonError(w, http.StatusBadRequest, fmt.Errorf("part size %d exceeds expected size %d", size, expectedSize)) + return } @@ -274,10 +299,12 @@ func (a *API) PutFilesUploadUploadId(w http.ResponseWriter, r *http.Request, upl if errors.Is(err, syscall.ENOSPC) { a.logger.Error().Err(err).Str(string(logs.OperationIDKey), operationID).Msg("not enough disk space") jsonError(w, http.StatusInsufficientStorage, fmt.Errorf("not enough disk space")) + return } a.logger.Error().Err(err).Str(string(logs.OperationIDKey), operationID).Msg("error writing part data") jsonError(w, http.StatusInternalServerError, fmt.Errorf("error writing part data: %w", err)) + return } @@ -302,10 +329,12 @@ func (a *API) PutFilesUploadUploadId(w http.ResponseWriter, r *http.Request, upl w.Header().Set("Content-Type", "application/json") w.WriteHeader(http.StatusOK) - json.NewEncoder(w).Encode(MultipartUploadPart{ + if err := json.NewEncoder(w).Encode(MultipartUploadPart{ PartNumber: partNumber, Size: size, - }) + }); err != nil { + a.logger.Error().Err(err).Str(string(logs.OperationIDKey), operationID).Msg("failed to encode response") + } } // PostFilesUploadUploadIdComplete completes a multipart upload @@ -324,6 +353,7 @@ func (a *API) PostFilesUploadUploadIdComplete(w http.ResponseWriter, r *http.Req a.uploadsLock.Unlock() a.logger.Error().Str(string(logs.OperationIDKey), operationID).Str("uploadId", uploadId).Msg("upload session is already completing") jsonError(w, http.StatusConflict, fmt.Errorf("upload session is already completing")) + return } delete(a.uploads, uploadId) @@ -333,13 +363,14 @@ func (a *API) PostFilesUploadUploadIdComplete(w http.ResponseWriter, r *http.Req if !exists { a.logger.Error().Str(string(logs.OperationIDKey), operationID).Str("uploadId", uploadId).Msg("upload session not found") jsonError(w, http.StatusNotFound, fmt.Errorf("upload session not found: %s", uploadId)) + return } // Verify all parts were uploaded session.mu.Lock() missingParts 
:= []int{} - for i := 0; i < session.NumParts; i++ { + for i := range session.NumParts { if !session.PartsWritten[i] { missingParts = append(missingParts, i) } @@ -355,6 +386,7 @@ func (a *API) PostFilesUploadUploadIdComplete(w http.ResponseWriter, r *http.Req Ints("missingParts", missingParts). Msg("missing parts in upload") jsonError(w, http.StatusBadRequest, fmt.Errorf("missing parts: %v", missingParts)) + return } @@ -362,6 +394,7 @@ func (a *API) PostFilesUploadUploadIdComplete(w http.ResponseWriter, r *http.Req if err := session.DestFile.Close(); err != nil { a.logger.Error().Err(err).Str(string(logs.OperationIDKey), operationID).Msg("error closing destination file") jsonError(w, http.StatusInternalServerError, fmt.Errorf("error closing destination file: %w", err)) + return } @@ -375,10 +408,12 @@ func (a *API) PostFilesUploadUploadIdComplete(w http.ResponseWriter, r *http.Req w.Header().Set("Content-Type", "application/json") w.WriteHeader(http.StatusOK) - json.NewEncoder(w).Encode(MultipartUploadComplete{ + if err := json.NewEncoder(w).Encode(MultipartUploadComplete{ Path: session.FilePath, Size: session.TotalSize, - }) + }); err != nil { + a.logger.Error().Err(err).Str(string(logs.OperationIDKey), operationID).Msg("failed to encode response") + } } // DeleteFilesUploadUploadId aborts a multipart upload and cleans up @@ -397,6 +432,7 @@ func (a *API) DeleteFilesUploadUploadId(w http.ResponseWriter, r *http.Request, a.uploadsLock.Unlock() a.logger.Error().Str(string(logs.OperationIDKey), operationID).Str("uploadId", uploadId).Msg("upload session is already completing") jsonError(w, http.StatusConflict, fmt.Errorf("upload session is already completing or aborted")) + return } delete(a.uploads, uploadId) @@ -406,6 +442,7 @@ func (a *API) DeleteFilesUploadUploadId(w http.ResponseWriter, r *http.Request, if !exists { a.logger.Error().Str(string(logs.OperationIDKey), operationID).Str("uploadId", uploadId).Msg("upload session not found") jsonError(w, 
http.StatusNotFound, fmt.Errorf("upload session not found: %s", uploadId)) + return } diff --git a/packages/envd/internal/api/multipart_upload_test.go b/packages/envd/internal/api/multipart_upload_test.go index 036d72152d..7513e5dddd 100644 --- a/packages/envd/internal/api/multipart_upload_test.go +++ b/packages/envd/internal/api/multipart_upload_test.go @@ -26,6 +26,7 @@ func newTestAPI(t *testing.T) *API { User: "root", EnvVars: utils.NewMap[string, string](), } + return New(&logger, defaults, nil, true) } @@ -391,7 +392,7 @@ func TestMultipartUpload(t *testing.T) { tempDir := t.TempDir() // Create maxUploadSessions sessions - for i := 0; i < maxUploadSessions; i++ { + for i := range maxUploadSessions { body := PostFilesUploadInitJSONRequestBody{ Path: filepath.Join(tempDir, fmt.Sprintf("file-%d.txt", i)), TotalSize: 100, @@ -526,11 +527,13 @@ func TestMultipartUpload(t *testing.T) { // Verify file exists and is empty content, err := os.ReadFile(destPath) require.NoError(t, err) - assert.Equal(t, "", string(content)) + assert.Empty(t, string(content)) }) } func TestMultipartUploadRouting(t *testing.T) { + t.Parallel() + // Skip if not running as root if os.Geteuid() != 0 { t.Skip("skipping routing tests: requires root") @@ -542,6 +545,7 @@ func TestMultipartUploadRouting(t *testing.T) { // Test that routes are registered t.Run("init route exists", func(t *testing.T) { + t.Parallel() body := PostFilesUploadInitJSONRequestBody{ Path: "/tmp/test-file.txt", TotalSize: 100, From 8bf6c2d1a5dcfe0353de2fa0843cc67238112238 Mon Sep 17 00:00:00 2001 From: Mish Ushakov <10400064+mishushakov@users.noreply.github.com> Date: Wed, 28 Jan 2026 20:58:00 +0100 Subject: [PATCH 10/59] fix(envd): use valid UUID in non-existent session test The test was using "non-existent" as the upload ID, but the handler validates UUID format before checking session existence, causing a 400 instead of the expected 404. 
Co-Authored-By: Claude Opus 4.5 --- packages/envd/internal/api/multipart_upload_test.go | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/packages/envd/internal/api/multipart_upload_test.go b/packages/envd/internal/api/multipart_upload_test.go index 7513e5dddd..68da1131b0 100644 --- a/packages/envd/internal/api/multipart_upload_test.go +++ b/packages/envd/internal/api/multipart_upload_test.go @@ -198,10 +198,12 @@ func TestMultipartUpload(t *testing.T) { t.Parallel() api := newTestAPI(t) - req := httptest.NewRequest(http.MethodPut, "/files/upload/non-existent?part=0", bytes.NewReader([]byte("test"))) + // Use a valid UUID that doesn't exist in the sessions map + nonExistentUUID := "00000000-0000-0000-0000-000000000000" + req := httptest.NewRequest(http.MethodPut, "/files/upload/"+nonExistentUUID+"?part=0", bytes.NewReader([]byte("test"))) w := httptest.NewRecorder() - api.PutFilesUploadUploadId(w, req, "non-existent", PutFilesUploadUploadIdParams{Part: 0}) + api.PutFilesUploadUploadId(w, req, nonExistentUUID, PutFilesUploadUploadIdParams{Part: 0}) assert.Equal(t, http.StatusNotFound, w.Code) }) From f972ceddfdcf5295632e01be2a26dbe31b6002a5 Mon Sep 17 00:00:00 2001 From: Mish Ushakov <10400064+mishushakov@users.noreply.github.com> Date: Wed, 28 Jan 2026 21:20:22 +0100 Subject: [PATCH 11/59] fix(envd): use exact matching for /files auth bypass The prefix-based matching for allowed paths caused "POST/files" to incorrectly match "POST/filesystem.Filesystem/*" Connect RPC endpoints, allowing unauthenticated access to filesystem operations on secure sandboxes. Split allowed paths into exact matches (for static paths like /files) and prefix matches (for paths with dynamic segments like /files/upload/). 
Co-Authored-By: Claude Opus 4.5 --- packages/envd/internal/api/auth.go | 18 +++++++++++++++--- 1 file changed, 15 insertions(+), 3 deletions(-) diff --git a/packages/envd/internal/api/auth.go b/packages/envd/internal/api/auth.go index fc774e512d..cff83cdee5 100644 --- a/packages/envd/internal/api/auth.go +++ b/packages/envd/internal/api/auth.go @@ -18,19 +18,31 @@ const ( accessTokenHeader = "X-Access-Token" ) -// allowedPathPrefixes are paths that bypass general authentication +// allowedExactPaths are paths that bypass general authentication using exact matching // (e.g., health check, endpoints supporting signing) -// Uses prefix matching to support both exact paths and paths with dynamic segments -var allowedPathPrefixes = []string{ +var allowedExactPaths = []string{ "GET/health", "GET/files", "POST/files", +} + +// allowedPathPrefixes are paths that bypass general authentication using prefix matching +// These are for paths with dynamic segments (e.g., upload ID) +var allowedPathPrefixes = []string{ "PUT/files/upload/", "DELETE/files/upload/", "POST/files/upload/", } func isAllowedPath(methodPath string) bool { + // Check exact matches first + for _, path := range allowedExactPaths { + if methodPath == path { + return true + } + } + + // Check prefix matches for paths with dynamic segments for _, prefix := range allowedPathPrefixes { if strings.HasPrefix(methodPath, prefix) { return true From 13f96840b1afb9dfebc22ea67d11c00f40fe262a Mon Sep 17 00:00:00 2001 From: Mish Ushakov <10400064+mishushakov@users.noreply.github.com> Date: Wed, 28 Jan 2026 21:22:49 +0100 Subject: [PATCH 12/59] style(envd): use slices.Contains for exact path matching Co-Authored-By: Claude Opus 4.5 --- packages/envd/internal/api/auth.go | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/packages/envd/internal/api/auth.go b/packages/envd/internal/api/auth.go index cff83cdee5..da48dd7da2 100644 --- a/packages/envd/internal/api/auth.go +++ 
b/packages/envd/internal/api/auth.go @@ -4,6 +4,7 @@ import ( "errors" "fmt" "net/http" + "slices" "strconv" "strings" "time" @@ -36,10 +37,8 @@ var allowedPathPrefixes = []string{ func isAllowedPath(methodPath string) bool { // Check exact matches first - for _, path := range allowedExactPaths { - if methodPath == path { - return true - } + if slices.Contains(allowedExactPaths, methodPath) { + return true } // Check prefix matches for paths with dynamic segments From 1df46a3415908174ef047cf4079674819fda3689 Mon Sep 17 00:00:00 2001 From: Mish Ushakov <10400064+mishushakov@users.noreply.github.com> Date: Wed, 28 Jan 2026 21:29:27 +0100 Subject: [PATCH 13/59] security(envd): add filepath.Clean to sanitize user-provided paths Explicitly clean paths returned by ExpandAndResolve to remove any .. or . components. This addresses the GitHub Advanced Security warning about path traversal, even though filepath.Abs already normalizes paths. Co-Authored-By: Claude Opus 4.5 --- packages/envd/internal/permissions/path.go | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/packages/envd/internal/permissions/path.go b/packages/envd/internal/permissions/path.go index cc13ab5abf..0c1c5cb053 100644 --- a/packages/envd/internal/permissions/path.go +++ b/packages/envd/internal/permissions/path.go @@ -36,7 +36,8 @@ func ExpandAndResolve(path string, user *user.User, defaultPath *string) (string } if filepath.IsAbs(path) { - return path, nil + // Clean the path to remove any .. or . 
components + return filepath.Clean(path), nil } // The filepath.Abs can correctly resolve paths like /home/user/../file @@ -47,7 +48,8 @@ func ExpandAndResolve(path string, user *user.User, defaultPath *string) (string return "", fmt.Errorf("failed to resolve path '%s' for user '%s' with home dir '%s': %w", path, user.Username, user.HomeDir, err) } - return abs, nil + // filepath.Abs already cleans the path, but we explicitly clean again for clarity + return filepath.Clean(abs), nil } func getSubpaths(path string) (subpaths []string) { From 3da52cd2761d46212a1b155dc7245464526160a8 Mon Sep 17 00:00:00 2001 From: Mish Ushakov <10400064+mishushakov@users.noreply.github.com> Date: Wed, 28 Jan 2026 21:35:58 +0100 Subject: [PATCH 14/59] security(envd): add sanitizePath function with explicit validation Extract path sanitization into a dedicated function that: - Cleans the path using filepath.Clean - Validates the result is an absolute path - Rejects paths containing null bytes (path injection attack) This breaks the taint chain for CodeQL security analysis by adding explicit validation before the path is used in file operations. Co-Authored-By: Claude Opus 4.5 --- packages/envd/internal/permissions/path.go | 25 +++++++++++++++++++--- 1 file changed, 22 insertions(+), 3 deletions(-) diff --git a/packages/envd/internal/permissions/path.go b/packages/envd/internal/permissions/path.go index 0c1c5cb053..b53d990435 100644 --- a/packages/envd/internal/permissions/path.go +++ b/packages/envd/internal/permissions/path.go @@ -7,6 +7,7 @@ import ( "os/user" "path/filepath" "slices" + "strings" "github.com/e2b-dev/infra/packages/envd/internal/execcontext" ) @@ -37,7 +38,7 @@ func ExpandAndResolve(path string, user *user.User, defaultPath *string) (string if filepath.IsAbs(path) { // Clean the path to remove any .. or . 
components - return filepath.Clean(path), nil + return sanitizePath(path) } // The filepath.Abs can correctly resolve paths like /home/user/../file @@ -48,8 +49,26 @@ func ExpandAndResolve(path string, user *user.User, defaultPath *string) (string return "", fmt.Errorf("failed to resolve path '%s' for user '%s' with home dir '%s': %w", path, user.Username, user.HomeDir, err) } - // filepath.Abs already cleans the path, but we explicitly clean again for clarity - return filepath.Clean(abs), nil + return sanitizePath(abs) +} + +// sanitizePath cleans a path and validates it is safe for use. +// This function breaks the taint chain for security analysis tools. +func sanitizePath(path string) (string, error) { + // Clean the path to remove .. and . components + cleaned := filepath.Clean(path) + + // Validate the path is absolute after cleaning + if !filepath.IsAbs(cleaned) { + return "", fmt.Errorf("path must be absolute: %s", path) + } + + // Ensure path doesn't contain null bytes (path injection attack) + if strings.ContainsRune(cleaned, '\x00') { + return "", fmt.Errorf("path contains invalid characters: %s", path) + } + + return cleaned, nil } func getSubpaths(path string) (subpaths []string) { From cb264fc272be377a3701517a458dfb90e9aaae16 Mon Sep 17 00:00:00 2001 From: Mish Ushakov <10400064+mishushakov@users.noreply.github.com> Date: Wed, 28 Jan 2026 21:42:31 +0100 Subject: [PATCH 15/59] revert(envd): undo path.go changes Reverts the sanitizePath changes to path.go as they are not needed. 
Co-Authored-By: Claude Opus 4.5 --- packages/envd/internal/permissions/path.go | 25 ++-------------------- 1 file changed, 2 insertions(+), 23 deletions(-) diff --git a/packages/envd/internal/permissions/path.go b/packages/envd/internal/permissions/path.go index b53d990435..cc13ab5abf 100644 --- a/packages/envd/internal/permissions/path.go +++ b/packages/envd/internal/permissions/path.go @@ -7,7 +7,6 @@ import ( "os/user" "path/filepath" "slices" - "strings" "github.com/e2b-dev/infra/packages/envd/internal/execcontext" ) @@ -37,8 +36,7 @@ func ExpandAndResolve(path string, user *user.User, defaultPath *string) (string } if filepath.IsAbs(path) { - // Clean the path to remove any .. or . components - return sanitizePath(path) + return path, nil } // The filepath.Abs can correctly resolve paths like /home/user/../file @@ -49,26 +47,7 @@ func ExpandAndResolve(path string, user *user.User, defaultPath *string) (string return "", fmt.Errorf("failed to resolve path '%s' for user '%s' with home dir '%s': %w", path, user.Username, user.HomeDir, err) } - return sanitizePath(abs) -} - -// sanitizePath cleans a path and validates it is safe for use. -// This function breaks the taint chain for security analysis tools. -func sanitizePath(path string) (string, error) { - // Clean the path to remove .. and . 
components - cleaned := filepath.Clean(path) - - // Validate the path is absolute after cleaning - if !filepath.IsAbs(cleaned) { - return "", fmt.Errorf("path must be absolute: %s", path) - } - - // Ensure path doesn't contain null bytes (path injection attack) - if strings.ContainsRune(cleaned, '\x00') { - return "", fmt.Errorf("path contains invalid characters: %s", path) - } - - return cleaned, nil + return abs, nil } func getSubpaths(path string) (subpaths []string) { From 764436a4f99b7290f2b715a955f2ff9064029951 Mon Sep 17 00:00:00 2001 From: Mish Ushakov <10400064+mishushakov@users.noreply.github.com> Date: Wed, 28 Jan 2026 22:19:59 +0100 Subject: [PATCH 16/59] perf(envd): reduce mutex contention in multipart upload Move the mutex lock to only protect the PartsWritten map access instead of the entire WriteAt operation. WriteAt is safe for concurrent writes at different offsets, so parallel chunk uploads no longer serialize on disk I/O. Co-Authored-By: Claude Opus 4.5 --- packages/envd/internal/api/multipart_upload.go | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/packages/envd/internal/api/multipart_upload.go b/packages/envd/internal/api/multipart_upload.go index f98e6a81f0..90720cebe6 100644 --- a/packages/envd/internal/api/multipart_upload.go +++ b/packages/envd/internal/api/multipart_upload.go @@ -292,10 +292,9 @@ func (a *API) PutFilesUploadUploadId(w http.ResponseWriter, r *http.Request, upl } // Write directly to the destination file at the correct offset - session.mu.Lock() + // WriteAt is safe for concurrent writes at different offsets, no lock needed here _, err = session.DestFile.WriteAt(data, offset) if err != nil { - session.mu.Unlock() if errors.Is(err, syscall.ENOSPC) { a.logger.Error().Err(err).Str(string(logs.OperationIDKey), operationID).Msg("not enough disk space") jsonError(w, http.StatusInsufficientStorage, fmt.Errorf("not enough disk space")) @@ -308,7 +307,8 @@ func (a *API) PutFilesUploadUploadId(w 
http.ResponseWriter, r *http.Request, upl return } - // Mark part as written + // Mark part as written - only lock for map access + session.mu.Lock() if session.PartsWritten[partNumber] { a.logger.Warn(). Str(string(logs.OperationIDKey), operationID). From 212e881c26deab2451199b06c8c8811f6e56d6e5 Mon Sep 17 00:00:00 2001 From: Mish Ushakov <10400064+mishushakov@users.noreply.github.com> Date: Thu, 29 Jan 2026 18:47:27 +0100 Subject: [PATCH 17/59] feat(envd): support gzip Content-Encoding on file upload APIs Add support for gzip-compressed request bodies on POST /files and PUT /files/upload/{uploadId} endpoints. Clients can now send compressed uploads by setting Content-Encoding: gzip header. Co-Authored-By: Claude Opus 4.5 --- .../envd/internal/api/multipart_upload.go | 17 ++++- .../internal/api/multipart_upload_test.go | 74 +++++++++++++++++++ packages/envd/internal/api/upload.go | 29 ++++++++ packages/envd/internal/api/upload_test.go | 56 ++++++++++++++ 4 files changed, 175 insertions(+), 1 deletion(-) diff --git a/packages/envd/internal/api/multipart_upload.go b/packages/envd/internal/api/multipart_upload.go index 90720cebe6..9a8e2a4d8f 100644 --- a/packages/envd/internal/api/multipart_upload.go +++ b/packages/envd/internal/api/multipart_upload.go @@ -1,6 +1,7 @@ package api import ( + "compress/gzip" "encoding/json" "errors" "fmt" @@ -225,6 +226,20 @@ func (a *API) PutFilesUploadUploadId(w http.ResponseWriter, r *http.Request, upl return } + // Handle gzip-encoded request body + var body io.Reader = r.Body + if r.Header.Get("Content-Encoding") == "gzip" { + gzReader, err := gzip.NewReader(r.Body) + if err != nil { + a.logger.Error().Err(err).Str(string(logs.OperationIDKey), operationID).Msg("failed to create gzip reader") + jsonError(w, http.StatusBadRequest, fmt.Errorf("failed to decompress gzip body: %w", err)) + + return + } + defer gzReader.Close() + body = gzReader + } + // Get the session a.uploadsLock.RLock() session, exists := a.uploads[uploadId] @@ 
-272,7 +287,7 @@ func (a *API) PutFilesUploadUploadId(w http.ResponseWriter, r *http.Request, upl } // Read the part data with size limit - limitedReader := io.LimitReader(r.Body, expectedSize+1) + limitedReader := io.LimitReader(body, expectedSize+1) data, err := io.ReadAll(limitedReader) if err != nil { a.logger.Error().Err(err).Str(string(logs.OperationIDKey), operationID).Msg("error reading part data") diff --git a/packages/envd/internal/api/multipart_upload_test.go b/packages/envd/internal/api/multipart_upload_test.go index 68da1131b0..bd27de49ba 100644 --- a/packages/envd/internal/api/multipart_upload_test.go +++ b/packages/envd/internal/api/multipart_upload_test.go @@ -2,6 +2,7 @@ package api import ( "bytes" + "compress/gzip" "encoding/json" "fmt" "net/http" @@ -531,6 +532,79 @@ func TestMultipartUpload(t *testing.T) { require.NoError(t, err) assert.Empty(t, string(content)) }) + + t.Run("gzip encoded part upload", func(t *testing.T) { + t.Parallel() + api := newTestAPI(t) + tempDir := t.TempDir() + destPath := filepath.Join(tempDir, "gzip-file.txt") + + part0Content := []byte("Hello, ") + part1Content := []byte("World!") + totalSize := int64(len(part0Content) + len(part1Content)) + partSize := int64(len(part0Content)) + + // Initialize upload + initBody := PostFilesUploadInitJSONRequestBody{ + Path: destPath, + TotalSize: totalSize, + PartSize: partSize, + } + initBodyBytes, _ := json.Marshal(initBody) + + initReq := httptest.NewRequest(http.MethodPost, "/files/upload/init", bytes.NewReader(initBodyBytes)) + initReq.Header.Set("Content-Type", "application/json") + initW := httptest.NewRecorder() + + api.PostFilesUploadInit(initW, initReq, PostFilesUploadInitParams{}) + require.Equal(t, http.StatusOK, initW.Code) + + var initResp MultipartUploadInit + err := json.Unmarshal(initW.Body.Bytes(), &initResp) + require.NoError(t, err) + uploadId := initResp.UploadId + + // Helper to gzip compress data + gzipCompress := func(data []byte) []byte { + var buf 
bytes.Buffer + gw := gzip.NewWriter(&buf) + gw.Write(data) + gw.Close() + return buf.Bytes() + } + + // Upload part 0 with gzip encoding + part0Gzipped := gzipCompress(part0Content) + part0Req := httptest.NewRequest(http.MethodPut, "/files/upload/"+uploadId+"?part=0", bytes.NewReader(part0Gzipped)) + part0Req.Header.Set("Content-Type", "application/octet-stream") + part0Req.Header.Set("Content-Encoding", "gzip") + part0W := httptest.NewRecorder() + + api.PutFilesUploadUploadId(part0W, part0Req, uploadId, PutFilesUploadUploadIdParams{Part: 0}) + require.Equal(t, http.StatusOK, part0W.Code) + + // Upload part 1 with gzip encoding + part1Gzipped := gzipCompress(part1Content) + part1Req := httptest.NewRequest(http.MethodPut, "/files/upload/"+uploadId+"?part=1", bytes.NewReader(part1Gzipped)) + part1Req.Header.Set("Content-Type", "application/octet-stream") + part1Req.Header.Set("Content-Encoding", "gzip") + part1W := httptest.NewRecorder() + + api.PutFilesUploadUploadId(part1W, part1Req, uploadId, PutFilesUploadUploadIdParams{Part: 1}) + require.Equal(t, http.StatusOK, part1W.Code) + + // Complete upload + completeReq := httptest.NewRequest(http.MethodPost, "/files/upload/"+uploadId+"/complete", nil) + completeW := httptest.NewRecorder() + + api.PostFilesUploadUploadIdComplete(completeW, completeReq, uploadId) + require.Equal(t, http.StatusOK, completeW.Code) + + // Verify file contents + content, err := os.ReadFile(destPath) + require.NoError(t, err) + assert.Equal(t, "Hello, World!", string(content)) + }) } func TestMultipartUploadRouting(t *testing.T) { diff --git a/packages/envd/internal/api/upload.go b/packages/envd/internal/api/upload.go index b9f878924b..d9d048a0da 100644 --- a/packages/envd/internal/api/upload.go +++ b/packages/envd/internal/api/upload.go @@ -1,6 +1,7 @@ package api import ( + "compress/gzip" "encoding/json" "errors" "fmt" @@ -21,6 +22,20 @@ import ( "github.com/e2b-dev/infra/packages/envd/internal/utils" ) +// getDecompressedBody returns a 
reader that decompresses the request body if +// Content-Encoding is gzip, otherwise returns the original body. +// The caller is responsible for closing the returned ReadCloser. +func getDecompressedBody(r *http.Request) (io.ReadCloser, error) { + if r.Header.Get("Content-Encoding") == "gzip" { + gzReader, err := gzip.NewReader(r.Body) + if err != nil { + return nil, fmt.Errorf("failed to create gzip reader: %w", err) + } + return gzReader, nil + } + return r.Body, nil +} + var ErrNoDiskSpace = fmt.Errorf("not enough disk space available") func processFile(r *http.Request, path string, part io.Reader, uid, gid int, logger zerolog.Logger) (int, error) { @@ -193,6 +208,20 @@ func (a *API) PostFiles(w http.ResponseWriter, r *http.Request, params PostFiles l.Msg("File write") }() + // Handle gzip-encoded request body + body, err := getDecompressedBody(r) + if err != nil { + errMsg = fmt.Errorf("error decompressing request body: %w", err) + errorCode = http.StatusBadRequest + jsonError(w, errorCode, errMsg) + + return + } + if body != r.Body { + defer body.Close() + } + r.Body = body + f, err := r.MultipartReader() if err != nil { errMsg = fmt.Errorf("error parsing multipart form: %w", err) diff --git a/packages/envd/internal/api/upload_test.go b/packages/envd/internal/api/upload_test.go index 0d66b0b61c..d06229ba2c 100644 --- a/packages/envd/internal/api/upload_test.go +++ b/packages/envd/internal/api/upload_test.go @@ -2,6 +2,7 @@ package api import ( "bytes" + "compress/gzip" "context" "fmt" "io" @@ -247,3 +248,58 @@ func createTmpfsMountWithInodes(t *testing.T, sizeInBytes, inodesCount int) stri return tempDir } + +func TestGetDecompressedBody(t *testing.T) { + t.Parallel() + + t.Run("returns original body when no Content-Encoding header", func(t *testing.T) { + t.Parallel() + content := []byte("test content") + req, _ := http.NewRequest(http.MethodPost, "/test", bytes.NewReader(content)) + + body, err := getDecompressedBody(req) + require.NoError(t, err) + 
assert.Equal(t, req.Body, body, "should return original body") + + data, err := io.ReadAll(body) + require.NoError(t, err) + assert.Equal(t, content, data) + }) + + t.Run("decompresses gzip body when Content-Encoding is gzip", func(t *testing.T) { + t.Parallel() + originalContent := []byte("test content to compress") + + // Compress the content + var compressed bytes.Buffer + gw := gzip.NewWriter(&compressed) + _, err := gw.Write(originalContent) + require.NoError(t, err) + err = gw.Close() + require.NoError(t, err) + + req, _ := http.NewRequest(http.MethodPost, "/test", bytes.NewReader(compressed.Bytes())) + req.Header.Set("Content-Encoding", "gzip") + + body, err := getDecompressedBody(req) + require.NoError(t, err) + defer body.Close() + + assert.NotEqual(t, req.Body, body, "should return a new gzip reader") + + data, err := io.ReadAll(body) + require.NoError(t, err) + assert.Equal(t, originalContent, data) + }) + + t.Run("returns error for invalid gzip data", func(t *testing.T) { + t.Parallel() + invalidGzip := []byte("this is not gzip data") + req, _ := http.NewRequest(http.MethodPost, "/test", bytes.NewReader(invalidGzip)) + req.Header.Set("Content-Encoding", "gzip") + + _, err := getDecompressedBody(req) + require.Error(t, err) + assert.Contains(t, err.Error(), "failed to create gzip reader") + }) +} From 5b155b05e84986fb4c90e8a941023d4916554f3a Mon Sep 17 00:00:00 2001 From: Mish Ushakov <10400064+mishushakov@users.noreply.github.com> Date: Thu, 29 Jan 2026 19:14:39 +0100 Subject: [PATCH 18/59] revert(envd): remove gzip encoding from simple file upload Keep gzip Content-Encoding support only in multipart upload API. 
Co-Authored-By: Claude Opus 4.5 --- packages/envd/internal/api/upload.go | 29 ------------ packages/envd/internal/api/upload_test.go | 56 ----------------------- 2 files changed, 85 deletions(-) diff --git a/packages/envd/internal/api/upload.go b/packages/envd/internal/api/upload.go index d9d048a0da..b9f878924b 100644 --- a/packages/envd/internal/api/upload.go +++ b/packages/envd/internal/api/upload.go @@ -1,7 +1,6 @@ package api import ( - "compress/gzip" "encoding/json" "errors" "fmt" @@ -22,20 +21,6 @@ import ( "github.com/e2b-dev/infra/packages/envd/internal/utils" ) -// getDecompressedBody returns a reader that decompresses the request body if -// Content-Encoding is gzip, otherwise returns the original body. -// The caller is responsible for closing the returned ReadCloser. -func getDecompressedBody(r *http.Request) (io.ReadCloser, error) { - if r.Header.Get("Content-Encoding") == "gzip" { - gzReader, err := gzip.NewReader(r.Body) - if err != nil { - return nil, fmt.Errorf("failed to create gzip reader: %w", err) - } - return gzReader, nil - } - return r.Body, nil -} - var ErrNoDiskSpace = fmt.Errorf("not enough disk space available") func processFile(r *http.Request, path string, part io.Reader, uid, gid int, logger zerolog.Logger) (int, error) { @@ -208,20 +193,6 @@ func (a *API) PostFiles(w http.ResponseWriter, r *http.Request, params PostFiles l.Msg("File write") }() - // Handle gzip-encoded request body - body, err := getDecompressedBody(r) - if err != nil { - errMsg = fmt.Errorf("error decompressing request body: %w", err) - errorCode = http.StatusBadRequest - jsonError(w, errorCode, errMsg) - - return - } - if body != r.Body { - defer body.Close() - } - r.Body = body - f, err := r.MultipartReader() if err != nil { errMsg = fmt.Errorf("error parsing multipart form: %w", err) diff --git a/packages/envd/internal/api/upload_test.go b/packages/envd/internal/api/upload_test.go index d06229ba2c..0d66b0b61c 100644 --- 
a/packages/envd/internal/api/upload_test.go +++ b/packages/envd/internal/api/upload_test.go @@ -2,7 +2,6 @@ package api import ( "bytes" - "compress/gzip" "context" "fmt" "io" @@ -248,58 +247,3 @@ func createTmpfsMountWithInodes(t *testing.T, sizeInBytes, inodesCount int) stri return tempDir } - -func TestGetDecompressedBody(t *testing.T) { - t.Parallel() - - t.Run("returns original body when no Content-Encoding header", func(t *testing.T) { - t.Parallel() - content := []byte("test content") - req, _ := http.NewRequest(http.MethodPost, "/test", bytes.NewReader(content)) - - body, err := getDecompressedBody(req) - require.NoError(t, err) - assert.Equal(t, req.Body, body, "should return original body") - - data, err := io.ReadAll(body) - require.NoError(t, err) - assert.Equal(t, content, data) - }) - - t.Run("decompresses gzip body when Content-Encoding is gzip", func(t *testing.T) { - t.Parallel() - originalContent := []byte("test content to compress") - - // Compress the content - var compressed bytes.Buffer - gw := gzip.NewWriter(&compressed) - _, err := gw.Write(originalContent) - require.NoError(t, err) - err = gw.Close() - require.NoError(t, err) - - req, _ := http.NewRequest(http.MethodPost, "/test", bytes.NewReader(compressed.Bytes())) - req.Header.Set("Content-Encoding", "gzip") - - body, err := getDecompressedBody(req) - require.NoError(t, err) - defer body.Close() - - assert.NotEqual(t, req.Body, body, "should return a new gzip reader") - - data, err := io.ReadAll(body) - require.NoError(t, err) - assert.Equal(t, originalContent, data) - }) - - t.Run("returns error for invalid gzip data", func(t *testing.T) { - t.Parallel() - invalidGzip := []byte("this is not gzip data") - req, _ := http.NewRequest(http.MethodPost, "/test", bytes.NewReader(invalidGzip)) - req.Header.Set("Content-Encoding", "gzip") - - _, err := getDecompressedBody(req) - require.Error(t, err) - assert.Contains(t, err.Error(), "failed to create gzip reader") - }) -} From 
a93499029d00d3f7690d9a264f55f82a2d5b2b48 Mon Sep 17 00:00:00 2001 From: Mish Ushakov <10400064+mishushakov@users.noreply.github.com> Date: Fri, 30 Jan 2026 16:14:38 +0100 Subject: [PATCH 19/59] perf(envd): check session limit before file allocation Move the max upload sessions check earlier in PostFilesUploadInit to fail fast before creating and preallocating the destination file. Co-Authored-By: Claude Opus 4.5 --- packages/envd/internal/api/multipart_upload.go | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/packages/envd/internal/api/multipart_upload.go b/packages/envd/internal/api/multipart_upload.go index 9a8e2a4d8f..609f5dcc73 100644 --- a/packages/envd/internal/api/multipart_upload.go +++ b/packages/envd/internal/api/multipart_upload.go @@ -63,6 +63,17 @@ func (a *API) PostFilesUploadInit(w http.ResponseWriter, r *http.Request, params return } + // Check session limit early before doing any file operations + a.uploadsLock.RLock() + sessionCount := len(a.uploads) + a.uploadsLock.RUnlock() + if sessionCount >= maxUploadSessions { + a.logger.Error().Str(string(logs.OperationIDKey), operationID).Int("maxSessions", maxUploadSessions).Msg("too many concurrent upload sessions") + jsonError(w, http.StatusTooManyRequests, fmt.Errorf("too many concurrent upload sessions (max %d)", maxUploadSessions)) + + return + } + // Validate signing if needed err := a.validateSigning(r, params.Signature, params.SignatureExpiration, params.Username, body.Path, SigningWriteOperation) if err != nil { From c7b1e309437af9e87193168b1622197681208d9d Mon Sep 17 00:00:00 2001 From: Mish Ushakov <10400064+mishushakov@users.noreply.github.com> Date: Fri, 30 Jan 2026 16:16:11 +0100 Subject: [PATCH 20/59] revert(envd): remove gzip Content-Encoding from multipart upload Co-Authored-By: Claude Opus 4.5 --- .../envd/internal/api/multipart_upload.go | 17 +---- .../internal/api/multipart_upload_test.go | 74 ------------------- 2 files changed, 1 insertion(+), 90 deletions(-) diff 
--git a/packages/envd/internal/api/multipart_upload.go b/packages/envd/internal/api/multipart_upload.go index 609f5dcc73..ed9318387c 100644 --- a/packages/envd/internal/api/multipart_upload.go +++ b/packages/envd/internal/api/multipart_upload.go @@ -1,7 +1,6 @@ package api import ( - "compress/gzip" "encoding/json" "errors" "fmt" @@ -237,20 +236,6 @@ func (a *API) PutFilesUploadUploadId(w http.ResponseWriter, r *http.Request, upl return } - // Handle gzip-encoded request body - var body io.Reader = r.Body - if r.Header.Get("Content-Encoding") == "gzip" { - gzReader, err := gzip.NewReader(r.Body) - if err != nil { - a.logger.Error().Err(err).Str(string(logs.OperationIDKey), operationID).Msg("failed to create gzip reader") - jsonError(w, http.StatusBadRequest, fmt.Errorf("failed to decompress gzip body: %w", err)) - - return - } - defer gzReader.Close() - body = gzReader - } - // Get the session a.uploadsLock.RLock() session, exists := a.uploads[uploadId] @@ -298,7 +283,7 @@ func (a *API) PutFilesUploadUploadId(w http.ResponseWriter, r *http.Request, upl } // Read the part data with size limit - limitedReader := io.LimitReader(body, expectedSize+1) + limitedReader := io.LimitReader(r.Body, expectedSize+1) data, err := io.ReadAll(limitedReader) if err != nil { a.logger.Error().Err(err).Str(string(logs.OperationIDKey), operationID).Msg("error reading part data") diff --git a/packages/envd/internal/api/multipart_upload_test.go b/packages/envd/internal/api/multipart_upload_test.go index bd27de49ba..68da1131b0 100644 --- a/packages/envd/internal/api/multipart_upload_test.go +++ b/packages/envd/internal/api/multipart_upload_test.go @@ -2,7 +2,6 @@ package api import ( "bytes" - "compress/gzip" "encoding/json" "fmt" "net/http" @@ -532,79 +531,6 @@ func TestMultipartUpload(t *testing.T) { require.NoError(t, err) assert.Empty(t, string(content)) }) - - t.Run("gzip encoded part upload", func(t *testing.T) { - t.Parallel() - api := newTestAPI(t) - tempDir := t.TempDir() - 
destPath := filepath.Join(tempDir, "gzip-file.txt") - - part0Content := []byte("Hello, ") - part1Content := []byte("World!") - totalSize := int64(len(part0Content) + len(part1Content)) - partSize := int64(len(part0Content)) - - // Initialize upload - initBody := PostFilesUploadInitJSONRequestBody{ - Path: destPath, - TotalSize: totalSize, - PartSize: partSize, - } - initBodyBytes, _ := json.Marshal(initBody) - - initReq := httptest.NewRequest(http.MethodPost, "/files/upload/init", bytes.NewReader(initBodyBytes)) - initReq.Header.Set("Content-Type", "application/json") - initW := httptest.NewRecorder() - - api.PostFilesUploadInit(initW, initReq, PostFilesUploadInitParams{}) - require.Equal(t, http.StatusOK, initW.Code) - - var initResp MultipartUploadInit - err := json.Unmarshal(initW.Body.Bytes(), &initResp) - require.NoError(t, err) - uploadId := initResp.UploadId - - // Helper to gzip compress data - gzipCompress := func(data []byte) []byte { - var buf bytes.Buffer - gw := gzip.NewWriter(&buf) - gw.Write(data) - gw.Close() - return buf.Bytes() - } - - // Upload part 0 with gzip encoding - part0Gzipped := gzipCompress(part0Content) - part0Req := httptest.NewRequest(http.MethodPut, "/files/upload/"+uploadId+"?part=0", bytes.NewReader(part0Gzipped)) - part0Req.Header.Set("Content-Type", "application/octet-stream") - part0Req.Header.Set("Content-Encoding", "gzip") - part0W := httptest.NewRecorder() - - api.PutFilesUploadUploadId(part0W, part0Req, uploadId, PutFilesUploadUploadIdParams{Part: 0}) - require.Equal(t, http.StatusOK, part0W.Code) - - // Upload part 1 with gzip encoding - part1Gzipped := gzipCompress(part1Content) - part1Req := httptest.NewRequest(http.MethodPut, "/files/upload/"+uploadId+"?part=1", bytes.NewReader(part1Gzipped)) - part1Req.Header.Set("Content-Type", "application/octet-stream") - part1Req.Header.Set("Content-Encoding", "gzip") - part1W := httptest.NewRecorder() - - api.PutFilesUploadUploadId(part1W, part1Req, uploadId, 
PutFilesUploadUploadIdParams{Part: 1}) - require.Equal(t, http.StatusOK, part1W.Code) - - // Complete upload - completeReq := httptest.NewRequest(http.MethodPost, "/files/upload/"+uploadId+"/complete", nil) - completeW := httptest.NewRecorder() - - api.PostFilesUploadUploadIdComplete(completeW, completeReq, uploadId) - require.Equal(t, http.StatusOK, completeW.Code) - - // Verify file contents - content, err := os.ReadFile(destPath) - require.NoError(t, err) - assert.Equal(t, "Hello, World!", string(content)) - }) } func TestMultipartUploadRouting(t *testing.T) { From 2a91f48a9c648bde20b5d9b7599d4b9d443ff02f Mon Sep 17 00:00:00 2001 From: Mish Ushakov <10400064+mishushakov@users.noreply.github.com> Date: Fri, 30 Jan 2026 16:22:53 +0100 Subject: [PATCH 21/59] refactor(envd): remove redundant session limit check The early check before file operations is sufficient. Co-Authored-By: Claude Opus 4.5 --- packages/envd/internal/api/multipart_upload.go | 9 --------- 1 file changed, 9 deletions(-) diff --git a/packages/envd/internal/api/multipart_upload.go b/packages/envd/internal/api/multipart_upload.go index ed9318387c..59e0468a2b 100644 --- a/packages/envd/internal/api/multipart_upload.go +++ b/packages/envd/internal/api/multipart_upload.go @@ -192,15 +192,6 @@ func (a *API) PostFilesUploadInit(w http.ResponseWriter, r *http.Request, params } a.uploadsLock.Lock() - if len(a.uploads) >= maxUploadSessions { - a.uploadsLock.Unlock() - destFile.Close() - os.Remove(filePath) - a.logger.Error().Str(string(logs.OperationIDKey), operationID).Int("maxSessions", maxUploadSessions).Msg("too many concurrent upload sessions") - jsonError(w, http.StatusTooManyRequests, fmt.Errorf("too many concurrent upload sessions (max %d)", maxUploadSessions)) - - return - } a.uploads[uploadID] = session a.uploadsLock.Unlock() From 81831273310cafcb87a3f5a1e729be2f445a673f Mon Sep 17 00:00:00 2001 From: Mish Ushakov <10400064+mishushakov@users.noreply.github.com> Date: Fri, 30 Jan 2026 17:03:28 
+0100 Subject: [PATCH 22/59] fix(envd): enforce exact part size and reject empty file uploads - Reject truncated uploads by enforcing size == expectedSize instead of only checking size > expectedSize. This prevents silent data corruption when a client disconnects early. - Reject part uploads when TotalSize is 0 (NumParts == 0) to prevent writing unexpected data to files declared as empty. Co-Authored-By: Claude Opus 4.5 --- packages/envd/internal/api/multipart_upload.go | 18 +++++++++++++----- 1 file changed, 13 insertions(+), 5 deletions(-) diff --git a/packages/envd/internal/api/multipart_upload.go b/packages/envd/internal/api/multipart_upload.go index 59e0468a2b..f055637c7a 100644 --- a/packages/envd/internal/api/multipart_upload.go +++ b/packages/envd/internal/api/multipart_upload.go @@ -257,8 +257,16 @@ func (a *API) PutFilesUploadUploadId(w http.ResponseWriter, r *http.Request, upl return } + // Reject part uploads for empty files (no parts needed) + if session.NumParts == 0 { + a.logger.Error().Str(string(logs.OperationIDKey), operationID).Msg("cannot upload parts to empty file") + jsonError(w, http.StatusBadRequest, fmt.Errorf("cannot upload parts to empty file (totalSize is 0)")) + + return + } + // Check part number is within range - if session.NumParts > 0 && partNumber >= session.NumParts { + if partNumber >= session.NumParts { a.logger.Error().Str(string(logs.OperationIDKey), operationID).Int("partNumber", partNumber).Int("numParts", session.NumParts).Msg("part number out of range") jsonError(w, http.StatusBadRequest, fmt.Errorf("part number %d out of range (expected 0-%d)", partNumber, session.NumParts-1)) @@ -285,10 +293,10 @@ func (a *API) PutFilesUploadUploadId(w http.ResponseWriter, r *http.Request, upl size := int64(len(data)) - // Check if part exceeded expected size - if size > expectedSize { - a.logger.Error().Str(string(logs.OperationIDKey), operationID).Int64("size", size).Int64("expectedSize", expectedSize).Msg("part size exceeds expected 
size") - jsonError(w, http.StatusBadRequest, fmt.Errorf("part size %d exceeds expected size %d", size, expectedSize)) + // Enforce exact size match to prevent silent corruption from truncated uploads + if size != expectedSize { + a.logger.Error().Str(string(logs.OperationIDKey), operationID).Int64("size", size).Int64("expectedSize", expectedSize).Msg("part size mismatch") + jsonError(w, http.StatusBadRequest, fmt.Errorf("part size %d does not match expected size %d", size, expectedSize)) return } From 761bf2df30a9e68faae74bc091ad3a9893174126 Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" Date: Tue, 24 Feb 2026 14:53:20 +0000 Subject: [PATCH 23/59] chore: auto-commit generated changes --- .../internal/sandbox/envd/envd.gen.go | 18 +++--- tests/integration/internal/envd/generated.go | 60 +++++++++++++++++++ 2 files changed, 69 insertions(+), 9 deletions(-) diff --git a/packages/orchestrator/internal/sandbox/envd/envd.gen.go b/packages/orchestrator/internal/sandbox/envd/envd.gen.go index 3dc921faf3..eb77825967 100644 --- a/packages/orchestrator/internal/sandbox/envd/envd.gen.go +++ b/packages/orchestrator/internal/sandbox/envd/envd.gen.go @@ -69,12 +69,6 @@ type Metrics struct { Ts int64 `json:"ts,omitempty"` } -// VolumeMount Volume -type VolumeMount struct { - NfsTarget string `json:"nfs_target"` - Path string `json:"path"` -} - // MultipartUploadComplete defines model for MultipartUploadComplete. type MultipartUploadComplete struct { // Path Path to the final assembled file @@ -99,6 +93,12 @@ type MultipartUploadPart struct { Size int64 `json:"size"` } +// VolumeMount Volume +type VolumeMount struct { + NfsTarget string `json:"nfs_target"` + Path string `json:"path"` +} + // FilePath defines model for FilePath. type FilePath = string @@ -182,13 +182,13 @@ type PostFilesUploadInitJSONBody struct { // PostFilesUploadInitParams defines parameters for PostFilesUploadInit. 
type PostFilesUploadInitParams struct { // Username User used for setting the owner, or resolving relative paths. - Username *User `form:"username,omitempty" json:"username,omitempty"` + Username User `form:"username,omitempty" json:"username,omitempty"` // Signature Signature used for file access permission verification. - Signature *Signature `form:"signature,omitempty" json:"signature,omitempty"` + Signature Signature `form:"signature,omitempty" json:"signature,omitempty"` // SignatureExpiration Signature expiration used for defining the expiration time of the signature. - SignatureExpiration *SignatureExpiration `form:"signature_expiration,omitempty" json:"signature_expiration,omitempty"` + SignatureExpiration SignatureExpiration `form:"signature_expiration,omitempty" json:"signature_expiration,omitempty"` } // PutFilesUploadUploadIdParams defines parameters for PutFilesUploadUploadId. diff --git a/tests/integration/internal/envd/generated.go b/tests/integration/internal/envd/generated.go index 8f25c860cf..13ae828089 100644 --- a/tests/integration/internal/envd/generated.go +++ b/tests/integration/internal/envd/generated.go @@ -78,6 +78,30 @@ type Metrics struct { Ts *int64 `json:"ts,omitempty"` } +// MultipartUploadComplete defines model for MultipartUploadComplete. +type MultipartUploadComplete struct { + // Path Path to the final assembled file + Path string `json:"path"` + + // Size Total size of the assembled file in bytes + Size int64 `json:"size"` +} + +// MultipartUploadInit defines model for MultipartUploadInit. +type MultipartUploadInit struct { + // UploadId Unique identifier for the upload session + UploadId string `json:"uploadId"` +} + +// MultipartUploadPart defines model for MultipartUploadPart. 
+type MultipartUploadPart struct { + // PartNumber The part number that was uploaded + PartNumber int `json:"partNumber"` + + // Size Size of the uploaded part in bytes + Size int64 `json:"size"` +} + // VolumeMount Volume type VolumeMount struct { NfsTarget string `json:"nfs_target"` @@ -111,6 +135,9 @@ type InvalidUser = Error // NotEnoughDiskSpace defines model for NotEnoughDiskSpace. type NotEnoughDiskSpace = Error +// UploadNotFound defines model for UploadNotFound. +type UploadNotFound = Error + // UploadSuccess defines model for UploadSuccess. type UploadSuccess = []EntryInfo @@ -149,6 +176,36 @@ type PostFilesParams struct { SignatureExpiration *SignatureExpiration `form:"signature_expiration,omitempty" json:"signature_expiration,omitempty"` } +// PostFilesUploadInitJSONBody defines parameters for PostFilesUploadInit. +type PostFilesUploadInitJSONBody struct { + // PartSize Size of each part in bytes (last part may be smaller) + PartSize int64 `json:"partSize"` + + // Path Path to the file to upload + Path string `json:"path"` + + // TotalSize Total size of the file in bytes + TotalSize int64 `json:"totalSize"` +} + +// PostFilesUploadInitParams defines parameters for PostFilesUploadInit. +type PostFilesUploadInitParams struct { + // Username User used for setting the owner, or resolving relative paths. + Username *User `form:"username,omitempty" json:"username,omitempty"` + + // Signature Signature used for file access permission verification. + Signature *Signature `form:"signature,omitempty" json:"signature,omitempty"` + + // SignatureExpiration Signature expiration used for defining the expiration time of the signature. + SignatureExpiration *SignatureExpiration `form:"signature_expiration,omitempty" json:"signature_expiration,omitempty"` +} + +// PutFilesUploadUploadIdParams defines parameters for PutFilesUploadUploadId. 
+type PutFilesUploadUploadIdParams struct { + // Part The part number (0-indexed) + Part int `form:"part" json:"part"` +} + // PostInitJSONBody defines parameters for PostInit. type PostInitJSONBody struct { // AccessToken Access token for secure access to envd service @@ -174,6 +231,9 @@ type PostInitJSONBody struct { // PostFilesMultipartRequestBody defines body for PostFiles for multipart/form-data ContentType. type PostFilesMultipartRequestBody PostFilesMultipartBody +// PostFilesUploadInitJSONRequestBody defines body for PostFilesUploadInit for application/json ContentType. +type PostFilesUploadInitJSONRequestBody PostFilesUploadInitJSONBody + // PostInitJSONRequestBody defines body for PostInit for application/json ContentType. type PostInitJSONRequestBody PostInitJSONBody From 816fb6ee901c50fb237ab18b4d836bca2557740f Mon Sep 17 00:00:00 2001 From: Mish Ushakov <10400064+mishushakov@users.noreply.github.com> Date: Tue, 24 Feb 2026 16:04:00 +0100 Subject: [PATCH 24/59] fix(envd): harden multipart upload against race conditions, auth bypass, and resource exhaustion - Make session limit check atomic with insertion to prevent TOCTOU race - Add access token validation to upload part, complete, and abort endpoints - Cap total upload size to 10GB to prevent DoS via excessive parts Co-Authored-By: Claude Opus 4.6 --- packages/envd/internal/api/auth.go | 15 ++++++ .../envd/internal/api/multipart_upload.go | 53 +++++++++++++++---- 2 files changed, 57 insertions(+), 11 deletions(-) diff --git a/packages/envd/internal/api/auth.go b/packages/envd/internal/api/auth.go index 49cd61ec57..c51a4493e9 100644 --- a/packages/envd/internal/api/auth.go +++ b/packages/envd/internal/api/auth.go @@ -93,6 +93,21 @@ func (a *API) generateSignature(path string, username string, operation string, return fmt.Sprintf("v1_%s", hasher.HashWithoutPrefix([]byte(signature))), nil } +// validateAccessToken checks that the request carries the correct access token header. 
+// Returns nil if no access token is configured or if the token matches. +func (a *API) validateAccessToken(r *http.Request) error { + if !a.accessToken.IsSet() { + return nil + } + + tokenFromHeader := r.Header.Get(accessTokenHeader) + if tokenFromHeader == "" || !a.accessToken.Equals(tokenFromHeader) { + return fmt.Errorf("unauthorized: valid access token required") + } + + return nil +} + func (a *API) validateSigning(r *http.Request, signature *string, signatureExpiration *int, username *string, path string, operation string) (err error) { var expectedSignature string diff --git a/packages/envd/internal/api/multipart_upload.go b/packages/envd/internal/api/multipart_upload.go index f055637c7a..6a78a38f71 100644 --- a/packages/envd/internal/api/multipart_upload.go +++ b/packages/envd/internal/api/multipart_upload.go @@ -22,6 +22,8 @@ import ( const ( // maxUploadSessions limits concurrent upload sessions to prevent resource exhaustion maxUploadSessions = 100 + // maxTotalSize limits the total upload size to 10GB + maxTotalSize = 10 * 1024 * 1024 * 1024 // maxPartSize limits individual part size to 100MB to prevent DoS maxPartSize = 100 * 1024 * 1024 // uploadSessionTTL is the maximum time an upload session can remain active @@ -51,6 +53,11 @@ func (a *API) PostFilesUploadInit(w http.ResponseWriter, r *http.Request, params return } + if body.TotalSize > maxTotalSize { + jsonError(w, http.StatusBadRequest, fmt.Errorf("totalSize %d exceeds maximum allowed size of %d bytes (10GB)", body.TotalSize, maxTotalSize)) + + return + } if body.PartSize <= 0 { jsonError(w, http.StatusBadRequest, fmt.Errorf("partSize must be positive")) @@ -62,17 +69,6 @@ func (a *API) PostFilesUploadInit(w http.ResponseWriter, r *http.Request, params return } - // Check session limit early before doing any file operations - a.uploadsLock.RLock() - sessionCount := len(a.uploads) - a.uploadsLock.RUnlock() - if sessionCount >= maxUploadSessions { - 
a.logger.Error().Str(string(logs.OperationIDKey), operationID).Int("maxSessions", maxUploadSessions).Msg("too many concurrent upload sessions") - jsonError(w, http.StatusTooManyRequests, fmt.Errorf("too many concurrent upload sessions (max %d)", maxUploadSessions)) - - return - } - // Validate signing if needed err := a.validateSigning(r, params.Signature, params.SignatureExpiration, params.Username, body.Path, SigningWriteOperation) if err != nil { @@ -191,7 +187,18 @@ func (a *API) PostFilesUploadInit(w http.ResponseWriter, r *http.Request, params CreatedAt: time.Now(), } + // Atomically check session limit and insert — prevents TOCTOU race where + // concurrent requests all pass a read-lock check before any inserts. a.uploadsLock.Lock() + if len(a.uploads) >= maxUploadSessions { + a.uploadsLock.Unlock() + destFile.Close() + os.Remove(filePath) + a.logger.Error().Str(string(logs.OperationIDKey), operationID).Int("maxSessions", maxUploadSessions).Msg("too many concurrent upload sessions") + jsonError(w, http.StatusTooManyRequests, fmt.Errorf("too many concurrent upload sessions (max %d)", maxUploadSessions)) + + return + } a.uploads[uploadID] = session a.uploadsLock.Unlock() @@ -219,6 +226,14 @@ func (a *API) PutFilesUploadUploadId(w http.ResponseWriter, r *http.Request, upl operationID := logs.AssignOperationID() + // Validate access token + if err := a.validateAccessToken(r); err != nil { + a.logger.Error().Err(err).Str(string(logs.OperationIDKey), operationID).Msg("unauthorized upload part request") + jsonError(w, http.StatusUnauthorized, err) + + return + } + // Validate uploadId is a valid UUID to prevent path traversal if _, err := uuid.Parse(uploadId); err != nil { a.logger.Error().Err(err).Str(string(logs.OperationIDKey), operationID).Str("uploadId", uploadId).Msg("invalid upload ID format") @@ -353,6 +368,14 @@ func (a *API) PostFilesUploadUploadIdComplete(w http.ResponseWriter, r *http.Req operationID := logs.AssignOperationID() + // Validate access 
token + if err := a.validateAccessToken(r); err != nil { + a.logger.Error().Err(err).Str(string(logs.OperationIDKey), operationID).Msg("unauthorized upload complete request") + jsonError(w, http.StatusUnauthorized, err) + + return + } + // Get and remove the session a.uploadsLock.Lock() session, exists := a.uploads[uploadId] @@ -432,6 +455,14 @@ func (a *API) DeleteFilesUploadUploadId(w http.ResponseWriter, r *http.Request, operationID := logs.AssignOperationID() + // Validate access token + if err := a.validateAccessToken(r); err != nil { + a.logger.Error().Err(err).Str(string(logs.OperationIDKey), operationID).Msg("unauthorized upload abort request") + jsonError(w, http.StatusUnauthorized, err) + + return + } + // Get and remove the session a.uploadsLock.Lock() session, exists := a.uploads[uploadId] From 8c8a28f849cdcec4a68f977d9bea2bf7b4a7c366 Mon Sep 17 00:00:00 2001 From: Mish Ushakov <10400064+mishushakov@users.noreply.github.com> Date: Tue, 24 Feb 2026 16:10:57 +0100 Subject: [PATCH 25/59] refactor(envd): clean up multipart upload code - Extract cleanup loop body into removeExpiredSessions() with defer unlock - Add ignoreNotExist() helper for cleaner error handling - Use uuid.NewString() instead of uuid.New().String() - Remove redundant UUID parse check (map lookup suffices) - Use uint for NumParts and PartsWritten map keys - Remove "invalid upload ID format" test (no longer applies) Co-Authored-By: Claude Opus 4.6 --- .../envd/internal/api/multipart_upload.go | 55 ++++++------------- .../internal/api/multipart_upload_test.go | 18 +----- packages/envd/internal/api/store.go | 55 ++++++++++++------- 3 files changed, 52 insertions(+), 76 deletions(-) diff --git a/packages/envd/internal/api/multipart_upload.go b/packages/envd/internal/api/multipart_upload.go index 6a78a38f71..bd5dd1b0a1 100644 --- a/packages/envd/internal/api/multipart_upload.go +++ b/packages/envd/internal/api/multipart_upload.go @@ -164,16 +164,10 @@ func (a *API) PostFilesUploadInit(w 
http.ResponseWriter, r *http.Request, params return } - // Create upload ID - uploadID := uuid.New().String() + uploadID := uuid.NewString() - // Calculate number of parts - numParts := int((body.TotalSize + body.PartSize - 1) / body.PartSize) - if numParts == 0 && body.TotalSize == 0 { - numParts = 0 // Empty file, no parts needed - } + numParts := uint((body.TotalSize + body.PartSize - 1) / body.PartSize) - // Store the session with the open file handle session := &MultipartUploadSession{ UploadID: uploadID, FilePath: filePath, @@ -183,7 +177,7 @@ func (a *API) PostFilesUploadInit(w http.ResponseWriter, r *http.Request, params NumParts: numParts, UID: uid, GID: gid, - PartsWritten: make(map[int]bool), + PartsWritten: make(map[uint]bool), CreatedAt: time.Now(), } @@ -208,7 +202,7 @@ func (a *API) PostFilesUploadInit(w http.ResponseWriter, r *http.Request, params Str("filePath", filePath). Int64("totalSize", body.TotalSize). Int64("partSize", body.PartSize). - Int("numParts", numParts). + Uint("numParts", numParts). 
Msg("multipart upload initialized") w.Header().Set("Content-Type", "application/json") @@ -234,14 +228,6 @@ func (a *API) PutFilesUploadUploadId(w http.ResponseWriter, r *http.Request, upl return } - // Validate uploadId is a valid UUID to prevent path traversal - if _, err := uuid.Parse(uploadId); err != nil { - a.logger.Error().Err(err).Str(string(logs.OperationIDKey), operationID).Str("uploadId", uploadId).Msg("invalid upload ID format") - jsonError(w, http.StatusBadRequest, fmt.Errorf("invalid upload ID format: must be a valid UUID")) - - return - } - // Get the session a.uploadsLock.RLock() session, exists := a.uploads[uploadId] @@ -262,27 +248,18 @@ func (a *API) PutFilesUploadUploadId(w http.ResponseWriter, r *http.Request, upl return } - partNumber := params.Part - - // Check for negative part numbers - if partNumber < 0 { - a.logger.Error().Str(string(logs.OperationIDKey), operationID).Int("partNumber", partNumber).Msg("invalid part number") + if params.Part < 0 { + a.logger.Error().Str(string(logs.OperationIDKey), operationID).Int("partNumber", params.Part).Msg("invalid part number") jsonError(w, http.StatusBadRequest, fmt.Errorf("part number must be non-negative")) return } - // Reject part uploads for empty files (no parts needed) - if session.NumParts == 0 { - a.logger.Error().Str(string(logs.OperationIDKey), operationID).Msg("cannot upload parts to empty file") - jsonError(w, http.StatusBadRequest, fmt.Errorf("cannot upload parts to empty file (totalSize is 0)")) - - return - } + partNumber := uint(params.Part) - // Check part number is within range + // Check part number is within range (also rejects parts for empty files where NumParts == 0) if partNumber >= session.NumParts { - a.logger.Error().Str(string(logs.OperationIDKey), operationID).Int("partNumber", partNumber).Int("numParts", session.NumParts).Msg("part number out of range") + a.logger.Error().Str(string(logs.OperationIDKey), operationID).Uint("partNumber", partNumber).Uint("numParts", 
session.NumParts).Msg("part number out of range") jsonError(w, http.StatusBadRequest, fmt.Errorf("part number %d out of range (expected 0-%d)", partNumber, session.NumParts-1)) return @@ -338,7 +315,7 @@ func (a *API) PutFilesUploadUploadId(w http.ResponseWriter, r *http.Request, upl a.logger.Warn(). Str(string(logs.OperationIDKey), operationID). Str("uploadId", uploadId). - Int("partNumber", partNumber). + Uint("partNumber", partNumber). Msg("overwriting existing part") } session.PartsWritten[partNumber] = true @@ -347,7 +324,7 @@ func (a *API) PutFilesUploadUploadId(w http.ResponseWriter, r *http.Request, upl a.logger.Debug(). Str(string(logs.OperationIDKey), operationID). Str("uploadId", uploadId). - Int("partNumber", partNumber). + Uint("partNumber", partNumber). Int64("size", size). Int64("offset", offset). Msg("part uploaded") @@ -355,7 +332,7 @@ func (a *API) PutFilesUploadUploadId(w http.ResponseWriter, r *http.Request, upl w.Header().Set("Content-Type", "application/json") w.WriteHeader(http.StatusOK) if err := json.NewEncoder(w).Encode(MultipartUploadPart{ - PartNumber: partNumber, + PartNumber: int(partNumber), Size: size, }); err != nil { a.logger.Error().Err(err).Str(string(logs.OperationIDKey), operationID).Msg("failed to encode response") @@ -402,7 +379,7 @@ func (a *API) PostFilesUploadUploadIdComplete(w http.ResponseWriter, r *http.Req // Verify all parts were uploaded session.mu.Lock() - missingParts := []int{} + var missingParts []uint for i := range session.NumParts { if !session.PartsWritten[i] { missingParts = append(missingParts, i) @@ -416,7 +393,7 @@ func (a *API) PostFilesUploadUploadIdComplete(w http.ResponseWriter, r *http.Req a.logger.Error(). Str(string(logs.OperationIDKey), operationID). Str("uploadId", uploadId). - Ints("missingParts", missingParts). + Uints("missingParts", missingParts). 
Msg("missing parts in upload") jsonError(w, http.StatusBadRequest, fmt.Errorf("missing parts: %v", missingParts)) @@ -436,7 +413,7 @@ func (a *API) PostFilesUploadUploadIdComplete(w http.ResponseWriter, r *http.Req Str("uploadId", uploadId). Str("filePath", session.FilePath). Int64("totalSize", session.TotalSize). - Int("numParts", session.NumParts). + Uint("numParts", session.NumParts). Msg("multipart upload completed") w.Header().Set("Content-Type", "application/json") @@ -489,7 +466,7 @@ func (a *API) DeleteFilesUploadUploadId(w http.ResponseWriter, r *http.Request, // Close and remove the file session.DestFile.Close() - if err := os.Remove(session.FilePath); err != nil && !os.IsNotExist(err) { + if err := ignoreNotExist(os.Remove(session.FilePath)); err != nil { a.logger.Warn().Err(err).Str(string(logs.OperationIDKey), operationID).Str("uploadId", uploadId).Msg("error removing file") } diff --git a/packages/envd/internal/api/multipart_upload_test.go b/packages/envd/internal/api/multipart_upload_test.go index 64a623367c..0ba49f0721 100644 --- a/packages/envd/internal/api/multipart_upload_test.go +++ b/packages/envd/internal/api/multipart_upload_test.go @@ -198,12 +198,10 @@ func TestMultipartUpload(t *testing.T) { t.Parallel() api := newMultipartTestAPI(t) - // Use a valid UUID that doesn't exist in the sessions map - nonExistentUUID := "00000000-0000-0000-0000-000000000000" - req := httptest.NewRequest(http.MethodPut, "/files/upload/"+nonExistentUUID+"?part=0", bytes.NewReader([]byte("test"))) + req := httptest.NewRequest(http.MethodPut, "/files/upload/no-such-session?part=0", bytes.NewReader([]byte("test"))) w := httptest.NewRecorder() - api.PutFilesUploadUploadId(w, req, nonExistentUUID, PutFilesUploadUploadIdParams{Part: 0}) + api.PutFilesUploadUploadId(w, req, "no-such-session", PutFilesUploadUploadIdParams{Part: 0}) assert.Equal(t, http.StatusNotFound, w.Code) }) @@ -229,18 +227,6 @@ func TestMultipartUpload(t *testing.T) { assert.Equal(t, 
http.StatusNotFound, w.Code) }) - t.Run("invalid upload ID format", func(t *testing.T) { - t.Parallel() - api := newMultipartTestAPI(t) - - // Try to upload with an invalid UUID (path traversal attempt) - req := httptest.NewRequest(http.MethodPut, "/files/upload/../../../etc/passwd?part=0", bytes.NewReader([]byte("test"))) - w := httptest.NewRecorder() - - api.PutFilesUploadUploadId(w, req, "../../../etc/passwd", PutFilesUploadUploadIdParams{Part: 0}) - assert.Equal(t, http.StatusBadRequest, w.Code) - }) - t.Run("negative part number", func(t *testing.T) { t.Parallel() api := newMultipartTestAPI(t) diff --git a/packages/envd/internal/api/store.go b/packages/envd/internal/api/store.go index e4017e352e..0fb825094c 100644 --- a/packages/envd/internal/api/store.go +++ b/packages/envd/internal/api/store.go @@ -21,17 +21,26 @@ type MultipartUploadSession struct { UploadID string FilePath string // Final destination path DestFile *os.File // Open file handle for direct writes - TotalSize int64 // Total expected file size - PartSize int64 // Size of each part (except possibly last) - NumParts int // Total number of expected parts + TotalSize int64 // Total expected file size (validated >= 0 at input) + PartSize int64 // Size of each part (validated > 0 at input) + NumParts uint // Total number of expected parts UID int GID int - PartsWritten map[int]bool // partNumber -> whether it's been written + PartsWritten map[uint]bool // partNumber -> whether it's been written CreatedAt time.Time completed atomic.Bool // Set to true when complete/abort starts to prevent new parts mu sync.Mutex } +// ignoreNotExist returns nil if err is a "not exist" error, otherwise returns err unchanged. +func ignoreNotExist(err error) error { + if os.IsNotExist(err) { + return nil + } + + return err +} + // MMDSClient provides access to MMDS metadata. 
type MMDSClient interface { GetAccessTokenHash(ctx context.Context) (string, error) @@ -86,25 +95,29 @@ func (a *API) cleanupExpiredUploads() { defer ticker.Stop() for range ticker.C { - a.uploadsLock.Lock() - now := time.Now() - for uploadID, session := range a.uploads { - if now.Sub(session.CreatedAt) > uploadSessionTTL { - // Mark as completed to prevent races - if session.completed.CompareAndSwap(false, true) { - delete(a.uploads, uploadID) - // Close file handle and remove file in background - go func(s *MultipartUploadSession) { - s.DestFile.Close() - if err := os.Remove(s.FilePath); err != nil && !os.IsNotExist(err) { - a.logger.Warn().Err(err).Str("filePath", s.FilePath).Msg("failed to cleanup expired upload file") - } - }(session) - a.logger.Info().Str("uploadId", uploadID).Msg("cleaned up expired multipart upload session") - } + a.removeExpiredSessions() + } +} + +func (a *API) removeExpiredSessions() { + a.uploadsLock.Lock() + defer a.uploadsLock.Unlock() + + now := time.Now() + for uploadID, session := range a.uploads { + if now.Sub(session.CreatedAt) > uploadSessionTTL { + // Mark as completed to prevent races + if session.completed.CompareAndSwap(false, true) { + delete(a.uploads, uploadID) + go func(s *MultipartUploadSession) { + s.DestFile.Close() + if err := ignoreNotExist(os.Remove(s.FilePath)); err != nil { + a.logger.Warn().Err(err).Str("filePath", s.FilePath).Msg("failed to cleanup expired upload file") + } + }(session) + a.logger.Info().Str("uploadId", uploadID).Msg("cleaned up expired multipart upload session") } } - a.uploadsLock.Unlock() } } From fa2cc972e2cd2364e0639931e5be99d471cc2061 Mon Sep 17 00:00:00 2001 From: Mish Ushakov <10400064+mishushakov@users.noreply.github.com> Date: Tue, 24 Feb 2026 16:15:25 +0100 Subject: [PATCH 26/59] spec(envd): add minimum: 0 constraint to part query parameter Co-Authored-By: Claude Opus 4.6 --- packages/envd/spec/envd.yaml | 1 + 1 file changed, 1 insertion(+) diff --git a/packages/envd/spec/envd.yaml 
b/packages/envd/spec/envd.yaml index 6706a4a8ae..65944f762e 100644 --- a/packages/envd/spec/envd.yaml +++ b/packages/envd/spec/envd.yaml @@ -197,6 +197,7 @@ paths: description: The part number (0-indexed) schema: type: integer + minimum: 0 requestBody: required: true content: From de2d2b33e3e8716d401c2f01b3d56df99b98d64f Mon Sep 17 00:00:00 2001 From: Mish Ushakov <10400064+mishushakov@users.noreply.github.com> Date: Tue, 24 Feb 2026 16:17:38 +0100 Subject: [PATCH 27/59] refactor(envd): drop handler-level negative part number check Rely on middleware to enforce minimum: 0 from the spec. Co-Authored-By: Claude Opus 4.6 --- .../envd/internal/api/multipart_upload.go | 7 --- .../internal/api/multipart_upload_test.go | 43 ------------------- 2 files changed, 50 deletions(-) diff --git a/packages/envd/internal/api/multipart_upload.go b/packages/envd/internal/api/multipart_upload.go index bd5dd1b0a1..c12a1bca94 100644 --- a/packages/envd/internal/api/multipart_upload.go +++ b/packages/envd/internal/api/multipart_upload.go @@ -248,13 +248,6 @@ func (a *API) PutFilesUploadUploadId(w http.ResponseWriter, r *http.Request, upl return } - if params.Part < 0 { - a.logger.Error().Str(string(logs.OperationIDKey), operationID).Int("partNumber", params.Part).Msg("invalid part number") - jsonError(w, http.StatusBadRequest, fmt.Errorf("part number must be non-negative")) - - return - } - partNumber := uint(params.Part) // Check part number is within range (also rejects parts for empty files where NumParts == 0) diff --git a/packages/envd/internal/api/multipart_upload_test.go b/packages/envd/internal/api/multipart_upload_test.go index 0ba49f0721..a56fc92359 100644 --- a/packages/envd/internal/api/multipart_upload_test.go +++ b/packages/envd/internal/api/multipart_upload_test.go @@ -227,49 +227,6 @@ func TestMultipartUpload(t *testing.T) { assert.Equal(t, http.StatusNotFound, w.Code) }) - t.Run("negative part number", func(t *testing.T) { - t.Parallel() - api := newMultipartTestAPI(t) - 
tempDir := t.TempDir() - - // Initialize upload - body := PostFilesUploadInitJSONRequestBody{ - Path: filepath.Join(tempDir, "test-file.txt"), - TotalSize: 100, - PartSize: 50, - } - bodyBytes, _ := json.Marshal(body) - - initReq := httptest.NewRequest(http.MethodPost, "/files/upload/init", bytes.NewReader(bodyBytes)) - initReq.Header.Set("Content-Type", "application/json") - initW := httptest.NewRecorder() - - api.PostFilesUploadInit(initW, initReq, PostFilesUploadInitParams{}) - require.Equal(t, http.StatusOK, initW.Code) - - var initResp MultipartUploadInit - err := json.Unmarshal(initW.Body.Bytes(), &initResp) - require.NoError(t, err) - uploadId := initResp.UploadId - - // Try to upload with negative part number - req := httptest.NewRequest(http.MethodPut, "/files/upload/"+uploadId+"?part=-1", bytes.NewReader([]byte("test"))) - w := httptest.NewRecorder() - - api.PutFilesUploadUploadId(w, req, uploadId, PutFilesUploadUploadIdParams{Part: -1}) - assert.Equal(t, http.StatusBadRequest, w.Code) - - // Clean up - api.uploadsLock.Lock() - session := api.uploads[uploadId] - if session != nil { - session.DestFile.Close() - os.Remove(session.FilePath) - } - delete(api.uploads, uploadId) - api.uploadsLock.Unlock() - }) - t.Run("missing part in sequence", func(t *testing.T) { t.Parallel() api := newMultipartTestAPI(t) From a30b15b14bc866f60015337f4ddfba7174489e3c Mon Sep 17 00:00:00 2001 From: Mish Ushakov <10400064+mishushakov@users.noreply.github.com> Date: Tue, 24 Feb 2026 16:29:01 +0100 Subject: [PATCH 28/59] refactor(envd): drop handler-level totalSize/partSize range checks Add minimum: 0 for totalSize and minimum: 1 for partSize in the OpenAPI spec, and rely on middleware for enforcement. 
Co-Authored-By: Claude Opus 4.6 --- packages/envd/internal/api/multipart_upload.go | 11 ----------- packages/envd/spec/envd.yaml | 2 ++ 2 files changed, 2 insertions(+), 11 deletions(-) diff --git a/packages/envd/internal/api/multipart_upload.go b/packages/envd/internal/api/multipart_upload.go index c12a1bca94..d0c4769413 100644 --- a/packages/envd/internal/api/multipart_upload.go +++ b/packages/envd/internal/api/multipart_upload.go @@ -47,22 +47,11 @@ func (a *API) PostFilesUploadInit(w http.ResponseWriter, r *http.Request, params return } - // Validate totalSize and partSize - if body.TotalSize < 0 { - jsonError(w, http.StatusBadRequest, fmt.Errorf("totalSize must be non-negative")) - - return - } if body.TotalSize > maxTotalSize { jsonError(w, http.StatusBadRequest, fmt.Errorf("totalSize %d exceeds maximum allowed size of %d bytes (10GB)", body.TotalSize, maxTotalSize)) return } - if body.PartSize <= 0 { - jsonError(w, http.StatusBadRequest, fmt.Errorf("partSize must be positive")) - - return - } if body.PartSize > maxPartSize { jsonError(w, http.StatusBadRequest, fmt.Errorf("partSize exceeds maximum allowed size of %d bytes", maxPartSize)) diff --git a/packages/envd/spec/envd.yaml b/packages/envd/spec/envd.yaml index 65944f762e..e81a0fb6ac 100644 --- a/packages/envd/spec/envd.yaml +++ b/packages/envd/spec/envd.yaml @@ -158,10 +158,12 @@ paths: totalSize: type: integer format: int64 + minimum: 0 description: Total size of the file in bytes partSize: type: integer format: int64 + minimum: 1 description: Size of each part in bytes (last part may be smaller) responses: "200": From d249ab7efd846472ae8f59fb2c1ebb5c451562e5 Mon Sep 17 00:00:00 2001 From: Mish Ushakov <10400064+mishushakov@users.noreply.github.com> Date: Tue, 24 Feb 2026 16:30:21 +0100 Subject: [PATCH 29/59] docs(envd): clarify that validateSigning already covers access token check Co-Authored-By: Claude Opus 4.6 --- packages/envd/internal/api/multipart_upload.go | 2 +- 1 file changed, 1 insertion(+), 
1 deletion(-) diff --git a/packages/envd/internal/api/multipart_upload.go b/packages/envd/internal/api/multipart_upload.go index d0c4769413..161e71fe8c 100644 --- a/packages/envd/internal/api/multipart_upload.go +++ b/packages/envd/internal/api/multipart_upload.go @@ -58,7 +58,7 @@ func (a *API) PostFilesUploadInit(w http.ResponseWriter, r *http.Request, params return } - // Validate signing if needed + // Validate access token or signing err := a.validateSigning(r, params.Signature, params.SignatureExpiration, params.Username, body.Path, SigningWriteOperation) if err != nil { a.logger.Error().Err(err).Str(string(logs.OperationIDKey), operationID).Msg("error during auth validation") From ac5210c9429d937ebff6f23db15db41b163c5b4d Mon Sep 17 00:00:00 2001 From: Mish Ushakov <10400064+mishushakov@users.noreply.github.com> Date: Tue, 24 Feb 2026 16:32:00 +0100 Subject: [PATCH 30/59] refactor(envd): replace validateSigning with validateAccessToken on upload init Multipart upload init doesn't need signature-based auth. Use the same validateAccessToken as the other upload endpoints. Remove Signature and SignatureExpiration parameters from the spec and regenerate. Co-Authored-By: Claude Opus 4.6 --- packages/envd/internal/api/api.gen.go | 22 ------------------- .../envd/internal/api/multipart_upload.go | 6 ++--- packages/envd/spec/envd.yaml | 2 -- 3 files changed, 2 insertions(+), 28 deletions(-) diff --git a/packages/envd/internal/api/api.gen.go b/packages/envd/internal/api/api.gen.go index 5840a98810..b195ec18df 100644 --- a/packages/envd/internal/api/api.gen.go +++ b/packages/envd/internal/api/api.gen.go @@ -188,12 +188,6 @@ type PostFilesUploadInitJSONBody struct { type PostFilesUploadInitParams struct { // Username User used for setting the owner, or resolving relative paths. Username *User `form:"username,omitempty" json:"username,omitempty"` - - // Signature Signature used for file access permission verification. 
- Signature *Signature `form:"signature,omitempty" json:"signature,omitempty"` - - // SignatureExpiration Signature expiration used for defining the expiration time of the signature. - SignatureExpiration *SignatureExpiration `form:"signature_expiration,omitempty" json:"signature_expiration,omitempty"` } // PutFilesUploadUploadIdParams defines parameters for PutFilesUploadUploadId. @@ -496,22 +490,6 @@ func (siw *ServerInterfaceWrapper) PostFilesUploadInit(w http.ResponseWriter, r return } - // ------------- Optional query parameter "signature" ------------- - - err = runtime.BindQueryParameter("form", true, false, "signature", r.URL.Query(), &params.Signature) - if err != nil { - siw.ErrorHandlerFunc(w, r, &InvalidParamFormatError{ParamName: "signature", Err: err}) - return - } - - // ------------- Optional query parameter "signature_expiration" ------------- - - err = runtime.BindQueryParameter("form", true, false, "signature_expiration", r.URL.Query(), &params.SignatureExpiration) - if err != nil { - siw.ErrorHandlerFunc(w, r, &InvalidParamFormatError{ParamName: "signature_expiration", Err: err}) - return - } - handler := http.Handler(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { siw.Handler.PostFilesUploadInit(w, r, params) })) diff --git a/packages/envd/internal/api/multipart_upload.go b/packages/envd/internal/api/multipart_upload.go index 161e71fe8c..c01477be94 100644 --- a/packages/envd/internal/api/multipart_upload.go +++ b/packages/envd/internal/api/multipart_upload.go @@ -58,10 +58,8 @@ func (a *API) PostFilesUploadInit(w http.ResponseWriter, r *http.Request, params return } - // Validate access token or signing - err := a.validateSigning(r, params.Signature, params.SignatureExpiration, params.Username, body.Path, SigningWriteOperation) - if err != nil { - a.logger.Error().Err(err).Str(string(logs.OperationIDKey), operationID).Msg("error during auth validation") + if err := a.validateAccessToken(r); err != nil { +
a.logger.Error().Err(err).Str(string(logs.OperationIDKey), operationID).Msg("unauthorized upload init request") jsonError(w, http.StatusUnauthorized, err) return diff --git a/packages/envd/spec/envd.yaml b/packages/envd/spec/envd.yaml index e81a0fb6ac..8f9578dd9c 100644 --- a/packages/envd/spec/envd.yaml +++ b/packages/envd/spec/envd.yaml @@ -139,8 +139,6 @@ paths: - {} parameters: - $ref: "#/components/parameters/User" - - $ref: "#/components/parameters/Signature" - - $ref: "#/components/parameters/SignatureExpiration" requestBody: required: true content: From 2198c96d1ce52abe20c62b189e6f1b613a68d2a1 Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" Date: Tue, 24 Feb 2026 15:34:17 +0000 Subject: [PATCH 31/59] chore: auto-commit generated changes --- .../internal/sandbox/envd/envd.gen.go | 6 --- tests/integration/internal/envd/generated.go | 38 ------------------- 2 files changed, 44 deletions(-) diff --git a/packages/orchestrator/internal/sandbox/envd/envd.gen.go b/packages/orchestrator/internal/sandbox/envd/envd.gen.go index eb77825967..6aa045ace1 100644 --- a/packages/orchestrator/internal/sandbox/envd/envd.gen.go +++ b/packages/orchestrator/internal/sandbox/envd/envd.gen.go @@ -183,12 +183,6 @@ type PostFilesUploadInitJSONBody struct { type PostFilesUploadInitParams struct { // Username User used for setting the owner, or resolving relative paths. Username User `form:"username,omitempty" json:"username,omitempty"` - - // Signature Signature used for file access permission verification. - Signature Signature `form:"signature,omitempty" json:"signature,omitempty"` - - // SignatureExpiration Signature expiration used for defining the expiration time of the signature. - SignatureExpiration SignatureExpiration `form:"signature_expiration,omitempty" json:"signature_expiration,omitempty"` } // PutFilesUploadUploadIdParams defines parameters for PutFilesUploadUploadId. 
diff --git a/tests/integration/internal/envd/generated.go b/tests/integration/internal/envd/generated.go index 13ae828089..fddc286f41 100644 --- a/tests/integration/internal/envd/generated.go +++ b/tests/integration/internal/envd/generated.go @@ -192,12 +192,6 @@ type PostFilesUploadInitJSONBody struct { type PostFilesUploadInitParams struct { // Username User used for setting the owner, or resolving relative paths. Username *User `form:"username,omitempty" json:"username,omitempty"` - - // Signature Signature used for file access permission verification. - Signature *Signature `form:"signature,omitempty" json:"signature,omitempty"` - - // SignatureExpiration Signature expiration used for defining the expiration time of the signature. - SignatureExpiration *SignatureExpiration `form:"signature_expiration,omitempty" json:"signature_expiration,omitempty"` } // PutFilesUploadUploadIdParams defines parameters for PutFilesUploadUploadId. @@ -761,38 +755,6 @@ func NewPostFilesUploadInitRequestWithBody(server string, params *PostFilesUploa } - if params.Signature != nil { - - if queryFrag, err := runtime.StyleParamWithLocation("form", true, "signature", runtime.ParamLocationQuery, *params.Signature); err != nil { - return nil, err - } else if parsed, err := url.ParseQuery(queryFrag); err != nil { - return nil, err - } else { - for k, v := range parsed { - for _, v2 := range v { - queryValues.Add(k, v2) - } - } - } - - } - - if params.SignatureExpiration != nil { - - if queryFrag, err := runtime.StyleParamWithLocation("form", true, "signature_expiration", runtime.ParamLocationQuery, *params.SignatureExpiration); err != nil { - return nil, err - } else if parsed, err := url.ParseQuery(queryFrag); err != nil { - return nil, err - } else { - for k, v := range parsed { - for _, v2 := range v { - queryValues.Add(k, v2) - } - } - } - - } - queryURL.RawQuery = queryValues.Encode() } From e8fb58bbb4a9a851671e84fb375cccb0f261a90d Mon Sep 17 00:00:00 2001 From: Mish Ushakov 
<10400064+mishushakov@users.noreply.github.com> Date: Tue, 24 Feb 2026 16:49:41 +0100 Subject: [PATCH 32/59] fix(envd): remove upload paths from auth bypass list Upload endpoints now validate access tokens internally via validateAccessToken, so they no longer need to bypass the WithAuthorization middleware. This removes allowedPathPrefixes and the isAllowedPath helper, simplifying back to exact-match allowedPaths with slices.Contains. Co-Authored-By: Claude Opus 4.6 --- packages/envd/internal/api/auth.go | 30 +++--------------------------- 1 file changed, 3 insertions(+), 27 deletions(-) diff --git a/packages/envd/internal/api/auth.go b/packages/envd/internal/api/auth.go index c51a4493e9..667d7e4cd7 100644 --- a/packages/envd/internal/api/auth.go +++ b/packages/envd/internal/api/auth.go @@ -21,46 +21,22 @@ const ( accessTokenHeader = "X-Access-Token" ) -// allowedExactPaths are paths that bypass general authentication using exact matching +// allowedPaths are paths that bypass general authentication // (e.g., health check, endpoints supporting signing) -var allowedExactPaths = []string{ +var allowedPaths = []string{ "GET/health", "GET/files", "POST/files", "POST/init", } -// allowedPathPrefixes are paths that bypass general authentication using prefix matching -// These are for paths with dynamic segments (e.g., upload ID) -var allowedPathPrefixes = []string{ - "PUT/files/upload/", - "DELETE/files/upload/", - "POST/files/upload/", -} - -func isAllowedPath(methodPath string) bool { - // Check exact matches first - if slices.Contains(allowedExactPaths, methodPath) { - return true - } - - // Check prefix matches for paths with dynamic segments - for _, prefix := range allowedPathPrefixes { - if strings.HasPrefix(methodPath, prefix) { - return true - } - } - - return false -} - func (a *API) WithAuthorization(handler http.Handler) http.Handler { return http.HandlerFunc(func(w http.ResponseWriter, req *http.Request) { if a.accessToken.IsSet() { authHeader := 
req.Header.Get(accessTokenHeader) methodPath := req.Method + req.URL.Path - if !a.accessToken.Equals(authHeader) && !isAllowedPath(methodPath) { + if !a.accessToken.Equals(authHeader) && !slices.Contains(allowedPaths, methodPath) { a.logger.Error().Msg("Trying to access secured envd without correct access token") err := fmt.Errorf("unauthorized access, please provide a valid access token or method signing if supported") From d40de310ff75065701933011183cdbc5bf1f8a57 Mon Sep 17 00:00:00 2001 From: Mish Ushakov <10400064+mishushakov@users.noreply.github.com> Date: Tue, 24 Feb 2026 16:57:54 +0100 Subject: [PATCH 33/59] refactor(envd): reset auth.go and rely on WithAuthorization middleware Remove validateAccessToken helper and per-handler auth checks from upload endpoints. Upload routes are no longer in the auth bypass list, so the WithAuthorization middleware handles authentication for them. Co-Authored-By: Claude Opus 4.6 --- packages/envd/internal/api/auth.go | 27 +++++----------- .../envd/internal/api/multipart_upload.go | 31 ------------------- 2 files changed, 7 insertions(+), 51 deletions(-) diff --git a/packages/envd/internal/api/auth.go b/packages/envd/internal/api/auth.go index 667d7e4cd7..69962e7cc1 100644 --- a/packages/envd/internal/api/auth.go +++ b/packages/envd/internal/api/auth.go @@ -21,9 +21,9 @@ const ( accessTokenHeader = "X-Access-Token" ) -// allowedPaths are paths that bypass general authentication -// (e.g., health check, endpoints supporting signing) -var allowedPaths = []string{ +// paths that are always allowed without general authentication +// POST/init is secured via MMDS hash validation instead +var authExcludedPaths = []string{ "GET/health", "GET/files", "POST/files", @@ -34,9 +34,11 @@ func (a *API) WithAuthorization(handler http.Handler) http.Handler { return http.HandlerFunc(func(w http.ResponseWriter, req *http.Request) { if a.accessToken.IsSet() { authHeader := req.Header.Get(accessTokenHeader) - methodPath := req.Method + 
req.URL.Path - if !a.accessToken.Equals(authHeader) && !slices.Contains(allowedPaths, methodPath) { + // check if this path is allowed without authentication (e.g., health check, endpoints supporting signing) + allowedPath := slices.Contains(authExcludedPaths, req.Method+req.URL.Path) + + if !a.accessToken.Equals(authHeader) && !allowedPath { a.logger.Error().Msg("Trying to access secured envd without correct access token") err := fmt.Errorf("unauthorized access, please provide a valid access token or method signing if supported") @@ -69,21 +71,6 @@ func (a *API) generateSignature(path string, username string, operation string, return fmt.Sprintf("v1_%s", hasher.HashWithoutPrefix([]byte(signature))), nil } -// validateAccessToken checks that the request carries the correct access token header. -// Returns nil if no access token is configured or if the token matches. -func (a *API) validateAccessToken(r *http.Request) error { - if !a.accessToken.IsSet() { - return nil - } - - tokenFromHeader := r.Header.Get(accessTokenHeader) - if tokenFromHeader == "" || !a.accessToken.Equals(tokenFromHeader) { - return fmt.Errorf("unauthorized: valid access token required") - } - - return nil -} - func (a *API) validateSigning(r *http.Request, signature *string, signatureExpiration *int, username *string, path string, operation string) (err error) { var expectedSignature string diff --git a/packages/envd/internal/api/multipart_upload.go b/packages/envd/internal/api/multipart_upload.go index c01477be94..a69f4acc1f 100644 --- a/packages/envd/internal/api/multipart_upload.go +++ b/packages/envd/internal/api/multipart_upload.go @@ -58,13 +58,6 @@ func (a *API) PostFilesUploadInit(w http.ResponseWriter, r *http.Request, params return } - if err := a.validateAccessToken(r); err != nil { - a.logger.Error().Err(err).Str(string(logs.OperationIDKey), operationID).Msg("unauthorized upload init request") - jsonError(w, http.StatusUnauthorized, err) - - return - } - // Resolve username 
username, err := execcontext.ResolveDefaultUsername(params.Username, a.defaults.User) if err != nil { @@ -207,14 +200,6 @@ func (a *API) PutFilesUploadUploadId(w http.ResponseWriter, r *http.Request, upl operationID := logs.AssignOperationID() - // Validate access token - if err := a.validateAccessToken(r); err != nil { - a.logger.Error().Err(err).Str(string(logs.OperationIDKey), operationID).Msg("unauthorized upload part request") - jsonError(w, http.StatusUnauthorized, err) - - return - } - // Get the session a.uploadsLock.RLock() session, exists := a.uploads[uploadId] @@ -325,14 +310,6 @@ func (a *API) PostFilesUploadUploadIdComplete(w http.ResponseWriter, r *http.Req operationID := logs.AssignOperationID() - // Validate access token - if err := a.validateAccessToken(r); err != nil { - a.logger.Error().Err(err).Str(string(logs.OperationIDKey), operationID).Msg("unauthorized upload complete request") - jsonError(w, http.StatusUnauthorized, err) - - return - } - // Get and remove the session a.uploadsLock.Lock() session, exists := a.uploads[uploadId] @@ -412,14 +389,6 @@ func (a *API) DeleteFilesUploadUploadId(w http.ResponseWriter, r *http.Request, operationID := logs.AssignOperationID() - // Validate access token - if err := a.validateAccessToken(r); err != nil { - a.logger.Error().Err(err).Str(string(logs.OperationIDKey), operationID).Msg("unauthorized upload abort request") - jsonError(w, http.StatusUnauthorized, err) - - return - } - // Get and remove the session a.uploadsLock.Lock() session, exists := a.uploads[uploadId] From f8d86ad3f90679b0cad3a10789a8129881ab22b1 Mon Sep 17 00:00:00 2001 From: Mish Ushakov <10400064+mishushakov@users.noreply.github.com> Date: Tue, 24 Feb 2026 17:35:45 +0100 Subject: [PATCH 34/59] fix(envd): improve multipart upload error messages with contextual details Include relevant identifiers (upload ID, part number, path, username, etc.) in error responses to aid debugging. 
Co-Authored-By: Claude Opus 4.6 --- .../envd/internal/api/multipart_upload.go | 20 +++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/packages/envd/internal/api/multipart_upload.go b/packages/envd/internal/api/multipart_upload.go index a69f4acc1f..300a8a7ca9 100644 --- a/packages/envd/internal/api/multipart_upload.go +++ b/packages/envd/internal/api/multipart_upload.go @@ -48,7 +48,7 @@ func (a *API) PostFilesUploadInit(w http.ResponseWriter, r *http.Request, params } if body.TotalSize > maxTotalSize { - jsonError(w, http.StatusBadRequest, fmt.Errorf("totalSize %d exceeds maximum allowed size of %d bytes (10GB)", body.TotalSize, maxTotalSize)) + jsonError(w, http.StatusBadRequest, fmt.Errorf("totalSize %d exceeds maximum allowed size of %d bytes", body.TotalSize, maxTotalSize)) return } @@ -62,7 +62,7 @@ func (a *API) PostFilesUploadInit(w http.ResponseWriter, r *http.Request, params username, err := execcontext.ResolveDefaultUsername(params.Username, a.defaults.User) if err != nil { a.logger.Error().Err(err).Str(string(logs.OperationIDKey), operationID).Msg("no user specified") - jsonError(w, http.StatusBadRequest, err) + jsonError(w, http.StatusBadRequest, fmt.Errorf("error resolving username (provided=%v, default=%q): %w", params.Username, a.defaults.User, err)) return } @@ -79,7 +79,7 @@ func (a *API) PostFilesUploadInit(w http.ResponseWriter, r *http.Request, params uid, gid, err := permissions.GetUserIdInts(u) if err != nil { a.logger.Error().Err(err).Str(string(logs.OperationIDKey), operationID).Msg("error getting user ids") - jsonError(w, http.StatusInternalServerError, fmt.Errorf("error getting user ids: %w", err)) + jsonError(w, http.StatusInternalServerError, fmt.Errorf("error getting user ids for user %q (uid=%s, gid=%s): %w", u.Username, u.Uid, u.Gid, err)) return } @@ -88,7 +88,7 @@ func (a *API) PostFilesUploadInit(w http.ResponseWriter, r *http.Request, params filePath, err := permissions.ExpandAndResolve(body.Path, 
u, a.defaults.Workdir) if err != nil { a.logger.Error().Err(err).Str(string(logs.OperationIDKey), operationID).Msg("error resolving path") - jsonError(w, http.StatusBadRequest, fmt.Errorf("error resolving path: %w", err)) + jsonError(w, http.StatusBadRequest, fmt.Errorf("error resolving path %q: %w", body.Path, err)) return } @@ -96,7 +96,7 @@ func (a *API) PostFilesUploadInit(w http.ResponseWriter, r *http.Request, params // Ensure parent directories exist if err := permissions.EnsureDirs(filepath.Dir(filePath), uid, gid); err != nil { a.logger.Error().Err(err).Str(string(logs.OperationIDKey), operationID).Msg("error ensuring directories") - jsonError(w, http.StatusInternalServerError, fmt.Errorf("error ensuring directories: %w", err)) + jsonError(w, http.StatusInternalServerError, fmt.Errorf("error ensuring directories for %q: %w", filepath.Dir(filePath), err)) return } @@ -215,7 +215,7 @@ func (a *API) PutFilesUploadUploadId(w http.ResponseWriter, r *http.Request, upl // Check if session is already being completed/aborted if session.completed.Load() { a.logger.Error().Str(string(logs.OperationIDKey), operationID).Str("uploadId", uploadId).Msg("upload session is already completing") - jsonError(w, http.StatusConflict, fmt.Errorf("upload session is already completing or aborted")) + jsonError(w, http.StatusConflict, fmt.Errorf("upload session %s is already completing or aborted", uploadId)) return } @@ -243,7 +243,7 @@ func (a *API) PutFilesUploadUploadId(w http.ResponseWriter, r *http.Request, upl data, err := io.ReadAll(limitedReader) if err != nil { a.logger.Error().Err(err).Str(string(logs.OperationIDKey), operationID).Msg("error reading part data") - jsonError(w, http.StatusInternalServerError, fmt.Errorf("error reading part data: %w", err)) + jsonError(w, http.StatusInternalServerError, fmt.Errorf("error reading part %d data: %w", partNumber, err)) return } @@ -269,7 +269,7 @@ func (a *API) PutFilesUploadUploadId(w http.ResponseWriter, r *http.Request, upl 
return } a.logger.Error().Err(err).Str(string(logs.OperationIDKey), operationID).Msg("error writing part data") - jsonError(w, http.StatusInternalServerError, fmt.Errorf("error writing part data: %w", err)) + jsonError(w, http.StatusInternalServerError, fmt.Errorf("error writing part %d data: %w", partNumber, err)) return } @@ -319,7 +319,7 @@ func (a *API) PostFilesUploadUploadIdComplete(w http.ResponseWriter, r *http.Req // Already being completed by another request a.uploadsLock.Unlock() a.logger.Error().Str(string(logs.OperationIDKey), operationID).Str("uploadId", uploadId).Msg("upload session is already completing") - jsonError(w, http.StatusConflict, fmt.Errorf("upload session is already completing")) + jsonError(w, http.StatusConflict, fmt.Errorf("upload session %s is already completing", uploadId)) return } @@ -398,7 +398,7 @@ func (a *API) DeleteFilesUploadUploadId(w http.ResponseWriter, r *http.Request, // Already being completed/aborted by another request a.uploadsLock.Unlock() a.logger.Error().Str(string(logs.OperationIDKey), operationID).Str("uploadId", uploadId).Msg("upload session is already completing") - jsonError(w, http.StatusConflict, fmt.Errorf("upload session is already completing or aborted")) + jsonError(w, http.StatusConflict, fmt.Errorf("upload session %s is already completing or aborted", uploadId)) return } From 07626ae6df4e8032fc138dea5a34c99aa72a1877 Mon Sep 17 00:00:00 2001 From: Mish Ushakov <10400064+mishushakov@users.noreply.github.com> Date: Tue, 24 Feb 2026 18:06:56 +0100 Subject: [PATCH 35/59] fix(envd): harden multipart upload against DoS and edge cases - Validate partSize >= 1 to prevent divide-by-zero panic (oapi-codegen does not enforce OpenAPI minimum constraints at runtime) - Cap numParts at 10,000 to prevent memory/CPU exhaustion from small partSize values (e.g. 
partSize=1 with totalSize=10GB) - Move session-limit check before file I/O to avoid truncating existing files when the request will be rejected for capacity - Replace io.ReadAll with streaming io.CopyN+OffsetWriter to avoid buffering up to 100MB per part upload in memory - Fix uint underflow in error message when NumParts==0 (empty file) - Add context.Context to API.New() for cleanup goroutine shutdown, preventing goroutine leaks in tests and enabling graceful shutdown Co-Authored-By: Claude Opus 4.6 --- packages/envd/internal/api/download_test.go | 13 +-- packages/envd/internal/api/init_test.go | 2 +- .../envd/internal/api/multipart_upload.go | 93 +++++++++++----- .../internal/api/multipart_upload_test.go | 101 +++++++++++++++++- packages/envd/internal/api/store.go | 18 ++-- packages/envd/main.go | 2 +- 6 files changed, 187 insertions(+), 42 deletions(-) diff --git a/packages/envd/internal/api/download_test.go b/packages/envd/internal/api/download_test.go index fabcb55a57..ac7fedccb3 100644 --- a/packages/envd/internal/api/download_test.go +++ b/packages/envd/internal/api/download_test.go @@ -3,6 +3,7 @@ package api import ( "bytes" "compress/gzip" + "context" "io" "mime/multipart" "net/http" @@ -95,7 +96,7 @@ func TestGetFilesContentDisposition(t *testing.T) { EnvVars: utils.NewMap[string, string](), User: currentUser.Username, } - api := New(&logger, defaults, nil, false) + api := New(context.Background(), &logger, defaults, nil, false) // Create request and response recorder req := httptest.NewRequest(http.MethodGet, "/files?path="+url.QueryEscape(tempFile), nil) @@ -144,7 +145,7 @@ func TestGetFilesContentDispositionWithNestedPath(t *testing.T) { EnvVars: utils.NewMap[string, string](), User: currentUser.Username, } - api := New(&logger, defaults, nil, false) + api := New(context.Background(), &logger, defaults, nil, false) // Create request and response recorder req := httptest.NewRequest(http.MethodGet, "/files?path="+url.QueryEscape(tempFile), nil) @@ 
-187,7 +188,7 @@ func TestGetFiles_GzipEncoding_ExplicitIdentityOffWithRange(t *testing.T) { EnvVars: utils.NewMap[string, string](), User: currentUser.Username, } - api := New(&logger, defaults, nil, false) + api := New(context.Background(), &logger, defaults, nil, false) // Create request and response recorder req := httptest.NewRequest(http.MethodGet, "/files?path="+url.QueryEscape(tempFile), nil) @@ -228,7 +229,7 @@ func TestGetFiles_GzipDownload(t *testing.T) { EnvVars: utils.NewMap[string, string](), User: currentUser.Username, } - api := New(&logger, defaults, nil, false) + api := New(context.Background(), &logger, defaults, nil, false) req := httptest.NewRequest(http.MethodGet, "/files?path="+url.QueryEscape(tempFile), nil) req.Header.Set("Accept-Encoding", "gzip") @@ -293,7 +294,7 @@ func TestPostFiles_GzipUpload(t *testing.T) { EnvVars: utils.NewMap[string, string](), User: currentUser.Username, } - api := New(&logger, defaults, nil, false) + api := New(context.Background(), &logger, defaults, nil, false) req := httptest.NewRequest(http.MethodPost, "/files?path="+url.QueryEscape(destPath), &gzBuf) req.Header.Set("Content-Type", mpWriter.FormDataContentType()) @@ -353,7 +354,7 @@ func TestGzipUploadThenGzipDownload(t *testing.T) { EnvVars: utils.NewMap[string, string](), User: currentUser.Username, } - api := New(&logger, defaults, nil, false) + api := New(context.Background(), &logger, defaults, nil, false) uploadReq := httptest.NewRequest(http.MethodPost, "/files?path="+url.QueryEscape(destPath), &gzBuf) uploadReq.Header.Set("Content-Type", mpWriter.FormDataContentType()) diff --git a/packages/envd/internal/api/init_test.go b/packages/envd/internal/api/init_test.go index 9877104e09..bcc6e5baf6 100644 --- a/packages/envd/internal/api/init_test.go +++ b/packages/envd/internal/api/init_test.go @@ -142,7 +142,7 @@ func newTestAPI(accessToken *SecureToken, mmdsClient MMDSClient) *API { defaults := &execcontext.Defaults{ EnvVars: utils.NewMap[string, 
string](), } - api := New(&logger, defaults, nil, false) + api := New(context.Background(), &logger, defaults, nil, false) if accessToken != nil { api.accessToken.TakeFrom(accessToken) } diff --git a/packages/envd/internal/api/multipart_upload.go b/packages/envd/internal/api/multipart_upload.go index 300a8a7ca9..6ca0ab3348 100644 --- a/packages/envd/internal/api/multipart_upload.go +++ b/packages/envd/internal/api/multipart_upload.go @@ -30,6 +30,9 @@ const ( uploadSessionTTL = 1 * time.Hour // uploadSessionCleanupInterval is how often to check for expired sessions uploadSessionCleanupInterval = 5 * time.Minute + // maxNumParts caps the number of parts to prevent memory/CPU exhaustion. + // With totalSize=10GB and partSize=1, numParts would be ~10 billion without this. + maxNumParts = 10_000 ) // PostFilesUploadInit initializes a multipart upload session @@ -47,6 +50,11 @@ func (a *API) PostFilesUploadInit(w http.ResponseWriter, r *http.Request, params return } + if body.PartSize < 1 { + jsonError(w, http.StatusBadRequest, fmt.Errorf("partSize must be at least 1")) + + return + } if body.TotalSize > maxTotalSize { jsonError(w, http.StatusBadRequest, fmt.Errorf("totalSize %d exceeds maximum allowed size of %d bytes", body.TotalSize, maxTotalSize)) @@ -58,6 +66,31 @@ func (a *API) PostFilesUploadInit(w http.ResponseWriter, r *http.Request, params return } + // Compute numParts and validate the cap before any file I/O. + var numParts uint + if body.TotalSize > 0 { + numParts = uint((body.TotalSize + body.PartSize - 1) / body.PartSize) + } + + if numParts > maxNumParts { + jsonError(w, http.StatusBadRequest, fmt.Errorf("upload would require %d parts, exceeding the maximum of %d (increase partSize)", numParts, maxNumParts)) + + return + } + + // Check session limit early, before any file I/O, to avoid truncating + // existing files only to reject the request due to capacity. 
+ a.uploadsLock.RLock() + sessionCount := len(a.uploads) + a.uploadsLock.RUnlock() + + if sessionCount >= maxUploadSessions { + a.logger.Error().Str(string(logs.OperationIDKey), operationID).Int("maxSessions", maxUploadSessions).Msg("too many concurrent upload sessions") + jsonError(w, http.StatusTooManyRequests, fmt.Errorf("too many concurrent upload sessions (max %d)", maxUploadSessions)) + + return + } + // Resolve username username, err := execcontext.ResolveDefaultUsername(params.Username, a.defaults.User) if err != nil { @@ -146,8 +179,6 @@ func (a *API) PostFilesUploadInit(w http.ResponseWriter, r *http.Request, params uploadID := uuid.NewString() - numParts := uint((body.TotalSize + body.PartSize - 1) / body.PartSize) - session := &MultipartUploadSession{ UploadID: uploadID, FilePath: filePath, @@ -222,7 +253,15 @@ func (a *API) PutFilesUploadUploadId(w http.ResponseWriter, r *http.Request, upl partNumber := uint(params.Part) - // Check part number is within range (also rejects parts for empty files where NumParts == 0) + // Reject parts for empty files (no parts expected) + if session.NumParts == 0 { + a.logger.Error().Str(string(logs.OperationIDKey), operationID).Uint("partNumber", partNumber).Msg("upload has no parts (empty file)") + jsonError(w, http.StatusBadRequest, fmt.Errorf("upload has no parts (empty file); no part uploads are accepted")) + + return + } + + // Check part number is within range if partNumber >= session.NumParts { a.logger.Error().Str(string(logs.OperationIDKey), operationID).Uint("partNumber", partNumber).Uint("numParts", session.NumParts).Msg("part number out of range") jsonError(w, http.StatusBadRequest, fmt.Errorf("part number %d out of range (expected 0-%d)", partNumber, session.NumParts-1)) @@ -238,30 +277,12 @@ func (a *API) PutFilesUploadUploadId(w http.ResponseWriter, r *http.Request, upl expectedSize = session.TotalSize - offset } - // Read the part data with size limit - limitedReader := io.LimitReader(r.Body, 
expectedSize+1) - data, err := io.ReadAll(limitedReader) - if err != nil { - a.logger.Error().Err(err).Str(string(logs.OperationIDKey), operationID).Msg("error reading part data") - jsonError(w, http.StatusInternalServerError, fmt.Errorf("error reading part %d data: %w", partNumber, err)) - - return - } - - size := int64(len(data)) - - // Enforce exact size match to prevent silent corruption from truncated uploads - if size != expectedSize { - a.logger.Error().Str(string(logs.OperationIDKey), operationID).Int64("size", size).Int64("expectedSize", expectedSize).Msg("part size mismatch") - jsonError(w, http.StatusBadRequest, fmt.Errorf("part size %d does not match expected size %d", size, expectedSize)) - - return - } - - // Write directly to the destination file at the correct offset - // WriteAt is safe for concurrent writes at different offsets, no lock needed here - _, err = session.DestFile.WriteAt(data, offset) - if err != nil { + // Stream the part data directly to the file at offset without buffering the + // entire part in memory. OffsetWriter + CopyN uses a small internal buffer + // (~32KB) instead of reading the full part into a single allocation. 
+ offsetWriter := io.NewOffsetWriter(session.DestFile, offset) + written, err := io.CopyN(offsetWriter, r.Body, expectedSize) + if err != nil && !errors.Is(err, io.EOF) { if errors.Is(err, syscall.ENOSPC) { a.logger.Error().Err(err).Str(string(logs.OperationIDKey), operationID).Msg("not enough disk space") jsonError(w, http.StatusInsufficientStorage, fmt.Errorf("not enough disk space")) @@ -274,6 +295,24 @@ func (a *API) PutFilesUploadUploadId(w http.ResponseWriter, r *http.Request, upl return } + if written != expectedSize { + a.logger.Error().Str(string(logs.OperationIDKey), operationID).Int64("written", written).Int64("expectedSize", expectedSize).Msg("part size mismatch") + jsonError(w, http.StatusBadRequest, fmt.Errorf("part size %d does not match expected size %d", written, expectedSize)) + + return + } + + // Check for extra data beyond expected size + var extra [1]byte + if n, _ := r.Body.Read(extra[:]); n > 0 { + a.logger.Error().Str(string(logs.OperationIDKey), operationID).Int64("expectedSize", expectedSize).Msg("part data exceeds expected size") + jsonError(w, http.StatusBadRequest, fmt.Errorf("part data exceeds expected size %d", expectedSize)) + + return + } + + size := written + // Mark part as written - only lock for map access session.mu.Lock() if session.PartsWritten[partNumber] { diff --git a/packages/envd/internal/api/multipart_upload_test.go b/packages/envd/internal/api/multipart_upload_test.go index a56fc92359..52ab13c785 100644 --- a/packages/envd/internal/api/multipart_upload_test.go +++ b/packages/envd/internal/api/multipart_upload_test.go @@ -2,6 +2,7 @@ package api import ( "bytes" + "context" "encoding/json" "fmt" "net/http" @@ -27,7 +28,10 @@ func newMultipartTestAPI(t *testing.T) *API { EnvVars: utils.NewMap[string, string](), } - return New(&logger, defaults, nil, true) + ctx, cancel := context.WithCancel(context.Background()) + t.Cleanup(cancel) + + return New(ctx, &logger, defaults, nil, true) } func TestMultipartUpload(t 
*testing.T) { @@ -474,6 +478,101 @@ func TestMultipartUpload(t *testing.T) { require.NoError(t, err) assert.Empty(t, string(content)) }) + + t.Run("reject too many parts", func(t *testing.T) { + t.Parallel() + api := newMultipartTestAPI(t) + + // totalSize=10GB, partSize=1 would create ~10 billion parts + body := PostFilesUploadInitJSONRequestBody{ + Path: "/tmp/too-many-parts.txt", + TotalSize: maxTotalSize, + PartSize: 1, + } + bodyBytes, _ := json.Marshal(body) + + req := httptest.NewRequest(http.MethodPost, "/files/upload/init", bytes.NewReader(bodyBytes)) + req.Header.Set("Content-Type", "application/json") + w := httptest.NewRecorder() + + api.PostFilesUploadInit(w, req, PostFilesUploadInitParams{}) + assert.Equal(t, http.StatusBadRequest, w.Code) + + var errResp Error + err := json.Unmarshal(w.Body.Bytes(), &errResp) + require.NoError(t, err) + assert.Contains(t, errResp.Message, "parts") + }) + + t.Run("reject partSize zero", func(t *testing.T) { + t.Parallel() + api := newMultipartTestAPI(t) + + body := PostFilesUploadInitJSONRequestBody{ + Path: "/tmp/should-not-exist.txt", + TotalSize: 100, + PartSize: 0, + } + bodyBytes, _ := json.Marshal(body) + + req := httptest.NewRequest(http.MethodPost, "/files/upload/init", bytes.NewReader(bodyBytes)) + req.Header.Set("Content-Type", "application/json") + w := httptest.NewRecorder() + + api.PostFilesUploadInit(w, req, PostFilesUploadInitParams{}) + assert.Equal(t, http.StatusBadRequest, w.Code) + }) + + t.Run("reject part upload on empty file", func(t *testing.T) { + t.Parallel() + api := newMultipartTestAPI(t) + tempDir := t.TempDir() + destPath := filepath.Join(tempDir, "empty-reject.txt") + + // Initialize upload with 0 size + body := PostFilesUploadInitJSONRequestBody{ + Path: destPath, + TotalSize: 0, + PartSize: 1024, + } + bodyBytes, _ := json.Marshal(body) + + initReq := httptest.NewRequest(http.MethodPost, "/files/upload/init", bytes.NewReader(bodyBytes)) + initReq.Header.Set("Content-Type", 
"application/json") + initW := httptest.NewRecorder() + + api.PostFilesUploadInit(initW, initReq, PostFilesUploadInitParams{}) + require.Equal(t, http.StatusOK, initW.Code) + + var initResp MultipartUploadInit + err := json.Unmarshal(initW.Body.Bytes(), &initResp) + require.NoError(t, err) + uploadId := initResp.UploadId + + // Try to upload a part — should be rejected with clear message + partReq := httptest.NewRequest(http.MethodPut, "/files/upload/"+uploadId+"?part=0", bytes.NewReader([]byte("data"))) + partReq.Header.Set("Content-Type", "application/octet-stream") + partW := httptest.NewRecorder() + + api.PutFilesUploadUploadId(partW, partReq, uploadId, PutFilesUploadUploadIdParams{Part: 0}) + assert.Equal(t, http.StatusBadRequest, partW.Code) + + // Verify error message does not contain a huge number from uint underflow + var errResp Error + err = json.Unmarshal(partW.Body.Bytes(), &errResp) + require.NoError(t, err) + assert.Contains(t, errResp.Message, "empty file") + + // Clean up + api.uploadsLock.Lock() + session := api.uploads[uploadId] + if session != nil { + session.DestFile.Close() + os.Remove(session.FilePath) + } + delete(api.uploads, uploadId) + api.uploadsLock.Unlock() + }) } func TestMultipartUploadRouting(t *testing.T) { diff --git a/packages/envd/internal/api/store.go b/packages/envd/internal/api/store.go index 0fb825094c..1bef16c065 100644 --- a/packages/envd/internal/api/store.go +++ b/packages/envd/internal/api/store.go @@ -71,7 +71,7 @@ type API struct { uploadsLock sync.RWMutex } -func New(l *zerolog.Logger, defaults *execcontext.Defaults, mmdsChan chan *host.MMDSOpts, isNotFC bool) *API { +func New(ctx context.Context, l *zerolog.Logger, defaults *execcontext.Defaults, mmdsChan chan *host.MMDSOpts, isNotFC bool) *API { api := &API{ logger: l, defaults: defaults, @@ -84,18 +84,24 @@ func New(l *zerolog.Logger, defaults *execcontext.Defaults, mmdsChan chan *host. 
} // Start background cleanup for expired upload sessions - go api.cleanupExpiredUploads() + go api.cleanupExpiredUploads(ctx) return api } -// cleanupExpiredUploads periodically removes upload sessions that have exceeded their TTL -func (a *API) cleanupExpiredUploads() { +// cleanupExpiredUploads periodically removes upload sessions that have exceeded their TTL. +// It stops when ctx is cancelled, preventing goroutine leaks in tests and enabling graceful shutdown. +func (a *API) cleanupExpiredUploads(ctx context.Context) { ticker := time.NewTicker(uploadSessionCleanupInterval) defer ticker.Stop() - for range ticker.C { - a.removeExpiredSessions() + for { + select { + case <-ctx.Done(): + return + case <-ticker.C: + a.removeExpiredSessions() + } } } diff --git a/packages/envd/main.go b/packages/envd/main.go index e86a21ee6b..7a7d7fb565 100644 --- a/packages/envd/main.go +++ b/packages/envd/main.go @@ -187,7 +187,7 @@ func main() { processLogger := l.With().Str("logger", "process").Logger() processService := processRpc.Handle(m, &processLogger, defaults, cgroupManager) - service := api.New(&envLogger, defaults, mmdsChan, isNotFC) + service := api.New(ctx, &envLogger, defaults, mmdsChan, isNotFC) handler := api.HandlerFromMux(service, m) middleware := authn.NewMiddleware(permissions.AuthenticateUsername) From 167fbcf0b9823a1149d1d4af9ef7be715df26f15 Mon Sep 17 00:00:00 2001 From: Mish Ushakov <10400064+mishushakov@users.noreply.github.com> Date: Tue, 24 Feb 2026 18:12:05 +0100 Subject: [PATCH 36/59] fix(envd): pass context parameter to newTestAPI to satisfy contextcheck linter Co-Authored-By: Claude Opus 4.6 --- packages/envd/internal/api/init_test.go | 34 ++++++++++++------------- 1 file changed, 17 insertions(+), 17 deletions(-) diff --git a/packages/envd/internal/api/init_test.go b/packages/envd/internal/api/init_test.go index bcc6e5baf6..43fbb099d5 100644 --- a/packages/envd/internal/api/init_test.go +++ b/packages/envd/internal/api/init_test.go @@ -137,12 
+137,12 @@ func (m *mockMMDSClient) GetAccessTokenHash(_ context.Context) (string, error) { return m.hash, m.err } -func newTestAPI(accessToken *SecureToken, mmdsClient MMDSClient) *API { +func newTestAPI(ctx context.Context, accessToken *SecureToken, mmdsClient MMDSClient) *API { logger := zerolog.Nop() defaults := &execcontext.Defaults{ EnvVars: utils.NewMap[string, string](), } - api := New(context.Background(), &logger, defaults, nil, false) + api := New(ctx, &logger, defaults, nil, false) if accessToken != nil { api.accessToken.TakeFrom(accessToken) } @@ -241,7 +241,7 @@ func TestValidateInitAccessToken(t *testing.T) { t.Run(tt.name, func(t *testing.T) { t.Parallel() mmdsClient := &mockMMDSClient{hash: tt.mmdsHash, err: tt.mmdsErr} - api := newTestAPI(tt.accessToken, mmdsClient) + api := newTestAPI(ctx, tt.accessToken, mmdsClient) err := api.validateInitAccessToken(ctx, tt.requestToken) @@ -263,7 +263,7 @@ func TestCheckMMDSHash(t *testing.T) { t.Parallel() token := "my-secret-token" mmdsClient := &mockMMDSClient{hash: keys.HashAccessToken(token), err: nil} - api := newTestAPI(nil, mmdsClient) + api := newTestAPI(ctx, nil, mmdsClient) matches, exists := api.checkMMDSHash(ctx, secureTokenPtr(token)) @@ -274,7 +274,7 @@ func TestCheckMMDSHash(t *testing.T) { t.Run("returns no match when token hash differs from MMDS hash", func(t *testing.T) { t.Parallel() mmdsClient := &mockMMDSClient{hash: keys.HashAccessToken("different-token"), err: nil} - api := newTestAPI(nil, mmdsClient) + api := newTestAPI(ctx, nil, mmdsClient) matches, exists := api.checkMMDSHash(ctx, secureTokenPtr("my-token")) @@ -285,7 +285,7 @@ func TestCheckMMDSHash(t *testing.T) { t.Run("returns exists but no match when request token is nil", func(t *testing.T) { t.Parallel() mmdsClient := &mockMMDSClient{hash: keys.HashAccessToken("some-token"), err: nil} - api := newTestAPI(nil, mmdsClient) + api := newTestAPI(ctx, nil, mmdsClient) matches, exists := api.checkMMDSHash(ctx, nil) @@ -296,7 +296,7 
@@ func TestCheckMMDSHash(t *testing.T) { t.Run("returns false, false when MMDS returns error", func(t *testing.T) { t.Parallel() mmdsClient := &mockMMDSClient{hash: "", err: assert.AnError} - api := newTestAPI(nil, mmdsClient) + api := newTestAPI(ctx, nil, mmdsClient) matches, exists := api.checkMMDSHash(ctx, secureTokenPtr("any-token")) @@ -307,7 +307,7 @@ func TestCheckMMDSHash(t *testing.T) { t.Run("returns false, false when MMDS returns empty hash with non-nil request", func(t *testing.T) { t.Parallel() mmdsClient := &mockMMDSClient{hash: "", err: nil} - api := newTestAPI(nil, mmdsClient) + api := newTestAPI(ctx, nil, mmdsClient) matches, exists := api.checkMMDSHash(ctx, secureTokenPtr("any-token")) @@ -318,7 +318,7 @@ func TestCheckMMDSHash(t *testing.T) { t.Run("returns false, false when MMDS returns empty hash with nil request", func(t *testing.T) { t.Parallel() mmdsClient := &mockMMDSClient{hash: "", err: nil} - api := newTestAPI(nil, mmdsClient) + api := newTestAPI(ctx, nil, mmdsClient) matches, exists := api.checkMMDSHash(ctx, nil) @@ -329,7 +329,7 @@ func TestCheckMMDSHash(t *testing.T) { t.Run("returns true, true when MMDS returns hash of empty string with nil request (explicit reset)", func(t *testing.T) { t.Parallel() mmdsClient := &mockMMDSClient{hash: keys.HashAccessToken(""), err: nil} - api := newTestAPI(nil, mmdsClient) + api := newTestAPI(ctx, nil, mmdsClient) matches, exists := api.checkMMDSHash(ctx, nil) @@ -451,7 +451,7 @@ func TestSetData(t *testing.T) { t.Run(tt.name, func(t *testing.T) { t.Parallel() mmdsClient := &mockMMDSClient{hash: tt.mmdsHash, err: tt.mmdsErr} - api := newTestAPI(tt.existingToken, mmdsClient) + api := newTestAPI(ctx, tt.existingToken, mmdsClient) data := PostInitJSONBody{ AccessToken: tt.requestToken, @@ -478,7 +478,7 @@ func TestSetData(t *testing.T) { t.Run("sets environment variables", func(t *testing.T) { t.Parallel() mmdsClient := &mockMMDSClient{hash: "", err: assert.AnError} - api := newTestAPI(nil, 
mmdsClient) + api := newTestAPI(ctx, nil, mmdsClient) envVars := EnvVars{"FOO": "bar", "BAZ": "qux"} data := PostInitJSONBody{ @@ -499,7 +499,7 @@ func TestSetData(t *testing.T) { t.Run("sets default user", func(t *testing.T) { t.Parallel() mmdsClient := &mockMMDSClient{hash: "", err: assert.AnError} - api := newTestAPI(nil, mmdsClient) + api := newTestAPI(ctx, nil, mmdsClient) data := PostInitJSONBody{ DefaultUser: utilsShared.ToPtr("testuser"), @@ -514,7 +514,7 @@ func TestSetData(t *testing.T) { t.Run("does not set default user when empty", func(t *testing.T) { t.Parallel() mmdsClient := &mockMMDSClient{hash: "", err: assert.AnError} - api := newTestAPI(nil, mmdsClient) + api := newTestAPI(ctx, nil, mmdsClient) api.defaults.User = "original" data := PostInitJSONBody{ @@ -530,7 +530,7 @@ func TestSetData(t *testing.T) { t.Run("sets default workdir", func(t *testing.T) { t.Parallel() mmdsClient := &mockMMDSClient{hash: "", err: assert.AnError} - api := newTestAPI(nil, mmdsClient) + api := newTestAPI(ctx, nil, mmdsClient) data := PostInitJSONBody{ DefaultWorkdir: utilsShared.ToPtr("/home/user"), @@ -546,7 +546,7 @@ func TestSetData(t *testing.T) { t.Run("does not set default workdir when empty", func(t *testing.T) { t.Parallel() mmdsClient := &mockMMDSClient{hash: "", err: assert.AnError} - api := newTestAPI(nil, mmdsClient) + api := newTestAPI(ctx, nil, mmdsClient) originalWorkdir := "/original" api.defaults.Workdir = &originalWorkdir @@ -564,7 +564,7 @@ func TestSetData(t *testing.T) { t.Run("sets multiple fields at once", func(t *testing.T) { t.Parallel() mmdsClient := &mockMMDSClient{hash: "", err: assert.AnError} - api := newTestAPI(nil, mmdsClient) + api := newTestAPI(ctx, nil, mmdsClient) envVars := EnvVars{"KEY": "value"} data := PostInitJSONBody{ From 8a339419924779633d514be9481bb862fd7ede61 Mon Sep 17 00:00:00 2001 From: Mish Ushakov <10400064+mishushakov@users.noreply.github.com> Date: Tue, 24 Feb 2026 18:30:33 +0100 Subject: [PATCH 37/59] fix(envd): add 
negative totalSize guard, concurrent part protection, and cap missing parts error - Reject totalSize < 0 to prevent negative sizes passing validation and producing empty files with negative reported size in complete response - Add partsInProgress map to prevent concurrent writes to the same part number, which could interleave and corrupt file contents - Re-check session.completed under session.mu after write to prevent the race where Complete deletes the file between write and PartsWritten update, causing part upload to return 200 while the file is gone - Cap missing parts list in error response to first 20 entries to avoid ~80KB JSON payloads when all 10,000 parts are missing Co-Authored-By: Claude Opus 4.6 --- .../envd/internal/api/multipart_upload.go | 67 +++++++++++++++++-- .../internal/api/multipart_upload_test.go | 24 +++++++ packages/envd/internal/api/store.go | 9 +-- 3 files changed, 90 insertions(+), 10 deletions(-) diff --git a/packages/envd/internal/api/multipart_upload.go b/packages/envd/internal/api/multipart_upload.go index 6ca0ab3348..6f8e611028 100644 --- a/packages/envd/internal/api/multipart_upload.go +++ b/packages/envd/internal/api/multipart_upload.go @@ -33,6 +33,9 @@ const ( // maxNumParts caps the number of parts to prevent memory/CPU exhaustion. // With totalSize=10GB and partSize=1, numParts would be ~10 billion without this. maxNumParts = 10_000 + // maxMissingPartsInError caps the number of missing part numbers shown in error responses + // to avoid huge JSON payloads (e.g. 10,000 missing parts serialized as integers). 
+ maxMissingPartsInError = 20 ) // PostFilesUploadInit initializes a multipart upload session @@ -55,6 +58,11 @@ func (a *API) PostFilesUploadInit(w http.ResponseWriter, r *http.Request, params return } + if body.TotalSize < 0 { + jsonError(w, http.StatusBadRequest, fmt.Errorf("totalSize must be non-negative")) + + return + } if body.TotalSize > maxTotalSize { jsonError(w, http.StatusBadRequest, fmt.Errorf("totalSize %d exceeds maximum allowed size of %d bytes", body.TotalSize, maxTotalSize)) @@ -188,8 +196,9 @@ func (a *API) PostFilesUploadInit(w http.ResponseWriter, r *http.Request, params NumParts: numParts, UID: uid, GID: gid, - PartsWritten: make(map[uint]bool), - CreatedAt: time.Now(), + PartsWritten: make(map[uint]bool), + partsInProgress: make(map[uint]bool), + CreatedAt: time.Now(), } // Atomically check session limit and insert — prevents TOCTOU race where @@ -243,7 +252,7 @@ func (a *API) PutFilesUploadUploadId(w http.ResponseWriter, r *http.Request, upl return } - // Check if session is already being completed/aborted + // Fast-path: reject early if session is already completing (authoritative check under session.mu below) if session.completed.Load() { a.logger.Error().Str(string(logs.OperationIDKey), operationID).Str("uploadId", uploadId).Msg("upload session is already completing") jsonError(w, http.StatusConflict, fmt.Errorf("upload session %s is already completing or aborted", uploadId)) @@ -277,6 +286,36 @@ func (a *API) PutFilesUploadUploadId(w http.ResponseWriter, r *http.Request, upl expectedSize = session.TotalSize - offset } + // Reserve this part under lock to prevent concurrent writes to the same part number + // and to authoritatively check completed status (the atomic check above is a fast path). 
+ session.mu.Lock() + if session.completed.Load() { + session.mu.Unlock() + a.logger.Error().Str(string(logs.OperationIDKey), operationID).Str("uploadId", uploadId).Msg("upload session completed during part reservation") + jsonError(w, http.StatusConflict, fmt.Errorf("upload session %s is already completing or aborted", uploadId)) + + return + } + if session.partsInProgress[partNumber] { + session.mu.Unlock() + a.logger.Error().Str(string(logs.OperationIDKey), operationID).Str("uploadId", uploadId).Uint("partNumber", partNumber).Msg("part is already being uploaded by another request") + jsonError(w, http.StatusConflict, fmt.Errorf("part %d is already being uploaded by another request for session %s", partNumber, uploadId)) + + return + } + session.partsInProgress[partNumber] = true + session.mu.Unlock() + + // Ensure in-progress flag is cleaned up on any early return (write errors, size mismatch, etc.) + partReserved := true + defer func() { + if partReserved { + session.mu.Lock() + delete(session.partsInProgress, partNumber) + session.mu.Unlock() + } + }() + // Stream the part data directly to the file at offset without buffering the // entire part in memory. OffsetWriter + CopyN uses a small internal buffer // (~32KB) instead of reading the full part into a single allocation. @@ -313,8 +352,19 @@ func (a *API) PutFilesUploadUploadId(w http.ResponseWriter, r *http.Request, upl size := written - // Mark part as written - only lock for map access + // Finalize: mark part as written under lock. Re-check completed to prevent + // the race where Complete deletes the file between our write and this point, + // which would cause us to return 200 while the file is gone. 
session.mu.Lock() + delete(session.partsInProgress, partNumber) + partReserved = false + if session.completed.Load() { + session.mu.Unlock() + a.logger.Error().Str(string(logs.OperationIDKey), operationID).Str("uploadId", uploadId).Uint("partNumber", partNumber).Msg("session completed during part upload") + jsonError(w, http.StatusConflict, fmt.Errorf("upload session %s was completed or aborted during part upload", uploadId)) + + return + } if session.PartsWritten[partNumber] { a.logger.Warn(). Str(string(logs.OperationIDKey), operationID). @@ -389,9 +439,14 @@ func (a *API) PostFilesUploadUploadIdComplete(w http.ResponseWriter, r *http.Req a.logger.Error(). Str(string(logs.OperationIDKey), operationID). Str("uploadId", uploadId). - Uints("missingParts", missingParts). + Int("missingCount", len(missingParts)). Msg("missing parts in upload") - jsonError(w, http.StatusBadRequest, fmt.Errorf("missing parts: %v", missingParts)) + // Cap the error message to avoid huge JSON responses (e.g. 10,000 missing parts) + if len(missingParts) > maxMissingPartsInError { + jsonError(w, http.StatusBadRequest, fmt.Errorf("missing %d parts (first %d: %v)", len(missingParts), maxMissingPartsInError, missingParts[:maxMissingPartsInError])) + } else { + jsonError(w, http.StatusBadRequest, fmt.Errorf("missing parts: %v", missingParts)) + } return } diff --git a/packages/envd/internal/api/multipart_upload_test.go b/packages/envd/internal/api/multipart_upload_test.go index 52ab13c785..f8760410ad 100644 --- a/packages/envd/internal/api/multipart_upload_test.go +++ b/packages/envd/internal/api/multipart_upload_test.go @@ -504,6 +504,30 @@ func TestMultipartUpload(t *testing.T) { assert.Contains(t, errResp.Message, "parts") }) + t.Run("reject negative totalSize", func(t *testing.T) { + t.Parallel() + api := newMultipartTestAPI(t) + + body := PostFilesUploadInitJSONRequestBody{ + Path: "/tmp/negative-size.txt", + TotalSize: -1, + PartSize: 1024, + } + bodyBytes, _ := json.Marshal(body) + + req 
:= httptest.NewRequest(http.MethodPost, "/files/upload/init", bytes.NewReader(bodyBytes)) + req.Header.Set("Content-Type", "application/json") + w := httptest.NewRecorder() + + api.PostFilesUploadInit(w, req, PostFilesUploadInitParams{}) + assert.Equal(t, http.StatusBadRequest, w.Code) + + var errResp Error + err := json.Unmarshal(w.Body.Bytes(), &errResp) + require.NoError(t, err) + assert.Contains(t, errResp.Message, "non-negative") + }) + t.Run("reject partSize zero", func(t *testing.T) { t.Parallel() api := newMultipartTestAPI(t) diff --git a/packages/envd/internal/api/store.go b/packages/envd/internal/api/store.go index 1bef16c065..c76ac0f105 100644 --- a/packages/envd/internal/api/store.go +++ b/packages/envd/internal/api/store.go @@ -26,10 +26,11 @@ type MultipartUploadSession struct { NumParts uint // Total number of expected parts UID int GID int - PartsWritten map[uint]bool // partNumber -> whether it's been written - CreatedAt time.Time - completed atomic.Bool // Set to true when complete/abort starts to prevent new parts - mu sync.Mutex + PartsWritten map[uint]bool // partNumber -> whether it's been written + partsInProgress map[uint]bool // partNumber -> whether a write is currently in flight + CreatedAt time.Time + completed atomic.Bool // Set to true when complete/abort starts to prevent new parts + mu sync.Mutex } // ignoreNotExist returns nil if err is a "not exist" error, otherwise returns err unchanged. 
From 110b6ab20ec62e663a441e9e45e16383fa03f7ab Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" Date: Tue, 24 Feb 2026 17:31:48 +0000 Subject: [PATCH 38/59] chore: auto-commit generated changes --- packages/envd/internal/api/multipart_upload.go | 16 ++++++++-------- packages/envd/internal/api/store.go | 16 ++++++++-------- 2 files changed, 16 insertions(+), 16 deletions(-) diff --git a/packages/envd/internal/api/multipart_upload.go b/packages/envd/internal/api/multipart_upload.go index 6f8e611028..36b0e93761 100644 --- a/packages/envd/internal/api/multipart_upload.go +++ b/packages/envd/internal/api/multipart_upload.go @@ -188,14 +188,14 @@ func (a *API) PostFilesUploadInit(w http.ResponseWriter, r *http.Request, params uploadID := uuid.NewString() session := &MultipartUploadSession{ - UploadID: uploadID, - FilePath: filePath, - DestFile: destFile, - TotalSize: body.TotalSize, - PartSize: body.PartSize, - NumParts: numParts, - UID: uid, - GID: gid, + UploadID: uploadID, + FilePath: filePath, + DestFile: destFile, + TotalSize: body.TotalSize, + PartSize: body.PartSize, + NumParts: numParts, + UID: uid, + GID: gid, PartsWritten: make(map[uint]bool), partsInProgress: make(map[uint]bool), CreatedAt: time.Now(), diff --git a/packages/envd/internal/api/store.go b/packages/envd/internal/api/store.go index c76ac0f105..fc26bad963 100644 --- a/packages/envd/internal/api/store.go +++ b/packages/envd/internal/api/store.go @@ -18,14 +18,14 @@ import ( // MultipartUploadSession tracks an in-progress multipart upload type MultipartUploadSession struct { - UploadID string - FilePath string // Final destination path - DestFile *os.File // Open file handle for direct writes - TotalSize int64 // Total expected file size (validated >= 0 at input) - PartSize int64 // Size of each part (validated > 0 at input) - NumParts uint // Total number of expected parts - UID int - GID int + UploadID string + FilePath string // Final destination path + DestFile *os.File // Open file handle for 
direct writes + TotalSize int64 // Total expected file size (validated >= 0 at input) + PartSize int64 // Size of each part (validated > 0 at input) + NumParts uint // Total number of expected parts + UID int + GID int PartsWritten map[uint]bool // partNumber -> whether it's been written partsInProgress map[uint]bool // partNumber -> whether a write is currently in flight CreatedAt time.Time From 40d37e0d06efa4748def12d101b86ad34836dd4c Mon Sep 17 00:00:00 2001 From: Mish Ushakov <10400064+mishushakov@users.noreply.github.com> Date: Tue, 24 Feb 2026 18:33:24 +0100 Subject: [PATCH 39/59] fix(envd): simplify missing parts error to just show count Co-Authored-By: Claude Opus 4.6 --- packages/envd/internal/api/multipart_upload.go | 10 +--------- 1 file changed, 1 insertion(+), 9 deletions(-) diff --git a/packages/envd/internal/api/multipart_upload.go b/packages/envd/internal/api/multipart_upload.go index 36b0e93761..631187bba6 100644 --- a/packages/envd/internal/api/multipart_upload.go +++ b/packages/envd/internal/api/multipart_upload.go @@ -33,9 +33,6 @@ const ( // maxNumParts caps the number of parts to prevent memory/CPU exhaustion. // With totalSize=10GB and partSize=1, numParts would be ~10 billion without this. maxNumParts = 10_000 - // maxMissingPartsInError caps the number of missing part numbers shown in error responses - // to avoid huge JSON payloads (e.g. 10,000 missing parts serialized as integers). - maxMissingPartsInError = 20 ) // PostFilesUploadInit initializes a multipart upload session @@ -441,12 +438,7 @@ func (a *API) PostFilesUploadUploadIdComplete(w http.ResponseWriter, r *http.Req Str("uploadId", uploadId). Int("missingCount", len(missingParts)). Msg("missing parts in upload") - // Cap the error message to avoid huge JSON responses (e.g. 
10,000 missing parts) - if len(missingParts) > maxMissingPartsInError { - jsonError(w, http.StatusBadRequest, fmt.Errorf("missing %d parts (first %d: %v)", len(missingParts), maxMissingPartsInError, missingParts[:maxMissingPartsInError])) - } else { - jsonError(w, http.StatusBadRequest, fmt.Errorf("missing parts: %v", missingParts)) - } + jsonError(w, http.StatusBadRequest, fmt.Errorf("missing %d of %d parts", len(missingParts), session.NumParts)) return } From a6f257f6099294eff79057570fac9aa883e8cfa8 Mon Sep 17 00:00:00 2001 From: Mish Ushakov <10400064+mishushakov@users.noreply.github.com> Date: Tue, 24 Feb 2026 19:17:39 +0100 Subject: [PATCH 40/59] refactor(envd): clean up multipart upload PR - Remove unnecessary `size := written` alias in part upload handler - Remove unreachable 507 response from complete endpoint spec - Remove low-value routing test that only tested generated code - Unify PartsWritten/partsInProgress maps into single Parts map with PartStatus enum (partPending/PartInProgress/PartComplete) Co-Authored-By: Claude Opus 4.6 --- .../envd/internal/api/multipart_upload.go | 42 +++++++++---------- .../internal/api/multipart_upload_test.go | 33 --------------- packages/envd/internal/api/store.go | 34 +++++++++------ packages/envd/spec/envd.yaml | 2 - tests/integration/internal/envd/generated.go | 8 ---- 5 files changed, 40 insertions(+), 79 deletions(-) diff --git a/packages/envd/internal/api/multipart_upload.go b/packages/envd/internal/api/multipart_upload.go index 631187bba6..c65942ec40 100644 --- a/packages/envd/internal/api/multipart_upload.go +++ b/packages/envd/internal/api/multipart_upload.go @@ -185,17 +185,16 @@ func (a *API) PostFilesUploadInit(w http.ResponseWriter, r *http.Request, params uploadID := uuid.NewString() session := &MultipartUploadSession{ - UploadID: uploadID, - FilePath: filePath, - DestFile: destFile, - TotalSize: body.TotalSize, - PartSize: body.PartSize, - NumParts: numParts, - UID: uid, - GID: gid, - PartsWritten: 
make(map[uint]bool), - partsInProgress: make(map[uint]bool), - CreatedAt: time.Now(), + UploadID: uploadID, + FilePath: filePath, + DestFile: destFile, + TotalSize: body.TotalSize, + PartSize: body.PartSize, + NumParts: numParts, + UID: uid, + GID: gid, + Parts: make(map[uint]PartStatus), + CreatedAt: time.Now(), } // Atomically check session limit and insert — prevents TOCTOU race where @@ -293,14 +292,14 @@ func (a *API) PutFilesUploadUploadId(w http.ResponseWriter, r *http.Request, upl return } - if session.partsInProgress[partNumber] { + if session.Parts[partNumber] == PartInProgress { session.mu.Unlock() a.logger.Error().Str(string(logs.OperationIDKey), operationID).Str("uploadId", uploadId).Uint("partNumber", partNumber).Msg("part is already being uploaded by another request") jsonError(w, http.StatusConflict, fmt.Errorf("part %d is already being uploaded by another request for session %s", partNumber, uploadId)) return } - session.partsInProgress[partNumber] = true + session.Parts[partNumber] = PartInProgress session.mu.Unlock() // Ensure in-progress flag is cleaned up on any early return (write errors, size mismatch, etc.) @@ -308,7 +307,7 @@ func (a *API) PutFilesUploadUploadId(w http.ResponseWriter, r *http.Request, upl defer func() { if partReserved { session.mu.Lock() - delete(session.partsInProgress, partNumber) + delete(session.Parts, partNumber) session.mu.Unlock() } }() @@ -347,13 +346,10 @@ func (a *API) PutFilesUploadUploadId(w http.ResponseWriter, r *http.Request, upl return } - size := written - - // Finalize: mark part as written under lock. Re-check completed to prevent + // Finalize: mark part as complete under lock. Re-check completed to prevent // the race where Complete deletes the file between our write and this point, // which would cause us to return 200 while the file is gone. 
session.mu.Lock() - delete(session.partsInProgress, partNumber) partReserved = false if session.completed.Load() { session.mu.Unlock() @@ -362,21 +358,21 @@ func (a *API) PutFilesUploadUploadId(w http.ResponseWriter, r *http.Request, upl return } - if session.PartsWritten[partNumber] { + if session.Parts[partNumber] == PartComplete { a.logger.Warn(). Str(string(logs.OperationIDKey), operationID). Str("uploadId", uploadId). Uint("partNumber", partNumber). Msg("overwriting existing part") } - session.PartsWritten[partNumber] = true + session.Parts[partNumber] = PartComplete session.mu.Unlock() a.logger.Debug(). Str(string(logs.OperationIDKey), operationID). Str("uploadId", uploadId). Uint("partNumber", partNumber). - Int64("size", size). + Int64("size", written). Int64("offset", offset). Msg("part uploaded") @@ -384,7 +380,7 @@ func (a *API) PutFilesUploadUploadId(w http.ResponseWriter, r *http.Request, upl w.WriteHeader(http.StatusOK) if err := json.NewEncoder(w).Encode(MultipartUploadPart{ PartNumber: int(partNumber), - Size: size, + Size: written, }); err != nil { a.logger.Error().Err(err).Str(string(logs.OperationIDKey), operationID).Msg("failed to encode response") } @@ -424,7 +420,7 @@ func (a *API) PostFilesUploadUploadIdComplete(w http.ResponseWriter, r *http.Req session.mu.Lock() var missingParts []uint for i := range session.NumParts { - if !session.PartsWritten[i] { + if session.Parts[i] != PartComplete { missingParts = append(missingParts, i) } } diff --git a/packages/envd/internal/api/multipart_upload_test.go b/packages/envd/internal/api/multipart_upload_test.go index f8760410ad..94785d1db5 100644 --- a/packages/envd/internal/api/multipart_upload_test.go +++ b/packages/envd/internal/api/multipart_upload_test.go @@ -11,7 +11,6 @@ import ( "path/filepath" "testing" - "github.com/go-chi/chi/v5" "github.com/rs/zerolog" "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" @@ -598,35 +597,3 @@ func TestMultipartUpload(t *testing.T) { 
api.uploadsLock.Unlock() }) } - -func TestMultipartUploadRouting(t *testing.T) { - t.Parallel() - - // Skip if not running as root - if os.Geteuid() != 0 { - t.Skip("skipping routing tests: requires root") - } - - api := newMultipartTestAPI(t) - router := chi.NewRouter() - HandlerFromMux(api, router) - - // Test that routes are registered - t.Run("init route exists", func(t *testing.T) { - t.Parallel() - body := PostFilesUploadInitJSONRequestBody{ - Path: "/tmp/test-file.txt", - TotalSize: 100, - PartSize: 50, - } - bodyBytes, _ := json.Marshal(body) - - req := httptest.NewRequest(http.MethodPost, "/files/upload/init", bytes.NewReader(bodyBytes)) - req.Header.Set("Content-Type", "application/json") - w := httptest.NewRecorder() - - router.ServeHTTP(w, req) - // Should get 200 (success) not 404 (route not found) - assert.NotEqual(t, http.StatusNotFound, w.Code) - }) -} diff --git a/packages/envd/internal/api/store.go b/packages/envd/internal/api/store.go index fc26bad963..a012714e5c 100644 --- a/packages/envd/internal/api/store.go +++ b/packages/envd/internal/api/store.go @@ -16,21 +16,29 @@ import ( "github.com/e2b-dev/infra/packages/envd/internal/utils" ) +// PartStatus represents the state of a multipart upload part. 
+type PartStatus int + +const ( + partPending PartStatus = iota // zero value: part not yet started + PartInProgress // write currently in flight + PartComplete // write finished successfully +) + // MultipartUploadSession tracks an in-progress multipart upload type MultipartUploadSession struct { - UploadID string - FilePath string // Final destination path - DestFile *os.File // Open file handle for direct writes - TotalSize int64 // Total expected file size (validated >= 0 at input) - PartSize int64 // Size of each part (validated > 0 at input) - NumParts uint // Total number of expected parts - UID int - GID int - PartsWritten map[uint]bool // partNumber -> whether it's been written - partsInProgress map[uint]bool // partNumber -> whether a write is currently in flight - CreatedAt time.Time - completed atomic.Bool // Set to true when complete/abort starts to prevent new parts - mu sync.Mutex + UploadID string + FilePath string // Final destination path + DestFile *os.File // Open file handle for direct writes + TotalSize int64 // Total expected file size (validated >= 0 at input) + PartSize int64 // Size of each part (validated > 0 at input) + NumParts uint // Total number of expected parts + UID int + GID int + Parts map[uint]PartStatus // partNumber -> status + CreatedAt time.Time + completed atomic.Bool // Set to true when complete/abort starts to prevent new parts + mu sync.Mutex } // ignoreNotExist returns nil if err is a "not exist" error, otherwise returns err unchanged. 
diff --git a/packages/envd/spec/envd.yaml b/packages/envd/spec/envd.yaml index 8f9578dd9c..43fc8c3e9c 100644 --- a/packages/envd/spec/envd.yaml +++ b/packages/envd/spec/envd.yaml @@ -266,8 +266,6 @@ paths: $ref: "#/components/responses/UploadNotFound" "500": $ref: "#/components/responses/InternalServerError" - "507": - $ref: "#/components/responses/NotEnoughDiskSpace" components: securitySchemes: diff --git a/tests/integration/internal/envd/generated.go b/tests/integration/internal/envd/generated.go index fddc286f41..28bf900547 100644 --- a/tests/integration/internal/envd/generated.go +++ b/tests/integration/internal/envd/generated.go @@ -1215,7 +1215,6 @@ type PostFilesUploadUploadIdCompleteResponse struct { JSON200 *MultipartUploadComplete JSON404 *UploadNotFound JSON500 *InternalServerError - JSON507 *NotEnoughDiskSpace } // Status returns HTTPResponse.Status @@ -1700,13 +1699,6 @@ func ParsePostFilesUploadUploadIdCompleteResponse(rsp *http.Response) (*PostFile } response.JSON500 = &dest - case strings.Contains(rsp.Header.Get("Content-Type"), "json") && rsp.StatusCode == 507: - var dest NotEnoughDiskSpace - if err := json.Unmarshal(bodyBytes, &dest); err != nil { - return nil, err - } - response.JSON507 = &dest - } return response, nil From 31c37cd05bd3f992cf3b00f2f6f334b5ea6cc5a0 Mon Sep 17 00:00:00 2001 From: Mish Ushakov <10400064+mishushakov@users.noreply.github.com> Date: Tue, 24 Feb 2026 19:38:04 +0100 Subject: [PATCH 41/59] fix(envd): fix race conditions and validation in multipart upload - Add WaitGroup to track in-flight part writes; Complete/Delete/TTL-cleanup wait for all writers to finish before closing the destination file, preventing EBADF errors from concurrent io.CopyN and file.Close - Mark parts as PartComplete before checking session.completed flag, preventing the scenario where a successfully written part stays stuck as PartInProgress and causes Complete to falsely report missing parts - Complete no longer deletes the session before validating 
parts; on missing parts it resets the completed flag so the client can upload remaining parts and retry instead of losing all uploaded data - Validate params.Part >= 0 before casting to uint to provide clear error messages instead of confusing wraparound values Co-Authored-By: Claude Opus 4.6 --- .../envd/internal/api/multipart_upload.go | 64 ++++++++++++------ .../internal/api/multipart_upload_test.go | 66 +++++++++++++++++++ packages/envd/internal/api/store.go | 7 +- 3 files changed, 116 insertions(+), 21 deletions(-) diff --git a/packages/envd/internal/api/multipart_upload.go b/packages/envd/internal/api/multipart_upload.go index c65942ec40..6cfb39f995 100644 --- a/packages/envd/internal/api/multipart_upload.go +++ b/packages/envd/internal/api/multipart_upload.go @@ -256,6 +256,14 @@ func (a *API) PutFilesUploadUploadId(w http.ResponseWriter, r *http.Request, upl return } + // Validate part number before casting to uint to avoid confusing wraparound errors + if params.Part < 0 { + a.logger.Error().Str(string(logs.OperationIDKey), operationID).Int("part", params.Part).Msg("negative part number") + jsonError(w, http.StatusBadRequest, fmt.Errorf("part number must be non-negative, got %d", params.Part)) + + return + } + partNumber := uint(params.Part) // Reject parts for empty files (no parts expected) @@ -284,6 +292,7 @@ func (a *API) PutFilesUploadUploadId(w http.ResponseWriter, r *http.Request, upl // Reserve this part under lock to prevent concurrent writes to the same part number // and to authoritatively check completed status (the atomic check above is a fast path). + // Also register with the WaitGroup so Complete/Delete wait for this write to finish. 
session.mu.Lock() if session.completed.Load() { session.mu.Unlock() @@ -300,12 +309,17 @@ func (a *API) PutFilesUploadUploadId(w http.ResponseWriter, r *http.Request, upl return } session.Parts[partNumber] = PartInProgress + session.wg.Add(1) // Must happen under mu while completed is false to avoid Add/Wait race session.mu.Unlock() + // Always signal writer completion so Complete/Delete can proceed. + // This must be the first defer (runs last) so cleanup below finishes first. + defer session.wg.Done() + // Ensure in-progress flag is cleaned up on any early return (write errors, size mismatch, etc.) - partReserved := true + partWritten := false defer func() { - if partReserved { + if !partWritten { session.mu.Lock() delete(session.Parts, partNumber) session.mu.Unlock() @@ -346,18 +360,11 @@ func (a *API) PutFilesUploadUploadId(w http.ResponseWriter, r *http.Request, upl return } - // Finalize: mark part as complete under lock. Re-check completed to prevent - // the race where Complete deletes the file between our write and this point, - // which would cause us to return 200 while the file is gone. + // Finalize: always mark the part as complete since the data was written to disk. + // Mark partWritten first so the deferred cleanup does not revert the status. + // Then check completed — if the session was finalized mid-write, return 409 + // but leave the part as PartComplete so Complete's validation sees it. session.mu.Lock() - partReserved = false - if session.completed.Load() { - session.mu.Unlock() - a.logger.Error().Str(string(logs.OperationIDKey), operationID).Str("uploadId", uploadId).Uint("partNumber", partNumber).Msg("session completed during part upload") - jsonError(w, http.StatusConflict, fmt.Errorf("upload session %s was completed or aborted during part upload", uploadId)) - - return - } if session.Parts[partNumber] == PartComplete { a.logger.Warn(). Str(string(logs.OperationIDKey), operationID). 
@@ -366,6 +373,14 @@ func (a *API) PutFilesUploadUploadId(w http.ResponseWriter, r *http.Request, upl Msg("overwriting existing part") } session.Parts[partNumber] = PartComplete + partWritten = true + if session.completed.Load() { + session.mu.Unlock() + a.logger.Error().Str(string(logs.OperationIDKey), operationID).Str("uploadId", uploadId).Uint("partNumber", partNumber).Msg("session completed during part upload") + jsonError(w, http.StatusConflict, fmt.Errorf("upload session %s was completed or aborted during part upload", uploadId)) + + return + } session.mu.Unlock() a.logger.Debug(). @@ -392,11 +407,11 @@ func (a *API) PostFilesUploadUploadIdComplete(w http.ResponseWriter, r *http.Req operationID := logs.AssignOperationID() - // Get and remove the session + // Look up the session and mark as completing to prevent new part reservations. + // Do NOT delete from the map yet — if validation fails, the client can retry. a.uploadsLock.Lock() session, exists := a.uploads[uploadId] if exists { - // Mark as completed to prevent new parts from being uploaded if !session.completed.CompareAndSwap(false, true) { // Already being completed by another request a.uploadsLock.Unlock() @@ -405,7 +420,6 @@ func (a *API) PostFilesUploadUploadIdComplete(w http.ResponseWriter, r *http.Req return } - delete(a.uploads, uploadId) } a.uploadsLock.Unlock() @@ -416,6 +430,11 @@ func (a *API) PostFilesUploadUploadIdComplete(w http.ResponseWriter, r *http.Req return } + // Wait for all in-flight part writes to finish before checking part status. + // This prevents closing the file while io.CopyN is still writing and ensures + // parts that were mid-write when completed was set are properly accounted for. 
+ session.wg.Wait() + // Verify all parts were uploaded session.mu.Lock() var missingParts []uint @@ -427,8 +446,8 @@ func (a *API) PostFilesUploadUploadIdComplete(w http.ResponseWriter, r *http.Req session.mu.Unlock() if len(missingParts) > 0 { - session.DestFile.Close() - os.Remove(session.FilePath) + // Reset completed flag so the client can upload missing parts and retry + session.completed.Store(false) a.logger.Error(). Str(string(logs.OperationIDKey), operationID). Str("uploadId", uploadId). @@ -439,7 +458,11 @@ func (a *API) PostFilesUploadUploadIdComplete(w http.ResponseWriter, r *http.Req return } - // Close the file + // All parts present — remove session from map and close the file + a.uploadsLock.Lock() + delete(a.uploads, uploadId) + a.uploadsLock.Unlock() + if err := session.DestFile.Close(); err != nil { a.logger.Error().Err(err).Str(string(logs.OperationIDKey), operationID).Msg("error closing destination file") jsonError(w, http.StatusInternalServerError, fmt.Errorf("error closing destination file: %w", err)) @@ -495,6 +518,9 @@ func (a *API) DeleteFilesUploadUploadId(w http.ResponseWriter, r *http.Request, return } + // Wait for any in-flight part writes to finish before closing the file + session.wg.Wait() + // Close and remove the file session.DestFile.Close() if err := ignoreNotExist(os.Remove(session.FilePath)); err != nil { diff --git a/packages/envd/internal/api/multipart_upload_test.go b/packages/envd/internal/api/multipart_upload_test.go index 94785d1db5..bc2ac37972 100644 --- a/packages/envd/internal/api/multipart_upload_test.go +++ b/packages/envd/internal/api/multipart_upload_test.go @@ -273,6 +273,22 @@ func TestMultipartUpload(t *testing.T) { api.PostFilesUploadUploadIdComplete(completeW, completeReq, uploadId) assert.Equal(t, http.StatusBadRequest, completeW.Code) + + // Session should still exist (completed flag reset) so client can retry + api.uploadsLock.RLock() + session, exists := api.uploads[uploadId] + api.uploadsLock.RUnlock() 
+ assert.True(t, exists, "session should still exist after failed complete") + assert.False(t, session.completed.Load(), "completed flag should be reset") + + // Clean up + api.uploadsLock.Lock() + if s := api.uploads[uploadId]; s != nil { + s.DestFile.Close() + os.Remove(s.FilePath) + } + delete(api.uploads, uploadId) + api.uploadsLock.Unlock() }) t.Run("upload part after complete started", func(t *testing.T) { @@ -596,4 +612,54 @@ func TestMultipartUpload(t *testing.T) { delete(api.uploads, uploadId) api.uploadsLock.Unlock() }) + + t.Run("reject negative part number", func(t *testing.T) { + t.Parallel() + api := newMultipartTestAPI(t) + tempDir := t.TempDir() + destPath := filepath.Join(tempDir, "neg-part.txt") + + // Initialize upload + body := PostFilesUploadInitJSONRequestBody{ + Path: destPath, + TotalSize: 10, + PartSize: 10, + } + bodyBytes, _ := json.Marshal(body) + + initReq := httptest.NewRequest(http.MethodPost, "/files/upload/init", bytes.NewReader(bodyBytes)) + initReq.Header.Set("Content-Type", "application/json") + initW := httptest.NewRecorder() + + api.PostFilesUploadInit(initW, initReq, PostFilesUploadInitParams{}) + require.Equal(t, http.StatusOK, initW.Code) + + var initResp MultipartUploadInit + err := json.Unmarshal(initW.Body.Bytes(), &initResp) + require.NoError(t, err) + uploadId := initResp.UploadId + + // Try to upload with negative part number + partReq := httptest.NewRequest(http.MethodPut, "/files/upload/"+uploadId+"?part=-1", bytes.NewReader([]byte("data"))) + partReq.Header.Set("Content-Type", "application/octet-stream") + partW := httptest.NewRecorder() + + api.PutFilesUploadUploadId(partW, partReq, uploadId, PutFilesUploadUploadIdParams{Part: -1}) + assert.Equal(t, http.StatusBadRequest, partW.Code) + + var errResp Error + err = json.Unmarshal(partW.Body.Bytes(), &errResp) + require.NoError(t, err) + assert.Contains(t, errResp.Message, "non-negative") + + // Clean up + api.uploadsLock.Lock() + session := api.uploads[uploadId] + if 
session != nil { + session.DestFile.Close() + os.Remove(session.FilePath) + } + delete(api.uploads, uploadId) + api.uploadsLock.Unlock() + }) } diff --git a/packages/envd/internal/api/store.go b/packages/envd/internal/api/store.go index a012714e5c..627a496de0 100644 --- a/packages/envd/internal/api/store.go +++ b/packages/envd/internal/api/store.go @@ -37,8 +37,9 @@ type MultipartUploadSession struct { GID int Parts map[uint]PartStatus // partNumber -> status CreatedAt time.Time - completed atomic.Bool // Set to true when complete/abort starts to prevent new parts - mu sync.Mutex + completed atomic.Bool // Set to true when complete/abort starts to prevent new parts + mu sync.Mutex // Protects Parts and activeWriters + wg sync.WaitGroup // Tracks in-flight part writes; Complete/Delete wait on this before closing DestFile } // ignoreNotExist returns nil if err is a "not exist" error, otherwise returns err unchanged. @@ -125,6 +126,8 @@ func (a *API) removeExpiredSessions() { if session.completed.CompareAndSwap(false, true) { delete(a.uploads, uploadID) go func(s *MultipartUploadSession) { + // Wait for any in-flight part writes to finish before closing the file + s.wg.Wait() s.DestFile.Close() if err := ignoreNotExist(os.Remove(s.FilePath)); err != nil { a.logger.Warn().Err(err).Str("filePath", s.FilePath).Msg("failed to cleanup expired upload file") From 0ee70056aff454cc71acc8a542f733fdab1a3583 Mon Sep 17 00:00:00 2001 From: Mish Ushakov <10400064+mishushakov@users.noreply.github.com> Date: Tue, 24 Feb 2026 19:40:22 +0100 Subject: [PATCH 42/59] refactor(envd): use int instead of uint for part numbers and counts Since params.Part is int and we validate >= 0 early, the uint cast is unnecessary. Change NumParts, Parts map key, and partNumber to int, removing all uint casts and simplifying the code. 
Co-Authored-By: Claude Opus 4.6 --- .../envd/internal/api/multipart_upload.go | 30 +++++++++---------- packages/envd/internal/api/store.go | 4 +-- 2 files changed, 17 insertions(+), 17 deletions(-) diff --git a/packages/envd/internal/api/multipart_upload.go b/packages/envd/internal/api/multipart_upload.go index 6cfb39f995..8f392c963a 100644 --- a/packages/envd/internal/api/multipart_upload.go +++ b/packages/envd/internal/api/multipart_upload.go @@ -72,9 +72,9 @@ func (a *API) PostFilesUploadInit(w http.ResponseWriter, r *http.Request, params } // Compute numParts and validate the cap before any file I/O. - var numParts uint + var numParts int if body.TotalSize > 0 { - numParts = uint((body.TotalSize + body.PartSize - 1) / body.PartSize) + numParts = int((body.TotalSize + body.PartSize - 1) / body.PartSize) } if numParts > maxNumParts { @@ -193,7 +193,7 @@ func (a *API) PostFilesUploadInit(w http.ResponseWriter, r *http.Request, params NumParts: numParts, UID: uid, GID: gid, - Parts: make(map[uint]PartStatus), + Parts: make(map[int]PartStatus), CreatedAt: time.Now(), } @@ -218,7 +218,7 @@ func (a *API) PostFilesUploadInit(w http.ResponseWriter, r *http.Request, params Str("filePath", filePath). Int64("totalSize", body.TotalSize). Int64("partSize", body.PartSize). - Uint("numParts", numParts). + Int("numParts", numParts). 
Msg("multipart upload initialized") w.Header().Set("Content-Type", "application/json") @@ -256,7 +256,7 @@ func (a *API) PutFilesUploadUploadId(w http.ResponseWriter, r *http.Request, upl return } - // Validate part number before casting to uint to avoid confusing wraparound errors + // Validate part number is non-negative if params.Part < 0 { a.logger.Error().Str(string(logs.OperationIDKey), operationID).Int("part", params.Part).Msg("negative part number") jsonError(w, http.StatusBadRequest, fmt.Errorf("part number must be non-negative, got %d", params.Part)) @@ -264,11 +264,11 @@ func (a *API) PutFilesUploadUploadId(w http.ResponseWriter, r *http.Request, upl return } - partNumber := uint(params.Part) + partNumber := params.Part // Reject parts for empty files (no parts expected) if session.NumParts == 0 { - a.logger.Error().Str(string(logs.OperationIDKey), operationID).Uint("partNumber", partNumber).Msg("upload has no parts (empty file)") + a.logger.Error().Str(string(logs.OperationIDKey), operationID).Int("partNumber", partNumber).Msg("upload has no parts (empty file)") jsonError(w, http.StatusBadRequest, fmt.Errorf("upload has no parts (empty file); no part uploads are accepted")) return @@ -276,7 +276,7 @@ func (a *API) PutFilesUploadUploadId(w http.ResponseWriter, r *http.Request, upl // Check part number is within range if partNumber >= session.NumParts { - a.logger.Error().Str(string(logs.OperationIDKey), operationID).Uint("partNumber", partNumber).Uint("numParts", session.NumParts).Msg("part number out of range") + a.logger.Error().Str(string(logs.OperationIDKey), operationID).Int("partNumber", partNumber).Int("numParts", session.NumParts).Msg("part number out of range") jsonError(w, http.StatusBadRequest, fmt.Errorf("part number %d out of range (expected 0-%d)", partNumber, session.NumParts-1)) return @@ -303,7 +303,7 @@ func (a *API) PutFilesUploadUploadId(w http.ResponseWriter, r *http.Request, upl } if session.Parts[partNumber] == PartInProgress { 
session.mu.Unlock() - a.logger.Error().Str(string(logs.OperationIDKey), operationID).Str("uploadId", uploadId).Uint("partNumber", partNumber).Msg("part is already being uploaded by another request") + a.logger.Error().Str(string(logs.OperationIDKey), operationID).Str("uploadId", uploadId).Int("partNumber", partNumber).Msg("part is already being uploaded by another request") jsonError(w, http.StatusConflict, fmt.Errorf("part %d is already being uploaded by another request for session %s", partNumber, uploadId)) return @@ -369,14 +369,14 @@ func (a *API) PutFilesUploadUploadId(w http.ResponseWriter, r *http.Request, upl a.logger.Warn(). Str(string(logs.OperationIDKey), operationID). Str("uploadId", uploadId). - Uint("partNumber", partNumber). + Int("partNumber", partNumber). Msg("overwriting existing part") } session.Parts[partNumber] = PartComplete partWritten = true if session.completed.Load() { session.mu.Unlock() - a.logger.Error().Str(string(logs.OperationIDKey), operationID).Str("uploadId", uploadId).Uint("partNumber", partNumber).Msg("session completed during part upload") + a.logger.Error().Str(string(logs.OperationIDKey), operationID).Str("uploadId", uploadId).Int("partNumber", partNumber).Msg("session completed during part upload") jsonError(w, http.StatusConflict, fmt.Errorf("upload session %s was completed or aborted during part upload", uploadId)) return @@ -386,7 +386,7 @@ func (a *API) PutFilesUploadUploadId(w http.ResponseWriter, r *http.Request, upl a.logger.Debug(). Str(string(logs.OperationIDKey), operationID). Str("uploadId", uploadId). - Uint("partNumber", partNumber). + Int("partNumber", partNumber). Int64("size", written). Int64("offset", offset). 
Msg("part uploaded") @@ -394,7 +394,7 @@ func (a *API) PutFilesUploadUploadId(w http.ResponseWriter, r *http.Request, upl w.Header().Set("Content-Type", "application/json") w.WriteHeader(http.StatusOK) if err := json.NewEncoder(w).Encode(MultipartUploadPart{ - PartNumber: int(partNumber), + PartNumber: partNumber, Size: written, }); err != nil { a.logger.Error().Err(err).Str(string(logs.OperationIDKey), operationID).Msg("failed to encode response") @@ -437,7 +437,7 @@ func (a *API) PostFilesUploadUploadIdComplete(w http.ResponseWriter, r *http.Req // Verify all parts were uploaded session.mu.Lock() - var missingParts []uint + var missingParts []int for i := range session.NumParts { if session.Parts[i] != PartComplete { missingParts = append(missingParts, i) @@ -475,7 +475,7 @@ func (a *API) PostFilesUploadUploadIdComplete(w http.ResponseWriter, r *http.Req Str("uploadId", uploadId). Str("filePath", session.FilePath). Int64("totalSize", session.TotalSize). - Uint("numParts", session.NumParts). + Int("numParts", session.NumParts). 
Msg("multipart upload completed") w.Header().Set("Content-Type", "application/json") diff --git a/packages/envd/internal/api/store.go b/packages/envd/internal/api/store.go index 627a496de0..02451a40fb 100644 --- a/packages/envd/internal/api/store.go +++ b/packages/envd/internal/api/store.go @@ -32,10 +32,10 @@ type MultipartUploadSession struct { DestFile *os.File // Open file handle for direct writes TotalSize int64 // Total expected file size (validated >= 0 at input) PartSize int64 // Size of each part (validated > 0 at input) - NumParts uint // Total number of expected parts + NumParts int // Total number of expected parts UID int GID int - Parts map[uint]PartStatus // partNumber -> status + Parts map[int]PartStatus // partNumber -> status CreatedAt time.Time completed atomic.Bool // Set to true when complete/abort starts to prevent new parts mu sync.Mutex // Protects Parts and activeWriters From ba06568282ac0f16eb7571831eae6ef9b0658ebf Mon Sep 17 00:00:00 2001 From: Mish Ushakov <10400064+mishushakov@users.noreply.github.com> Date: Tue, 24 Feb 2026 19:41:18 +0100 Subject: [PATCH 43/59] refactor(envd): remove unnecessary partNumber alias Use params.Part directly instead of assigning to a local variable, since the uint cast that previously justified the alias was removed. 
Co-Authored-By: Claude Opus 4.6 --- .../envd/internal/api/multipart_upload.go | 38 +++++++++---------- 1 file changed, 18 insertions(+), 20 deletions(-) diff --git a/packages/envd/internal/api/multipart_upload.go b/packages/envd/internal/api/multipart_upload.go index 8f392c963a..0b3bb6d39f 100644 --- a/packages/envd/internal/api/multipart_upload.go +++ b/packages/envd/internal/api/multipart_upload.go @@ -264,28 +264,26 @@ func (a *API) PutFilesUploadUploadId(w http.ResponseWriter, r *http.Request, upl return } - partNumber := params.Part - // Reject parts for empty files (no parts expected) if session.NumParts == 0 { - a.logger.Error().Str(string(logs.OperationIDKey), operationID).Int("partNumber", partNumber).Msg("upload has no parts (empty file)") + a.logger.Error().Str(string(logs.OperationIDKey), operationID).Int("partNumber", params.Part).Msg("upload has no parts (empty file)") jsonError(w, http.StatusBadRequest, fmt.Errorf("upload has no parts (empty file); no part uploads are accepted")) return } // Check part number is within range - if partNumber >= session.NumParts { - a.logger.Error().Str(string(logs.OperationIDKey), operationID).Int("partNumber", partNumber).Int("numParts", session.NumParts).Msg("part number out of range") - jsonError(w, http.StatusBadRequest, fmt.Errorf("part number %d out of range (expected 0-%d)", partNumber, session.NumParts-1)) + if params.Part >= session.NumParts { + a.logger.Error().Str(string(logs.OperationIDKey), operationID).Int("partNumber", params.Part).Int("numParts", session.NumParts).Msg("part number out of range") + jsonError(w, http.StatusBadRequest, fmt.Errorf("part number %d out of range (expected 0-%d)", params.Part, session.NumParts-1)) return } // Calculate offset and expected size for this part - offset := int64(partNumber) * session.PartSize + offset := int64(params.Part) * session.PartSize expectedSize := session.PartSize - if partNumber == session.NumParts-1 { + if params.Part == session.NumParts-1 { // Last 
part may be smaller expectedSize = session.TotalSize - offset } @@ -301,14 +299,14 @@ func (a *API) PutFilesUploadUploadId(w http.ResponseWriter, r *http.Request, upl return } - if session.Parts[partNumber] == PartInProgress { + if session.Parts[params.Part] == PartInProgress { session.mu.Unlock() - a.logger.Error().Str(string(logs.OperationIDKey), operationID).Str("uploadId", uploadId).Int("partNumber", partNumber).Msg("part is already being uploaded by another request") - jsonError(w, http.StatusConflict, fmt.Errorf("part %d is already being uploaded by another request for session %s", partNumber, uploadId)) + a.logger.Error().Str(string(logs.OperationIDKey), operationID).Str("uploadId", uploadId).Int("partNumber", params.Part).Msg("part is already being uploaded by another request") + jsonError(w, http.StatusConflict, fmt.Errorf("part %d is already being uploaded by another request for session %s", params.Part, uploadId)) return } - session.Parts[partNumber] = PartInProgress + session.Parts[params.Part] = PartInProgress session.wg.Add(1) // Must happen under mu while completed is false to avoid Add/Wait race session.mu.Unlock() @@ -321,7 +319,7 @@ func (a *API) PutFilesUploadUploadId(w http.ResponseWriter, r *http.Request, upl defer func() { if !partWritten { session.mu.Lock() - delete(session.Parts, partNumber) + delete(session.Parts, params.Part) session.mu.Unlock() } }() @@ -339,7 +337,7 @@ func (a *API) PutFilesUploadUploadId(w http.ResponseWriter, r *http.Request, upl return } a.logger.Error().Err(err).Str(string(logs.OperationIDKey), operationID).Msg("error writing part data") - jsonError(w, http.StatusInternalServerError, fmt.Errorf("error writing part %d data: %w", partNumber, err)) + jsonError(w, http.StatusInternalServerError, fmt.Errorf("error writing part %d data: %w", params.Part, err)) return } @@ -365,18 +363,18 @@ func (a *API) PutFilesUploadUploadId(w http.ResponseWriter, r *http.Request, upl // Then check completed — if the session was 
finalized mid-write, return 409 // but leave the part as PartComplete so Complete's validation sees it. session.mu.Lock() - if session.Parts[partNumber] == PartComplete { + if session.Parts[params.Part] == PartComplete { a.logger.Warn(). Str(string(logs.OperationIDKey), operationID). Str("uploadId", uploadId). - Int("partNumber", partNumber). + Int("partNumber", params.Part). Msg("overwriting existing part") } - session.Parts[partNumber] = PartComplete + session.Parts[params.Part] = PartComplete partWritten = true if session.completed.Load() { session.mu.Unlock() - a.logger.Error().Str(string(logs.OperationIDKey), operationID).Str("uploadId", uploadId).Int("partNumber", partNumber).Msg("session completed during part upload") + a.logger.Error().Str(string(logs.OperationIDKey), operationID).Str("uploadId", uploadId).Int("partNumber", params.Part).Msg("session completed during part upload") jsonError(w, http.StatusConflict, fmt.Errorf("upload session %s was completed or aborted during part upload", uploadId)) return @@ -386,7 +384,7 @@ func (a *API) PutFilesUploadUploadId(w http.ResponseWriter, r *http.Request, upl a.logger.Debug(). Str(string(logs.OperationIDKey), operationID). Str("uploadId", uploadId). - Int("partNumber", partNumber). + Int("partNumber", params.Part). Int64("size", written). Int64("offset", offset). 
Msg("part uploaded") @@ -394,7 +392,7 @@ func (a *API) PutFilesUploadUploadId(w http.ResponseWriter, r *http.Request, upl w.Header().Set("Content-Type", "application/json") w.WriteHeader(http.StatusOK) if err := json.NewEncoder(w).Encode(MultipartUploadPart{ - PartNumber: partNumber, + PartNumber: params.Part, Size: written, }); err != nil { a.logger.Error().Err(err).Str(string(logs.OperationIDKey), operationID).Msg("failed to encode response") From 6344ba855a0c08942cf4baf9cf7cf1edabd6b886 Mon Sep 17 00:00:00 2001 From: Mish Ushakov <10400064+mishushakov@users.noreply.github.com> Date: Tue, 24 Feb 2026 19:58:28 +0100 Subject: [PATCH 44/59] fix(envd): clean up remaining issues in multipart upload MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Remove orphaned file on DestFile.Close() failure in Complete handler, since the session is already removed from the map - Remove unreachable PartComplete check in PutPart finalization — the part is always PartInProgress at this point due to the concurrent write guard - Fix file path reuse race in Delete and TTL cleanup: unlink the file under uploadsLock before removing from the map, so a new Init for the same path gets a fresh inode (in-flight writers use the open fd) - Fix goroutine leak in download_test.go: use context.WithCancel with t.Cleanup instead of context.Background Co-Authored-By: Claude Opus 4.6 --- packages/envd/internal/api/download_test.go | 59 +++++++------------ .../envd/internal/api/multipart_upload.go | 26 ++++---- packages/envd/internal/api/store.go | 10 ++-- 3 files changed, 38 insertions(+), 57 deletions(-) diff --git a/packages/envd/internal/api/download_test.go b/packages/envd/internal/api/download_test.go index ac7fedccb3..4b3e6c3193 100644 --- a/packages/envd/internal/api/download_test.go +++ b/packages/envd/internal/api/download_test.go @@ -22,6 +22,20 @@ import ( "github.com/e2b-dev/infra/packages/envd/internal/utils" ) +func newDownloadTestAPI(t 
*testing.T, username string) *API { + t.Helper() + logger := zerolog.Nop() + defaults := &execcontext.Defaults{ + EnvVars: utils.NewMap[string, string](), + User: username, + } + + ctx, cancel := context.WithCancel(context.Background()) + t.Cleanup(cancel) + + return New(ctx, &logger, defaults, nil, false) +} + func TestGetFilesContentDisposition(t *testing.T) { t.Parallel() @@ -90,13 +104,7 @@ func TestGetFilesContentDisposition(t *testing.T) { err := os.WriteFile(tempFile, []byte("test content"), 0o644) require.NoError(t, err) - // Create test API - logger := zerolog.Nop() - defaults := &execcontext.Defaults{ - EnvVars: utils.NewMap[string, string](), - User: currentUser.Username, - } - api := New(context.Background(), &logger, defaults, nil, false) + api := newDownloadTestAPI(t, currentUser.Username) // Create request and response recorder req := httptest.NewRequest(http.MethodGet, "/files?path="+url.QueryEscape(tempFile), nil) @@ -139,13 +147,7 @@ func TestGetFilesContentDispositionWithNestedPath(t *testing.T) { err = os.WriteFile(tempFile, []byte("test content"), 0o644) require.NoError(t, err) - // Create test API - logger := zerolog.Nop() - defaults := &execcontext.Defaults{ - EnvVars: utils.NewMap[string, string](), - User: currentUser.Username, - } - api := New(context.Background(), &logger, defaults, nil, false) + api := newDownloadTestAPI(t, currentUser.Username) // Create request and response recorder req := httptest.NewRequest(http.MethodGet, "/files?path="+url.QueryEscape(tempFile), nil) @@ -182,13 +184,7 @@ func TestGetFiles_GzipEncoding_ExplicitIdentityOffWithRange(t *testing.T) { err = os.WriteFile(tempFile, []byte("test content"), 0o644) require.NoError(t, err) - // Create test API - logger := zerolog.Nop() - defaults := &execcontext.Defaults{ - EnvVars: utils.NewMap[string, string](), - User: currentUser.Username, - } - api := New(context.Background(), &logger, defaults, nil, false) + api := newDownloadTestAPI(t, currentUser.Username) // Create 
request and response recorder req := httptest.NewRequest(http.MethodGet, "/files?path="+url.QueryEscape(tempFile), nil) @@ -224,12 +220,7 @@ func TestGetFiles_GzipDownload(t *testing.T) { err = os.WriteFile(tempFile, originalContent, 0o644) require.NoError(t, err) - logger := zerolog.Nop() - defaults := &execcontext.Defaults{ - EnvVars: utils.NewMap[string, string](), - User: currentUser.Username, - } - api := New(context.Background(), &logger, defaults, nil, false) + api := newDownloadTestAPI(t, currentUser.Username) req := httptest.NewRequest(http.MethodGet, "/files?path="+url.QueryEscape(tempFile), nil) req.Header.Set("Accept-Encoding", "gzip") @@ -289,12 +280,7 @@ func TestPostFiles_GzipUpload(t *testing.T) { tempDir := t.TempDir() destPath := filepath.Join(tempDir, "uploaded.txt") - logger := zerolog.Nop() - defaults := &execcontext.Defaults{ - EnvVars: utils.NewMap[string, string](), - User: currentUser.Username, - } - api := New(context.Background(), &logger, defaults, nil, false) + api := newDownloadTestAPI(t, currentUser.Username) req := httptest.NewRequest(http.MethodPost, "/files?path="+url.QueryEscape(destPath), &gzBuf) req.Header.Set("Content-Type", mpWriter.FormDataContentType()) @@ -349,12 +335,7 @@ func TestGzipUploadThenGzipDownload(t *testing.T) { tempDir := t.TempDir() destPath := filepath.Join(tempDir, "roundtrip.txt") - logger := zerolog.Nop() - defaults := &execcontext.Defaults{ - EnvVars: utils.NewMap[string, string](), - User: currentUser.Username, - } - api := New(context.Background(), &logger, defaults, nil, false) + api := newDownloadTestAPI(t, currentUser.Username) uploadReq := httptest.NewRequest(http.MethodPost, "/files?path="+url.QueryEscape(destPath), &gzBuf) uploadReq.Header.Set("Content-Type", mpWriter.FormDataContentType()) diff --git a/packages/envd/internal/api/multipart_upload.go b/packages/envd/internal/api/multipart_upload.go index 0b3bb6d39f..d1b51adbb0 100644 --- a/packages/envd/internal/api/multipart_upload.go +++ 
b/packages/envd/internal/api/multipart_upload.go @@ -359,17 +359,10 @@ func (a *API) PutFilesUploadUploadId(w http.ResponseWriter, r *http.Request, upl } // Finalize: always mark the part as complete since the data was written to disk. - // Mark partWritten first so the deferred cleanup does not revert the status. + // Mark partWritten so the deferred cleanup does not revert the status. // Then check completed — if the session was finalized mid-write, return 409 // but leave the part as PartComplete so Complete's validation sees it. session.mu.Lock() - if session.Parts[params.Part] == PartComplete { - a.logger.Warn(). - Str(string(logs.OperationIDKey), operationID). - Str("uploadId", uploadId). - Int("partNumber", params.Part). - Msg("overwriting existing part") - } session.Parts[params.Part] = PartComplete partWritten = true if session.completed.Load() { @@ -462,6 +455,10 @@ func (a *API) PostFilesUploadUploadIdComplete(w http.ResponseWriter, r *http.Req a.uploadsLock.Unlock() if err := session.DestFile.Close(); err != nil { + // Session is already removed from the map; clean up the orphaned file. + if rmErr := ignoreNotExist(os.Remove(session.FilePath)); rmErr != nil { + a.logger.Warn().Err(rmErr).Str(string(logs.OperationIDKey), operationID).Msg("failed to remove file after close error") + } a.logger.Error().Err(err).Str(string(logs.OperationIDKey), operationID).Msg("error closing destination file") jsonError(w, http.StatusInternalServerError, fmt.Errorf("error closing destination file: %w", err)) @@ -505,6 +502,12 @@ func (a *API) DeleteFilesUploadUploadId(w http.ResponseWriter, r *http.Request, return } + // Unlink the file before removing from the map so a new Init for + // the same path creates a fresh inode. In-flight writers use the + // open DestFile descriptor, which remains valid after unlink. 
+ if err := ignoreNotExist(os.Remove(session.FilePath)); err != nil { + a.logger.Warn().Err(err).Str(string(logs.OperationIDKey), operationID).Str("uploadId", uploadId).Msg("error removing file") + } delete(a.uploads, uploadId) } a.uploadsLock.Unlock() @@ -516,14 +519,9 @@ func (a *API) DeleteFilesUploadUploadId(w http.ResponseWriter, r *http.Request, return } - // Wait for any in-flight part writes to finish before closing the file + // Wait for any in-flight part writes to finish before closing the file descriptor session.wg.Wait() - - // Close and remove the file session.DestFile.Close() - if err := ignoreNotExist(os.Remove(session.FilePath)); err != nil { - a.logger.Warn().Err(err).Str(string(logs.OperationIDKey), operationID).Str("uploadId", uploadId).Msg("error removing file") - } a.logger.Debug(). Str(string(logs.OperationIDKey), operationID). diff --git a/packages/envd/internal/api/store.go b/packages/envd/internal/api/store.go index 02451a40fb..852683cb10 100644 --- a/packages/envd/internal/api/store.go +++ b/packages/envd/internal/api/store.go @@ -124,14 +124,16 @@ func (a *API) removeExpiredSessions() { if now.Sub(session.CreatedAt) > uploadSessionTTL { // Mark as completed to prevent races if session.completed.CompareAndSwap(false, true) { + // Unlink the file before removing from the map so a new Init + // for the same path creates a fresh inode. 
+ if err := ignoreNotExist(os.Remove(session.FilePath)); err != nil { + a.logger.Warn().Err(err).Str("filePath", session.FilePath).Msg("failed to cleanup expired upload file") + } delete(a.uploads, uploadID) go func(s *MultipartUploadSession) { - // Wait for any in-flight part writes to finish before closing the file + // Wait for any in-flight part writes to finish before closing the descriptor s.wg.Wait() s.DestFile.Close() - if err := ignoreNotExist(os.Remove(s.FilePath)); err != nil { - a.logger.Warn().Err(err).Str("filePath", s.FilePath).Msg("failed to cleanup expired upload file") - } }(session) a.logger.Info().Str("uploadId", uploadID).Msg("cleaned up expired multipart upload session") } From 3ebee33ace00e62d9a23959573503f0c6b11179f Mon Sep 17 00:00:00 2001 From: Mish Ushakov <10400064+mishushakov@users.noreply.github.com> Date: Tue, 24 Feb 2026 20:08:32 +0100 Subject: [PATCH 45/59] refactor(envd): unexport package-internal multipart upload types Unexport PartStatus, PartInProgress, PartComplete, and MultipartUploadSession since they are only used within the api package. 
Co-Authored-By: Claude Opus 4.6 --- .../envd/internal/api/multipart_upload.go | 14 ++++++------ .../internal/api/multipart_upload_test.go | 2 +- packages/envd/internal/api/store.go | 22 +++++++++---------- 3 files changed, 19 insertions(+), 19 deletions(-) diff --git a/packages/envd/internal/api/multipart_upload.go b/packages/envd/internal/api/multipart_upload.go index d1b51adbb0..0709e48eed 100644 --- a/packages/envd/internal/api/multipart_upload.go +++ b/packages/envd/internal/api/multipart_upload.go @@ -184,7 +184,7 @@ func (a *API) PostFilesUploadInit(w http.ResponseWriter, r *http.Request, params uploadID := uuid.NewString() - session := &MultipartUploadSession{ + session := &multipartUploadSession{ UploadID: uploadID, FilePath: filePath, DestFile: destFile, @@ -193,7 +193,7 @@ func (a *API) PostFilesUploadInit(w http.ResponseWriter, r *http.Request, params NumParts: numParts, UID: uid, GID: gid, - Parts: make(map[int]PartStatus), + Parts: make(map[int]partStatus), CreatedAt: time.Now(), } @@ -299,14 +299,14 @@ func (a *API) PutFilesUploadUploadId(w http.ResponseWriter, r *http.Request, upl return } - if session.Parts[params.Part] == PartInProgress { + if session.Parts[params.Part] == partInProgress { session.mu.Unlock() a.logger.Error().Str(string(logs.OperationIDKey), operationID).Str("uploadId", uploadId).Int("partNumber", params.Part).Msg("part is already being uploaded by another request") jsonError(w, http.StatusConflict, fmt.Errorf("part %d is already being uploaded by another request for session %s", params.Part, uploadId)) return } - session.Parts[params.Part] = PartInProgress + session.Parts[params.Part] = partInProgress session.wg.Add(1) // Must happen under mu while completed is false to avoid Add/Wait race session.mu.Unlock() @@ -361,9 +361,9 @@ func (a *API) PutFilesUploadUploadId(w http.ResponseWriter, r *http.Request, upl // Finalize: always mark the part as complete since the data was written to disk. 
// Mark partWritten so the deferred cleanup does not revert the status. // Then check completed — if the session was finalized mid-write, return 409 - // but leave the part as PartComplete so Complete's validation sees it. + // but leave the part as partComplete so Complete's validation sees it. session.mu.Lock() - session.Parts[params.Part] = PartComplete + session.Parts[params.Part] = partComplete partWritten = true if session.completed.Load() { session.mu.Unlock() @@ -430,7 +430,7 @@ func (a *API) PostFilesUploadUploadIdComplete(w http.ResponseWriter, r *http.Req session.mu.Lock() var missingParts []int for i := range session.NumParts { - if session.Parts[i] != PartComplete { + if session.Parts[i] != partComplete { missingParts = append(missingParts, i) } } diff --git a/packages/envd/internal/api/multipart_upload_test.go b/packages/envd/internal/api/multipart_upload_test.go index bc2ac37972..2714d4b19c 100644 --- a/packages/envd/internal/api/multipart_upload_test.go +++ b/packages/envd/internal/api/multipart_upload_test.go @@ -393,7 +393,7 @@ func TestMultipartUpload(t *testing.T) { session.DestFile.Close() os.Remove(session.FilePath) } - api.uploads = make(map[string]*MultipartUploadSession) + api.uploads = make(map[string]*multipartUploadSession) api.uploadsLock.Unlock() }) diff --git a/packages/envd/internal/api/store.go b/packages/envd/internal/api/store.go index 852683cb10..4470813fcb 100644 --- a/packages/envd/internal/api/store.go +++ b/packages/envd/internal/api/store.go @@ -16,17 +16,17 @@ import ( "github.com/e2b-dev/infra/packages/envd/internal/utils" ) -// PartStatus represents the state of a multipart upload part. -type PartStatus int +// partStatus represents the state of a multipart upload part. 
+type partStatus int const ( - partPending PartStatus = iota // zero value: part not yet started - PartInProgress // write currently in flight - PartComplete // write finished successfully + partPending partStatus = iota // zero value: part not yet started + partInProgress // write currently in flight + partComplete // write finished successfully ) -// MultipartUploadSession tracks an in-progress multipart upload -type MultipartUploadSession struct { +// multipartUploadSession tracks an in-progress multipart upload +type multipartUploadSession struct { UploadID string FilePath string // Final destination path DestFile *os.File // Open file handle for direct writes @@ -35,7 +35,7 @@ type MultipartUploadSession struct { NumParts int // Total number of expected parts UID int GID int - Parts map[int]PartStatus // partNumber -> status + Parts map[int]partStatus // partNumber -> status CreatedAt time.Time completed atomic.Bool // Set to true when complete/abort starts to prevent new parts mu sync.Mutex // Protects Parts and activeWriters @@ -77,7 +77,7 @@ type API struct { initLock sync.Mutex // Multipart upload sessions - uploads map[string]*MultipartUploadSession + uploads map[string]*multipartUploadSession uploadsLock sync.RWMutex } @@ -90,7 +90,7 @@ func New(ctx context.Context, l *zerolog.Logger, defaults *execcontext.Defaults, mmdsClient: &DefaultMMDSClient{}, lastSetTime: utils.NewAtomicMax(), accessToken: &SecureToken{}, - uploads: make(map[string]*MultipartUploadSession), + uploads: make(map[string]*multipartUploadSession), } // Start background cleanup for expired upload sessions @@ -130,7 +130,7 @@ func (a *API) removeExpiredSessions() { a.logger.Warn().Err(err).Str("filePath", session.FilePath).Msg("failed to cleanup expired upload file") } delete(a.uploads, uploadID) - go func(s *MultipartUploadSession) { + go func(s *multipartUploadSession) { // Wait for any in-flight part writes to finish before closing the descriptor s.wg.Wait() s.DestFile.Close() From 
4cdcb0ff629b133c3c25ecea7f6d1d232a7f5c79 Mon Sep 17 00:00:00 2001 From: Mish Ushakov <10400064+mishushakov@users.noreply.github.com> Date: Tue, 24 Feb 2026 20:20:53 +0100 Subject: [PATCH 46/59] fix(envd): enforce single active session per path and fix completed/wg race Two issues fixed: 1. Init opens the destination with O_TRUNC without checking for an existing active session on the same path. Move file creation under uploadsLock so the path conflict check and O_TRUNC are atomic. The check scans existing sessions by FilePath (max 100 entries); no second index needed. 2. Complete/abort set the completed flag via atomic CAS outside session.mu, creating a window where a part upload can call wg.Add(1) after wg.Wait() has already observed zero. Move the CAS under session.mu in complete, abort, and TTL cleanup so the completed transition is mutually exclusive with part reservation. Co-Authored-By: Claude Opus 4.6 --- .../envd/internal/api/multipart_upload.go | 82 +++++---- .../internal/api/multipart_upload_test.go | 163 ++++++++++++++++++ packages/envd/internal/api/store.go | 10 +- 3 files changed, 223 insertions(+), 32 deletions(-) diff --git a/packages/envd/internal/api/multipart_upload.go b/packages/envd/internal/api/multipart_upload.go index 0709e48eed..9a0e4fa560 100644 --- a/packages/envd/internal/api/multipart_upload.go +++ b/packages/envd/internal/api/multipart_upload.go @@ -139,9 +139,35 @@ func (a *API) PostFilesUploadInit(w http.ResponseWriter, r *http.Request, params return } + // Atomically check session limit, check for path conflicts, create the + // file, and register the session. File creation (O_TRUNC) must happen + // under the lock to prevent two inits for the same path from both + // passing the check before either truncates. The syscalls under the lock + // (open, truncate, chown) are fast; heavy work like EnsureDirs is above. 
+ uploadID := uuid.NewString() + + a.uploadsLock.Lock() + if len(a.uploads) >= maxUploadSessions { + a.uploadsLock.Unlock() + a.logger.Error().Str(string(logs.OperationIDKey), operationID).Int("maxSessions", maxUploadSessions).Msg("too many concurrent upload sessions") + jsonError(w, http.StatusTooManyRequests, fmt.Errorf("too many concurrent upload sessions (max %d)", maxUploadSessions)) + + return + } + for _, existing := range a.uploads { + if existing.FilePath == filePath { + a.uploadsLock.Unlock() + a.logger.Error().Str(string(logs.OperationIDKey), operationID).Str("filePath", filePath).Msg("destination path already has an active upload") + jsonError(w, http.StatusConflict, fmt.Errorf("destination path %q already has an active upload session", filePath)) + + return + } + } + // Create and preallocate the destination file destFile, err := os.OpenFile(filePath, os.O_RDWR|os.O_CREATE|os.O_TRUNC, 0o666) if err != nil { + a.uploadsLock.Unlock() if errors.Is(err, syscall.ENOSPC) { a.logger.Error().Err(err).Str(string(logs.OperationIDKey), operationID).Msg("not enough disk space") jsonError(w, http.StatusInsufficientStorage, fmt.Errorf("not enough disk space")) @@ -159,6 +185,7 @@ func (a *API) PostFilesUploadInit(w http.ResponseWriter, r *http.Request, params if err := destFile.Truncate(body.TotalSize); err != nil { destFile.Close() os.Remove(filePath) + a.uploadsLock.Unlock() if errors.Is(err, syscall.ENOSPC) { a.logger.Error().Err(err).Str(string(logs.OperationIDKey), operationID).Msg("not enough disk space") jsonError(w, http.StatusInsufficientStorage, fmt.Errorf("not enough disk space")) @@ -176,14 +203,13 @@ func (a *API) PostFilesUploadInit(w http.ResponseWriter, r *http.Request, params if err := os.Chown(filePath, uid, gid); err != nil { destFile.Close() os.Remove(filePath) + a.uploadsLock.Unlock() a.logger.Error().Err(err).Str(string(logs.OperationIDKey), operationID).Msg("error changing file ownership") jsonError(w, http.StatusInternalServerError, 
fmt.Errorf("error changing file ownership: %w", err)) return } - uploadID := uuid.NewString() - session := &multipartUploadSession{ UploadID: uploadID, FilePath: filePath, @@ -197,18 +223,6 @@ func (a *API) PostFilesUploadInit(w http.ResponseWriter, r *http.Request, params CreatedAt: time.Now(), } - // Atomically check session limit and insert — prevents TOCTOU race where - // concurrent requests all pass a read-lock check before any inserts. - a.uploadsLock.Lock() - if len(a.uploads) >= maxUploadSessions { - a.uploadsLock.Unlock() - destFile.Close() - os.Remove(filePath) - a.logger.Error().Str(string(logs.OperationIDKey), operationID).Int("maxSessions", maxUploadSessions).Msg("too many concurrent upload sessions") - jsonError(w, http.StatusTooManyRequests, fmt.Errorf("too many concurrent upload sessions (max %d)", maxUploadSessions)) - - return - } a.uploads[uploadID] = session a.uploadsLock.Unlock() @@ -398,21 +412,10 @@ func (a *API) PostFilesUploadUploadIdComplete(w http.ResponseWriter, r *http.Req operationID := logs.AssignOperationID() - // Look up the session and mark as completing to prevent new part reservations. - // Do NOT delete from the map yet — if validation fails, the client can retry. - a.uploadsLock.Lock() + // Look up the session. 
+ a.uploadsLock.RLock() session, exists := a.uploads[uploadId] - if exists { - if !session.completed.CompareAndSwap(false, true) { - // Already being completed by another request - a.uploadsLock.Unlock() - a.logger.Error().Str(string(logs.OperationIDKey), operationID).Str("uploadId", uploadId).Msg("upload session is already completing") - jsonError(w, http.StatusConflict, fmt.Errorf("upload session %s is already completing", uploadId)) - - return - } - } - a.uploadsLock.Unlock() + a.uploadsLock.RUnlock() if !exists { a.logger.Error().Str(string(logs.OperationIDKey), operationID).Str("uploadId", uploadId).Msg("upload session not found") @@ -421,6 +424,20 @@ func (a *API) PostFilesUploadUploadIdComplete(w http.ResponseWriter, r *http.Req return } + // Mark as completing under session.mu so the transition is synchronized + // with part reservation (which checks completed and calls wg.Add under + // the same lock). This prevents a part upload from calling wg.Add(1) + // after our wg.Wait below has already observed a zero counter. + session.mu.Lock() + if !session.completed.CompareAndSwap(false, true) { + session.mu.Unlock() + a.logger.Error().Str(string(logs.OperationIDKey), operationID).Str("uploadId", uploadId).Msg("upload session is already completing") + jsonError(w, http.StatusConflict, fmt.Errorf("upload session %s is already completing", uploadId)) + + return + } + session.mu.Unlock() + // Wait for all in-flight part writes to finish before checking part status. // This prevents closing the file while io.CopyN is still writing and ensures // parts that were mid-write when completed was set are properly accounted for. 
@@ -493,15 +510,20 @@ func (a *API) DeleteFilesUploadUploadId(w http.ResponseWriter, r *http.Request, a.uploadsLock.Lock() session, exists := a.uploads[uploadId] if exists { - // Mark as completed to prevent new parts from being uploaded + // Mark as completed under session.mu to synchronize with part + // reservation (which checks completed and calls wg.Add under the + // same lock). This prevents a part upload from calling wg.Add(1) + // after our wg.Wait below has already observed a zero counter. + session.mu.Lock() if !session.completed.CompareAndSwap(false, true) { - // Already being completed/aborted by another request + session.mu.Unlock() a.uploadsLock.Unlock() a.logger.Error().Str(string(logs.OperationIDKey), operationID).Str("uploadId", uploadId).Msg("upload session is already completing") jsonError(w, http.StatusConflict, fmt.Errorf("upload session %s is already completing or aborted", uploadId)) return } + session.mu.Unlock() // Unlink the file before removing from the map so a new Init for // the same path creates a fresh inode. In-flight writers use the // open DestFile descriptor, which remains valid after unlink. 
diff --git a/packages/envd/internal/api/multipart_upload_test.go b/packages/envd/internal/api/multipart_upload_test.go index 2714d4b19c..e63bac6137 100644 --- a/packages/envd/internal/api/multipart_upload_test.go +++ b/packages/envd/internal/api/multipart_upload_test.go @@ -662,4 +662,167 @@ func TestMultipartUpload(t *testing.T) { delete(api.uploads, uploadId) api.uploadsLock.Unlock() }) + + t.Run("reject duplicate destination path", func(t *testing.T) { + t.Parallel() + api := newMultipartTestAPI(t) + tempDir := t.TempDir() + destPath := filepath.Join(tempDir, "dup-path.txt") + + // First init should succeed + body := PostFilesUploadInitJSONRequestBody{ + Path: destPath, + TotalSize: 100, + PartSize: 50, + } + bodyBytes, _ := json.Marshal(body) + + req := httptest.NewRequest(http.MethodPost, "/files/upload/init", bytes.NewReader(bodyBytes)) + req.Header.Set("Content-Type", "application/json") + w := httptest.NewRecorder() + + api.PostFilesUploadInit(w, req, PostFilesUploadInitParams{}) + require.Equal(t, http.StatusOK, w.Code) + + var initResp MultipartUploadInit + err := json.Unmarshal(w.Body.Bytes(), &initResp) + require.NoError(t, err) + uploadId := initResp.UploadId + + // Second init with same path should be rejected with 409 + bodyBytes2, _ := json.Marshal(body) + req2 := httptest.NewRequest(http.MethodPost, "/files/upload/init", bytes.NewReader(bodyBytes2)) + req2.Header.Set("Content-Type", "application/json") + w2 := httptest.NewRecorder() + + api.PostFilesUploadInit(w2, req2, PostFilesUploadInitParams{}) + assert.Equal(t, http.StatusConflict, w2.Code) + + var errResp Error + err = json.Unmarshal(w2.Body.Bytes(), &errResp) + require.NoError(t, err) + assert.Contains(t, errResp.Message, "active upload session") + + // Clean up + api.uploadsLock.Lock() + session := api.uploads[uploadId] + if session != nil { + session.DestFile.Close() + os.Remove(session.FilePath) + } + delete(api.uploads, uploadId) + api.uploadsLock.Unlock() + }) + + t.Run("reuse path 
after complete", func(t *testing.T) { + t.Parallel() + api := newMultipartTestAPI(t) + tempDir := t.TempDir() + destPath := filepath.Join(tempDir, "reuse-path.txt") + + // First upload (empty file for simplicity) + body := PostFilesUploadInitJSONRequestBody{ + Path: destPath, + TotalSize: 0, + PartSize: 1024, + } + bodyBytes, _ := json.Marshal(body) + + initReq := httptest.NewRequest(http.MethodPost, "/files/upload/init", bytes.NewReader(bodyBytes)) + initReq.Header.Set("Content-Type", "application/json") + initW := httptest.NewRecorder() + + api.PostFilesUploadInit(initW, initReq, PostFilesUploadInitParams{}) + require.Equal(t, http.StatusOK, initW.Code) + + var initResp MultipartUploadInit + err := json.Unmarshal(initW.Body.Bytes(), &initResp) + require.NoError(t, err) + + // Complete it + completeReq := httptest.NewRequest(http.MethodPost, "/files/upload/"+initResp.UploadId+"/complete", nil) + completeW := httptest.NewRecorder() + + api.PostFilesUploadUploadIdComplete(completeW, completeReq, initResp.UploadId) + require.Equal(t, http.StatusOK, completeW.Code) + + // Second init with same path should succeed now + bodyBytes2, _ := json.Marshal(body) + initReq2 := httptest.NewRequest(http.MethodPost, "/files/upload/init", bytes.NewReader(bodyBytes2)) + initReq2.Header.Set("Content-Type", "application/json") + initW2 := httptest.NewRecorder() + + api.PostFilesUploadInit(initW2, initReq2, PostFilesUploadInitParams{}) + require.Equal(t, http.StatusOK, initW2.Code) + + var initResp2 MultipartUploadInit + err = json.Unmarshal(initW2.Body.Bytes(), &initResp2) + require.NoError(t, err) + + // Clean up + api.uploadsLock.Lock() + session := api.uploads[initResp2.UploadId] + if session != nil { + session.DestFile.Close() + os.Remove(session.FilePath) + } + delete(api.uploads, initResp2.UploadId) + api.uploadsLock.Unlock() + }) + + t.Run("reuse path after abort", func(t *testing.T) { + t.Parallel() + api := newMultipartTestAPI(t) + tempDir := t.TempDir() + destPath := 
filepath.Join(tempDir, "reuse-abort.txt") + + // First upload + body := PostFilesUploadInitJSONRequestBody{ + Path: destPath, + TotalSize: 100, + PartSize: 50, + } + bodyBytes, _ := json.Marshal(body) + + initReq := httptest.NewRequest(http.MethodPost, "/files/upload/init", bytes.NewReader(bodyBytes)) + initReq.Header.Set("Content-Type", "application/json") + initW := httptest.NewRecorder() + + api.PostFilesUploadInit(initW, initReq, PostFilesUploadInitParams{}) + require.Equal(t, http.StatusOK, initW.Code) + + var initResp MultipartUploadInit + err := json.Unmarshal(initW.Body.Bytes(), &initResp) + require.NoError(t, err) + + // Abort it + abortReq := httptest.NewRequest(http.MethodDelete, "/files/upload/"+initResp.UploadId, nil) + abortW := httptest.NewRecorder() + + api.DeleteFilesUploadUploadId(abortW, abortReq, initResp.UploadId) + require.Equal(t, http.StatusNoContent, abortW.Code) + + // Second init with same path should succeed now + bodyBytes2, _ := json.Marshal(body) + initReq2 := httptest.NewRequest(http.MethodPost, "/files/upload/init", bytes.NewReader(bodyBytes2)) + initReq2.Header.Set("Content-Type", "application/json") + initW2 := httptest.NewRecorder() + + api.PostFilesUploadInit(initW2, initReq2, PostFilesUploadInitParams{}) + require.Equal(t, http.StatusOK, initW2.Code) + + // Clean up + var initResp2 MultipartUploadInit + err = json.Unmarshal(initW2.Body.Bytes(), &initResp2) + require.NoError(t, err) + + api.uploadsLock.Lock() + session := api.uploads[initResp2.UploadId] + if session != nil { + session.DestFile.Close() + os.Remove(session.FilePath) + } + delete(api.uploads, initResp2.UploadId) + api.uploadsLock.Unlock() + }) } diff --git a/packages/envd/internal/api/store.go b/packages/envd/internal/api/store.go index 4470813fcb..117913397b 100644 --- a/packages/envd/internal/api/store.go +++ b/packages/envd/internal/api/store.go @@ -122,8 +122,14 @@ func (a *API) removeExpiredSessions() { now := time.Now() for uploadID, session := range 
a.uploads { if now.Sub(session.CreatedAt) > uploadSessionTTL { - // Mark as completed to prevent races - if session.completed.CompareAndSwap(false, true) { + // Mark as completed under session.mu to synchronize with part + // reservation (which checks completed and calls wg.Add under + // the same lock). This prevents a late wg.Add after our Wait. + session.mu.Lock() + swapped := session.completed.CompareAndSwap(false, true) + session.mu.Unlock() + + if swapped { // Unlink the file before removing from the map so a new Init // for the same path creates a fresh inode. if err := ignoreNotExist(os.Remove(session.FilePath)); err != nil { From 7f6b5cfe7761e75c7e34b668a042fab3e87b9012 Mon Sep 17 00:00:00 2001 From: Mish Ushakov <10400064+mishushakov@users.noreply.github.com> Date: Tue, 24 Feb 2026 20:55:38 +0100 Subject: [PATCH 47/59] refactor(envd): remove TTL cleanup and reject completed-part re-uploads The background TTL cleanup goroutine introduced a race with the retry-missing-parts flow. Since envd runs inside short-lived Firecracker VMs, incomplete uploads are cleaned up when the VM is destroyed, making TTL cleanup unnecessary. This also removes the context.Context dependency from api.New() and makes already-completed parts immutable (409 on re-upload).
Co-Authored-By: Claude Opus 4.6 --- packages/envd/internal/api/download_test.go | 6 +- packages/envd/internal/api/init_test.go | 34 +++++----- .../envd/internal/api/multipart_upload.go | 13 ++-- .../internal/api/multipart_upload_test.go | 9 +-- packages/envd/internal/api/store.go | 65 ++----------------- packages/envd/main.go | 2 +- 6 files changed, 33 insertions(+), 96 deletions(-) diff --git a/packages/envd/internal/api/download_test.go b/packages/envd/internal/api/download_test.go index 4b3e6c3193..cda2a78e6e 100644 --- a/packages/envd/internal/api/download_test.go +++ b/packages/envd/internal/api/download_test.go @@ -3,7 +3,6 @@ package api import ( "bytes" "compress/gzip" - "context" "io" "mime/multipart" "net/http" @@ -30,10 +29,7 @@ func newDownloadTestAPI(t *testing.T, username string) *API { User: username, } - ctx, cancel := context.WithCancel(context.Background()) - t.Cleanup(cancel) - - return New(ctx, &logger, defaults, nil, false) + return New(&logger, defaults, nil, false) } func TestGetFilesContentDisposition(t *testing.T) { diff --git a/packages/envd/internal/api/init_test.go b/packages/envd/internal/api/init_test.go index 43fbb099d5..9877104e09 100644 --- a/packages/envd/internal/api/init_test.go +++ b/packages/envd/internal/api/init_test.go @@ -137,12 +137,12 @@ func (m *mockMMDSClient) GetAccessTokenHash(_ context.Context) (string, error) { return m.hash, m.err } -func newTestAPI(ctx context.Context, accessToken *SecureToken, mmdsClient MMDSClient) *API { +func newTestAPI(accessToken *SecureToken, mmdsClient MMDSClient) *API { logger := zerolog.Nop() defaults := &execcontext.Defaults{ EnvVars: utils.NewMap[string, string](), } - api := New(ctx, &logger, defaults, nil, false) + api := New(&logger, defaults, nil, false) if accessToken != nil { api.accessToken.TakeFrom(accessToken) } @@ -241,7 +241,7 @@ func TestValidateInitAccessToken(t *testing.T) { t.Run(tt.name, func(t *testing.T) { t.Parallel() mmdsClient := &mockMMDSClient{hash: tt.mmdsHash, 
err: tt.mmdsErr} - api := newTestAPI(ctx, tt.accessToken, mmdsClient) + api := newTestAPI(tt.accessToken, mmdsClient) err := api.validateInitAccessToken(ctx, tt.requestToken) @@ -263,7 +263,7 @@ func TestCheckMMDSHash(t *testing.T) { t.Parallel() token := "my-secret-token" mmdsClient := &mockMMDSClient{hash: keys.HashAccessToken(token), err: nil} - api := newTestAPI(ctx, nil, mmdsClient) + api := newTestAPI(nil, mmdsClient) matches, exists := api.checkMMDSHash(ctx, secureTokenPtr(token)) @@ -274,7 +274,7 @@ func TestCheckMMDSHash(t *testing.T) { t.Run("returns no match when token hash differs from MMDS hash", func(t *testing.T) { t.Parallel() mmdsClient := &mockMMDSClient{hash: keys.HashAccessToken("different-token"), err: nil} - api := newTestAPI(ctx, nil, mmdsClient) + api := newTestAPI(nil, mmdsClient) matches, exists := api.checkMMDSHash(ctx, secureTokenPtr("my-token")) @@ -285,7 +285,7 @@ func TestCheckMMDSHash(t *testing.T) { t.Run("returns exists but no match when request token is nil", func(t *testing.T) { t.Parallel() mmdsClient := &mockMMDSClient{hash: keys.HashAccessToken("some-token"), err: nil} - api := newTestAPI(ctx, nil, mmdsClient) + api := newTestAPI(nil, mmdsClient) matches, exists := api.checkMMDSHash(ctx, nil) @@ -296,7 +296,7 @@ func TestCheckMMDSHash(t *testing.T) { t.Run("returns false, false when MMDS returns error", func(t *testing.T) { t.Parallel() mmdsClient := &mockMMDSClient{hash: "", err: assert.AnError} - api := newTestAPI(ctx, nil, mmdsClient) + api := newTestAPI(nil, mmdsClient) matches, exists := api.checkMMDSHash(ctx, secureTokenPtr("any-token")) @@ -307,7 +307,7 @@ func TestCheckMMDSHash(t *testing.T) { t.Run("returns false, false when MMDS returns empty hash with non-nil request", func(t *testing.T) { t.Parallel() mmdsClient := &mockMMDSClient{hash: "", err: nil} - api := newTestAPI(ctx, nil, mmdsClient) + api := newTestAPI(nil, mmdsClient) matches, exists := api.checkMMDSHash(ctx, secureTokenPtr("any-token")) @@ -318,7 +318,7 
@@ func TestCheckMMDSHash(t *testing.T) { t.Run("returns false, false when MMDS returns empty hash with nil request", func(t *testing.T) { t.Parallel() mmdsClient := &mockMMDSClient{hash: "", err: nil} - api := newTestAPI(ctx, nil, mmdsClient) + api := newTestAPI(nil, mmdsClient) matches, exists := api.checkMMDSHash(ctx, nil) @@ -329,7 +329,7 @@ func TestCheckMMDSHash(t *testing.T) { t.Run("returns true, true when MMDS returns hash of empty string with nil request (explicit reset)", func(t *testing.T) { t.Parallel() mmdsClient := &mockMMDSClient{hash: keys.HashAccessToken(""), err: nil} - api := newTestAPI(ctx, nil, mmdsClient) + api := newTestAPI(nil, mmdsClient) matches, exists := api.checkMMDSHash(ctx, nil) @@ -451,7 +451,7 @@ func TestSetData(t *testing.T) { t.Run(tt.name, func(t *testing.T) { t.Parallel() mmdsClient := &mockMMDSClient{hash: tt.mmdsHash, err: tt.mmdsErr} - api := newTestAPI(ctx, tt.existingToken, mmdsClient) + api := newTestAPI(tt.existingToken, mmdsClient) data := PostInitJSONBody{ AccessToken: tt.requestToken, @@ -478,7 +478,7 @@ func TestSetData(t *testing.T) { t.Run("sets environment variables", func(t *testing.T) { t.Parallel() mmdsClient := &mockMMDSClient{hash: "", err: assert.AnError} - api := newTestAPI(ctx, nil, mmdsClient) + api := newTestAPI(nil, mmdsClient) envVars := EnvVars{"FOO": "bar", "BAZ": "qux"} data := PostInitJSONBody{ @@ -499,7 +499,7 @@ func TestSetData(t *testing.T) { t.Run("sets default user", func(t *testing.T) { t.Parallel() mmdsClient := &mockMMDSClient{hash: "", err: assert.AnError} - api := newTestAPI(ctx, nil, mmdsClient) + api := newTestAPI(nil, mmdsClient) data := PostInitJSONBody{ DefaultUser: utilsShared.ToPtr("testuser"), @@ -514,7 +514,7 @@ func TestSetData(t *testing.T) { t.Run("does not set default user when empty", func(t *testing.T) { t.Parallel() mmdsClient := &mockMMDSClient{hash: "", err: assert.AnError} - api := newTestAPI(ctx, nil, mmdsClient) + api := newTestAPI(nil, mmdsClient) api.defaults.User 
= "original" data := PostInitJSONBody{ @@ -530,7 +530,7 @@ func TestSetData(t *testing.T) { t.Run("sets default workdir", func(t *testing.T) { t.Parallel() mmdsClient := &mockMMDSClient{hash: "", err: assert.AnError} - api := newTestAPI(ctx, nil, mmdsClient) + api := newTestAPI(nil, mmdsClient) data := PostInitJSONBody{ DefaultWorkdir: utilsShared.ToPtr("/home/user"), @@ -546,7 +546,7 @@ func TestSetData(t *testing.T) { t.Run("does not set default workdir when empty", func(t *testing.T) { t.Parallel() mmdsClient := &mockMMDSClient{hash: "", err: assert.AnError} - api := newTestAPI(ctx, nil, mmdsClient) + api := newTestAPI(nil, mmdsClient) originalWorkdir := "/original" api.defaults.Workdir = &originalWorkdir @@ -564,7 +564,7 @@ func TestSetData(t *testing.T) { t.Run("sets multiple fields at once", func(t *testing.T) { t.Parallel() mmdsClient := &mockMMDSClient{hash: "", err: assert.AnError} - api := newTestAPI(ctx, nil, mmdsClient) + api := newTestAPI(nil, mmdsClient) envVars := EnvVars{"KEY": "value"} data := PostInitJSONBody{ diff --git a/packages/envd/internal/api/multipart_upload.go b/packages/envd/internal/api/multipart_upload.go index 9a0e4fa560..e9aec3e90a 100644 --- a/packages/envd/internal/api/multipart_upload.go +++ b/packages/envd/internal/api/multipart_upload.go @@ -10,7 +10,6 @@ import ( "os/user" "path/filepath" "syscall" - "time" "github.com/google/uuid" @@ -26,10 +25,6 @@ const ( maxTotalSize = 10 * 1024 * 1024 * 1024 // maxPartSize limits individual part size to 100MB to prevent DoS maxPartSize = 100 * 1024 * 1024 - // uploadSessionTTL is the maximum time an upload session can remain active - uploadSessionTTL = 1 * time.Hour - // uploadSessionCleanupInterval is how often to check for expired sessions - uploadSessionCleanupInterval = 5 * time.Minute // maxNumParts caps the number of parts to prevent memory/CPU exhaustion. // With totalSize=10GB and partSize=1, numParts would be ~10 billion without this. 
maxNumParts = 10_000 @@ -220,7 +215,6 @@ func (a *API) PostFilesUploadInit(w http.ResponseWriter, r *http.Request, params UID: uid, GID: gid, Parts: make(map[int]partStatus), - CreatedAt: time.Now(), } a.uploads[uploadID] = session @@ -320,6 +314,13 @@ func (a *API) PutFilesUploadUploadId(w http.ResponseWriter, r *http.Request, upl return } + if session.Parts[params.Part] == partComplete { + session.mu.Unlock() + a.logger.Error().Str(string(logs.OperationIDKey), operationID).Str("uploadId", uploadId).Int("partNumber", params.Part).Msg("part was already uploaded") + jsonError(w, http.StatusConflict, fmt.Errorf("part %d was already uploaded for session %s", params.Part, uploadId)) + + return + } session.Parts[params.Part] = partInProgress session.wg.Add(1) // Must happen under mu while completed is false to avoid Add/Wait race session.mu.Unlock() diff --git a/packages/envd/internal/api/multipart_upload_test.go b/packages/envd/internal/api/multipart_upload_test.go index e63bac6137..6403f574ae 100644 --- a/packages/envd/internal/api/multipart_upload_test.go +++ b/packages/envd/internal/api/multipart_upload_test.go @@ -2,7 +2,6 @@ package api import ( "bytes" - "context" "encoding/json" "fmt" "net/http" @@ -27,10 +26,7 @@ func newMultipartTestAPI(t *testing.T) *API { EnvVars: utils.NewMap[string, string](), } - ctx, cancel := context.WithCancel(context.Background()) - t.Cleanup(cancel) - - return New(ctx, &logger, defaults, nil, true) + return New(&logger, defaults, nil, true) } func TestMultipartUpload(t *testing.T) { @@ -276,10 +272,9 @@ func TestMultipartUpload(t *testing.T) { // Session should still exist (completed flag reset) so client can retry api.uploadsLock.RLock() - session, exists := api.uploads[uploadId] + _, exists := api.uploads[uploadId] api.uploadsLock.RUnlock() assert.True(t, exists, "session should still exist after failed complete") - assert.False(t, session.completed.Load(), "completed flag should be reset") // Clean up api.uploadsLock.Lock() diff 
--git a/packages/envd/internal/api/store.go b/packages/envd/internal/api/store.go index 117913397b..97a40a435c 100644 --- a/packages/envd/internal/api/store.go +++ b/packages/envd/internal/api/store.go @@ -7,7 +7,6 @@ import ( "os" "sync" "sync/atomic" - "time" "github.com/rs/zerolog" @@ -36,10 +35,9 @@ type multipartUploadSession struct { UID int GID int Parts map[int]partStatus // partNumber -> status - CreatedAt time.Time - completed atomic.Bool // Set to true when complete/abort starts to prevent new parts - mu sync.Mutex // Protects Parts and activeWriters - wg sync.WaitGroup // Tracks in-flight part writes; Complete/Delete wait on this before closing DestFile + completed atomic.Bool // Set to true when complete/abort starts to prevent new parts + mu sync.Mutex // Protects Parts and activeWriters + wg sync.WaitGroup // Tracks in-flight part writes; Complete/Delete wait on this before closing DestFile } // ignoreNotExist returns nil if err is a "not exist" error, otherwise returns err unchanged. @@ -81,8 +79,8 @@ type API struct { uploadsLock sync.RWMutex } -func New(ctx context.Context, l *zerolog.Logger, defaults *execcontext.Defaults, mmdsChan chan *host.MMDSOpts, isNotFC bool) *API { - api := &API{ +func New(l *zerolog.Logger, defaults *execcontext.Defaults, mmdsChan chan *host.MMDSOpts, isNotFC bool) *API { + return &API{ logger: l, defaults: defaults, mmdsChan: mmdsChan, @@ -92,59 +90,6 @@ func New(ctx context.Context, l *zerolog.Logger, defaults *execcontext.Defaults, accessToken: &SecureToken{}, uploads: make(map[string]*multipartUploadSession), } - - // Start background cleanup for expired upload sessions - go api.cleanupExpiredUploads(ctx) - - return api -} - -// cleanupExpiredUploads periodically removes upload sessions that have exceeded their TTL. -// It stops when ctx is cancelled, preventing goroutine leaks in tests and enabling graceful shutdown. 
-func (a *API) cleanupExpiredUploads(ctx context.Context) { - ticker := time.NewTicker(uploadSessionCleanupInterval) - defer ticker.Stop() - - for { - select { - case <-ctx.Done(): - return - case <-ticker.C: - a.removeExpiredSessions() - } - } -} - -func (a *API) removeExpiredSessions() { - a.uploadsLock.Lock() - defer a.uploadsLock.Unlock() - - now := time.Now() - for uploadID, session := range a.uploads { - if now.Sub(session.CreatedAt) > uploadSessionTTL { - // Mark as completed under session.mu to synchronize with part - // reservation (which checks completed and calls wg.Add under - // the same lock). This prevents a late wg.Add after our Wait. - session.mu.Lock() - swapped := session.completed.CompareAndSwap(false, true) - session.mu.Unlock() - - if swapped { - // Unlink the file before removing from the map so a new Init - // for the same path creates a fresh inode. - if err := ignoreNotExist(os.Remove(session.FilePath)); err != nil { - a.logger.Warn().Err(err).Str("filePath", session.FilePath).Msg("failed to cleanup expired upload file") - } - delete(a.uploads, uploadID) - go func(s *multipartUploadSession) { - // Wait for any in-flight part writes to finish before closing the descriptor - s.wg.Wait() - s.DestFile.Close() - }(session) - a.logger.Info().Str("uploadId", uploadID).Msg("cleaned up expired multipart upload session") - } - } - } } func (a *API) GetHealth(w http.ResponseWriter, r *http.Request) { diff --git a/packages/envd/main.go b/packages/envd/main.go index 7a7d7fb565..e86a21ee6b 100644 --- a/packages/envd/main.go +++ b/packages/envd/main.go @@ -187,7 +187,7 @@ func main() { processLogger := l.With().Str("logger", "process").Logger() processService := processRpc.Handle(m, &processLogger, defaults, cgroupManager) - service := api.New(ctx, &envLogger, defaults, mmdsChan, isNotFC) + service := api.New(&envLogger, defaults, mmdsChan, isNotFC) handler := api.HandlerFromMux(service, m) middleware := 
authn.NewMiddleware(permissions.AuthenticateUsername) From 52a9b7a3040d761cb0bbf6d47e9376ec6ba36924 Mon Sep 17 00:00:00 2001 From: Mish Ushakov <10400064+mishushakov@users.noreply.github.com> Date: Tue, 24 Feb 2026 21:12:35 +0100 Subject: [PATCH 48/59] fix(envd): add missing response codes to OpenAPI spec The multipart upload implementation returns 409, 429, and 507 status codes that were not documented in the OpenAPI spec. Add Conflict and TooManyRequests response types and reference them from all affected endpoints. Regenerate all downstream code. Co-Authored-By: Claude Opus 4.6 --- packages/envd/internal/api/api.gen.go | 6 ++ packages/envd/spec/envd.yaml | 26 ++++++++ .../internal/sandbox/envd/envd.gen.go | 6 ++ tests/integration/internal/envd/generated.go | 62 +++++++++++++++++++ 4 files changed, 100 insertions(+) diff --git a/packages/envd/internal/api/api.gen.go b/packages/envd/internal/api/api.gen.go index b195ec18df..12f18816ee 100644 --- a/packages/envd/internal/api/api.gen.go +++ b/packages/envd/internal/api/api.gen.go @@ -116,6 +116,9 @@ type SignatureExpiration = int // User defines model for User. type User = string +// Conflict defines model for Conflict. +type Conflict = Error + // FileNotFound defines model for FileNotFound. type FileNotFound = Error @@ -131,6 +134,9 @@ type InvalidUser = Error // NotEnoughDiskSpace defines model for NotEnoughDiskSpace. type NotEnoughDiskSpace = Error +// TooManyRequests defines model for TooManyRequests. +type TooManyRequests = Error + // UploadNotFound defines model for UploadNotFound. 
type UploadNotFound = Error diff --git a/packages/envd/spec/envd.yaml b/packages/envd/spec/envd.yaml index 43fc8c3e9c..77c03e25d9 100644 --- a/packages/envd/spec/envd.yaml +++ b/packages/envd/spec/envd.yaml @@ -174,8 +174,14 @@ paths: $ref: "#/components/responses/InvalidPath" "401": $ref: "#/components/responses/InvalidUser" + "409": + $ref: "#/components/responses/Conflict" + "429": + $ref: "#/components/responses/TooManyRequests" "500": $ref: "#/components/responses/InternalServerError" + "507": + $ref: "#/components/responses/NotEnoughDiskSpace" /files/upload/{uploadId}: put: @@ -216,6 +222,8 @@ paths: $ref: "#/components/responses/InvalidPath" "404": $ref: "#/components/responses/UploadNotFound" + "409": + $ref: "#/components/responses/Conflict" "500": $ref: "#/components/responses/InternalServerError" "507": @@ -238,6 +246,8 @@ paths: description: Upload aborted and cleaned up successfully "404": $ref: "#/components/responses/UploadNotFound" + "409": + $ref: "#/components/responses/Conflict" "500": $ref: "#/components/responses/InternalServerError" @@ -262,8 +272,12 @@ paths: application/json: schema: $ref: "#/components/schemas/MultipartUploadComplete" + "400": + $ref: "#/components/responses/InvalidPath" "404": $ref: "#/components/responses/UploadNotFound" + "409": + $ref: "#/components/responses/Conflict" "500": $ref: "#/components/responses/InternalServerError" @@ -364,6 +378,18 @@ components: application/json: schema: $ref: "#/components/schemas/Error" + Conflict: + description: Conflict with current state of the resource + content: + application/json: + schema: + $ref: "#/components/schemas/Error" + TooManyRequests: + description: Too many concurrent upload sessions + content: + application/json: + schema: + $ref: "#/components/schemas/Error" UploadNotFound: description: Upload session not found content: diff --git a/packages/orchestrator/internal/sandbox/envd/envd.gen.go b/packages/orchestrator/internal/sandbox/envd/envd.gen.go index 
6aa045ace1..2b26c2f08d 100644 --- a/packages/orchestrator/internal/sandbox/envd/envd.gen.go +++ b/packages/orchestrator/internal/sandbox/envd/envd.gen.go @@ -111,6 +111,9 @@ type SignatureExpiration = int // User defines model for User. type User = string +// Conflict defines model for Conflict. +type Conflict = Error + // FileNotFound defines model for FileNotFound. type FileNotFound = Error @@ -126,6 +129,9 @@ type InvalidUser = Error // NotEnoughDiskSpace defines model for NotEnoughDiskSpace. type NotEnoughDiskSpace = Error +// TooManyRequests defines model for TooManyRequests. +type TooManyRequests = Error + // UploadNotFound defines model for UploadNotFound. type UploadNotFound = Error diff --git a/tests/integration/internal/envd/generated.go b/tests/integration/internal/envd/generated.go index 28bf900547..4a769929f5 100644 --- a/tests/integration/internal/envd/generated.go +++ b/tests/integration/internal/envd/generated.go @@ -120,6 +120,9 @@ type SignatureExpiration = int // User defines model for User. type User = string +// Conflict defines model for Conflict. +type Conflict = Error + // FileNotFound defines model for FileNotFound. type FileNotFound = Error @@ -135,6 +138,9 @@ type InvalidUser = Error // NotEnoughDiskSpace defines model for NotEnoughDiskSpace. type NotEnoughDiskSpace = Error +// TooManyRequests defines model for TooManyRequests. +type TooManyRequests = Error + // UploadNotFound defines model for UploadNotFound. 
type UploadNotFound = Error @@ -1141,7 +1147,10 @@ type PostFilesUploadInitResponse struct { JSON200 *MultipartUploadInit JSON400 *InvalidPath JSON401 *InvalidUser + JSON409 *Conflict + JSON429 *TooManyRequests JSON500 *InternalServerError + JSON507 *NotEnoughDiskSpace } // Status returns HTTPResponse.Status @@ -1164,6 +1173,7 @@ type DeleteFilesUploadUploadIdResponse struct { Body []byte HTTPResponse *http.Response JSON404 *UploadNotFound + JSON409 *Conflict JSON500 *InternalServerError } @@ -1189,6 +1199,7 @@ type PutFilesUploadUploadIdResponse struct { JSON200 *MultipartUploadPart JSON400 *InvalidPath JSON404 *UploadNotFound + JSON409 *Conflict JSON500 *InternalServerError JSON507 *NotEnoughDiskSpace } @@ -1213,7 +1224,9 @@ type PostFilesUploadUploadIdCompleteResponse struct { Body []byte HTTPResponse *http.Response JSON200 *MultipartUploadComplete + JSON400 *InvalidPath JSON404 *UploadNotFound + JSON409 *Conflict JSON500 *InternalServerError } @@ -1565,6 +1578,20 @@ func ParsePostFilesUploadInitResponse(rsp *http.Response) (*PostFilesUploadInitR } response.JSON401 = &dest + case strings.Contains(rsp.Header.Get("Content-Type"), "json") && rsp.StatusCode == 409: + var dest Conflict + if err := json.Unmarshal(bodyBytes, &dest); err != nil { + return nil, err + } + response.JSON409 = &dest + + case strings.Contains(rsp.Header.Get("Content-Type"), "json") && rsp.StatusCode == 429: + var dest TooManyRequests + if err := json.Unmarshal(bodyBytes, &dest); err != nil { + return nil, err + } + response.JSON429 = &dest + case strings.Contains(rsp.Header.Get("Content-Type"), "json") && rsp.StatusCode == 500: var dest InternalServerError if err := json.Unmarshal(bodyBytes, &dest); err != nil { @@ -1572,6 +1599,13 @@ func ParsePostFilesUploadInitResponse(rsp *http.Response) (*PostFilesUploadInitR } response.JSON500 = &dest + case strings.Contains(rsp.Header.Get("Content-Type"), "json") && rsp.StatusCode == 507: + var dest NotEnoughDiskSpace + if err := 
json.Unmarshal(bodyBytes, &dest); err != nil { + return nil, err + } + response.JSON507 = &dest + } return response, nil @@ -1598,6 +1632,13 @@ func ParseDeleteFilesUploadUploadIdResponse(rsp *http.Response) (*DeleteFilesUpl } response.JSON404 = &dest + case strings.Contains(rsp.Header.Get("Content-Type"), "json") && rsp.StatusCode == 409: + var dest Conflict + if err := json.Unmarshal(bodyBytes, &dest); err != nil { + return nil, err + } + response.JSON409 = &dest + case strings.Contains(rsp.Header.Get("Content-Type"), "json") && rsp.StatusCode == 500: var dest InternalServerError if err := json.Unmarshal(bodyBytes, &dest); err != nil { @@ -1645,6 +1686,13 @@ func ParsePutFilesUploadUploadIdResponse(rsp *http.Response) (*PutFilesUploadUpl } response.JSON404 = &dest + case strings.Contains(rsp.Header.Get("Content-Type"), "json") && rsp.StatusCode == 409: + var dest Conflict + if err := json.Unmarshal(bodyBytes, &dest); err != nil { + return nil, err + } + response.JSON409 = &dest + case strings.Contains(rsp.Header.Get("Content-Type"), "json") && rsp.StatusCode == 500: var dest InternalServerError if err := json.Unmarshal(bodyBytes, &dest); err != nil { @@ -1685,6 +1733,13 @@ func ParsePostFilesUploadUploadIdCompleteResponse(rsp *http.Response) (*PostFile } response.JSON200 = &dest + case strings.Contains(rsp.Header.Get("Content-Type"), "json") && rsp.StatusCode == 400: + var dest InvalidPath + if err := json.Unmarshal(bodyBytes, &dest); err != nil { + return nil, err + } + response.JSON400 = &dest + case strings.Contains(rsp.Header.Get("Content-Type"), "json") && rsp.StatusCode == 404: var dest UploadNotFound if err := json.Unmarshal(bodyBytes, &dest); err != nil { @@ -1692,6 +1747,13 @@ func ParsePostFilesUploadUploadIdCompleteResponse(rsp *http.Response) (*PostFile } response.JSON404 = &dest + case strings.Contains(rsp.Header.Get("Content-Type"), "json") && rsp.StatusCode == 409: + var dest Conflict + if err := json.Unmarshal(bodyBytes, &dest); err != nil { + 
return nil, err + } + response.JSON409 = &dest + case strings.Contains(rsp.Header.Get("Content-Type"), "json") && rsp.StatusCode == 500: var dest InternalServerError if err := json.Unmarshal(bodyBytes, &dest); err != nil { From 47c87373aac62f4570843253e2db64c9f87dde53 Mon Sep 17 00:00:00 2001 From: Mish Ushakov <10400064+mishushakov@users.noreply.github.com> Date: Tue, 24 Feb 2026 21:31:34 +0100 Subject: [PATCH 49/59] fix(envd): write to temp file during multipart upload to preserve originals PostFilesUploadInit was truncating the destination file immediately, destroying any pre-existing content before parts were uploaded. Now writes go to a temp file (filePath + ".upload." + uploadID) and Complete atomically renames it to the final path. Abort removes only the temp file, leaving any original file untouched. Co-Authored-By: Claude Opus 4.6 --- .../envd/internal/api/multipart_upload.go | 60 ++++++++------ .../internal/api/multipart_upload_test.go | 78 +++++++++++++++---- packages/envd/internal/api/store.go | 1 + 3 files changed, 103 insertions(+), 36 deletions(-) diff --git a/packages/envd/internal/api/multipart_upload.go b/packages/envd/internal/api/multipart_upload.go index e9aec3e90a..2d9aba526c 100644 --- a/packages/envd/internal/api/multipart_upload.go +++ b/packages/envd/internal/api/multipart_upload.go @@ -135,11 +135,11 @@ func (a *API) PostFilesUploadInit(w http.ResponseWriter, r *http.Request, params } // Atomically check session limit, check for path conflicts, create the - // file, and register the session. File creation (O_TRUNC) must happen - // under the lock to prevent two inits for the same path from both - // passing the check before either truncates. The syscalls under the lock - // (open, truncate, chown) are fast; heavy work like EnsureDirs is above. + // temp file, and register the session. Writing to a temp file avoids + // destroying any existing file at the destination until the upload is + // fully complete.
uploadID := uuid.NewString() + tempPath := filePath + ".upload." + uploadID a.uploadsLock.Lock() if len(a.uploads) >= maxUploadSessions { @@ -159,8 +159,9 @@ func (a *API) PostFilesUploadInit(w http.ResponseWriter, r *http.Request, params } } - // Create and preallocate the destination file - destFile, err := os.OpenFile(filePath, os.O_RDWR|os.O_CREATE|os.O_TRUNC, 0o666) + // Create and preallocate a temporary file; the final path is untouched + // until complete atomically renames the temp file into place. + destFile, err := os.OpenFile(tempPath, os.O_RDWR|os.O_CREATE|os.O_TRUNC, 0o666) if err != nil { a.uploadsLock.Unlock() if errors.Is(err, syscall.ENOSPC) { @@ -169,8 +170,8 @@ func (a *API) PostFilesUploadInit(w http.ResponseWriter, r *http.Request, params return } - a.logger.Error().Err(err).Str(string(logs.OperationIDKey), operationID).Msg("error creating destination file") - jsonError(w, http.StatusInternalServerError, fmt.Errorf("error creating destination file: %w", err)) + a.logger.Error().Err(err).Str(string(logs.OperationIDKey), operationID).Msg("error creating temp file") + jsonError(w, http.StatusInternalServerError, fmt.Errorf("error creating temp file: %w", err)) return } @@ -179,7 +180,7 @@ func (a *API) PostFilesUploadInit(w http.ResponseWriter, r *http.Request, params if body.TotalSize > 0 { if err := destFile.Truncate(body.TotalSize); err != nil { destFile.Close() - os.Remove(filePath) + os.Remove(tempPath) a.uploadsLock.Unlock() if errors.Is(err, syscall.ENOSPC) { a.logger.Error().Err(err).Str(string(logs.OperationIDKey), operationID).Msg("not enough disk space") @@ -194,10 +195,10 @@ func (a *API) PostFilesUploadInit(w http.ResponseWriter, r *http.Request, params } } - // Set ownership - if err := os.Chown(filePath, uid, gid); err != nil { + // Set ownership on the temp file + if err := os.Chown(tempPath, uid, gid); err != nil { destFile.Close() - os.Remove(filePath) + os.Remove(tempPath) a.uploadsLock.Unlock() 
a.logger.Error().Err(err).Str(string(logs.OperationIDKey), operationID).Msg("error changing file ownership") jsonError(w, http.StatusInternalServerError, fmt.Errorf("error changing file ownership: %w", err)) @@ -208,6 +209,7 @@ func (a *API) PostFilesUploadInit(w http.ResponseWriter, r *http.Request, params session := &multipartUploadSession{ UploadID: uploadID, FilePath: filePath, + TempPath: tempPath, DestFile: destFile, TotalSize: body.TotalSize, PartSize: body.PartSize, @@ -224,6 +226,7 @@ func (a *API) PostFilesUploadInit(w http.ResponseWriter, r *http.Request, params Str(string(logs.OperationIDKey), operationID). Str("uploadId", uploadID). Str("filePath", filePath). + Str("tempPath", tempPath). Int64("totalSize", body.TotalSize). Int64("partSize", body.PartSize). Int("numParts", numParts). @@ -467,18 +470,29 @@ func (a *API) PostFilesUploadUploadIdComplete(w http.ResponseWriter, r *http.Req return } - // All parts present — remove session from map and close the file + // All parts present — remove session from map, close the file, and + // atomically rename the temp file to the final destination path. a.uploadsLock.Lock() delete(a.uploads, uploadId) a.uploadsLock.Unlock() if err := session.DestFile.Close(); err != nil { - // Session is already removed from the map; clean up the orphaned file. - if rmErr := ignoreNotExist(os.Remove(session.FilePath)); rmErr != nil { - a.logger.Warn().Err(rmErr).Str(string(logs.OperationIDKey), operationID).Msg("failed to remove file after close error") + // Session is already removed from the map; clean up the orphaned temp file. 
+ if rmErr := ignoreNotExist(os.Remove(session.TempPath)); rmErr != nil { + a.logger.Warn().Err(rmErr).Str(string(logs.OperationIDKey), operationID).Msg("failed to remove temp file after close error") } - a.logger.Error().Err(err).Str(string(logs.OperationIDKey), operationID).Msg("error closing destination file") - jsonError(w, http.StatusInternalServerError, fmt.Errorf("error closing destination file: %w", err)) + a.logger.Error().Err(err).Str(string(logs.OperationIDKey), operationID).Msg("error closing temp file") + jsonError(w, http.StatusInternalServerError, fmt.Errorf("error closing temp file: %w", err)) + + return + } + + if err := os.Rename(session.TempPath, session.FilePath); err != nil { + if rmErr := ignoreNotExist(os.Remove(session.TempPath)); rmErr != nil { + a.logger.Warn().Err(rmErr).Str(string(logs.OperationIDKey), operationID).Msg("failed to remove temp file after rename error") + } + a.logger.Error().Err(err).Str(string(logs.OperationIDKey), operationID).Msg("error renaming temp file to destination") + jsonError(w, http.StatusInternalServerError, fmt.Errorf("error renaming temp file to destination: %w", err)) return } @@ -525,11 +539,11 @@ func (a *API) DeleteFilesUploadUploadId(w http.ResponseWriter, r *http.Request, return } session.mu.Unlock() - // Unlink the file before removing from the map so a new Init for - // the same path creates a fresh inode. In-flight writers use the - // open DestFile descriptor, which remains valid after unlink. - if err := ignoreNotExist(os.Remove(session.FilePath)); err != nil { - a.logger.Warn().Err(err).Str(string(logs.OperationIDKey), operationID).Str("uploadId", uploadId).Msg("error removing file") + // Unlink the temp file before removing from the map. In-flight + // writers use the open DestFile descriptor, which remains valid + // after unlink. The original file at FilePath is never touched. 
+ if err := ignoreNotExist(os.Remove(session.TempPath)); err != nil { + a.logger.Warn().Err(err).Str(string(logs.OperationIDKey), operationID).Str("uploadId", uploadId).Msg("error removing temp file") } delete(a.uploads, uploadId) } diff --git a/packages/envd/internal/api/multipart_upload_test.go b/packages/envd/internal/api/multipart_upload_test.go index 6403f574ae..b9dc07d491 100644 --- a/packages/envd/internal/api/multipart_upload_test.go +++ b/packages/envd/internal/api/multipart_upload_test.go @@ -67,7 +67,7 @@ func TestMultipartUpload(t *testing.T) { session := api.uploads[resp.UploadId] if session != nil { session.DestFile.Close() - os.Remove(session.FilePath) + os.Remove(session.TempPath) } delete(api.uploads, resp.UploadId) api.uploadsLock.Unlock() @@ -171,9 +171,17 @@ func TestMultipartUpload(t *testing.T) { require.NoError(t, err) uploadId := initResp.UploadId - // Verify file was created + // Verify temp file was created but destination is untouched + api.uploadsLock.RLock() + session := api.uploads[uploadId] + api.uploadsLock.RUnlock() + require.NotNil(t, session) + _, err = os.Stat(session.TempPath) + require.NoError(t, err, "temp file should exist after init") _, err = os.Stat(destPath) - require.NoError(t, err, "destination file should exist after init") + assert.True(t, os.IsNotExist(err), "destination should not exist yet") + + tempPath := session.TempPath // Abort upload abortReq := httptest.NewRequest(http.MethodDelete, "/files/upload/"+uploadId, nil) @@ -188,9 +196,11 @@ func TestMultipartUpload(t *testing.T) { api.uploadsLock.RUnlock() assert.False(t, exists) - // Verify file is cleaned up + // Verify temp file is cleaned up and destination still doesn't exist + _, err = os.Stat(tempPath) + assert.True(t, os.IsNotExist(err), "temp file should be removed after abort") _, err = os.Stat(destPath) - assert.True(t, os.IsNotExist(err)) + assert.True(t, os.IsNotExist(err), "destination should not exist after abort") }) t.Run("upload part to 
non-existent session", func(t *testing.T) { @@ -280,7 +290,7 @@ func TestMultipartUpload(t *testing.T) { api.uploadsLock.Lock() if s := api.uploads[uploadId]; s != nil { s.DestFile.Close() - os.Remove(s.FilePath) + os.Remove(s.TempPath) } delete(api.uploads, uploadId) api.uploadsLock.Unlock() @@ -342,7 +352,7 @@ func TestMultipartUpload(t *testing.T) { delete(api.uploads, uploadId) api.uploadsLock.Unlock() session.DestFile.Close() - os.Remove(destPath) + os.Remove(session.TempPath) }) t.Run("max sessions limit", func(t *testing.T) { @@ -386,7 +396,7 @@ func TestMultipartUpload(t *testing.T) { api.uploadsLock.Lock() for _, session := range api.uploads { session.DestFile.Close() - os.Remove(session.FilePath) + os.Remove(session.TempPath) } api.uploads = make(map[string]*multipartUploadSession) api.uploadsLock.Unlock() @@ -602,7 +612,7 @@ func TestMultipartUpload(t *testing.T) { session := api.uploads[uploadId] if session != nil { session.DestFile.Close() - os.Remove(session.FilePath) + os.Remove(session.TempPath) } delete(api.uploads, uploadId) api.uploadsLock.Unlock() @@ -652,7 +662,7 @@ func TestMultipartUpload(t *testing.T) { session := api.uploads[uploadId] if session != nil { session.DestFile.Close() - os.Remove(session.FilePath) + os.Remove(session.TempPath) } delete(api.uploads, uploadId) api.uploadsLock.Unlock() @@ -703,7 +713,7 @@ func TestMultipartUpload(t *testing.T) { session := api.uploads[uploadId] if session != nil { session.DestFile.Close() - os.Remove(session.FilePath) + os.Remove(session.TempPath) } delete(api.uploads, uploadId) api.uploadsLock.Unlock() @@ -759,7 +769,7 @@ func TestMultipartUpload(t *testing.T) { session := api.uploads[initResp2.UploadId] if session != nil { session.DestFile.Close() - os.Remove(session.FilePath) + os.Remove(session.TempPath) } delete(api.uploads, initResp2.UploadId) api.uploadsLock.Unlock() @@ -815,9 +825,51 @@ func TestMultipartUpload(t *testing.T) { session := api.uploads[initResp2.UploadId] if session != nil { 
session.DestFile.Close() - os.Remove(session.FilePath) + os.Remove(session.TempPath) } delete(api.uploads, initResp2.UploadId) api.uploadsLock.Unlock() }) + + t.Run("abort preserves original file", func(t *testing.T) { + t.Parallel() + api := newMultipartTestAPI(t) + tempDir := t.TempDir() + destPath := filepath.Join(tempDir, "existing-file.txt") + + // Create a pre-existing file at the destination + originalContent := []byte("original content") + require.NoError(t, os.WriteFile(destPath, originalContent, 0o644)) + + // Initialize upload targeting the same path + initBody := PostFilesUploadInitJSONRequestBody{ + Path: destPath, + TotalSize: 100, + PartSize: 50, + } + initBodyBytes, _ := json.Marshal(initBody) + + initReq := httptest.NewRequest(http.MethodPost, "/files/upload/init", bytes.NewReader(initBodyBytes)) + initReq.Header.Set("Content-Type", "application/json") + initW := httptest.NewRecorder() + + api.PostFilesUploadInit(initW, initReq, PostFilesUploadInitParams{}) + require.Equal(t, http.StatusOK, initW.Code) + + var initResp MultipartUploadInit + err := json.Unmarshal(initW.Body.Bytes(), &initResp) + require.NoError(t, err) + + // Abort the upload + abortReq := httptest.NewRequest(http.MethodDelete, "/files/upload/"+initResp.UploadId, nil) + abortW := httptest.NewRecorder() + + api.DeleteFilesUploadUploadId(abortW, abortReq, initResp.UploadId) + require.Equal(t, http.StatusNoContent, abortW.Code) + + // Verify original file is untouched + content, err := os.ReadFile(destPath) + require.NoError(t, err) + assert.Equal(t, string(originalContent), string(content)) + }) } diff --git a/packages/envd/internal/api/store.go b/packages/envd/internal/api/store.go index 97a40a435c..ea0990027c 100644 --- a/packages/envd/internal/api/store.go +++ b/packages/envd/internal/api/store.go @@ -28,6 +28,7 @@ const ( type multipartUploadSession struct { UploadID string FilePath string // Final destination path + TempPath string // Temporary file path during upload (renamed to 
FilePath on complete) DestFile *os.File // Open file handle for direct writes TotalSize int64 // Total expected file size (validated >= 0 at input) PartSize int64 // Size of each part (validated > 0 at input) From 39cbe4d9a3274bdbc93ea0bb9cc13c153687f480 Mon Sep 17 00:00:00 2001 From: Mish Ushakov <10400064+mishushakov@users.noreply.github.com> Date: Tue, 24 Feb 2026 21:42:31 +0100 Subject: [PATCH 50/59] fix(envd): move filesystem I/O out of uploadsLock in delete handler os.Remove was called while holding a.uploadsLock, so slow filesystem deletes could block unrelated upload operations. Move Remove, wg.Wait, and Close outside the lock since the temp path is unique per upload ID and the session is already removed from the map. Co-Authored-By: Claude Opus 4.6 --- packages/envd/internal/api/multipart_upload.go | 17 ++++++++++------- 1 file changed, 10 insertions(+), 7 deletions(-) diff --git a/packages/envd/internal/api/multipart_upload.go b/packages/envd/internal/api/multipart_upload.go index 2d9aba526c..845ef5395a 100644 --- a/packages/envd/internal/api/multipart_upload.go +++ b/packages/envd/internal/api/multipart_upload.go @@ -521,7 +521,9 @@ func (a *API) DeleteFilesUploadUploadId(w http.ResponseWriter, r *http.Request, operationID := logs.AssignOperationID() - // Get and remove the session + // Look up and remove the session from the map under the lock, but defer + // filesystem I/O (Remove, Close) until after the lock is released so a + // slow/unresponsive filesystem cannot block unrelated upload operations. a.uploadsLock.Lock() session, exists := a.uploads[uploadId] if exists { @@ -539,12 +541,6 @@ func (a *API) DeleteFilesUploadUploadId(w http.ResponseWriter, r *http.Request, return } session.mu.Unlock() - // Unlink the temp file before removing from the map. In-flight - // writers use the open DestFile descriptor, which remains valid - // after unlink. The original file at FilePath is never touched. 
- if err := ignoreNotExist(os.Remove(session.TempPath)); err != nil { - a.logger.Warn().Err(err).Str(string(logs.OperationIDKey), operationID).Str("uploadId", uploadId).Msg("error removing temp file") - } delete(a.uploads, uploadId) } a.uploadsLock.Unlock() @@ -556,6 +552,13 @@ func (a *API) DeleteFilesUploadUploadId(w http.ResponseWriter, r *http.Request, return } + // Unlink the temp file. The temp path is unique per upload ID so no + // other operation can conflict. In-flight writers use the open DestFile + // descriptor, which remains valid after unlink. + if err := ignoreNotExist(os.Remove(session.TempPath)); err != nil { + a.logger.Warn().Err(err).Str(string(logs.OperationIDKey), operationID).Str("uploadId", uploadId).Msg("error removing temp file") + } + // Wait for any in-flight part writes to finish before closing the file descriptor session.wg.Wait() session.DestFile.Close() From 9948c9358fa8dfe422dd92d317bcf6234b87611f Mon Sep 17 00:00:00 2001 From: Mish Ushakov <10400064+mishushakov@users.noreply.github.com> Date: Tue, 24 Feb 2026 21:49:17 +0100 Subject: [PATCH 51/59] fix(envd): move filesystem I/O out of uploadsLock in init handler Register a placeholder session (with completed=true) under the lock, then perform OpenFile/Truncate/Chown outside the lock to avoid blocking unrelated upload operations on slow filesystem calls. Co-Authored-By: Claude Opus 4.6 --- .../envd/internal/api/multipart_upload.go | 64 ++++++++++++------- 1 file changed, 40 insertions(+), 24 deletions(-) diff --git a/packages/envd/internal/api/multipart_upload.go b/packages/envd/internal/api/multipart_upload.go index 845ef5395a..0134800131 100644 --- a/packages/envd/internal/api/multipart_upload.go +++ b/packages/envd/internal/api/multipart_upload.go @@ -134,13 +134,26 @@ func (a *API) PostFilesUploadInit(w http.ResponseWriter, r *http.Request, params return } - // Atomically check session limit, check for path conflicts, create the - // temp file, and register the session. 
Writing to a temp file avoids - // destroying any existing file at the destination until the upload is - // fully complete. + // Register a placeholder session under the lock to claim the path and + // count toward the session limit, then perform file I/O outside the lock + // to avoid blocking unrelated upload operations. The session starts with + // completed=true so any concurrent access (Put/Complete/Delete) is safely + // rejected until initialization finishes. uploadID := uuid.NewString() tempPath := filePath + ".upload." + uploadID + session := &multipartUploadSession{ + UploadID: uploadID, + FilePath: filePath, + TempPath: tempPath, + TotalSize: body.TotalSize, + PartSize: body.PartSize, + NumParts: numParts, + UID: uid, + GID: gid, + } + session.completed.Store(true) // Block access until initialization finishes + a.uploadsLock.Lock() if len(a.uploads) >= maxUploadSessions { a.uploadsLock.Unlock() @@ -158,12 +171,23 @@ func (a *API) PostFilesUploadInit(w http.ResponseWriter, r *http.Request, params return } } + a.uploads[uploadID] = session + a.uploadsLock.Unlock() + + // removeSession unregisters the placeholder on file I/O failure. + removeSession := func() { + a.uploadsLock.Lock() + delete(a.uploads, uploadID) + a.uploadsLock.Unlock() + } - // Create and preallocate a temporary file; the final path is untouched - // until complete atomically renames the temp file into place. + // Create and preallocate a temporary file outside the lock; the final + // path is untouched until complete atomically renames the temp file + // into place. The temp path is unique per upload ID so no other + // operation can conflict. 
destFile, err := os.OpenFile(tempPath, os.O_RDWR|os.O_CREATE|os.O_TRUNC, 0o666) if err != nil { - a.uploadsLock.Unlock() + removeSession() if errors.Is(err, syscall.ENOSPC) { a.logger.Error().Err(err).Str(string(logs.OperationIDKey), operationID).Msg("not enough disk space") jsonError(w, http.StatusInsufficientStorage, fmt.Errorf("not enough disk space")) @@ -181,7 +205,7 @@ func (a *API) PostFilesUploadInit(w http.ResponseWriter, r *http.Request, params if err := destFile.Truncate(body.TotalSize); err != nil { destFile.Close() os.Remove(tempPath) - a.uploadsLock.Unlock() + removeSession() if errors.Is(err, syscall.ENOSPC) { a.logger.Error().Err(err).Str(string(logs.OperationIDKey), operationID).Msg("not enough disk space") jsonError(w, http.StatusInsufficientStorage, fmt.Errorf("not enough disk space")) @@ -199,28 +223,20 @@ func (a *API) PostFilesUploadInit(w http.ResponseWriter, r *http.Request, params if err := os.Chown(tempPath, uid, gid); err != nil { destFile.Close() os.Remove(tempPath) - a.uploadsLock.Unlock() + removeSession() a.logger.Error().Err(err).Str(string(logs.OperationIDKey), operationID).Msg("error changing file ownership") jsonError(w, http.StatusInternalServerError, fmt.Errorf("error changing file ownership: %w", err)) return } - session := &multipartUploadSession{ - UploadID: uploadID, - FilePath: filePath, - TempPath: tempPath, - DestFile: destFile, - TotalSize: body.TotalSize, - PartSize: body.PartSize, - NumParts: numParts, - UID: uid, - GID: gid, - Parts: make(map[int]partStatus), - } - - a.uploads[uploadID] = session - a.uploadsLock.Unlock() + // Initialization complete — set the file handle and parts map, then + // clear the completed flag to allow part uploads. The atomic store + // provides the necessary memory ordering: any goroutine that observes + // completed==false via Load is guaranteed to see DestFile and Parts. 
+ session.DestFile = destFile + session.Parts = make(map[int]partStatus) + session.completed.Store(false) a.logger.Debug(). Str(string(logs.OperationIDKey), operationID). From 64491e54c5de3b8d205cdbbfbc7f875eff6ceeb5 Mon Sep 17 00:00:00 2001 From: Mish Ushakov <10400064+mishushakov@users.noreply.github.com> Date: Tue, 24 Feb 2026 22:29:40 +0100 Subject: [PATCH 52/59] fix(envd): defer map deletion until after finalization in complete/delete MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Complete: keep session in map during Close+Rename to prevent a concurrent Init for the same path from starting before the rename finishes. Delete from map only after success (or after cleanup on error). Delete: use RLock to look up the session, CAS under session.mu, then write-Lock only for the map deletion — avoids holding the global write lock during CAS which blocked all concurrent RLock callers. Co-Authored-By: Claude Opus 4.6 --- .../envd/internal/api/multipart_upload.go | 66 +++++++++++-------- 1 file changed, 37 insertions(+), 29 deletions(-) diff --git a/packages/envd/internal/api/multipart_upload.go b/packages/envd/internal/api/multipart_upload.go index 0134800131..765f5d2c3b 100644 --- a/packages/envd/internal/api/multipart_upload.go +++ b/packages/envd/internal/api/multipart_upload.go @@ -486,14 +486,13 @@ func (a *API) PostFilesUploadUploadIdComplete(w http.ResponseWriter, r *http.Req return } - // All parts present — remove session from map, close the file, and - // atomically rename the temp file to the final destination path. - a.uploadsLock.Lock() - delete(a.uploads, uploadId) - a.uploadsLock.Unlock() - + // All parts present — close the file and rename to the final path. + // The session stays in the map during finalization to prevent a new + // upload to the same path from starting before the rename completes. 
if err := session.DestFile.Close(); err != nil { - // Session is already removed from the map; clean up the orphaned temp file. + a.uploadsLock.Lock() + delete(a.uploads, uploadId) + a.uploadsLock.Unlock() if rmErr := ignoreNotExist(os.Remove(session.TempPath)); rmErr != nil { a.logger.Warn().Err(rmErr).Str(string(logs.OperationIDKey), operationID).Msg("failed to remove temp file after close error") } @@ -504,6 +503,9 @@ func (a *API) PostFilesUploadUploadIdComplete(w http.ResponseWriter, r *http.Req } if err := os.Rename(session.TempPath, session.FilePath); err != nil { + a.uploadsLock.Lock() + delete(a.uploads, uploadId) + a.uploadsLock.Unlock() if rmErr := ignoreNotExist(os.Remove(session.TempPath)); rmErr != nil { a.logger.Warn().Err(rmErr).Str(string(logs.OperationIDKey), operationID).Msg("failed to remove temp file after rename error") } @@ -513,6 +515,10 @@ func (a *API) PostFilesUploadUploadIdComplete(w http.ResponseWriter, r *http.Req return } + a.uploadsLock.Lock() + delete(a.uploads, uploadId) + a.uploadsLock.Unlock() + a.logger.Debug(). Str(string(logs.OperationIDKey), operationID). Str("uploadId", uploadId). @@ -537,29 +543,12 @@ func (a *API) DeleteFilesUploadUploadId(w http.ResponseWriter, r *http.Request, operationID := logs.AssignOperationID() - // Look up and remove the session from the map under the lock, but defer - // filesystem I/O (Remove, Close) until after the lock is released so a - // slow/unresponsive filesystem cannot block unrelated upload operations. - a.uploadsLock.Lock() + // Look up the session under a read lock, then operate on it + // independently. This avoids holding the global write lock during + // the CAS, which would block all concurrent RLock callers. + a.uploadsLock.RLock() session, exists := a.uploads[uploadId] - if exists { - // Mark as completed under session.mu to synchronize with part - // reservation (which checks completed and calls wg.Add under the - // same lock). 
This prevents a part upload from calling wg.Add(1) - // after our wg.Wait below has already observed a zero counter. - session.mu.Lock() - if !session.completed.CompareAndSwap(false, true) { - session.mu.Unlock() - a.uploadsLock.Unlock() - a.logger.Error().Str(string(logs.OperationIDKey), operationID).Str("uploadId", uploadId).Msg("upload session is already completing") - jsonError(w, http.StatusConflict, fmt.Errorf("upload session %s is already completing or aborted", uploadId)) - - return - } - session.mu.Unlock() - delete(a.uploads, uploadId) - } - a.uploadsLock.Unlock() + a.uploadsLock.RUnlock() if !exists { a.logger.Error().Str(string(logs.OperationIDKey), operationID).Str("uploadId", uploadId).Msg("upload session not found") @@ -568,6 +557,25 @@ func (a *API) DeleteFilesUploadUploadId(w http.ResponseWriter, r *http.Request, return } + // Mark as completed under session.mu to synchronize with part + // reservation (which checks completed and calls wg.Add under the + // same lock). This prevents a part upload from calling wg.Add(1) + // after our wg.Wait below has already observed a zero counter. + session.mu.Lock() + if !session.completed.CompareAndSwap(false, true) { + session.mu.Unlock() + a.logger.Error().Str(string(logs.OperationIDKey), operationID).Str("uploadId", uploadId).Msg("upload session is already completing") + jsonError(w, http.StatusConflict, fmt.Errorf("upload session %s is already completing or aborted", uploadId)) + + return + } + session.mu.Unlock() + + // Remove session from map under the write lock. + a.uploadsLock.Lock() + delete(a.uploads, uploadId) + a.uploadsLock.Unlock() + // Unlink the temp file. The temp path is unique per upload ID so no // other operation can conflict. In-flight writers use the open DestFile // descriptor, which remains valid after unlink. 
From e30d7024e30c7c92eb79295a6a0437fc41e4b599 Mon Sep 17 00:00:00 2001 From: Mish Ushakov <10400064+mishushakov@users.noreply.github.com> Date: Tue, 24 Feb 2026 22:36:32 +0100 Subject: [PATCH 53/59] fix(envd): treat short part reads as 400 instead of 500 Allow io.ErrUnexpectedEOF to fall through to the size mismatch check, which correctly returns 400 Bad Request. Previously, a client sending fewer bytes than expectedSize was misclassified as a server error. Co-Authored-By: Claude Opus 4.6 --- packages/envd/internal/api/multipart_upload.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/packages/envd/internal/api/multipart_upload.go b/packages/envd/internal/api/multipart_upload.go index 765f5d2c3b..46fb1e7718 100644 --- a/packages/envd/internal/api/multipart_upload.go +++ b/packages/envd/internal/api/multipart_upload.go @@ -363,7 +363,7 @@ func (a *API) PutFilesUploadUploadId(w http.ResponseWriter, r *http.Request, upl // (~32KB) instead of reading the full part into a single allocation. 
offsetWriter := io.NewOffsetWriter(session.DestFile, offset) written, err := io.CopyN(offsetWriter, r.Body, expectedSize) - if err != nil && !errors.Is(err, io.EOF) { + if err != nil && !errors.Is(err, io.EOF) && !errors.Is(err, io.ErrUnexpectedEOF) { if errors.Is(err, syscall.ENOSPC) { a.logger.Error().Err(err).Str(string(logs.OperationIDKey), operationID).Msg("not enough disk space") jsonError(w, http.StatusInsufficientStorage, fmt.Errorf("not enough disk space")) From d0c6ecb968fcadbbe23c6509a9fbb6bdf9086509 Mon Sep 17 00:00:00 2001 From: Mish Ushakov <10400064+mishushakov@users.noreply.github.com> Date: Tue, 24 Feb 2026 22:57:39 +0100 Subject: [PATCH 54/59] fix(envd): address multipart upload resource leak and correctness issues - Protect session initialization (DestFile/Parts) with session.mu for proper memory ordering instead of relying on atomic store alone - Clean up session on response encoding failure to prevent permanent fd leak - Check and log DestFile.Close() error in delete handler - Check and log os.Remove() errors during init error paths to detect temp file leaks Co-Authored-By: Claude Opus 4.6 --- .../envd/internal/api/multipart_upload.go | 29 ++++++++++++++----- 1 file changed, 22 insertions(+), 7 deletions(-) diff --git a/packages/envd/internal/api/multipart_upload.go b/packages/envd/internal/api/multipart_upload.go index 46fb1e7718..f872d62fa4 100644 --- a/packages/envd/internal/api/multipart_upload.go +++ b/packages/envd/internal/api/multipart_upload.go @@ -204,7 +204,9 @@ func (a *API) PostFilesUploadInit(w http.ResponseWriter, r *http.Request, params if body.TotalSize > 0 { if err := destFile.Truncate(body.TotalSize); err != nil { destFile.Close() - os.Remove(tempPath) + if rmErr := ignoreNotExist(os.Remove(tempPath)); rmErr != nil { + a.logger.Warn().Err(rmErr).Str(string(logs.OperationIDKey), operationID).Msg("failed to remove temp file after truncate error") + } removeSession() if errors.Is(err, syscall.ENOSPC) { 
a.logger.Error().Err(err).Str(string(logs.OperationIDKey), operationID).Msg("not enough disk space") @@ -222,7 +224,9 @@ func (a *API) PostFilesUploadInit(w http.ResponseWriter, r *http.Request, params // Set ownership on the temp file if err := os.Chown(tempPath, uid, gid); err != nil { destFile.Close() - os.Remove(tempPath) + if rmErr := ignoreNotExist(os.Remove(tempPath)); rmErr != nil { + a.logger.Warn().Err(rmErr).Str(string(logs.OperationIDKey), operationID).Msg("failed to remove temp file after chown error") + } removeSession() a.logger.Error().Err(err).Str(string(logs.OperationIDKey), operationID).Msg("error changing file ownership") jsonError(w, http.StatusInternalServerError, fmt.Errorf("error changing file ownership: %w", err)) @@ -230,13 +234,15 @@ func (a *API) PostFilesUploadInit(w http.ResponseWriter, r *http.Request, params return } - // Initialization complete — set the file handle and parts map, then - // clear the completed flag to allow part uploads. The atomic store - // provides the necessary memory ordering: any goroutine that observes - // completed==false via Load is guaranteed to see DestFile and Parts. + // Initialization complete — set the file handle and parts map under + // session.mu, then clear the completed flag. The mutex ensures that + // any goroutine that later acquires session.mu and observes + // completed==false is guaranteed to see DestFile and Parts. + session.mu.Lock() session.DestFile = destFile session.Parts = make(map[int]partStatus) session.completed.Store(false) + session.mu.Unlock() a.logger.Debug(). Str(string(logs.OperationIDKey), operationID). @@ -254,6 +260,13 @@ func (a *API) PostFilesUploadInit(w http.ResponseWriter, r *http.Request, params UploadId: uploadID, }); err != nil { a.logger.Error().Err(err).Str(string(logs.OperationIDKey), operationID).Msg("failed to encode response") + // Client never received the uploadId, so clean up to avoid a permanent leak. 
+ session.completed.Store(true) + removeSession() + destFile.Close() + if rmErr := ignoreNotExist(os.Remove(tempPath)); rmErr != nil { + a.logger.Warn().Err(rmErr).Str(string(logs.OperationIDKey), operationID).Msg("failed to remove temp file after response encoding error") + } } } @@ -585,7 +598,9 @@ func (a *API) DeleteFilesUploadUploadId(w http.ResponseWriter, r *http.Request, // Wait for any in-flight part writes to finish before closing the file descriptor session.wg.Wait() - session.DestFile.Close() + if err := session.DestFile.Close(); err != nil { + a.logger.Warn().Err(err).Str(string(logs.OperationIDKey), operationID).Str("uploadId", uploadId).Msg("error closing temp file during abort") + } a.logger.Debug(). Str(string(logs.OperationIDKey), operationID). From 4f1d02e3dc375ad695e124f36158dd3179aaab1b Mon Sep 17 00:00:00 2001 From: Mish Ushakov <10400064+mishushakov@users.noreply.github.com> Date: Tue, 24 Feb 2026 23:17:39 +0100 Subject: [PATCH 55/59] fix(envd): close race conditions in multipart upload init and complete paths Fix two race conditions in the multipart upload handlers: 1. Init error path: set completed under session.mu and call wg.Wait() before destFile.Close() so in-flight part writes finish before the file descriptor is closed. 2. Complete missing-parts path: reset completed under session.mu to prevent a concurrent Complete from winning the CAS while the first Complete is still returning, which would cause two goroutines to race on Close/Rename. 
Co-Authored-By: Claude Opus 4.6 --- packages/envd/internal/api/multipart_upload.go | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/packages/envd/internal/api/multipart_upload.go b/packages/envd/internal/api/multipart_upload.go index f872d62fa4..800db76657 100644 --- a/packages/envd/internal/api/multipart_upload.go +++ b/packages/envd/internal/api/multipart_upload.go @@ -261,8 +261,14 @@ func (a *API) PostFilesUploadInit(w http.ResponseWriter, r *http.Request, params }); err != nil { a.logger.Error().Err(err).Str(string(logs.OperationIDKey), operationID).Msg("failed to encode response") // Client never received the uploadId, so clean up to avoid a permanent leak. + // Set completed under session.mu to synchronize with part uploads that + // check completed and call wg.Add under the same lock. + session.mu.Lock() session.completed.Store(true) + session.mu.Unlock() removeSession() + // Wait for any in-flight part writes before closing the file descriptor. + session.wg.Wait() destFile.Close() if rmErr := ignoreNotExist(os.Remove(tempPath)); rmErr != nil { a.logger.Warn().Err(rmErr).Str(string(logs.OperationIDKey), operationID).Msg("failed to remove temp file after response encoding error") @@ -487,8 +493,13 @@ func (a *API) PostFilesUploadUploadIdComplete(w http.ResponseWriter, r *http.Req session.mu.Unlock() if len(missingParts) > 0 { - // Reset completed flag so the client can upload missing parts and retry + // Reset completed flag under session.mu so the client can upload missing + // parts and retry. Holding the lock prevents a concurrent Complete from + // winning the CAS (false→true) before this goroutine has returned, + // which would cause two goroutines to race on Close/Rename. + session.mu.Lock() session.completed.Store(false) + session.mu.Unlock() a.logger.Error(). Str(string(logs.OperationIDKey), operationID). Str("uploadId", uploadId). 
From 5d77cde14fc331ed740c99fad6bcd2562a23fc05 Mon Sep 17 00:00:00 2001 From: Mish Ushakov <10400064+mishushakov@users.noreply.github.com> Date: Wed, 25 Feb 2026 16:19:41 +0100 Subject: [PATCH 56/59] fix(envd): move EnsureDirs after authoritative session-limit check EnsureDirs was called before the write-lock session-limit check, meaning requests that would be rejected for exceeding the limit still created directories as a side effect. Move it after the session is registered so all file I/O happens only for requests that actually claim a slot. Co-Authored-By: Claude Opus 4.6 --- packages/envd/internal/api/multipart_upload.go | 18 ++++++++++-------- 1 file changed, 10 insertions(+), 8 deletions(-) diff --git a/packages/envd/internal/api/multipart_upload.go b/packages/envd/internal/api/multipart_upload.go index 800db76657..39a5bc7a6c 100644 --- a/packages/envd/internal/api/multipart_upload.go +++ b/packages/envd/internal/api/multipart_upload.go @@ -126,14 +126,6 @@ func (a *API) PostFilesUploadInit(w http.ResponseWriter, r *http.Request, params return } - // Ensure parent directories exist - if err := permissions.EnsureDirs(filepath.Dir(filePath), uid, gid); err != nil { - a.logger.Error().Err(err).Str(string(logs.OperationIDKey), operationID).Msg("error ensuring directories") - jsonError(w, http.StatusInternalServerError, fmt.Errorf("error ensuring directories for %q: %w", filepath.Dir(filePath), err)) - - return - } - // Register a placeholder session under the lock to claim the path and // count toward the session limit, then perform file I/O outside the lock // to avoid blocking unrelated upload operations. The session starts with @@ -181,6 +173,16 @@ func (a *API) PostFilesUploadInit(w http.ResponseWriter, r *http.Request, params a.uploadsLock.Unlock() } + // Ensure parent directories exist after the authoritative session-limit + // check to avoid creating directories for requests that will be rejected. 
+ if err := permissions.EnsureDirs(filepath.Dir(filePath), uid, gid); err != nil { + removeSession() + a.logger.Error().Err(err).Str(string(logs.OperationIDKey), operationID).Msg("error ensuring directories") + jsonError(w, http.StatusInternalServerError, fmt.Errorf("error ensuring directories for %q: %w", filepath.Dir(filePath), err)) + + return + } + // Create and preallocate a temporary file outside the lock; the final // path is untouched until complete atomically renames the temp file // into place. The temp path is unique per upload ID so no other From 1005e97c3b583edf9668cc1f2a2d87ea464bf93c Mon Sep 17 00:00:00 2001 From: Mish Ushakov <10400064+mishushakov@users.noreply.github.com> Date: Thu, 26 Feb 2026 17:43:54 +0100 Subject: [PATCH 57/59] fix(envd): return 200 for part writes that succeed during concurrent completion When a part write completes successfully but a concurrent Complete has already set the completed flag, the handler was returning 409 even though the data was on disk and marked partComplete. Complete's parts scan would count the part as present and return 200, giving the client an inconsistent view (409 for part upload, 200 for complete). Remove the post-write completed check so the client always gets 200 when its data was actually written, matching what Complete will report. Co-Authored-By: Claude Opus 4.6 --- packages/envd/internal/api/multipart_upload.go | 14 ++++---------- 1 file changed, 4 insertions(+), 10 deletions(-) diff --git a/packages/envd/internal/api/multipart_upload.go b/packages/envd/internal/api/multipart_upload.go index 39a5bc7a6c..87f31354db 100644 --- a/packages/envd/internal/api/multipart_upload.go +++ b/packages/envd/internal/api/multipart_upload.go @@ -413,20 +413,14 @@ func (a *API) PutFilesUploadUploadId(w http.ResponseWriter, r *http.Request, upl return } - // Finalize: always mark the part as complete since the data was written to disk. + // Finalize: mark the part as complete since the data was written to disk. 
// Mark partWritten so the deferred cleanup does not revert the status. - // Then check completed — if the session was finalized mid-write, return 409 - // but leave the part as partComplete so Complete's validation sees it. + // We intentionally do not check session.completed here — the write + // succeeded and Complete's parts scan will count it, so returning 200 + // gives the client an accurate view regardless of concurrent completion. session.mu.Lock() session.Parts[params.Part] = partComplete partWritten = true - if session.completed.Load() { - session.mu.Unlock() - a.logger.Error().Str(string(logs.OperationIDKey), operationID).Str("uploadId", uploadId).Int("partNumber", params.Part).Msg("session completed during part upload") - jsonError(w, http.StatusConflict, fmt.Errorf("upload session %s was completed or aborted during part upload", uploadId)) - - return - } session.mu.Unlock() a.logger.Debug(). From ebc550c20e97ec1580df2784335f7dc2c41f957e Mon Sep 17 00:00:00 2001 From: Mish Ushakov <10400064+mishushakov@users.noreply.github.com> Date: Thu, 26 Feb 2026 17:57:32 +0100 Subject: [PATCH 58/59] fix(envd): prevent int overflow in numParts on 32-bit systems Compute numParts as int64 and validate against maxNumParts before casting to int. On 32-bit systems, the previous int() cast could overflow to a negative value for large totalSize/small partSize combinations, bypassing the cap check entirely. Co-Authored-By: Claude Opus 4.6 --- packages/envd/internal/api/multipart_upload.go | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/packages/envd/internal/api/multipart_upload.go b/packages/envd/internal/api/multipart_upload.go index 87f31354db..7c71b58838 100644 --- a/packages/envd/internal/api/multipart_upload.go +++ b/packages/envd/internal/api/multipart_upload.go @@ -66,18 +66,21 @@ func (a *API) PostFilesUploadInit(w http.ResponseWriter, r *http.Request, params return } - // Compute numParts and validate the cap before any file I/O. 
- var numParts int + // Compute numParts as int64 and validate the cap before any file I/O. + // The cast to int is safe after the cap check (maxNumParts fits in any int). + var numParts64 int64 if body.TotalSize > 0 { - numParts = int((body.TotalSize + body.PartSize - 1) / body.PartSize) + numParts64 = (body.TotalSize + body.PartSize - 1) / body.PartSize } - if numParts > maxNumParts { - jsonError(w, http.StatusBadRequest, fmt.Errorf("upload would require %d parts, exceeding the maximum of %d (increase partSize)", numParts, maxNumParts)) + if numParts64 > maxNumParts { + jsonError(w, http.StatusBadRequest, fmt.Errorf("upload would require %d parts, exceeding the maximum of %d (increase partSize)", numParts64, maxNumParts)) return } + numParts := int(numParts64) + // Check session limit early, before any file I/O, to avoid truncating // existing files only to reject the request due to capacity. a.uploadsLock.RLock() From 5c9d823e57d8757ccb2df27ee23cdb0e93d5ccb5 Mon Sep 17 00:00:00 2001 From: Mish Ushakov <10400064+mishushakov@users.noreply.github.com> Date: Thu, 26 Feb 2026 18:07:04 +0100 Subject: [PATCH 59/59] fix(envd): limit part upload body size with MaxBytesReader Without http.MaxBytesReader, the part handler has no explicit upper bound on the request body it reads. net/http streams the request body to the handler rather than buffering it in memory, so a client sending a significantly oversized body (e.g. 1 GB for a 100 MB part) costs connection time and bandwidth rather than memory proportional to the excess. Wrap r.Body with MaxBytesReader(expectedSize+1) before io.CopyN so the server stops reading after the expected part size plus one byte (needed for the existing trailing-byte check).
Co-Authored-By: Claude Opus 4.6 --- packages/envd/internal/api/multipart_upload.go | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/packages/envd/internal/api/multipart_upload.go b/packages/envd/internal/api/multipart_upload.go index 7c71b58838..5daf6f4ee2 100644 --- a/packages/envd/internal/api/multipart_upload.go +++ b/packages/envd/internal/api/multipart_upload.go @@ -382,6 +382,12 @@ func (a *API) PutFilesUploadUploadId(w http.ResponseWriter, r *http.Request, upl } }() + // Limit the request body to expectedSize+1 so the server does not buffer + // an arbitrarily large oversized body. The +1 allows the trailing-byte + // check below to detect excess data without triggering MaxBytesError + // during io.CopyN itself (which reads exactly expectedSize bytes). + r.Body = http.MaxBytesReader(w, r.Body, expectedSize+1) + // Stream the part data directly to the file at offset without buffering the // entire part in memory. OffsetWriter + CopyN uses a small internal buffer // (~32KB) instead of reading the full part into a single allocation.