From eeaef308bb418fc11db66756c7982021635c9c68 Mon Sep 17 00:00:00 2001 From: yaoxin jing Date: Thu, 5 Feb 2026 00:05:04 +0000 Subject: [PATCH] Allocate exclusive per-container UID/GID blocks Sysbox-CE currently maps all containers to the same host UID/GID range, meaning container root (UID 0) resolves to the same host user across all containers. This weakens isolation since a breakout from one container could access another container's files. Change Alloc() to hand out sequential, non-overlapping 65536-uid/gid blocks from the subordinate range. Each container ID gets its own block; repeated calls with the same ID return the existing allocation. Free() recycles blocks for reuse. Also increase subidRangeSize from 65536 to 268435456 so that configSubidRange() provisions enough subordinate IDs for up to 4096 concurrent containers (same capacity as Sysbox-EE). Signed-off-by: yaoxin jing --- main.go | 2 +- subidAlloc/subidAllocSimple.go | 65 +++++++++++++++-- subidAlloc/subidAllocSimple_test.go | 108 ++++++++++++++++++++++------ 3 files changed, 147 insertions(+), 28 deletions(-) diff --git a/main.go b/main.go index 6beb138..a03a879 100644 --- a/main.go +++ b/main.go @@ -32,7 +32,7 @@ var ( sysboxRunDir string = "/run/sysbox" sysboxLibDirDefault string = "/var/lib/sysbox" sysboxMgrPidFile string = sysboxRunDir + "/sysmgr.pid" - subidRangeSize uint64 = 65536 + subidRangeSize uint64 = 268435456 // 4096 containers * 65536 UIDs each ) const ( diff --git a/subidAlloc/subidAllocSimple.go b/subidAlloc/subidAllocSimple.go index c44d28c..0d48317 100644 --- a/subidAlloc/subidAllocSimple.go +++ b/subidAlloc/subidAllocSimple.go @@ -28,6 +28,7 @@ import ( "fmt" "io" "sort" + "sync" mapset "github.com/deckarep/golang-set" "github.com/nestybox/sysbox-libs/formatter" @@ -42,7 +43,12 @@ const ( // subidAlloc class (implements the UidAllocator interface) type subidAlloc struct { - idRange user.SubID + mu sync.Mutex + idRange user.SubID + allocations map[string]uint32 // container ID -> block index + freeBlocks []uint32 // recycled block indices + nextBlock uint32 // next unallocated block index + maxBlocks uint32 // total blocks available in range } // New creates an subidAlloc object @@ -81,7 +87,9 @@ func New(userName string, subuidSrc, subgidSrc io.Reader) (intf.SubidAlloc, erro return nil, fmt.Errorf("could not find matching subuid and subgids range for user %s", userName) } - sub := &subidAlloc{} + sub := &subidAlloc{ + allocations: make(map[string]uint32), + } // find a common range that is large enough for the allocation size foundRange := false @@ -97,6 +105,8 @@ func New(userName string, subuidSrc, subgidSrc io.Reader) (intf.SubidAlloc, erro return nil, fmt.Errorf("did not find a large enough subuid range for user %s (need %v)", userName, allocBlkSize) } + sub.maxBlocks = uint32(sub.idRange.Count / int64(allocBlkSize)) + return sub, nil } @@ -130,15 +140,56 @@ func getCommonRanges(uidRanges, gidRanges []user.SubID) []user.SubID { } // Implements intf.SubidAlloc.Alloc +// +// Each call with a new container ID allocates an exclusive 65536-uid/gid block +// from the subordinate range. Repeated calls with the same ID return the same block. func (sub *subidAlloc) Alloc(id string, size uint64) (uint32, uint32, error) { - subid := sub.idRange - logrus.Debugf("Alloc(%s, %v) = %v, %v", - formatter.ContainerID{id}, size, subid, subid) - return uint32(subid.SubID), uint32(subid.SubID), nil + sub.mu.Lock() + defer sub.mu.Unlock() + + // Return existing allocation for this container + if blockIdx, ok := sub.allocations[id]; ok { + uid := uint32(sub.idRange.SubID) + blockIdx*allocBlkSize + logrus.Debugf("Alloc(%s, %v) = %v (existing block %d)", + formatter.ContainerID{id}, size, uid, blockIdx) + return uid, uid, nil + } + + // Allocate a new block: prefer recycled, then fresh + var blockIdx uint32 + if n := len(sub.freeBlocks); n > 0 { + blockIdx = sub.freeBlocks[n-1] + sub.freeBlocks = sub.freeBlocks[:n-1] + } else if sub.nextBlock < sub.maxBlocks { + blockIdx = sub.nextBlock + sub.nextBlock++ + } else { + return 0, 0, fmt.Errorf("exhausted: no more subid blocks available (max %d containers)", sub.maxBlocks) + } + + sub.allocations[id] = blockIdx + uid := uint32(sub.idRange.SubID) + blockIdx*allocBlkSize + + logrus.Debugf("Alloc(%s, %v) = %v (block %d)", + formatter.ContainerID{id}, size, uid, blockIdx) + return uid, uid, nil } // Implements intf.SubidAlloc.Free +// +// Releases the block allocated for the given container, making it available for reuse. func (sub *subidAlloc) Free(id string) error { - logrus.Debugf("Free(%v)", formatter.ContainerID{id}) + sub.mu.Lock() + defer sub.mu.Unlock() + + blockIdx, ok := sub.allocations[id] + if !ok { + return fmt.Errorf("not-found: container %s has no allocation", id) + } + + delete(sub.allocations, id) + sub.freeBlocks = append(sub.freeBlocks, blockIdx) + + logrus.Debugf("Free(%v) released block %d", formatter.ContainerID{id}, blockIdx) return nil } diff --git a/subidAlloc/subidAllocSimple_test.go b/subidAlloc/subidAllocSimple_test.go index 163977d..6811cd0 100644 --- a/subidAlloc/subidAllocSimple_test.go +++ b/subidAlloc/subidAllocSimple_test.go @@ -60,8 +60,8 @@ func testAlloc(t *testing.T, subidAlloc intf.SubidAlloc, tests []allocTest) { func TestAllocBasic(t *testing.T) { - subuidCfg := strings.NewReader(`testUser:0:655360`) - subgidCfg := strings.NewReader(`testUser:0:655360`) + subuidCfg := strings.NewReader(`testUser:100000:655360`) + subgidCfg := strings.NewReader(`testUser:100000:655360`) subidAlloc, err := New("testUser", subuidCfg, subgidCfg) if err != nil { @@ -71,9 +71,9 @@ func TestAllocBasic(t *testing.T) { var tests = []allocTest{ // id, size, wantUid, wantGid, wantErr - {"1", 65536, 0, 0, ""}, - {"2", 65536, 0, 0, ""}, - {"3", 65536, 0, 0, ""}, + {"1", 65536, 100000, 100000, ""}, + {"2", 65536, 165536, 165536, ""}, + {"3", 65536, 231072, 231072, ""}, } testAlloc(t, subidAlloc, tests) @@ -93,11 +93,12 @@ func TestAllocInvalidUser(t *testing.T) { func TestAllocMultiRange(t *testing.T) { - subuidCfg := strings.NewReader(`testUser:0:65536 - testUser:524288:65536`) + // Two common ranges; allocator picks the first one large enough (100000:196608 = 3 blocks) + subuidCfg := strings.NewReader(`testUser:100000:196608 + testUser:524288:196608`) - subgidCfg := strings.NewReader(`testUser:0:65536 - testUser:524288:65536`) + subgidCfg := strings.NewReader(`testUser:100000:196608 + testUser:524288:196608`) subidAlloc, err := New("testUser", subuidCfg, subgidCfg) if err != nil { @@ -107,9 +108,9 @@ func TestAllocMultiRange(t *testing.T) { var tests = []allocTest{ // id, size, wantUid, wantGid, wantErr - {"1", 65536, 0, 0, ""}, - {"2", 65536, 0, 0, ""}, - {"3", 65536, 0, 0, ""}, + {"1", 65536, 100000, 100000, ""}, + {"2", 65536, 165536, 165536, ""}, + {"3", 65536, 231072, 231072, ""}, } testAlloc(t, subidAlloc, tests) @@ -142,33 +143,100 @@ func TestGetCommonRanges(t *testing.T) { func TestAllocCommonRange(t *testing.T) { - subuidCfg := strings.NewReader(`testUser:0:65536 + subuidCfg := strings.NewReader(`testUser:100000:65536 testUser:524288:65536`) - subgidCfg := strings.NewReader(`testUser:65536:65536 - testUser:0:65536`) + subgidCfg := strings.NewReader(`testUser:165536:65536 + testUser:100000:65536`) subidAlloc, err := New("testUser", subuidCfg, subgidCfg) if err != nil { t.Errorf("failed to create allocator: %v", err) } + // Same container ID twice -> returns the same exclusive block var tests = []allocTest{ // id, size, wantUid, wantGid, wantErr - {"1", 65536, 0, 0, ""}, - {"1", 65536, 0, 0, ""}, + {"1", 65536, 100000, 100000, ""}, + {"1", 65536, 100000, 100000, ""}, } testAlloc(t, subidAlloc, tests) - subuidCfg = strings.NewReader(`testUser:0:65536 + // No common ranges -> New() should fail + subuidCfg = strings.NewReader(`testUser:100000:65536 testUser:524288:65536`) - subgidCfg = strings.NewReader(`testUser:65536:65536 - testUser:231072:65536`) + subgidCfg = strings.NewReader(`testUser:165536:65536 + testUser:331072:65536`) subidAlloc, err = New("testUser", subuidCfg, subgidCfg) if err == nil { t.Errorf("subidAlloc() passed; expected failure") } } + +func TestAllocExhausted(t *testing.T) { + + // Range has room for exactly 2 blocks (2 * 65536 = 131072) + subuidCfg := strings.NewReader(`testUser:100000:131072`) + subgidCfg := strings.NewReader(`testUser:100000:131072`) + + subidAlloc, err := New("testUser", subuidCfg, subgidCfg) + if err != nil { + t.Errorf("failed to create allocator: %v", err) + return + } + + var tests = []allocTest{ + {"1", 65536, 100000, 100000, ""}, + {"2", 65536, 165536, 165536, ""}, + {"3", 65536, 0, 0, "exhausted: no more subid blocks available (max 2 containers)"}, + } + + testAlloc(t, subidAlloc, tests) +} + +func TestAllocFreeReuse(t *testing.T) { + + subuidCfg := strings.NewReader(`testUser:100000:131072`) + subgidCfg := strings.NewReader(`testUser:100000:131072`) + + alloc, err := New("testUser", subuidCfg, subgidCfg) + if err != nil { + t.Errorf("failed to create allocator: %v", err) + return + } + + // Allocate 2 containers (fills all blocks) + uid1, _, err := alloc.Alloc("c1", 65536) + if err != nil || uid1 != 100000 { + t.Errorf("Alloc(c1) failed: got uid=%v, err=%v; want uid=100000", uid1, err) + } + + uid2, _, err := alloc.Alloc("c2", 65536) + if err != nil || uid2 != 165536 { + t.Errorf("Alloc(c2) failed: got uid=%v, err=%v; want uid=165536", uid2, err) + } + + // All blocks used; next alloc should fail + _, _, err = alloc.Alloc("c3", 65536) + if err == nil { + t.Errorf("Alloc(c3) should have failed (exhausted)") + } + + // Free c1 and allocate c3 -> should reuse c1's block + if err := alloc.Free("c1"); err != nil { + t.Errorf("Free(c1) failed: %v", err) + } + + uid3, _, err := alloc.Alloc("c3", 65536) + if err != nil || uid3 != 100000 { + t.Errorf("Alloc(c3) after Free(c1) failed: got uid=%v, err=%v; want uid=100000 (reused block)", uid3, err) + } + + // Free non-existent container should error + if err := alloc.Free("nonexistent"); err == nil { + t.Errorf("Free(nonexistent) should have failed") + } +}