Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
34 changes: 34 additions & 0 deletions BLACKSMITH.md
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,40 @@ from `github.com/useblacksmith/bazel-remote/v2`. Existing FA imports
intentionally keep the upstream import path so this fork remains
behavior-preserving until Blacksmith-specific changes are needed.

## Build cache storage prefixing

BLA-4006 keeps the default upstream behavior unless FA attaches an explicit
request-scoped storage prefix to the cache operation context.

The existing configured S3 prefix remains the default path for Buck2 and any
other callers that do not opt in to request-scoped routing. For Bazel, FA should
resolve the authorized VM/job namespace to the full physical prefix:

```text
<MINIO_PREFIX>/bazel/<environment>/<region>/<model_installation_id>/<repository_id>/<generation>
```

and attach it with `cache.WithStoragePrefix`. The S3 proxy then uses that
request-scoped prefix when constructing Action Cache and CAS object keys. Action
Cache also remains isolated by bazel-remote's existing instance-name key
remapping, so the physical prefix is additive and gives cache-clear/delete
operations a visible repo/generation boundary. The local disk cache AC/CAS keys
also include a hash of the request-scoped prefix, so a new repo/generation
namespace does not hit stale local entries before reaching the S3 backend. This lets a single
shared bazel-remote process route AC/CAS puts/gets to the correct
repo/generation namespace while preserving existing Buck2 behavior.

Local disk cache entries identify the full request prefix by a stable SHA-256
hex digest (`cache.StoragePrefixID`), so the LRU can distinguish identical
AC/CAS digests from different repo/generation namespaces without using S3-style
slash-heavy prefixes in local paths. MinIO/S3 object keys use the real
request-scoped prefix directly, so broad remote deletion still targets
`<MINIO_PREFIX>/bazel/.../<generation>/`.

For Bazel requests, FA should also mark the request with
`cache.WithRequiredStoragePrefix`. If a request reaches the S3 proxy with that
marker but without a request-scoped prefix, bazel-remote logs that it is falling
back to the configured process-wide prefix. Buck2 should not set this marker.

## Security and upstream patch tracking

Track upstream security fixes by monitoring the upstream repository's releases,
Expand Down
30 changes: 30 additions & 0 deletions cache/cache.go
Original file line number Diff line number Diff line change
Expand Up @@ -107,3 +107,33 @@ func TransformActionCacheKey(key, instance string, logger Logger) string {
// LookupKey returns the lookup key for an entry of the given kind and hash,
// formed as kind.String() followed by "/" and the hash.
func LookupKey(kind EntryKind, hash string) string {
	kindName := kind.String()
	return kindName + "/" + hash
}

// StoragePrefixID returns the lowercase hex encoding of the SHA-256 digest of
// prefix. The result is always 64 characters long and contains no "/".
func StoragePrefixID(prefix string) string {
	hasher := sha256.New()
	// hash.Hash documents that Write never returns an error.
	hasher.Write([]byte(prefix))
	return hex.EncodeToString(hasher.Sum(nil))
}

// StoragePrefixIDFromContext returns the StoragePrefixID of the storage prefix
// that StoragePrefixFromContext finds on ctx. When no prefix is reported it
// returns an empty ID and false.
func StoragePrefixIDFromContext(ctx context.Context) (string, bool) {
	if prefix, found := StoragePrefixFromContext(ctx); found {
		return StoragePrefixID(prefix), true
	}
	return "", false
}

// LookupKeyForContext returns the lookup key for kind and hash, taking any
// storage prefix carried by ctx into account.
//
// Only AC and CAS entries are affected: when ctx yields a storage prefix ID the
// key comes from LookupKeyForStoragePrefixID. Every other kind, and any
// context without a prefix, gets the plain LookupKey result.
func LookupKeyForContext(ctx context.Context, kind EntryKind, hash string) string {
	if kind != AC && kind != CAS {
		return LookupKey(kind, hash)
	}

	prefixID, found := StoragePrefixIDFromContext(ctx)
	if !found {
		return LookupKey(kind, hash)
	}
	return LookupKeyForStoragePrefixID(prefixID, kind, hash)
}

// LookupKeyForStoragePrefixID returns the lookup key for kind and hash, with
// "/" and prefixID appended when kind is AC or CAS and prefixID is non-empty.
// In every other case it returns the plain LookupKey result.
func LookupKeyForStoragePrefixID(prefixID string, kind EntryKind, hash string) string {
	base := LookupKey(kind, hash)
	if prefixID == "" {
		return base
	}

	switch kind {
	case AC, CAS:
		return base + "/" + prefixID
	default:
		return base
	}
}
57 changes: 57 additions & 0 deletions cache/cache_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,57 @@
package cache

import (
"context"
"testing"
)

// TestLookupKeyForContextDefaultsToOriginalKey checks that a context carrying
// no storage prefix yields the plain "cas/<hash>" key.
func TestLookupKeyForContextDefaultsToOriginalKey(t *testing.T) {
	const want = "cas/hash"

	if got := LookupKeyForContext(context.Background(), CAS, "hash"); got != want {
		t.Fatalf("LookupKeyForContext() = %q, want %q", got, want)
	}
}

func TestLookupKeyForContextIncludesStoragePrefix(t *testing.T) {
prefix := "bazel/production/us-east-1/42/987654/v0"
ctx := WithStoragePrefix(context.Background(), prefix)

result := LookupKeyForContext(ctx, CAS, "hash")
expected := "cas/hash/" + StoragePrefixID(prefix)
if result != expected {
t.Fatalf("LookupKeyForContext() = %q, want %q", result, expected)
}
}

func TestLookupKeyForContextIncludesStoragePrefixForActionCache(t *testing.T) {
prefix := "bazel/production/us-east-1/42/987654/v0"
ctx := WithStoragePrefix(context.Background(), prefix)

result := LookupKeyForContext(ctx, AC, "hash")
expected := "ac/hash/" + StoragePrefixID(prefix)
if result != expected {
t.Fatalf("LookupKeyForContext() = %q, want %q", result, expected)
}
}

// TestLookupKeyForContextIgnoresStoragePrefixForRaw checks that RAW keys stay
// in the plain "raw/<hash>" form even when the context carries a storage prefix.
func TestLookupKeyForContextIgnoresStoragePrefixForRaw(t *testing.T) {
	const want = "raw/hash"

	scoped := WithStoragePrefix(context.Background(), "bazel/production/us-east-1/42/987654/v0")
	if got := LookupKeyForContext(scoped, RAW, "hash"); got != want {
		t.Fatalf("LookupKeyForContext() = %q, want %q", got, want)
	}
}

// TestStoragePrefixRequiredFromContext checks that the required-prefix marker
// is absent on a bare context and present after WithRequiredStoragePrefix.
func TestStoragePrefixRequiredFromContext(t *testing.T) {
	base := context.Background()
	if StoragePrefixRequiredFromContext(base) {
		t.Fatal("StoragePrefixRequiredFromContext() = true, want false")
	}

	if marked := WithRequiredStoragePrefix(base); !StoragePrefixRequiredFromContext(marked) {
		t.Fatal("StoragePrefixRequiredFromContext() = false, want true")
	}
}
85 changes: 70 additions & 15 deletions cache/disk/disk.go
Original file line number Diff line number Diff line change
Expand Up @@ -154,17 +154,34 @@ func (c *diskCache) updateCacheAgeMetric() {

// getElementPath returns the on-disk path, rooted at c.dir, of the file backing
// the LRU entry stored under key.
//
// key must hold a string lookup key; the type assertion panics otherwise. Its
// kind, hash and optional storage prefix ID are recovered with lookupKeyParts,
// so no separate slicing of the key is needed here.
func (c *diskCache) getElementPath(key Key, value lruItem) string {
	kind, hash, storagePrefixID := lookupKeyParts(key.(string))
	return filepath.Join(c.dir, c.FileLocationForStoragePrefixID(storagePrefixID, kind, value.legacy, hash, value.size, value.random))
}

func lookupKeyParts(key string) (cache.EntryKind, string, string) {
var kind cache.EntryKind = cache.AC
if strings.HasPrefix(ks, "cas") {
var hash string
var storagePrefixID string
parts := strings.Split(key, "/")
if len(parts) >= 2 {
hash = parts[1]
if len(parts) >= 3 {
storagePrefixID = parts[2]
}
}
if hash == "" && len(key) >= sha256.Size*2 {
hash = key[len(key)-sha256.Size*2:]
}

if strings.HasPrefix(key, "cas") {
kind = cache.CAS
} else if strings.HasPrefix(ks, "ac") {
} else if strings.HasPrefix(key, "ac") {
kind = cache.AC
} else if strings.HasPrefix(ks, "raw") {
} else if strings.HasPrefix(key, "raw") {
kind = cache.RAW
}

return filepath.Join(c.dir, c.FileLocation(kind, value.legacy, hash, value.size, value.random))
return kind, hash, storagePrefixID
}

func (c *diskCache) removeFile(f string) {
Expand Down Expand Up @@ -196,6 +213,22 @@ func (c *diskCache) FileLocationBase(kind cache.EntryKind, legacy bool, hash str
return fmt.Sprintf("cas.v2/%s/%s-%d", hash[:2], hash, size)
}

// FileLocationBaseForStoragePrefixID returns the FileLocationBase result for
// the entry, nested under storagePrefixID when that ID is non-empty and kind
// is not cache.RAW. RAW entries and entries without a prefix ID keep the
// unprefixed location.
func (c *diskCache) FileLocationBaseForStoragePrefixID(storagePrefixID string, kind cache.EntryKind, legacy bool, hash string, size int64) string {
	base := c.FileLocationBase(kind, legacy, hash, size)
	if kind != cache.RAW && storagePrefixID != "" {
		return path.Join(storagePrefixID, base)
	}
	return base
}

// FileLocationBaseForContext returns the base file location for the entry,
// scoped by the storage prefix ID derived from ctx. RAW entries never consult
// ctx and always use the plain FileLocationBase result.
func (c *diskCache) FileLocationBaseForContext(ctx context.Context, kind cache.EntryKind, legacy bool, hash string, size int64) string {
	switch kind {
	case cache.RAW:
		return c.FileLocationBase(kind, legacy, hash, size)
	default:
		// The found flag is not needed: the ID is "" when ctx has no prefix,
		// and an empty ID selects the unprefixed location.
		prefixID, _ := cache.StoragePrefixIDFromContext(ctx)
		return c.FileLocationBaseForStoragePrefixID(prefixID, kind, legacy, hash, size)
	}
}

func (c *diskCache) FileLocation(kind cache.EntryKind, legacy bool, hash string, size int64, random string) string {
if kind == cache.RAW {
return path.Join("raw.v2", hash[:2], hash+"-"+random)
Expand All @@ -212,6 +245,22 @@ func (c *diskCache) FileLocation(kind cache.EntryKind, legacy bool, hash string,
return fmt.Sprintf("cas.v2/%s/%s-%d-%s", hash[:2], hash, size, random)
}

// FileLocationForStoragePrefixID returns the FileLocation result for the
// entry, nested under storagePrefixID when that ID is non-empty and kind is
// not cache.RAW. RAW entries and entries without a prefix ID keep the
// unprefixed location.
func (c *diskCache) FileLocationForStoragePrefixID(storagePrefixID string, kind cache.EntryKind, legacy bool, hash string, size int64, random string) string {
	unprefixed := c.FileLocation(kind, legacy, hash, size, random)
	if kind != cache.RAW && storagePrefixID != "" {
		return path.Join(storagePrefixID, unprefixed)
	}
	return unprefixed
}

// FileLocationForContext returns the file location for the entry, scoped by
// the storage prefix ID derived from ctx. RAW entries never consult ctx and
// always use the plain FileLocation result.
func (c *diskCache) FileLocationForContext(ctx context.Context, kind cache.EntryKind, legacy bool, hash string, size int64, random string) string {
	switch kind {
	case cache.RAW:
		return c.FileLocation(kind, legacy, hash, size, random)
	default:
		// The found flag is not needed: the ID is "" when ctx has no prefix,
		// and an empty ID selects the unprefixed location.
		prefixID, _ := cache.StoragePrefixIDFromContext(ctx)
		return c.FileLocationForStoragePrefixID(prefixID, kind, legacy, hash, size, random)
	}
}

// Put stores a stream of `size` bytes from `r` into the cache.
// If `hash` is not the empty string, and the contents don't match it,
// a non-nil error is returned. All data will be read from `r` before
Expand Down Expand Up @@ -241,7 +290,7 @@ func (c *diskCache) Put(ctx context.Context, kind cache.EntryKind, hash string,
return nil
}

key := cache.LookupKey(kind, hash)
key := cache.LookupKeyForContext(ctx, kind, hash)

var tf *os.File // Tempfile.
var blobFile string
Expand Down Expand Up @@ -299,7 +348,10 @@ func (c *diskCache) Put(ctx context.Context, kind cache.EntryKind, hash string,
legacy := kind == cache.CAS && c.storageMode == casblob.Identity

// Final destination, if all goes well.
filePath := path.Join(c.dir, c.FileLocationBase(kind, legacy, hash, size))
filePath := path.Join(c.dir, c.FileLocationBaseForContext(ctx, kind, legacy, hash, size))
if err := os.MkdirAll(path.Dir(filePath), os.ModePerm); err != nil {
return internalErr(err)
}

// We will download to this temporary file.
tf, random, err := tfc.Create(filePath, legacy)
Expand Down Expand Up @@ -424,18 +476,18 @@ func (c *diskCache) commit(key string, legacy bool, tempfile string, reservedSiz
// but that we can try the proxy backend.
//
// This function assumes that only CAS blobs are requested in zstd form.
func (c *diskCache) availableOrTryProxy(kind cache.EntryKind, hash string, size int64, offset int64, zstd bool) (io.ReadCloser, int64, bool, error) {
func (c *diskCache) availableOrTryProxy(ctx context.Context, kind cache.EntryKind, hash string, size int64, offset int64, zstd bool) (io.ReadCloser, int64, bool, error) {
locked := true
var err error
c.mu.Lock()

key := cache.LookupKey(kind, hash)
key := cache.LookupKeyForContext(ctx, kind, hash)
item, available := c.lru.Get(key)
if available {
c.mu.Unlock() // We expect a cache hit below.
locked = false

blobPath := path.Join(c.dir, c.FileLocation(kind, item.legacy, hash, item.size, item.random))
blobPath := path.Join(c.dir, c.FileLocationForContext(ctx, kind, item.legacy, hash, item.size, item.random))

if !isSizeMismatch(size, item.size) {
var f *os.File
Expand All @@ -447,7 +499,7 @@ func (c *diskCache) availableOrTryProxy(kind cache.EntryKind, hash string, size
c.mu.Lock()
item, available = c.lru.Get(key)
if available {
blobPath = path.Join(c.dir, c.FileLocation(kind, item.legacy, hash, item.size, item.random))
blobPath = path.Join(c.dir, c.FileLocationForContext(ctx, kind, item.legacy, hash, item.size, item.random))
f, err = os.Open(blobPath)
}
c.mu.Unlock()
Expand Down Expand Up @@ -575,7 +627,7 @@ func (c *diskCache) get(ctx context.Context, kind cache.EntryKind, hash string,
}

var err error
key := cache.LookupKey(kind, hash)
key := cache.LookupKeyForContext(ctx, kind, hash)

var tf *os.File // Tempfile we will write to.
var blobFile string
Expand Down Expand Up @@ -608,7 +660,7 @@ func (c *diskCache) get(ctx context.Context, kind cache.EntryKind, hash string,
}
}()

f, foundSize, tryProxy, err := c.availableOrTryProxy(kind, hash, size, offset, zstd)
f, foundSize, tryProxy, err := c.availableOrTryProxy(ctx, kind, hash, size, offset, zstd)
if err != nil {
return nil, -1, internalErr(err)
}
Expand Down Expand Up @@ -644,7 +696,10 @@ func (c *diskCache) get(ctx context.Context, kind cache.EntryKind, hash string,

legacy := kind == cache.CAS && c.storageMode == casblob.Identity

blobPathBase := path.Join(c.dir, c.FileLocationBase(kind, legacy, hash, foundSize))
blobPathBase := path.Join(c.dir, c.FileLocationBaseForContext(ctx, kind, legacy, hash, foundSize))
if err := os.MkdirAll(path.Dir(blobPathBase), os.ModePerm); err != nil {
return nil, -1, internalErr(err)
}
tf, random, err := tfc.Create(blobPathBase, legacy)
if err != nil {
return nil, -1, internalErr(err)
Expand Down Expand Up @@ -718,7 +773,7 @@ func (c *diskCache) Contains(ctx context.Context, kind cache.EntryKind, hash str
}

foundSize := int64(-1)
key := cache.LookupKey(kind, hash)
key := cache.LookupKeyForContext(ctx, kind, hash)

c.mu.Lock()
item, exists := c.lru.Get(key)
Expand Down
Loading
Loading