Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 7 additions & 2 deletions internal/metadatadb/memory.go
Original file line number Diff line number Diff line change
Expand Up @@ -46,8 +46,13 @@ func (m *MemoryBackend) Apply(_ context.Context, namespace string, ops ...Op) er

func (m *MemoryBackend) Query(_ context.Context, namespace string, q ReadOp, target any) error {
m.mu.RLock()
defer m.mu.RUnlock()
result := queryState(m.ns(namespace), q)
// Do not lazy-create the namespace under a read lock — that races with
// concurrent Apply on a different namespace. queryState handles a nil
// map by returning zero values, which is the right answer for a
// namespace that has never been written.
ns := m.state[namespace]
result := queryState(ns, q)
m.mu.RUnlock()
return errors.Wrap(jsonUnmarshalInto(result, target), "memory query")
}

Expand Down
107 changes: 101 additions & 6 deletions internal/strategy/git/git.go
Original file line number Diff line number Diff line change
Expand Up @@ -42,8 +42,11 @@ type Config struct {
RepackInterval time.Duration `hcl:"repack-interval,optional" help:"How often to run full repack. 0 disables." default:"0"`
ZstdThreads int `hcl:"zstd-threads,optional" help:"Threads for zstd compression/decompression (0 = all CPU cores)." default:"0"`
BundleCacheTTL time.Duration `hcl:"bundle-cache-ttl,optional" help:"TTL of cached server-side git bundles." default:"2h"`
PrewarmMostClonedRepos int `hcl:"prewarm-most-cloned-repos,optional" help:"Number of most-cloned repositories to warm before /_readiness reports ready. 0 disables the gate." default:"0"`
}

const prewarmPopularityWindowDays = 14

type Strategy struct {
config Config
cache cache.Cache
Expand All @@ -61,7 +64,8 @@ type Strategy struct {
deferredRestoreOnce sync.Map // keyed by upstream URL, ensures at most one deferred restore per repo
metrics *gitMetrics
repoCounts *RepoCounts
ready atomic.Bool
existingWarmed atomic.Bool
prewarmComplete atomic.Bool
}

func New(
Expand Down Expand Up @@ -140,8 +144,13 @@ func New(
if err := s.warmExistingRepos(warmCtx); err != nil {
logger.WarnContext(warmCtx, "Failed to warm existing repos", "error", err)
}
s.ready.Store(true)
logger.InfoContext(warmCtx, "Git strategy ready")
s.existingWarmed.Store(true)
if s.config.PrewarmMostClonedRepos <= 0 {
logger.InfoContext(warmCtx, "Git strategy ready")
} else {
logger.InfoContext(warmCtx, "Existing repos warmed; waiting for prewarm pass before reporting ready",
"prewarm_most_cloned_repos", s.config.PrewarmMostClonedRepos)
}
}()

s.proxy = &httputil.ReverseProxy{
Expand Down Expand Up @@ -185,26 +194,112 @@ var _ strategy.Strategy = (*Strategy)(nil)
var _ strategy.Readier = (*Strategy)(nil)
var _ strategy.MetadataConsumer = (*Strategy)(nil)

// Ready reports whether startup warm-up has completed.
// Ready reports whether the strategy is ready to serve traffic. With
// PrewarmMostClonedRepos > 0, the strategy is ready only after both the
// existing-mirror warm-up and the prewarm pass have completed.
func (s *Strategy) Ready() bool {
return s.ready.Load()
if !s.existingWarmed.Load() {
return false
}
if s.config.PrewarmMostClonedRepos > 0 && !s.prewarmComplete.Load() {
return false
}
return true
}

// SetMetadataStore enables the per-repo clone histogram and schedules its
// daily reaper. Called by config.Load after the metadata backend is built.
// When PrewarmMostClonedRepos > 0, also kicks off the background prewarm pass
// that gates Ready().
func (s *Strategy) SetMetadataStore(store *metadatadb.Store) {
logger := logging.FromContext(s.ctx)
if store == nil {
if s.config.PrewarmMostClonedRepos > 0 {
logger.WarnContext(s.ctx, "prewarm-most-cloned-repos is set but no metadata store was provided; "+
"reporting ready without prewarm (misconfiguration)")
s.prewarmComplete.Store(true)
}
return
}
s.repoCounts = NewRepoCounts(store.Namespace("git"))
logging.FromContext(s.ctx).InfoContext(s.ctx, "Per-repo clone histogram enabled",
logger.InfoContext(s.ctx, "Per-repo clone histogram enabled",
"retention_days", s.repoCounts.retentionDays)
s.scheduler.SubmitPeriodicJob("repo-counts-reaper", "reap-repo-counts", defaultRepoCountsReapInterval, func(ctx context.Context) error {
if deleted := s.repoCounts.Reap(); deleted > 0 {
logging.FromContext(ctx).InfoContext(ctx, "Reaped stale repo clone counts", "deleted", deleted)
}
return nil
})
if s.config.PrewarmMostClonedRepos > 0 {
go func() {
warmCtx := context.WithoutCancel(s.ctx)
s.prewarmMostCloned(warmCtx)
s.prewarmComplete.Store(true)
logging.FromContext(warmCtx).InfoContext(warmCtx, "Prewarm pass complete; git strategy ready")
}()
}
}

// prewarmMostCloned blocks until the existing-repo warm-up has finished, then
// fetches (or clones) the most-cloned repositories from the histogram so that
// the local mirror is warm before the node starts serving traffic.
//
// Per-repo failures are logged but do not block readiness: a single broken
// upstream must not permanently brick a deploy. Same for an empty histogram
// (e.g. brand-new metadata backend) — we log a warning and consider the pass
// complete.
func (s *Strategy) prewarmMostCloned(ctx context.Context) {
logger := logging.FromContext(ctx)
for !s.existingWarmed.Load() {
t := time.NewTimer(100 * time.Millisecond)
select {
case <-ctx.Done():
t.Stop()
return
case <-t.C:
}
}
top := s.repoCounts.TopRepos(prewarmPopularityWindowDays, s.config.PrewarmMostClonedRepos)
if len(top) == 0 {
logger.WarnContext(ctx, "prewarm-most-cloned-repos is set but the clone histogram is empty; reporting ready")
return
}
logger.InfoContext(ctx, "Prewarming most-cloned repositories for readiness gate", "count", len(top))
for _, rc := range top {
if ctx.Err() != nil {
return
}
start := time.Now()
if err := s.prewarmRepo(ctx, rc.Repo); err != nil {
logger.ErrorContext(ctx, "Failed to prewarm repo for readiness gate",
"upstream", rc.Repo, "clone_count", rc.Count, "duration", time.Since(start), "error", err)
continue
}
logger.InfoContext(ctx, "Prewarmed repo for readiness gate",
"upstream", rc.Repo, "clone_count", rc.Count, "duration", time.Since(start))
}
}

// prewarmRepo ensures the mirror for upstreamURL is present locally and
// freshened with at least one fetch. If the mirror does not yet exist it
// triggers a clone (using the same snapshot-restore-then-fetch path as a
// request-driven clone); otherwise it does a lenient fetch so a slow upstream
// cannot block readiness indefinitely.
func (s *Strategy) prewarmRepo(ctx context.Context, upstreamURL string) error {
repo, err := s.cloneManager.GetOrCreate(ctx, upstreamURL)
if err != nil {
return errors.Wrapf(err, "resolve clone for %s", upstreamURL)
}
if repo.State() != gitclone.StateReady {
if err := s.ensureCloneReady(ctx, repo); err != nil {
return errors.Wrapf(err, "clone %s", upstreamURL)
}
return nil
}
if err := repo.FetchLenient(ctx, s.cloneManager.Config().CloneTimeout); err != nil {
return errors.Wrapf(err, "fetch %s", upstreamURL)
}
return nil
}

func (s *Strategy) warmExistingRepos(ctx context.Context) error {
Expand Down
63 changes: 63 additions & 0 deletions internal/strategy/git/git_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -344,6 +344,69 @@ func TestSetMetadataStore(t *testing.T) {
s.SetMetadataStore(store)
}

// TestReadyGate_Prewarm_NoStore verifies that the strategy still reports
// ready when prewarm-most-cloned-repos is set but no metadata store was
// supplied — the misconfiguration is logged but must not brick the deploy.
func TestReadyGate_Prewarm_NoStore(t *testing.T) {
_, ctx := logging.Configure(context.Background(), logging.Config{})

mux := newTestMux()
cm := gitclone.NewManagerProvider(ctx, gitclone.Config{
MirrorRoot: filepath.Join(t.TempDir(), "clones"),
FetchInterval: 15,
}, nil)
s, err := git.New(ctx, git.Config{PrewarmMostClonedRepos: 10}, newTestScheduler(ctx, t), nil, mux, cm,
func() (*githubapp.TokenManager, error) { return nil, nil }) //nolint:nilnil
assert.NoError(t, err)

s.SetMetadataStore(nil)

waitForReady(t, s)
}

// TestReadyGate_Prewarm_EmptyHistogram verifies that the strategy reports
// ready when prewarm-most-cloned-repos is set, a metadata store is supplied,
// but the histogram is empty (e.g. brand-new region).
func TestReadyGate_Prewarm_EmptyHistogram(t *testing.T) {
_, ctx := logging.Configure(context.Background(), logging.Config{})

mux := newTestMux()
cm := gitclone.NewManagerProvider(ctx, gitclone.Config{
MirrorRoot: filepath.Join(t.TempDir(), "clones"),
FetchInterval: 15,
}, nil)
s, err := git.New(ctx, git.Config{PrewarmMostClonedRepos: 10}, newTestScheduler(ctx, t), nil, mux, cm,
func() (*githubapp.TokenManager, error) { return nil, nil }) //nolint:nilnil
assert.NoError(t, err)

store := metadatadb.New(ctx, metadatadb.NewMemoryBackend())
s.SetMetadataStore(store)

waitForReady(t, s)
}

// TestReadyGate_Prewarm_NoStoreCall verifies that with prewarm-most-cloned-repos
// set, the strategy never reports ready until the metadata store wiring contract
// is honoured. This guards against silent regressions where SetMetadataStore is
// dropped from the call chain.
func TestReadyGate_Prewarm_NoStoreCall(t *testing.T) {
_, ctx := logging.Configure(context.Background(), logging.Config{})

mux := newTestMux()
cm := gitclone.NewManagerProvider(ctx, gitclone.Config{
MirrorRoot: filepath.Join(t.TempDir(), "clones"),
FetchInterval: 15,
}, nil)
s, err := git.New(ctx, git.Config{PrewarmMostClonedRepos: 10}, newTestScheduler(ctx, t), nil, mux, cm,
func() (*githubapp.TokenManager, error) { return nil, nil }) //nolint:nilnil
assert.NoError(t, err)

// Give the existing-warm goroutine ample time to complete; Ready() must
// still report false because the prewarm pass has not run.
time.Sleep(200 * time.Millisecond)
assert.False(t, s.Ready(), "strategy must not report ready while prewarm-most-cloned-repos is set and SetMetadataStore has not been called")
}

func TestParseGitRefs(t *testing.T) {
_, ctx := logging.Configure(context.Background(), logging.Config{})
_ = ctx
Expand Down