From e6af4d537b12f81d0528b814ec8ad72e45daf638 Mon Sep 17 00:00:00 2001 From: Kristofer Karlsson Date: Thu, 14 May 2026 16:41:35 +0200 Subject: [PATCH 1/4] revision: use priority queue in limit_list() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit limit_list() maintains a date-sorted work queue of commits using a linked list with commit_list_insert_by_date() for insertion. Each insertion walks the list to find the right position — O(n) per insert. In repositories with merge-heavy histories, the symmetric difference can contain thousands of commits, making this O(n) insertion the dominant cost. Replace the sorted linked list with a prio_queue (binary heap). This gives O(log n) insertion and O(log n) extraction instead of O(n) insertion and O(1) extraction, which is a net win when the queue is large. The still_interesting() and everybody_uninteresting() helpers are updated to scan the prio_queue's contiguous array instead of walking a linked list. process_parents() already accepts both a commit_list and a prio_queue parameter, so the change in limit_list() simply switches which one is passed. Benchmark: git rev-list --left-right --count HEAD~N...HEAD Repository: 2.3M commits, merge-heavy DAG (monorepo) Best of 5 runs, times in seconds: commits in symmetric diff baseline patched speedup -------------- -------- ------- ------- 10 0.01 0.01 1.0x 50 0.01 0.01 1.0x 3751 21.23 8.49 2.5x 4524 21.70 8.29 2.6x 10130 20.10 6.65 3.0x No change for small traversals; 2.5-3.0x faster when the queue grows to thousands of commits. Signed-off-by: Kristofer Karlsson --- revision.c | 38 +++++++++++++++++++++----------------- 1 file changed, 21 insertions(+), 17 deletions(-) diff --git a/revision.c b/revision.c index 599b3a66c369ca..2b1b3bb10e7ca1 100644 --- a/revision.c +++ b/revision.c @@ -473,10 +473,10 @@ static struct commit *handle_commit(struct rev_info *revs, die("%s is unknown object", name); } -static int everybody_uninteresting(struct commit_list *orig, +static int everybody_uninteresting(struct prio_queue *orig, struct commit **interesting_cache) { - struct commit_list *list = orig; + size_t i; if (*interesting_cache) { struct commit *commit = *interesting_cache; @@ -484,9 +484,8 @@ static int everybody_uninteresting(struct commit_list *orig, return 0; } - while (list) { - struct commit *commit = list->item; - list = list->next; + for (i = 0; i < orig->nr; i++) { + struct commit *commit = orig->array[i].data; if (commit->object.flags & UNINTERESTING) continue; @@ -1300,20 +1299,17 @@ static void cherry_pick_list(struct commit_list *list, struct rev_info *revs) /* How many extra uninteresting commits we want to see.. */ #define SLOP 5 -static int still_interesting(struct commit_list *src, timestamp_t date, int slop, +static int still_interesting(struct prio_queue *src, timestamp_t date, int slop, struct commit **interesting_cache) { /* - * No source list at all? We're definitely done.. + * Since src is sorted by date, it is enough to peek at the + * first entry to compare dates. No entry at all means done. */ - if (!src) + struct commit *commit = prio_queue_peek(src); + if (!commit) return 0; - - /* - * Does the destination list contain entries with a date - * before the source list? Definitely _not_ done. - */ - if (date <= src->item->date) + if (date <= commit->date) return SLOP; /* @@ -1451,6 +1447,7 @@ static int limit_list(struct rev_info *revs) struct commit_list *newlist = NULL; struct commit_list **p = &newlist; struct commit *interesting_cache = NULL; + struct prio_queue queue = { .compare = compare_commits_by_commit_date }; if (revs->ancestry_path_implicit_bottoms) { collect_bottom_commits(original_list, @@ -1461,6 +1458,11 @@ static int limit_list(struct rev_info *revs) while (original_list) { struct commit *commit = pop_commit(&original_list); + prio_queue_put(&queue, commit); + } + + while (queue.nr) { + struct commit *commit = prio_queue_get(&queue); struct object *obj = &commit->object; if (commit == interesting_cache) @@ -1468,11 +1470,13 @@ static int limit_list(struct rev_info *revs) if (revs->max_age != -1 && (commit->date < revs->max_age)) obj->flags |= UNINTERESTING; - if (process_parents(revs, commit, &original_list, NULL) < 0) + if (process_parents(revs, commit, NULL, &queue) < 0) { + clear_prio_queue(&queue); return -1; + } if (obj->flags & UNINTERESTING) { mark_parents_uninteresting(revs, commit); - slop = still_interesting(original_list, date, slop, &interesting_cache); + slop = still_interesting(&queue, date, slop, &interesting_cache); if (slop) continue; break; @@ -1509,7 +1513,7 @@ static int limit_list(struct rev_info *revs) } } - commit_list_free(original_list); + clear_prio_queue(&queue); revs->commits = newlist; return 0; } From b27c27a6d1a3f65683a6aa4f971b2fb9d94b0deb Mon Sep 17 00:00:00 2001 From: Kristofer Karlsson Date: Tue, 19 May 2026 15:14:02 +0200 Subject: [PATCH 2/4] pack-objects: call release_revisions() after cruft traversal enumerate_and_traverse_cruft_objects() initializes a rev_info on the stack and uses it for a traversal, but never calls release_revisions() afterwards. This currently does not cause a leak on master because the commit queue uses a linked list that is consumed during traversal. However, a subsequent commit will switch the revision walking machinery to use a prio_queue, which dynamically allocates its backing array. Without this cleanup, that allocation is reported as a leak by LeakSanitizer (384 bytes in the linux-leaks CI job via t7704). Add the missing release_revisions() call. Signed-off-by: Kristofer Karlsson --- builtin/pack-objects.c | 1 + 1 file changed, 1 insertion(+) diff --git a/builtin/pack-objects.c b/builtin/pack-objects.c index dd2480a73d2edf..d078d0ca6a0e3e 100644 --- a/builtin/pack-objects.c +++ b/builtin/pack-objects.c @@ -4265,6 +4265,7 @@ static void enumerate_and_traverse_cruft_objects(struct string_list *fresh_packs traverse_commit_list(&revs, show_cruft_commit, show_cruft_object, NULL); stop_progress(&progress_state); + release_revisions(&revs); } static void read_cruft_objects(void) From 9e80c1a84448778c7df0f6e89c12c3a96fffb638 Mon Sep 17 00:00:00 2001 From: Kristofer Karlsson Date: Sun, 17 May 2026 17:06:58 +0200 Subject: [PATCH 3/4] revision: use priority queue for non-limited streaming walks Replace the O(N) sorted linked-list insertion used by get_revision_1() for non-limited walks with an O(log N) priority queue. Add a commit_queue field to rev_info alongside the existing commits linked list. The two representations are mutually exclusive: setup and external callers that need list access (mark_edges_uninteresting, bisect, etc.) use the linked list, then get_revision_1() lazily drains it into the priority queue on the first call for non-limited, non-no_walk, non-topo, non-reflog walks. The conversion function rev_info_commit_list_to_queue() is public so callers that know they will iterate can convert early. The parent-rewriting path (rewrite_one) always merges newly discovered commits into the priority queue, which is correct because rewrite_one_1 only calls process_parents for non-limited walks where the queue is active. Combined with the limit_list() priority queue change in the parent commit, this eliminates all O(N) sorted linked-list insertion from the revision walk machinery. Performance: git rev-list HEAD on a 2.3M-commit repository improves from ~21s to ~6.4s (3.3x speedup). Signed-off-by: Kristofer Karlsson --- commit.c | 13 ------------- commit.h | 2 -- revision.c | 52 +++++++++++++++++++++++++++------------------------- revision.h | 12 +++++++++++- 4 files changed, 38 insertions(+), 41 deletions(-) diff --git a/commit.c b/commit.c index 4385ae4329e921..5c71a3120540c5 100644 --- a/commit.c +++ b/commit.c @@ -729,19 +729,6 @@ void commit_list_free(struct commit_list *list) pop_commit(&list); } -struct commit_list * commit_list_insert_by_date(struct commit *item, struct commit_list **list) -{ - struct commit_list **pp = list; - struct commit_list *p; - while ((p = *pp) != NULL) { - if (p->item->date < item->date) { - break; - } - pp = &p->next; - } - return commit_list_insert(item, pp); -} - static int commit_list_compare_by_date(const struct commit_list *a, const struct commit_list *b) { diff --git a/commit.h b/commit.h index 58150045afafed..385492fbb1ecc5 100644 --- a/commit.h +++ b/commit.h @@ -191,8 +191,6 @@ int commit_list_contains(struct commit *item, struct commit_list **commit_list_append(struct commit *commit, struct commit_list **next); unsigned commit_list_count(const struct commit_list *l); -struct commit_list *commit_list_insert_by_date(struct commit *item, - struct commit_list **list); void commit_list_sort_by_date(struct commit_list **list); /* Shallow copy of the input list */ diff --git a/revision.c b/revision.c index 2b1b3bb10e7ca1..4537f77bf5761c 100644 --- a/revision.c +++ b/revision.c @@ -1116,7 +1116,7 @@ static void try_to_simplify_commit(struct rev_info *revs, struct commit *commit) } static int process_parents(struct rev_info *revs, struct commit *commit, - struct commit_list **list, struct prio_queue *queue) + struct prio_queue *queue) { struct commit_list *parent = commit->parents; unsigned pass_flags; @@ -1158,8 +1158,6 @@ static int process_parents(struct rev_info *revs, struct commit *commit, if (p->object.flags & SEEN) continue; p->object.flags |= (SEEN | NOT_USER_GIVEN); - if (list) - commit_list_insert_by_date(p, list); if (queue) prio_queue_put(queue, p); if (revs->exclude_first_parent_only) @@ -1207,8 +1205,6 @@ static int process_parents(struct rev_info *revs, struct commit *commit, p->object.flags |= pass_flags | CHILD_VISITED; if (!(p->object.flags & SEEN)) { p->object.flags |= (SEEN | NOT_USER_GIVEN); - if (list) - commit_list_insert_by_date(p, list); if (queue) prio_queue_put(queue, p); } @@ -1470,7 +1466,7 @@ static int limit_list(struct rev_info *revs) if (revs->max_age != -1 && (commit->date < revs->max_age)) obj->flags |= UNINTERESTING; - if (process_parents(revs, commit, NULL, &queue) < 0) { + if (process_parents(revs, commit, &queue) < 0) { clear_prio_queue(&queue); return -1; } @@ -3257,6 +3253,7 @@ static void free_void_commit_list(void *list) void release_revisions(struct rev_info *revs) { commit_list_free(revs->commits); + clear_prio_queue(&revs->commit_queue); commit_list_free(revs->ancestry_path_bottoms); release_display_notes(&revs->notes_opt); object_array_clear(&revs->pending); @@ -3726,7 +3723,7 @@ static void explore_walk_step(struct rev_info *revs) if (revs->max_age != -1 && (c->date < revs->max_age)) c->object.flags |= UNINTERESTING; - if (process_parents(revs, c, NULL, NULL) < 0) + if (process_parents(revs, c, NULL) < 0) return; if (c->object.flags & UNINTERESTING) @@ -3902,7 +3899,7 @@ static void expand_topo_walk(struct rev_info *revs, struct commit *commit) { struct commit_list *p; struct topo_walk_info *info = revs->topo_walk_info; - if (process_parents(revs, commit, NULL, NULL) < 0) { + if (process_parents(revs, commit, NULL) < 0) { if (!revs->ignore_missing_links) die("Failed to traverse parents of commit %s", oid_to_hex(&commit->object.oid)); @@ -3938,6 +3935,13 @@ static void expand_topo_walk(struct rev_info *revs, struct commit *commit) } } +void rev_info_commit_list_to_queue(struct rev_info *revs) +{ + while (revs->commits) + prio_queue_put(&revs->commit_queue, pop_commit(&revs->commits)); +} + + int prepare_revision_walk(struct rev_info *revs) { int i; @@ -4006,7 +4010,7 @@ static enum rewrite_result rewrite_one_1(struct rev_info *revs, for (;;) { struct commit *p = *pp; if (!revs->limited) - if (process_parents(revs, p, NULL, queue) < 0) + if (process_parents(revs, p, queue) < 0) return rewrite_one_error; if (p->object.flags & UNINTERESTING) return rewrite_one_ok; @@ -4020,27 +4024,18 @@ static enum rewrite_result rewrite_one_1(struct rev_info *revs, } } -static void merge_queue_into_list(struct prio_queue *q, struct commit_list **list) +static void merge_queue_into_prio_queue(struct prio_queue *from, + struct prio_queue *to) { - while (q->nr) { - struct commit *item = prio_queue_peek(q); - struct commit_list *p = *list; - - if (p && p->item->date >= item->date) - list = &p->next; - else { - p = commit_list_insert(item, list); - list = &p->next; /* skip newly added item */ - prio_queue_get(q); /* pop item */ - } - } + while (from->nr) + prio_queue_put(to, prio_queue_get(from)); } static enum rewrite_result rewrite_one(struct rev_info *revs, struct commit **pp) { struct prio_queue queue = { compare_commits_by_commit_date }; enum rewrite_result ret = rewrite_one_1(revs, pp, &queue); - merge_queue_into_list(&queue, &revs->commits); + merge_queue_into_prio_queue(&queue, &revs->commit_queue); clear_prio_queue(&queue); return ret; } @@ -4336,8 +4331,13 @@ static struct commit *get_revision_1(struct rev_info *revs) commit = next_reflog_entry(revs->reflog_info); else if (revs->topo_walk_info) commit = next_topo_commit(revs); - else + else if (revs->limited || revs->no_walk) commit = pop_commit(&revs->commits); + else { + if (!revs->commit_queue.nr && revs->commits) + rev_info_commit_list_to_queue(revs); + commit = prio_queue_get(&revs->commit_queue); + } if (!commit) return NULL; @@ -4359,7 +4359,9 @@ static struct commit *get_revision_1(struct rev_info *revs) try_to_simplify_commit(revs, commit); else if (revs->topo_walk_info) expand_topo_walk(revs, commit); - else if (process_parents(revs, commit, &revs->commits, NULL) < 0) { + else if (process_parents(revs, commit, + revs->no_walk ? + NULL : &revs->commit_queue) < 0) { if (!revs->ignore_missing_links) die("Failed to traverse parents of commit %s", oid_to_hex(&commit->object.oid)); diff --git a/revision.h b/revision.h index 584f1338b5e323..04982a3d47f28f 100644 --- a/revision.h +++ b/revision.h @@ -12,6 +12,7 @@ #include "decorate.h" #include "ident.h" #include "list-objects-filter-options.h" +#include "prio-queue.h" #include "strvec.h" /** @@ -122,8 +123,14 @@ struct oidset; struct topo_walk_info; struct rev_info { - /* Starting list */ + /* + * Work queue of commits, stored as either a linked list or a + * priority queue, but never both at the same time. + * rev_info_commit_list_to_queue() converts list to queue. + */ struct commit_list *commits; + struct prio_queue commit_queue; + struct object_array pending; struct repository *repo; @@ -400,6 +407,7 @@ struct rev_info { * uninitialized. */ #define REV_INFO_INIT { \ + .commit_queue = { .compare = compare_commits_by_commit_date }, \ .abbrev = DEFAULT_ABBREV, \ .simplify_history = 1, \ .pruning.flags.recursive = 1, \ @@ -478,6 +486,8 @@ void reset_revision_walk(void); */ int prepare_revision_walk(struct rev_info *revs); +/* Drain the commits linked list into the priority queue. */ +void rev_info_commit_list_to_queue(struct rev_info *revs); /** * Takes a pointer to a `rev_info` structure and iterates over it, returning a * `struct commit *` each time you call it. The end of the revision list is From 7d4da97ff5a9db3abe325933c767f3088dafffe0 Mon Sep 17 00:00:00 2001 From: Kristofer Karlsson Date: Mon, 18 May 2026 15:27:34 +0200 Subject: [PATCH 4/4] revision: introduce revision_walk_ops for get_revision_1() Extract the per-mode dispatch logic in get_revision_1() into a revision_walk_ops struct with function pointers for next() and expand(). Each walk mode (reflog, topo, streaming, no_walk, limited) gets its own small helper functions, and get_walk_ops() selects the right ops table once per walk. This replaces the scattered if/else chains that checked the same mode conditions at multiple points in the loop body. The loop is now mode-agnostic: it calls ops->next() to pop the next commit and ops->expand() to process parents, with NULL expand for limited walks that skip parent processing entirely. No functional change. Signed-off-by: Kristofer Karlsson --- revision.c | 132 +++++++++++++++++++++++++++++++++++++++++------------ revision.h | 2 + 2 files changed, 104 insertions(+), 30 deletions(-) diff --git a/revision.c b/revision.c index 4537f77bf5761c..2898603351e355 100644 --- a/revision.c +++ b/revision.c @@ -3250,8 +3250,13 @@ static void free_void_commit_list(void *list) commit_list_free(list); } +static const struct revision_walk_ops *get_walk_ops(struct rev_info *revs); + void release_revisions(struct rev_info *revs) { + if (revs->walk_ops && revs->walk_ops != get_walk_ops(revs)) + BUG("walk_ops changed after initial selection"); + commit_list_free(revs->commits); clear_prio_queue(&revs->commit_queue); commit_list_free(revs->ancestry_path_bottoms); @@ -3942,12 +3947,27 @@ void rev_info_commit_list_to_queue(struct rev_info *revs) } +/* + * Reset walk machinery so the same rev_info can be walked again. + * The bitmap code does this: it walks haves, then walks wants on the + * same rev_info. Without this reset, the cached walk_ops would skip + * re-initialization (e.g. draining the commit list into the priority + * queue), causing the second walk to read from an empty queue. + */ +static void reset_walk_state(struct rev_info *revs) +{ + revs->walk_ops = NULL; + clear_prio_queue(&revs->commit_queue); +} + int prepare_revision_walk(struct rev_info *revs) { int i; struct object_array old_pending; struct commit_list **next = &revs->commits; + reset_walk_state(revs); + memcpy(&old_pending, &revs->pending, sizeof(old_pending)); revs->pending.nr = 0; revs->pending.alloc = 0; @@ -4322,46 +4342,98 @@ static void track_linear(struct rev_info *revs, struct commit *commit) revs->previous_parents = commit_list_copy(commit->parents); } +struct revision_walk_ops { + void (*init)(struct rev_info *); + struct commit *(*next)(struct rev_info *); + int (*expand)(struct rev_info *, struct commit *); +}; + +static struct commit *next_reflog(struct rev_info *revs) +{ + struct commit *commit = next_reflog_entry(revs->reflog_info); + if (commit) + commit->object.flags &= ~(ADDED | SEEN | SHOWN); + return commit; +} + +static int expand_reflog(struct rev_info *revs, struct commit *commit) +{ + try_to_simplify_commit(revs, commit); + return 0; +} + +static int expand_topo(struct rev_info *revs, struct commit *commit) +{ + expand_topo_walk(revs, commit); + return 0; +} + +static struct commit *next_streaming(struct rev_info *revs) +{ + return prio_queue_get(&revs->commit_queue); +} + +static int expand_streaming(struct rev_info *revs, struct commit *commit) +{ + return process_parents(revs, commit, &revs->commit_queue); +} + +static struct commit *next_commit_list(struct rev_info *revs) +{ + return pop_commit(&revs->commits); +} + +static int expand_no_walk(struct rev_info *revs, struct commit *commit) +{ + return process_parents(revs, commit, NULL); +} + +static struct revision_walk_ops reflog_ops = + { NULL, next_reflog, expand_reflog }; +static struct revision_walk_ops topo_ops = + { NULL, next_topo_commit, expand_topo }; +static struct revision_walk_ops streaming_ops = + { rev_info_commit_list_to_queue, next_streaming, expand_streaming }; +static struct revision_walk_ops no_walk_ops = + { NULL, next_commit_list, expand_no_walk }; +static struct revision_walk_ops limited_ops = + { NULL, next_commit_list, NULL }; + +static const struct revision_walk_ops *get_walk_ops(struct rev_info *revs) +{ + if (revs->reflog_info) + return &reflog_ops; + if (revs->topo_walk_info) + return &topo_ops; + if (revs->no_walk) + return &no_walk_ops; + if (!revs->limited) + return &streaming_ops; + return &limited_ops; +} + static struct commit *get_revision_1(struct rev_info *revs) { + const struct revision_walk_ops *ops; + + if (!revs->walk_ops) { + revs->walk_ops = get_walk_ops(revs); + if (revs->walk_ops->init) + revs->walk_ops->init(revs); + } + ops = revs->walk_ops; + while (1) { - struct commit *commit; - - if (revs->reflog_info) - commit = next_reflog_entry(revs->reflog_info); - else if (revs->topo_walk_info) - commit = next_topo_commit(revs); - else if (revs->limited || revs->no_walk) - commit = pop_commit(&revs->commits); - else { - if (!revs->commit_queue.nr && revs->commits) - rev_info_commit_list_to_queue(revs); - commit = prio_queue_get(&revs->commit_queue); - } + struct commit *commit = ops->next(revs); if (!commit) return NULL; - if (revs->reflog_info) - commit->object.flags &= ~(ADDED | SEEN | SHOWN); - - /* - * If we haven't done the list limiting, we need to look at - * the parents here. We also need to do the date-based limiting - * that we'd otherwise have done in limit_list(). - */ - if (!revs->limited) { + if (ops->expand) { if (revs->max_age != -1 && comparison_date(revs, commit) < revs->max_age) continue; - - if (revs->reflog_info) - try_to_simplify_commit(revs, commit); - else if (revs->topo_walk_info) - expand_topo_walk(revs, commit); - else if (process_parents(revs, commit, - revs->no_walk ? - NULL : &revs->commit_queue) < 0) { + if (ops->expand(revs, commit) < 0) { if (!revs->ignore_missing_links) die("Failed to traverse parents of commit %s", oid_to_hex(&commit->object.oid)); diff --git a/revision.h b/revision.h index 04982a3d47f28f..bc8f302739bf4f 100644 --- a/revision.h +++ b/revision.h @@ -120,6 +120,7 @@ struct ref_exclusions { } struct oidset; +struct revision_walk_ops; struct topo_walk_info; struct rev_info { @@ -367,6 +368,7 @@ struct rev_info { struct revision_sources *sources; struct topo_walk_info *topo_walk_info; + const struct revision_walk_ops *walk_ops; /* Commit graph bloom filter fields */ /* The bloom filter key(s) for the pathspec */