@@ -87,6 +87,10 @@ def _rlimit_nofile():
 
 _REPO_ALLOW_SHALLOW = os.environ.get("REPO_ALLOW_SHALLOW")
 
+_BLOAT_PACK_COUNT_THRESHOLD = 10
+_BLOAT_SIZE_PACK_THRESHOLD_KB = 10 * 1024 * 1024  # 10 GiB in KiB
+_BLOAT_SIZE_GARBAGE_THRESHOLD_KB = 1 * 1024 * 1024  # 1 GiB in KiB
+
 logger = RepoLogger(__file__)
 
 
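For reference, "git count-objects -v" reports its size fields in KiB, which is why the thresholds above are expressed in KiB. The short standalone snippet below (illustrative only, not part of this commit) spells out the conversion behind the inline comments.

# Illustrative only: 1 GiB == 1024 MiB == 1024 * 1024 KiB.
KIB_PER_GIB = 1024 * 1024
assert 10 * 1024 * 1024 == 10 * KIB_PER_GIB  # size-pack threshold: 10 GiB
assert 1 * 1024 * 1024 == 1 * KIB_PER_GIB    # size-garbage threshold: 1 GiB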
@@ -1371,6 +1375,104 @@ def tidy_up(run_gc, bare_git):
             t.join()
         pm.end()
 
+    @classmethod
+    def _CheckOneBloatedProject(cls, project_index: int) -> Optional[str]:
+        """Checks if a single project is bloated.
+
+        Args:
+            project_index: The index of the project in the parallel context.
+
+        Returns:
+            The name of the project if it is bloated, else None.
+        """
+        project = cls.get_parallel_context()["projects"][project_index]
+
+        if not project.Exists or not project.worktree:
+            return None
+
+        # Only check dirty or locally modified projects. These can't be
+        # freshly cloned and will accumulate garbage.
+        try:
+            is_dirty = project.IsDirty(consider_untracked=True)
+
+            manifest_rev = project.GetRevisionId(project.bare_ref.all)
+            head_rev = project.work_git.rev_parse(HEAD)
+            has_local_commits = manifest_rev != head_rev
+
+            if not (is_dirty or has_local_commits):
+                return None
+
+            output = project.bare_git.count_objects("-v")
+        except Exception:
+            return None
+
+        stats = {}
+        for line in output.splitlines():
+            try:
+                key, value = line.split(": ", 1)
+                stats[key.strip()] = int(value.strip())
+            except ValueError:
+                pass
+
+        pack_count = stats.get("packs", 0)
+        size_pack_kb = stats.get("size-pack", 0)
+        size_garbage_kb = stats.get("size-garbage", 0)
+
+        is_fragmented = (
+            pack_count > _BLOAT_PACK_COUNT_THRESHOLD
+            and size_pack_kb > _BLOAT_SIZE_PACK_THRESHOLD_KB
+        )
+        has_excessive_garbage = (
+            size_garbage_kb > _BLOAT_SIZE_GARBAGE_THRESHOLD_KB
+        )
+
+        if is_fragmented or has_excessive_garbage:
+            return project.name
+        return None
+
+    def _CheckForBloatedProjects(self, projects, opt):
+        """Check for shallow projects that are accumulating unoptimized data.
+
+        For projects with clone-depth="1" that are dirty (have local changes),
+        run 'git count-objects -v' and warn if the repository is accumulating
+        excessive pack files or garbage.
+        """
+        projects = [p for p in projects if p.clone_depth]
+        if not projects:
+            return
+
+        bloated_projects = []
+        pm = Progress(
+            "Checking for bloat", len(projects), delay=False, quiet=opt.quiet
+        )
+
+        def _ProcessResults(pool, pm, results):
+            for result in results:
+                if result:
+                    bloated_projects.append(result)
+                pm.update(msg="")
+
+        with self.ParallelContext():
+            self.get_parallel_context()["projects"] = projects
+            self.ExecuteInParallel(
+                opt.jobs,
+                self._CheckOneBloatedProject,
+                range(len(projects)),
+                callback=_ProcessResults,
+                output=pm,
+                chunksize=1,
+            )
+        pm.end()
+
+        for project_name in bloated_projects:
+            warn_msg = (
+                f'warning: Project "{project_name}" is accumulating '
+                'unoptimized data. Please run "repo sync --auto-gc" or '
+                '"repo gc --repack" to clean up.'
+            )
+            self.git_event_log.ErrorEvent(warn_msg)
+            logger.warning(warn_msg)
+
     def _UpdateRepoProject(self, opt, manifest, errors):
         """Fetch the repo project and check for updates."""
         if opt.local_only:
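"git count-objects -v" emits simple "key: value" lines with sizes in KiB. The standalone sketch below mirrors the parsing and threshold logic added in this hunk against made-up sample output, using the thresholds from the commit; it is illustrative only and not part of the change.

# Illustrative sketch: parse sample "git count-objects -v" output and
# apply the bloat thresholds introduced above. The numbers are invented.
sample_output = """\
count: 143
size: 812
in-pack: 261420
packs: 14
size-pack: 11010048
prune-packable: 0
garbage: 3
size-garbage: 1572864"""

stats = {}
for line in sample_output.splitlines():
    key, sep, value = line.partition(": ")
    if sep:
        stats[key.strip()] = int(value.strip())

BLOAT_PACK_COUNT_THRESHOLD = 10
BLOAT_SIZE_PACK_THRESHOLD_KB = 10 * 1024 * 1024  # 10 GiB in KiB
BLOAT_SIZE_GARBAGE_THRESHOLD_KB = 1 * 1024 * 1024  # 1 GiB in KiB

is_fragmented = (
    stats.get("packs", 0) > BLOAT_PACK_COUNT_THRESHOLD
    and stats.get("size-pack", 0) > BLOAT_SIZE_PACK_THRESHOLD_KB
)
has_excessive_garbage = (
    stats.get("size-garbage", 0) > BLOAT_SIZE_GARBAGE_THRESHOLD_KB
)
print(is_fragmented or has_excessive_garbage)  # True for this sample

The real method returns the project name instead of printing, so the caller can aggregate the results into per-project warnings.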
@@ -2002,6 +2104,8 @@ def _ExecuteHelper(self, opt, args, errors):
20022104 "experience, sync the entire tree."
20032105 )
20042106
2107+ self ._CheckForBloatedProjects (all_projects , opt )
2108+
20052109 if not opt .quiet :
20062110 print ("repo sync has finished successfully." )
20072111