Skip to content

Commit 90db5c5

Browse files
authored
perf(clean): Optimize (legacy) clean with multiple -p specifiers (#16264)
Co-authored-by: dino <dinojoaocosta@gmail.com> ### What does this PR try to resolve? This commit optimizes implementation of `cargo clean -p` by reducing the amount of directory walks that take place. We now batch calls to `rm_rf_prefix_list`, thus potentially avoiding multiple walks over a single subdirectory. In practice this helps us significantly reduce the runtime for clearing large workspaces (as implemented in #16263); for Zed, `cargo clean --workspace` went down from 73 seconds to 3 seconds. We have 216 workspace members. ### How to test and review this PR? We've tested it by hand, running it against `regex`, `ruff` and `zed` codebases. This PR is still marked as draft, as I don't love the code. I would also understand if y'all were against merging this, given that new build directory layout is in flight.
2 parents 5e7b660 + 80b47ac commit 90db5c5

File tree

2 files changed

+105
-121
lines changed

2 files changed

+105
-121
lines changed

src/cargo/core/compiler/build_context/target_info.rs

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -147,6 +147,13 @@ impl FileType {
147147
should_replace_hyphens: true,
148148
}
149149
}
150+
151+
pub fn output_prefix_suffix(&self, target: &Target) -> (String, String) {
152+
(
153+
format!("{}{}-", self.prefix, target.crate_name()),
154+
self.suffix.clone(),
155+
)
156+
}
150157
}
151158

152159
impl TargetInfo {

src/cargo/ops/cargo_clean.rs

Lines changed: 98 additions & 121 deletions
Original file line numberDiff line numberDiff line change
@@ -9,8 +9,9 @@ use crate::util::interning::InternedString;
99
use crate::util::{GlobalContext, Progress, ProgressStyle};
1010
use anyhow::bail;
1111
use cargo_util::paths;
12-
use indexmap::IndexSet;
13-
use std::collections::{HashMap, HashSet};
12+
use indexmap::{IndexMap, IndexSet};
13+
14+
use std::ffi::OsString;
1415
use std::fs;
1516
use std::path::{Path, PathBuf};
1617
use std::rc::Rc;
@@ -193,6 +194,7 @@ fn clean_specs(
193194
let packages = pkg_set.get_many(pkg_ids)?;
194195

195196
clean_ctx.progress = Box::new(CleaningPackagesBar::new(clean_ctx.gctx, packages.len()));
197+
let mut dirs_to_clean = DirectoriesToClean::default();
196198

197199
if clean_ctx.gctx.cli_unstable().build_dir_new_layout {
198200
for pkg in packages {
@@ -233,48 +235,54 @@ fn clean_specs(
233235
};
234236
if let Some(uplift_dir) = uplift_dir {
235237
for file_type in file_types {
236-
let uplifted_path =
237-
uplift_dir.join(file_type.uplift_filename(target));
238-
clean_ctx.rm_rf(&uplifted_path)?;
238+
let uplifted_filename = file_type.uplift_filename(target);
239+
239240
// Dep-info generated by Cargo itself.
240-
let dep_info = uplifted_path.with_extension("d");
241-
clean_ctx.rm_rf(&dep_info)?;
241+
let dep_info = Path::new(&uplifted_filename)
242+
.with_extension("d")
243+
.to_string_lossy()
244+
.into_owned();
245+
246+
dirs_to_clean.mark_utf(uplift_dir, |filename| {
247+
filename == uplifted_filename || filename == dep_info
248+
});
242249
}
243250
}
251+
let path_dash = format!("{}-", crate_name);
244252

245-
let dir = escape_glob_path(layout.build_dir().incremental())?;
246-
let incremental = Path::new(&dir).join(format!("{}-*", crate_name));
247-
clean_ctx.rm_rf_glob(&incremental)?;
253+
dirs_to_clean.mark_utf(layout.build_dir().incremental(), |filename| {
254+
filename.starts_with(&path_dash)
255+
});
248256
}
249257
}
250258
}
251259
}
252260
} else {
253-
// Try to reduce the amount of times we iterate over the same target directory by storing away
254-
// the directories we've iterated over (and cleaned for a given package).
255-
let mut cleaned_packages: HashMap<_, HashSet<_>> = HashMap::default();
256261
for pkg in packages {
257-
let pkg_dir = format!("{}-*", pkg.name());
258262
clean_ctx.progress.on_cleaning_package(&pkg.name())?;
259263

260264
// Clean fingerprints.
261265
for (_, layout) in &layouts_with_host {
262-
let dir = escape_glob_path(layout.build_dir().legacy_fingerprint())?;
263-
clean_ctx.rm_rf_package_glob_containing_hash(
264-
&pkg.name(),
265-
&Path::new(&dir).join(&pkg_dir),
266-
)?;
266+
dirs_to_clean.mark_utf(layout.build_dir().legacy_fingerprint(), |filename| {
267+
let Some((pkg_name, _)) = filename.rsplit_once('-') else {
268+
return false;
269+
};
270+
271+
pkg_name == pkg.name().as_str()
272+
});
267273
}
268274

269275
for target in pkg.targets() {
270276
if target.is_custom_build() {
271277
// Get both the build_script_build and the output directory.
272278
for (_, layout) in &layouts_with_host {
273-
let dir = escape_glob_path(layout.build_dir().build())?;
274-
clean_ctx.rm_rf_package_glob_containing_hash(
275-
&pkg.name(),
276-
&Path::new(&dir).join(&pkg_dir),
277-
)?;
279+
dirs_to_clean.mark_utf(layout.build_dir().build(), |filename| {
280+
let Some((current_name, _)) = filename.rsplit_once('-') else {
281+
return false;
282+
};
283+
284+
current_name == pkg.name().as_str()
285+
});
278286
}
279287
continue;
280288
}
@@ -304,15 +312,15 @@ fn clean_specs(
304312
}
305313
_ => (layout.build_dir().legacy_deps(), Some(artifact_dir.dest())),
306314
};
307-
let mut dir_glob_str = escape_glob_path(dir)?;
308-
let dir_glob = Path::new(&dir_glob_str);
315+
309316
for file_type in file_types {
310317
// Some files include a hash in the filename, some don't.
311-
let hashed_name = file_type.output_filename(target, Some("*"));
318+
let (prefix, suffix) = file_type.output_prefix_suffix(target);
312319
let unhashed_name = file_type.output_filename(target, None);
313-
314-
clean_ctx.rm_rf_glob(&dir_glob.join(&hashed_name))?;
315-
clean_ctx.rm_rf(&dir.join(&unhashed_name))?;
320+
dirs_to_clean.mark_utf(&dir, |filename| {
321+
(filename.starts_with(&prefix) && filename.ends_with(&suffix))
322+
|| unhashed_name == filename
323+
});
316324

317325
// Remove the uplifted copy.
318326
if let Some(uplift_dir) = uplift_dir {
@@ -324,49 +332,79 @@ fn clean_specs(
324332
clean_ctx.rm_rf(&dep_info)?;
325333
}
326334
}
327-
let unhashed_dep_info = dir.join(format!("{}.d", crate_name));
328-
clean_ctx.rm_rf(&unhashed_dep_info)?;
335+
let unhashed_dep_info = format!("{}.d", crate_name);
336+
dirs_to_clean.mark_utf(dir, |filename| filename == unhashed_dep_info);
329337

330-
if !dir_glob_str.ends_with(std::path::MAIN_SEPARATOR) {
331-
dir_glob_str.push(std::path::MAIN_SEPARATOR);
332-
}
333-
dir_glob_str.push('*');
334-
let dir_glob_str: Rc<str> = dir_glob_str.into();
335-
if cleaned_packages
336-
.entry(dir_glob_str.clone())
337-
.or_default()
338-
.insert(crate_name.clone())
339-
{
340-
let paths = [
338+
dirs_to_clean.mark_utf(dir, |filename| {
339+
if filename.starts_with(&path_dash) {
341340
// Remove dep-info file generated by rustc. It is not tracked in
342341
// file_types. It does not have a prefix.
343-
(path_dash, ".d"),
342+
filename.ends_with(".d")
343+
} else if filename.starts_with(&path_dot) {
344344
// Remove split-debuginfo files generated by rustc.
345-
(path_dot, ".o"),
346-
(path_dot, ".dwo"),
347-
(path_dot, ".dwp"),
348-
];
349-
clean_ctx.rm_rf_prefix_list(&dir_glob_str, &paths)?;
350-
}
345+
[".o", ".dwo", ".dwp"]
346+
.iter()
347+
.any(|suffix| filename.ends_with(suffix))
348+
} else {
349+
false
350+
}
351+
});
351352

352353
// TODO: what to do about build_script_build?
353-
let dir = escape_glob_path(layout.build_dir().incremental())?;
354-
let incremental = Path::new(&dir).join(format!("{}-*", crate_name));
355-
clean_ctx.rm_rf_glob(&incremental)?;
354+
dirs_to_clean.mark_utf(layout.build_dir().incremental(), |filename| {
355+
filename.starts_with(path_dash)
356+
});
356357
}
357358
}
358359
}
359360
}
360361
}
362+
clean_ctx.rm_rf_all(dirs_to_clean)?;
361363

362364
Ok(())
363365
}
364366

365-
fn escape_glob_path(pattern: &Path) -> CargoResult<String> {
366-
let pattern = pattern
367-
.to_str()
368-
.ok_or_else(|| anyhow::anyhow!("expected utf-8 path"))?;
369-
Ok(glob::Pattern::escape(pattern))
367+
#[derive(Default)]
368+
struct DirectoriesToClean {
369+
dir_contents: IndexMap<PathBuf, IndexSet<OsString>>,
370+
to_remove: IndexSet<PathBuf>,
371+
}
372+
373+
impl DirectoriesToClean {
374+
fn mark(&mut self, directory: &Path, mut should_remove_entry: impl FnMut(&OsString) -> bool) {
375+
let entry = match self.dir_contents.entry(directory.to_owned()) {
376+
indexmap::map::Entry::Occupied(occupied_entry) => occupied_entry.into_mut(),
377+
indexmap::map::Entry::Vacant(vacant_entry) => {
378+
let Ok(dir_entries) = std::fs::read_dir(directory.to_owned()) else {
379+
return;
380+
};
381+
vacant_entry.insert(
382+
dir_entries
383+
.into_iter()
384+
.flatten()
385+
.map(|entry| entry.file_name())
386+
.collect::<IndexSet<_>>(),
387+
)
388+
}
389+
};
390+
391+
entry.retain(|path| {
392+
let should_remove = should_remove_entry(path);
393+
if should_remove {
394+
self.to_remove.insert(directory.join(path));
395+
}
396+
!should_remove
397+
});
398+
}
399+
400+
fn mark_utf(&mut self, directory: &Path, mut should_remove_entry: impl FnMut(&str) -> bool) {
401+
self.mark(directory, move |filename| {
402+
let Some(as_utf) = filename.to_str() else {
403+
return false;
404+
};
405+
should_remove_entry(as_utf)
406+
});
407+
}
370408
}
371409

372410
impl<'gctx> CleanContext<'gctx> {
@@ -384,74 +422,13 @@ impl<'gctx> CleanContext<'gctx> {
384422
}
385423
}
386424

387-
/// Glob remove artifacts for the provided `package`
388-
///
389-
/// Make sure the artifact is for `package` and not another crate that is prefixed by
390-
/// `package` by getting the original name stripped of the trailing hash and possible
391-
/// extension
392-
fn rm_rf_package_glob_containing_hash(
393-
&mut self,
394-
package: &str,
395-
pattern: &Path,
396-
) -> CargoResult<()> {
397-
// TODO: Display utf8 warning to user? Or switch to globset?
398-
let pattern = pattern
399-
.to_str()
400-
.ok_or_else(|| anyhow::anyhow!("expected utf-8 path"))?;
401-
for path in glob::glob(pattern)? {
402-
let path = path?;
403-
404-
let pkg_name = path
405-
.file_name()
406-
.and_then(std::ffi::OsStr::to_str)
407-
.and_then(|artifact| artifact.rsplit_once('-'))
408-
.ok_or_else(|| anyhow::anyhow!("expected utf-8 path"))?
409-
.0;
410-
411-
if pkg_name != package {
412-
continue;
413-
}
414-
425+
fn rm_rf_all(&mut self, dirs: DirectoriesToClean) -> CargoResult<()> {
426+
for path in dirs.to_remove {
415427
self.rm_rf(&path)?;
416428
}
417429
Ok(())
418430
}
419431

420-
fn rm_rf_glob(&mut self, pattern: &Path) -> CargoResult<()> {
421-
// TODO: Display utf8 warning to user? Or switch to globset?
422-
let pattern = pattern
423-
.to_str()
424-
.ok_or_else(|| anyhow::anyhow!("expected utf-8 path"))?;
425-
for path in glob::glob(pattern)? {
426-
self.rm_rf(&path?)?;
427-
}
428-
Ok(())
429-
}
430-
431-
/// Removes files matching a glob and any of the provided filename patterns (prefix/suffix pairs).
432-
///
433-
/// This function iterates over files matching a glob (`pattern`) and removes those whose
434-
/// filenames start and end with specific prefix/suffix pairs. It should be more efficient for
435-
/// operations involving multiple prefix/suffix pairs, as it iterates over the directory
436-
/// only once, unlike making multiple calls to [`Self::rm_rf_glob`].
437-
fn rm_rf_prefix_list(
438-
&mut self,
439-
pattern: &str,
440-
path_matchers: &[(&str, &str)],
441-
) -> CargoResult<()> {
442-
for path in glob::glob(pattern)? {
443-
let path = path?;
444-
let filename = path.file_name().and_then(|name| name.to_str()).unwrap();
445-
if path_matchers
446-
.iter()
447-
.any(|(prefix, suffix)| filename.starts_with(prefix) && filename.ends_with(suffix))
448-
{
449-
self.rm_rf(&path)?;
450-
}
451-
}
452-
Ok(())
453-
}
454-
455432
pub fn rm_rf(&mut self, path: &Path) -> CargoResult<()> {
456433
let meta = match fs::symlink_metadata(path) {
457434
Ok(meta) => meta,

0 commit comments

Comments
 (0)