diff --git a/Cargo.lock b/Cargo.lock index 98274e3a..ee6605c9 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1145,6 +1145,7 @@ dependencies = [ "uu_renice", "uu_rev", "uu_setsid", + "uu_whereis", "uucore", "xattr", ] @@ -1309,6 +1310,19 @@ dependencies = [ "uucore", ] +[[package]] +name = "uu_whereis" +version = "0.0.1" +dependencies = [ + "clap", + "glob", + "regex", + "serde", + "serde_json", + "sysinfo", + "uucore", +] + [[package]] name = "uucore" version = "0.0.30" diff --git a/Cargo.toml b/Cargo.toml index 3798c367..1dd71c8b 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -42,6 +42,7 @@ feat_common_core = [ "renice", "rev", "setsid", + "whereis", ] [workspace.dependencies] @@ -98,6 +99,7 @@ mountpoint = { optional = true, version = "0.0.1", package = "uu_mountpoint", pa renice = { optional = true, version = "0.0.1", package = "uu_renice", path = "src/uu/renice" } rev = { optional = true, version = "0.0.1", package = "uu_rev", path = "src/uu/rev" } setsid = { optional = true, version = "0.0.1", package = "uu_setsid", path ="src/uu/setsid" } +whereis = { optional = true, version = "0.0.1", package = "uu_whereis", path = "src/uu/whereis" } [dev-dependencies] # dmesg test require fixed-boot-time feature turned on. diff --git a/src/uu/whereis/Cargo.toml b/src/uu/whereis/Cargo.toml new file mode 100644 index 00000000..f859d1b1 --- /dev/null +++ b/src/uu/whereis/Cargo.toml @@ -0,0 +1,20 @@ +[package] +name = "uu_whereis" +version = "0.0.1" +edition = "2021" + +[lib] +path = "src/whereis.rs" + +[[bin]] +name = "whereis" +path = "src/main.rs" + +[dependencies] +regex = { workspace = true } +uucore = { workspace = true } +clap = { workspace = true } +serde = { workspace = true } +serde_json = { workspace = true } +sysinfo = { workspace = true } +glob = "0.3" diff --git a/src/uu/whereis/src/constants.rs b/src/uu/whereis/src/constants.rs new file mode 100644 index 00000000..1199fa1a --- /dev/null +++ b/src/uu/whereis/src/constants.rs @@ -0,0 +1,83 @@ +// List of directories, man, src, and binary + +pub const MAN_DIRS: [&str; 7] = [ + "/usr/man/*", + "/usr/share/man/*", + "/usr/X386/man/*", + "/usr/X11/man/*", + "/usr/TeX/man/*", + "/usr/interviews/man/mann", + "/usr/share/info", + // NULL +]; + +pub const SRC_DIRS: [&str; 6] = [ + "/usr/src/*", + "/usr/src/lib/libc/*", + "/usr/src/lib/libc/net/*", + "/usr/src/ucb/pascal", + "/usr/src/ucb/pascal/utilities", + "/usr/src/undoc", + // NULL +]; + +pub const BIN_DIRS: [&str; 46] = [ + "/usr/bin", + "/usr/sbin", + "/bin", + "/sbin", + // #[cfg(...)] + /* + #if defined(MULTIARCHTRIPLET) + + "/lib/" MULTIARCHTRIPLET, + "/usr/lib/" MULTIARCHTRIPLET, + "/usr/local/lib/" MULTIARCHTRIPLET, + + #endif + */ + // #[cfg(not(...))] + "/usr/lib", + "/usr/lib32", + "/usr/lib64", + "/etc", + "/usr/etc", + "/lib", + "/lib32", + "/lib64", + "/usr/games", + "/usr/games/bin", + "/usr/games/lib", + "/usr/emacs/etc", + "/usr/lib/emacs/*/etc", + "/usr/TeX/bin", + "/usr/tex/bin", + "/usr/interviews/bin/LINUX", + "/usr/X11R6/bin", + "/usr/X386/bin", + "/usr/bin/X11", + "/usr/X11/bin", + "/usr/X11R5/bin", + "/usr/local/bin", + "/usr/local/sbin", + "/usr/local/etc", + "/usr/local/lib", + "/usr/local/games", + "/usr/local/games/bin", + "/usr/local/emacs/etc", + "/usr/local/TeX/bin", + "/usr/local/tex/bin", + "/usr/local/bin/X11", + "/usr/contrib", + "/usr/hosts", + "/usr/include", + "/usr/g++-include", + "/usr/ucb", + "/usr/old", + "/usr/new", + "/usr/local", + "/usr/libexec", + "/usr/share", + "/opt/*/bin", + // NULL +]; diff --git a/src/uu/whereis/src/main.rs b/src/uu/whereis/src/main.rs new file mode 100644 index 00000000..5a25b320 --- /dev/null +++ b/src/uu/whereis/src/main.rs @@ -0,0 +1 @@ +uucore::bin!(uu_whereis); diff --git a/src/uu/whereis/src/whereis.rs b/src/uu/whereis/src/whereis.rs new file mode 100644 index 00000000..dc0d097f --- /dev/null +++ b/src/uu/whereis/src/whereis.rs @@ -0,0 +1,403 @@ +// This file is a part of the uutils util-linux package. +// For the full copyright and license information, please view the LICENSE +// file that was distributed with this source code. + +use clap::{crate_version, Arg, ArgAction, Command}; +use glob::glob; +use serde::Serialize; +use std::{ + collections::HashMap, collections::HashSet, fs, os::unix::fs::MetadataExt, path::Path, + path::PathBuf, +}; +use uucore::{error::UResult, format_usage, help_about, help_usage}; + +mod constants; +use crate::constants::{BIN_DIRS, MAN_DIRS, SRC_DIRS}; + +mod options { + pub const BIN: &str = "binaries"; + pub const MAN: &str = "manuals"; + pub const SRC: &str = "sources"; + pub const PATH: &str = "lookups"; + + pub const SPECIFIED_BIN: &str = "listed binaries"; + pub const SPECIFIED_MAN: &str = "listed manuals"; + pub const SPECIFIED_SRC: &str = "listed sources"; +} + +const ABOUT: &str = help_about!("whereis.md"); +const USAGE: &str = help_usage!("whereis.md"); + +// Directories are usually manual pages dirs, binary dirs or source dirs. Hopefully not unknown. +#[derive(Serialize, Copy, Clone, Debug, Eq, Hash, PartialEq)] +pub enum DirType { + Binary, + Manual, + Source, + Unknown, +} + +// Store the metadata for a file +#[derive(Serialize, Clone, Debug)] +pub struct WhDir { + #[serde(skip_serializing)] + #[serde(skip_deserializing)] + metadata: Option, + path: PathBuf, + dir_type: DirType, +} + +impl WhDir { + fn new(path: PathBuf, dir_type: DirType) -> Self { + Self { + metadata: fs::metadata(&path).ok(), + path, + dir_type, + } + } +} + +// Use a vector to store the list of directories. Additionally keep a HashSet of the inode number and st_dev ID. +#[derive(Serialize, Debug)] +pub struct WhDirList { + list: Vec, + seen_files: HashSet<(u64, u64)>, +} + +impl WhDirList { + fn new() -> Self { + Self { + list: Vec::new(), + seen_files: HashSet::new(), + } + } + + fn construct_dir_list(&mut self, dir_type: DirType, paths: &[&str]) { + for path in paths { + let pathbuf = PathBuf::from(path); + if path.contains('*') { + self.add_sub_dirs(&pathbuf, dir_type); + } else { + self.add_dir(WhDir::new(pathbuf, dir_type)); + } + } + } + + // Use (ino) inode number and (st_dev) ID of device containing the file to keep track of whats unique. + fn add_dir(&mut self, dir: WhDir) { + if self.list.iter().any(|d| d.path == dir.path) { + return; + } + + if dir.metadata.is_some() { + let dev = dir.metadata.clone().unwrap().dev(); + let ino = dir.metadata.clone().unwrap().ino(); + + if self.seen_files.insert((dev, ino)) { + self.list.push(dir); + } + } + } + + #[allow(dead_code)] + fn remove_dir(&mut self, dir: &WhDir) { + self.list.retain(|d| d.path != dir.path); + } + + // TODO: We need to do something with the entry if an error occurs. + fn add_sub_dirs(&mut self, parent_dir: &Path, dir_type: DirType) { + for entry in glob(&parent_dir.display().to_string()).expect("Failed to read glob pattern") { + match entry { + Ok(path) if path.is_dir() => { + self.add_dir(WhDir::new(path, dir_type)); + } + Ok(_) => todo!(), + Err(_e) => todo!(), + } + } + } + + // A debug function. + #[allow(dead_code)] + fn list_dirs(&self) { + for dir in &self.list { + let dir_type = whereis_type_to_name(dir.dir_type); + println!("{:?} : {:?}", dir_type, dir.path.display()); + } + } + + fn lookup(&self, pattern: &str, dir_type: DirType) -> Vec { + let mut results = Vec::new(); + let pathbuf_pattern = PathBuf::from(pattern); + + for dir in &self.list { + if dir.dir_type == dir_type { + find_in(&dir.path, &pathbuf_pattern, &mut results, dir.dir_type); + } + } + + results + } +} + +pub fn whereis_type_to_name(dir_type: DirType) -> &'static str { + match dir_type { + DirType::Manual => "man", + DirType::Binary => "bin", + DirType::Source => "src", + DirType::Unknown => "???", + } +} + +// Almost an exact ripoff from the C source. +fn filename_equal(cp: &PathBuf, dp: &str, dir_type: DirType) -> bool { + let cp_str = match cp.file_name().and_then(|s| s.to_str()) { + Some(s) => s, + None => return false, + }; + + let mut dp_trimmed = dp; + + if dir_type == DirType::Source && dp_trimmed.starts_with("s.") { + return filename_equal(cp, &dp_trimmed[2..], dir_type); + } + + if dir_type == DirType::Manual { + for ext in [".Z", ".gz", ".xz", ".bz2", ".zst"] { + if let Some(stripped) = dp_trimmed.strip_suffix(ext) { + dp_trimmed = stripped; + break; + } + } + } + + let mut cp_chars = cp_str.chars(); + let mut dp_chars = dp_trimmed.chars(); + + loop { + match (cp_chars.next(), dp_chars.next()) { + (Some(c1), Some(c2)) if c1 == c2 => continue, + (None, None) => return true, // both ended + (None, Some('.')) if dir_type != DirType::Binary => { + // cp ended, dp has .section + return true; + } + _ => return false, + } + } +} + +fn find_in(dir: &Path, pathbuf: &PathBuf, results: &mut Vec, dir_type: DirType) { + if let Ok(entries) = fs::read_dir(dir) { + for entry in entries.flatten() { + let path = entry.path(); + if let Some(filename) = path.file_name().and_then(|f| f.to_str()) { + if filename_equal(pathbuf, filename, dir_type) { + results.push(path.display().to_string()); + } + } + } + } +} + +// TODO: Doesn't completely all possible options like specified_bin, etc. +fn print_output(options: &OutputOptions, pattern: &str, results: Vec) { + let mut grouped: HashMap> = HashMap::new(); + + // Split results by type, grouping MAN, BIN and SRC. + for path in results { + if path.contains("/bin/") { + grouped.entry(DirType::Binary).or_default().push(path); + } else if path.contains("/man") || path.contains("/share/man") { + grouped.entry(DirType::Manual).or_default().push(path); + } else { + grouped.entry(DirType::Source).or_default().push(path); + } + } + + print!("{}:", pattern); + + // If *any* of the search flags are set, print according to them + if options.search_bin || options.search_man || options.search_src { + if options.search_bin { + if let Some(paths) = grouped.get(&DirType::Binary) { + for path in paths { + print!(" {}", path); + } + } + } + if options.search_man { + if let Some(paths) = grouped.get(&DirType::Manual) { + for path in paths { + print!(" {}", path); + } + } + } + if options.search_src { + if let Some(paths) = grouped.get(&DirType::Source) { + for path in paths { + print!(" {}", path); + } + } + } + } else { + // No -b/-m/-s flag given? Print everything + for paths in grouped.values() { + for path in paths { + print!(" {}", path); + } + } + } + + println!(); +} + +#[uucore::main] +pub fn uumain(args: impl uucore::Args) -> UResult<()> { + let matches: clap::ArgMatches = uu_app().try_get_matches_from(args)?; + + let output_options = OutputOptions { + search_bin: matches.get_flag(options::BIN), + search_man: matches.get_flag(options::MAN), + search_src: matches.get_flag(options::SRC), + path_given: matches.get_flag(options::PATH), + + search_specific_bin: matches.get_flag(options::SPECIFIED_BIN), + search_specific_man: matches.get_flag(options::SPECIFIED_MAN), + search_specific_src: matches.get_flag(options::SPECIFIED_SRC), + }; + + let mut dir_list = WhDirList::new(); + + dir_list.construct_dir_list(DirType::Binary, &BIN_DIRS); + dir_list.construct_dir_list(DirType::Manual, &MAN_DIRS); + dir_list.construct_dir_list(DirType::Source, &SRC_DIRS); + + let names: Vec<_> = matches + .get_many::("names") + .unwrap() + .map(|s| s.as_str()) + .collect(); + + // Search for the names that were passed into the program. + for pattern in names { + let mut results = dir_list.lookup(pattern, DirType::Binary); + results.append(&mut dir_list.lookup(pattern, DirType::Manual)); + results.append(&mut dir_list.lookup(pattern, DirType::Source)); + + print_output(&output_options, pattern, results); + } + + Ok(()) +} + +// TODO: Implement the necessary behavior for path_given and other fields with the dead_code macro. +struct OutputOptions { + search_bin: bool, + search_man: bool, + search_src: bool, + + #[allow(dead_code)] + path_given: bool, + + #[allow(dead_code)] + search_specific_bin: bool, + + #[allow(dead_code)] + search_specific_man: bool, + + #[allow(dead_code)] + search_specific_src: bool, +} + +pub fn uu_app() -> Command { + Command::new(uucore::util_name()) + .version(crate_version!()) + .about(ABOUT) + .override_usage(format_usage(USAGE)) + .infer_long_args(true) + + .arg( + Arg::new("names") + .help("The name of the program [s] to search for.") + .num_args(1..) + .required(true) + ) + .arg( + Arg::new(options::BIN) + .short('b') + .long("binaries") + .action(ArgAction::SetTrue) + .help("Search for binaries.") + .required(false), + ) + .arg( + Arg::new(options::MAN) + .short('m') + .long("manual") + .help("Search for manuals.") + .action(ArgAction::SetTrue) + .required(false), + ) + .arg( + Arg::new(options::SRC) + .short('s') + .long("source") + .action(ArgAction::SetTrue) + .help("Search for sources.") + .action(ArgAction::SetTrue) + .required(false), + ) + .arg( + Arg::new(options::SPECIFIED_BIN) + .short('B') + .long("bins") + .action(ArgAction::SetTrue) + .help( + "Limit the places where whereis searches for binaries, \ + by a whitespace-separated list of directories." + ) + .action(ArgAction::SetTrue) + .required(false), + ) + .arg( + Arg::new(options::SPECIFIED_MAN) + .short('M') + .long("mans") + .action(ArgAction::SetTrue) + .help( + "Limit the places where whereis searches for manuals and documentation in Info \ + format, by a whitespace-separated list of directories." + ) + .action(ArgAction::SetTrue) + .required(false), + ) + .arg( + Arg::new(options::SPECIFIED_SRC) + .short('S') + .long("sources") + .action(ArgAction::SetTrue) + .help( + "Limit the places where whereis searches for sources, by a whitespace-separated \ + list of directories." + ) + .action(ArgAction::SetTrue) + .required(false), + ) + + // Want to rename this in the future. + .arg( + Arg::new(options::PATH) + .short('u') + .long("source path") + .action(ArgAction::SetTrue) + .help( + "Only show the command names that have unusual entries. A command is said to be \ + unusual if it does not have just one entry of each explicitly requested type. \ + Thus 'whereis -m -u *' asks for those files in the current directory which \ + have no documentation file, or more than one." + ) + .action(ArgAction::SetTrue) + .required(false), + ) +} diff --git a/src/uu/whereis/whereis.md b/src/uu/whereis/whereis.md new file mode 100644 index 00000000..35752b66 --- /dev/null +++ b/src/uu/whereis/whereis.md @@ -0,0 +1,7 @@ +# whereis + +``` +whereis [options] [-BMS directory... -f] name... +``` +Locates the binary, source and manual pages for a command. + diff --git a/tests/by-util/test_whereis.rs b/tests/by-util/test_whereis.rs new file mode 100644 index 00000000..696bb97a --- /dev/null +++ b/tests/by-util/test_whereis.rs @@ -0,0 +1,64 @@ +// This file is part of the uutils util-linux package. +// +// For the full copyright and license information, please view the LICENSE +// file that was distributed with this source code. + +use crate::common::util::TestScenario; + +#[test] +#[cfg(target_os = "linux")] +fn test_basic_lookup() { + new_ucmd!() + .arg("gcc") + .succeeds() + .stdout_contains("/usr/bin/gcc"); +} + +#[test] +#[cfg(target_os = "linux")] +fn test_bin_only() { + new_ucmd!() + .arg("-b") + .arg("ping") + .succeeds() + .stdout_contains("/usr/bin/ping"); +} + +#[test] +#[cfg(target_os = "linux")] +fn test_man_only() { + new_ucmd!() + .arg("-m") + .arg("ls") + .succeeds() + .stdout_contains("/usr/share/man"); +} + +#[test] +#[cfg(target_os = "linux")] +fn test_src_only() { + new_ucmd!() + .arg("-s") + .arg("dig") + .succeeds() + .stdout_is("dig:\n"); +} + +#[test] +#[cfg(target_os = "linux")] +fn test_output() { + let res = new_ucmd!().arg("ping").arg("gcc").succeeds(); + let stdout = res.no_stderr().stdout_str(); + + // Non-exhaustive list of fields we expect + // Check that 'ping' and 'gcc' have their paths listed + assert!(stdout.contains("ping:")); + assert!(stdout.contains("gcc:")); + + // Check that paths are printed next to the command name, as expected + assert!(stdout.contains("/usr/bin/ping")); + assert!(stdout.contains("/usr/bin/gcc")); + + assert!(stdout.contains("/usr/lib/gcc")); + assert!(stdout.contains("/usr/share/gcc")); +} diff --git a/tests/tests.rs b/tests/tests.rs index 0fb3ed14..db25f401 100644 --- a/tests/tests.rs +++ b/tests/tests.rs @@ -60,3 +60,7 @@ mod test_fsfreeze; #[cfg(feature = "mcookie")] #[path = "by-util/test_mcookie.rs"] mod test_mcookie; + +#[cfg(feature = "whereis")] +#[path = "by-util/test_whereis.rs"] +mod test_whereis;