From 0a77536d56fd4a4499f174672ec044bf5f2ac3ec Mon Sep 17 00:00:00 2001 From: tonakai-s Date: Mon, 24 Mar 2025 13:30:26 -0300 Subject: [PATCH 1/8] bugfix: Specifying the same file multiple times, reuse the pointer, not overwrite the previous path. --- src/find/matchers/mod.rs | 60 +++++++++++++++++++++++++++++++----- src/find/matchers/printer.rs | 9 +++--- src/find/matchers/printf.rs | 7 +++-- tests/find_cmd_tests.rs | 50 ++++++++++++++++++++++++++++++ 4 files changed, 111 insertions(+), 15 deletions(-) diff --git a/src/find/matchers/mod.rs b/src/find/matchers/mod.rs index 05ad37ae..4a15c570 100644 --- a/src/find/matchers/mod.rs +++ b/src/find/matchers/mod.rs @@ -31,6 +31,17 @@ pub mod time; mod type_matcher; mod user; +use ::regex::Regex; +use chrono::{DateTime, Datelike, NaiveDateTime, Utc}; +use fs::FileSystemMatcher; +use ls::Ls; +use std::collections::HashMap; +use std::fs::{File, Metadata}; +use std::path::Path; +use std::sync::Arc; +use std::time::SystemTime; +use std::{error::Error, str::FromStr}; + use self::access::AccessMatcher; use self::delete::DeleteMatcher; use self::empty::EmptyMatcher; @@ -272,13 +283,39 @@ impl ComparableValue { } } +// Used on file output arguments. +// When the same file is specified multiple times, the same pointer is used. +struct FileMemoizer { + mem: HashMap> +} +impl FileMemoizer { + fn new() -> Self { + Self { mem: HashMap::new() } + } + fn get_or_create_file(&mut self, path: &str) -> Result, Box> { + let file = self.mem.entry(path.to_string()).or_insert( + Arc::new(File::create(path)?) + ); + Ok(file.clone()) + } +} + /// Builds a single `AndMatcher` containing the Matcher objects corresponding /// to the passed in predicate arguments. pub fn build_top_level_matcher( args: &[&str], config: &mut Config, ) -> Result, Box> { - let (_, top_level_matcher) = (build_matcher_tree(args, config, 0, false))?; + let mut file_mem = FileMemoizer::new(); + let (_, top_level_matcher) = ( + build_matcher_tree( + args, + config, + &mut file_mem, + 0, + false + ) + )?; // if the matcher doesn't have any side-effects, then we default to printing if !top_level_matcher.has_side_effects() { @@ -435,6 +472,7 @@ fn get_or_create_file(path: &str) -> Result> { fn build_matcher_tree( args: &[&str], config: &mut Config, + file_mem: &mut FileMemoizer, arg_index: usize, mut expecting_bracket: bool, ) -> Result<(usize, Box), Box> { @@ -465,8 +503,10 @@ fn build_matcher_tree( } i += 1; - let file = get_or_create_file(args[i])?; - Some(Printer::new(PrintDelimiter::Newline, Some(file)).into_box()) + let file = file_mem.get_or_create_file(args[i])?.clone(); + Some( + Printer::new(PrintDelimiter::Newline, Some(file)).into_box() + ) } "-fprintf" => { if i >= args.len() - 2 { @@ -477,9 +517,11 @@ fn build_matcher_tree( // Args + 1: output file path // Args + 2: format string i += 1; - let file = get_or_create_file(args[i])?; + let file = file_mem.get_or_create_file(args[i])?.clone(); i += 1; - Some(Printf::new(args[i], Some(file))?.into_box()) + Some( + Printf::new(args[i], Some(file))?.into_box() + ) } "-fprint0" => { if i >= args.len() - 1 { @@ -487,8 +529,10 @@ fn build_matcher_tree( } i += 1; - let file = get_or_create_file(args[i])?; - Some(Printer::new(PrintDelimiter::Null, Some(file)).into_box()) + let file = file_mem.get_or_create_file(args[i])?.clone(); + Some( + Printer::new(PrintDelimiter::Null, Some(file)).into_box() + ) } "-ls" => Some(Ls::new(None).into_box()), "-fls" => { @@ -814,7 +858,7 @@ fn build_matcher_tree( None } "(" => { - let (new_arg_index, sub_matcher) = build_matcher_tree(args, config, i + 1, true)?; + let (new_arg_index, sub_matcher) = build_matcher_tree(args, config, file_mem, i + 1, true)?; i = new_arg_index; Some(sub_matcher) } diff --git a/src/find/matchers/printer.rs b/src/find/matchers/printer.rs index 5d9e4f8b..3959cacb 100644 --- a/src/find/matchers/printer.rs +++ b/src/find/matchers/printer.rs @@ -6,6 +6,7 @@ use std::fs::File; use std::io::{stderr, Write}; +use std::sync::Arc; use super::{Matcher, MatcherIO, WalkEntry}; @@ -26,11 +27,11 @@ impl std::fmt::Display for PrintDelimiter { /// This matcher just prints the name of the file to stdout. pub struct Printer { delimiter: PrintDelimiter, - output_file: Option, + output_file: Option>, } impl Printer { - pub fn new(delimiter: PrintDelimiter, output_file: Option) -> Self { + pub fn new(delimiter: PrintDelimiter, output_file: Option>) -> Self { Self { delimiter, output_file, @@ -71,7 +72,7 @@ impl Printer { impl Matcher for Printer { fn matches(&self, file_info: &WalkEntry, matcher_io: &mut MatcherIO) -> bool { if let Some(file) = &self.output_file { - self.print(file_info, matcher_io, file, true); + self.print(file_info, file.clone(), true); } else { self.print( file_info, @@ -127,7 +128,7 @@ mod tests { let dev_full = File::open("/dev/full").unwrap(); let abbbc = get_dir_entry_for("./test_data/simple", "abbbc"); - let matcher = Printer::new(PrintDelimiter::Newline, Some(dev_full)); + let matcher = Printer::new(PrintDelimiter::Newline, Some(Arc::new(dev_full))); let deps = FakeDependencies::new(); assert!(matcher.matches(&abbbc, &mut deps.new_matcher_io())); diff --git a/src/find/matchers/printf.rs b/src/find/matchers/printf.rs index b6722152..75d185a0 100644 --- a/src/find/matchers/printf.rs +++ b/src/find/matchers/printf.rs @@ -7,6 +7,7 @@ use std::error::Error; use std::fs::{self, File}; use std::path::Path; +use std::sync::Arc; use std::time::SystemTime; use std::{borrow::Cow, io::Write}; @@ -572,11 +573,11 @@ fn format_directive<'entry>( /// find's printf syntax. pub struct Printf { format: FormatString, - output_file: Option, + output_file: Option>, } impl Printf { - pub fn new(format: &str, output_file: Option) -> Result> { + pub fn new(format: &str, output_file: Option>) -> Result> { Ok(Self { format: FormatString::parse(format)?, output_file, @@ -624,7 +625,7 @@ impl Printf { impl Matcher for Printf { fn matches(&self, file_info: &WalkEntry, matcher_io: &mut MatcherIO) -> bool { if let Some(file) = &self.output_file { - self.print(file_info, file); + self.print(file_info, file.clone()); } else { self.print(file_info, &mut *matcher_io.deps.get_output().borrow_mut()); } diff --git a/tests/find_cmd_tests.rs b/tests/find_cmd_tests.rs index c4c3692e..1abdf096 100644 --- a/tests/find_cmd_tests.rs +++ b/tests/find_cmd_tests.rs @@ -12,6 +12,7 @@ use assert_cmd::Command; use predicates::prelude::*; use regex::Regex; use serial_test::serial; +use std::collections::HashMap; use std::fs::{self, File}; use std::io::{Read, Write}; use std::{env, io::ErrorKind}; @@ -1122,6 +1123,55 @@ fn find_fprinter() { } } +struct TestCaseData { + search_dir: &'static str, + args: Vec<&'static str>, + expected_out: &'static str +} + +#[test] +#[serial(working_dir)] +fn find_using_same_out_multiple_times() { + let cases = HashMap::from([ + ("fprint", TestCaseData{ + search_dir: "test_data/simple", + args: vec!["-fprint", "test_data/find_fprint", "-fprint", "test_data/find_fprint"], + expected_out: "test_data/simple\ntest_data/simple\ntest_data/simple/subdir\ntest_data/simple/subdir\ntest_data/simple/subdir/ABBBC\ntest_data/simple/subdir/ABBBC\ntest_data/simple/abbbc\ntest_data/simple/abbbc\n" + }), + ("fprint0", TestCaseData{ + search_dir: "test_data/simple", + args: vec!["-fprint0", "test_data/find_fprint0", "-fprint0", "test_data/find_fprint0"], + expected_out: "test_data/simple\0test_data/simple\0test_data/simple/subdir\0test_data/simple/subdir\0test_data/simple/subdir/ABBBC\0test_data/simple/subdir/ABBBC\0test_data/simple/abbbc\0test_data/simple/abbbc\0" + }), + ("fprintf", TestCaseData{ + search_dir: "test_data/simple", + args: vec!["-fprintf", "test_data/find_fprintf", "%p\n", "-fprintf", "test_data/find_fprintf", "%f\n"], + expected_out: "test_data/simple\nsimple\ntest_data/simple/subdir\nsubdir\ntest_data/simple/subdir/ABBBC\nABBBC\ntest_data/simple/abbbc\nabbbc\n" + }), + ]); + + for (key, test_data) in cases { + let _ = fs::remove_file(format!("test_data/find_{key}")); + + Command::cargo_bin("find") + .expect("found binary") + .arg(test_data.search_dir) + .args(test_data.args) + .assert() + .success() + .stdout(predicate::str::is_empty()) + .stderr(predicate::str::is_empty()); + + // Read the generated content + let mut f = File::open(format!("test_data/find_{key}")).unwrap(); + let mut contents = String::new(); + f.read_to_string(&mut contents).unwrap(); + assert_eq!(contents, test_data.expected_out); + + let _ = fs::remove_file(format!("test_data/find_{key}")); + } +} + #[test] #[serial(working_dir)] fn find_follow() { From 19ce61cdc563739e0284cc1c9bcab747da9ece76 Mon Sep 17 00:00:00 2001 From: tonakai-s Date: Mon, 24 Mar 2025 13:33:05 -0300 Subject: [PATCH 2/8] raw: Run cargo fmt --- src/find/matchers/mod.rs | 38 ++++++++++++++------------------------ tests/find_cmd_tests.rs | 2 +- 2 files changed, 15 insertions(+), 25 deletions(-) diff --git a/src/find/matchers/mod.rs b/src/find/matchers/mod.rs index 4a15c570..52f830b2 100644 --- a/src/find/matchers/mod.rs +++ b/src/find/matchers/mod.rs @@ -286,16 +286,19 @@ impl ComparableValue { // Used on file output arguments. // When the same file is specified multiple times, the same pointer is used. struct FileMemoizer { - mem: HashMap> + mem: HashMap>, } impl FileMemoizer { fn new() -> Self { - Self { mem: HashMap::new() } + Self { + mem: HashMap::new(), + } } fn get_or_create_file(&mut self, path: &str) -> Result, Box> { - let file = self.mem.entry(path.to_string()).or_insert( - Arc::new(File::create(path)?) - ); + let file = self + .mem + .entry(path.to_string()) + .or_insert(Arc::new(File::create(path)?)); Ok(file.clone()) } } @@ -307,15 +310,7 @@ pub fn build_top_level_matcher( config: &mut Config, ) -> Result, Box> { let mut file_mem = FileMemoizer::new(); - let (_, top_level_matcher) = ( - build_matcher_tree( - args, - config, - &mut file_mem, - 0, - false - ) - )?; + let (_, top_level_matcher) = (build_matcher_tree(args, config, &mut file_mem, 0, false))?; // if the matcher doesn't have any side-effects, then we default to printing if !top_level_matcher.has_side_effects() { @@ -504,9 +499,7 @@ fn build_matcher_tree( i += 1; let file = file_mem.get_or_create_file(args[i])?.clone(); - Some( - Printer::new(PrintDelimiter::Newline, Some(file)).into_box() - ) + Some(Printer::new(PrintDelimiter::Newline, Some(file)).into_box()) } "-fprintf" => { if i >= args.len() - 2 { @@ -519,9 +512,7 @@ fn build_matcher_tree( i += 1; let file = file_mem.get_or_create_file(args[i])?.clone(); i += 1; - Some( - Printf::new(args[i], Some(file))?.into_box() - ) + Some(Printf::new(args[i], Some(file))?.into_box()) } "-fprint0" => { if i >= args.len() - 1 { @@ -530,9 +521,7 @@ fn build_matcher_tree( i += 1; let file = file_mem.get_or_create_file(args[i])?.clone(); - Some( - Printer::new(PrintDelimiter::Null, Some(file)).into_box() - ) + Some(Printer::new(PrintDelimiter::Null, Some(file)).into_box()) } "-ls" => Some(Ls::new(None).into_box()), "-fls" => { @@ -858,7 +847,8 @@ fn build_matcher_tree( None } "(" => { - let (new_arg_index, sub_matcher) = build_matcher_tree(args, config, file_mem, i + 1, true)?; + let (new_arg_index, sub_matcher) = + build_matcher_tree(args, config, file_mem, i + 1, true)?; i = new_arg_index; Some(sub_matcher) } diff --git a/tests/find_cmd_tests.rs b/tests/find_cmd_tests.rs index 1abdf096..7adec104 100644 --- a/tests/find_cmd_tests.rs +++ b/tests/find_cmd_tests.rs @@ -1126,7 +1126,7 @@ fn find_fprinter() { struct TestCaseData { search_dir: &'static str, args: Vec<&'static str>, - expected_out: &'static str + expected_out: &'static str, } #[test] From 7e0ec9696eee70f2d2af54827c09708ca4f288e5 Mon Sep 17 00:00:00 2001 From: tonakai-s Date: Mon, 24 Mar 2025 13:45:51 -0300 Subject: [PATCH 3/8] bugfix: Add reuse of file pointer to -fls arg --- src/find/matchers/ls.rs | 7 ++++--- src/find/matchers/mod.rs | 35 +---------------------------------- 2 files changed, 5 insertions(+), 37 deletions(-) diff --git a/src/find/matchers/ls.rs b/src/find/matchers/ls.rs index 2480fbad..244c21fa 100644 --- a/src/find/matchers/ls.rs +++ b/src/find/matchers/ls.rs @@ -7,6 +7,7 @@ use chrono::DateTime; use std::{ fs::File, io::{stderr, Write}, + sync::Arc, }; use super::{Matcher, MatcherIO, WalkEntry}; @@ -110,11 +111,11 @@ fn format_permissions(file_attributes: u32) -> String { } pub struct Ls { - output_file: Option, + output_file: Option>, } impl Ls { - pub fn new(output_file: Option) -> Self { + pub fn new(output_file: Option>) -> Self { Self { output_file } } @@ -268,7 +269,7 @@ impl Ls { impl Matcher for Ls { fn matches(&self, file_info: &WalkEntry, matcher_io: &mut MatcherIO) -> bool { if let Some(file) = &self.output_file { - self.print(file_info, matcher_io, file, true); + self.print(file_info, file.clone(), true); } else { self.print( file_info, diff --git a/src/find/matchers/mod.rs b/src/find/matchers/mod.rs index 52f830b2..7930ee1d 100644 --- a/src/find/matchers/mod.rs +++ b/src/find/matchers/mod.rs @@ -453,13 +453,6 @@ fn parse_str_to_newer_args(input: &str) -> Option<(String, String)> { } } -/// Creates a file if it doesn't exist. -/// If it does exist, it will be overwritten. -fn get_or_create_file(path: &str) -> Result> { - let file = File::create(path)?; - Ok(file) -} - /// The main "translate command-line args into a matcher" function. Will call /// itself recursively if it encounters an opening bracket. A successful return /// consists of a tuple containing the new index into the args array to use (if @@ -530,7 +523,7 @@ fn build_matcher_tree( } i += 1; - let file = get_or_create_file(args[i])?; + let file = file_mem.get_or_create_file(args[i])?.clone(); Some(Ls::new(Some(file)).into_box()) } "-true" => Some(TrueMatcher.into_box()), @@ -1831,30 +1824,4 @@ mod tests { .expect("-version should stop parsing"); assert!(config.version_requested); } - - #[test] - fn get_or_create_file_test() { - use std::fs; - - // remove file if hard link file exist. - // But you can't delete a file that doesn't exist, - // so ignore the error returned here. - let _ = fs::remove_file("test_data/get_or_create_file_test"); - - // test create file - let file = get_or_create_file("test_data/get_or_create_file_test"); - assert!(file.is_ok()); - - let file = get_or_create_file("test_data/get_or_create_file_test"); - assert!(file.is_ok()); - - // test error when file no permission - #[cfg(unix)] - { - let result = get_or_create_file("/etc/shadow"); - assert!(result.is_err()); - } - - let _ = fs::remove_file("test_data/get_or_create_file_test"); - } } From ad98bd3f591c751db311ea65416e500818795c1e Mon Sep 17 00:00:00 2001 From: tonakai-s Date: Tue, 25 Mar 2025 06:28:01 -0300 Subject: [PATCH 4/8] test:Fix tests to look by substring instead a long fixed string. --- tests/find_cmd_tests.rs | 17 ++++++++++++----- 1 file changed, 12 insertions(+), 5 deletions(-) diff --git a/tests/find_cmd_tests.rs b/tests/find_cmd_tests.rs index 7adec104..af086f2b 100644 --- a/tests/find_cmd_tests.rs +++ b/tests/find_cmd_tests.rs @@ -1126,7 +1126,7 @@ fn find_fprinter() { struct TestCaseData { search_dir: &'static str, args: Vec<&'static str>, - expected_out: &'static str, + needle_substr: Vec<(&'static str, usize)>, } #[test] @@ -1136,17 +1136,17 @@ fn find_using_same_out_multiple_times() { ("fprint", TestCaseData{ search_dir: "test_data/simple", args: vec!["-fprint", "test_data/find_fprint", "-fprint", "test_data/find_fprint"], - expected_out: "test_data/simple\ntest_data/simple\ntest_data/simple/subdir\ntest_data/simple/subdir\ntest_data/simple/subdir/ABBBC\ntest_data/simple/subdir/ABBBC\ntest_data/simple/abbbc\ntest_data/simple/abbbc\n" + needle_substr: vec![("test_data/simple\n", 2), ("test_data/simple/subdir\n", 2), ("test_data/simple/subdir/ABBBC\n", 2), ("test_data/simple/abbbc\n", 2)] }), ("fprint0", TestCaseData{ search_dir: "test_data/simple", args: vec!["-fprint0", "test_data/find_fprint0", "-fprint0", "test_data/find_fprint0"], - expected_out: "test_data/simple\0test_data/simple\0test_data/simple/subdir\0test_data/simple/subdir\0test_data/simple/subdir/ABBBC\0test_data/simple/subdir/ABBBC\0test_data/simple/abbbc\0test_data/simple/abbbc\0" + needle_substr: vec![("test_data/simple\0", 2), ("test_data/simple/subdir\0", 2), ("test_data/simple/subdir/ABBBC\0", 2), ("test_data/simple/abbbc\0", 2)] }), ("fprintf", TestCaseData{ search_dir: "test_data/simple", args: vec!["-fprintf", "test_data/find_fprintf", "%p\n", "-fprintf", "test_data/find_fprintf", "%f\n"], - expected_out: "test_data/simple\nsimple\ntest_data/simple/subdir\nsubdir\ntest_data/simple/subdir/ABBBC\nABBBC\ntest_data/simple/abbbc\nabbbc\n" + needle_substr: vec![("test_data/simple\nsimple\n", 1), ("test_data/simple/subdir\nsubdir\n", 1), ("test_data/simple/subdir/ABBBC\nABBBC\n", 1), ("test_data/simple/abbbc\nabbbc\n", 1)] }), ]); @@ -1166,7 +1166,14 @@ fn find_using_same_out_multiple_times() { let mut f = File::open(format!("test_data/find_{key}")).unwrap(); let mut contents = String::new(); f.read_to_string(&mut contents).unwrap(); - assert_eq!(contents, test_data.expected_out); + + // The find output can have different order depending on the platform, so we check by substrs + for (substr, times) in test_data.needle_substr { + assert_eq!( + &contents.as_bytes().windows(substr.len()).filter(|&w| w == substr.as_bytes()).count(), + × + ); + } let _ = fs::remove_file(format!("test_data/find_{key}")); } From 6428b10dcf0c22e6ef4beb2e83229e3183dd6c93 Mon Sep 17 00:00:00 2001 From: tonakai-s Date: Sat, 29 Mar 2025 02:22:39 -0300 Subject: [PATCH 5/8] fix:FileMemoizer use filepath inode as hashmap key. perf:Remove some Arc clones. test:Add new tests to symbolic and hardlinks. --- src/find/matchers/ls.rs | 2 +- src/find/matchers/mod.rs | 35 ++++++-- src/find/matchers/printer.rs | 2 +- src/find/matchers/printf.rs | 2 +- tests/find_cmd_tests.rs | 164 +++++++++++++++++++++++++++++++---- 5 files changed, 177 insertions(+), 28 deletions(-) diff --git a/src/find/matchers/ls.rs b/src/find/matchers/ls.rs index 244c21fa..77c51b9a 100644 --- a/src/find/matchers/ls.rs +++ b/src/find/matchers/ls.rs @@ -269,7 +269,7 @@ impl Ls { impl Matcher for Ls { fn matches(&self, file_info: &WalkEntry, matcher_io: &mut MatcherIO) -> bool { if let Some(file) = &self.output_file { - self.print(file_info, file.clone(), true); + self.print(file_info, file.as_ref(), true); } else { self.print( file_info, diff --git a/src/find/matchers/mod.rs b/src/find/matchers/mod.rs index 7930ee1d..e674c151 100644 --- a/src/find/matchers/mod.rs +++ b/src/find/matchers/mod.rs @@ -41,6 +41,7 @@ use std::path::Path; use std::sync::Arc; use std::time::SystemTime; use std::{error::Error, str::FromStr}; +use uucore::fs::FileInformation; use self::access::AccessMatcher; use self::delete::DeleteMatcher; @@ -284,9 +285,10 @@ impl ComparableValue { } // Used on file output arguments. -// When the same file is specified multiple times, the same pointer is used. +// Based on path inode or file_index check if the file already has been specified. +// If yes, use the same file pointer. struct FileMemoizer { - mem: HashMap>, + mem: HashMap>, } impl FileMemoizer { fn new() -> Self { @@ -295,12 +297,31 @@ impl FileMemoizer { } } fn get_or_create_file(&mut self, path: &str) -> Result, Box> { + let path = Path::new(path); + let file_id = self.get_file_id(path); + if file_id.is_err() { + let file = Arc::new(File::create(path)?); + self.mem.insert(self.get_file_id(path)?, file.clone()); + return Ok(file); + } + let file = self .mem - .entry(path.to_string()) + .entry(file_id.unwrap()) .or_insert(Arc::new(File::create(path)?)); Ok(file.clone()) } + + fn get_file_id(&self, path: &Path) -> Result> { + let file_info = FileInformation::from_path(&path, true)?; + #[cfg(windows)] + let file_inode = file_info.file_index(); + + #[cfg(unix)] + let file_inode = file_info.inode(); + + Ok(file_inode) + } } /// Builds a single `AndMatcher` containing the Matcher objects corresponding @@ -491,7 +512,7 @@ fn build_matcher_tree( } i += 1; - let file = file_mem.get_or_create_file(args[i])?.clone(); + let file = file_mem.get_or_create_file(args[i])?; Some(Printer::new(PrintDelimiter::Newline, Some(file)).into_box()) } "-fprintf" => { @@ -503,7 +524,7 @@ fn build_matcher_tree( // Args + 1: output file path // Args + 2: format string i += 1; - let file = file_mem.get_or_create_file(args[i])?.clone(); + let file = file_mem.get_or_create_file(args[i])?; i += 1; Some(Printf::new(args[i], Some(file))?.into_box()) } @@ -513,7 +534,7 @@ fn build_matcher_tree( } i += 1; - let file = file_mem.get_or_create_file(args[i])?.clone(); + let file = file_mem.get_or_create_file(args[i])?; Some(Printer::new(PrintDelimiter::Null, Some(file)).into_box()) } "-ls" => Some(Ls::new(None).into_box()), @@ -523,7 +544,7 @@ fn build_matcher_tree( } i += 1; - let file = file_mem.get_or_create_file(args[i])?.clone(); + let file = file_mem.get_or_create_file(args[i])?; Some(Ls::new(Some(file)).into_box()) } "-true" => Some(TrueMatcher.into_box()), diff --git a/src/find/matchers/printer.rs b/src/find/matchers/printer.rs index 3959cacb..2f9357e7 100644 --- a/src/find/matchers/printer.rs +++ b/src/find/matchers/printer.rs @@ -72,7 +72,7 @@ impl Printer { impl Matcher for Printer { fn matches(&self, file_info: &WalkEntry, matcher_io: &mut MatcherIO) -> bool { if let Some(file) = &self.output_file { - self.print(file_info, file.clone(), true); + self.print(file_info, file.as_ref(), true); } else { self.print( file_info, diff --git a/src/find/matchers/printf.rs b/src/find/matchers/printf.rs index 75d185a0..09c72fea 100644 --- a/src/find/matchers/printf.rs +++ b/src/find/matchers/printf.rs @@ -625,7 +625,7 @@ impl Printf { impl Matcher for Printf { fn matches(&self, file_info: &WalkEntry, matcher_io: &mut MatcherIO) -> bool { if let Some(file) = &self.output_file { - self.print(file_info, file.clone()); + self.print(file_info, file.as_ref()); } else { self.print(file_info, &mut *matcher_io.deps.get_output().borrow_mut()); } diff --git a/tests/find_cmd_tests.rs b/tests/find_cmd_tests.rs index af086f2b..1b984c77 100644 --- a/tests/find_cmd_tests.rs +++ b/tests/find_cmd_tests.rs @@ -15,6 +15,7 @@ use serial_test::serial; use std::collections::HashMap; use std::fs::{self, File}; use std::io::{Read, Write}; +use std::os::unix; use std::{env, io::ErrorKind}; use tempfile::Builder; @@ -1133,26 +1134,65 @@ struct TestCaseData { #[serial(working_dir)] fn find_using_same_out_multiple_times() { let cases = HashMap::from([ - ("fprint", TestCaseData{ - search_dir: "test_data/simple", - args: vec!["-fprint", "test_data/find_fprint", "-fprint", "test_data/find_fprint"], - needle_substr: vec![("test_data/simple\n", 2), ("test_data/simple/subdir\n", 2), ("test_data/simple/subdir/ABBBC\n", 2), ("test_data/simple/abbbc\n", 2)] - }), - ("fprint0", TestCaseData{ - search_dir: "test_data/simple", - args: vec!["-fprint0", "test_data/find_fprint0", "-fprint0", "test_data/find_fprint0"], - needle_substr: vec![("test_data/simple\0", 2), ("test_data/simple/subdir\0", 2), ("test_data/simple/subdir/ABBBC\0", 2), ("test_data/simple/abbbc\0", 2)] - }), - ("fprintf", TestCaseData{ - search_dir: "test_data/simple", - args: vec!["-fprintf", "test_data/find_fprintf", "%p\n", "-fprintf", "test_data/find_fprintf", "%f\n"], - needle_substr: vec![("test_data/simple\nsimple\n", 1), ("test_data/simple/subdir\nsubdir\n", 1), ("test_data/simple/subdir/ABBBC\nABBBC\n", 1), ("test_data/simple/abbbc\nabbbc\n", 1)] - }), + ( + "fprint", + TestCaseData { + search_dir: "test_data/simple", + args: vec![ + "-fprint", + "test_data/find_fprint", + "-fprint", + "test_data/find_fprint", + ], + needle_substr: vec![ + ("test_data/simple\n", 2), + ("test_data/simple/subdir\n", 2), + ("test_data/simple/subdir/ABBBC\n", 2), + ("test_data/simple/abbbc\n", 2), + ], + }, + ), + ( + "fprint0", + TestCaseData { + search_dir: "test_data/simple", + args: vec![ + "-fprint0", + "test_data/find_fprint0", + "-fprint0", + "test_data/find_fprint0", + ], + needle_substr: vec![ + ("test_data/simple\0", 2), + ("test_data/simple/subdir\0", 2), + ("test_data/simple/subdir/ABBBC\0", 2), + ("test_data/simple/abbbc\0", 2), + ], + }, + ), + ( + "fprintf", + TestCaseData { + search_dir: "test_data/simple", + args: vec![ + "-fprintf", + "test_data/find_fprintf", + "%p\n", + "-fprintf", + "test_data/find_fprintf", + "%f\n", + ], + needle_substr: vec![ + ("test_data/simple\nsimple\n", 1), + ("test_data/simple/subdir\nsubdir\n", 1), + ("test_data/simple/subdir/ABBBC\nABBBC\n", 1), + ("test_data/simple/abbbc\nabbbc\n", 1), + ], + }, + ), ]); for (key, test_data) in cases { - let _ = fs::remove_file(format!("test_data/find_{key}")); - Command::cargo_bin("find") .expect("found binary") .arg(test_data.search_dir) @@ -1170,13 +1210,101 @@ fn find_using_same_out_multiple_times() { // The find output can have different order depending on the platform, so we check by substrs for (substr, times) in test_data.needle_substr { assert_eq!( - &contents.as_bytes().windows(substr.len()).filter(|&w| w == substr.as_bytes()).count(), + &contents + .as_bytes() + .windows(substr.len()) + .filter(|&w| w == substr.as_bytes()) + .count(), × ); } let _ = fs::remove_file(format!("test_data/find_{key}")); } + + let original_file = "test_data/find_fprint_original_links"; + assert!( + File::create(original_file).is_ok(), + "Error creating original file for symlink." + ); + + let symlink = "test_data/find_fprint_symlink"; + #[cfg(unix)] + { + let symlink_file = + unix::fs::symlink(std::fs::canonicalize(original_file).unwrap(), symlink); + assert!( + symlink_file.is_ok(), + "Error creating symlink file. {:?}", + symlink_file + ); + } + #[cfg(windows)] + { + let symlink_file = std::os::windows::fs::symlink_file( + std::fs::canonicalize(original_file).unwrap(), + symlink, + ); + assert!( + symlink_file.is_ok(), + "Error creating symlink file. {:?}", + symlink_file + ); + } + + let hardlink = "test_data/find_fprint_hardlink"; + let hardlink_file = std::fs::hard_link(std::fs::canonicalize(original_file).unwrap(), hardlink); + assert!( + hardlink_file.is_ok(), + "Error creating hardlink file. {:?}", + hardlink_file + ); + + let test = TestCaseData { + search_dir: "test_data/simple", + args: vec![ + "-fprint", + original_file, + "-fprint", + symlink, + "-fprint", + hardlink, + ], + needle_substr: vec![ + ("test_data/simple\n", 3), + ("test_data/simple/subdir\n", 3), + ("test_data/simple/subdir/ABBBC\n", 3), + ("test_data/simple/abbbc\n", 3), + ], + }; + Command::cargo_bin("find") + .expect("found binary") + .arg(test.search_dir) + .args(test.args) + .assert() + .success() + .stdout(predicate::str::is_empty()) + .stderr(predicate::str::is_empty()); + + let mut f = File::open(original_file).unwrap(); + let mut contents = String::new(); + f.read_to_string(&mut contents).unwrap(); + + for (substr, times) in test.needle_substr { + assert_eq!( + &contents + .as_bytes() + .windows(substr.len()) + .filter(|&w| w == substr.as_bytes()) + .count(), + ×, + "At substr {substr}" + ); + } + + let _ = fs::remove_file(original_file); + let _ = fs::remove_file(symlink); + let _ = fs::remove_file(hardlink); } #[test] From 3fea131369172e7a2779d5ee9ac323b083a473b3 Mon Sep 17 00:00:00 2001 From: tonakai-s Date: Sat, 29 Mar 2025 12:58:21 -0300 Subject: [PATCH 6/8] find: fix:Run clippy test:Use fix_up_slashes for windows. Use same already imported unix symlink module. --- src/find/matchers/mod.rs | 2 +- tests/find_cmd_tests.rs | 32 +++++++++++++++++--------------- 2 files changed, 18 insertions(+), 16 deletions(-) diff --git a/src/find/matchers/mod.rs b/src/find/matchers/mod.rs index e674c151..af68ae50 100644 --- a/src/find/matchers/mod.rs +++ b/src/find/matchers/mod.rs @@ -313,7 +313,7 @@ impl FileMemoizer { } fn get_file_id(&self, path: &Path) -> Result> { - let file_info = FileInformation::from_path(&path, true)?; + let file_info = FileInformation::from_path(path, true)?; #[cfg(windows)] let file_inode = file_info.file_index(); diff --git a/tests/find_cmd_tests.rs b/tests/find_cmd_tests.rs index 1b984c77..52f4aca3 100644 --- a/tests/find_cmd_tests.rs +++ b/tests/find_cmd_tests.rs @@ -15,7 +15,6 @@ use serial_test::serial; use std::collections::HashMap; use std::fs::{self, File}; use std::io::{Read, Write}; -use std::os::unix; use std::{env, io::ErrorKind}; use tempfile::Builder; @@ -1195,7 +1194,7 @@ fn find_using_same_out_multiple_times() { for (key, test_data) in cases { Command::cargo_bin("find") .expect("found binary") - .arg(test_data.search_dir) + .arg(fix_up_slashes(test_data.search_dir)) .args(test_data.args) .assert() .success() @@ -1209,13 +1208,15 @@ fn find_using_same_out_multiple_times() { // The find output can have different order depending on the platform, so we check by substrs for (substr, times) in test_data.needle_substr { + let substr = fix_up_slashes(substr); assert_eq!( &contents .as_bytes() .windows(substr.len()) .filter(|&w| w == substr.as_bytes()) .count(), - × + ×, + "Failes on key '{key}' substr '{substr}'" ); } @@ -1228,11 +1229,10 @@ fn find_using_same_out_multiple_times() { "Error creating original file for symlink." ); - let symlink = "test_data/find_fprint_symlink"; + let symlink_path = "test_data/find_fprint_symlink"; #[cfg(unix)] { - let symlink_file = - unix::fs::symlink(std::fs::canonicalize(original_file).unwrap(), symlink); + let symlink_file = symlink(std::fs::canonicalize(original_file).unwrap(), symlink_path); assert!( symlink_file.is_ok(), "Error creating symlink file. {:?}", @@ -1243,7 +1243,7 @@ fn find_using_same_out_multiple_times() { { let symlink_file = std::os::windows::fs::symlink_file( std::fs::canonicalize(original_file).unwrap(), - symlink, + symlink_path, ); assert!( symlink_file.is_ok(), @@ -1252,8 +1252,9 @@ fn find_using_same_out_multiple_times() { ); } - let hardlink = "test_data/find_fprint_hardlink"; - let hardlink_file = std::fs::hard_link(std::fs::canonicalize(original_file).unwrap(), hardlink); + let hardlink_path = "test_data/find_fprint_hardlink"; + let hardlink_file = + std::fs::hard_link(std::fs::canonicalize(original_file).unwrap(), hardlink_path); assert!( hardlink_file.is_ok(), "Error creating hardlink file. {:?}", @@ -1266,9 +1267,9 @@ fn find_using_same_out_multiple_times() { "-fprint", original_file, "-fprint", - symlink, + symlink_path, "-fprint", - hardlink, + hardlink_path, ], needle_substr: vec![ ("test_data/simple\n", 3), @@ -1279,7 +1280,7 @@ fn find_using_same_out_multiple_times() { }; Command::cargo_bin("find") .expect("found binary") - .arg(test.search_dir) + .arg(fix_up_slashes(test.search_dir)) .args(test.args) .assert() .success() @@ -1291,6 +1292,7 @@ fn find_using_same_out_multiple_times() { f.read_to_string(&mut contents).unwrap(); for (substr, times) in test.needle_substr { + let substr = fix_up_slashes(substr); assert_eq!( &contents .as_bytes() @@ -1298,13 +1300,13 @@ fn find_using_same_out_multiple_times() { .filter(|&w| w == substr.as_bytes()) .count(), ×, - "At substr {substr}" + "Error at substr '{substr}'" ); } let _ = fs::remove_file(original_file); - let _ = fs::remove_file(symlink); - let _ = fs::remove_file(hardlink); + let _ = fs::remove_file(symlink_path); + let _ = fs::remove_file(hardlink_path); } #[test] From 35a9759c90df93d681eec98720369d85044ebae8 Mon Sep 17 00:00:00 2001 From: tonakai-s Date: Sun, 20 Apr 2025 10:00:01 -0300 Subject: [PATCH 7/8] fix: Use FileInformation to check same file. --- src/find/matchers/mod.rs | 41 ++++++++++++++++------------------------ 1 file changed, 16 insertions(+), 25 deletions(-) diff --git a/src/find/matchers/mod.rs b/src/find/matchers/mod.rs index af68ae50..71f41011 100644 --- a/src/find/matchers/mod.rs +++ b/src/find/matchers/mod.rs @@ -285,10 +285,9 @@ impl ComparableValue { } // Used on file output arguments. -// Based on path inode or file_index check if the file already has been specified. // If yes, use the same file pointer. struct FileMemoizer { - mem: HashMap>, + mem: HashMap>, } impl FileMemoizer { fn new() -> Self { @@ -297,30 +296,22 @@ impl FileMemoizer { } } fn get_or_create_file(&mut self, path: &str) -> Result, Box> { - let path = Path::new(path); - let file_id = self.get_file_id(path); - if file_id.is_err() { - let file = Arc::new(File::create(path)?); - self.mem.insert(self.get_file_id(path)?, file.clone()); - return Ok(file); + let mut file_info = FileInformation::from_path(path, true); + match file_info { + Ok(info) => { + let file = self + .mem + .entry(info) + .or_insert(Arc::new(File::create(path)?)); + Ok(file.clone()) + } + Err(_) => { + let file = Arc::new(File::create(path)?); + file_info = FileInformation::from_path(path, true); + self.mem.insert(file_info?, file.clone()); + Ok(file) + } } - - let file = self - .mem - .entry(file_id.unwrap()) - .or_insert(Arc::new(File::create(path)?)); - Ok(file.clone()) - } - - fn get_file_id(&self, path: &Path) -> Result> { - let file_info = FileInformation::from_path(path, true)?; - #[cfg(windows)] - let file_inode = file_info.file_index(); - - #[cfg(unix)] - let file_inode = file_info.inode(); - - Ok(file_inode) } } From f314b347b8b7edd5076390b324eb0a6c62b28a97 Mon Sep 17 00:00:00 2001 From: tonakai-s Date: Sun, 20 Apr 2025 10:55:54 -0300 Subject: [PATCH 8/8] fix: Rebase changes --- src/find/matchers/ls.rs | 2 +- src/find/matchers/mod.rs | 13 +------------ src/find/matchers/printer.rs | 2 +- 3 files changed, 3 insertions(+), 14 deletions(-) diff --git a/src/find/matchers/ls.rs b/src/find/matchers/ls.rs index 77c51b9a..bf75f55d 100644 --- a/src/find/matchers/ls.rs +++ b/src/find/matchers/ls.rs @@ -269,7 +269,7 @@ impl Ls { impl Matcher for Ls { fn matches(&self, file_info: &WalkEntry, matcher_io: &mut MatcherIO) -> bool { if let Some(file) = &self.output_file { - self.print(file_info, file.as_ref(), true); + self.print(file_info, matcher_io, file.as_ref(), true); } else { self.print( file_info, diff --git a/src/find/matchers/mod.rs b/src/find/matchers/mod.rs index 71f41011..3cdf654d 100644 --- a/src/find/matchers/mod.rs +++ b/src/find/matchers/mod.rs @@ -70,18 +70,7 @@ use self::time::{ }; use self::type_matcher::{TypeMatcher, XtypeMatcher}; use self::user::{NoUserMatcher, UserMatcher}; -use ::regex::Regex; -use chrono::{DateTime, Datelike, NaiveDateTime, Utc}; -use fs::FileSystemMatcher; -use ls::Ls; -use std::{ - error::Error, - fs::{File, Metadata}, - io::Read, - path::Path, - str::FromStr, - time::SystemTime, -}; +use std::io::Read; use super::{Config, Dependencies}; diff --git a/src/find/matchers/printer.rs b/src/find/matchers/printer.rs index 2f9357e7..dc453892 100644 --- a/src/find/matchers/printer.rs +++ b/src/find/matchers/printer.rs @@ -72,7 +72,7 @@ impl Printer { impl Matcher for Printer { fn matches(&self, file_info: &WalkEntry, matcher_io: &mut MatcherIO) -> bool { if let Some(file) = &self.output_file { - self.print(file_info, file.as_ref(), true); + self.print(file_info, matcher_io, file.as_ref(), true); } else { self.print( file_info,