From a2c12cd9c1872887010aef0ce4868e4d9d5d39b7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?J=C3=A9rome=20Eertmans?= Date: Sun, 12 Mar 2023 16:32:21 +0100 Subject: [PATCH] feat(cli): improve the cli and add more options (#16) * feat(cli): improve the cli and add more options * feat(cli): enhancing the cli * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * chore(ci): fix some bugs --------- Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> --- Cargo.lock | 52 +++++++++---- Cargo.toml | 3 +- README.md | 19 ++--- src/bin.rs | 129 ++++++++++++++++++++++++++------ static/logo.svg | 158 ++++++++++++++++++++++++++++++++++++++++ tests/correct_suffix.rs | 70 ++++++++++++++++++ tests/same_as_find.rs | 25 +++---- 7 files changed, 394 insertions(+), 62 deletions(-) create mode 100644 static/logo.svg create mode 100644 tests/correct_suffix.rs diff --git a/Cargo.lock b/Cargo.lock index eea7994..d3bf130 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -4,9 +4,9 @@ version = 3 [[package]] name = "aho-corasick" -version = "0.7.18" +version = "0.7.20" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1e37cfd5e7657ada45f742d6e99ca5788580b5c529dc78faf11ece6dc702656f" +checksum = "cc936419f96fa211c1b9166887b38e5e40b19958e5b895be7c1f93adec7071ac" dependencies = [ "memchr", ] @@ -39,12 +39,11 @@ dependencies = [ [[package]] name = "crossbeam-utils" -version = "0.8.11" +version = "0.8.15" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "51887d4adc7b564537b15adcfb307936f8075dfcd5f00dde9a9f1d29383682bc" +checksum = "3c063cd8cc95f5c377ed0d4b49a4b21f632396ff690e8470c29b3359b346984b" dependencies = [ "cfg-if", - "once_cell", ] [[package]] @@ -54,6 +53,7 @@ dependencies = [ "crossbeam-channel", "globset", "ignore", + "num_cpus", "regex", ] @@ -76,6 +76,15 @@ dependencies = [ "regex", ] +[[package]] +name = "hermit-abi" +version = "0.2.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ee512640fe35acbfb4bb779db6f0d80704c2cacfa2e39b601ef3e3f47d1ae4c7" +dependencies = [ + "libc", +] + [[package]] name = "ignore" version = "0.4.20" @@ -99,6 +108,12 @@ version = "1.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e2abad23fbc42b3700f2f279844dc832adb2b2eb069b2df918f455c4e18cc646" +[[package]] +name = "libc" +version = "0.2.140" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "99227334921fae1a979cf0bfdfcc6b3e5ce376ef57e16fb6fb3ea2ed6095f80c" + [[package]] name = "log" version = "0.4.17" @@ -114,11 +129,21 @@ version = "2.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "2dffe52ecf27772e601905b7522cb4ef790d2cc203488bbd0e2fe85fcb74566d" +[[package]] +name = "num_cpus" +version = "1.15.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0fac9e2da13b5eb447a6ce3d392f23a29d8694bff781bf03a16cd9ac8697593b" +dependencies = [ + "hermit-abi", + "libc", +] + [[package]] name = "once_cell" -version = "1.13.0" +version = "1.17.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "18a6dbe30758c9f83eb00cbea4ac95966305f5a7772f3f42ebfc7fc7eddbd8e1" +checksum = "b7e5500299e16ebb147ae15a00a942af264cf3688f47923b8fc2cd5858f23ad3" [[package]] name = "regex" @@ -133,9 +158,9 @@ dependencies = [ [[package]] name = "regex-syntax" -version = "0.6.27" +version = "0.6.28" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a3f87b73ce11b1619a3c6332f45341e0047173771e8b8b73f87bfeefb7b56244" +checksum = "456c603be3e8d448b072f410900c09faf164fbce2d480456f50eea6e25f9c848" [[package]] name = "same-file" @@ -148,16 +173,17 @@ dependencies = [ [[package]] name = "serde" -version = "1.0.154" +version = "1.0.155" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8cdd151213925e7f1ab45a9bbfb129316bd00799784b174b7cc7bcd16961c49e" +checksum = "71f2b4817415c6d4210bfe1c7bfcf4801b2d904cb4d0e1a8fdb651013c9e86b8" [[package]] name = "thread_local" -version = "1.1.4" +version = "1.1.7" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5516c27b78311c50bf42c071425c560ac799b11c30b31f87e3081965fe5e0180" +checksum = "3fdd6f064ccff2d6567adcb3873ca630700f00b5ad3f060c25b5dcfd9a4ce152" dependencies = [ + "cfg-if", "once_cell", ] diff --git a/Cargo.toml b/Cargo.toml index d671012..8a19350 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -6,6 +6,7 @@ path = "src/bin.rs" crossbeam-channel = "0.5" globset = "0.4" ignore = "0.4" +num_cpus = "1.15.0" regex = "1" [package] @@ -18,7 +19,7 @@ license = "MIT" name = "filesfinder" readme = "README.md" repository = "https://github.com/jeertmans/filesfinder" -rust-version = "1.58.1" +rust-version = "1.63.0" version = "0.3.8" [[test]] diff --git a/README.md b/README.md index fec13b4..485d12d 100644 --- a/README.md +++ b/README.md @@ -1,3 +1,7 @@ +

+ +

+ # FilesFinder > Find files matching patterns while respecting `.gitignore` @@ -9,16 +13,15 @@ 3. [Examples](#examples) 4. [GitHub Action](#github-action) 5. [Contributing](#contributing) - - [Future features](#future-features) ## About FilesFinder (FF) is a command-line tool that aims to search for files within a given repository. As such, it respects your `.gitignore` files and exclude the same files from the output. -FF is a fast and simpler-to-use alternative to other tools such as `find` from [Findutils](https://www.gnu.org/software/findutils/manual/html_mono/find.html). +FF is a **faster** and **simpler-to-use** alternative to other tools such as `find` from [Findutils](https://www.gnu.org/software/findutils/manual/html_mono/find.html). -> **NOTE:** FF is not necessarily faster than `find` (or else), but speed is plays an important in its development and you can be sure that opting to `ff` will not decrease performance by much. +> **NOTE:** FF is generally faster than `find` (or else), mainly because it uses parallel processing. If you find a scenario in which FF is slower than `find` or any other tool, please report it to me :-) ## Installation @@ -127,13 +130,3 @@ A major application to `FF` is to be used within repositories. Therefore, you ca ## Contributing Contributions are more than welcome! - -### Future features - -- [ ] Benchmark the tool against alternatives -- [ ] Provide other flags for case -- [ ] Allow to match fullname or basename -- [x] Add tests for CI -- [x] Create a GitHub action -- [ ] Publish pre-built binaries and use them for GitHub action -- [ ] ... diff --git a/src/bin.rs b/src/bin.rs index 2ff87eb..5a2f77e 100644 --- a/src/bin.rs +++ b/src/bin.rs @@ -1,7 +1,7 @@ use globset::GlobBuilder; use regex::bytes::RegexSetBuilder; use std::io::{self, Write}; -use std::path::Path; +use std::path::{Path, PathBuf}; #[macro_export] macro_rules! path_as_bytes { @@ -32,6 +32,8 @@ OPTIONS: -r, -R Parse pattern as a regular expression. + Note that expressions are unanchored by default. + Use '^' or '\\A' to denote start, and '$' or '\\z' for the end. -i, -I Matching files will be included in the output. @@ -40,15 +42,34 @@ OPTIONS: -e, -E Matching files will be excluded from the output. + -j + Number of threads to use. + Setting this to zero will choose the number of threads automatically. + [default: num_cpus] + --dir Files will be searched in the directory specified by the PATH. + Multiple occurences are allowed. [default: '.'] + --max-depth + Maximum depth to recurse. + [default: None] + + --follow-links + Allow to follow symbolic links. + --show-hidden Allow to show hidden files. --no-gitignore - Ignore gitignore files. + ignore .gitignore files. + + --no-ignore + ignore .ignore files. + + --no-strip-prefix + Do not strip './' prefix, same as what GNU find does. -h, --help Print help information. @@ -154,14 +175,24 @@ impl<'source> MatcherBuilder<'source> { fn main() -> Result<(), Box> { let mut args = std::env::args().skip(1); + + // Matcher options let mut default_kind = MatcherKind::Glob; let mut default_include = true; let mut include: Vec = vec![]; let mut exclude: Vec = vec![]; - let mut last_arg_seen = false; - let mut directory = ".".to_string(); - let mut ignore_hidden = true; + let mut strip_prefix: bool = true; + + // Walker options + let mut directories: Vec = vec![]; + let mut follow_links = false; let mut use_gitignore = true; + let mut use_ignore = true; + let mut ignore_hidden = true; + let mut max_depth: Option = None; + let mut threads: Option = None; + + let mut last_arg_seen = false; while !last_arg_seen { let mut matcher = MatcherBuilder::new(default_kind.clone()); @@ -173,16 +204,29 @@ fn main() -> Result<(), Box> { match option { "dir" => { if let Some(path) = args.next() { - directory = path; + directories.push(path); } else { eprintln!( - "--dir option is missing a . Print help with '--help'." + "Error: --dir option is missing a . Print help with '--help'." ); std::process::exit(1); } } + "follow-links" => follow_links = true, "show-hidden" => ignore_hidden = false, "no-gitignore" => use_gitignore = false, + "no-ignore" => use_ignore = false, + "no-strip-prefix" => strip_prefix = false, + "max-depth" => { + if let Some(depth) = args.next() { + max_depth = depth.parse().ok(); + } else { + eprintln!( + "Error: --max-depth option is missing a . Print help with '--help'." + ); + std::process::exit(1); + } + } "help" => { print_help(); std::process::exit(0); @@ -223,6 +267,16 @@ fn main() -> Result<(), Box> { default_include = false; } } + 'j' => { + if let Some(jobs) = args.next() { + threads = jobs.parse().ok(); + } else { + eprintln!( + "error: -j option is missing a . Print help with '--help'." + ); + std::process::exit(1); + } + } 'h' => { print_help(); std::process::exit(0); @@ -263,30 +317,63 @@ fn main() -> Result<(), Box> { let include = RegexSetBuilder::new(include).build()?; let exclude = RegexSetBuilder::new(exclude).build()?; - let (tx, rx) = crossbeam_channel::unbounded::(); + let (tx, rx) = crossbeam_channel::unbounded::(); - let walker = ignore::WalkBuilder::new(directory.as_str()) - .hidden(ignore_hidden) + let mut directories = directories.iter().map(|s| s.as_str()); + + let mut walk_builder = ignore::WalkBuilder::new(directories.next().unwrap_or(".")); + + walk_builder + .follow_links(follow_links) .git_ignore(use_gitignore) - .build_parallel(); + .hidden(ignore_hidden) + .ignore(use_ignore) + .max_depth(max_depth) + .threads(threads.unwrap_or(num_cpus::get())); + + for directory in directories { + walk_builder.add(directory); + } + + let walker = walk_builder.build_parallel(); + + let mut stdout = io::BufWriter::new(io::stdout()); let stdout_thread = std::thread::spawn(move || { - let mut stdout = io::BufWriter::new(io::stdout()); - for de in rx.iter().filter(|de| { - let path = de.path(); - let strl = path.to_string_lossy(); - let utf8 = strl.as_bytes(); - path.is_file() && include.is_match(utf8) && !exclude.is_match(utf8) - }) { - write_path(&mut stdout, de.path()); + for path_buf in rx { + write_path(&mut stdout, path_buf.as_path()); } }); walker.run(|| { let tx = tx.clone(); + let include = &include; + let exclude = &exclude; + Box::new(move |result| { - tx.send(result.unwrap()).unwrap(); - ignore::WalkState::Continue + let de = match result { + Ok(de) => de, + Err(_) => return ignore::WalkState::Continue, + }; + + let mut path = de.path(); + + if strip_prefix { + if let Ok(p) = path.strip_prefix("./") { + path = p; + } + } + + let strl = path.to_string_lossy(); + let utf8 = strl.as_bytes(); + if path.is_file() && include.is_match(utf8) && !exclude.is_match(utf8) { + match tx.send(path.to_path_buf()) { + Ok(_) => ignore::WalkState::Continue, + Err(_) => ignore::WalkState::Quit, + } + } else { + ignore::WalkState::Continue + } }) }); diff --git a/static/logo.svg b/static/logo.svg new file mode 100644 index 0000000..61721bd --- /dev/null +++ b/static/logo.svg @@ -0,0 +1,158 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/tests/correct_suffix.rs b/tests/correct_suffix.rs new file mode 100644 index 0000000..9d6305a --- /dev/null +++ b/tests/correct_suffix.rs @@ -0,0 +1,70 @@ +use std::process::Command; + +fn stdout_to_lines_vec(stdout: Vec) -> Vec { + String::from_utf8(stdout) + .unwrap() + .lines() + .map(|s| s.to_string()) + .collect() +} + +#[macro_export] +macro_rules! command { + ($name: expr, $( $args: expr ),* ) => { + Command::new($name).args(vec![$($args),*]).output().unwrap() + }; +} + +#[macro_export] +macro_rules! assert_correct_suffix { + (@include $output:expr, $suffixes:expr) => { + let files = stdout_to_lines_vec($output.stdout); + let suffixes = $suffixes; + + for file in files.iter() { + assert!( + suffixes.iter().any(|suff| file.ends_with(suff)), + "file {} does end with any of {:?}", + file, + suffixes + ); + } + }; + (@exclude $output:expr, $suffixes:expr) => { + let files = stdout_to_lines_vec($output.stdout); + let suffixes = $suffixes; + + for file in files.iter() { + assert!( + !suffixes.iter().any(|suff| file.ends_with(suff)), + "file {} shoud not end with any of {:?}", + file, + suffixes + ); + } + }; +} + +#[test] +fn test_one_glob_pattern() { + assert_correct_suffix!(@include command!["ff", "*.rs"], &[".rs"]); + assert_correct_suffix!(@exclude command!["ff", "*", "-e", "*.rs"], &[".rs"]); +} + +#[test] +fn test_one_regex_pattern() { + assert_correct_suffix!(@include command!["ff", "-r", r".*\.c$"], &[".c"]); + assert_correct_suffix!(@exclude command!["ff", "*", "-er", r".*\.c$"], &[".c"]); +} + +#[test] +fn test_two_glob_patterns() { + assert_correct_suffix!(@include command!["ff", "*.rs", "*.toml"], &[".rs", ".toml"]); + assert_correct_suffix!(@exclude command!["ff", "*", "-e", "*.rs", "-e", "*.toml"], &[".rs", ".toml"]); +} + +#[test] +fn test_two_regex_patterns() { + assert_correct_suffix!(@include command!["ff", "-r", r".*\.c$", "-r", r".*\.h$"], &[".c", ".h"]); + assert_correct_suffix!(@exclude command!["ff", "*", "-er", r".*\.c$", "-er", r".*\.h$"], &[".c", ".h"]); +} diff --git a/tests/same_as_find.rs b/tests/same_as_find.rs index 525ff72..cde302a 100644 --- a/tests/same_as_find.rs +++ b/tests/same_as_find.rs @@ -18,7 +18,7 @@ macro_rules! command { #[macro_export] macro_rules! ff { ($( $args: expr ),* ) => { - command!("ff", "--no-gitignore", $($args),*) + command!("ff", "--show-hidden", "--no-gitignore", "--no-ignore", "--no-strip-prefix", $($args),*) }; } @@ -34,31 +34,28 @@ macro_rules! assert_same_output { ($left:expr, $right:expr) => { let left = $left; let right = $right; - println!("{:?}", right); assert_eq!(left.status, right.status); assert_eq!(left.stderr, right.stderr); let mut left = stdout_to_lines_vec($left.stdout); left.sort(); - let mut right = stdout_to_lines_vec($left.stdout); + let mut right = stdout_to_lines_vec($right.stdout); right.sort(); - assert_eq!(left, right); + assert_eq!(left.len(), right.len(), "vectors are not of equal length"); + + for (l, r) in left.iter().zip(right.iter()) { + assert_eq!(l, r); + } }; } #[test] fn test_one_glob_pattern() { - assert_same_output!(ff!["*.rs"], find![".", "-type", "f", "-name", "*.rs"]); - assert_same_output!(ff!["*.toml"], find![".", "-type", "f", "-name", "*.toml"]); + assert_same_output!(ff!["*.rs"], find![".", "-wholename", "*.rs"]); + assert_same_output!(ff!["*.toml"], find![".", "-wholename", "*.toml"]); } #[test] fn test_one_regex_pattern() { - assert_same_output!( - ff!["-r", r".*\.c"], - find![".", "-type", "f", "-regex", r".*\.c"] - ); - assert_same_output!( - ff!["-r", r".*\.h"], - find![".", "-type", "f", "-regex", r".*\.h"] - ); + assert_same_output!(ff!["-r", r".*\.c$"], find![".", "-regex", r".*\.c"]); + assert_same_output!(ff!["-r", r".*\.h$"], find![".", "-regex", r".*\.h"]); }