Skip to content

Commit

Permalink
Merge branch 'handle-invalid-utf8' into main
Browse files Browse the repository at this point in the history
  • Loading branch information
jeertmans committed Aug 3, 2022
2 parents 4381e00 + f479234 commit bc70528
Show file tree
Hide file tree
Showing 4 changed files with 110 additions and 65 deletions.
9 changes: 9 additions & 0 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

6 changes: 5 additions & 1 deletion Cargo.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[package]
name = "filesfinder"
version = "0.1.0"
version = "0.2.0"
authors = ["Jérome Eertmans <[email protected]>"]
edition = "2021"
description = "Find files within current directory that match given patterns, while respecting gitignore rules."
Expand All @@ -20,3 +20,7 @@ ignore = "0.4"
crossbeam-channel = "0.5"
regex = "1"
globset = "0.4"

[[test]]
name = "same_as_find"
path = "tests/same_as_find.rs"
106 changes: 42 additions & 64 deletions src/bin.rs
Original file line number Diff line number Diff line change
@@ -1,8 +1,20 @@
use globset::GlobBuilder;
use regex::bytes::RegexSetBuilder;
use std::io::{self, Write};
use std::path::Path;

const AUTHORS: &str = env!("CARGO_PKG_AUTHORS");
const DESCRIPTION: &str = env!("CARGO_PKG_DESCRIPTION");
const NAME: &str = env!("CARGO_PKG_NAME");
const VERSION: &str = env!("CARGO_PKG_VERSION");

/// Expands to the bytes of a path after a lossy conversion to UTF-8.
///
/// The argument must be an identifier bound to a value that offers
/// `to_string_lossy()`. The resulting slice borrows from a temporary, so it
/// is only usable inside the expression in which the macro is invoked.
#[macro_export]
macro_rules! path_as_bytes {
    ($p: ident) => {
        str::as_bytes(&$p.to_string_lossy())
    };
}

fn print_help() {
println!(
"{NAME} {VERSION}
Expand Down Expand Up @@ -80,6 +92,19 @@ fn print_invalid_short_option(option: char) {
print_invalid_option(format!("-{}", option).as_str())
}

/// Writes `path` followed by a newline to `wtr`.
///
/// On Unix the raw bytes of the path are written unchanged, so file names
/// that are not valid UTF-8 are emitted as-is.
///
/// # Panics
///
/// Panics if writing to `wtr` fails.
#[cfg(unix)]
fn write_path<W: Write>(mut wtr: W, path: &Path) {
    use std::os::unix::ffi::OsStrExt;
    // `write_all` instead of `write`: a single `write` call is allowed to
    // accept only part of the buffer, which would truncate the path.
    wtr.write_all(path.as_os_str().as_bytes()).unwrap();
    wtr.write_all(b"\n").unwrap();
}

/// Writes `path` followed by a newline to `wtr`.
///
/// On non-Unix platforms the path is converted with `to_string_lossy`
/// before being written.
///
/// # Panics
///
/// Panics if writing to `wtr` fails.
#[cfg(not(unix))]
fn write_path<W: Write>(mut wtr: W, path: &Path) {
    // `write_all` instead of `write`: a single `write` call is allowed to
    // accept only part of the buffer, which would truncate the path.
    wtr.write_all(path.to_string_lossy().as_bytes()).unwrap();
    wtr.write_all(b"\n").unwrap();
}

#[derive(Clone, Copy)]
enum MatcherKind {
Glob,
Expand All @@ -92,22 +117,6 @@ impl Default for MatcherKind {
}
}

/// A single compiled pattern: either a glob or a regular expression.
#[derive(Clone)]
enum Matcher {
    /// Pattern compiled from glob syntax.
    Glob(globset::GlobMatcher),
    /// Pattern compiled from regular-expression syntax.
    Regex(regex::Regex),
}

impl Matcher {
    /// Returns `true` when `string` is matched by the wrapped pattern.
    #[inline]
    fn is_match(&self, string: &str) -> bool {
        match *self {
            Self::Glob(ref compiled_glob) => compiled_glob.is_match(string),
            Self::Regex(ref compiled_regex) => compiled_regex.is_match(string),
        }
    }
}

#[derive(Default)]
struct MatcherBuilder<'source> {
pattern: Option<&'source str>,
Expand Down Expand Up @@ -137,48 +146,24 @@ impl<'source> MatcherBuilder<'source> {
self.set_kind(MatcherKind::Regex)
}

fn build(self) -> Result<Matcher, Box<dyn std::error::Error>> {
fn build(self) -> Result<String, Box<dyn std::error::Error>> {
let pattern = self
.pattern
.expect("cannot build matcher if pattern is not set.");

match self.kind {
MatcherKind::Glob => {
let glob = globset::GlobBuilder::new(pattern)
.build()?
.compile_matcher();
Ok(Matcher::Glob(glob))
}
MatcherKind::Regex => {
let regex = regex::Regex::new(pattern)?;
Ok(Matcher::Regex(regex))
}
MatcherKind::Glob => Ok(GlobBuilder::new(pattern).build()?.regex().to_string()),
MatcherKind::Regex => Ok(pattern.to_string()),
}
}
}

/// A collection of matchers that is queried as a whole.
#[derive(Clone)]
struct MatcherSet {
    matchers: Vec<Matcher>,
}

impl MatcherSet {
    /// Wraps the given matchers in a set.
    fn new(matchers: Vec<Matcher>) -> MatcherSet {
        MatcherSet { matchers }
    }

    /// Returns `true` as soon as one matcher of the set matches `string`;
    /// an empty set never matches.
    #[inline]
    fn is_match(&self, string: &str) -> bool {
        for matcher in &self.matchers {
            if matcher.is_match(string) {
                return true;
            }
        }
        false
    }
}

fn main() -> Result<(), Box<dyn std::error::Error>> {
let mut args = std::env::args().skip(1);
let mut default_kind = MatcherKind::Glob;
let mut default_include = true;
let mut include: Vec<Matcher> = vec![];
let mut exclude: Vec<Matcher> = vec![];
let mut include: Vec<String> = vec![];
let mut exclude: Vec<String> = vec![];
let mut last_arg_seen = false;
let mut directory = ".".to_string();
let mut ignore_hidden = true;
Expand Down Expand Up @@ -281,39 +266,32 @@ fn main() -> Result<(), Box<dyn std::error::Error>> {
std::process::exit(1);
}

let include = MatcherSet::new(include);
let exclude = MatcherSet::new(exclude);
let include = RegexSetBuilder::new(include).build()?;
let exclude = RegexSetBuilder::new(exclude).build()?;

let (tx, rx) = crossbeam_channel::unbounded::<std::path::PathBuf>();
let (tx, rx) = crossbeam_channel::unbounded::<ignore::DirEntry>();

let walker = ignore::WalkBuilder::new(directory.as_str())
.hidden(ignore_hidden)
.git_ignore(use_gitignore)
.build_parallel();

let stdout_thread = std::thread::spawn(move || {
for path in rx {
println!("{:?}", path.as_os_str());
let mut stdout = io::BufWriter::new(io::stdout());
for de in rx.iter().filter(|de| {
let path = de.path();
let strl = path.to_string_lossy();
let utf8 = strl.as_bytes();
path.is_file() && include.is_match(&utf8) && !exclude.is_match(&utf8)
}) {
write_path(&mut stdout, de.path());
}
});

walker.run(|| {
let tx = tx.clone();
let include = include.clone();
let exclude = exclude.clone();

Box::new(move |result| {
if let Ok(de) = result {
let path = de.into_path();

if path.is_file() {
if let Some(filename) = path.to_str() {
if include.is_match(filename) && !exclude.is_match(filename) {
tx.send(path).unwrap();
}
}
}
}
tx.send(result.unwrap()).unwrap();
ignore::WalkState::Continue
})
});
Expand Down
54 changes: 54 additions & 0 deletions tests/same_as_find.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,54 @@
use std::process::Command;

/// Splits captured standard output into owned lines.
///
/// # Panics
///
/// Panics if `stdout` is not valid UTF-8.
fn stdout_to_lines_vec(stdout: Vec<u8>) -> Vec<String> {
    let text = String::from_utf8(stdout).unwrap();
    let mut collected = Vec::new();
    for line in text.lines() {
        collected.push(String::from(line));
    }
    collected
}

/// Runs the program `$program` with the given arguments and evaluates to
/// its captured output.
///
/// `Command` must be in scope where the macro is invoked. The expansion
/// panics if the process could not be run.
#[macro_export]
macro_rules! command {
    ($program: expr, $( $arg: expr ),* ) => {{
        let mut cmd = Command::new($program);
        $( cmd.arg($arg); )*
        cmd.output().unwrap()
    }};
}

/// Runs the `ff` program through `command!`, always passing
/// `--no-gitignore` ahead of the caller-supplied arguments.
#[macro_export]
macro_rules! ff {
    ($( $arg: expr ),* ) => {
        command!("ff", "--no-gitignore" $(, $arg)*)
    };
}

/// Runs the `find` program through `command!` with the given arguments.
#[macro_export]
macro_rules! find {
    ($( $arg: expr ),* ) => {
        command!(
            "find",
            $( $arg ),*
        )
    };
}

/// Asserts that two command outputs are equivalent.
///
/// Both operands must expose `status`, `stderr` and `stdout` fields. The
/// exit statuses and standard-error bytes must be equal, and the
/// standard-output lines must be equal once sorted, so the order in which
/// lines were printed does not matter.
///
/// Each operand is evaluated exactly once. `stdout_to_lines_vec` must be in
/// scope where the macro is invoked.
#[macro_export]
macro_rules! assert_same_output {
    ($left:expr, $right:expr) => {{
        let left = $left;
        let right = $right;
        println!("{:?}", right);
        assert_eq!(left.status, right.status);
        assert_eq!(left.stderr, right.stderr);
        // Use the values bound above: re-expanding `$left` would run the
        // command again, and the right-hand lines must come from `right`.
        let mut left_lines = stdout_to_lines_vec(left.stdout);
        left_lines.sort();
        let mut right_lines = stdout_to_lines_vec(right.stdout);
        right_lines.sort();
        assert_eq!(left_lines, right_lines);
    }};
}

/// Checks that `ff` given a single glob pattern produces the same output as
/// `find . -type f -name <pattern>`, for `*.rs` and `*.toml`.
///
/// The commands are passed to `assert_same_output!` as expressions rather
/// than pre-bound variables, because the macro consumes its operands.
#[test]
fn test_one_glob_pattern() {
assert_same_output!(ff!["*.rs"], find![".", "-type", "f", "-name", "*.rs"]);
assert_same_output!(ff!["*.toml"], find![".", "-type", "f", "-name", "*.toml"]);
}

/// Checks that `ff -r <regex>` produces the same output as
/// `find . -type f -regex <regex>`, for C source and header patterns.
///
/// NOTE(review): `find -regex` matches against the whole path, while `ff`
/// presumably performs an unanchored search — confirm both agree for
/// paths such as `x.cfg` that merely contain `.c`.
#[test]
fn test_one_regex_pattern() {
assert_same_output!(ff!["-r", r".*\.c"], find![".", "-type", "f", "-regex", r".*\.c"]);
assert_same_output!(ff!["-r", r".*\.h"], find![".", "-type", "f", "-regex", r".*\.h"]);
}

0 comments on commit bc70528

Please sign in to comment.