Cleaned up the roadmap, added examples, and a few features too.

cmccomb · Oct 31, 2021 · 89d619b · 89d619b
1 parent 496e51a
commit 89d619b
Show file tree

Hide file tree

Showing 6 changed files with 100 additions and 84 deletions.
diff --git a/Cargo.toml b/Cargo.toml
@@ -1,6 +1,6 @@
 [package]
 name = "human_regex"
-version = "0.1.2"
+version = "0.1.3"
 authors = ["Chris McComb <[email protected]>"]
 description = "A regex library for humans"
 edition = "2021"

diff --git a/README.md b/README.md
@@ -1,5 +1,5 @@
 | ⚠️ This package is under active development which will include breaking changes. ⚠️ |
-| --------------------------------------------------------------------- |
+| :--------------------------------------------------------------------------------: |
 # Regex for Humans
 The goal of this crate is simple: give everybody the power of regular expressions without having 
 to learn the complicated syntax. It is inspired by [ReadableRegex.jl](https://github.com/jkrumbiegel/ReadableRegex.jl).
@@ -24,100 +24,100 @@ fn main() {
 ```
 
 # Roadmap
-The eventual goal of this crate is to support all of the syntax in the [core Rust regex library](https://crates.io/crates/regex) through a human-readable API. Here is where we currently stand:
+The eventual goal of this crate is to support all the syntax in the [core Rust regex library](https://crates.io/crates/regex) through a human-readable API. Here is where we currently stand:
 
 ## Character Classes
 ### Single Character
 
-| Implemented?  | Expression | Description |
-| :----------:  | :--------: | :---------- | 
+| Implemented?  | Expression | Description                                                   |
+| :----------:  | :--------: | :------------------------------------------------------------ | 
 | `any()`       |   `.`      | any character except new line (includes new line with s flag) |
-| `digit()`     |   `\d`     | digit (\p{Nd}) |
-| `non_digit()` |    `\D`    | not digit |
-|               |`\pN`       | One-letter name Unicode character class |
-|               |`\p{Greek}` | Unicode character class (general category or script) |
-|               |`\PN`       | Negated one-letter name Unicode character class |
-|               |`\P{Greek}` | negated Unicode character class (general category or script) |
+| `digit()`     |   `\d`     | digit (\p{Nd})                                                |
+| `non_digit()` |    `\D`    | not digit                                                     |
+|               |`\pN`       | One-letter name Unicode character class                       |
+|               |`\p{Greek}` | Unicode character class (general category or script)          |
+|               |`\PN`       | Negated one-letter name Unicode character class               |
+|               |`\P{Greek}` | negated Unicode character class (general category or script)  |
 
 ### Perl Character Classes
 
-| Implemented?       | Expression | Description |
-| :---------------:  | :--------: | :---------- | 
-| `digit()`          |   `\d`     | digit (\p{Nd}) |
-| `non_digit()`      |   `\D`     | not digit |
-| `whitespace()`     |   `\s`     | whitespace (\p{White_Space}) |
-| `non_whitespace()` |   `\S`     | not whitespace |
+| Implemented?       | Expression | Description                                                              |
+| :---------------:  | :--------: | :----------------------------------------------------------------------- | 
+| `digit()`          |   `\d`     | digit (\p{Nd})                                                           |
+| `non_digit()`      |   `\D`     | not digit                                                                |
+| `whitespace()`     |   `\s`     | whitespace (\p{White_Space})                                             |
+| `non_whitespace()` |   `\S`     | not whitespace                                                           |
 | `word()`           |   `\w`     | word character (\p{Alphabetic} + \p{M} + \d + \p{Pc} + \p{Join_Control}) |
-| `non_word()`       |   `\W`     | not word character |
+| `non_word()`       |   `\W`     | not word character                                                       |
 
 ### ASCII Character Classes
 
-| Implemented?       | Expression | Description |
-| :---------------:  | :------------: | :---------- |
-|                    | `[[:alnum:]]`  | alphanumeric ([0-9A-Za-z]) |
-|                    | `[[:alpha:]]`  | alphabetic ([A-Za-z]) |
-|                    | `[[:ascii:]]`  | ASCII ([\x00-\x7F]) |
-|                    | `[[:blank:]]`  | blank ([\t ]) |
-|                    | `[[:cntrl:]]`  | control ([\x00-\x1F\x7F]) |
-| `digit()`          | `[[:digit:]]`  | digits ([0-9]) |
-|                    | `[[:graph:]]`  | graphical ([!-~]) |
-|                    | `[[:lower:]]`  | lower case ([a-z]) |
-|                    | `[[:print:]]`  | printable ([ -~]) |
-|                    | `[[:punct:]]`  | punctuation ([!-/:-@\[-`{-~]) |
-|                    | `[[:space:]]`  | whitespace ([\t\n\v\f\r ]) |
-|                    | `[[:upper:]]`  | upper case ([A-Z]) |
+| Implemented?       | Expression     | Description                    |
+| :---------------:  | :------------: | :----------------------------- |
+|                    | `[[:alnum:]]`  | alphanumeric ([0-9A-Za-z])     |
+|                    | `[[:alpha:]]`  | alphabetic ([A-Za-z])          |
+|                    | `[[:ascii:]]`  | ASCII ([\x00-\x7F])            |
+|                    | `[[:blank:]]`  | blank ([\t ])                  |
+|                    | `[[:cntrl:]]`  | control ([\x00-\x1F\x7F])      |
+| `digit()`          | `[[:digit:]]`  | digits ([0-9])                 |
+|                    | `[[:graph:]]`  | graphical ([!-~])              |
+|                    | `[[:lower:]]`  | lower case ([a-z])             |
+|                    | `[[:print:]]`  | printable ([ -~])              |
+|                    | `[[:punct:]]`  | punctuation ([!-/:-@\[-`{-~])  |
+|                    | `[[:space:]]`  | whitespace ([\t\n\v\f\r ])     |
+|                    | `[[:upper:]]`  | upper case ([A-Z])             |
 |  `word()`          | `[[:word:]]`   | word characters ([0-9A-Za-z_]) |
-|                    | `[[:xdigit:]]` | hex digit ([0-9A-Fa-f]) |
+|                    | `[[:xdigit:]]` | hex digit ([0-9A-Fa-f])        |
 
 ## Repetitions
 
-| Implemented?             | Expression | Description |
-| :----------------------: | :------------: | :---------- |
-| `zero_or_more(x)`        |    `x*`        | zero or more of x (greedy) |
-| `one_or_more(x)`         |    `x+`        | one or more of x (greedy) |
-| `zero_or_one(x)`         |    `x?`        | zero or one of x (greedy) |
-| `zero_or_more(x)`        |    `x*?`       | zero or more of x (ungreedy/lazy) |
-| `one_or_more(x).lazy()`  |    `x+?`       | one or more of x (ungreedy/lazy) |
-| `zero_or_more(x).lazy()` |    `x??`       | zero or one of x (ungreedy/lazy) |
-| `at_least_at_most(n, m, x)` |    `x{n,m}`    | at least n x and at most m x (greedy) |
-| `at_least(n, x)`         | `x{n,}`        | at least n x (greedy) |
-| `exactly(n, x)`          | `x{n}`         | exactly n x |
-| `at_least_at_most(n, m, x).lazy()`| `x{n,m}?`  | at least n x and at most m x (ungreedy/lazy) |
-| `at_least(n, x).lazy()`  | `x{n,}?`   | at least n x (ungreedy/lazy) |
+| Implemented?              | Expression     | Description                                  |
+| :-----------------------: | :------------: | :------------------------------------------- |
+| `zero_or_more(x)`         |    `x*`        | zero or more of x (greedy)                   |
+| `one_or_more(x)`          |    `x+`        | one or more of x (greedy)                    |
+| `zero_or_one(x)`          |    `x?`        | zero or one of x (greedy)                    |
+| `zero_or_more(x).lazy()`  |    `x*?`       | zero or more of x (ungreedy/lazy)            |
+| `one_or_more(x).lazy()`   |    `x+?`       | one or more of x (ungreedy/lazy)             |
+| `zero_or_one(x).lazy()`   |    `x??`       | zero or one of x (ungreedy/lazy)             |
+| `between(n, m, x)`        |    `x{n,m}`    | at least n x and at most m x (greedy)        |
+| `at_least(n, x)`          |    `x{n,}`     | at least n x (greedy)                        |
+| `exactly(n, x)`           |    `x{n}`      | exactly n x                                  |
+| `between(n, m, x).lazy()` |    `x{n,m}?`   | at least n x and at most m x (ungreedy/lazy) |
+| `at_least(n, x).lazy()`   |    `x{n,}?`    | at least n x (ungreedy/lazy)                 |
 
 ## Composites
 
 | Implemented?       |   Expression   |      Description                |
 | :---------------:  | :------------: | :------------------------------ |
 |    `+`             |   `xy`         | concatenation (x followed by y) |
-| `or()`             |   `x\|y`        | alternation (x or y, prefer x)  |
+| `or()`             |   `x\|y`       | alternation (x or y, prefer x)  |
 
 ## Empty matches
 
-| Implemented?       |   Expression   |      Description                |
-| :---------------:  | :------------: | :------------------------------ |
-| `begin()` | `^` |     the beginning of text (or start-of-line with multi-line mode) |
-| `end()` | `$`  |   the end of text (or end-of-line with multi-line mode) |
-| |`\A`  |  only the beginning of text (even with multi-line mode enabled) |
-| | `\z` |   only the end of text (even with multi-line mode enabled) |
-| |`\b`   | a Unicode word boundary (\w on one side and \W, \A, or \z on other) |
-| | `\B`  |  not a Unicode word boundary |
+| Implemented?          |   Expression   |      Description                                                    |
+| :------------------:  | :------------: | :------------------------------------------------------------------ |
+|    `begin()`          |    `^`         | the beginning of text (or start-of-line with multi-line mode)       |
+|     `end()`           |    `$`         | the end of text (or end-of-line with multi-line mode)               |
+|                       |    `\A`        | only the beginning of text (even with multi-line mode enabled)      |
+|                       |    `\z`        | only the end of text (even with multi-line mode enabled)            |
+| `word_boundary()`     |    `\b`        | a Unicode word boundary (\w on one side and \W, \A, or \z on other) |
+| `non_word_boundary()` |    `\B`        | not a Unicode word boundary                                         |
 
 ## Groupings and Flags
 
-| Implemented?       |   Expression   |      Description                |
-| :---------------:  | :------------: | :------------------------------ |
-| | `(exp)`         | numbered capture group (indexed by opening parenthesis) |
-| | `(?P<name>exp)` | named (also numbered) capture group |
-| | `(?:exp)`       | non-capturing group |
-| | `(?flags)`      | set flags within current group |
-| | `(?flags:exp)`  | set flags for exp (non-capturing) |
-
-| Implemented?       |   Expression   |      Description                |
-| :---------------:  | :------------: | :------------------------------ |
-| | `i` |    case-insensitive: letters match both upper and lower case |
-| | `m` |     multi-line mode: `^` and `$` match begin/end of line |
-| | `s` |     allow `.` to match `\n` |
-| | `U` |     swap the meaning of `x*` and `x*`? |
-| | `u` |     Unicode support (enabled by default) |
-| | `x` |     ignore whitespace and allow line comments (starting with `#`) |
+| Implemented?       |   Expression    |      Description                                        |
+| :---------------:  | :-------------: | :------------------------------------------------------ |
+|                    | `(exp)`         | numbered capture group (indexed by opening parenthesis) |
+|                    | `(?P<name>exp)` | named (also numbered) capture group                     |
+| Handled implicitly through functional composition | `(?:exp)`       | non-capturing group      |
+|                    | `(?flags)`      | set flags within current group                          |
+|                    | `(?flags:exp)`  | set flags for exp (non-capturing)                       |
+
+| Implemented?       |   Expression   |      Description                                              |
+| :---------------:  | :------------: | :------------------------------------------------------------ |
+|                    |   `i`          | case-insensitive: letters match both upper and lower case     |
+|                    |   `m`          | multi-line mode: `^` and `$` match begin/end of line          |
+|                    |   `s`          | allow `.` to match `\n`                                       |
+|                    |   `U`          | swap the meaning of `x*` and `x*?`                            |
+|                    |   `u`          | Unicode support (enabled by default)                          |
+|                    |   `x`          | ignore whitespace and allow line comments (starting with `#`) |
diff --git a/src/lib.rs b/src/lib.rs
@@ -35,16 +35,16 @@
 
 mod shorthand;
 pub use shorthand::{
-    any, begin, digit, direct_regex, end, non_digit, non_whitespace, non_word, text, whitespace,
-    word,
+    any, begin, digit, direct_regex, end, non_digit, non_whitespace, non_word, non_word_boundary,
+    text, whitespace, word, word_boundary,
 };
 
 mod humanregex;
 pub use humanregex::{fmt, HumanRegex};
 
 mod repetitions;
 pub use repetitions::{
-    at_least, at_least_at_most, exactly, one_or_more, optional, zero_or_more, zero_or_one,
+    at_least, between, exactly, one_or_more, optional, zero_or_more, zero_or_one,
 };
 
 mod logical;

diff --git a/src/logical.rs b/src/logical.rs
@@ -14,7 +14,7 @@ where
 {
     let mut regex_string = format!("({})", options[0].to_string());
     for idx in 1..options.len() {
-        regex_string = format!("{}|({})", regex_string, options[idx].to_string())
+        // Each later alternative is wrapped in a non-capturing group, `(?:...)`.
+        // (`(:?...)` would instead be a capturing group that accepts an optional literal ':'.)
+        regex_string = format!("{}|(?:{})", regex_string, options[idx].to_string())
     }
     HumanRegex(regex_string)
 }
diff --git a/src/repetitions.rs b/src/repetitions.rs
@@ -10,20 +10,20 @@ pub fn at_least<T>(n: u8, target: T) -> HumanRegex
 where
     T: Into<String> + fmt::Display,
 {
-    HumanRegex(format!("({}){{{},}}", target, n))
+    // `(?:...)` is a non-capturing group: the `{n,}` quantifier applies to the whole target
+    // without adding a numbered capture.
+    HumanRegex(format!("(?:{}){{{},}}", target, n))
 }
 
 /// Match at least _n_ and at most _m_ of a certain target
 /// ```
-/// let regex_string = human_regex::at_least_at_most(3, 5, "a");
+/// let regex_string = human_regex::between(3, 5, "a");
 /// assert!(regex_string.to_regex().is_match("aaaa"));
 /// assert!(!regex_string.to_regex().is_match("aa"));
 /// ```
-pub fn at_least_at_most<T>(n: u8, m: u8, target: T) -> HumanRegex
+pub fn between<T>(n: u8, m: u8, target: T) -> HumanRegex
 where
     T: Into<String> + fmt::Display,
 {
-    HumanRegex(format!("({}){{{},{}}}", target, n, m))
+    // `(?:...)` is a non-capturing group: the `{n,m}` quantifier applies to the whole target
+    // without adding a numbered capture.
+    HumanRegex(format!("(?:{}){{{},{}}}", target, n, m))
 }
 
 /// Match one or more of a certain target
@@ -36,7 +36,7 @@ pub fn one_or_more<T>(target: T) -> HumanRegex
 where
     T: Into<String> + fmt::Display,
 {
-    HumanRegex(format!("({})+", target))
+    // `(?:...)` is a non-capturing group: `+` applies to the whole target without adding a
+    // numbered capture.
+    HumanRegex(format!("(?:{})+", target))
 }
 
 /// Match zero or more of a certain target
@@ -49,7 +49,7 @@ pub fn zero_or_more<T>(target: T) -> HumanRegex
 where
     T: Into<String> + fmt::Display,
 {
-    HumanRegex(format!("({})*", target))
+    // `(?:...)` is a non-capturing group: `*` applies to the whole target without adding a
+    // numbered capture.
+    HumanRegex(format!("(?:{})*", target))
 }
 
 /// Match zero or one of a certain target
@@ -62,7 +62,7 @@ pub fn zero_or_one<T>(target: T) -> HumanRegex
 where
     T: Into<String> + fmt::Display,
 {
-    HumanRegex(format!("({})?", target))
+    // `(?:...)` is a non-capturing group: `?` applies to the whole target without adding a
+    // numbered capture.
+    HumanRegex(format!("(?:{})?", target))
 }
 
 /// Match zero or one of a certain target
@@ -75,7 +75,7 @@ pub fn optional<T>(target: T) -> HumanRegex
 where
     T: Into<String> + fmt::Display,
 {
-    HumanRegex(format!("({})?", target))
+    // `(?:...)` is a non-capturing group: `?` applies to the whole target without adding a
+    // numbered capture.
+    HumanRegex(format!("(?:{})?", target))
 }
 
 /// Match exactly _n_ of a certain target
@@ -88,5 +88,5 @@ pub fn exactly<T>(n: u8, target: T) -> HumanRegex
 where
     T: Into<String> + fmt::Display,
 {
-    HumanRegex(format!("({}){{{}}}", target, n))
+    // `(?:...)` is a non-capturing group: the `{n}` quantifier applies to the whole target
+    // without adding a numbered capture.
+    HumanRegex(format!("(?:{}){{{}}}", target, n))
 }
diff --git a/src/shorthand.rs b/src/shorthand.rs
@@ -13,6 +13,12 @@ pub fn any() -> HumanRegex {
 }
 
 /// A function for the digit character class (i.e., the digits 0 through 9)
+///
+/// The underlying pattern is `\d`, which is `\p{Nd}` when Unicode support is enabled, so
+/// decimal digits outside ASCII 0 through 9 can match as well.
+///
+/// ```
+/// use human_regex::{begin, end, one_or_more, digit};
+/// let regex_string = begin() + one_or_more(digit()) + end();
+/// assert!(regex_string.to_regex().is_match("010101010100100100100101"));
+/// assert!(!regex_string.to_regex().is_match("a string that is not composed of digits will fail"));
+/// ```
 pub fn digit() -> HumanRegex {
     HumanRegex(r"\d".to_string())
 }
@@ -107,3 +113,13 @@ where
 pub fn direct_regex(text: &str) -> HumanRegex {
+    // The input is copied as-is into the HumanRegex; nothing here escapes or validates it.
     HumanRegex(text.to_string())
 }
+
+/// Matches a word boundary (`\b`): a zero-width position with a word character on one side
+/// and a non-word character, or the start or end of the text, on the other.
+pub fn word_boundary() -> HumanRegex {
+    let pattern = String::from(r"\b");
+    HumanRegex(pattern)
+}
+
+/// Matches any zero-width position that is NOT a word boundary (`\B`).
+pub fn non_word_boundary() -> HumanRegex {
+    let pattern = String::from(r"\B");
+    HumanRegex(pattern)
+}