diff --git a/java/com/google/re2j/Matcher.java b/java/com/google/re2j/Matcher.java index 874955c7..678186d0 100644 --- a/java/com/google/re2j/Matcher.java +++ b/java/com/google/re2j/Matcher.java @@ -50,6 +50,9 @@ public final class Matcher { // The number of submatches (groups) in the pattern. private final int groupCount; + // The number of instructions in the pattern. + private final int numberOfInstructions; + private MatcherInput matcherInput; // The input length in UTF16 codes. @@ -77,6 +80,7 @@ private Matcher(Pattern pattern) { groupCount = re2.numberOfCapturingGroups(); groups = new int[2 + 2 * groupCount]; namedGroups = re2.namedGroups; + numberOfInstructions = re2.numberOfInstructions(); } /** Creates a new {@code Matcher} with the given pattern and input. */ @@ -209,6 +213,20 @@ public int end(String group) { return end(g); } + /** + * Returns the program size of this pattern. + * + * <p>
+ * Similar to the C++ implementation, the program size is a very approximate measure of a regexp's + * "cost". Larger numbers are more expensive than smaller numbers. + * </p>
+ * + * @return the program size of this pattern + */ + public int programSize() { + return numberOfInstructions; + } + /** * Returns the most recent match. * diff --git a/java/com/google/re2j/Pattern.java b/java/com/google/re2j/Pattern.java index f5883f53..dfe1bf18 100644 --- a/java/com/google/re2j/Pattern.java +++ b/java/com/google/re2j/Pattern.java @@ -300,6 +300,20 @@ public String toString() { return pattern; } + /** + * Returns the program size of this pattern. + * + * <p> + * Similar to the C++ implementation, the program size is a very approximate measure of a regexp's + * "cost". Larger numbers are more expensive than smaller numbers. + * </p>
+ * + * @return the program size of this pattern + */ + public int programSize() { + return re2.numberOfInstructions(); + } + /** * Returns the number of capturing groups in this matcher's pattern. Group zero denotes the entire * pattern and is excluded from this count. diff --git a/java/com/google/re2j/RE2.java b/java/com/google/re2j/RE2.java index c05fd3c3..46837354 100644 --- a/java/com/google/re2j/RE2.java +++ b/java/com/google/re2j/RE2.java @@ -209,6 +209,13 @@ int numberOfCapturingGroups() { return numSubexp; } + /** + * Returns the number of instructions in this compiled regular expression program. + */ + int numberOfInstructions() { + return prog.numInst(); + } + // get() returns a machine to use for matching |this|. It uses |this|'s // machine cache if possible, to avoid unnecessary allocation. Machine get() { diff --git a/javatests/com/google/re2j/ApiTestUtils.java b/javatests/com/google/re2j/ApiTestUtils.java index 7cf366d2..b38b656d 100644 --- a/javatests/com/google/re2j/ApiTestUtils.java +++ b/javatests/com/google/re2j/ApiTestUtils.java @@ -159,6 +159,27 @@ public static void testGroupCount(String pattern, int count) { assertEquals(count, mj.groupCount()); } + // Tests that both RE2 Patterns and Matchers report the same program size. 
+ public static void testProgramSize(String pattern, int expectedSize) { + Pattern p = Pattern.compile(pattern); + + String input = "foo"; + byte[] inputBytes = getUtf8Bytes(input); + Matcher m1 = p.matcher(input); + Matcher m2 = p.matcher(inputBytes); + + Truth.assertWithMessage("Pattern(\"%s\") program size", p) + .that(p.programSize()) + .isEqualTo(expectedSize); + Truth.assertWithMessage("Matcher(\"%s\", \"%s\") program size", m1.pattern(), input) + .that(m1.programSize()) + .isEqualTo(expectedSize); + Truth.assertWithMessage( + "Matcher(\"%s\", %s) program size", m2.pattern(), Arrays.toString(inputBytes)) + .that(m2.programSize()) + .isEqualTo(expectedSize); + } + public static void testGroup(String text, String regexp, String[] output) { // RE2 Pattern p = Pattern.compile(regexp); diff --git a/javatests/com/google/re2j/MatcherTest.java b/javatests/com/google/re2j/MatcherTest.java index 3c2c00ef..4f24ae2e 100644 --- a/javatests/com/google/re2j/MatcherTest.java +++ b/javatests/com/google/re2j/MatcherTest.java @@ -94,8 +94,25 @@ public void testReplaceFirst() { ApiTestUtils.testReplaceFirst("aab", "a*?", "<$0>", "<>aab"); } + @Test + public void testProgramSize() { + // It is a simple delegation, but still test it. + // More test cases are covered in PatternTest#testProgramSize. + Pattern pattern = Pattern.compile("go+d"); + int programSize = pattern.programSize(); + Truth.assertWithMessage("Pattern program size").that(programSize).isGreaterThan(1); + Truth.assertWithMessage("Positive matcher program size") + .that(pattern.matcher("good").programSize()) + .isEqualTo(programSize); + Truth.assertWithMessage("Negative matcher program size") + .that(pattern.matcher("bad").programSize()) + .isEqualTo(programSize); + } + @Test public void testGroupCount() { + // It is a simple delegation, but still test it. + // More test cases are covered in PatternTest#testGroupCount. 
ApiTestUtils.testGroupCount("(a)(b(c))d?(e)", 4); } diff --git a/javatests/com/google/re2j/PatternTest.java b/javatests/com/google/re2j/PatternTest.java index 9251a37b..8a4f897f 100644 --- a/javatests/com/google/re2j/PatternTest.java +++ b/javatests/com/google/re2j/PatternTest.java @@ -164,9 +164,21 @@ public void testSplit() { ApiTestUtils.testSplit(":", ":a::b", new String[] {"", "a", "", "b"}); } + @Test + public void testProgramSize() { + ApiTestUtils.testProgramSize("", 3); + ApiTestUtils.testProgramSize("a", 3); + ApiTestUtils.testProgramSize("^", 3); + ApiTestUtils.testProgramSize("^$", 4); + ApiTestUtils.testProgramSize("a+b", 5); + ApiTestUtils.testProgramSize("a+b?", 6); + ApiTestUtils.testProgramSize("(a+b)", 7); + ApiTestUtils.testProgramSize("a+b.*", 7); + ApiTestUtils.testProgramSize("(a+b?)", 8); + } + @Test public void testGroupCount() { - // It is a simple delegation, but still test it. ApiTestUtils.testGroupCount("(.*)ab(.*)a", 2); ApiTestUtils.testGroupCount("(.*)(ab)(.*)a", 3); ApiTestUtils.testGroupCount("(.*)((a)b)(.*)a", 4);