Skip to content

Commit

Permalink
ICU-22555 Fix infinity loop in RuleBasedCollator constructor
Browse files Browse the repository at this point in the history
Fix C++ and Java code.
Add unit tests for both C++ and Java.
  • Loading branch information
FrankYFTang committed Nov 29, 2023
1 parent 8d3d214 commit 757d2cd
Show file tree
Hide file tree
Showing 6 changed files with 80 additions and 5 deletions.
13 changes: 11 additions & 2 deletions icu4c/source/common/caniter.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -264,10 +264,19 @@ void CanonicalIterator::setSource(const UnicodeString &newSource, UErrorCode &st
* @param source the string to find permutations for
* @return the results in a set.
*/
void U_EXPORT2 CanonicalIterator::permute(UnicodeString &source, UBool skipZeros, Hashtable *result, UErrorCode &status) {
void U_EXPORT2 CanonicalIterator::permute(UnicodeString &source, UBool skipZeros, Hashtable *result, UErrorCode &status, int32_t depth) {
if(U_FAILURE(status)) {
return;
}
// To avoid infinity loop caused by permute, we limit the depth of recursive
// call to permute and return U_UNSUPPORTED_ERROR.
// We know in some unit test we need at least 4. Set to 8 just in case some
// unforseen use cases.
constexpr int32_t kPermuteDepthLimit = 8;
if (depth > kPermuteDepthLimit) {
status = U_UNSUPPORTED_ERROR;
return;
}
//if (PROGRESS) printf("Permute: %s\n", UToS(Tr(source)));
int32_t i = 0;

Expand Down Expand Up @@ -311,7 +320,7 @@ void U_EXPORT2 CanonicalIterator::permute(UnicodeString &source, UBool skipZeros

// see what the permutations of the characters before and after this one are
//Hashtable *subpermute = permute(source.substring(0,i) + source.substring(i + UTF16.getCharCount(cp)));
permute(subPermuteString.remove(i, U16_LENGTH(cp)), skipZeros, &subpermute, status);
permute(subPermuteString.remove(i, U16_LENGTH(cp)), skipZeros, &subpermute, status, depth+1);
/* Test for buffer overflows */
if(U_FAILURE(status)) {
return;
Expand Down
3 changes: 2 additions & 1 deletion icu4c/source/common/unicode/caniter.h
Original file line number Diff line number Diff line change
Expand Up @@ -128,9 +128,10 @@ class U_COMMON_API CanonicalIterator final : public UObject {
* @param skipZeros determine if skip zeros
* @param result the results in a set.
* @param status Fill-in parameter which receives the status of this operation.
* @param depth depth of the call.
* @internal
*/
static void U_EXPORT2 permute(UnicodeString &source, UBool skipZeros, Hashtable *result, UErrorCode &status);
static void U_EXPORT2 permute(UnicodeString &source, UBool skipZeros, Hashtable *result, UErrorCode &status, int32_t depth=0);
#endif /* U_HIDE_INTERNAL_API */

/**
Expand Down
20 changes: 20 additions & 0 deletions icu4c/source/test/intltest/regcoll.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1249,6 +1249,25 @@ void CollationRegressionTest::TestBeforeWithTooStrongAfter() {
}
}

void CollationRegressionTest::TestICU22555InfinityLoop() {
char16_t data[] = {
0x0020, 0x0026, 0x4000, 0x002c, 0x6601, 0x0106, 0xff7f, 0xff99,
0x003b, 0x1141, 0x106a, 0x1006, 0x0001, 0x0080, 0x1141, 0x106a,
0x0026, 0x00ff, 0xff6f, 0xff99, 0x013b, 0x1141, 0x1067, 0x1026,
0x0601, 0x0080, 0x5f03, 0x17e3, 0x0000, 0x3e00, 0x3e3e, 0x0055,
0x8080, 0x0000, 0x01e4, 0x0000, 0x0300, 0x003d, 0x4cff, 0x8053,
0x7a65, 0x0000, 0x6400, 0x5f00, 0x0150, 0x9090, 0x9090, 0x2f5f,
0x0053, 0xffe4, 0x002c, 0x0300, 0x1f3d, 0x55f7, 0x8053, 0x1750,
0x3d00, 0xff00, 0x00ff, 0xff6f, 0x0099, 0x03fa, 0x0303, 0x0303,
0x0303, 0x0303, 0x0303, 0x0303, 0x0303, 0x0303, 0x0303, 0x0303,
0x0303, 0x0303, 0x0303, 0x0303, 0x0303, 0x0303, 0x0303, 0x0303,
};
icu::UnicodeString rule(false, data, sizeof(data)/sizeof(char16_t));
UErrorCode status = U_ZERO_ERROR;
icu::LocalPointer<icu::RuleBasedCollator> col1(
new icu::RuleBasedCollator(rule, status));
}

void CollationRegressionTest::TestICU22517() {
IcuTestErrorCode errorCode(*this, "TestICU22517");
char16_t data[] = u"&a=b쫊쫊쫊쫊쫊쫊쫊쫊";
Expand Down Expand Up @@ -1421,6 +1440,7 @@ void CollationRegressionTest::runIndexedTest(int32_t index, UBool exec, const ch
TESTCASE_AUTO(TestBeforeWithTooStrongAfter);
TESTCASE_AUTO(TestICU22277);
TESTCASE_AUTO(TestICU22517);
TESTCASE_AUTO(TestICU22555InfinityLoop);
TESTCASE_AUTO_END;
}

Expand Down
2 changes: 2 additions & 0 deletions icu4c/source/test/intltest/regcoll.h
Original file line number Diff line number Diff line change
Expand Up @@ -242,6 +242,8 @@ class CollationRegressionTest: public IntlTestCollator

void TestICU22517();

void TestICU22555InfinityLoop();

private:
//------------------------------------------------------------------------
// Internal utilities
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -1243,6 +1243,27 @@ public void TestICU22517() {
}
}

@Test
public void TestICU22555InfinityLoop() {
char data[] = {
0x0020, 0x0026, 0x4000, 0x002c, 0x6601, 0x0106, 0xff7f, 0xff99,
0x003b, 0x1141, 0x106a, 0x1006, 0x0001, 0x0080, 0x1141, 0x106a,
0x0026, 0x00ff, 0xff6f, 0xff99, 0x013b, 0x1141, 0x1067, 0x1026,
0x0601, 0x0080, 0x5f03, 0x17e3, 0x0000, 0x3e00, 0x3e3e, 0x0055,
0x8080, 0x0000, 0x01e4, 0x0000, 0x0300, 0x003d, 0x4cff, 0x8053,
0x7a65, 0x0000, 0x6400, 0x5f00, 0x0150, 0x9090, 0x9090, 0x2f5f,
0x0053, 0xffe4, 0x002c, 0x0300, 0x1f3d, 0x55f7, 0x8053, 0x1750,
0x3d00, 0xff00, 0x00ff, 0xff6f, 0x0099, 0x03fa, 0x0303, 0x0303,
0x0303, 0x0303, 0x0303, 0x0303, 0x0303, 0x0303, 0x0303, 0x0303,
0x0303, 0x0303, 0x0303, 0x0303, 0x0303, 0x0303, 0x0303, 0x0303,
};
String rule = new String(data, 0, data.length);
try {
RuleBasedCollator coll = new RuleBasedCollator(rule);
} catch(Exception expected) {
}
}

@Test
public void TestBeforeWithTooStrongAfter() {
// ICU ticket #9959:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -154,6 +154,10 @@ public void setSource(String newSource) {
}
}

// To avoid infinity loop caused by permute, we limit the depth of recursive
// call to permute and throw exception.
// We know in some unit test we need at least 4. Set to 8 just in case some
// unforseen use cases.
/**
* Simple implementation of permutation.
* <br><b>Warning: The strings are not guaranteed to be in any particular order.</b>
Expand All @@ -165,9 +169,27 @@ public void setSource(String newSource) {
*/
@Deprecated
public static void permute(String source, boolean skipZeros, Set<String> output) {
permute(source, skipZeros, output, 0);
}

private static int PERMUTE_DEPTH_LIMIT = 8;
/**
* Simple implementation of permutation.
* <br><b>Warning: The strings are not guaranteed to be in any particular order.</b>
* @param source the string to find permutations for
* @param skipZeros set to true to skip characters with canonical combining class zero
* @param output the set to add the results to
* @param depth the depth of the recursive call.
* @internal
* @deprecated This API is ICU internal only.
*/
@Deprecated
private static void permute(String source, boolean skipZeros, Set<String> output, int depth) {
// TODO: optimize
//if (PROGRESS) System.out.println("Permute: " + source);

if (depth > PERMUTE_DEPTH_LIMIT) {
throw new UnsupportedOperationException("Stack too deep:" + depth);
}
// optimization:
// if zero or one character, just return a set with it
// we check for length < 2 to keep from counting code points all the time
Expand All @@ -193,7 +215,7 @@ public static void permute(String source, boolean skipZeros, Set<String> output)
// see what the permutations of the characters before and after this one are
subpermute.clear();
permute(source.substring(0,i)
+ source.substring(i + UTF16.getCharCount(cp)), skipZeros, subpermute);
+ source.substring(i + UTF16.getCharCount(cp)), skipZeros, subpermute, depth+1);

// prefix this character to all of them
String chStr = UTF16.valueOf(source, i);
Expand Down

1 comment on commit 757d2cd

@github-actions
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ Performance Alert ⚠️

Possible performance regression was detected for benchmark.
Benchmark result of this commit is worse than the previous benchmark result exceeding threshold 2.

Benchmark suite Current: 757d2cd Previous: 8d3d214 Ratio
Test_ICU_Forward_Search 49.7263 ns/iter 19.055 ns/iter 2.61

This comment was automatically generated by workflow using github-action-benchmark.

Please sign in to comment.