Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

ICU-22555 Fix infinity loop in RuleBasedCollator constructor #2677

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
13 changes: 11 additions & 2 deletions icu4c/source/common/caniter.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -264,10 +264,19 @@ void CanonicalIterator::setSource(const UnicodeString &newSource, UErrorCode &st
* @param source the string to find permutations for
* @return the results in a set.
*/
void U_EXPORT2 CanonicalIterator::permute(UnicodeString &source, UBool skipZeros, Hashtable *result, UErrorCode &status) {
void U_EXPORT2 CanonicalIterator::permute(UnicodeString &source, UBool skipZeros, Hashtable *result, UErrorCode &status, int32_t depth) {
if(U_FAILURE(status)) {
return;
}
// To avoid infinity loop caused by permute, we limit the depth of recursive
// call to permute and return U_UNSUPPORTED_ERROR.
// We know in some unit test we need at least 4. Set to 8 just in case some
// unforseen use cases.
constexpr int32_t kPermuteDepthLimit = 8;
if (depth > kPermuteDepthLimit) {
status = U_UNSUPPORTED_ERROR;
return;
}
//if (PROGRESS) printf("Permute: %s\n", UToS(Tr(source)));
int32_t i = 0;

Expand Down Expand Up @@ -311,7 +320,7 @@ void U_EXPORT2 CanonicalIterator::permute(UnicodeString &source, UBool skipZeros

// see what the permutations of the characters before and after this one are
//Hashtable *subpermute = permute(source.substring(0,i) + source.substring(i + UTF16.getCharCount(cp)));
permute(subPermuteString.remove(i, U16_LENGTH(cp)), skipZeros, &subpermute, status);
permute(subPermuteString.remove(i, U16_LENGTH(cp)), skipZeros, &subpermute, status, depth+1);
/* Test for buffer overflows */
if(U_FAILURE(status)) {
return;
Expand Down
3 changes: 2 additions & 1 deletion icu4c/source/common/unicode/caniter.h
Original file line number Diff line number Diff line change
Expand Up @@ -128,9 +128,10 @@ class U_COMMON_API CanonicalIterator final : public UObject {
* @param skipZeros determine if skip zeros
* @param result the results in a set.
* @param status Fill-in parameter which receives the status of this operation.
* @param depth depth of the call.
* @internal
*/
static void U_EXPORT2 permute(UnicodeString &source, UBool skipZeros, Hashtable *result, UErrorCode &status);
static void U_EXPORT2 permute(UnicodeString &source, UBool skipZeros, Hashtable *result, UErrorCode &status, int32_t depth=0);
#endif /* U_HIDE_INTERNAL_API */

/**
Expand Down
20 changes: 20 additions & 0 deletions icu4c/source/test/intltest/regcoll.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1249,6 +1249,25 @@ void CollationRegressionTest::TestBeforeWithTooStrongAfter() {
}
}

void CollationRegressionTest::TestICU22555InfinityLoop() {
char16_t data[] = {
0x0020, 0x0026, 0x4000, 0x002c, 0x6601, 0x0106, 0xff7f, 0xff99,
0x003b, 0x1141, 0x106a, 0x1006, 0x0001, 0x0080, 0x1141, 0x106a,
0x0026, 0x00ff, 0xff6f, 0xff99, 0x013b, 0x1141, 0x1067, 0x1026,
0x0601, 0x0080, 0x5f03, 0x17e3, 0x0000, 0x3e00, 0x3e3e, 0x0055,
0x8080, 0x0000, 0x01e4, 0x0000, 0x0300, 0x003d, 0x4cff, 0x8053,
0x7a65, 0x0000, 0x6400, 0x5f00, 0x0150, 0x9090, 0x9090, 0x2f5f,
0x0053, 0xffe4, 0x002c, 0x0300, 0x1f3d, 0x55f7, 0x8053, 0x1750,
0x3d00, 0xff00, 0x00ff, 0xff6f, 0x0099, 0x03fa, 0x0303, 0x0303,
0x0303, 0x0303, 0x0303, 0x0303, 0x0303, 0x0303, 0x0303, 0x0303,
0x0303, 0x0303, 0x0303, 0x0303, 0x0303, 0x0303, 0x0303, 0x0303,
};
icu::UnicodeString rule(false, data, sizeof(data)/sizeof(char16_t));
UErrorCode status = U_ZERO_ERROR;
icu::LocalPointer<icu::RuleBasedCollator> col1(
new icu::RuleBasedCollator(rule, status));
}

void CollationRegressionTest::TestICU22517() {
IcuTestErrorCode errorCode(*this, "TestICU22517");
char16_t data[] = u"&a=b쫊쫊쫊쫊쫊쫊쫊쫊";
Expand Down Expand Up @@ -1421,6 +1440,7 @@ void CollationRegressionTest::runIndexedTest(int32_t index, UBool exec, const ch
TESTCASE_AUTO(TestBeforeWithTooStrongAfter);
TESTCASE_AUTO(TestICU22277);
TESTCASE_AUTO(TestICU22517);
TESTCASE_AUTO(TestICU22555InfinityLoop);
TESTCASE_AUTO_END;
}

Expand Down
2 changes: 2 additions & 0 deletions icu4c/source/test/intltest/regcoll.h
Original file line number Diff line number Diff line change
Expand Up @@ -242,6 +242,8 @@ class CollationRegressionTest: public IntlTestCollator

void TestICU22517();

void TestICU22555InfinityLoop();

private:
//------------------------------------------------------------------------
// Internal utilities
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -1243,6 +1243,27 @@ public void TestICU22517() {
}
}

@Test
public void TestICU22555InfinityLoop() {
char data[] = {
0x0020, 0x0026, 0x4000, 0x002c, 0x6601, 0x0106, 0xff7f, 0xff99,
0x003b, 0x1141, 0x106a, 0x1006, 0x0001, 0x0080, 0x1141, 0x106a,
0x0026, 0x00ff, 0xff6f, 0xff99, 0x013b, 0x1141, 0x1067, 0x1026,
0x0601, 0x0080, 0x5f03, 0x17e3, 0x0000, 0x3e00, 0x3e3e, 0x0055,
0x8080, 0x0000, 0x01e4, 0x0000, 0x0300, 0x003d, 0x4cff, 0x8053,
0x7a65, 0x0000, 0x6400, 0x5f00, 0x0150, 0x9090, 0x9090, 0x2f5f,
0x0053, 0xffe4, 0x002c, 0x0300, 0x1f3d, 0x55f7, 0x8053, 0x1750,
0x3d00, 0xff00, 0x00ff, 0xff6f, 0x0099, 0x03fa, 0x0303, 0x0303,
0x0303, 0x0303, 0x0303, 0x0303, 0x0303, 0x0303, 0x0303, 0x0303,
0x0303, 0x0303, 0x0303, 0x0303, 0x0303, 0x0303, 0x0303, 0x0303,
};
String rule = new String(data, 0, data.length);
try {
RuleBasedCollator coll = new RuleBasedCollator(rule);
} catch(Exception expected) {
}
}

@Test
public void TestBeforeWithTooStrongAfter() {
// ICU ticket #9959:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -154,6 +154,10 @@ public void setSource(String newSource) {
}
}

// To avoid infinity loop caused by permute, we limit the depth of recursive
// call to permute and throw exception.
// We know in some unit test we need at least 4. Set to 8 just in case some
// unforseen use cases.
/**
* Simple implementation of permutation.
* <br><b>Warning: The strings are not guaranteed to be in any particular order.</b>
Expand All @@ -165,9 +169,27 @@ public void setSource(String newSource) {
*/
@Deprecated
public static void permute(String source, boolean skipZeros, Set<String> output) {
permute(source, skipZeros, output, 0);
}

private static int PERMUTE_DEPTH_LIMIT = 8;
/**
* Simple implementation of permutation.
* <br><b>Warning: The strings are not guaranteed to be in any particular order.</b>
* @param source the string to find permutations for
* @param skipZeros set to true to skip characters with canonical combining class zero
* @param output the set to add the results to
* @param depth the depth of the recursive call.
* @internal
* @deprecated This API is ICU internal only.
*/
@Deprecated
private static void permute(String source, boolean skipZeros, Set<String> output, int depth) {
// TODO: optimize
//if (PROGRESS) System.out.println("Permute: " + source);

if (depth > PERMUTE_DEPTH_LIMIT) {
throw new UnsupportedOperationException("Stack too deep:" + depth);
}
// optimization:
// if zero or one character, just return a set with it
// we check for length < 2 to keep from counting code points all the time
Expand All @@ -193,7 +215,7 @@ public static void permute(String source, boolean skipZeros, Set<String> output)
// see what the permutations of the characters before and after this one are
subpermute.clear();
permute(source.substring(0,i)
+ source.substring(i + UTF16.getCharCount(cp)), skipZeros, subpermute);
+ source.substring(i + UTF16.getCharCount(cp)), skipZeros, subpermute, depth+1);

// prefix this character to all of them
String chStr = UTF16.valueOf(source, i);
Expand Down
Loading