From faf44de9b029ec0ecaa3d675e2897b232ee801e2 Mon Sep 17 00:00:00 2001 From: Frank Tang Date: Tue, 24 Oct 2023 15:39:06 -0700 Subject: [PATCH] ICU-22555 Fix infinity loop in RuleBasedCollator constructor Fix C++ and Java code. Add unit tests for both C++ and Java. --- icu4c/source/common/caniter.cpp | 13 ++++++++-- icu4c/source/common/unicode/caniter.h | 3 ++- icu4c/source/test/intltest/regcoll.cpp | 20 ++++++++++++++ icu4c/source/test/intltest/regcoll.h | 2 ++ .../collator/CollationRegressionTest.java | 21 +++++++++++++++ .../com/ibm/icu/text/CanonicalIterator.java | 26 +++++++++++++++++-- 6 files changed, 80 insertions(+), 5 deletions(-) diff --git a/icu4c/source/common/caniter.cpp b/icu4c/source/common/caniter.cpp index 64a3c65d29a0..2bea6f2a6838 100644 --- a/icu4c/source/common/caniter.cpp +++ b/icu4c/source/common/caniter.cpp @@ -264,10 +264,19 @@ void CanonicalIterator::setSource(const UnicodeString &newSource, UErrorCode &st * @param source the string to find permutations for * @return the results in a set. */ -void U_EXPORT2 CanonicalIterator::permute(UnicodeString &source, UBool skipZeros, Hashtable *result, UErrorCode &status) { +void U_EXPORT2 CanonicalIterator::permute(UnicodeString &source, UBool skipZeros, Hashtable *result, UErrorCode &status, int32_t depth) { if(U_FAILURE(status)) { return; } + // To avoid infinity loop caused by permute, we limit the depth of recursive + // call to permute and return U_UNSUPPORTED_ERROR. + // We know in some unit test we need at least 4. Set to 8 just in case some + // unforseen use cases. + constexpr int32_t kPermuteDepthLimit = 8; + if (depth > kPermuteDepthLimit) { + status = U_UNSUPPORTED_ERROR; + return; + } //if (PROGRESS) printf("Permute: %s\n", UToS(Tr(source))); int32_t i = 0; @@ -311,7 +320,7 @@ void U_EXPORT2 CanonicalIterator::permute(UnicodeString &source, UBool skipZeros // see what the permutations of the characters before and after this one are //Hashtable *subpermute = permute(source.substring(0,i) + source.substring(i + UTF16.getCharCount(cp))); - permute(subPermuteString.remove(i, U16_LENGTH(cp)), skipZeros, &subpermute, status); + permute(subPermuteString.remove(i, U16_LENGTH(cp)), skipZeros, &subpermute, status, depth+1); /* Test for buffer overflows */ if(U_FAILURE(status)) { return; diff --git a/icu4c/source/common/unicode/caniter.h b/icu4c/source/common/unicode/caniter.h index 035bd0e64eb9..67c265fe602f 100644 --- a/icu4c/source/common/unicode/caniter.h +++ b/icu4c/source/common/unicode/caniter.h @@ -128,9 +128,10 @@ class U_COMMON_API CanonicalIterator final : public UObject { * @param skipZeros determine if skip zeros * @param result the results in a set. * @param status Fill-in parameter which receives the status of this operation. + * @param depth depth of the call. * @internal */ - static void U_EXPORT2 permute(UnicodeString &source, UBool skipZeros, Hashtable *result, UErrorCode &status); + static void U_EXPORT2 permute(UnicodeString &source, UBool skipZeros, Hashtable *result, UErrorCode &status, int32_t depth=0); #endif /* U_HIDE_INTERNAL_API */ /** diff --git a/icu4c/source/test/intltest/regcoll.cpp b/icu4c/source/test/intltest/regcoll.cpp index 3f07bb1aeec0..f8b4eaa937c7 100644 --- a/icu4c/source/test/intltest/regcoll.cpp +++ b/icu4c/source/test/intltest/regcoll.cpp @@ -1249,6 +1249,25 @@ void CollationRegressionTest::TestBeforeWithTooStrongAfter() { } } +void CollationRegressionTest::TestICU22555InfinityLoop() { + char16_t data[] = { + 0x0020, 0x0026, 0x4000, 0x002c, 0x6601, 0x0106, 0xff7f, 0xff99, + 0x003b, 0x1141, 0x106a, 0x1006, 0x0001, 0x0080, 0x1141, 0x106a, + 0x0026, 0x00ff, 0xff6f, 0xff99, 0x013b, 0x1141, 0x1067, 0x1026, + 0x0601, 0x0080, 0x5f03, 0x17e3, 0x0000, 0x3e00, 0x3e3e, 0x0055, + 0x8080, 0x0000, 0x01e4, 0x0000, 0x0300, 0x003d, 0x4cff, 0x8053, + 0x7a65, 0x0000, 0x6400, 0x5f00, 0x0150, 0x9090, 0x9090, 0x2f5f, + 0x0053, 0xffe4, 0x002c, 0x0300, 0x1f3d, 0x55f7, 0x8053, 0x1750, + 0x3d00, 0xff00, 0x00ff, 0xff6f, 0x0099, 0x03fa, 0x0303, 0x0303, + 0x0303, 0x0303, 0x0303, 0x0303, 0x0303, 0x0303, 0x0303, 0x0303, + 0x0303, 0x0303, 0x0303, 0x0303, 0x0303, 0x0303, 0x0303, 0x0303, + }; + icu::UnicodeString rule(false, data, sizeof(data)/sizeof(char16_t)); + UErrorCode status = U_ZERO_ERROR; + icu::LocalPointer col1( + new icu::RuleBasedCollator(rule, status)); +} + void CollationRegressionTest::TestICU22517() { IcuTestErrorCode errorCode(*this, "TestICU22517"); char16_t data[] = u"&a=b쫊쫊쫊쫊쫊쫊쫊쫊"; @@ -1421,6 +1440,7 @@ void CollationRegressionTest::runIndexedTest(int32_t index, UBool exec, const ch TESTCASE_AUTO(TestBeforeWithTooStrongAfter); TESTCASE_AUTO(TestICU22277); TESTCASE_AUTO(TestICU22517); + TESTCASE_AUTO(TestICU22555InfinityLoop); TESTCASE_AUTO_END; } diff --git a/icu4c/source/test/intltest/regcoll.h b/icu4c/source/test/intltest/regcoll.h index 76ba733c795c..ab6575d5ecac 100644 --- a/icu4c/source/test/intltest/regcoll.h +++ b/icu4c/source/test/intltest/regcoll.h @@ -242,6 +242,8 @@ class CollationRegressionTest: public IntlTestCollator void TestICU22517(); + void TestICU22555InfinityLoop(); + private: //------------------------------------------------------------------------ // Internal utilities diff --git a/icu4j/main/collate/src/test/java/com/ibm/icu/dev/test/collator/CollationRegressionTest.java b/icu4j/main/collate/src/test/java/com/ibm/icu/dev/test/collator/CollationRegressionTest.java index bb7cd0f7341c..9650f4bfb9c9 100644 --- a/icu4j/main/collate/src/test/java/com/ibm/icu/dev/test/collator/CollationRegressionTest.java +++ b/icu4j/main/collate/src/test/java/com/ibm/icu/dev/test/collator/CollationRegressionTest.java @@ -1243,6 +1243,27 @@ public void TestICU22517() { } } + @Test + public void TestICU22555InfinityLoop() { + char data[] = { + 0x0020, 0x0026, 0x4000, 0x002c, 0x6601, 0x0106, 0xff7f, 0xff99, + 0x003b, 0x1141, 0x106a, 0x1006, 0x0001, 0x0080, 0x1141, 0x106a, + 0x0026, 0x00ff, 0xff6f, 0xff99, 0x013b, 0x1141, 0x1067, 0x1026, + 0x0601, 0x0080, 0x5f03, 0x17e3, 0x0000, 0x3e00, 0x3e3e, 0x0055, + 0x8080, 0x0000, 0x01e4, 0x0000, 0x0300, 0x003d, 0x4cff, 0x8053, + 0x7a65, 0x0000, 0x6400, 0x5f00, 0x0150, 0x9090, 0x9090, 0x2f5f, + 0x0053, 0xffe4, 0x002c, 0x0300, 0x1f3d, 0x55f7, 0x8053, 0x1750, + 0x3d00, 0xff00, 0x00ff, 0xff6f, 0x0099, 0x03fa, 0x0303, 0x0303, + 0x0303, 0x0303, 0x0303, 0x0303, 0x0303, 0x0303, 0x0303, 0x0303, + 0x0303, 0x0303, 0x0303, 0x0303, 0x0303, 0x0303, 0x0303, 0x0303, + }; + String rule = new String(data, 0, data.length); + try { + RuleBasedCollator coll = new RuleBasedCollator(rule); + } catch(Exception expected) { + } + } + @Test public void TestBeforeWithTooStrongAfter() { // ICU ticket #9959: diff --git a/icu4j/main/core/src/main/java/com/ibm/icu/text/CanonicalIterator.java b/icu4j/main/core/src/main/java/com/ibm/icu/text/CanonicalIterator.java index 56b36d90c49b..39baae1b6091 100644 --- a/icu4j/main/core/src/main/java/com/ibm/icu/text/CanonicalIterator.java +++ b/icu4j/main/core/src/main/java/com/ibm/icu/text/CanonicalIterator.java @@ -154,6 +154,10 @@ public void setSource(String newSource) { } } + // To avoid infinity loop caused by permute, we limit the depth of recursive + // call to permute and throw exception. + // We know in some unit test we need at least 4. Set to 8 just in case some + // unforseen use cases. /** * Simple implementation of permutation. *
Warning: The strings are not guaranteed to be in any particular order. @@ -165,9 +169,27 @@ public void setSource(String newSource) { */ @Deprecated public static void permute(String source, boolean skipZeros, Set output) { + permute(source, skipZeros, output, 0); + } + + private static int PERMUTE_DEPTH_LIMIT = 8; + /** + * Simple implementation of permutation. + *
Warning: The strings are not guaranteed to be in any particular order. + * @param source the string to find permutations for + * @param skipZeros set to true to skip characters with canonical combining class zero + * @param output the set to add the results to + * @param depth the depth of the recursive call. + * @internal + * @deprecated This API is ICU internal only. + */ + @Deprecated + private static void permute(String source, boolean skipZeros, Set output, int depth) { // TODO: optimize //if (PROGRESS) System.out.println("Permute: " + source); - + if (depth > PERMUTE_DEPTH_LIMIT) { + throw new UnsupportedOperationException("Stack too deep:" + depth); + } // optimization: // if zero or one character, just return a set with it // we check for length < 2 to keep from counting code points all the time @@ -193,7 +215,7 @@ public static void permute(String source, boolean skipZeros, Set output) // see what the permutations of the characters before and after this one are subpermute.clear(); permute(source.substring(0,i) - + source.substring(i + UTF16.getCharCount(cp)), skipZeros, subpermute); + + source.substring(i + UTF16.getCharCount(cp)), skipZeros, subpermute, depth+1); // prefix this character to all of them String chStr = UTF16.valueOf(source, i);