Skip to content

Commit

Permalink
ICU-22707 feed more bits to the starving monkeys
Browse files Browse the repository at this point in the history
  • Loading branch information
eggrobin committed Jul 4, 2024
1 parent d26a321 commit ecffcb7
Show file tree
Hide file tree
Showing 2 changed files with 59 additions and 43 deletions.
98 changes: 57 additions & 41 deletions icu4c/source/test/intltest/rbbitst.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,9 @@

#include <algorithm>
#include <array>
#include <cinttypes>
#include <list>
#include <random>
#include <set>
#include <sstream>
#include <stdio.h>
Expand Down Expand Up @@ -72,9 +74,9 @@
} \
} UPRV_BLOCK_MACRO_END

#define MONKEY_ERROR(msg, fRuleFileName, index, seed) { \
IntlTest::gTest->errln("\n%s:%d %s at index %d. Parameters to reproduce: @\"type=%s seed=%u loop=1\"", \
__FILE__, __LINE__, msg, index, fRuleFileName, seed); \
#define MONKEY_ERROR(msg, fRuleFileName, index, engineState) { \
IntlTest::gTest->errln("\n%s:%d %s at index %d. Parameters to reproduce: @\"type=%s engineState=[%s] loop=1\"", \
__FILE__, __LINE__, msg, index, fRuleFileName, engineState.c_str()); \
}

//---------------------------------------------
Expand Down Expand Up @@ -1628,20 +1630,15 @@ unsigned int RBBIMonkeyKind::maxClassNameSize() {

//----------------------------------------------------------------------------------------
//
// Random Numbers. Similar to standard lib rand() and srand()
// Not using library to
// 1. Get same results on all platforms.
// 2. Get access to current seed, to more easily reproduce failures.
// Random Numbers. We need a long cycle length since we run overnight tests over
// millions of strings involving 1000 random generations per string
// (a 32-bit LCG will not do!), and we want a reasonably small state
// so that we can output it to reproduce failures.
//
//---------------------------------------------------------------------------------------
static uint32_t m_seed = 1;

static uint32_t m_rand()
{
m_seed = m_seed * 1103515245 + 12345;
return (uint32_t)(m_seed/65536) % 32768;
}

using RandomNumberGenerator = std::ranlux48;
constexpr RandomNumberGenerator::result_type defaultSeed = std::ranlux48_base::default_seed;
static RandomNumberGenerator randomNumberGenerator;

//------------------------------------------------------------------------------------------
//
Expand Down Expand Up @@ -3715,8 +3712,8 @@ RBBILineMonkey::~RBBILineMonkey() {
//
//-------------------------------------------------------------------------------------------

static int32_t getIntParam(UnicodeString name, UnicodeString &params, int32_t defaultVal) {
int32_t val = defaultVal;
static int64_t getIntParam(UnicodeString name, UnicodeString &params, int64_t defaultVal) {
int64_t val = defaultVal;
name.append(" *= *(-?\\d+)");
UErrorCode status = U_ZERO_ERROR;
RegexMatcher m(name, params, 0, status);
Expand All @@ -3728,7 +3725,7 @@ static int32_t getIntParam(UnicodeString name, UnicodeString &params, int32_t d
paramLength = (int32_t)(sizeof(valString)-2);
}
params.extract(m.start(1, status), paramLength, valString, sizeof(valString));
val = strtol(valString, nullptr, 10);
val = strtoll(valString, nullptr, 10);

// Delete this parameter from the params string.
m.reset();
Expand Down Expand Up @@ -4141,13 +4138,14 @@ void RBBITest::TestMonkey() {
#if !UCONFIG_NO_REGULAR_EXPRESSIONS

UErrorCode status = U_ZERO_ERROR;
int32_t loopCount = 500;
int32_t seed = 1;
int64_t loopCount = 500;
uint64_t seed = defaultSeed;
UnicodeString breakType = "all";
Locale locale("en");
UBool useUText = false;
UBool scalarsOnly = false;
std::string exportPath;
std::string engineState;

if (quick == false) {
loopCount = 10000;
Expand All @@ -4156,7 +4154,14 @@ void RBBITest::TestMonkey() {
if (fTestParams) {
UnicodeString p(fTestParams);
loopCount = getIntParam("loop", p, loopCount);
seed = getIntParam("seed", p, seed);
seed = getIntParam("seed", p, defaultSeed);

RegexMatcher engineStateMatcher(R"( *engineState *=\[*([0-9 ]+)\] *)", p, 0, status);
if (engineStateMatcher.find()) {
engineStateMatcher.group(1, status).toUTF8String(engineState);
engineStateMatcher.reset();
p = engineStateMatcher.replaceFirst("", status);
}

RegexMatcher m(" *type *= *(char|word|line|sent|title) *", p, 0, status);
if (m.find()) {
Expand Down Expand Up @@ -4198,16 +4203,23 @@ void RBBITest::TestMonkey() {
}

}
if (seed != defaultSeed && !engineState.empty()) {
errln("seed and engineState parameters are mutually exclusive\n");
return;
}
if (engineState.empty()) {
engineState = (std::stringstream() << RandomNumberGenerator(seed)).str();
}

if (breakType == "char" || breakType == "all") {
FILE *file = exportPath.empty() ? nullptr : fopen((exportPath + "_char.txt").c_str(), "w");
RBBICharMonkey m;
BreakIterator *bi = BreakIterator::createCharacterInstance(locale, status);
if (U_SUCCESS(status)) {
RunMonkey(bi, m, "char", seed, loopCount, useUText, file, scalarsOnly);
RunMonkey(bi, m, "char", engineState, loopCount, useUText, file, scalarsOnly);
if (breakType == "all" && useUText==false) {
// Also run a quick test with UText when "all" is specified
RunMonkey(bi, m, "char", seed, loopCount, true, nullptr, scalarsOnly);
RunMonkey(bi, m, "char", engineState, loopCount, true, nullptr, scalarsOnly);
}
}
else {
Expand All @@ -4225,7 +4237,7 @@ void RBBITest::TestMonkey() {
RBBIWordMonkey m;
BreakIterator *bi = BreakIterator::createWordInstance(locale, status);
if (U_SUCCESS(status)) {
RunMonkey(bi, m, "word", seed, loopCount, useUText, file, scalarsOnly);
RunMonkey(bi, m, "word", engineState, loopCount, useUText, file, scalarsOnly);
}
else {
errcheckln(status, "Creation of word break iterator failed %s", u_errorName(status));
Expand All @@ -4245,7 +4257,7 @@ void RBBITest::TestMonkey() {
loopCount = loopCount / 5; // Line break runs slower than the others.
}
if (U_SUCCESS(status)) {
RunMonkey(bi, m, "line", seed, loopCount, useUText, file, scalarsOnly);
RunMonkey(bi, m, "line", engineState, loopCount, useUText, file, scalarsOnly);
}
else {
errcheckln(status, "Creation of line break iterator failed %s", u_errorName(status));
Expand All @@ -4265,7 +4277,7 @@ void RBBITest::TestMonkey() {
loopCount = loopCount / 10; // Sentence runs slower than the other break types
}
if (U_SUCCESS(status)) {
RunMonkey(bi, m, "sent", seed, loopCount, useUText, file, scalarsOnly);
RunMonkey(bi, m, "sent", engineState, loopCount, useUText, file, scalarsOnly);
}
else {
errcheckln(status, "Creation of line break iterator failed %s", u_errorName(status));
Expand All @@ -4285,16 +4297,16 @@ void RBBITest::TestMonkey() {
// bi - the break iterator to use
// mk - MonkeyKind, abstraction for obtaining expected results
// name - Name of test (char, word, etc.) for use in error messages
// seed - Seed for starting random number generator (parameter from user)
// engineState - State for starting random number generator (parameter from user)
// numIterations - Loop count supplied by the caller (TestMonkey passes its loopCount here)
// exportFile - Pointer to a file to which the test cases will be written in
// UCD format. May be null.
// scalarsOnly - Only test sequences of Unicode scalar values; if this is false,
// arbitrary sequences of code points (including unpaired surrogates)
// are tested.
//
void RBBITest::RunMonkey(BreakIterator *bi, RBBIMonkeyKind &mk, const char *name, uint32_t seed,
int32_t numIterations, UBool useUText, FILE *exportFile, UBool scalarsOnly) {
void RBBITest::RunMonkey(BreakIterator *bi, RBBIMonkeyKind &mk, const char *name, std::string engineState,
int64_t numIterations, UBool useUText, FILE *exportFile, UBool scalarsOnly) {

#if !UCONFIG_NO_REGULAR_EXPRESSIONS

Expand All @@ -4309,10 +4321,13 @@ void RBBITest::RunMonkey(BreakIterator *bi, RBBIMonkeyKind &mk, const char *name
char followingBreaks[TESTSTRINGLEN*2+1];
char precedingBreaks[TESTSTRINGLEN*2+1];
int i;
int loopCount = 0;
int64_t loopCount = 0;


m_seed = seed;
if (engineState.empty()) {
randomNumberGenerator = {};
} else {
std::stringstream(engineState) >> randomNumberGenerator;
}

numCharClasses = mk.charClasses().size();
const std::vector<UnicodeSet>& chClasses = mk.charClasses();
Expand Down Expand Up @@ -4342,22 +4357,23 @@ void RBBITest::RunMonkey(BreakIterator *bi, RBBIMonkeyKind &mk, const char *name
// If test is running in an infinite loop, display a periodic tic so
// we can tell that it is making progress.
constexpr std::array<std::string_view, 5> monkeys{"🙈", "🙉", "🙊", "🐵", "🐒"};
fprintf(stderr, "%s", monkeys[m_seed % monkeys.size()].data());
fprintf(stderr, "%s",
monkeys[RandomNumberGenerator(randomNumberGenerator)() % monkeys.size()].data());
if (loopCount % 1'000'000 == 0) {
fprintf(stderr, "\nTested %d million random strings with %d errors…\n",
fprintf(stderr, "\nTested %" PRId64 " million random strings with %d errors…\n",
loopCount / 1'000'000, getErrors());
}
}
// Save current random number seed, so that we can recreate the random numbers
// Save current RNG state, so that we can recreate the random numbers
// for this loop iteration in event of an error.
seed = m_seed;
engineState = (std::stringstream() << randomNumberGenerator).str();

// Populate a test string with data.
testText.truncate(0);
for (i=0; i<TESTSTRINGLEN; i++) {
int32_t aClassNum = m_rand() % numCharClasses;
int32_t aClassNum = randomNumberGenerator() % numCharClasses;
const UnicodeSet &classSet = chClasses[aClassNum];
int32_t charIdx = m_rand() % classSet.size();
int32_t charIdx = randomNumberGenerator() % classSet.size();
UChar32 c = classSet.charAt(charIdx);
if (c < 0) { // TODO: deal with sets containing strings.
errln("%s:%d c < 0", __FILE__, __LINE__);
Expand Down Expand Up @@ -4450,8 +4466,8 @@ void RBBITest::RunMonkey(BreakIterator *bi, RBBIMonkeyKind &mk, const char *name
(breakPos > lastBreakPos && lastBreakPos > i)) {
errln("%s break monkey test: "
"Out of range value returned by BreakIterator::following().\n"
"Random seed=%d index=%d; following returned %d; lastbreak=%d",
name, seed, i, breakPos, lastBreakPos);
"Random engineState=[%s] index=%d; following returned %d; lastbreak=%d",
name, engineState.c_str(), i, breakPos, lastBreakPos);
break;
}
followingBreaks[breakPos] = 1;
Expand Down Expand Up @@ -4557,7 +4573,7 @@ void RBBITest::RunMonkey(BreakIterator *bi, RBBIMonkeyKind &mk, const char *name
MONKEY_ERROR(
(expectedBreaks[i] ? "Break expected but not found" :
"Break found but not expected"),
name, i, seed);
name, i, engineState);

for (ci = startContext;; (ci = testText.moveIndex32(ci, 1))) {
UChar32 c;
Expand Down
4 changes: 2 additions & 2 deletions icu4c/source/test/intltest/rbbitst.h
Original file line number Diff line number Diff line change
Expand Up @@ -129,8 +129,8 @@ class RBBITest: public IntlTest {
* internal methods to prepare test data
**/

void RunMonkey(BreakIterator *bi, RBBIMonkeyKind &mk, const char *name, uint32_t seed,
int32_t loopCount, UBool useUText, FILE *exportFile, UBool scalarsOnly);
void RunMonkey(BreakIterator *bi, RBBIMonkeyKind &mk, const char *name, std::string engineState,
int64_t loopCount, UBool useUText, FILE *exportFile, UBool scalarsOnly);

// Run one of the Unicode Consortium boundary test data files.
void runUnicodeTestData(const char *fileName, RuleBasedBreakIterator *bi);
Expand Down

0 comments on commit ecffcb7

Please sign in to comment.