Skip to content

Commit

Permalink
ICU-23000 Replace CharString for LocaleBased
Browse files Browse the repository at this point in the history
The changes improve the overall peak memory usage and slightly improve
performance

	main avg	main stdev	branch avg	branch stdev	diff avg	diff avg %
User time (seconds):	81.19	0.37	81.36	0.09	0.17	0.21%
System time (seconds):	5.03	0.08	4.86	0.05	-0.17	-3.38%
Percent of CPU this job got:	237.33%	0.01	235.33%	0.01	-2.00%	-0.84%
Maximum resident set size (kbytes):	227,141	99.14	226,667	366.48	-475	-0.21%
Minor (reclaiming a frame) page faults:	46,009	510.78	43,775	294.47	-2,234	-4.86%
Voluntary context switches:	333,442	4513.76	316,506	8850.48	-16,936	-5.08%
Involuntary context switches:	467	30.60	515	22.65	48	10.36%

Raw data in
https://docs.google.com/spreadsheets/d/10LQ3kO83EwvsFfoiySTzsTbIzfUV00Te4jQnX0b_FM4/edit?usp=sharing
  • Loading branch information
FrankYFTang committed Dec 28, 2024
1 parent e515c84 commit 04d3b3b
Show file tree
Hide file tree
Showing 14 changed files with 257 additions and 126 deletions.
86 changes: 64 additions & 22 deletions icu4c/source/common/brkiter.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -59,7 +59,7 @@ BreakIterator::buildInstance(const Locale& loc, const char *type, UErrorCode &st
{
char fnbuff[256];
char ext[4]={'\0'};
CharString actualLocale;
CharString actual;
int32_t size;
const char16_t* brkfname = nullptr;
UResourceBundle brkRulesStack;
Expand Down Expand Up @@ -94,7 +94,7 @@ BreakIterator::buildInstance(const Locale& loc, const char *type, UErrorCode &st

// Use the string if we found it
if (U_SUCCESS(status) && brkfname) {
actualLocale.append(ures_getLocaleInternal(brkName, &status), -1, status);
actual.append(ures_getLocaleInternal(brkName, &status), -1, status);

char16_t* extStart=u_strchr(brkfname, 0x002e);
int len = 0;
Expand Down Expand Up @@ -123,10 +123,16 @@ BreakIterator::buildInstance(const Locale& loc, const char *type, UErrorCode &st
if (U_SUCCESS(status) && result != nullptr) {
U_LOCALE_BASED(locBased, *(BreakIterator*)result);

locBased.setLocaleIDs(ures_getLocaleByType(b, ULOC_VALID_LOCALE, &status),
actualLocale.data());
uprv_strncpy(result->requestLocale, loc.getName(), ULOC_FULLNAME_CAPACITY);
result->requestLocale[ULOC_FULLNAME_CAPACITY-1] = 0; // always terminate
locBased.setLocaleIDs(ures_getLocaleByType(b, ULOC_VALID_LOCALE, &status),
actual.data(), status);
if (result->requestLocale == nullptr) {
result->requestLocale = new CharString(loc.getName(), status);
if (result->requestLocale == nullptr) {
status = U_MEMORY_ALLOCATION_ERROR;
}
} else {
result->requestLocale->copyFrom(loc.getName(), status);
}
}

ures_close(b);
Expand Down Expand Up @@ -204,28 +210,50 @@ BreakIterator::getAvailableLocales(int32_t& count)
//
//-------------------------------------------

BreakIterator::BreakIterator()
BreakIterator::BreakIterator() : actualLocale(nullptr), validLocale(nullptr), requestLocale(nullptr)
{
*validLocale = *actualLocale = *requestLocale = 0;
}

BreakIterator::BreakIterator(const BreakIterator &other) : UObject(other) {
uprv_strncpy(actualLocale, other.actualLocale, sizeof(actualLocale));
uprv_strncpy(validLocale, other.validLocale, sizeof(validLocale));
uprv_strncpy(requestLocale, other.requestLocale, sizeof(requestLocale));
BreakIterator::BreakIterator(const BreakIterator &other) : UObject(other), actualLocale(nullptr), validLocale(nullptr), requestLocale(nullptr) {
UErrorCode status = U_ZERO_ERROR;
U_LOCALE_BASED(locBased, *this);
locBased.setLocaleIDs(other.validLocale, other.actualLocale, status);
if (other.requestLocale == nullptr) {
delete requestLocale;
requestLocale = nullptr;
} else {
requestLocale = new CharString(*(other.requestLocale), status);
if (requestLocale == nullptr) {
status = U_MEMORY_ALLOCATION_ERROR;
}
}
U_ASSERT(U_SUCCESS(status));
}

BreakIterator &BreakIterator::operator =(const BreakIterator &other) {
if (this != &other) {
uprv_strncpy(actualLocale, other.actualLocale, sizeof(actualLocale));
uprv_strncpy(validLocale, other.validLocale, sizeof(validLocale));
uprv_strncpy(requestLocale, other.requestLocale, sizeof(requestLocale));
UErrorCode status = U_ZERO_ERROR;
U_LOCALE_BASED(locBased, *this);
locBased.setLocaleIDs(other.validLocale, other.actualLocale, status);
if (other.requestLocale == nullptr) {
delete requestLocale;
requestLocale = nullptr;
} else {
requestLocale = new CharString(*(other.requestLocale), status);
if (requestLocale == nullptr) {
status = U_MEMORY_ALLOCATION_ERROR;
}
}
U_ASSERT(U_SUCCESS(status));
}
return *this;
}

BreakIterator::~BreakIterator()
{
delete validLocale;
delete actualLocale;
delete requestLocale;
}

// ------------------------------------------
Expand Down Expand Up @@ -394,7 +422,7 @@ BreakIterator::createInstance(const Locale& loc, int32_t kind, UErrorCode& statu
// revisit this in ICU 3.0 and clean it up/fix it/remove it.
if (U_SUCCESS(status) && (result != nullptr) && *actualLoc.getName() != 0) {
U_LOCALE_BASED(locBased, *result);
locBased.setLocaleIDs(actualLoc.getName(), actualLoc.getName());
locBased.setLocaleIDs(actualLoc.getName(), actualLoc.getName(), status);
}
return result;
}
Expand Down Expand Up @@ -496,19 +524,31 @@ BreakIterator::makeInstance(const Locale& loc, int32_t kind, UErrorCode& status)

Locale
BreakIterator::getLocale(ULocDataLocaleType type, UErrorCode& status) const {
if (U_FAILURE(status)) {
return {""};
}
if (type == ULOC_REQUESTED_LOCALE) {
return {requestLocale};
if (requestLocale == nullptr) {
return {""};
}
return {requestLocale->data()};
}
U_LOCALE_BASED(locBased, *this);
U_LOCALE_BASED(locBased, *const_cast<BreakIterator*>(this));
return locBased.getLocale(type, status);
}

const char *
BreakIterator::getLocaleID(ULocDataLocaleType type, UErrorCode& status) const {
if (U_FAILURE(status)) {
return {""};
}
if (type == ULOC_REQUESTED_LOCALE) {
return requestLocale;
if (requestLocale == nullptr) {
return {""};
}
return requestLocale->data();
}
U_LOCALE_BASED(locBased, *this);
U_LOCALE_BASED(locBased, *const_cast<BreakIterator*>(this));
return locBased.getLocaleID(type, status);
}

Expand All @@ -535,9 +575,11 @@ int32_t BreakIterator::getRuleStatusVec(int32_t *fillInVec, int32_t capacity, UE
return 1;
}

BreakIterator::BreakIterator (const Locale& valid, const Locale& actual) {
BreakIterator::BreakIterator (const Locale& valid, const Locale& actual) : actualLocale(nullptr), validLocale(nullptr), requestLocale(nullptr) {
UErrorCode status = U_ZERO_ERROR;
U_LOCALE_BASED(locBased, (*this));
locBased.setLocaleIDs(valid, actual);
locBased.setLocaleIDs(valid.getName(), actual.getName(), status);
U_ASSERT(U_SUCCESS(status));
}

U_NAMESPACE_END
Expand Down
9 changes: 9 additions & 0 deletions icu4c/source/common/charstr.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -70,6 +70,15 @@ CharString &CharString::copyFrom(const CharString &s, UErrorCode &errorCode) {
return *this;
}

CharString &CharString::copyFrom(StringPiece s, UErrorCode &errorCode) {
if (U_FAILURE(errorCode)) {
return *this;
}
len = 0;
append(s, errorCode);
return *this;
}

int32_t CharString::lastIndexOf(char c) const {
for(int32_t i=len; i>0;) {
if(buffer[--i]==c) {
Expand Down
1 change: 1 addition & 0 deletions icu4c/source/common/charstr.h
Original file line number Diff line number Diff line change
Expand Up @@ -74,6 +74,7 @@ class U_COMMON_API CharString : public UMemory {
* use a UErrorCode where memory allocations might be needed.
*/
CharString &copyFrom(const CharString &other, UErrorCode &errorCode);
CharString &copyFrom(StringPiece s, UErrorCode &errorCode);

UBool isEmpty() const { return len==0; }
int32_t length() const { return len; }
Expand Down
72 changes: 59 additions & 13 deletions icu4c/source/common/locbased.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
*/
#include "locbased.h"
#include "cstring.h"
#include "charstr.h"

U_NAMESPACE_BEGIN

Expand All @@ -27,29 +28,74 @@ const char* LocaleBased::getLocaleID(ULocDataLocaleType type, UErrorCode& status

switch(type) {
case ULOC_VALID_LOCALE:
return valid;
return valid->data();
case ULOC_ACTUAL_LOCALE:
return actual;
return actual->data();
default:
status = U_ILLEGAL_ARGUMENT_ERROR;
return nullptr;
}
}

void LocaleBased::setLocaleIDs(const char* validID, const char* actualID) {
if (validID != nullptr) {
uprv_strncpy(valid, validID, ULOC_FULLNAME_CAPACITY);
valid[ULOC_FULLNAME_CAPACITY-1] = 0; // always terminate
void LocaleBased::setLocaleIDs(const CharString* validID, const CharString* actualID, UErrorCode& status) {
if (validID == nullptr) {
delete valid;
valid = nullptr;
} else {
if (valid == nullptr) {
valid = new CharString(*validID, status);
if (valid == nullptr) {
status = U_MEMORY_ALLOCATION_ERROR;
return;
}
} else {
valid->copyFrom(*validID, status);
}
}
if (actualID != nullptr) {
uprv_strncpy(actual, actualID, ULOC_FULLNAME_CAPACITY);
actual[ULOC_FULLNAME_CAPACITY-1] = 0; // always terminate
if (actualID == nullptr) {
delete actual;
actual = nullptr;
} else {
if (actual == nullptr) {
actual = new CharString(*actualID, status);
if (actual == nullptr) {
status = U_MEMORY_ALLOCATION_ERROR;
return;
}
} else {
actual->copyFrom(*actualID, status);
}
}
}

void LocaleBased::setLocaleIDs(const Locale& validID, const Locale& actualID) {
uprv_strcpy(valid, validID.getName());
uprv_strcpy(actual, actualID.getName());
void LocaleBased::setLocaleIDs(const char* validID, const char* actualID, UErrorCode& status) {
if (validID == nullptr) {
delete valid;
valid = nullptr;
} else {
if (valid == nullptr) {
valid = new CharString(validID, status);
if (valid == nullptr) {
status = U_MEMORY_ALLOCATION_ERROR;
return;
}
} else {
valid->copyFrom(validID, status);
}
}
if (actualID == nullptr) {
delete actual;
actual = nullptr;
} else {
if (actual == nullptr) {
actual = new CharString(actualID, status);
if (actual == nullptr) {
status = U_MEMORY_ALLOCATION_ERROR;
return;
}
} else {
actual->copyFrom(actualID, status);
}
}
}

U_NAMESPACE_END
35 changes: 8 additions & 27 deletions icu4c/source/common/locbased.h
Original file line number Diff line number Diff line change
Expand Up @@ -19,13 +19,14 @@
/**
* Macro to declare a locale LocaleBased wrapper object for the given
* object, which must have two members named `validLocale' and
* `actualLocale' of size ULOC_FULLNAME_CAPACITY
* `actualLocale' of which are pointers to the internal icu::CharString.
*/
#define U_LOCALE_BASED(varname, objname) \
LocaleBased varname((objname).validLocale, (objname).actualLocale)

U_NAMESPACE_BEGIN

class CharString;
/**
* A utility class that unifies the implementation of getLocale() by
* various ICU services. This class is likely to be removed in the
Expand All @@ -41,13 +42,7 @@ class U_COMMON_API LocaleBased : public UMemory {
* Construct a LocaleBased wrapper around the two pointers. These
* will be aliased for the lifetime of this object.
*/
inline LocaleBased(char* validAlias, char* actualAlias);

/**
* Construct a LocaleBased wrapper around the two const pointers.
* These will be aliased for the lifetime of this object.
*/
inline LocaleBased(const char* validAlias, const char* actualAlias);
inline LocaleBased(CharString*& validAlias, CharString*& actualAlias);

/**
* Return locale meta-data for the service object wrapped by this
Expand Down Expand Up @@ -75,33 +70,19 @@ class U_COMMON_API LocaleBased : public UMemory {
* @param valid the ID of the valid locale
* @param actual the ID of the actual locale
*/
void setLocaleIDs(const char* valid, const char* actual);

/**
* Set the locale meta-data for the service object wrapped by this
* object.
* @param valid the ID of the valid locale
* @param actual the ID of the actual locale
*/
void setLocaleIDs(const Locale& valid, const Locale& actual);
void setLocaleIDs(const char* valid, const char* actual, UErrorCode& status);
void setLocaleIDs(const CharString* valid, const CharString* actual, UErrorCode& status);

private:

char* valid;

char* actual;
CharString*& valid;
CharString*& actual;
};

inline LocaleBased::LocaleBased(char* validAlias, char* actualAlias) :
inline LocaleBased::LocaleBased(CharString*& validAlias, CharString*& actualAlias) :
valid(validAlias), actual(actualAlias) {
}

inline LocaleBased::LocaleBased(const char* validAlias,
const char* actualAlias) :
// ugh: cast away const
valid(const_cast<char*>(validAlias)), actual(const_cast<char*>(actualAlias)) {
}

U_NAMESPACE_END

#endif
8 changes: 5 additions & 3 deletions icu4c/source/common/unicode/brkiter.h
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,8 @@ U_NAMESPACE_END

U_NAMESPACE_BEGIN

class CharString;

/**
* The BreakIterator class implements methods for finding the location
* of boundaries in text. BreakIterator is an abstract base class.
Expand Down Expand Up @@ -646,9 +648,9 @@ class U_COMMON_API BreakIterator : public UObject {
private:

/** @internal (private) */
char actualLocale[ULOC_FULLNAME_CAPACITY];
char validLocale[ULOC_FULLNAME_CAPACITY];
char requestLocale[ULOC_FULLNAME_CAPACITY];
CharString* actualLocale;
CharString* validLocale;
CharString* requestLocale;
};

#ifndef U_HIDE_DEPRECATED_API
Expand Down
Loading

0 comments on commit 04d3b3b

Please sign in to comment.