Skip to content

Commit

Permalink
ICU-22696 Update ulocimp_setKeywordValue() to use std::string_view.
Browse files Browse the repository at this point in the history
  • Loading branch information
roubert committed Aug 13, 2024
1 parent 0bd2b4d commit 7ffbe77
Show file tree
Hide file tree
Showing 4 changed files with 104 additions and 88 deletions.
87 changes: 47 additions & 40 deletions icu4c/source/common/locid.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2633,59 +2633,66 @@ Locale::getUnicodeKeywordValue(StringPiece keywordName,
}

void
Locale::setKeywordValue(const char* keywordName, const char* keywordValue, UErrorCode &status)
{
if (U_FAILURE(status)) {
Locale::setKeywordValue(StringPiece keywordName,
StringPiece keywordValue,
UErrorCode& status) {
if (U_FAILURE(status)) { return; }
if (keywordName.empty()) {
status = U_ILLEGAL_ARGUMENT_ERROR;
return;
}
if (status == U_STRING_NOT_TERMINATED_WARNING) {
status = U_ZERO_ERROR;
}
int32_t bufferLength =
uprv_max(static_cast<int32_t>(uprv_strlen(fullName) + 1), ULOC_FULLNAME_CAPACITY);
int32_t newLength = uloc_setKeywordValue(keywordName, keywordValue, fullName,
bufferLength, &status) + 1;
U_ASSERT(status != U_STRING_NOT_TERMINATED_WARNING);
/* Handle the case the current buffer is not enough to hold the new id */
if (status == U_BUFFER_OVERFLOW_ERROR) {
U_ASSERT(newLength > bufferLength);
char* newFullName = static_cast<char*>(uprv_malloc(newLength));
if (newFullName == nullptr) {
status = U_MEMORY_ALLOCATION_ERROR;
return;
}
uprv_strcpy(newFullName, fullName);
if (fullName != fullNameBuffer) {
if (baseName == fullName) {
baseName = newFullName; // baseName should not point to freed memory.

int32_t length = static_cast<int32_t>(uprv_strlen(fullName));
int32_t capacity = fullName == fullNameBuffer ? ULOC_FULLNAME_CAPACITY : length + 1;

const char* start = locale_getKeywordsStart(fullName);
int32_t offset = start == nullptr ? length : start - fullName;

for (;;) {
// Remove -1 from the capacity so that this function can guarantee NUL termination.
CheckedArrayByteSink sink(fullName + offset, capacity - offset - 1);

int32_t reslen = ulocimp_setKeywordValue(
{fullName + offset, static_cast<std::string_view::size_type>(length - offset)},
keywordName,
keywordValue,
sink,
status);

if (status == U_BUFFER_OVERFLOW_ERROR) {
capacity = reslen + offset + 1;
char* newFullName = static_cast<char*>(uprv_malloc(capacity));
if (newFullName == nullptr) {
status = U_MEMORY_ALLOCATION_ERROR;
return;
}
// if full Name is already on the heap, need to free it.
uprv_free(fullName);
uprv_memcpy(newFullName, fullName, length + 1);
if (fullName != fullNameBuffer) {
if (baseName == fullName) {
baseName = newFullName; // baseName should not point to freed memory.
}
// if fullName is already on the heap, need to free it.
uprv_free(fullName);
}
fullName = newFullName;
status = U_ZERO_ERROR;
continue;
}
fullName = newFullName;
status = U_ZERO_ERROR;
uloc_setKeywordValue(keywordName, keywordValue, fullName, newLength, &status);
U_ASSERT(status != U_STRING_NOT_TERMINATED_WARNING);
} else {
U_ASSERT(newLength <= bufferLength);

if (U_FAILURE(status)) { return; }
u_terminateChars(fullName, capacity, reslen + offset, &status);
break;
}
if (U_SUCCESS(status) && baseName == fullName) {

if (baseName == fullName) {
// May have added the first keyword, meaning that the fullName is no longer also the baseName.
initBaseName(status);
}
}

void
Locale::setKeywordValue(StringPiece keywordName,
StringPiece keywordValue,
UErrorCode& status) {
if (U_FAILURE(status)) { return; }
// TODO: Remove the need for a const char* to a NUL terminated buffer.
const CharString keywordName_nul(keywordName, status);
const CharString keywordValue_nul(keywordValue, status);
setKeywordValue(keywordName_nul.data(), keywordValue_nul.data(), status);
}

void
Locale::setUnicodeKeywordValue(StringPiece keywordName,
StringPiece keywordValue,
Expand Down
91 changes: 49 additions & 42 deletions icu4c/source/common/uloc.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -872,6 +872,11 @@ uloc_setKeywordValue(const char* keywordName,
{
if (U_FAILURE(*status)) { return 0; }

if (keywordName == nullptr || *keywordName == 0) {
*status = U_ILLEGAL_ARGUMENT_ERROR;
return 0;
}

if (bufferCapacity <= 1) {
*status = U_ILLEGAL_ARGUMENT_ERROR;
return 0;
Expand All @@ -890,7 +895,11 @@ uloc_setKeywordValue(const char* keywordName,
CheckedArrayByteSink sink(keywords == nullptr ? buffer + bufLen : keywords,
bufferCapacity - baseLen - 1);
int32_t reslen = ulocimp_setKeywordValue(
keywords, keywordName, keywordValue, sink, *status);
keywords == nullptr ? std::string_view() : keywords,
keywordName,
keywordValue == nullptr ? std::string_view() : keywordValue,
sink,
*status);

if (U_FAILURE(*status)) {
return *status == U_BUFFER_OVERFLOW_ERROR ? reslen + baseLen : 0;
Expand All @@ -905,24 +914,29 @@ uloc_setKeywordValue(const char* keywordName,
}

U_EXPORT void
ulocimp_setKeywordValue(const char* keywordName,
const char* keywordValue,
ulocimp_setKeywordValue(std::string_view keywordName,
std::string_view keywordValue,
CharString& localeID,
UErrorCode& status)
{
if (U_FAILURE(status)) { return; }
// This is safe because CharString::truncate() doesn't actually erase any
// data, but simply sets the position for where new data will be written.
const char* keywords = locale_getKeywordsStart(localeID.data());
if (keywords != nullptr) localeID.truncate(keywords - localeID.data());
std::string_view keywords;
if (const char* start = locale_getKeywordsStart(localeID.data()); start != nullptr) {
// This is safe because CharString::truncate() doesn't actually erase any
// data, but simply sets the position for where new data will be written.
int32_t size = start - localeID.data();
keywords = localeID.toStringPiece();
keywords.remove_prefix(size);
localeID.truncate(size);
}
CharStringByteSink sink(&localeID);
ulocimp_setKeywordValue(keywords, keywordName, keywordValue, sink, status);
}

U_EXPORT int32_t
ulocimp_setKeywordValue(const char* keywords,
const char* keywordName,
const char* keywordValue,
ulocimp_setKeywordValue(std::string_view keywords,
std::string_view keywordName,
std::string_view keywordValue,
ByteSink& sink,
UErrorCode& status)
{
Expand All @@ -931,17 +945,14 @@ ulocimp_setKeywordValue(const char* keywords,
/* TODO: sorting. removal. */
int32_t needLen = 0;
int32_t rc;
const char* nextSeparator = nullptr;
const char* nextEqualsign = nullptr;
const char* keywordStart = nullptr;
CharString updatedKeysAndValues;
bool handledInputKeyAndValue = false;
char keyValuePrefix = '@';

if (status == U_STRING_NOT_TERMINATED_WARNING) {
status = U_ZERO_ERROR;
}
if (keywordName == nullptr || keywordName[0] == 0) {
if (keywordName.empty()) {
status = U_ILLEGAL_ARGUMENT_ERROR;
return 0;
}
Expand All @@ -951,21 +962,19 @@ ulocimp_setKeywordValue(const char* keywords,
}

CharString canonKeywordValue;
if(keywordValue) {
while (*keywordValue != 0) {
if (!UPRV_ISALPHANUM(*keywordValue) && !UPRV_OK_VALUE_PUNCTUATION(*keywordValue)) {
status = U_ILLEGAL_ARGUMENT_ERROR; /* malformed key value */
return 0;
}
/* Should we force lowercase in value to set? */
canonKeywordValue.append(*keywordValue++, status);
for (char c : keywordValue) {
if (!UPRV_ISALPHANUM(c) && !UPRV_OK_VALUE_PUNCTUATION(c)) {
status = U_ILLEGAL_ARGUMENT_ERROR; /* malformed key value */
return 0;
}
/* Should we force lowercase in value to set? */
canonKeywordValue.append(c, status);
}
if (U_FAILURE(status)) {
return 0;
}

if (keywords == nullptr || keywords[1] == '\0') {
if (keywords.size() <= 1) {
if (canonKeywordValue.isEmpty()) { /* no keywords = nothing to remove */
U_ASSERT(status != U_STRING_NOT_TERMINATED_WARNING);
return 0;
Expand All @@ -991,23 +1000,20 @@ ulocimp_setKeywordValue(const char* keywords,
return needLen;
} /* end shortcut - no @ */

keywordStart = keywords;
/* search for keyword */
while(keywordStart) {
const char* keyValueTail;

for (size_t keywordStart = 0; keywordStart != std::string_view::npos;) {
keywordStart++; /* skip @ or ; */
nextEqualsign = uprv_strchr(keywordStart, '=');
if (!nextEqualsign) {
size_t nextEqualsign = keywords.find('=', keywordStart);
if (nextEqualsign == std::string_view::npos) {
status = U_ILLEGAL_ARGUMENT_ERROR; /* key must have =value */
return 0;
}
/* strip leading & trailing spaces (TC decided to tolerate these) */
while(*keywordStart == ' ') {
while (keywordStart < keywords.size() && keywords[keywordStart] == ' ') {
keywordStart++;
}
keyValueTail = nextEqualsign;
while (keyValueTail > keywordStart && *(keyValueTail-1) == ' ') {
size_t keyValueTail = nextEqualsign;
while (keyValueTail > keywordStart && keywords[keyValueTail - 1] == ' ') {
keyValueTail--;
}
/* now keyValueTail points to first char after the keyName */
Expand All @@ -1018,26 +1024,26 @@ ulocimp_setKeywordValue(const char* keywords,
}
CharString localeKeywordName;
while (keywordStart < keyValueTail) {
if (!UPRV_ISALPHANUM(*keywordStart)) {
if (!UPRV_ISALPHANUM(keywords[keywordStart])) {
status = U_ILLEGAL_ARGUMENT_ERROR; /* malformed keyword name */
return 0;
}
localeKeywordName.append(uprv_tolower(*keywordStart++), status);
localeKeywordName.append(uprv_tolower(keywords[keywordStart++]), status);
}
if (U_FAILURE(status)) {
return 0;
}

nextSeparator = uprv_strchr(nextEqualsign, ';');
size_t nextSeparator = keywords.find(';', nextEqualsign);

/* start processing the value part */
nextEqualsign++; /* skip '=' */
/* First strip leading & trailing spaces (TC decided to tolerate these) */
while(*nextEqualsign == ' ') {
while (nextEqualsign < keywords.size() && keywords[nextEqualsign] == ' ') {
nextEqualsign++;
}
keyValueTail = (nextSeparator)? nextSeparator: nextEqualsign + uprv_strlen(nextEqualsign);
while(keyValueTail > nextEqualsign && *(keyValueTail-1) == ' ') {
keyValueTail = nextSeparator == std::string_view::npos ? keywords.size() : nextSeparator;
while (keyValueTail > nextEqualsign && keywords[keyValueTail - 1] == ' ') {
keyValueTail--;
}
if (nextEqualsign == keyValueTail) {
Expand Down Expand Up @@ -1072,9 +1078,10 @@ ulocimp_setKeywordValue(const char* keywords,
keyValuePrefix = ';'; /* for any subsequent key-value pair */
updatedKeysAndValues.append(localeKeywordName, status);
updatedKeysAndValues.append('=', status);
updatedKeysAndValues.append(nextEqualsign, static_cast<int32_t>(keyValueTail-nextEqualsign), status);
updatedKeysAndValues.append(keywords.data() + nextEqualsign,
static_cast<int32_t>(keyValueTail - nextEqualsign), status);
}
if (!nextSeparator && !canonKeywordValue.isEmpty() && !handledInputKeyAndValue) {
if (nextSeparator == std::string_view::npos && !canonKeywordValue.isEmpty() && !handledInputKeyAndValue) {
/* append new entry at the end, it sorts later than existing entries */
updatedKeysAndValues.append(keyValuePrefix, status);
/* skip keyValuePrefix update, no subsequent key-value pair */
Expand All @@ -1098,7 +1105,7 @@ ulocimp_setKeywordValue(const char* keywords,
/* if input key/value specified removal of a keyword not present in locale, or
* there was an error in CharString.append, leave original locale alone. */
U_ASSERT(status != U_STRING_NOT_TERMINATED_WARNING);
return static_cast<int32_t>(uprv_strlen(keywords));
return static_cast<int32_t>(keywords.size());
}

needLen = updatedKeysAndValues.length();
Expand Down Expand Up @@ -2225,7 +2232,7 @@ uloc_getLCID(const char* localeID)
CharString collVal = ulocimp_getKeywordValue(localeID, "collation", status);
if (U_SUCCESS(status) && !collVal.isEmpty()) {
CharString tmpLocaleID = ulocimp_getBaseName(localeID, status);
ulocimp_setKeywordValue("collation", collVal.data(), tmpLocaleID, status);
ulocimp_setKeywordValue("collation", collVal.toStringPiece(), tmpLocaleID, status);
if (U_SUCCESS(status)) {
return uprv_convertToLCID(langID.data(), tmpLocaleID.data(), &status);
}
Expand Down
10 changes: 5 additions & 5 deletions icu4c/source/common/ulocimp.h
Original file line number Diff line number Diff line change
Expand Up @@ -131,15 +131,15 @@ U_EXPORT icu::CharString
ulocimp_getVariant(const char* localeID, UErrorCode& status);

U_EXPORT void
ulocimp_setKeywordValue(const char* keywordName,
const char* keywordValue,
ulocimp_setKeywordValue(std::string_view keywordName,
std::string_view keywordValue,
icu::CharString& localeID,
UErrorCode& status);

U_EXPORT int32_t
ulocimp_setKeywordValue(const char* keywords,
const char* keywordName,
const char* keywordValue,
ulocimp_setKeywordValue(std::string_view keywords,
std::string_view keywordName,
std::string_view keywordValue,
icu::ByteSink& sink,
UErrorCode& status);

Expand Down
4 changes: 3 additions & 1 deletion icu4c/source/common/unicode/locid.h
Original file line number Diff line number Diff line change
Expand Up @@ -727,7 +727,9 @@ class U_COMMON_API Locale : public UObject {
*
* @stable ICU 49
*/
void setKeywordValue(const char* keywordName, const char* keywordValue, UErrorCode &status);
void setKeywordValue(const char* keywordName, const char* keywordValue, UErrorCode &status) {
setKeywordValue(StringPiece{keywordName}, StringPiece{keywordValue}, status);
}

/**
* Sets or removes the value for a keyword.
Expand Down

0 comments on commit 7ffbe77

Please sign in to comment.