Skip to content

Commit

Permalink
ICU-22908 MF2: Finish updating spec tests and implement required test…
Browse files Browse the repository at this point in the history
… functions

Implement :test:format, :test:select, and :test:function, which are
required by the new `pattern-selection.json` tests.

Change the internal value representation in the formatter in order to
support some of the test cases (binding the results of selectors to a
variable).
  • Loading branch information
catamorphism committed Sep 25, 2024
1 parent 534dd57 commit 38500d3
Show file tree
Hide file tree
Showing 14 changed files with 789 additions and 396 deletions.
3 changes: 2 additions & 1 deletion icu4c/source/common/unicode/utypes.h
Original file line number Diff line number Diff line change
Expand Up @@ -598,12 +598,13 @@ typedef enum UErrorCode {
U_MF_DUPLICATE_DECLARATION_ERROR, /**< The same variable is declared in more than one .local or .input declaration. @internal ICU 75 technology preview @deprecated This API is for technology preview only. */
U_MF_OPERAND_MISMATCH_ERROR, /**< An operand provided to a function does not have the required form for that function @internal ICU 75 technology preview @deprecated This API is for technology preview only. */
U_MF_DUPLICATE_VARIANT_ERROR, /**< A message includes a variant with the same key list as another variant. @internal ICU 76 technology preview @deprecated This API is for technology preview only. */
U_MF_BAD_OPTION, /**< An option value provided to a function does not have the required form for that option. @internal ICU 77 technology preview @deprecated This API is for technology preview only. */
#ifndef U_HIDE_DEPRECATED_API
/**
* One more than the highest normal formatting API error code.
* @deprecated ICU 58 The numeric value may change over time, see ICU ticket #12420.
*/
U_FMT_PARSE_ERROR_LIMIT = 0x10120,
U_FMT_PARSE_ERROR_LIMIT = 0x10121,
#endif // U_HIDE_DEPRECATED_API

/*
Expand Down
3 changes: 2 additions & 1 deletion icu4c/source/common/utypes.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -140,7 +140,8 @@ _uFmtErrorName[U_FMT_PARSE_ERROR_LIMIT - U_FMT_PARSE_ERROR_START] = {
"U_MF_MISSING_SELECTOR_ANNOTATION_ERROR",
"U_MF_DUPLICATE_DECLARATION_ERROR",
"U_MF_OPERAND_MISMATCH_ERROR",
"U_MF_DUPLICATE_VARIANT_ERROR"
"U_MF_DUPLICATE_VARIANT_ERROR",
"U_MF_BAD_OPTION"
};

static const char * const
Expand Down
395 changes: 124 additions & 271 deletions icu4c/source/i18n/messageformat2.cpp

Large diffs are not rendered by default.

242 changes: 221 additions & 21 deletions icu4c/source/i18n/messageformat2_evaluation.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -91,32 +91,44 @@ FunctionOptions::~FunctionOptions() {
delete[] options;
}
}
// ResolvedSelector
// ----------------

ResolvedSelector::ResolvedSelector(const FunctionName& fn,
Selector* sel,
FunctionOptions&& opts,
FormattedPlaceholder&& val)
: selectorName(fn), selector(sel), options(std::move(opts)), value(std::move(val)) {
U_ASSERT(sel != nullptr);

static bool containsOption(const UVector& opts, const ResolvedFunctionOption& opt) {
for (int32_t i = 0; i < opts.size(); i++) {
if (static_cast<ResolvedFunctionOption*>(opts[i])->getName()
== opt.getName()) {
return true;
}
}
return false;
}

ResolvedSelector::ResolvedSelector(FormattedPlaceholder&& val) : value(std::move(val)) {}
// Options in `this` take precedence
// `this` can't be used after mergeOptions is called
FunctionOptions FunctionOptions::mergeOptions(FunctionOptions&& other,
UErrorCode& status) {
UVector mergedOptions(status);
if (U_FAILURE(status)) {
return {};
}

ResolvedSelector& ResolvedSelector::operator=(ResolvedSelector&& other) noexcept {
selectorName = std::move(other.selectorName);
selector.adoptInstead(other.selector.orphan());
options = std::move(other.options);
value = std::move(other.value);
return *this;
}
// Create a new vector consisting of the options from this `FunctionOptions`
for (int32_t i = 0; i < functionOptionsLen; i++) {
mergedOptions.addElement(create<ResolvedFunctionOption>(std::move(options[i]), status),
status);
}

ResolvedSelector::ResolvedSelector(ResolvedSelector&& other) {
*this = std::move(other);
}
// Add each option from `other` that doesn't appear in this `FunctionOptions`
for (int i = 0; i < other.functionOptionsLen; i++) {
// Note: this is quadratic in the length of `options`
if (!containsOption(mergedOptions, other.options[i])) {
mergedOptions.addElement(create<ResolvedFunctionOption>(std::move(other.options[i]),
status),
status);
}
}

ResolvedSelector::~ResolvedSelector() {}
return FunctionOptions(std::move(mergedOptions), status);
}

// PrioritizedVariant
// ------------------
Expand Down Expand Up @@ -199,6 +211,194 @@ PrioritizedVariant::~PrioritizedVariant() {}
UErrorCode& status) : arguments(args), errors(e, status) {}
MessageContext::~MessageContext() {}

// InternalValue
// -------------

bool InternalValue::isFallback() const {
return std::holds_alternative<FormattedPlaceholder>(argument)
&& std::get_if<FormattedPlaceholder>(&argument)->isFallback();
}

bool InternalValue::hasNullOperand() const {
return std::holds_alternative<FormattedPlaceholder>(argument)
&& std::get_if<FormattedPlaceholder>(&argument)->isNullOperand();
}

// Moves the wrapped `FormattedPlaceholder` out of this value.
//
// Returns a default-constructed placeholder if `errorCode` already indicates
// failure. If this value wraps a nested `InternalValue` rather than a
// placeholder, sets `errorCode` to U_ILLEGAL_ARGUMENT_ERROR and returns a
// default-constructed placeholder.
FormattedPlaceholder InternalValue::takeArgument(UErrorCode& errorCode) {
    if (U_FAILURE(errorCode)) {
        return {};
    }

    FormattedPlaceholder* placeholder = std::get_if<FormattedPlaceholder>(&argument);
    if (placeholder == nullptr) {
        errorCode = U_ILLEGAL_ARGUMENT_ERROR;
        return {};
    }
    return std::move(*placeholder);
}

// Returns the fallback string of the placeholder at the bottom of the
// operand chain: the wrapped placeholder's own fallback, or (recursively)
// the fallback of the nested `InternalValue`.
// NOTE(review): the nested pointer is dereferenced without a null check.
const UnicodeString& InternalValue::getFallback() const {
    if (const FormattedPlaceholder* placeholder = std::get_if<FormattedPlaceholder>(&argument)) {
        return placeholder->getFallback();
    }
    const InternalValue* nested = *std::get_if<InternalValue*>(&argument);
    return nested->getFallback();
}

// Returns the selector attached to this value.
//
// Returns nullptr without touching `errorCode` if it already indicates
// failure. If no selector is attached, sets `errorCode` to
// U_ILLEGAL_ARGUMENT_ERROR and returns nullptr.
const Selector* InternalValue::getSelector(UErrorCode& errorCode) const {
    if (U_FAILURE(errorCode)) {
        return nullptr;
    }

    const Selector* const result = selector;
    if (result == nullptr) {
        errorCode = U_ILLEGAL_ARGUMENT_ERROR;
    }
    return result;
}

// Wraps an already-evaluated placeholder; no function is attached,
// so both `selector` and `formatter` are null.
InternalValue::InternalValue(FormattedPlaceholder&& arg)
    : argument(std::move(arg)),
      selector(nullptr),
      formatter(nullptr) {}

// Builds a pending call of `functionName` on `operand`.
//
// operand      - nested value the function will be applied to
// opts         - options for the call (moved in)
// functionName - name of the function
// f, s         - formatter and selector implementing the function;
//                either may be null, but not both (asserted)
InternalValue::InternalValue(InternalValue* operand,
                             FunctionOptions&& opts,
                             const FunctionName& functionName,
                             const Formatter* f,
                             const Selector* s)
    : argument(operand),
      options(std::move(opts)),
      name(functionName),
      selector(s),
      formatter(f) {
    U_ASSERT(selector != nullptr || formatter != nullptr);
}

// Runs the selector attached to this value on its underlying operand.
//
// Walks the chain of nested `InternalValue`s reachable through `argument`,
// merging each level's options into one set (levels closer to `this` take
// precedence), until it reaches the level that wraps a `FormattedPlaceholder`.
// That placeholder and the merged options are then passed to
// `selector->selectKey()` together with `keys`/`keysLen` and `prefs`/`prefsLen`.
//
// errs      - receives a selector error (under this value's function name)
//             if `selectKey()` fails; `errorCode` is reset to U_ZERO_ERROR
//             in that case
// errorCode - set to U_ILLEGAL_ARGUMENT_ERROR if this value has no selector,
//             if a level in the chain names a different function, or if a
//             nested pointer in the chain is null
//
// The operand and the options of every level are moved out, so `this`
// cannot be used after calling this method. The nested values themselves
// stay attached to their parents so that the destructor still frees them.
void InternalValue::forceSelection(DynamicErrors& errs,
                                   const UnicodeString* keys,
                                   int32_t keysLen,
                                   UnicodeString* prefs,
                                   int32_t& prefsLen,
                                   UErrorCode& errorCode) {
    if (U_FAILURE(errorCode)) {
        return;
    }

    if (!canSelect()) {
        errorCode = U_ILLEGAL_ARGUMENT_ERROR;
        return;
    }
    // Find the argument and complete set of options by traversing `argument`
    FunctionOptions opts;
    InternalValue* p = this;
    FunctionName selectorName = name;
    while (std::holds_alternative<InternalValue*>(p->argument)) {
        if (p->name != selectorName) {
            // Can only compose calls to the same selector
            errorCode = U_ILLEGAL_ARGUMENT_ERROR;
            return;
        }
        // First argument to mergeOptions takes precedence
        opts = opts.mergeOptions(std::move(p->options), errorCode);
        if (U_FAILURE(errorCode)) {
            return;
        }
        InternalValue* next = *std::get_if<InternalValue*>(&p->argument);
        if (next == nullptr) {
            // A null link (e.g. a moved-from value) has no operand to select on
            errorCode = U_ILLEGAL_ARGUMENT_ERROR;
            return;
        }
        // Ownership of `next` deliberately stays with `p`: clearing the link
        // here would leave the nested value with no owner to delete it.
        p = next;
    }
    FormattedPlaceholder arg = std::move(*std::get_if<FormattedPlaceholder>(&p->argument));

    selector->selectKey(std::move(arg), std::move(opts),
                        keys, keysLen,
                        prefs, prefsLen, errorCode);
    if (U_FAILURE(errorCode)) {
        // Report the failure through `errs` rather than through `errorCode`
        errorCode = U_ZERO_ERROR;
        errs.setSelectorError(selectorName, errorCode);
    }
}

// Produces the formatted result for this value.
//
// - A value with neither formatter nor selector just hands back its wrapped
//   placeholder (moved out).
// - A value with a selector but no formatter cannot be formatted:
//   `errorCode` becomes U_ILLEGAL_ARGUMENT_ERROR.
// - Otherwise the operand is obtained (recursively, for a nested value) and
//   passed with this value's options to `formatter->format()`.
//
// A failure from the formatter is recorded in `errs` (as an operand-mismatch
// error or a formatting error) and `errorCode` is reset to U_ZERO_ERROR.
// Whenever `errs` holds a formatting error afterwards, a placeholder built
// from the fallback string is returned instead of the formatter's output.
FormattedPlaceholder InternalValue::forceFormatting(DynamicErrors& errs, UErrorCode& errorCode) {
    if (U_FAILURE(errorCode)) {
        return {};
    }

    if (formatter == nullptr) {
        if (selector != nullptr) {
            // Selector-only values have nothing to format with
            errorCode = U_ILLEGAL_ARGUMENT_ERROR;
            return {};
        }
        U_ASSERT(std::holds_alternative<FormattedPlaceholder>(argument));
        return std::move(*std::get_if<FormattedPlaceholder>(&argument));
    }

    // Obtain the operand, formatting the nested value first if there is one
    FormattedPlaceholder operand;
    if (FormattedPlaceholder* direct = std::get_if<FormattedPlaceholder>(&argument)) {
        operand = std::move(*direct);
    } else {
        InternalValue* nested = *std::get_if<InternalValue*>(&argument);
        operand = nested->forceFormatting(errs, errorCode);
    }

    if (U_FAILURE(errorCode)) {
        return {};
    }

    // The fallback for a nullary function call is the function name
    UnicodeString fallbackText;
    if (operand.isNullOperand()) {
        fallbackText = u":";
        fallbackText += name;
    } else {
        fallbackText = operand.getFallback();
    }

    // Call the function with the argument
    FormattedPlaceholder formatted = formatter->format(std::move(operand),
                                                       std::move(options),
                                                       errorCode);
    if (U_FAILURE(errorCode)) {
        const bool operandMismatch = (errorCode == U_MF_OPERAND_MISMATCH_ERROR);
        errorCode = U_ZERO_ERROR;
        if (operandMismatch) {
            errs.setOperandMismatchError(name, errorCode);
        } else {
            // Any other formatter error is conveyed as a formatting error
            errs.setFormattingError(name, errorCode);
        }
    }

    // Ignore the output if any error occurred
    if (errs.hasFormattingError()) {
        return FormattedPlaceholder(fallbackText);
    }
    return formatted;
}

// Move assignment: takes over `other`'s operand, options, name, selector and
// formatter, and leaves `other` with a null operand link and null
// selector/formatter so that its destructor releases nothing.
//
// Self-assignment is a no-op. Without the guard, the resets of `other` below
// would null out this object's own operand, selector and formatter.
//
// Deliberately does not release anything `*this` held before: the move
// constructor delegates to this operator, so the previous pointer members
// cannot be assumed valid here.
// NOTE(review): assigning over an already-populated value therefore drops
// its previous resources without freeing them -- confirm callers only
// assign into empty values.
InternalValue& InternalValue::operator=(InternalValue&& other) noexcept {
    if (this == &other) {
        return *this;
    }

    argument = std::move(other.argument);
    other.argument = nullptr;
    options = std::move(other.options);
    name = other.name;
    selector = other.selector;
    formatter = other.formatter;
    other.selector = nullptr;
    other.formatter = nullptr;

    return *this;
}

// Frees the selector, the formatter and, when the operand is a nested
// `InternalValue`, that nested value as well.
InternalValue::~InternalValue() {
    delete selector;
    selector = nullptr;

    delete formatter;
    formatter = nullptr;

    if (InternalValue** nested = std::get_if<InternalValue*>(&argument)) {
        delete *nested;
        argument = nullptr;
    }
}

} // namespace message2
U_NAMESPACE_END

Expand Down
70 changes: 38 additions & 32 deletions icu4c/source/i18n/messageformat2_evaluation.h
Original file line number Diff line number Diff line change
Expand Up @@ -63,38 +63,6 @@ namespace message2 {
return 1;
}

// Encapsulates a value to be scrutinized by a `match` with its resolved
// options and the name of the selector
// Bundles the value a `match` scrutinizes with the selector chosen for it,
// that selector's resolved options, and the selector's name.
class ResolvedSelector : public UObject {
public:
    // --- Construction, destruction, moving ---
    ResolvedSelector() {}
    // Wraps a bare value; used either for errors, or when the selector isn't yet known
    explicit ResolvedSelector(FormattedPlaceholder&& value);
    ResolvedSelector(const FunctionName& fn,
                     Selector* selector,
                     FunctionOptions&& options,
                     FormattedPlaceholder&& value);
    ResolvedSelector(ResolvedSelector&&);
    ResolvedSelector& operator=(ResolvedSelector&&) noexcept;
    virtual ~ResolvedSelector();

    // --- Read-only queries ---
    bool hasSelector() const { return selector.isValid(); }
    const FunctionName& getSelectorName() const { return selectorName; }
    const FormattedPlaceholder& argument() const { return value; }
    // Must only be called when hasSelector() is true (asserted)
    const Selector* getSelector() {
        U_ASSERT(selector.isValid());
        return selector.getAlias();
    }

    // --- Accessors that hand out the stored state for moving ---
    FormattedPlaceholder&& takeArgument() { return std::move(value); }
    FunctionOptions&& takeOptions() {
        return std::move(options);
    }
private:
    FunctionName selectorName; // For error reporting
    LocalPointer<Selector> selector;
    FunctionOptions options;
    FormattedPlaceholder value;
}; // class ResolvedSelector

// Closures and environments
// -------------------------

Expand Down Expand Up @@ -193,6 +161,44 @@ namespace message2 {
DynamicErrors errors;
}; // class MessageContext

// InternalValue
// ----------------

// Intermediate result of evaluating an expression. It is either an
// already-evaluated `FormattedPlaceholder`, or a pending function call
// (name, options, formatter and/or selector) applied to a nested
// `InternalValue`. The call is carried out on demand by `forceFormatting()`
// or `forceSelection()`.
class InternalValue : public UObject {
public:
    const FunctionName& getFunctionName() const { return name; }
    // True if a selector is attached
    bool canSelect() const { return selector != nullptr; }
    // Returns the attached selector; sets U_ILLEGAL_ARGUMENT_ERROR if there is none
    const Selector* getSelector(UErrorCode&) const;
    // Applies the formatter (if any) to the operand and returns the result
    FormattedPlaceholder forceFormatting(DynamicErrors& errs,
                                         UErrorCode& errorCode);
    // Applies the selector to the operand; `this` can't be used afterwards
    void forceSelection(DynamicErrors& errs,
                        const UnicodeString* keys,
                        int32_t keysLen,
                        UnicodeString* prefs,
                        int32_t& prefsLen,
                        UErrorCode& errorCode);
    // Move-only: declaring the move operations below suppresses the
    // implicitly generated copy operations.
    virtual ~InternalValue();
    InternalValue(FormattedPlaceholder&&);
    // Formatter and selector may be null
    InternalValue(InternalValue*, FunctionOptions&&, const FunctionName&, const Formatter*,
                  const Selector*);
    const UnicodeString& getFallback() const;
    bool isFallback() const;
    bool hasNullOperand() const;
    // Can't be used anymore after calling this
    FormattedPlaceholder takeArgument(UErrorCode& errorCode);
    // Members are default-initialized first, then overwritten by move assignment
    InternalValue(InternalValue&& other) { *this = std::move(other); }
    InternalValue& operator=(InternalValue&& other) noexcept;
private:
    // InternalValue is owned (if present)
    std::variant<InternalValue*, FormattedPlaceholder> argument;
    FunctionOptions options;
    FunctionName name;
    // Both pointers start out null so that no constructor (in particular the
    // delegating move constructor) ever leaves them indeterminate.
    const Selector* selector = nullptr; // May be null
    const Formatter* formatter = nullptr; // May be null, but one or the other should be non-null unless argument is a FormattedPlaceholder
}; // class InternalValue

} // namespace message2

U_NAMESPACE_END
Expand Down
13 changes: 10 additions & 3 deletions icu4c/source/i18n/messageformat2_formatter.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -132,9 +132,13 @@ namespace message2 {
.adoptFormatter(FunctionName(UnicodeString("time")), time, success)
.adoptFormatter(FunctionName(UnicodeString("number")), number, success)
.adoptFormatter(FunctionName(UnicodeString("integer")), integer, success)
.adoptFormatter(FunctionName(UnicodeString("test:function")), new StandardFunctions::TestFormatFactory(), success)
.adoptFormatter(FunctionName(UnicodeString("test:format")), new StandardFunctions::TestFormatFactory(), success)
.adoptSelector(FunctionName(UnicodeString("number")), new StandardFunctions::PluralFactory(UPLURAL_TYPE_CARDINAL), success)
.adoptSelector(FunctionName(UnicodeString("integer")), new StandardFunctions::PluralFactory(StandardFunctions::PluralFactory::integer()), success)
.adoptSelector(FunctionName(UnicodeString("string")), new StandardFunctions::TextFactory(), success);
.adoptSelector(FunctionName(UnicodeString("string")), new StandardFunctions::TextFactory(), success)
.adoptSelector(FunctionName(UnicodeString("test:function")), new StandardFunctions::TestSelectFactory(), success)
.adoptSelector(FunctionName(UnicodeString("test:select")), new StandardFunctions::TestSelectFactory(), success);
CHECK_ERROR(success);
standardMFFunctionRegistry = standardFunctionsBuilder.build();
CHECK_ERROR(success);
Expand Down Expand Up @@ -256,8 +260,11 @@ namespace message2 {
return formatter;
}

bool MessageFormatter::getDefaultFormatterNameByType(const UnicodeString& type, FunctionName& name) const {
U_ASSERT(hasCustomMFFunctionRegistry());
bool MessageFormatter::getDefaultFormatterNameByType(const UnicodeString& type,
FunctionName& name) const {
if (!hasCustomMFFunctionRegistry()) {
return false;
}
const MFFunctionRegistry& reg = getCustomMFFunctionRegistry();
return reg.getDefaultFormatterNameByType(type, name);
}
Expand Down
Loading

0 comments on commit 38500d3

Please sign in to comment.