diff --git a/executors/cpp/Makefile b/executors/cpp/Makefile index 0534c2f4..6d6604f9 100644 --- a/executors/cpp/Makefile +++ b/executors/cpp/Makefile @@ -16,7 +16,7 @@ TARGET=executor # All object files (C or C++) -OBJECTS=main.o coll.o datetime_fmt.o langnames.o likely_subtags.o list_fmt.o message_fmt2.o number_fmt.o plural_rules.o relativedatetime_fmt.o util.o +OBJECTS=main.o coll.o datetime_fmt.o localedisplaynames.o likely_subtags.o list_fmt.o message_fmt2.o number_fmt.o plural_rules.o relativedatetime_fmt.o util.o #### rules # Load in standard makefile definitions diff --git a/executors/cpp/langnames.cpp b/executors/cpp/langnames.cpp deleted file mode 100644 index 4d9add5c..00000000 --- a/executors/cpp/langnames.cpp +++ /dev/null @@ -1,66 +0,0 @@ -/******************************************************************** - * testing icu4c for language display names - */ - -#include - -#include -#include -#include -#include -#include - -#include -#include - -#include -#include -#include - -#include "./util.h" - -using std::cout; -using std::endl; -using std::string; - -using icu::Locale; -using icu::UnicodeString; - -const string TestLangNames (json_object *json_in) { - UErrorCode status = U_ZERO_ERROR; - - json_object *label_obj = json_object_object_get(json_in, "label"); - string label_string = json_object_get_string(label_obj); - - - // The locale in which the name is given. - json_object *locale_label_obj = - json_object_object_get(json_in, "locale_label"); - string locale_string = json_object_get_string(locale_label_obj); - - // The language's name to be displayed. 
- json_object *language_label_obj = json_object_object_get( - json_in, "language_label"); - string language_label_string = json_object_get_string(language_label_obj); - - Locale displayLocale(locale_string.c_str()); - - Locale testLocale(language_label_string.c_str()); - - UnicodeString testLang; - - testLocale.getDisplayName(displayLocale, testLang); - - json_object *return_json = json_object_new_object(); - json_object_object_add(return_json, "label", label_obj); - - string result_string; - testLang.toUTF8String(result_string); - - json_object_object_add(return_json, - "result", - json_object_new_string(result_string.c_str())); - - string return_string = json_object_to_json_string(return_json); - return return_string; -} diff --git a/executors/cpp/localedisplaynames.cpp b/executors/cpp/localedisplaynames.cpp new file mode 100644 index 00000000..565fa679 --- /dev/null +++ b/executors/cpp/localedisplaynames.cpp @@ -0,0 +1,82 @@ +/******************************************************************** + * testing icu4c for locale display names + */ + +#include + +#include + +#include +#include +#include + +#include +#include + +#include "./util.h" + +using icu::Locale; +using icu::UnicodeString; +using icu::LocaleDisplayNames; + +const string TestLocaleDisplayNames (json_object *json_in) { + UErrorCode status = U_ZERO_ERROR; + + json_object *label_obj = json_object_object_get(json_in, "label"); + string label_string = json_object_get_string(label_obj); + + // The locale in which the name is given. + json_object *locale_label_obj = + json_object_object_get(json_in, "locale_label"); + string locale_string = json_object_get_string(locale_label_obj); + + // The locales's name to be displayed. + json_object *displayed_locale_label_obj = json_object_object_get( + json_in, "language_label"); + string displayed_locale_label_string = + json_object_get_string(displayed_locale_label_obj); + + // Either standard or dialect names for the locale. 
+ string language_display_string = "standard"; + json_object *language_display_obj = json_object_object_get( + json_in, "languageDisplay"); + if (language_display_obj) { + language_display_string = json_object_get_string(language_display_obj); + } + + // In what language to show the locale name + Locale displayLocale(locale_string.c_str()); + + // The id of the locale to be formatted. + Locale testLocale(displayed_locale_label_string.c_str()); + + // Create display names object with the kind of locale name. + // Default is "standard". + UDialectHandling display_handling = ULDN_STANDARD_NAMES; + if (language_display_string == "dialect") { + display_handling = ULDN_DIALECT_NAMES; + } + + LocaleDisplayNames* ldn = + LocaleDisplayNames::createInstance(displayLocale, display_handling); + + // Get the resulting string for this testLocale + UnicodeString locale_name_result; + ldn->localeDisplayName(testLocale, locale_name_result); + delete ldn; + + string result_string; + locale_name_result.toUTF8String(result_string); + + // Create the output with label and resulting locale name. 
+ json_object *return_json = json_object_new_object(); + json_object_object_add(return_json, "label", label_obj); + + + json_object_object_add(return_json, + "result", + json_object_new_string(result_string.c_str())); + + string return_string = json_object_to_json_string(return_json); + return return_string; +} diff --git a/executors/cpp/main.cpp b/executors/cpp/main.cpp index a703f718..d75d1457 100644 --- a/executors/cpp/main.cpp +++ b/executors/cpp/main.cpp @@ -41,7 +41,7 @@ using std::string; // Test functions extern const string TestCollator(json_object *json_in); extern const string TestDatetimeFmt(json_object *json_in); -extern const string TestLangNames(json_object *json_in); +extern const string TestLocaleDisplayNames(json_object *json_in); extern const string TestLikelySubtags(json_object *json_in); extern const string TestListFmt(json_object *json_in); @@ -127,7 +127,7 @@ int main(int argc, const char** argv) { } else if (test_type == "list_fmt") { outputLine = TestListFmt(json_input); } else if (test_type == "lang_names") { - outputLine = TestLangNames(json_input); + outputLine = TestLocaleDisplayNames(json_input); } else if (test_type == "plural_rules") { outputLine = TestPluralRules(json_input); } else if (test_type == "rdt_fmt") { diff --git a/executors/icu4j/74/executor-icu4j/src/main/java/org/unicode/conformance/testtype/langnames/LangNamesDisplayOptions.java b/executors/icu4j/74/executor-icu4j/src/main/java/org/unicode/conformance/testtype/langnames/LangNamesDisplayOptions.java new file mode 100644 index 00000000..75a650c1 --- /dev/null +++ b/executors/icu4j/74/executor-icu4j/src/main/java/org/unicode/conformance/testtype/langnames/LangNamesDisplayOptions.java @@ -0,0 +1,16 @@ +package org.unicode.conformance.testtype.langnames; + +public enum LangNamesDisplayOptions { + STANDARD, + DIALECT; // ULDN_DIALECT_NAMES is the ICU4C enum + + public static org.unicode.conformance.testtype.langnames.LangNamesDisplayOptions DEFAULT = STANDARD; + + public 
static org.unicode.conformance.testtype.langnames.LangNamesDisplayOptions getFromString(String s) { + try { + return org.unicode.conformance.testtype.langnames.LangNamesDisplayOptions.valueOf(s.toUpperCase()); + } catch (Exception e){ + return DEFAULT; + } + } +} diff --git a/executors/icu4j/74/executor-icu4j/src/main/java/org/unicode/conformance/testtype/langnames/LangNamesInputJson.java b/executors/icu4j/74/executor-icu4j/src/main/java/org/unicode/conformance/testtype/langnames/LangNamesInputJson.java index a1ae247b..5532f69d 100644 --- a/executors/icu4j/74/executor-icu4j/src/main/java/org/unicode/conformance/testtype/langnames/LangNamesInputJson.java +++ b/executors/icu4j/74/executor-icu4j/src/main/java/org/unicode/conformance/testtype/langnames/LangNamesInputJson.java @@ -12,4 +12,6 @@ public class LangNamesInputJson implements ITestTypeInputJson { public String locale_label; + public LangNamesDisplayOptions language_display; // For standard or dialect + } diff --git a/executors/icu4j/74/executor-icu4j/src/main/java/org/unicode/conformance/testtype/langnames/LangNamesTester.java b/executors/icu4j/74/executor-icu4j/src/main/java/org/unicode/conformance/testtype/langnames/LangNamesTester.java index 76ae2ddb..175094c0 100644 --- a/executors/icu4j/74/executor-icu4j/src/main/java/org/unicode/conformance/testtype/langnames/LangNamesTester.java +++ b/executors/icu4j/74/executor-icu4j/src/main/java/org/unicode/conformance/testtype/langnames/LangNamesTester.java @@ -24,6 +24,9 @@ public ITestTypeInputJson inputMapToJson(Map inputMapData) { result.language_label = (String) inputMapData.get("language_label", null); result.locale_label = (String) inputMapData.get("locale_label", null); + String lang_display_string = (String) inputMapData.get("languageDisplay", null); + result.language_display = LangNamesDisplayOptions.getFromString(lang_display_string); + return result; } @@ -71,6 +74,10 @@ public String formatOutputJson(ITestTypeOutputJson outputJson) { public String 
getDisplayLanguageString(LangNamesInputJson input) { String localeID = input.language_label; String displayLocaleID = input.locale_label; - return ULocale.getDisplayNameWithDialect(localeID, displayLocaleID); + if (input.language_display == LangNamesDisplayOptions.STANDARD) { + return ULocale.getDisplayName(localeID, displayLocaleID); + } else { + return ULocale.getDisplayNameWithDialect(localeID, displayLocaleID); + } } } diff --git a/executors/icu4j/74/executor-icu4j/src/test/java/org/unicode/conformance/langnames/icu74/LangNamesTest.java b/executors/icu4j/74/executor-icu4j/src/test/java/org/unicode/conformance/langnames/icu74/LangNamesTest.java index 6cfa35b4..fa2b879a 100644 --- a/executors/icu4j/74/executor-icu4j/src/test/java/org/unicode/conformance/langnames/icu74/LangNamesTest.java +++ b/executors/icu4j/74/executor-icu4j/src/test/java/org/unicode/conformance/langnames/icu74/LangNamesTest.java @@ -19,4 +19,26 @@ public void testLocaleAndDisplayLocale() { assertEquals("Französisch", output.result); } + @Test + public void testLocaleNameStandardDutchBelgium() { + String testInput = + "{\"test_type\": \"lang_names\", \"label\": \"nl1\", \"language_label\": \"nl-BE\", \"locale_label\": \"en\", \"languageDisplay\": \"standard\"}"; + + LangNamesOutputJson output = + (LangNamesOutputJson) LangNamesTester.INSTANCE.getStructuredOutputFromInputStr(testInput); + + assertEquals("Dutch (Belgium)", output.result); + } + + @Test + public void testLocaleNameDialectFlemish() { + String testInput = + "{\"test_type\": \"lang_names\", \"label\": \"nl2\", \"language_label\": \"nl-BE\", \"locale_label\": \"en\", \"languageDisplay\": \"dialect\"}"; + + LangNamesOutputJson output = + (LangNamesOutputJson) LangNamesTester.INSTANCE.getStructuredOutputFromInputStr(testInput); + + assertEquals("Flemish", output.result); + } + } diff --git a/executors/node/executor.js b/executors/node/executor.js index 2bfd7dc4..4d12aa71 100644 --- a/executors/node/executor.js +++ 
b/executors/node/executor.js @@ -22,7 +22,7 @@ let numberformatter = require('./numberformat.js'); let displaynames = require('./displaynames.js'); -let langnames = require('./langnames.js'); +let localedisplaynames = require('./localedisplaynames.js') let likely_subtags = require('./likely_subtags.js'); @@ -62,8 +62,8 @@ const testTypes = { TestDateTimeFormat : Symbol("datetime_fmt"), TestPluralRules : Symbol("plural_rules"), TestDisplayNames : Symbol("display_names"), - TestLangNames : Symbol("language_display_name"), TestListFmt : Symbol("list_fmt"), + TestLocaleDisplayNames : Symbol("language_display_name"), TestRelativeDateTimeFormat : Symbol("rdt_fmt") }; @@ -138,7 +138,7 @@ function parseJsonForTestId(parsed) { } if (testId == "language_display_name" || testId == "lang_names") { - return testTypes.TestLangNames; + return testTypes.TestLocaleDisplayNames; } if (testId == "datetime_fmt") { @@ -231,7 +231,7 @@ rl.on('line', function(line) { outputLine = displaynames.testDisplayNames(parsedJson); } else if (test_type == "language_display_name" || test_type == "lang_names") { - outputLine = langnames.testLangNames(parsedJson); + outputLine = localedisplaynames.testLocaleDisplayNames(parsedJson); } else if (test_type == "likely_subtags") { outputLine = likely_subtags.testLikelySubtags(parsedJson); diff --git a/executors/node/langnames.js b/executors/node/localedisplaynames.js similarity index 81% rename from executors/node/langnames.js rename to executors/node/localedisplaynames.js index 970d3d21..fe73937e 100644 --- a/executors/node/langnames.js +++ b/executors/node/localedisplaynames.js @@ -2,7 +2,7 @@ module.exports = { - testLangNames: function (json) { + testLocaleDisplayNames: function (json) { let locale = 'en'; // Default let options = {}; if (json['locale_label']) { @@ -15,9 +15,12 @@ module.exports = { let label = json['label']; let input = json['language_label'].replace(/_/g, '-'); + if (json['languageDisplay']) { + // Fix to use dash, not underscore. 
+ options['languageDisplay'] = json['languageDisplay']; + } + let outputLine; - //console.log("langnames input: " + input + - // " options: " + JSON.stringify(options) + " locale " + locale); let dn; try { @@ -42,12 +45,12 @@ module.exports = { "result": resultString }; } catch (error) { - //console.log("LangName problem: input = " + input + ", error = " + error); outputLine = {"label": json['label'], "locale_label": locale, "language_label": input, "result": resultString, - "error": error.toString() + "error": error.toString(), + "actual_options": options.toString() }; } return outputLine; diff --git a/executors/rust/1.3/src/langnames.rs b/executors/rust/1.3/src/localenames.rs similarity index 65% rename from executors/rust/1.3/src/langnames.rs rename to executors/rust/1.3/src/localenames.rs index 1803f861..ea0aba4d 100644 --- a/executors/rust/1.3/src/langnames.rs +++ b/executors/rust/1.3/src/localenames.rs @@ -2,22 +2,23 @@ use serde_json::{json, Value}; -use icu::displaynames::{DisplayNamesOptions, LanguageDisplayNames}; +use icu::displaynames::{DisplayNamesOptions, LocaleDisplayNamesFormatter}; -use icu::locid::subtags::Language; use icu::locid::Locale; -// Function runs language names tests -pub fn run_language_name_test(json_obj: &Value) -> Result { +use icu::displaynames::LanguageDisplay; + +// Function runs locale names tests +pub fn run_locale_name_test(json_obj: &Value) -> Result { let label = &json_obj["label"].as_str().unwrap(); - let options: DisplayNamesOptions = Default::default(); + let mut options: DisplayNamesOptions = Default::default(); let language_label = json_obj["language_label"] .as_str() .unwrap() .replace('_', "-"); - let input_lang_result = language_label.parse::(); - let input_lang = match input_lang_result { + let input_locale_result = language_label.parse::(); + let input_locale = match input_locale_result { Ok(l) => l, Err(_e) => { return Ok(json!({ @@ -44,11 +45,24 @@ pub fn run_language_name_test(json_obj: &Value) -> Result { 
"test_type": "display_names", "unsupported": "locale name", "error_type": "unsupported", - "error_detail": {"unsupported_locale": &locale_name_result} + "error_detail": {"cannot parse locale": &locale_name_result} })) } }; + // Get either standard or dialect form of locale name. + let language_display_result = json_obj["languageDisplay"].as_str(); + let language_display: LanguageDisplay = match language_display_result { + Some(s) => match s { + "standard" => LanguageDisplay::Standard, + "dialect" => LanguageDisplay::Dialect, + &_ => LanguageDisplay::Standard, + }, + None => LanguageDisplay::Standard, // The default + }; + + options.language_display = language_display; + let langid_result = locale_name.parse::(); let langid = match langid_result { @@ -67,13 +81,13 @@ pub fn run_language_name_test(json_obj: &Value) -> Result { } }; - let display_name_formatter = LanguageDisplayNames::try_new(&langid.into(), options); + let display_name_formatter = LocaleDisplayNamesFormatter::try_new(&langid.into(), options); let json_result = match display_name_formatter { Ok(formatter) => { json!({ "label": label, - "result": formatter.of(input_lang) + "result": formatter.of(&input_locale) }) } Err(e) => { @@ -83,7 +97,7 @@ pub fn run_language_name_test(json_obj: &Value) -> Result { "error": e.to_string(), "error_type": "unsupported", "unsupported": e.to_string(), - "error_detail": {"unsupported_locale": locale_name} + "error_detail": {"formatting fails for": locale_name} }) } }; diff --git a/executors/rust/1.3/src/main.rs b/executors/rust/1.3/src/main.rs index 97b79afc..7c412c47 100644 --- a/executors/rust/1.3/src/main.rs +++ b/executors/rust/1.3/src/main.rs @@ -8,10 +8,10 @@ // DONE 5. Move parameter extraction into function. // 6. Fix NumberFormat with options // 7. Clean up code -// 8. Decide on a repository structure -// DONE 9. Modularize into separate files for each type of test +// 8. DONE Decide on a repository structure +// 9. 
DONE Modularize into separate files for each type of test // 10. Fix test_type and switch statement -// 11. Add language names +// 11. DONE Add language names --> locale names // References for ICU4X: // https://unicode-org.github.io/icu4x-docs/doc/icu_collator/index.html @@ -20,18 +20,18 @@ mod collator; mod datetimefmt; mod decimalfmt; mod displaynames; -mod langnames; mod likelysubtags; mod listfmt; +mod localenames; mod numberfmt; mod pluralrules; mod relativedatetime_fmt; use collator::run_collation_test; use datetimefmt::run_datetimeformat_test; -use langnames::run_language_name_test; use likelysubtags::run_likelysubtags_test; use listfmt::run_list_fmt_test; +use localenames::run_locale_name_test; use numberfmt::run_numberformat_test; use pluralrules::run_plural_rules_test; use relativedatetime_fmt::run_relativedatetimeformat_test; @@ -111,7 +111,7 @@ fn main() -> io::Result<()> { || (test_type == "language_display_name") || (test_type == "lang_names") { - run_language_name_test(&json_info) + run_locale_name_test(&json_info) } else if test_type == "likely_subtags" { run_likelysubtags_test(&json_info) } else if test_type == "list_fmt" { diff --git a/schema/lang_names/test_schema.json b/schema/lang_names/test_schema.json index b0716b28..3985c944 100644 --- a/schema/lang_names/test_schema.json +++ b/schema/lang_names/test_schema.json @@ -49,6 +49,10 @@ "description": "locale tag of the source language ", "type": "string" }, + "languageDisplay": { + "description": "either 'standard' or 'dialect'", + "type": "string" + }, "locale_label": { "description": "locale tag of the language being described ", "type": "string" diff --git a/testgen/generators/localeDisplayNames.py b/testgen/generators/localeDisplayNames.py new file mode 100644 index 00000000..1e1a7613 --- /dev/null +++ b/testgen/generators/localeDisplayNames.py @@ -0,0 +1,127 @@ +# -*- coding: utf-8 -*- +import os +import json +import re +import logging +from generators.base import DataGenerator + 
# Matches lines that are empty or contain only whitespace.
reblankline = re.compile(r"^\s*$")


class LocaleNamesGenerator(DataGenerator):
    """Builds locale display name test and verification JSON files.

    Input is the text of "localeDisplayName.txt" for the configured ICU
    version. Output is "lang_name_test_file.json" and
    "lang_name_verify_file.json" in the directory named by ``self.icu_version``.
    """

    json_test = {"test_type": "lang_names"}
    json_verify = {"test_type": "lang_names"}

    def process_test_data(self):
        """Generate and write the test and verify files.

        Returns:
            True when both files were written, None when the source data
            could not be read (the file may not exist for this version).
        """
        self.languageNameDescr()
        # Data constructed from CLDR data
        filename = "localeDisplayName.txt"
        raw_locale_display_names_testdata = self.readFile(filename, self.icu_version)

        if not raw_locale_display_names_testdata:
            # File may not exist
            return None

        # TODO: add standard vs. dialect vs. alternate names
        self.generateLanguageNameTestDataObjects(raw_locale_display_names_testdata)
        self.generateTestHashValues(self.json_test)

        self._dump_lang_name_json("lang_name_test_file.json", self.json_test)
        self._dump_lang_name_json("lang_name_verify_file.json", self.json_verify)

        return True

    def _dump_lang_name_json(self, file_name, data):
        """Write ``data`` as JSON to ``file_name`` under the ICU version directory."""
        output_path = os.path.join(self.icu_version, file_name)
        # The context manager closes the file even if json.dump raises.
        with open(output_path, "w", encoding="UTF-8") as output_file:
            json.dump(data, output_file, indent=1)

    def languageNameDescr(self):
        """Reset the per-instance test and verify JSON headers."""
        descr = "Language display name test cases. The first code declares the language whose display name is requested while the second code declares the locale to display the language name in."
        test_id = "lang_names"
        source_url = "No URL yet."
        version = "unspecified"
        self.json_test = {
            "test_type": test_id,
            "Test scenario": test_id,
            "description": descr,
            "source": {
                "repository": "conformance-test",
                "version": "trunk",
                "url": source_url,
                "source_version": version,
            },
        }
        # Use a per-instance dict so that adding "verifications" later does
        # not mutate the dict shared by every instance through the class.
        self.json_verify = {"test_type": test_id}
        return

    def generateLanguageNameTestDataObjects(self, rawtestdata):
        """Fill json_test["tests"] and json_verify["verifications"].

        Each line of ``rawtestdata`` is one of:
          * a comment (starts with '#') or a blank line, which is skipped;
          * ``@locale=<tag>``, which sets the display locale for later lines;
          * ``@languageDisplay=<value>``, which sets the display kind for
            later lines;
          * ``<locale id>; <expected name>``, which produces one test case.
        Any other line is logged at debug level and ignored.
        """
        recommentline = re.compile(r"^\s*#")

        # Accept '-' as well as word characters so that a hyphenated tag is
        # captured whole instead of being cut at the first hyphen.
        set_locale = re.compile(r"@locale=([\w-]+)")
        set_languageDisplay = re.compile(r"@languageDisplay=(\w+)")

        count = 0

        jtests = []
        jverify = []

        # Compute max size needed for label number
        test_lines = rawtestdata.splitlines()
        num_samples = len(test_lines)
        max_digits = self.computeMaxDigitsForCount(num_samples)

        # Defaults that apply until the data overrides them. Without a
        # default, a data line before the first @locale= directive would
        # raise UnboundLocalError.
        locale_label = "und"
        language_display = "standard"

        for item in test_lines:
            if recommentline.match(item) or reblankline.match(item):
                continue

            locale_match = set_locale.match(item)
            if locale_match:
                locale_label = locale_match.group(1)
                continue

            language_display_match = set_languageDisplay.match(item)
            if language_display_match:
                language_display = language_display_match.group(1)
                continue

            test_data = self.parseLanguageNameData(item)
            if test_data is None:
                logging.debug(
                    "  LanguageNames (%s): Line '%s' not recognized as valid test data entry",
                    self.icu_version,
                    item,
                )
                continue

            label = str(count).rjust(max_digits, "0")
            test_json = {
                "label": label,
                "language_label": test_data[0],
                "locale_label": locale_label,
                "languageDisplay": language_display,
            }
            jtests.append(test_json)
            jverify.append({"label": label, "verify": test_data[1]})
            count += 1

        self.json_test["tests"] = self.sample_tests(jtests)
        self.json_verify["verifications"] = self.sample_tests(jverify)

        logging.info("LocaleDisplayNames Test (%s): %d lines processed", self.icu_version, count)
        return

    def parseLanguageNameData(self, rawtestdata):
        """Split one data line into (locale id, expected display name).

        Returns:
            A 2-tuple of strings, or None when the line does not have the
            form ``<id>; <name>``.
        """
        reformat = re.compile(r"(\w+(\-\w+)*);\s*(.+)$")

        test_match = reformat.search(rawtestdata)

        if test_match is not None:
            return (test_match.group(1), test_match.group(3))
        return None