From addae40c27e4397390b6a643c2a01ac4d8f5c8ea Mon Sep 17 00:00:00 2001 From: Conrad Nied Date: Wed, 11 Dec 2024 08:08:15 +0100 Subject: [PATCH] CLDR-18155 Recommended edits --- common/supplemental/likelySubtags.xml | 4 +-- common/supplemental/supplementalData.xml | 5 +-- .../localeIdentifiers/likelySubtags.txt | 6 ++-- .../unicode/cldr/util/DiffLanguageGroups.java | 2 +- .../cldr/util/SupplementalDataInfo.java | 33 +++++++++---------- .../util/data/country_language_population.tsv | 3 +- 6 files changed, 27 insertions(+), 26 deletions(-) diff --git a/common/supplemental/likelySubtags.xml b/common/supplemental/likelySubtags.xml index 6dd06b45d6f..1cf6e396413 100644 --- a/common/supplemental/likelySubtags.xml +++ b/common/supplemental/likelySubtags.xml @@ -451,7 +451,6 @@ not be patched by hand, as any changes made in that fashion may be lost. - @@ -1254,7 +1253,8 @@ not be patched by hand, as any changes made in that fashion may be lost. - + + diff --git a/common/supplemental/supplementalData.xml b/common/supplemental/supplementalData.xml index 59d143b7ea9..5b44590c312 100644 --- a/common/supplemental/supplementalData.xml +++ b/common/supplemental/supplementalData.xml @@ -1918,7 +1918,7 @@ XXX Code for transations where no currency is involved - + @@ -3147,7 +3147,8 @@ XXX Code for transations where no currency is involved - + + diff --git a/common/testData/localeIdentifiers/likelySubtags.txt b/common/testData/localeIdentifiers/likelySubtags.txt index f525de38b34..3b1261d5247 100644 --- a/common/testData/localeIdentifiers/likelySubtags.txt +++ b/common/testData/localeIdentifiers/likelySubtags.txt @@ -1575,9 +1575,9 @@ und-NP ; ne-Deva-NP ; ne ; und-NR ; en-Latn-NR ; en-NR ; und-NU ; en-Latn-NU ; en-NU ; und-NZ ; en-Latn-NZ ; en-NZ ; -und-Nkoo ; man-Nkoo-GN ; man-Nkoo ; man-GN -und-Nkoo-AQ ; man-Nkoo-AQ ; ; -und-Nkoo-GN ; man-Nkoo-GN ; man-Nkoo ; man-GN +und-Nkoo ; nqo-Nkoo-GN ; nqo ; +und-Nkoo-AQ ; nqo-Nkoo-AQ ; nqo-AQ ; +und-Nkoo-GN ; nqo-Nkoo-GN ; nqo ; und-OM ; ar-Arab-OM ; ar-OM ; und-Olck ; sat-Olck-IN ; sat ; und-Olck-AQ ; sat-Olck-AQ ; sat-AQ ; diff --git a/tools/cldr-code/src/main/java/org/unicode/cldr/util/DiffLanguageGroups.java b/tools/cldr-code/src/main/java/org/unicode/cldr/util/DiffLanguageGroups.java index 1afe31aff3a..2a023cceaaf 100644 --- a/tools/cldr-code/src/main/java/org/unicode/cldr/util/DiffLanguageGroups.java +++ b/tools/cldr-code/src/main/java/org/unicode/cldr/util/DiffLanguageGroups.java @@ -356,7 +356,7 @@ public static Set getAllKeysAndValues(Map newItems) { private static boolean handleLanguageGroups( String value, XPathParts parts, Multimap languageGroups) { String parent = parts.getAttributeValue(-1, "parent"); - List children = SupplementalDataInfo.WHITESPACE_SPLTTER.splitToList(value); + List children = SupplementalDataInfo.WHITESPACE_SPLITTER.splitToList(value); languageGroups.putAll(parent, children); return true; } diff --git a/tools/cldr-code/src/main/java/org/unicode/cldr/util/SupplementalDataInfo.java b/tools/cldr-code/src/main/java/org/unicode/cldr/util/SupplementalDataInfo.java index 19355579dfb..94295daef18 100644 --- a/tools/cldr-code/src/main/java/org/unicode/cldr/util/SupplementalDataInfo.java +++ b/tools/cldr-code/src/main/java/org/unicode/cldr/util/SupplementalDataInfo.java @@ -318,8 +318,6 @@ public PopulationData setWritingPopulation(double writingPopulation) { } } - static final Pattern WHITESPACE_PATTERN = PatternCache.get("\\s+"); - /** Simple language/script/region information */ public static class BasicLanguageData implements Comparable, @@ -349,18 +347,17 @@ public BasicLanguageData setType(Type type) { // Adding scripts but leaving 0 as a placeholder when there is no population data // input: a whitespace-separated list of scripts public BasicLanguageData setScriptsWithoutPopulation(String scriptTokens) { - List scripts = new ArrayList<>(); - if (scriptTokens != null) { - scripts = Arrays.asList(WHITESPACE_PATTERN.split(scriptTokens)); - } - return setScriptsWithoutPopulation(scripts); + return setScriptsWithoutPopulation( + scriptTokens == null ? null : WHITESPACE_SPLITTER.splitToList(scriptTokens)); } // Adding scripts but leaving 0 as a placeholder when there is no population data public BasicLanguageData setScriptsWithoutPopulation(Collection scripts) { Map scriptsByPopulation = new TreeMap<>(); - for (String script : scripts) { - scriptsByPopulation.put(script, 0); + if (scripts != null) { + for (String script : scripts) { + scriptsByPopulation.put(script, 0); + } } return setScripts(scriptsByPopulation); } @@ -369,7 +366,7 @@ public BasicLanguageData setTerritories(String territoryTokens) { return setTerritories( territoryTokens == null ? null - : Arrays.asList(WHITESPACE_PATTERN.split(territoryTokens))); + : WHITESPACE_SPLITTER.splitToList(territoryTokens)); } public BasicLanguageData setScripts(Map newScripts) { @@ -379,7 +376,9 @@ public BasicLanguageData setScripts(Map newScripts) { // TODO add error checking scripts = Collections.emptySet(); scriptsByPopulation = new TreeMap<>(); - addScripts(newScripts); + if (newScripts != null) { + addScripts(newScripts); + } return this; } @@ -1686,7 +1685,7 @@ private boolean handleUnitPreferences(XPathValue parts, String value) { private boolean handleLanguageGroups(String value, XPathValue parts) { String parent = parts.getAttributeValue(-1, "parent"); - List children = WHITESPACE_SPLTTER.splitToList(value); + List children = WHITESPACE_SPLITTER.splitToList(value); languageGroups.putAll(parent, children); return true; } @@ -1862,7 +1861,7 @@ private boolean handleLanguageMatcher(XPathValue parts) { switch (parts.getElement(3)) { case "paradigmLocales": List locales = - WHITESPACE_SPLTTER.splitToList(parts.getAttributeValue(3, "locales")); + WHITESPACE_SPLITTER.splitToList(parts.getAttributeValue(3, "locales")); // TODO // LanguageMatchData languageMatchData = // languageMatchData.get(type); @@ -5107,7 +5106,7 @@ public File getDirectory() { return directory; } - public static final Splitter WHITESPACE_SPLTTER = + public static final Splitter WHITESPACE_SPLITTER = Splitter.on(PatternCache.get("\\s+")).omitEmptyStrings(); public static final class AttributeValidityInfo { @@ -5155,7 +5154,7 @@ public AttributeValidityInfo( this.dtds = Collections.singleton(DtdType.ldml); } else { Set temp = EnumSet.noneOf(DtdType.class); - for (String s : WHITESPACE_SPLTTER.split(dtds)) { + for (String s : WHITESPACE_SPLITTER.split(dtds)) { temp.add(DtdType.fromElement(s)); } this.dtds = Collections.unmodifiableSet(temp); @@ -5164,10 +5163,10 @@ public AttributeValidityInfo( this.elements = elements == null ? Collections.EMPTY_SET - : With.in(WHITESPACE_SPLTTER.split(elements)) + : With.in(WHITESPACE_SPLITTER.split(elements)) .toUnmodifiableCollection(new HashSet()); this.attributes = - With.in(WHITESPACE_SPLTTER.split(attributes)) + With.in(WHITESPACE_SPLITTER.split(attributes)) .toUnmodifiableCollection(new HashSet()); this.order = order; } diff --git a/tools/cldr-code/src/main/resources/org/unicode/cldr/util/data/country_language_population.tsv b/tools/cldr-code/src/main/resources/org/unicode/cldr/util/data/country_language_population.tsv index 04fd3895bd3..2155b596d52 100644 --- a/tools/cldr-code/src/main/resources/org/unicode/cldr/util/data/country_language_population.tsv +++ b/tools/cldr-code/src/main/resources/org/unicode/cldr/util/data/country_language_population.tsv @@ -507,7 +507,8 @@ Guinea GN "11,855,411" 41% "27,970,000,000" official French fr 27.2% https://o Guinea GN "11,855,411" 41% "27,970,000,000" Fulah ff 26% Guinea GN "11,855,411" 41% "27,970,000,000" Fulah (Adlam) ff_Adlm 1 No Data Available at present. Guinea GN "11,855,411" 41% "27,970,000,000" Kpelle kpe 3.8% -Guinea GN "11,855,411" 41% "27,970,000,000" Mandingo (N’Ko) man_Nkoo 23% +Guinea GN "11,855,411" 41% "27,970,000,000" Mandingo man 23% +Guinea GN "11,855,411" 41% "27,970,000,000" Mandingo (N’Ko) man_Nkoo 23% 5% Actually literacy in Nko writing unknown but historically they used the Latin script Guinea GN "11,855,411" 41% "27,970,000,000" N’Ko nqo 5% No figures available for this language. Estimating at 5%. Guinea GN "11,855,411" 41% "27,970,000,000" Susu sus 11% Guinea-Bissau GW "1,833,247" 55% "3,171,000,000" Fulah ff 1 No estimate available.