From 8dea7bcd97c1d2f378a0b13ae1e54426df13af7d Mon Sep 17 00:00:00 2001 From: macchiati Date: Sat, 29 Jun 2024 11:18:23 -1000 Subject: [PATCH 1/3] CLDR-17756 first cut at test for missing examples --- .../cldr/unittest/TestExampleGenerator.java | 151 ++++++++++++++++++ 1 file changed, 151 insertions(+) diff --git a/tools/cldr-code/src/test/java/org/unicode/cldr/unittest/TestExampleGenerator.java b/tools/cldr-code/src/test/java/org/unicode/cldr/unittest/TestExampleGenerator.java index da1d251ee87..c9fbe34d8f5 100644 --- a/tools/cldr-code/src/test/java/org/unicode/cldr/unittest/TestExampleGenerator.java +++ b/tools/cldr-code/src/test/java/org/unicode/cldr/unittest/TestExampleGenerator.java @@ -1,6 +1,13 @@ package org.unicode.cldr.unittest; +import com.google.common.base.Joiner; +import com.google.common.base.Objects; +import com.google.common.collect.ComparisonChain; +import com.google.common.collect.ImmutableMultimap; import com.google.common.collect.ImmutableSet; +import com.google.common.collect.Iterables; +import com.google.common.collect.Multimap; +import com.google.common.collect.TreeMultimap; import com.ibm.icu.dev.test.TestFmwk; import java.io.IOException; import java.util.ArrayList; @@ -20,13 +27,20 @@ import org.unicode.cldr.util.CLDRConfig; import org.unicode.cldr.util.CLDRFile; import org.unicode.cldr.util.CldrUtility; +import org.unicode.cldr.util.Counter; +import org.unicode.cldr.util.DtdData; +import org.unicode.cldr.util.DtdType; import org.unicode.cldr.util.Factory; import org.unicode.cldr.util.GrammarInfo; import org.unicode.cldr.util.GrammarInfo.CaseValues; import org.unicode.cldr.util.GrammarInfo.GrammaticalFeature; import org.unicode.cldr.util.GrammarInfo.GrammaticalScope; import org.unicode.cldr.util.GrammarInfo.GrammaticalTarget; +import org.unicode.cldr.util.Level; import org.unicode.cldr.util.Pair; +import org.unicode.cldr.util.PathHeader; +import org.unicode.cldr.util.PathHeader.PageId; +import org.unicode.cldr.util.PathHeader.SectionId; import org.unicode.cldr.util.PathStarrer; import org.unicode.cldr.util.SupplementalDataInfo; import org.unicode.cldr.util.SupplementalDataInfo.PluralInfo; @@ -34,9 +48,12 @@ import org.unicode.cldr.util.SupplementalDataInfo.PluralType; import org.unicode.cldr.util.UnitPathType; import org.unicode.cldr.util.With; +import org.unicode.cldr.util.XPathParts; public class TestExampleGenerator extends TestFmwk { + private static final Joiner TAB_JOINER = Joiner.on('\t'); + boolean showTranslationPaths = CldrUtility.getProperty("TestExampleGenerator:showTranslationPaths", false); @@ -1645,4 +1662,138 @@ public void TestUnicodeSetExamples() { assertEquals(locale + path + "=" + value, expected, actual); } } + + static final class MissingKey implements Comparable { + final SectionId sectionId; + final PageId pageId; + final String starred; + + public MissingKey(SectionId sectionId, PageId pageId, String starred) { + this.sectionId = sectionId; + this.pageId = pageId; + this.starred = starred; + } + + @Override + public int compareTo(MissingKey o) { + return ComparisonChain.start() + .compare(sectionId, o.sectionId) + .compare(pageId, o.pageId) + .compare(starred, o.starred) + .result(); + } + + @Override + public boolean equals(Object obj) { + return compareTo((MissingKey) obj) == 0; + } + + @Override + public int hashCode() { + return Objects.hashCode(sectionId, pageId, starred); + } + } + + static final Multimap OK_IF_MISSING = + ImmutableMultimap.builder() + .put("//ldml/characters/moreInformation", "") + // mul➔«Multiple languages»; zxx➔«No linguistic content» + .putAll( + "//ldml/localeDisplayNames/languages/language[@type=\"*\"]", + "mul", + "zxx") + .build(); + + public void TestForMissing() { + CLDRFile cldrFile = info.getEnglish(); + ExampleGenerator exampleGenerator = + new ExampleGenerator(info.getEnglish(), info.getEnglish()); + PathStarrer ps = new PathStarrer(); + ps.setSubstitutionPattern("*"); + Counter countWithExamples = new Counter<>(); + Map samplesForWith = new HashMap<>(); + Counter countWithoutExamples = new Counter<>(); + Multimap samplesForWithout = TreeMultimap.create(); + DtdData dtdData = DtdData.getInstance(DtdType.ldml); + PathHeader.Factory phf = PathHeader.getFactory(); + final String separator = "•"; + + for (String xpath : cldrFile.fullIterable()) { + if (xpath.endsWith("/alias")) { + continue; + } + final XPathParts parts = XPathParts.getFrozenInstance(xpath); + if (dtdData.isDeprecated(parts)) { + continue; + } + Level level = SDI.getCoverageLevel(xpath, "en"); + if (level.compareTo(Level.COMPREHENSIVE) == 0) { + continue; + } + String value = cldrFile.getStringValue(xpath); + String starred = ps.set(xpath); + String attrs = ps.getAttributesString(separator); + if (OK_IF_MISSING.containsEntry(starred, attrs)) { + logln("OK if missing: " + starred + ";\t" + attrs); + continue; + } + String example = null; + PathHeader ph = phf.fromPath(xpath); + String heading = ph.getSection() + " > " + ph.getPageId(); + SectionId section = ph.getSectionId(); + MissingKey key = new MissingKey(ph.getSectionId(), ph.getPageId(), starred); + try { + example = exampleGenerator.getExampleHtml(xpath, value); + } catch (Exception e) { + } + if (example == null) { + samplesForWithout.put(key, sampleAttrAndValue(ps, separator, value)); + countWithoutExamples.add(key, 1); + } else { + if (!samplesForWith.containsKey(key)) { + samplesForWith.put(key, sampleAttrAndValue(ps, separator, value)); + } + countWithExamples.add(key, 1); + } + } + Set keys = new TreeSet<>(); + keys.addAll(countWithoutExamples.keySet()); + keys.addAll(countWithExamples.keySet()); + List missingItems = new ArrayList<>(); + for (MissingKey key : keys) { + final long countWithout = countWithoutExamples.get(key); + if (countWithout == 0) { // ok, no missing + continue; + } + final Collection sampleForWithoutItem = samplesForWithout.get(key); + final String sampleForWithItem = samplesForWith.get(key); + final long countWith = countWithExamples.get(key); + final double doneRatio = countWith / (double) (countWith + countWithout); + missingItems.add( + TAB_JOINER.join( + doneRatio, + countWithout, + (sampleForWithItem == null + ? sampleForWithoutItem.iterator().next() + : Joiner.on("; ") + .join(Iterables.limit(sampleForWithoutItem, 5))), + countWith, + (sampleForWithItem == null ? "n/a" : sampleForWithItem), + key.sectionId, + key.pageId, + key.starred)); + } + if (!missingItems.isEmpty()) { + errln( + "Missing Examples:\t" + + missingItems.size() + + "\n" + + "\nDone?\tWithout\tSample Attrs\tWith\tSample Attrs\tSection\tPage\tStarred Pattern\n" + + Joiner.on("\n").join(missingItems)); + } + } + + public String sampleAttrAndValue(PathStarrer ps, final String separator, String value) { + return ps.getAttributesString(separator) + "➔«" + value + "»"; + } } From dc5c4c0a9d38858f0f41ac5304c83dd99c42c291 Mon Sep 17 00:00:00 2001 From: macchiati Date: Sat, 29 Jun 2024 20:14:39 -1000 Subject: [PATCH 2/3] CLDR-17756 Minor cleanup, allow multiple locales --- .../cldr/unittest/TestExampleGenerator.java | 164 +++++++++--------- 1 file changed, 83 insertions(+), 81 deletions(-) diff --git a/tools/cldr-code/src/test/java/org/unicode/cldr/unittest/TestExampleGenerator.java b/tools/cldr-code/src/test/java/org/unicode/cldr/unittest/TestExampleGenerator.java index c9fbe34d8f5..f98e6699154 100644 --- a/tools/cldr-code/src/test/java/org/unicode/cldr/unittest/TestExampleGenerator.java +++ b/tools/cldr-code/src/test/java/org/unicode/cldr/unittest/TestExampleGenerator.java @@ -1705,91 +1705,93 @@ public int hashCode() { .build(); public void TestForMissing() { - CLDRFile cldrFile = info.getEnglish(); - ExampleGenerator exampleGenerator = - new ExampleGenerator(info.getEnglish(), info.getEnglish()); - PathStarrer ps = new PathStarrer(); - ps.setSubstitutionPattern("*"); - Counter countWithExamples = new Counter<>(); - Map samplesForWith = new HashMap<>(); - Counter countWithoutExamples = new Counter<>(); - Multimap samplesForWithout = TreeMultimap.create(); - DtdData dtdData = DtdData.getInstance(DtdType.ldml); - PathHeader.Factory phf = PathHeader.getFactory(); - final String separator = "•"; - - for (String xpath : cldrFile.fullIterable()) { - if (xpath.endsWith("/alias")) { - continue; - } - final XPathParts parts = XPathParts.getFrozenInstance(xpath); - if (dtdData.isDeprecated(parts)) { - continue; - } - Level level = SDI.getCoverageLevel(xpath, "en"); - if (level.compareTo(Level.COMPREHENSIVE) == 0) { - continue; - } - String value = cldrFile.getStringValue(xpath); - String starred = ps.set(xpath); - String attrs = ps.getAttributesString(separator); - if (OK_IF_MISSING.containsEntry(starred, attrs)) { - logln("OK if missing: " + starred + ";\t" + attrs); - continue; - } - String example = null; - PathHeader ph = phf.fromPath(xpath); - String heading = ph.getSection() + " > " + ph.getPageId(); - SectionId section = ph.getSectionId(); - MissingKey key = new MissingKey(ph.getSectionId(), ph.getPageId(), starred); - try { - example = exampleGenerator.getExampleHtml(xpath, value); - } catch (Exception e) { + Factory factory = info.getCldrFactory(); // don't worry about examples for annotations + for (String localeId : List.of("en", "de")) { + CLDRFile cldrFile = factory.make(localeId, true); + ExampleGenerator exampleGenerator = new ExampleGenerator(cldrFile, info.getEnglish()); + PathStarrer ps = new PathStarrer(); + ps.setSubstitutionPattern("*"); + Counter countWithExamples = new Counter<>(); + Map samplesForWith = new HashMap<>(); + Counter countWithoutExamples = new Counter<>(); + Multimap samplesForWithout = TreeMultimap.create(); + DtdData dtdData = DtdData.getInstance(DtdType.ldml); + PathHeader.Factory phf = PathHeader.getFactory(); + final String separator = "•"; + + // collect all of the strings in the file to test + for (String xpath : cldrFile.fullIterable()) { + if (xpath.endsWith("/alias")) { + continue; + } + final XPathParts parts = XPathParts.getFrozenInstance(xpath); + if (dtdData.isDeprecated(parts)) { + continue; + } + Level level = SDI.getCoverageLevel(xpath, "en"); + if (level.compareTo(Level.COMPREHENSIVE) == 0) { + continue; + } + String value = cldrFile.getStringValue(xpath); + String starred = ps.set(xpath); + String attrs = ps.getAttributesString(separator); + if (OK_IF_MISSING.containsEntry(starred, attrs)) { + logln("OK if missing: " + starred + ";\t" + attrs); + continue; + } + String example = null; + PathHeader ph = phf.fromPath(xpath); + String heading = ph.getSection() + " > " + ph.getPageId(); + SectionId section = ph.getSectionId(); + MissingKey key = new MissingKey(ph.getSectionId(), ph.getPageId(), starred); + try { + example = exampleGenerator.getExampleHtml(xpath, value); + } catch (Exception e) { + } + if (example == null) { + samplesForWithout.put(key, sampleAttrAndValue(ps, separator, value)); + countWithoutExamples.add(key, 1); + } else { + if (!samplesForWith.containsKey(key)) { + samplesForWith.put(key, sampleAttrAndValue(ps, separator, value)); + } + countWithExamples.add(key, 1); + } } - if (example == null) { - samplesForWithout.put(key, sampleAttrAndValue(ps, separator, value)); - countWithoutExamples.add(key, 1); - } else { - if (!samplesForWith.containsKey(key)) { - samplesForWith.put(key, sampleAttrAndValue(ps, separator, value)); + Set keys = new TreeSet<>(); + keys.addAll(countWithoutExamples.keySet()); + keys.addAll(countWithExamples.keySet()); + List missingItems = new ArrayList<>(); + for (MissingKey key : keys) { + final long countWithout = countWithoutExamples.get(key); + if (countWithout == 0) { // ok, no missing + continue; } - countWithExamples.add(key, 1); + final Collection sampleForWithoutItem = samplesForWithout.get(key); + final String sampleForWithItem = samplesForWith.get(key); + final long countWith = countWithExamples.get(key); + final double doneRatio = countWith / (double) (countWith + countWithout); + missingItems.add( + TAB_JOINER.join( + doneRatio, + countWithout, + (sampleForWithItem == null + ? sampleForWithoutItem.iterator().next() + : Joiner.on("; ") + .join(Iterables.limit(sampleForWithoutItem, 5))), + countWith, + (sampleForWithItem == null ? "n/a" : sampleForWithItem), + key.sectionId, + key.pageId, + key.starred)); } - } - Set keys = new TreeSet<>(); - keys.addAll(countWithoutExamples.keySet()); - keys.addAll(countWithExamples.keySet()); - List missingItems = new ArrayList<>(); - for (MissingKey key : keys) { - final long countWithout = countWithoutExamples.get(key); - if (countWithout == 0) { // ok, no missing - continue; + if (!missingItems.isEmpty()) { + errln( + TAB_JOINER.join(localeId, "missing examples:", missingItems.size()) + + "\n" + + "\nDone?\tWithout\tSample Attrs\tWith\tSample Attrs\tSection\tPage\tStarred Pattern\n" + + Joiner.on("\n").join(missingItems)); } - final Collection sampleForWithoutItem = samplesForWithout.get(key); - final String sampleForWithItem = samplesForWith.get(key); - final long countWith = countWithExamples.get(key); - final double doneRatio = countWith / (double) (countWith + countWithout); - missingItems.add( - TAB_JOINER.join( - doneRatio, - countWithout, - (sampleForWithItem == null - ? sampleForWithoutItem.iterator().next() - : Joiner.on("; ") - .join(Iterables.limit(sampleForWithoutItem, 5))), - countWith, - (sampleForWithItem == null ? "n/a" : sampleForWithItem), - key.sectionId, - key.pageId, - key.starred)); - } - if (!missingItems.isEmpty()) { - errln( - "Missing Examples:\t" - + missingItems.size() - + "\n" - + "\nDone?\tWithout\tSample Attrs\tWith\tSample Attrs\tSection\tPage\tStarred Pattern\n" - + Joiner.on("\n").join(missingItems)); } } From 2387814e52549d99dc6b62d7525a9a0a66a01235 Mon Sep 17 00:00:00 2001 From: macchiati Date: Wed, 10 Jul 2024 17:50:34 -0700 Subject: [PATCH 3/3] CLDR-17756 Either skip failures or logKnown Issues --- .../unicode/cldr/unittest/TestCheckCLDR.java | 27 +- .../cldr/unittest/TestExampleGenerator.java | 332 ++++++++++++++++-- 2 files changed, 326 insertions(+), 33 deletions(-) diff --git a/tools/cldr-code/src/test/java/org/unicode/cldr/unittest/TestCheckCLDR.java b/tools/cldr-code/src/test/java/org/unicode/cldr/unittest/TestCheckCLDR.java index 86ccdd3eccc..13ac7d2f6d1 100644 --- a/tools/cldr-code/src/test/java/org/unicode/cldr/unittest/TestCheckCLDR.java +++ b/tools/cldr-code/src/test/java/org/unicode/cldr/unittest/TestCheckCLDR.java @@ -1089,7 +1089,7 @@ public void TestShowRowAction() { for (String locale : localesForRowAction) { DummyPathValueInfo dummyPathValueInfo = new DummyPathValueInfo(); - dummyPathValueInfo.locale = CLDRLocale.getInstance(locale); + dummyPathValueInfo.setLocale(CLDRLocale.getInstance(locale)); CLDRFile cldrFile = testInfo.getCldrFactory().make(locale, true); CLDRFile cldrFileUnresolved = testInfo.getCldrFactory().make(locale, false); @@ -1107,8 +1107,9 @@ public void TestShowRowAction() { for (PathHeader ph : sorted) { String path = ph.getOriginalPath(); SurveyToolStatus surveyToolStatus = ph.getSurveyToolStatus(); - dummyPathValueInfo.xpath = path; - dummyPathValueInfo.baselineValue = cldrFileUnresolved.getStringValue(path); + dummyPathValueInfo.setXpath(path); + dummyPathValueInfo.setBaselineValue( + cldrFileUnresolved.getStringValue(path)); StatusAction action = phase.getShowRowAction( dummyPathValueInfo, InputMethod.DIRECT, ph, dummyUserInfo); @@ -1126,7 +1127,7 @@ public void TestShowRowAction() { "vo ==> FORBID_READONLY", StatusAction.FORBID_READONLY, action); - } else if (dummyPathValueInfo.baselineValue == null) { + } else if (dummyPathValueInfo.getBaselineValue() == null) { if (!assertEquals( "missing ==> ALLOW", StatusAction.ALLOW, action)) { warnln("\t\t" + locale + "\t" + ph); @@ -1150,7 +1151,9 @@ public void TestShowRowAction() { } actionToExamplePath.put( key, - Pair.of(dummyPathValueInfo.baselineValue != null, path)); + Pair.of( + dummyPathValueInfo.getBaselineValue() != null, + path)); } } } @@ -1210,7 +1213,7 @@ public VoterInfo getVoterInfo() { } }; - private static class DummyPathValueInfo implements PathValueInfo { + public static class DummyPathValueInfo implements PathValueInfo { private CLDRLocale locale; private String xpath; private String baselineValue; @@ -1271,6 +1274,18 @@ public CLDRLocale getLocale() { public String getXpath() { return xpath; } + + public void setLocale(CLDRLocale locale) { + this.locale = locale; + } + + public void setXpath(String xpath) { + this.xpath = xpath; + } + + public void setBaselineValue(String baselineValue) { + this.baselineValue = baselineValue; + } } final Set cldrLocales = diff --git a/tools/cldr-code/src/test/java/org/unicode/cldr/unittest/TestExampleGenerator.java b/tools/cldr-code/src/test/java/org/unicode/cldr/unittest/TestExampleGenerator.java index f98e6699154..e9ed5968ff1 100644 --- a/tools/cldr-code/src/test/java/org/unicode/cldr/unittest/TestExampleGenerator.java +++ b/tools/cldr-code/src/test/java/org/unicode/cldr/unittest/TestExampleGenerator.java @@ -2,8 +2,8 @@ import com.google.common.base.Joiner; import com.google.common.base.Objects; +import com.google.common.base.Splitter; import com.google.common.collect.ComparisonChain; -import com.google.common.collect.ImmutableMultimap; import com.google.common.collect.ImmutableSet; import com.google.common.collect.Iterables; import com.google.common.collect.Multimap; @@ -20,12 +20,19 @@ import java.util.Map; import java.util.Map.Entry; import java.util.Set; +import java.util.TreeMap; import java.util.TreeSet; import java.util.stream.Collectors; +import org.unicode.cldr.test.CheckCLDR.InputMethod; +import org.unicode.cldr.test.CheckCLDR.Phase; +import org.unicode.cldr.test.CheckCLDR.StatusAction; import org.unicode.cldr.test.ExampleGenerator; import org.unicode.cldr.test.ExampleGenerator.UnitLength; +import org.unicode.cldr.unittest.TestCheckCLDR.DummyPathValueInfo; import org.unicode.cldr.util.CLDRConfig; import org.unicode.cldr.util.CLDRFile; +import org.unicode.cldr.util.CLDRInfo.UserInfo; +import org.unicode.cldr.util.CLDRLocale; import org.unicode.cldr.util.CldrUtility; import org.unicode.cldr.util.Counter; import org.unicode.cldr.util.DtdData; @@ -37,8 +44,10 @@ import org.unicode.cldr.util.GrammarInfo.GrammaticalScope; import org.unicode.cldr.util.GrammarInfo.GrammaticalTarget; import org.unicode.cldr.util.Level; +import org.unicode.cldr.util.Organization; import org.unicode.cldr.util.Pair; import org.unicode.cldr.util.PathHeader; +import org.unicode.cldr.util.PathHeader.BaseUrl; import org.unicode.cldr.util.PathHeader.PageId; import org.unicode.cldr.util.PathHeader.SectionId; import org.unicode.cldr.util.PathStarrer; @@ -47,11 +56,21 @@ import org.unicode.cldr.util.SupplementalDataInfo.PluralInfo.Count; import org.unicode.cldr.util.SupplementalDataInfo.PluralType; import org.unicode.cldr.util.UnitPathType; +import org.unicode.cldr.util.VoteResolver.VoterInfo; import org.unicode.cldr.util.With; import org.unicode.cldr.util.XPathParts; public class TestExampleGenerator extends TestFmwk { + private static final String SKIP = "SKIP"; + + private static final Joiner CR_TAB2_JOINER = Joiner.on("\n\t\t"); + + private static final boolean CHECK_ROW_ACTION = false; + + private static final Splitter SLASH2_SPLITTER = + Splitter.on("//").omitEmptyStrings().trimResults(); + private static final Joiner TAB_JOINER = Joiner.on('\t'); boolean showTranslationPaths = @@ -270,6 +289,7 @@ public void testCurrency() { "//ldml/dates/timeZoneNames/metazone[@type=\"([^\"]*+)\"]/short/daylight", "//ldml/personNames/personName[@order=\"([^\"]*+)\"][@length=\"([^\"]*+)\"][@formality=\"([^\"]*+)\"]/namePattern", "//ldml/personNames/personName[@order=\"([^\"]*+)\"][@length=\"([^\"]*+)\"][@usage=\"([^\"]*+)\"][@formality=\"([^\"]*+)\"]/namePattern"); // CLDR-15384 + // Add to above if the background SHOULD appear, but we don't have them yet. TODO Add later public void TestAllPaths() { @@ -1694,36 +1714,78 @@ public int hashCode() { } } - static final Multimap OK_IF_MISSING = - ImmutableMultimap.builder() - .put("//ldml/characters/moreInformation", "") - // mul➔«Multiple languages»; zxx➔«No linguistic content» - .putAll( - "//ldml/localeDisplayNames/languages/language[@type=\"*\"]", - "mul", - "zxx") - .build(); - public void TestForMissing() { Factory factory = info.getCldrFactory(); // don't worry about examples for annotations - for (String localeId : List.of("en", "de")) { + DtdData dtdData = DtdData.getInstance(DtdType.ldml); + PathHeader.Factory phf = PathHeader.getFactory(); + Set seenPaths = + new HashSet<>(); // assume whether there is an example is independent of locale, to + // speed up the test. + final String separator = "•"; + PathStarrer ps = new PathStarrer(); + ps.setSubstitutionPattern("*"); + + // Setup for calling phase.getShowRowAction + DummyPathValueInfo dummyPathValueInfo = null; + VoterInfo dummyVoterInfo = null; + UserInfo dummyUserInfo = null; + + // disabled, since it doesn't eliminate anything. However, left under a flag just in case it + // is useful later + if (CHECK_ROW_ACTION) { + dummyPathValueInfo = new DummyPathValueInfo(); + dummyVoterInfo = + new VoterInfo( + Organization.cldr, + org.unicode.cldr.util.VoteResolver.Level.vetter, + "somename"); + dummyUserInfo = + new UserInfo() { + @Override + public VoterInfo getVoterInfo() { + return dummyVoterInfo; + } + }; + } + + // Use representative locales: + // 'en' for the most coverage, + // 'de' and 'cs' for more complex inflections, + // 'ja' for CJK issues + + for (String localeId : List.of("de", "en", "cs", "ja")) { CLDRFile cldrFile = factory.make(localeId, true); + CLDRFile cldrFileUnresolved = cldrFile.getUnresolved(); + ExampleGenerator exampleGenerator = new ExampleGenerator(cldrFile, info.getEnglish()); - PathStarrer ps = new PathStarrer(); - ps.setSubstitutionPattern("*"); + if (CHECK_ROW_ACTION) { + dummyPathValueInfo.setLocale(CLDRLocale.getInstance(localeId)); + } + // for collecting data + Counter countWithExamples = new Counter<>(); Map samplesForWith = new HashMap<>(); Counter countWithoutExamples = new Counter<>(); Multimap samplesForWithout = TreeMultimap.create(); - DtdData dtdData = DtdData.getInstance(DtdType.ldml); - PathHeader.Factory phf = PathHeader.getFactory(); - final String separator = "•"; + Map sampleUrlForWithout = new TreeMap<>(); + TreeMultimap skipped = TreeMultimap.create(); + + // for each path in the file, check that there is an example + // or we know why not - // collect all of the strings in the file to test for (String xpath : cldrFile.fullIterable()) { + if (seenPaths.contains(xpath)) { + continue; + } + seenPaths.add(xpath); if (xpath.endsWith("/alias")) { continue; } + String value = cldrFile.getStringValue(xpath); + if (value == null) { + continue; + } + final XPathParts parts = XPathParts.getFrozenInstance(xpath); if (dtdData.isDeprecated(parts)) { continue; @@ -1732,24 +1794,37 @@ public void TestForMissing() { if (level.compareTo(Level.COMPREHENSIVE) == 0) { continue; } - String value = cldrFile.getStringValue(xpath); String starred = ps.set(xpath); String attrs = ps.getAttributesString(separator); - if (OK_IF_MISSING.containsEntry(starred, attrs)) { - logln("OK if missing: " + starred + ";\t" + attrs); - continue; - } - String example = null; + PathHeader ph = phf.fromPath(xpath); - String heading = ph.getSection() + " > " + ph.getPageId(); - SectionId section = ph.getSectionId(); + if (CHECK_ROW_ACTION) { + dummyPathValueInfo.setXpath(xpath); + dummyPathValueInfo.setBaselineValue(cldrFileUnresolved.getStringValue(xpath)); + StatusAction action = + Phase.SUBMISSION.getShowRowAction( + dummyPathValueInfo, InputMethod.DIRECT, ph, dummyUserInfo); + if (action.isForbidden()) { + System.out.println(xpath + " is forbidden"); + continue; + } + } + MissingKey key = new MissingKey(ph.getSectionId(), ph.getPageId(), starred); + String example = null; try { example = exampleGenerator.getExampleHtml(xpath, value); } catch (Exception e) { } if (example == null) { + String missingResult = getResult(starred, attrs); + if (missingResult != null) { + skipped.put(missingResult, starred); + continue; + } + samplesForWithout.put(key, sampleAttrAndValue(ps, separator, value)); + sampleUrlForWithout.put(key, ph.getUrl(BaseUrl.PRODUCTION, localeId)); countWithoutExamples.add(key, 1); } else { if (!samplesForWith.containsKey(key)) { @@ -1762,6 +1837,9 @@ public void TestForMissing() { keys.addAll(countWithoutExamples.keySet()); keys.addAll(countWithExamples.keySet()); List missingItems = new ArrayList<>(); + + // we use the missing keys, which sort by section, page, path + for (MissingKey key : keys) { final long countWithout = countWithoutExamples.get(key); if (countWithout == 0) { // ok, no missing @@ -1779,22 +1857,222 @@ public void TestForMissing() { ? sampleForWithoutItem.iterator().next() : Joiner.on("; ") .join(Iterables.limit(sampleForWithoutItem, 5))), + sampleUrlForWithout.get(key), countWith, (sampleForWithItem == null ? "n/a" : sampleForWithItem), key.sectionId, key.pageId, key.starred)); } + + // show all the skipped items, and logKnownIssue items + + for (Entry> entry : skipped.asMap().entrySet()) { + final String ticketComment = entry.getKey(); + final String paths = CR_TAB2_JOINER.join(entry.getValue()); + if (ticketComment.equals(SKIP)) { + logln(ticketComment + ";\n\t\t" + paths); + } else { + int spacePos = ticketComment.indexOf(' '); + logKnownIssue( + ticketComment.substring(0, spacePos), + ticketComment.substring(spacePos + 1) + + ")\n\t\t(For the following paths:\n\t\t" + + paths); + } + } + + // Here is where missing examples will show up. + // If it is ok to skip them (only when there is no reasonable example), add to + // HANDLE_MISSING data + // Otherwise add an example + if (!missingItems.isEmpty()) { errln( TAB_JOINER.join(localeId, "missing examples:", missingItems.size()) + "\n" - + "\nDone?\tWithout\tSample Attrs\tWith\tSample Attrs\tSection\tPage\tStarred Pattern\n" + + "\nDone?\tWithout\tSample Attrs\tURL\tWith\tSample Attrs\tSection\tPage\tStarred Pattern\n" + Joiner.on("\n").join(missingItems)); } } } + /** + * This is a mechanism for TestMissing exceptions: a) skipping the items where there are no + * reasonable examples b) logging known issues where we know what to do, and have filed tickets + * + *

Then only new missing examples will trigger errors. + * + *

If new structure is added, an example should be added at the same time if possible, + * otherwise it should be added with "OK". + */ + static final Map> HANDLE_MISSING; + + static { + // The format is 3 items + // a) a list of paths (separated by space or just concatenated) + // b) a return value. OK to just skip, otherwise + // c) a list of 1 or more attributes (like "mul", "zxx") or a wildcard "*" + + String[][] data = { + // mul➔«Multiple languages»; zxx➔«No linguistic content» + {SKIP, "//ldml/localeDisplayNames/languages/language[@type=\"*\"]", "mul", "zxx"}, + { + SKIP, + "//ldml/characters/moreInformation" + + "//ldml/dates/fields/field[@type=\"*\"]/relative[@type=\"*\"]" + + "//ldml/dates/timeZoneNames/gmtZeroFormat" + + "//ldml/dates/timeZoneNames/metazone[@type=\"*\"]/short/standard" + + "//ldml/numbers/symbols[@numberSystem=\"*\"]/infinity" + + "//ldml/numbers/symbols[@numberSystem=\"*\"]/nan" + + "//ldml/dates/calendars/calendar[@type=\"*\"]/eras/eraAbbr/era[@type=\"*\"][@alt=\"*\"]" + + "//ldml/dates/calendars/calendar[@type=\"*\"]/eras/eraNames/era[@type=\"*\"][@alt=\"*\"]" + + "//ldml/typographicNames/styleName[@type=\"*\"][@subtype=\"*\"][@alt=\"*\"]", + "*" + }, + { + "CLDR-17756 Add examples of date intervals", + "//ldml/dates/calendars/calendar[@type=\"*\"]/dateTimeFormats/intervalFormats/intervalFormatItem[@id=\"*\"]/greatestDifference[@id=\"*\"]", + "*" + }, + { + "CLDR-17756 Show \"{0} ¤¤\" with formatted number and ISO code, eg {0} ¤¤ becomes 3,5 EUR", + "//ldml/numbers/currencyFormats[@numberSystem=\"*\"]/currencyPatternAppendISO", + "*" + }, + { + "CLDR-17756 Show 2 currencies with pattern, eg EUR ➔ USD", + "//ldml/numbers/currencies/currency[@type=\"*\"]/displayName", + "*" + }, + { + "CLDR-17756 Show as part of a locale name", + "//ldml/localeDisplayNames/keys/key[@type=\"*\"]" + + "//ldml/localeDisplayNames/measurementSystemNames/measurementSystemName[@type=\"*\"]" + + "//ldml/localeDisplayNames/subdivisions/subdivision[@type=\"*\"]" + + "//ldml/localeDisplayNames/types/type[@key=\"*\"][@type=\"*\"]", + "*" + }, + { + "CLDR-17756 Show using two months, eg Januar - Juni", + "//ldml/dates/calendars/calendar[@type=\"*\"]/dateTimeFormats/intervalFormats/intervalFormatFallback", + "*" + }, + { + "CLDR-15078 Enable compound unit formatting", + "//ldml/units/unitLength[@type=\"*\"]/unit[@type=\"*\"]/unitPattern[@count=\"*\"]" + + "//ldml/units/unitLength[@type=\"*\"]/unit[@type=\"*\"]/unitPattern[@count=\"*\"][@case=\"*\"]", + "*" + }, + { + "CLDR-17756 Show font with field, eg: Helvetica (kursiv), Helvetica (Kursivstellung), Helvetica (vertikale Brüch)", + "//ldml/typographicNames/styleName[@type=\"*\"][@subtype=\"*\"]" + + "//ldml/typographicNames/axisName[@type=\"*\"]" + + "//ldml/typographicNames/featureName[@type=\"*\"]", + "*" + }, + { + "CLDR-17756 Show in date with both variants: formatting and standalone. That way people can see what difference it makes, eg between MMMM and LLLL", + "//ldml/dates/calendars/calendar[@type=\"*\"]/days/dayContext[@type=\"*\"]/dayWidth[@type=\"*\"]/day[@type=\"*\"]" + + "//ldml/dates/calendars/calendar[@type=\"*\"]/months/monthContext[@type=\"*\"]/monthWidth[@type=\"*\"]/month[@type=\"*\"]" + + "//ldml/dates/calendars/calendar[@type=\"*\"]/months/monthContext[@type=\"*\"]/monthWidth[@type=\"*\"]/month[@type=\"*\"][@yeartype=\"*\"]" + + "//ldml/dates/calendars/calendar[@type=\"*\"]/quarters/quarterContext[@type=\"*\"]/quarterWidth[@type=\"*\"]/quarter[@type=\"*\"]", + "*" + }, + { + "CLDR-17756 Show pattern with example", + "//ldml/dates/fields/field[@type=\"*\"]/relativePeriod", + "*" + }, + { + "CLDR-17756 Show sample name with 2 different values", + "//ldml/personNames/foreignSpaceReplacement" + + "//ldml/personNames/initialPattern[@type=\"*\"]" + + "//ldml/personNames/nativeSpaceReplacement" + + "//ldml/personNames/parameterDefault[@parameter=\"*\"]" + + "//ldml/personNames/sampleName[@item=\"*\"]/nameField[@type=\"*\"]", + "*" + }, + { + "CLDR-17756 Show two units with pattern, eg 'Meter ➔ Fuß'", + "//ldml/units/unitLength[@type=\"*\"]/unit[@type=\"*\"]/displayName", + "*" + }, + { + "CLDR-17756 Show with {0}: {0}, eg Monat: Januar", + "//ldml/dates/fields/field[@type=\"*\"]/displayName", + "*" + }, + { + "CLDR-5854 Show with appropriate amount, eg 'in 3 Jahren', and for all relatives > 1 day, add a time", + "//ldml/dates/fields/field[@type=\"*\"]/relativeTime[@type=\"*\"]/relativeTimePattern[@count=\"*\"]", + "*" + }, + { + "CLDR-17756 Show with formattted date, including era", + "//ldml/dates/calendars/calendar[@type=\"*\"]/eras/eraAbbr/era[@type=\"*\"]\n" + + "//ldml/dates/calendars/calendar[@type=\"*\"]/eras/eraNames/era[@type=\"*\"]", + "*" + }, + { + "CLDR-17756 Show with pattern, eg '30° Süd'", + "//ldml/units/unitLength[@type=\"*\"]/coordinateUnit/coordinateUnitPattern[@type=\"*\"]", + "*" + }, + { + "CLDR-17756 Show with pattern, eg Richtung: 30° Süd", + "//ldml/units/unitLength[@type=\"*\"]/coordinateUnit/displayName", + "*" + }, + { + "CLDR-17756 Show with sample characters (where possible, emoji)", + "//ldml/characterLabels/characterLabelPattern[@type=\"*\"][@count=\"*\"]\n" + + "//ldml/characterLabels/characterLabel[@type=\"*\"]\n" + + "//ldml/characterLabels/characterLabelPattern[@type=\"*\"]", + "*" + }, + { + "CLDR-17756 Use gender minimal pair patterns to show in context — look at the minimal pair examples, reversing the background", + "//ldml/units/unitLength[@type=\"*\"]/unit[@type=\"*\"]/gender", + "*" + } + }; + Map> _HANDLE_MISSING = new TreeMap<>(); + for (String[] row : data) { + if (row.length < 3) { + throw new IllegalArgumentException( + "Need 3+ values; see comments below HANDLE_MISSING"); + } + String result = row[0]; + String paths = row[1]; + for (String path : SLASH2_SPLITTER.split(paths)) { + path = "//" + path; + // note, the resulting attributeToResult may be empty + Map attributeToResult = _HANDLE_MISSING.get(path); + if (attributeToResult == null) { + _HANDLE_MISSING.put(path, attributeToResult = new TreeMap<>()); + } + for (int i = 2; i < row.length; ++i) { + String attribute = row[i]; + attributeToResult.put(attribute, result); + } + } + } + HANDLE_MISSING = CldrUtility.protectCollection(_HANDLE_MISSING); + } + + private String getResult(String starredPath, String attr) { + Map attributeToResult = HANDLE_MISSING.get(starredPath); + if (attributeToResult == null) { + return null; + } + String result = attributeToResult.get(attr); + if (result == null) { + result = attributeToResult.get("*"); // wildcard + } + return result; + } + public String sampleAttrAndValue(PathStarrer ps, final String separator, String value) { return ps.getAttributesString(separator) + "➔«" + value + "»"; }