// Origin: tools/cldr-code/src/main/java/com/ibm/icu/util/MatchElementAttribute.java
// (declared in package com.ibm.icu.util). The same change set also modifies
// common/dtd/ldmlSupplemental.dtd (one hunk starting at line 1155).

import java.util.HashMap;
import java.util.HashSet;
import java.util.Map;
import java.util.Set;

/**
 * A collection of (element name, attribute name) pairs that can be queried with wildcard
 * support: the empty string, used as either half of a stored pair, matches any value in that
 * position.
 */
public class MatchElementAttribute {
    /** Stored in place of an element or attribute name to mean "any". */
    private static final String WILDCARD = "";

    /** Attribute names registered for each element name; either side may be the wildcard. */
    private final Map<String, Set<String>> attributesByElement = new HashMap<>();

    /**
     * Registers one or more (element, attribute) pairs.
     *
     * @param elementAttributePairs alternating element and attribute names; use {@code ""} for a
     *     wildcard on either side
     * @return this instance, to allow chaining
     * @throws IllegalArgumentException if an odd number of strings is supplied; nothing is added
     *     in that case
     */
    public MatchElementAttribute add(String... elementAttributePairs) {
        // Validate before mutating so a malformed call cannot leave a half-applied update behind.
        if (elementAttributePairs.length % 2 != 0) {
            throw new IllegalArgumentException(
                    "Expected element/attribute pairs but got an odd number of strings: "
                            + elementAttributePairs.length);
        }
        for (int i = 0; i < elementAttributePairs.length; i += 2) {
            attributesByElement
                    .computeIfAbsent(elementAttributePairs[i], key -> new HashSet<>())
                    .add(elementAttributePairs[i + 1]);
        }
        return this;
    }

    /**
     * Tests whether the given element/attribute combination was registered, either exactly or
     * through a wildcard on one side.
     *
     * @param element the element name to look up
     * @param attribute the attribute name to look up
     * @return true if (element, attribute), ("", attribute) or (element, "") was added
     */
    public boolean matches(String element, String attribute) {
        return containsEntry(element, attribute)
                || containsEntry(WILDCARD, attribute)
                || containsEntry(element, WILDCARD);
    }

    /** Returns true if exactly this (element, attribute) pair has been stored. */
    private boolean containsEntry(String element, String attribute) {
        final Set<String> attributes = attributesByElement.get(element);
        return attributes != null && attributes.contains(attribute);
    }
}

// Next in the change set: tools/cldr-code/src/main/java/org/unicode/cldr/util/DtdData.java,
// which gains `import java.util.Stack;` and a new nested class DtdGuide placed after
// DtdData.getElement(String xpath, int elementIndex).
xpath, int elementIndex) { .getElementFromName() .get(parts.getElement(elementIndex)); } + + public static class DtdGuide { + public interface DtdVisitor { + /** Return false if all children should be skipped */ + public boolean visit( + DtdType dtdType, Stack ancestors, Element child, Attribute attribute); + } + + private Set seenElements = new HashSet<>(); + private DtdVisitor dtdVisitor; + private DtdType dtdType; + private Stack ancestors = new Stack<>(); + private boolean skipDeprecated; + + public DtdGuide(boolean skipDeprecated, DtdVisitor dtdVisitor) { + this.dtdVisitor = dtdVisitor; + this.skipDeprecated = skipDeprecated; + process(DtdType.values()); + } + + public void process(DtdType... dtdTypes) { + for (DtdType dt : dtdTypes.length != 0 ? dtdTypes : DtdType.values()) { + dtdType = dt; + process(getInstance(dtdType).ROOT); + } + } + + private void process(Element element) { + if (seenElements.contains(element) || !skipDeprecated && element.isDeprecated()) { + return; + } + seenElements.add(element); + for (Attribute attribute : element.getAttributes().keySet()) { + if (!skipDeprecated && attribute.isDeprecated()) { + continue; + } + if (!dtdVisitor.visit(dtdType, ancestors, element, attribute)) { + return; + } + } + ancestors.push(element); + for (Element child : element.getChildren().keySet()) { + process(child); + } + ancestors.pop(); + } + } } diff --git a/tools/cldr-code/src/test/java/org/unicode/cldr/unittest/TestPaths.java b/tools/cldr-code/src/test/java/org/unicode/cldr/unittest/TestPaths.java index 8f68dd29bc5..e4310727d41 100644 --- a/tools/cldr-code/src/test/java/org/unicode/cldr/unittest/TestPaths.java +++ b/tools/cldr-code/src/test/java/org/unicode/cldr/unittest/TestPaths.java @@ -1,6 +1,7 @@ package org.unicode.cldr.unittest; import com.google.common.collect.ImmutableSet; +import com.ibm.icu.util.MatchElementAttribute; import java.io.File; import java.util.ArrayList; import java.util.Arrays; @@ -12,6 +13,7 @@ import java.util.Map; import 
java.util.Map.Entry; import java.util.Set; +import java.util.Stack; import java.util.TreeSet; import org.unicode.cldr.util.CLDRConfig; import org.unicode.cldr.util.CLDRFile; @@ -24,8 +26,10 @@ import org.unicode.cldr.util.CldrUtility; import org.unicode.cldr.util.DtdData; import org.unicode.cldr.util.DtdData.Attribute; +import org.unicode.cldr.util.DtdData.DtdGuide.DtdVisitor; import org.unicode.cldr.util.DtdData.Element; import org.unicode.cldr.util.DtdData.ElementType; +import org.unicode.cldr.util.DtdData.ValueStatus; import org.unicode.cldr.util.DtdType; import org.unicode.cldr.util.Pair; import org.unicode.cldr.util.PathHeader; @@ -689,4 +693,84 @@ private int removeNonDistinguishing( } return counter; } + + public void testForUndefined() { + DtdVisitor visitor = + new DtdVisitor() { + final MatchElementAttribute skipAttributeNames = + new MatchElementAttribute() + .add( // Add, once checking to make sure that these are safe. + // Pairs of element, attribute + "", "references", // + "", "cp", // + "version", "", // + // "ruleset", "type", // + "parseLenient", "", // UnicodeSet + "ruleset", "", // special structure + "casingItem", "", // Special structure + "unitIdComponent", "", // small, relatively fixed set + "unitConstant", "", // only used internally/... + "unitQuantity", + "", // quantity and and baseUnit will be in + // validity/... + "convertUnit", "", // source and baseUnit will be in + // validity/... + "unitPreferences", + "category", // category == quantity will be in + // validity/... + "unitPreferences", + "usage", // usage will be in validity/... 
+ "unitPreference", "", // not ids + "transform", "", // not ids + "numberingSystem", "rules", // rule format can't match + "coverageVariable", "", // no ids + "coverageLevel", "", // no ids + "approvalRequirement", "", // no ids + "pathMatch", "", // no ids + "languageMatch", "", // no ids + "rgPath", "", // no ids + "mapTimezones", "", // ids checked elsewhere + "mapZone", "" // ids checked elsewhere + ); + final Set skipElementAndChildren = Set.of("keyboard3", "keyboardTest3"); + + @Override + public boolean visit( + DtdType dtdType, + Stack ancestors, + Element element, + Attribute attribute) { + if (skipElementAndChildren.contains(element.getName())) { + return false; + } + final String attributeName = attribute.getName(); + if (skipAttributeNames.matches(element.getName(), attributeName)) { + return true; + } + final ValueStatus valueStatus = attribute.getValueStatus("undefined"); + attribute.toString(); + if (valueStatus == ValueStatus.valid) { + errln( + String.format( + "Can match 'undefined': type=%s\tancestors=%s\telement=%s\tattribute=%s\tmatch=%s", + dtdType, + ancestors, + element, + attributeName, + attribute.getMatchString())); + } else { + logln( + String.format( + "visiting: type=%s\tparent=%s\telement=%s\tancestors=%s\tmatch=%s", + dtdType, + ancestors, + element, + attributeName, + attribute.getMatchString())); + } + return true; + } + }; + new DtdData.DtdGuide(true, visitor).process(); + } }