Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

CLDR-17561 Utility for visiting DtdData elements #3646

Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 4 additions & 4 deletions common/dtd/ldmlSupplemental.dtd
Original file line number Diff line number Diff line change
Expand Up @@ -1155,17 +1155,17 @@ CLDR data files are interpreted according to the LDML specification (http://unic

<!ELEMENT matchVariable EMPTY >
<!ATTLIST matchVariable id CDATA #REQUIRED >
<!--@MATCH:regex/\$[a-zA-Z0-9_]+-->
<!--@MATCH:regex/(?!undefined)\$[a-zA-Z0-9_]+-->
<!ATTLIST matchVariable value CDATA #REQUIRED >
<!--@MATCH:any-->
<!--@MATCH:regex/([A-Z]{2}|[0-9]{3})(\+([A-Z]{2}|[0-9]{3}))*-->
<!--@VALUE-->

<!ELEMENT languageMatch EMPTY >
<!--@ORDERED-->
<!ATTLIST languageMatch desired CDATA #REQUIRED >
<!--@MATCH:any/TODO-->
<!--@MATCH:regex/(?!undefined).*-->
<!ATTLIST languageMatch supported CDATA #REQUIRED >
<!--@MATCH:any/TODO-->
<!--@MATCH:any/regex/(?!undefined).*-->
<!ATTLIST languageMatch percent NMTOKEN #IMPLIED >
<!--@MATCH:range/0~100-->
<!--@VALUE-->
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
package com.ibm.icu.util;

import com.google.common.collect.HashMultimap;
import com.google.common.collect.Multimap;

public class MatchElementAttribute {
private Multimap<String, String> matchElementAttribute =
HashMultimap.create(); // "" is a wildcard

public MatchElementAttribute add(String... elementAttributePairs) {
for (int i = 0; i < elementAttributePairs.length; i += 2) {
matchElementAttribute.put(elementAttributePairs[i], elementAttributePairs[i + 1]);
}
return this;
}

public boolean matches(String element, String attribute) {
return matchElementAttribute.containsEntry(element, attribute)
|| matchElementAttribute.containsEntry("", attribute)
|| matchElementAttribute.containsEntry(element, "");
}
}
48 changes: 48 additions & 0 deletions tools/cldr-code/src/main/java/org/unicode/cldr/util/DtdData.java
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@
import java.util.Map;
import java.util.Map.Entry;
import java.util.Set;
import java.util.Stack;
import java.util.TreeMap;
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.ConcurrentMap;
Expand Down Expand Up @@ -2269,4 +2270,51 @@ public static Element getElement(String xpath, int elementIndex) {
.getElementFromName()
.get(parts.getElement(elementIndex));
}

public static class DtdGuide {
public interface DtdVisitor {
/** Return false if all children should be skipped */
public boolean visit(
DtdType dtdType, Stack<Element> ancestors, Element child, Attribute attribute);
}

private Set<Element> seenElements = new HashSet<>();
private DtdVisitor dtdVisitor;
private DtdType dtdType;
private Stack<Element> ancestors = new Stack<>();
private boolean skipDeprecated;

public DtdGuide(boolean skipDeprecated, DtdVisitor dtdVisitor) {
this.dtdVisitor = dtdVisitor;
this.skipDeprecated = skipDeprecated;
process(DtdType.values());
}

public void process(DtdType... dtdTypes) {
for (DtdType dt : dtdTypes.length != 0 ? dtdTypes : DtdType.values()) {
dtdType = dt;
process(getInstance(dtdType).ROOT);
}
}

private void process(Element element) {
if (seenElements.contains(element) || !skipDeprecated && element.isDeprecated()) {
return;
}
seenElements.add(element);
for (Attribute attribute : element.getAttributes().keySet()) {
if (!skipDeprecated && attribute.isDeprecated()) {
continue;
}
if (!dtdVisitor.visit(dtdType, ancestors, element, attribute)) {
return;
}
}
ancestors.push(element);
for (Element child : element.getChildren().keySet()) {
process(child);
}
ancestors.pop();
}
}
}
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
package org.unicode.cldr.unittest;

import com.google.common.collect.ImmutableSet;
import com.ibm.icu.util.MatchElementAttribute;
import java.io.File;
import java.util.ArrayList;
import java.util.Arrays;
Expand All @@ -12,6 +13,7 @@
import java.util.Map;
import java.util.Map.Entry;
import java.util.Set;
import java.util.Stack;
import java.util.TreeSet;
import org.unicode.cldr.util.CLDRConfig;
import org.unicode.cldr.util.CLDRFile;
Expand All @@ -24,8 +26,10 @@
import org.unicode.cldr.util.CldrUtility;
import org.unicode.cldr.util.DtdData;
import org.unicode.cldr.util.DtdData.Attribute;
import org.unicode.cldr.util.DtdData.DtdGuide.DtdVisitor;
import org.unicode.cldr.util.DtdData.Element;
import org.unicode.cldr.util.DtdData.ElementType;
import org.unicode.cldr.util.DtdData.ValueStatus;
import org.unicode.cldr.util.DtdType;
import org.unicode.cldr.util.Pair;
import org.unicode.cldr.util.PathHeader;
Expand Down Expand Up @@ -689,4 +693,84 @@ private int removeNonDistinguishing(
}
return counter;
}

public void testForUndefined() {
DtdVisitor visitor =
new DtdVisitor() {
final MatchElementAttribute skipAttributeNames =
new MatchElementAttribute()
.add( // Add, once checking to make sure that these are safe.
// Pairs of element, attribute
"", "references", //
"", "cp", //
"version", "", //
// "ruleset", "type", //
Copy link
Member

@btangmu btangmu Apr 23, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Commented-out code and empty comments are messy to begin with, and messier due to deep indentation and formatting that breaks up some but not all pairs. Ideally the parameters for add() might be moved to a named constant defined outside the method call, less indented, maybe as an array of Map.Entry objects for element/attribute pairs...

The semantics of these elements and attributes are still mostly beyond my understanding. I sometimes wonder whether defining constants for such strings, maybe as members of enums, and referencing them as such rather than as string literals could be helpful for understanding the code and maybe for catching typos...

"parseLenient", "", // UnicodeSet
"ruleset", "", // special structure
"casingItem", "", // Special structure
"unitIdComponent", "", // small, relatively fixed set
"unitConstant", "", // only used internally/...
"unitQuantity",
"", // quantity and and baseUnit will be in
// validity/...
"convertUnit", "", // source and baseUnit will be in
// validity/...
"unitPreferences",
"category", // category == quantity will be in
// validity/...
"unitPreferences",
"usage", // usage will be in validity/...
"unitPreference", "", // not ids
"transform", "", // not ids
"numberingSystem", "rules", // rule format can't match
"coverageVariable", "", // no ids
"coverageLevel", "", // no ids
"approvalRequirement", "", // no ids
"pathMatch", "", // no ids
"languageMatch", "", // no ids
"rgPath", "", // no ids
"mapTimezones", "", // ids checked elsewhere
"mapZone", "" // ids checked elsewhere
);
final Set<String> skipElementAndChildren = Set.of("keyboard3", "keyboardTest3");

@Override
public boolean visit(
DtdType dtdType,
Stack<Element> ancestors,
Element element,
Attribute attribute) {
if (skipElementAndChildren.contains(element.getName())) {
return false;
}
final String attributeName = attribute.getName();
if (skipAttributeNames.matches(element.getName(), attributeName)) {
return true;
}
final ValueStatus valueStatus = attribute.getValueStatus("undefined");
attribute.toString();
if (valueStatus == ValueStatus.valid) {
errln(
String.format(
"Can match 'undefined': type=%s\tancestors=%s\telement=%s\tattribute=%s\tmatch=%s",
dtdType,
ancestors,
element,
attributeName,
attribute.getMatchString()));
} else {
logln(
String.format(
"visiting: type=%s\tparent=%s\telement=%s\tancestors=%s\tmatch=%s",
dtdType,
ancestors,
element,
attributeName,
attribute.getMatchString()));
}
return true;
}
};
new DtdData.DtdGuide(true, visitor).process();
}
}
Loading