Skip to content

Commit

Permalink
CLDR-14571 json: improve bcp47 (#1482)
Browse files Browse the repository at this point in the history
- example JSON paths
  keyword.t.io.handwrit._description
  keyword.u.ms.metric._since
- 'deprecated' in bcp47 is a boolean, omitted if falsy
  • Loading branch information
srl295 authored Sep 8, 2021
1 parent c3abb33 commit 2875bb6
Show file tree
Hide file tree
Showing 5 changed files with 86 additions and 34 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -184,7 +184,7 @@ public ArrayList<CldrNode> getNodesInPath() throws ParseException {
String parent = "";
for (int i = 0; i < pathSegments.length; i++) {
CldrNode node = CldrNode.createNode(parent, pathSegments[i],
fullPathSegments[i]);
fullPathSegments[i], this);

// Zone and time zone element has '/' in attribute value, like
// .../zone[@type="America/Adak"]/...
Expand All @@ -195,13 +195,13 @@ public ArrayList<CldrNode> getNodesInPath() throws ParseException {
String nodeName = node.getName();
if (node.isTimezoneType()) {
nodesInPath.add(CldrNode.createNode(parent, node.getName(),
node.getName()));
node.getName(), this));
String typeValue = node.getDistinguishingAttributes().get("type");
typeValue = typeValue.replaceAll("Asia:Taipei", "Asia/Taipei");
String[] segments = typeValue.split("/");
for (int j = 0; j < segments.length; j++) {
CldrNode newNode = CldrNode.createNode(parent, node.getName(),
node.getName());
node.getName(), this);
if (j == segments.length - 1) {
newNode.getDistinguishingAttributes().putAll(
node.getDistinguishingAttributes());
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -13,8 +13,12 @@ public class CldrNode {

/**
 * Creates a node that has no associated {@link CldrItem}.
 *
 * <p>Convenience overload: forwards to
 * {@code createNode(parent, pathSegment, fullPathSegment, item)} with a
 * {@code null} item.
 *
 * @param parent          parent value stored on the new node
 * @param pathSegment     path segment handed to the four-argument overload
 * @param fullPathSegment full path segment handed to the four-argument overload
 * @return the newly created node
 * @throws ParseException propagated from the four-argument overload
 */
public static CldrNode createNode(String parent, String pathSegment,
        String fullPathSegment) throws ParseException {
    // No owning item is known on this code path.
    final CldrItem noItem = null;
    return createNode(parent, pathSegment, fullPathSegment, noItem);
}
public static CldrNode createNode(String parent, String pathSegment,
String fullPathSegment, CldrItem item) throws ParseException {
CldrNode node = new CldrNode();

node.item = item;
node.parent = parent;
node.name = extractAttrs(pathSegment, node.distinguishingAttributes);
String fullTrunk = extractAttrs(fullPathSegment,
Expand Down Expand Up @@ -123,6 +127,19 @@ private static String extractAttrs(String pathSegment,
*/
private String parent;

/**
 * The CldrItem this node was created for, if any. This is null when the
 * node was created through the three-argument createNode overload, which
 * passes no item; getUntransformedPath() then reports "noitem".
 */
private CldrItem item;

/**
 * Returns the untransformed path of the item this node belongs to.
 *
 * @return the item's untransformed path, or the placeholder string
 *         {@code "noitem"} when this node has no associated item
 */
public String getUntransformedPath() {
    return (item == null) ? "noitem" : item.getUntransformedPath();
}

/**
* This name is derived from element name and attributes. Once it is
* calculated, it is cached in this variable.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -1241,9 +1241,19 @@ private void startNonleafNode(JsonWriter out, CldrNode node, int level)

out.beginObject();
for (String key : attrAsValueMap.keySet()) {
String value = escapeValue(attrAsValueMap.get(key));
String rawAttrValue = attrAsValueMap.get(key);
String value = escapeValue(rawAttrValue);
// attribute is prefixed with "_" when being used as key.
out.name("_" + key).value(value);
String attrAsKey = "_" + key;
// Attributes flagged by LdmlConvertRules.attrIsBooleanOmitFalse (e.g. bcp47
// 'deprecated') are emitted as a JSON boolean true and left out entirely
// when false; every other attribute is emitted as its escaped string value.
// (A leftover debug print to System.err on the non-boolean path was removed:
// it fired once per ordinary attribute and flooded stderr.)
if (LdmlConvertRules.attrIsBooleanOmitFalse(node.getUntransformedPath(), node.getName(), node.getParent(), key)) {
    if (Boolean.parseBoolean(rawAttrValue)) {
        out.name(attrAsKey).value(true);
    } // else, omit
} else {
    out.name(attrAsKey).value(value);
}
}
}

Expand Down Expand Up @@ -1321,7 +1331,7 @@ private void outputArrayItem(JsonWriter out, CldrItem item,
writeRbnfLeafNode(out, item, attrAsValueMap);
} else {
out.beginObject();
writeLeafNode(out, objName, attrAsValueMap, value, nodesNum, cldrNode.getName(), cldrNode.getParent());
writeLeafNode(out, objName, attrAsValueMap, value, nodesNum, cldrNode.getName(), cldrNode.getParent(), cldrNode);
out.endObject();
}
// the last node is closed, remove it.
Expand Down Expand Up @@ -1558,7 +1568,7 @@ private void writeLeafNode(JsonWriter out, CldrNode node, String value,

String objName = node.getNodeKeyName();
Map<String, String> attrAsValueMaps = node.getAttrAsValueMap();
writeLeafNode(out, objName, attrAsValueMaps, value, level, node.getName(), node.getParent());
writeLeafNode(out, objName, attrAsValueMaps, value, level, node.getName(), node.getParent(), node);
}

/**
Expand All @@ -1579,7 +1589,7 @@ private void writeLeafNode(JsonWriter out, CldrNode node, String value,
*/
private void writeLeafNode(JsonWriter out, String objName,
Map<String, String> attrAsValueMap, String value, int level, final String nodeName,
String parent)
String parent, CldrNode node)
throws IOException {
if (objName == null) {
return;
Expand Down Expand Up @@ -1635,18 +1645,26 @@ private void writeLeafNode(JsonWriter out, String objName,
}

for (String key : attrAsValueMap.keySet()) {
String attrValue = escapeValue(attrAsValueMap.get(key));
String rawAttrValue = attrAsValueMap.get(key);
String attrValue = escapeValue(rawAttrValue);
// attribute is prefixed with "_" when being used as key.
String attrAsKey = "_" + key;
if (LdmlConvertRules.ATTRVALUE_AS_ARRAY_SET.contains(key)) {
String[] strings = attrValue.trim().split("\\s+");
out.name("_" + key);
out.name(attrAsKey);
out.beginArray();
for (String s : strings) {
out.value(s);
}
out.endArray();
} else if (node != null &&
LdmlConvertRules.attrIsBooleanOmitFalse(node.getUntransformedPath(), nodeName, parent, key)) {
final Boolean v = Boolean.parseBoolean(rawAttrValue);
if (v) {
out.name(attrAsKey).value(v);
} // else: omit falsy value
} else {
out.name("_" + key).value(attrValue);
out.name(attrAsKey).value(attrValue);
}
}
out.endObject();
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -152,17 +152,15 @@ class LdmlConvertRules {
"grammaticalDerivations:deriveComponent:value0",
"grammaticalDerivations:deriveComponent:value1",

// in common/bcp47/*.xml
"keyword:key:alias",
"keyword:key:name",
"key:type:alias",
"key:type:name",

// identity elements
"identity:language:type",
"identity:script:type",
"identity:territory:type",
"identity:variant:type");
"identity:variant:type",

// in common/bcp47/*.xml
"keyword:key:name"
);

/**
* The set of element:attribute pair in which the attribute should be
Expand Down Expand Up @@ -598,4 +596,16 @@ public static final boolean valueIsSpacesepArray(final String nodeName, String p
return VALUE_IS_SPACESEP_ARRAY.matcher(nodeName).matches()
|| (parent!=null && CHILD_VALUE_IS_SPACESEP_ARRAY.contains(parent));
}

/**
 * Attribute names consulted by {@link #attrIsBooleanOmitFalse}: for paths
 * under {@code //ldmlBCP47/keyword/key}, an attribute whose name is in this
 * set is reported as a boolean that should be omitted when false.
 */
static final Set<String> BCP47_BOOLEAN_OMIT_FALSE = ImmutableSet.of(
// attribute names within bcp47 that are booleans, but omitted if false.
"deprecated"
);

/**
 * Tells whether an attribute is a boolean that should be omitted from the
 * output when its value is false.
 *
 * <p>Only attributes on paths starting with {@code //ldmlBCP47/keyword/key}
 * whose name is listed in {@link #BCP47_BOOLEAN_OMIT_FALSE} qualify.
 *
 * @param fullPath untransformed path of the element; may be {@code null},
 *                 in which case the result is {@code false}
 * @param nodeName name of the node (not consulted by the current rule)
 * @param parent   parent of the node (not consulted by the current rule)
 * @param key      attribute name to test
 * @return {@code true} if the attribute is an omit-if-false boolean
 */
public static final boolean attrIsBooleanOmitFalse(final String fullPath, final String nodeName, final String parent, final String key) {
    // Guard: the rule applies only inside the bcp47 keyword/key subtree.
    if (fullPath == null || !fullPath.startsWith("//ldmlBCP47/keyword/key")) {
        return false;
    }
    return BCP47_BOOLEAN_OMIT_FALSE.contains(key);
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -65,67 +65,67 @@
< (.*/numbers/(decimal|scientific|percent|currency)Formats\[@numberSystem="([^"]*)"\])/(decimal|scientific|percent|currency)FormatLength/(decimal|scientific|percent|currency)Format\[@type="standard"]/pattern.*$
> $1/standard

#
#
< (.*/numbers/currencyFormats\[@numberSystem="([^"]*)"\])/currencyFormatLength/currencyFormat\[@type="accounting"]/pattern.*$
> $1/accounting

# Add "type" attribute with value "standard" if there is no "type" in "decimalFormatLength".
< (.*/numbers/(decimal|scientific|percent)Formats\[@numberSystem="([^"]*)"\]/(decimal|scientific|percent)FormatLength)/(.*)$
> $1[@type="standard"]/$5

#
#
< (.*/listPattern)/(.*)$
> $1[@type="standard"]/$2

#
#
< (.*/languagePopulation)\[@type="([^"]*)"\](.*)
> $1/$2$3

#
#
< (.*/languageAlias)\[@type="([^"]*)"\](.*)
> $1/$2$3

#
#
< (.*/scriptAlias)\[@type="([^"]*)"\](.*)
> $1/$2$3

#
#
< (.*/territoryAlias)\[@type="([^"]*)"\](.*)
> $1/$2$3

#
#
< (.*/subdivisionAlias)\[@type="([^"]*)"\](.*)
> $1/$2$3

#
#
< (.*/variantAlias)\[@type="([^"]*)"\](.*)
> $1/$2$3

#
#
< (.*/zoneAlias)\[@type="([^"]*)"\](.*)
> $1/$2$3

#
#
< (.*/alias)(.*)
> $1/alias$2

#
#
< (.*currencyData/region)(.*)
> $1/region$2

# Skip exemplar city in /etc/GMT or UTC timezones, since they don't have them
< (.*(GMT|UTC).*/exemplarCity)(.*)
>
>

#
#
< (.*/transforms/transform[^/]*)/(.*)
> $1/tRules/$2

#
#
< (.*)\[@territories="([^"]*)"\](.*)\[@alt="variant"\](.*)
> $1\[@territories="$2-alt-variant"\]

#
#
< (.*)/weekData/(.*)\[@alt="variant"\](.*)
> $1/weekData/$2$3

Expand All @@ -141,3 +141,10 @@
< (.*)/(grammaticalData)/(.*)/(grammaticalGender)(.*)$
> $1/grammaticalGenderData/$3/$4$5

# BCP47 (No extension, assume 'u')
< (.*)/(keyword)/(key)\[@name="([^"]*)"\](.*)$
> $1/$2/u/$4$5

# BCP47 (Some other extension)
< (.*)/(keyword)/(key)\[@extension="([^"]*)"\]\[@name="([^"]*)"\](.*)$
> $1/$2/$4/$5$6

0 comments on commit 2875bb6

Please sign in to comment.