From 5d9d8642ff6b8a32684531e4ea3ba6c35e3ea616 Mon Sep 17 00:00:00 2001 From: "R. Schmunk" <8718035+rschmunk@users.noreply.github.com> Date: Thu, 4 Apr 2024 17:11:28 -0400 Subject: [PATCH] Use _ARRAY_DIMENSIONS to create shared, named dimensions in ZarrHeader (#1325) * Use _ARRAY_DIMENSIONS to create shared, named dimension in ZarrHeader * Watch out for null attrs param in ZarrHeader.makeVariable * ZarrHeader: Show a bit of .zarray info in var attributes * Reset fatJars --- .../java/ucar/nc2/iosp/zarr/ZarrHeader.java | 160 ++++++++++++++++-- .../java/ucar/nc2/iosp/zarr/ZarrKeys.java | 1 + 2 files changed, 146 insertions(+), 15 deletions(-) diff --git a/cdm/zarr/src/main/java/ucar/nc2/iosp/zarr/ZarrHeader.java b/cdm/zarr/src/main/java/ucar/nc2/iosp/zarr/ZarrHeader.java index 24854661f6..f24b0b945f 100644 --- a/cdm/zarr/src/main/java/ucar/nc2/iosp/zarr/ZarrHeader.java +++ b/cdm/zarr/src/main/java/ucar/nc2/iosp/zarr/ZarrHeader.java @@ -6,15 +6,23 @@ package ucar.nc2.iosp.zarr; import com.fasterxml.jackson.databind.ObjectMapper; + +import ucar.ma2.Array; +import ucar.ma2.ArrayObject; +import ucar.ma2.ArrayString; import ucar.nc2.Attribute; import ucar.nc2.Dimension; import ucar.nc2.Group; +import ucar.ma2.Index; import ucar.nc2.Variable; import ucar.nc2.filter.Filter; import ucar.unidata.io.RandomAccessFile; import ucar.unidata.io.zarr.RandomAccessDirectory; import ucar.unidata.io.zarr.RandomAccessDirectoryItem; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + import java.io.IOException; import java.nio.ByteOrder; import java.util.*; @@ -24,11 +32,16 @@ */ public class ZarrHeader { + private static final Logger logger = LoggerFactory.getLogger(ZarrHeader.class); + private final RandomAccessDirectory rootRaf; private final Group.Builder rootGroup; private final String rootLocation; private static ObjectMapper objectMapper = new ObjectMapper(); + /* + * + */ public ZarrHeader(RandomAccessDirectory raf, Group.Builder rootGroup) { this.rootRaf = raf; this.rootGroup = rootGroup; @@ -47,10 +60,16 @@ private class DelayedVarMaker { private List attrs; // list of variable attributes private long dataOffset; // byte position where data starts + /* + * + */ void setAttrs(List attrs) { this.attrs = attrs; } + /* + * + */ void setVar(RandomAccessDirectoryItem var) { this.var = var; this.attrs = null; @@ -63,14 +82,16 @@ void setVar(RandomAccessDirectoryItem var) { raf.seek(0); // reset in case file has previously been opened by another iosp this.zarray = objectMapper.readValue(raf, ZArray.class); } catch (IOException | ClassCastException ex) { - ZarrIosp.logger.error(new ZarrFormatException(ex.getMessage()).getMessage()); + logger.error(new ZarrFormatException(ex.getMessage()).getMessage()); // skip var if metadata invalid this.var = null; } } } - // check if attribute file belongs to current variable + /* + * check if attribute file belongs to current variable + */ boolean myAttrs(RandomAccessDirectoryItem attrs) { if (var == null || attrs == null) { return false; @@ -81,6 +102,9 @@ boolean myAttrs(RandomAccessDirectoryItem attrs) { return ZarrUtils.getObjectNameFromPath(attrPath).equals(ZarrUtils.getObjectNameFromPath(varPath)); } + /* + * + */ void processItem(RandomAccessDirectoryItem item) { if (var == null) { return; @@ -88,7 +112,7 @@ void processItem(RandomAccessDirectoryItem item) { // get index of chunks int index = getChunkIndex(item, this.zarray); if (index < 0) { // not data files, skip rest of var - ZarrIosp.logger.error(new ZarrFormatException().getMessage()); + logger.error(new ZarrFormatException().getMessage()); this.var = null; // skip rest of var is unrecognized files found } this.initializedChunks.put(index, item.length()); @@ -98,6 +122,9 @@ void processItem(RandomAccessDirectoryItem item) { } } + /* + * + */ void makeVar() { if (var == null) { return; // do nothing if no variable is in progress @@ -105,7 +132,7 @@ void makeVar() { try { makeVariable(var, dataOffset, zarray, initializedChunks, attrs); } catch (ZarrFormatException ex) { - ZarrIosp.logger.error(ex.getMessage()); + logger.error(ex.getMessage()); } var = null; // reset var } @@ -113,7 +140,7 @@ void makeVar() { /** * Create CDM object on 'rootGroup' from RandomAccessFile - * + * * @throws IOException */ public void read() throws IOException { @@ -124,8 +151,10 @@ public void read() throws IOException { for (RandomAccessDirectoryItem item : items) { String filepath = ZarrUtils.trimLocation(item.getLocation()); + if (filepath.endsWith(ZarrKeys.ZATTRS)) { // attributes List attrs = makeAttributes(item); + // assign attrs to either variable or group if (delayedVarMaker.myAttrs(item)) { delayedVarMaker.setAttrs(attrs); @@ -134,16 +163,22 @@ public void read() throws IOException { delayedVarMaker.makeVar(); grp_attrs = attrs; } + + } else if (filepath.endsWith(ZarrKeys.ZMETADATA)) { // possible consolidated metadata in root group + logger.trace("encountered .zmetadata; not yet coded for"); + } else if (filepath.endsWith(ZarrKeys.ZGROUP)) { // groups // build any vars in progress delayedVarMaker.makeVar(); makeGroup(item, grp_attrs); // .zattrs will always be processed before .zgroup, so we can make group immediately grp_attrs = null; // reset + } else if (filepath.endsWith(ZarrKeys.ZARRAY)) { // variables // build any vars in progress delayedVarMaker.makeVar(); // set up variable to be created after processing the rest of the files in the folder delayedVarMaker.setVar(item); + } else { delayedVarMaker.processItem(item); } @@ -152,6 +187,9 @@ public void read() throws IOException { delayedVarMaker.makeVar(); } + /* + * + */ private void makeGroup(RandomAccessDirectoryItem item, List attrs) { // make new Group Group.Builder group = Group.builder(); @@ -174,11 +212,14 @@ private void makeGroup(RandomAccessDirectoryItem item, List attrs) { group.setParentGroup(parentGroup); parentGroup.addGroup(group); } catch (ZarrFormatException ex) { - ZarrIosp.logger.error(ex.getMessage()); + logger.error(ex.getMessage()); } } } + /* + * + */ private void makeVariable(RandomAccessDirectoryItem item, long dataOffset, ZArray zarray, Map initializedChunks, List attrs) throws ZarrFormatException { // make new Variable @@ -186,21 +227,96 @@ private void makeVariable(RandomAccessDirectoryItem item, long dataOffset, ZArra String location = ZarrUtils.trimLocation(item.getLocation()); // set var name - var.setName(ZarrUtils.getObjectNameFromPath(location)); + String vname = ZarrUtils.getObjectNameFromPath(location); + var.setName(vname); + logger.trace("evaluating {}", vname); + + // Check if var has named dimensions by looking for _ARRAY_DIMENSIONS attribute. + // This is the convention followed by xarray and geozarr. + // NOTE: The Nczarr spec allows for honoring or ignoring this attribute by specifying a mode. + // See under "Client Parameters" on https://docs.unidata.ucar.edu/nug/current/nczarr_head.html + // We do nothing to check how that's set. + String[] dimNames = null; + boolean hasNamedDimensions = false; + + if (attrs != null) { + + for (Attribute attr : attrs) { + final String attrName = attr.getName(); + if ("_ARRAY_DIMENSIONS".equals(attrName)) { + try { + final ArrayObject.D1 aod1 = (ArrayObject.D1) attr.getValues(); + + // getSize returns a long + final int aodSize = (int) aod1.getSize(); + dimNames = new String[aodSize]; + + for (int i = 0; i < aodSize; ++i) { + dimNames[i] = (String) aod1.get(i); + } + hasNamedDimensions = true; + // logger.trace(" found _ARRAY_DIMENSIONS array {}", aod1); + } catch (final Exception exc) { + logger.debug(" Could not extract _ARRAY_DIMENSIONS for {}, {}", vname, exc.getMessage()); + } + + //// Informational logging + // } else if ("coordinates".equals(attrName) || "standard_name".equals(attrName) || "units".equals(attrName)) + //// { + // try { + // ArrayObject.D1 aod1 = (ArrayObject.D1) attr.getValues(); + // String coordsStr = (String) aod1.get(0); + // logger.trace(" var {} has {} attr '{}'", vname, attrName, coordsStr); + // } catch (final Exception exc) { + // logger.debug(" Exception extracting {} attr value, {}", attrName, exc.getMessage()); + // } + + } + } + } // set variable datatype var.setDataType(zarray.getDataType()); + // find variable's group or throw if non-existent. + final Group.Builder parentGroup = findGroup(location); + // create and set dimensions + // If hasNamedDimensions set above, we will want to share var's dimensions with the group. int[] shape = zarray.getShape(); - List dims = new ArrayList<>(); - for (int d = 0; d < shape.length; d++) { - // TODO: revisit dimension props and names (especially for nczarr) - Dimension.Builder dim = Dimension.builder(String.format("dim%d", d), shape[d]); + + if (hasNamedDimensions && shape.length != dimNames.length) { + throw new ZarrFormatException("Array " + vname + " has dimensions attribute count that does not match its rank."); + } + + final List dims = new ArrayList<>(); + for (int i = 0; i < shape.length; i++) { + + final String dname = (hasNamedDimensions) ? dimNames[i] : String.format("dim%d", i); + + final Dimension.Builder dim = Dimension.builder(dname, shape[i]); dim.setIsVariableLength(false); dim.setIsUnlimited(false); dim.setIsShared(false); - dims.add(dim.build()); + + final Dimension dd = dim.build(); + + dims.add(dd); + + if (hasNamedDimensions) { + Optional optd = parentGroup.findDimensionLocal(dname); + + if (optd.isPresent()) { + final Dimension prevd = optd.get(); + + if (dd.getLength() != prevd.getLength()) { + throw new ZarrFormatException("Named dimension " + dname + " seen with inconsistent lengths."); + } + } else { + logger.trace("adding {} to group as a shared dimension", dname); + parentGroup.addDimension(dd); + } + } } var.addDimensions(dims); @@ -215,16 +331,30 @@ private void makeVariable(RandomAccessDirectoryItem item, long dataOffset, ZArra zarray.getOrder(), zarray.getSeparator(), zarray.getFilters(), dataOffset, initializedChunks); var.setSPobject(vinfo); + // Include some info from .zarray file in attributes for display when showing variable detail. + // Possibly add to this fill_value if in .zarray but not .zattrs? + if (attrs == null) { + attrs = new ArrayList(); + } + final Filter compressor = zarray.getCompressor(); + if (compressor == null) { + attrs.add(new Attribute("_Compressor", "none")); + } else { + attrs.add(new Attribute("_Compressor", zarray.getCompressor().getName())); + } + // add current attributes, if any exist if (attrs != null) { var.addAttributes(attrs); } - // find variable's group or throw if non-existent - Group.Builder parentGroup = findGroup(location); + // Add var to parent. parentGroup.addVariable(var); } + /* + * + */ private List makeAttributes(RandomAccessDirectoryItem item) { // get RandomAccessFile for JSON parsing try { @@ -287,7 +417,7 @@ private static int getChunkIndex(RandomAccessDirectoryItem item, ZArray zarray) /** * Find Group builder matching provided name - * + * * @throws ZarrFormatException if group is not found */ private Group.Builder findGroup(String location) throws ZarrFormatException { diff --git a/cdm/zarr/src/main/java/ucar/nc2/iosp/zarr/ZarrKeys.java b/cdm/zarr/src/main/java/ucar/nc2/iosp/zarr/ZarrKeys.java index 887a4b9b36..a64256f11b 100644 --- a/cdm/zarr/src/main/java/ucar/nc2/iosp/zarr/ZarrKeys.java +++ b/cdm/zarr/src/main/java/ucar/nc2/iosp/zarr/ZarrKeys.java @@ -14,6 +14,7 @@ public final class ZarrKeys { public static final String ZARRAY = ".zarray"; public static final String ZATTRS = ".zattrs"; public static final String ZGROUP = ".zgroup"; + public static final String ZMETADATA = ".zmetadata"; // key names public static final String SHAPE = "shape";