Skip to content

Commit

Permalink
Use _ARRAY_DIMENSIONS to create shared, named dimensions in ZarrHeader (#1325)
Browse files Browse the repository at this point in the history

* Use _ARRAY_DIMENSIONS to create shared, named dimension in ZarrHeader

* Watch out for null attrs param in ZarrHeader.makeVariable

* ZarrHeader: Show a bit of .zarray info in var attributes

* Reset fatJars
  • Loading branch information
rschmunk authored Apr 4, 2024
1 parent d7cc4fb commit 5d9d864
Show file tree
Hide file tree
Showing 2 changed files with 146 additions and 15 deletions.
160 changes: 145 additions & 15 deletions cdm/zarr/src/main/java/ucar/nc2/iosp/zarr/ZarrHeader.java
Original file line number Diff line number Diff line change
Expand Up @@ -6,15 +6,23 @@
package ucar.nc2.iosp.zarr;

import com.fasterxml.jackson.databind.ObjectMapper;

import ucar.ma2.Array;
import ucar.ma2.ArrayObject;
import ucar.ma2.ArrayString;
import ucar.nc2.Attribute;
import ucar.nc2.Dimension;
import ucar.nc2.Group;
import ucar.ma2.Index;
import ucar.nc2.Variable;
import ucar.nc2.filter.Filter;
import ucar.unidata.io.RandomAccessFile;
import ucar.unidata.io.zarr.RandomAccessDirectory;
import ucar.unidata.io.zarr.RandomAccessDirectoryItem;

import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.io.IOException;
import java.nio.ByteOrder;
import java.util.*;
Expand All @@ -24,11 +32,16 @@
*/
public class ZarrHeader {

private static final Logger logger = LoggerFactory.getLogger(ZarrHeader.class);

private final RandomAccessDirectory rootRaf;
private final Group.Builder rootGroup;
private final String rootLocation;
private static ObjectMapper objectMapper = new ObjectMapper();

/*
*
*/
public ZarrHeader(RandomAccessDirectory raf, Group.Builder rootGroup) {
this.rootRaf = raf;
this.rootGroup = rootGroup;
Expand All @@ -47,10 +60,16 @@ private class DelayedVarMaker {
private List<Attribute> attrs; // list of variable attributes
private long dataOffset; // byte position where data starts

/*
* Record the attribute list to hand to makeVariable for the variable currently in progress.
* The list is stored by reference, not copied. Note that setVar() resets this field to null,
* so attributes must be set after the variable they belong to.
*/
void setAttrs(List<Attribute> attrs) {
this.attrs = attrs;
}

/*
*
*/
void setVar(RandomAccessDirectoryItem var) {
this.var = var;
this.attrs = null;
Expand All @@ -63,14 +82,16 @@ void setVar(RandomAccessDirectoryItem var) {
raf.seek(0); // reset in case file has previously been opened by another iosp
this.zarray = objectMapper.readValue(raf, ZArray.class);
} catch (IOException | ClassCastException ex) {
ZarrIosp.logger.error(new ZarrFormatException(ex.getMessage()).getMessage());
logger.error(new ZarrFormatException(ex.getMessage()).getMessage());
// skip var if metadata invalid
this.var = null;
}
}
}

// check if attribute file belongs to current variable
/*
* check if attribute file belongs to current variable
*/
boolean myAttrs(RandomAccessDirectoryItem attrs) {
if (var == null || attrs == null) {
return false;
Expand All @@ -81,14 +102,17 @@ boolean myAttrs(RandomAccessDirectoryItem attrs) {
return ZarrUtils.getObjectNameFromPath(attrPath).equals(ZarrUtils.getObjectNameFromPath(varPath));
}

/*
*
*/
void processItem(RandomAccessDirectoryItem item) {
if (var == null) {
return;
}
// get index of chunks
int index = getChunkIndex(item, this.zarray);
if (index < 0) { // not data files, skip rest of var
ZarrIosp.logger.error(new ZarrFormatException().getMessage());
logger.error(new ZarrFormatException().getMessage());
this.var = null; // skip rest of var is unrecognized files found
}
this.initializedChunks.put(index, item.length());
Expand All @@ -98,22 +122,25 @@ void processItem(RandomAccessDirectoryItem item) {
}
}

/*
* Build the variable currently in progress (if any) from the state collected so far
* (data offset, .zarray metadata, initialized chunks, attributes), then clear it.
* A ZarrFormatException from makeVariable is logged, not propagated; the pending
* variable is cleared in either case.
*/
void makeVar() {
if (var == null) {
return; // do nothing if no variable is in progress
}
try {
makeVariable(var, dataOffset, zarray, initializedChunks, attrs);
} catch (ZarrFormatException ex) {
// variable could not be built: report the problem and fall through to the reset below
ZarrIosp.logger.error(ex.getMessage());
logger.error(ex.getMessage());
}
var = null; // reset var
}
}

/**
* Create CDM object on 'rootGroup' from RandomAccessFile
*
*
* @throws IOException
*/
public void read() throws IOException {
Expand All @@ -124,8 +151,10 @@ public void read() throws IOException {

for (RandomAccessDirectoryItem item : items) {
String filepath = ZarrUtils.trimLocation(item.getLocation());

if (filepath.endsWith(ZarrKeys.ZATTRS)) { // attributes
List<Attribute> attrs = makeAttributes(item);

// assign attrs to either variable or group
if (delayedVarMaker.myAttrs(item)) {
delayedVarMaker.setAttrs(attrs);
Expand All @@ -134,16 +163,22 @@ public void read() throws IOException {
delayedVarMaker.makeVar();
grp_attrs = attrs;
}

} else if (filepath.endsWith(ZarrKeys.ZMETADATA)) { // possible consolidated metadata in root group
logger.trace("encountered .zmetadata; not yet coded for");

} else if (filepath.endsWith(ZarrKeys.ZGROUP)) { // groups
// build any vars in progress
delayedVarMaker.makeVar();
makeGroup(item, grp_attrs); // .zattrs will always be processed before .zgroup, so we can make group immediately
grp_attrs = null; // reset

} else if (filepath.endsWith(ZarrKeys.ZARRAY)) { // variables
// build any vars in progress
delayedVarMaker.makeVar();
// set up variable to be created after processing the rest of the files in the folder
delayedVarMaker.setVar(item);

} else {
delayedVarMaker.processItem(item);
}
Expand All @@ -152,6 +187,9 @@ public void read() throws IOException {
delayedVarMaker.makeVar();
}

/*
*
*/
private void makeGroup(RandomAccessDirectoryItem item, List<Attribute> attrs) {
// make new Group
Group.Builder group = Group.builder();
Expand All @@ -174,33 +212,111 @@ private void makeGroup(RandomAccessDirectoryItem item, List<Attribute> attrs) {
group.setParentGroup(parentGroup);
parentGroup.addGroup(group);
} catch (ZarrFormatException ex) {
ZarrIosp.logger.error(ex.getMessage());
logger.error(ex.getMessage());
}
}
}

/*
*
*/
private void makeVariable(RandomAccessDirectoryItem item, long dataOffset, ZArray zarray,
Map<Integer, Long> initializedChunks, List<Attribute> attrs) throws ZarrFormatException {
// make new Variable
Variable.Builder var = Variable.builder();
String location = ZarrUtils.trimLocation(item.getLocation());

// set var name
var.setName(ZarrUtils.getObjectNameFromPath(location));
String vname = ZarrUtils.getObjectNameFromPath(location);
var.setName(vname);
logger.trace("evaluating {}", vname);

// Check if var has named dimensions by looking for _ARRAY_DIMENSIONS attribute.
// This is the convention followed by xarray and geozarr.
// NOTE: The Nczarr spec allows for honoring or ignoring this attribute by specifying a mode.
// See under "Client Parameters" on https://docs.unidata.ucar.edu/nug/current/nczarr_head.html
// We do nothing to check how that's set.
String[] dimNames = null;
boolean hasNamedDimensions = false;

if (attrs != null) {

for (Attribute attr : attrs) {
final String attrName = attr.getName();
if ("_ARRAY_DIMENSIONS".equals(attrName)) {
try {
final ArrayObject.D1 aod1 = (ArrayObject.D1) attr.getValues();

// getSize returns a long
final int aodSize = (int) aod1.getSize();
dimNames = new String[aodSize];

for (int i = 0; i < aodSize; ++i) {
dimNames[i] = (String) aod1.get(i);
}
hasNamedDimensions = true;
// logger.trace(" found _ARRAY_DIMENSIONS array {}", aod1);
} catch (final Exception exc) {
logger.debug(" Could not extract _ARRAY_DIMENSIONS for {}, {}", vname, exc.getMessage());
}

//// Informational logging
// } else if ("coordinates".equals(attrName) || "standard_name".equals(attrName) || "units".equals(attrName))
//// {
// try {
// ArrayObject.D1 aod1 = (ArrayObject.D1) attr.getValues();
// String coordsStr = (String) aod1.get(0);
// logger.trace(" var {} has {} attr '{}'", vname, attrName, coordsStr);
// } catch (final Exception exc) {
// logger.debug(" Exception extracting {} attr value, {}", attrName, exc.getMessage());
// }

}
}
}

// set variable datatype
var.setDataType(zarray.getDataType());

// find variable's group or throw if non-existent.
final Group.Builder parentGroup = findGroup(location);

// create and set dimensions
// If hasNamedDimensions set above, we will want to share var's dimensions with the group.
int[] shape = zarray.getShape();
List<Dimension> dims = new ArrayList<>();
for (int d = 0; d < shape.length; d++) {
// TODO: revisit dimension props and names (especially for nczarr)
Dimension.Builder dim = Dimension.builder(String.format("dim%d", d), shape[d]);

if (hasNamedDimensions && shape.length != dimNames.length) {
throw new ZarrFormatException("Array " + vname + " has dimensions attribute count that does not match its rank.");
}

final List<Dimension> dims = new ArrayList<>();
for (int i = 0; i < shape.length; i++) {

final String dname = (hasNamedDimensions) ? dimNames[i] : String.format("dim%d", i);

final Dimension.Builder dim = Dimension.builder(dname, shape[i]);
dim.setIsVariableLength(false);
dim.setIsUnlimited(false);
dim.setIsShared(false);
dims.add(dim.build());

final Dimension dd = dim.build();

dims.add(dd);

if (hasNamedDimensions) {
Optional<Dimension> optd = parentGroup.findDimensionLocal(dname);

if (optd.isPresent()) {
final Dimension prevd = optd.get();

if (dd.getLength() != prevd.getLength()) {
throw new ZarrFormatException("Named dimension " + dname + " seen with inconsistent lengths.");
}
} else {
logger.trace("adding {} to group as a shared dimension", dname);
parentGroup.addDimension(dd);
}
}
}
var.addDimensions(dims);

Expand All @@ -215,16 +331,30 @@ private void makeVariable(RandomAccessDirectoryItem item, long dataOffset, ZArra
zarray.getOrder(), zarray.getSeparator(), zarray.getFilters(), dataOffset, initializedChunks);
var.setSPobject(vinfo);

// Include some info from .zarray file in attributes for display when showing variable detail.
// Possibly add to this fill_value if in .zarray but not .zattrs?
if (attrs == null) {
attrs = new ArrayList<Attribute>();
}
final Filter compressor = zarray.getCompressor();
if (compressor == null) {
attrs.add(new Attribute("_Compressor", "none"));
} else {
attrs.add(new Attribute("_Compressor", zarray.getCompressor().getName()));
}

// add current attributes, if any exist
if (attrs != null) {
var.addAttributes(attrs);
}

// find variable's group or throw if non-existent
Group.Builder parentGroup = findGroup(location);
// Add var to parent.
parentGroup.addVariable(var);
}

/*
*
*/
private List<Attribute> makeAttributes(RandomAccessDirectoryItem item) {
// get RandomAccessFile for JSON parsing
try {
Expand Down Expand Up @@ -287,7 +417,7 @@ private static int getChunkIndex(RandomAccessDirectoryItem item, ZArray zarray)

/**
* Find Group builder matching provided name
*
*
* @throws ZarrFormatException if group is not found
*/
private Group.Builder findGroup(String location) throws ZarrFormatException {
Expand Down
1 change: 1 addition & 0 deletions cdm/zarr/src/main/java/ucar/nc2/iosp/zarr/ZarrKeys.java
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@ public final class ZarrKeys {
public static final String ZARRAY = ".zarray";
public static final String ZATTRS = ".zattrs";
public static final String ZGROUP = ".zgroup";
public static final String ZMETADATA = ".zmetadata";

// key names
public static final String SHAPE = "shape";
Expand Down

0 comments on commit 5d9d864

Please sign in to comment.