Skip to content

Commit

Permalink
Merge pull request #1210 from cssjessica/standardizer
Browse files Browse the repository at this point in the history
Standardizer
  • Loading branch information
cssjessica committed Jul 11, 2023
2 parents 4b5bc19 + 5d35feb commit b844a98
Show file tree
Hide file tree
Showing 9 changed files with 241 additions and 2 deletions.
2 changes: 2 additions & 0 deletions cdm/core/build.gradle
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,8 @@ apply plugin: 'groovy' // For Spock tests.

dependencies {
api enforcedPlatform(project(':netcdf-java-platform'))
implementation 'commons-math:commons-math'

testImplementation enforcedPlatform(project(':netcdf-java-testing-platform'))

compile project(':udunits')
Expand Down
1 change: 1 addition & 0 deletions cdm/core/src/main/java/ucar/nc2/constants/CDM.java
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,7 @@ public class CDM {
public static final String TIME_OFFSET = "time offset from runtime";
public static final String TIME_OFFSET_HOUR = "hoursFrom0z";
public static final String RUNTIME_COORDINATE = "runtimeCoordinate";
public static final String STANDARDIZE = "standardize";

// Special attributes

Expand Down
10 changes: 8 additions & 2 deletions cdm/core/src/main/java/ucar/nc2/dataset/NetcdfDataset.java
Original file line number Diff line number Diff line change
Expand Up @@ -115,10 +115,16 @@ public enum Enhance {
* every dimension in a variable has a corresponding coordinate variable.
*/
IncompleteCoordSystems,
/**
* Standardize the data: compute the mean and standard deviation of the variable's values and replace each
* value z with (z - mean) / standard_deviation.
* If the enhanced data type is not {@code FLOAT} or {@code DOUBLE}, this has no effect.
*/
ApplyStandardizer,
}

private static Set<Enhance> EnhanceAll = Collections.unmodifiableSet(EnumSet.of(Enhance.ConvertEnums,
Enhance.ConvertUnsigned, Enhance.ApplyScaleOffset, Enhance.ConvertMissing, Enhance.CoordSystems));
private static Set<Enhance> EnhanceAll =
Collections.unmodifiableSet(EnumSet.of(Enhance.ConvertEnums, Enhance.ConvertUnsigned, Enhance.ApplyScaleOffset,
Enhance.ConvertMissing, Enhance.CoordSystems, Enhance.ApplyStandardizer));
private static Set<Enhance> EnhanceNone = Collections.unmodifiableSet(EnumSet.noneOf(Enhance.class));
private static Set<Enhance> defaultEnhanceMode = EnhanceAll;

Expand Down
9 changes: 9 additions & 0 deletions cdm/core/src/main/java/ucar/nc2/dataset/VariableDS.java
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@
import ucar.nc2.filter.ConvertMissing;
import ucar.nc2.filter.FilterHelpers;
import ucar.nc2.filter.ScaleOffset;
import ucar.nc2.filter.Standardizer;
import ucar.nc2.filter.UnsignedConversion;
import ucar.nc2.internal.dataset.CoordinatesHelper;
import ucar.nc2.iosp.netcdf3.N3iosp;
Expand Down Expand Up @@ -269,6 +270,9 @@ Array convert(Array data, Set<NetcdfDataset.Enhance> enhancements) {
&& (dataType == DataType.FLOAT || dataType == DataType.DOUBLE)) {
data = convertMissing.convertMissing(data);
}
if (enhancements.contains(Enhance.ApplyStandardizer) && standardizer != null) {
data = standardizer.convert(data);
}
return data;
}
}
Expand Down Expand Up @@ -797,6 +801,7 @@ public Array convert(Array in, boolean convertUnsigned, boolean applyScaleOffset
// TODO make immutable in version 6
private UnsignedConversion unsignedConversion;
private ScaleOffset scaleOffset;
private Standardizer standardizer;
private ConvertMissing convertMissing;
private Set<Enhance> enhanceMode = EnumSet.noneOf(Enhance.class); // The set of enhancements that were made.

Expand Down Expand Up @@ -851,6 +856,10 @@ private void createEnhancements() {
this.scaleOffset = ScaleOffset.createFromVariable(this);
this.dataType = scaleOffset != null ? scaleOffset.getScaledOffsetType() : this.dataType;
}
Attribute standardizerAtt = findAttribute(CDM.STANDARDIZE);
if (standardizerAtt != null && this.enhanceMode.contains(Enhance.ApplyStandardizer) && dataType.isFloatingPoint()) {
this.standardizer = Standardizer.createFromVariable(this);
}

// need fill value info before convertMissing
Attribute fillValueAtt = findAttribute(CDM.FILL_VALUE);
Expand Down
67 changes: 67 additions & 0 deletions cdm/core/src/main/java/ucar/nc2/filter/Standardizer.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,67 @@
package ucar.nc2.filter;

import java.io.IOException;
import java.util.HashMap;
import java.util.Map;
import org.apache.commons.math.stat.descriptive.SummaryStatistics;
import ucar.ma2.Array;
import ucar.ma2.DataType;
import ucar.ma2.IndexIterator;
import ucar.nc2.dataset.VariableDS;

/**
 * Standardizes data by subtracting the mean and dividing by the standard deviation of a sample.
 *
 * <p>
 * The mean and standard deviation are computed once, when the instance is created, from the non-NaN values of
 * the supplied data. The actual transformation is delegated to a {@link ScaleOffset} configured with
 * {@code offset = mean} and {@code scale = 1 / stdDev}.
 */
public class Standardizer {

  private final ScaleOffset scaleOffset;
  private final double mean;
  private final double stdDev;

  /**
   * Create a Standardizer from all the values of a variable.
   *
   * @param var the variable whose data is read to compute the statistics
   * @return a Standardizer based on the variable's data; if the data cannot be read, a Standardizer with mean 0
   *         and standard deviation 1
   */
  public static Standardizer createFromVariable(VariableDS var) {
    try {
      Array arr = var.read();
      DataType type = var.getDataType();
      return createFromArray(arr, type);
    } catch (IOException e) {
      // Deliberate best effort: when the data cannot be read, fall back to mean 0 / stdDev 1 (which should
      // leave values unchanged) instead of propagating the exception to the caller.
      return new Standardizer(0.0, 1.0, var.getDataType());
    }
  }

  /**
   * Create a Standardizer from the values of an array. NaN values are excluded from the statistics.
   *
   * @param arr the sample data
   * @param type the data type handed to the underlying {@link ScaleOffset}
   * @return a Standardizer using the mean and standard deviation of {@code arr}
   */
  public static Standardizer createFromArray(Array arr, DataType type) {
    SummaryStatistics statistics = calculationHelper(arr);
    return new Standardizer(statistics.getMean(), statistics.getStandardDeviation(), type);
  }

  private Standardizer(double mean, double stdDev, DataType type) {
    this.mean = mean;
    this.stdDev = stdDev;
    // A zero standard deviation (constant or single-valued data) would give an infinite scale and turn every
    // converted value into NaN or Infinity; an undefined (NaN) deviation is equally unusable. In both cases only
    // center the data by using a scale of 1.
    double scale = (stdDev == 0.0 || Double.isNaN(stdDev)) ? 1.0 : 1 / stdDev;
    Map<String, Object> props = new HashMap<>();
    props.put("offset", mean);
    props.put("scale", scale);
    props.put("dtype", type);
    scaleOffset = new ScaleOffset(props);
  }

  /** Accumulate summary statistics over every non-NaN element of the array. */
  private static SummaryStatistics calculationHelper(Array arr) {
    SummaryStatistics sumStat = new SummaryStatistics();
    IndexIterator iterArr = arr.getIndexIterator();
    while (iterArr.hasNext()) {
      double value = ((Number) iterArr.getObjectNext()).doubleValue();
      if (!Double.isNaN(value)) {
        sumStat.addValue(value);
      }
    }
    return sumStat;
  }

  /**
   * Apply the standardization to the given data.
   *
   * @param arr the data to convert
   * @return the result of applying the configured {@link ScaleOffset} to {@code arr}
   */
  public Array convert(Array arr) {
    return scaleOffset.applyScaleOffset(arr);
  }

  /** @return the mean computed from the sample data */
  public double getMean() {
    return mean;
  }

  /** @return the standard deviation computed from the sample data (may be 0 for constant data) */
  public double getStdDev() {
    return stdDev;
  }
}

20 changes: 20 additions & 0 deletions cdm/core/src/test/data/ncml/enhance/testStandardizer.ncml
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
<?xml version="1.0" encoding="UTF-8"?>
<!--
~ Copyright (c) 1998-2023 University Corporation for Atmospheric Research/Unidata
~ See LICENSE for license information.
-->

<netcdf xmlns="http://www.unidata.ucar.edu/namespaces/netcdf/ncml-2.2" enhance="all">
<variable name="doublevar" shape="5" type="double">
<attribute name="standardize"/>
<values>1.0 2.0 3.0 4.0 5.0</values>
</variable>
<variable name="floatvar" shape="5" type="float">
<attribute name="standardize"/>
<values>1.0 2.0 3.0 4.0 5.0</values>
</variable>
<variable name="intvar" shape="5" type="int">
<attribute name="standardize"/>
<values>1 2 3 4 5</values>
</variable>
</netcdf>
78 changes: 78 additions & 0 deletions cdm/core/src/test/java/ucar/nc2/filter/TestStandardizer.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,78 @@
package ucar.nc2.filter;

import static com.google.common.truth.Truth.assertThat;
import static ucar.ma2.MAMath.nearlyEquals;

import java.nio.ByteOrder;
import org.junit.Test;
import ucar.ma2.Array;
import ucar.ma2.DataType;
import ucar.ma2.IndexIterator;

/** Unit tests for {@link Standardizer}: checks the computed statistics and the converted values. */
public class TestStandardizer {

  public static final double DATA_MEAN = 10.0;
  public static final double DATA_STDDEV = 3.3166247903554;
  public static double TOLERANCE = 1.0E-9;

  public static final double[] DOUBLES = {Double.NaN, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0};
  public static final Array DATA_DOUBLES = Array.makeFromJavaArray(DOUBLES);

  public static final float[] FLOATS =
      {Float.NaN, 5.0F, 6.0F, 7.0F, 8.0F, 9.0F, 10.0F, 11.0F, 12.0F, 13.0F, 14.0F, 15.0F};
  public static final Array DATA_FLOATS = Array.makeFromJavaArray(FLOATS);

  /**
   * Reference implementation: standardize every element of {@code arr} with the known mean and standard
   * deviation of the test data.
   */
  public double[] standardize(Array arr) {
    double[] expected = new double[(int) arr.getSize()];
    IndexIterator elements = arr.getIndexIterator();
    int pos = 0;
    while (pos < (int) arr.getSize()) {
      double raw = ((Number) elements.getObjectNext()).doubleValue();
      expected[pos] = (raw - DATA_MEAN) / DATA_STDDEV;
      pos++;
    }
    return expected;
  }

  @Test
  public void testConvertDouble() {
    Standardizer standardizer = Standardizer.createFromArray(DATA_DOUBLES, DataType.DOUBLE);
    double[] expected = standardize(DATA_DOUBLES);
    Array actual = standardizer.convert(DATA_DOUBLES);
    boolean matches = nearlyEquals(actual, Array.makeFromJavaArray(expected));
    assertThat(matches).isTrue();
  }

  @Test
  public void testConvertFloat() {
    Standardizer standardizer = Standardizer.createFromArray(DATA_FLOATS, DataType.FLOAT);
    double[] expected = standardize(DATA_FLOATS);
    Array actual = standardizer.convert(DATA_FLOATS);
    boolean matches = nearlyEquals(actual, Array.makeFromJavaArray(expected));
    assertThat(matches).isTrue();
  }

  @Test
  public void testCalculateMean() {
    double mean = Standardizer.createFromArray(DATA_DOUBLES, DataType.DOUBLE).getMean();
    assertThat(mean).isWithin(TOLERANCE).of(DATA_MEAN);
  }

  @Test
  public void testCalculateMeanFloat() {
    double mean = Standardizer.createFromArray(DATA_FLOATS, DataType.FLOAT).getMean();
    assertThat(mean).isWithin(TOLERANCE).of(DATA_MEAN);
  }

  @Test
  public void testCalculateStandardDeviation() {
    double stdDev = Standardizer.createFromArray(DATA_DOUBLES, DataType.DOUBLE).getStdDev();
    assertThat(stdDev).isWithin(TOLERANCE).of(DATA_STDDEV);
  }

  @Test
  public void testCalculateStandardDeviationFloat() {
    double stdDev = Standardizer.createFromArray(DATA_FLOATS, DataType.FLOAT).getStdDev();
    assertThat(stdDev).isWithin(TOLERANCE).of(DATA_STDDEV);
  }
}
53 changes: 53 additions & 0 deletions cdm/core/src/test/java/ucar/nc2/ncml/TestEnhanceStandardizer.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,53 @@
package ucar.nc2.ncml;

import static com.google.common.truth.Truth.assertThat;
import static ucar.ma2.MAMath.nearlyEquals;

import java.io.IOException;
import org.junit.Test;
import ucar.ma2.Array;
import ucar.ma2.DataType;
import ucar.nc2.NetcdfFile;
import ucar.nc2.Variable;
import ucar.nc2.dataset.NetcdfDatasets;
import ucar.unidata.util.test.TestDir;

/**
 * Checks that the standardize enhancement is applied to floating point variables opened from NcML and is not
 * applied to integer variables.
 */
public class TestEnhanceStandardizer {

  private static String dataDir = TestDir.cdmLocalTestDataDir + "ncml/enhance/";
  public static final double[] DOUBLES = {-1.26491106406735, -0.63245553203368, 0, 0.63245553203368, 1.26491106406735};
  public static final Array DATA_DOUBLES = Array.makeFromJavaArray(DOUBLES);
  public static final float[] FLOATS =
      {-1.26491106406735F, -0.63245553203368F, 0, 0.63245553203368F, 1.26491106406735F};
  public static final Array DATA_FLOATS = Array.makeFromJavaArray(FLOATS);
  public static final int[] INTS = {1, 2, 3, 4, 5};
  public static final Array DATA_INTS = Array.makeFromJavaArray(INTS);

  @Test
  public void testEnhanceStandardizer() throws IOException {
    try (NetcdfFile ncfile = NetcdfDatasets.openDataset(dataDir + "testStandardizer.ncml", true, null)) {
      Variable doubleVar = ncfile.findVariable("doublevar");
      assertThat((Object) doubleVar).isNotNull();
      assertThat(doubleVar.getDataType()).isEqualTo(DataType.DOUBLE);
      assertThat(doubleVar.attributes().hasAttribute("standardize")).isTrue();
      Array dataDoubles = doubleVar.read();
      assertThat(nearlyEquals(dataDoubles, DATA_DOUBLES)).isTrue();

      // Check the float variable itself (attribute and data), not the double variable again.
      Variable floatVar = ncfile.findVariable("floatvar");
      assertThat((Object) floatVar).isNotNull();
      assertThat(floatVar.getDataType()).isEqualTo(DataType.FLOAT);
      assertThat(floatVar.attributes().hasAttribute("standardize")).isTrue();
      Array dataFloats = floatVar.read();
      assertThat(nearlyEquals(dataFloats, DATA_FLOATS)).isTrue();

      Variable intVar = ncfile.findVariable("intvar");
      assertThat((Object) intVar).isNotNull();
      assertThat(intVar.getDataType()).isEqualTo(DataType.INT);
      assertThat(intVar.attributes().hasAttribute("standardize")).isTrue();
      Array data = intVar.read();
      assertThat(nearlyEquals(data, DATA_INTS)).isTrue(); // The enhancement doesn't apply to ints, so the data should
      // be equal to the input array
    }
  }

}
3 changes: 3 additions & 0 deletions netcdf-java-platform/build.gradle
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,9 @@ dependencies {
// command line parser
api 'com.beust:jcommander:1.78'

// cdm-core
api 'commons-math:commons-math:1.2'

// cdm-grib
api 'edu.ucar:jj2000:5.4'
api 'org.jsoup:jsoup:1.11.2' // HTML scraper used in GRIB
Expand Down

0 comments on commit b844a98

Please sign in to comment.