-
Notifications
You must be signed in to change notification settings - Fork 69
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #1210 from cssjessica/standardizer
Standardizer
- Loading branch information
Showing
9 changed files
with
241 additions
and
2 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,67 @@ | ||
package ucar.nc2.filter; | ||
|
||
import java.io.IOException; | ||
import java.util.HashMap; | ||
import java.util.Map; | ||
import org.apache.commons.math.stat.descriptive.SummaryStatistics; | ||
import ucar.ma2.Array; | ||
import ucar.ma2.DataType; | ||
import ucar.ma2.IndexIterator; | ||
import ucar.nc2.dataset.VariableDS; | ||
|
||
public class Standardizer { | ||
|
||
private final ScaleOffset scaleOffset; | ||
private final double mean; | ||
private final double stdDev; | ||
|
||
public static Standardizer createFromVariable(VariableDS var) { | ||
try { | ||
Array arr = var.read(); | ||
DataType type = var.getDataType(); | ||
return createFromArray(arr, type); | ||
} catch (IOException e) { | ||
return new Standardizer(0.0, 1.0, var.getDataType()); | ||
} | ||
} | ||
|
||
public static Standardizer createFromArray(Array arr, DataType type) { | ||
SummaryStatistics statistics = calculationHelper(arr); | ||
return new Standardizer(statistics.getMean(), statistics.getStandardDeviation(), type); | ||
} | ||
|
||
private Standardizer(double mean, double stdDev, DataType type) { | ||
this.mean = mean; | ||
this.stdDev = stdDev; | ||
Map<String, Object> props = new HashMap<>(); | ||
props.put("offset", mean); | ||
props.put("scale", 1 / stdDev); | ||
props.put("dtype", type); | ||
scaleOffset = new ScaleOffset(props); | ||
} | ||
|
||
private static SummaryStatistics calculationHelper(Array arr) { | ||
SummaryStatistics sumStat = new SummaryStatistics(); | ||
IndexIterator iterArr = arr.getIndexIterator(); | ||
while (iterArr.hasNext()) { | ||
Number value = (Number) iterArr.getObjectNext(); | ||
if (!Double.isNaN(value.doubleValue())) { | ||
sumStat.addValue(value.doubleValue()); | ||
} | ||
} | ||
return sumStat; | ||
} | ||
|
||
public Array convert(Array arr) { | ||
return scaleOffset.applyScaleOffset(arr); | ||
} | ||
|
||
public double getMean() { | ||
return mean; | ||
} | ||
|
||
public double getStdDev() { | ||
return stdDev; | ||
} | ||
} | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,20 @@ | ||
<?xml version="1.0" encoding="UTF-8"?> | ||
<!-- | ||
~ Copyright (c) 1998-2023 University Corporation for Atmospheric Research/Unidata | ||
~ See LICENSE for license information. | ||
~ | ||
~ Declares three five-element variables (double, float and int), each carrying a | ||
~ "standardize" attribute, in a dataset that sets enhance="all". | ||
--> | ||
|
||
<netcdf xmlns="http://www.unidata.ucar.edu/namespaces/netcdf/ncml-2.2" enhance="all"> | ||
<variable name="doublevar" shape="5" type="double"> | ||
<attribute name="standardize"/> | ||
<values>1.0 2.0 3.0 4.0 5.0</values> | ||
</variable> | ||
<variable name="floatvar" shape="5" type="float"> | ||
<attribute name="standardize"/> | ||
<values>1.0 2.0 3.0 4.0 5.0</values> | ||
</variable> | ||
<variable name="intvar" shape="5" type="int"> | ||
<attribute name="standardize"/> | ||
<values>1 2 3 4 5</values> | ||
</variable> | ||
</netcdf> |
78 changes: 78 additions & 0 deletions
78
cdm/core/src/test/java/ucar/nc2/filter/TestStandardizer.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,78 @@ | ||
package ucar.nc2.filter; | ||
|
||
import static com.google.common.truth.Truth.assertThat; | ||
import static ucar.ma2.MAMath.nearlyEquals; | ||
|
||
import java.nio.ByteOrder; | ||
import org.junit.Test; | ||
import ucar.ma2.Array; | ||
import ucar.ma2.DataType; | ||
import ucar.ma2.IndexIterator; | ||
|
||
public class TestStandardizer { | ||
|
||
public static final double DATA_MEAN = 10.0; | ||
public static final double DATA_STDDEV = 3.3166247903554; | ||
public static double TOLERANCE = 1.0E-9; | ||
|
||
public static final double[] DOUBLES = {Double.NaN, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0}; | ||
public static final Array DATA_DOUBLES = Array.makeFromJavaArray(DOUBLES); | ||
|
||
public static final float[] FLOATS = | ||
{Float.NaN, 5.0F, 6.0F, 7.0F, 8.0F, 9.0F, 10.0F, 11.0F, 12.0F, 13.0F, 14.0F, 15.0F}; | ||
public static final Array DATA_FLOATS = Array.makeFromJavaArray(FLOATS); | ||
|
||
public double[] standardize(Array arr) { | ||
double[] standardizedSample = new double[(int) arr.getSize()]; | ||
IndexIterator iterArr = arr.getIndexIterator(); | ||
for (int i = 0; i < (int) arr.getSize(); i++) { | ||
Number value = (Number) iterArr.getObjectNext(); | ||
standardizedSample[i] = (value.doubleValue() - DATA_MEAN) / DATA_STDDEV; | ||
} | ||
return standardizedSample; | ||
} | ||
|
||
@Test | ||
public void testConvertDouble() { | ||
Standardizer filter = Standardizer.createFromArray(DATA_DOUBLES, DataType.DOUBLE); | ||
double[] dataStandardized = standardize(DATA_DOUBLES); | ||
Array dataConverted = filter.convert(DATA_DOUBLES); | ||
assertThat(nearlyEquals(dataConverted, Array.makeFromJavaArray(dataStandardized))).isTrue(); | ||
} | ||
|
||
@Test | ||
public void testConvertFloat() { | ||
Standardizer filter = Standardizer.createFromArray(DATA_FLOATS, DataType.FLOAT); | ||
double[] dataStandardized = standardize(DATA_FLOATS); | ||
Array dataConverted = filter.convert(DATA_FLOATS); | ||
assertThat(nearlyEquals(dataConverted, Array.makeFromJavaArray(dataStandardized))).isTrue(); | ||
} | ||
|
||
@Test | ||
public void testCalculateMean() { | ||
Standardizer filter = Standardizer.createFromArray(DATA_DOUBLES, DataType.DOUBLE); | ||
double calcMean = filter.getMean(); | ||
assertThat(calcMean).isWithin(TOLERANCE).of(DATA_MEAN); | ||
} | ||
|
||
@Test | ||
public void testCalculateMeanFloat() { | ||
Standardizer filter = Standardizer.createFromArray(DATA_FLOATS, DataType.FLOAT); | ||
double calcMean = filter.getMean(); | ||
assertThat(calcMean).isWithin(TOLERANCE).of(DATA_MEAN); | ||
} | ||
|
||
@Test | ||
public void testCalculateStandardDeviation() { | ||
Standardizer filter = Standardizer.createFromArray(DATA_DOUBLES, DataType.DOUBLE); | ||
double calcStd = filter.getStdDev(); | ||
assertThat(calcStd).isWithin(TOLERANCE).of(DATA_STDDEV); | ||
} | ||
|
||
@Test | ||
public void testCalculateStandardDeviationFloat() { | ||
Standardizer filter = Standardizer.createFromArray(DATA_FLOATS, DataType.FLOAT); | ||
double calcStd = filter.getStdDev(); | ||
assertThat(calcStd).isWithin(TOLERANCE).of(DATA_STDDEV); | ||
} | ||
} |
53 changes: 53 additions & 0 deletions
53
cdm/core/src/test/java/ucar/nc2/ncml/TestEnhanceStandardizer.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,53 @@ | ||
package ucar.nc2.ncml; | ||
|
||
import static com.google.common.truth.Truth.assertThat; | ||
import static ucar.ma2.MAMath.nearlyEquals; | ||
|
||
import java.io.IOException; | ||
import org.junit.Test; | ||
import ucar.ma2.Array; | ||
import ucar.ma2.DataType; | ||
import ucar.nc2.NetcdfFile; | ||
import ucar.nc2.Variable; | ||
import ucar.nc2.dataset.NetcdfDatasets; | ||
import ucar.unidata.util.test.TestDir; | ||
|
||
public class TestEnhanceStandardizer { | ||
|
||
private static String dataDir = TestDir.cdmLocalTestDataDir + "ncml/enhance/"; | ||
public static final double[] DOUBLES = {-1.26491106406735, -0.63245553203368, 0, 0.63245553203368, 1.26491106406735}; | ||
public static final Array DATA_DOUBLES = Array.makeFromJavaArray(DOUBLES); | ||
public static final float[] FLOATS = | ||
{-1.26491106406735F, -0.63245553203368F, 0, 0.63245553203368F, 1.26491106406735F}; | ||
public static final Array DATA_FLOATS = Array.makeFromJavaArray(FLOATS); | ||
public static final int[] INTS = {1, 2, 3, 4, 5}; | ||
public static final Array DATA_INTS = Array.makeFromJavaArray(INTS); | ||
|
||
@Test | ||
public void testEnhanceStandardizer() throws IOException { | ||
try (NetcdfFile ncfile = NetcdfDatasets.openDataset(dataDir + "testStandardizer.ncml", true, null)) { | ||
Variable doubleVar = ncfile.findVariable("doublevar"); | ||
assertThat((Object) doubleVar).isNotNull(); | ||
assertThat(doubleVar.getDataType()).isEqualTo(DataType.DOUBLE); | ||
assertThat(doubleVar.attributes().hasAttribute("standardize")).isTrue(); | ||
Array dataDoubles = doubleVar.read(); | ||
assertThat(nearlyEquals(dataDoubles, DATA_DOUBLES)).isTrue(); | ||
|
||
Variable floatVar = ncfile.findVariable("floatvar"); | ||
assertThat((Object) floatVar).isNotNull(); | ||
assertThat(floatVar.getDataType()).isEqualTo(DataType.FLOAT); | ||
assertThat(doubleVar.attributes().hasAttribute("standardize")).isTrue(); | ||
Array dataFloats = doubleVar.read(); | ||
assertThat(nearlyEquals(dataFloats, DATA_FLOATS)).isTrue(); | ||
|
||
Variable intVar = ncfile.findVariable("intvar"); | ||
assertThat((Object) intVar).isNotNull(); | ||
assertThat(intVar.getDataType()).isEqualTo(DataType.INT); | ||
assertThat(intVar.attributes().hasAttribute("standardize")).isTrue(); | ||
Array data = intVar.read(); | ||
assertThat(nearlyEquals(data, DATA_INTS)).isTrue(); // The enhancement doesn't apply to ints, so the data should | ||
// be equal to the input array | ||
} | ||
} | ||
|
||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters