From 517acedfad2a39700c3d6615ee9c50f9c5e63fdb Mon Sep 17 00:00:00 2001 From: jessica Date: Thu, 15 Jun 2023 14:59:11 -0500 Subject: [PATCH 01/15] Class template for standard scaler --- .../java/ucar/nc2/filter/StandardScaler.java | 31 +++++++++++++++++++ .../ucar/nc2/filter/TestStandardScaler.java | 16 ++++++++++ 2 files changed, 47 insertions(+) create mode 100644 cdm/core/src/main/java/ucar/nc2/filter/StandardScaler.java create mode 100644 cdm/core/src/test/java/ucar/nc2/filter/TestStandardScaler.java diff --git a/cdm/core/src/main/java/ucar/nc2/filter/StandardScaler.java b/cdm/core/src/main/java/ucar/nc2/filter/StandardScaler.java new file mode 100644 index 0000000000..45a34fa06c --- /dev/null +++ b/cdm/core/src/main/java/ucar/nc2/filter/StandardScaler.java @@ -0,0 +1,31 @@ +package ucar.nc2.filter; + +import java.io.IOException; + +public class StandardScaler extends Filter { + + private static final String name = "Standard Scaler"; + + private static final int id = -1; + + + @Override + public String getName() { + return name; + } + + @Override + public int getId() { + return id; + } + + @Override + public byte[] encode(byte[] dataIn) { + return dataIn; + } + + @Override + public byte[] decode(byte[] dataIn) { + return dataIn; + } +} diff --git a/cdm/core/src/test/java/ucar/nc2/filter/TestStandardScaler.java b/cdm/core/src/test/java/ucar/nc2/filter/TestStandardScaler.java new file mode 100644 index 0000000000..40611c7905 --- /dev/null +++ b/cdm/core/src/test/java/ucar/nc2/filter/TestStandardScaler.java @@ -0,0 +1,16 @@ +package ucar.nc2.filter; + +import static com.google.common.truth.Truth.assertThat; +import org.junit.Test; + +public class TestStandardScaler { + + @Test + public void shouldEncodeDecode() { + StandardScaler filter = new StandardScaler(); + byte[] array = new byte[10]; + byte[] encoded = filter.encode(array); + byte[] decoded = filter.decode(encoded); + assertThat(decoded).isEqualTo(array); + } +} From 21363009fcdc139f4eda42def4401e0b4d4f680d Mon Sep 17 00:00:00 2001 From: jessica Date: Mon, 26 Jun 2023 14:11:46 -0500 Subject: [PATCH 02/15] normalize data and add simple tests --- .../java/ucar/nc2/filter/StandardScaler.java | 64 ++++++++++++- .../ucar/nc2/filter/TestStandardScaler.java | 90 ++++++++++++++++++- 2 files changed, 148 insertions(+), 6 deletions(-) diff --git a/cdm/core/src/main/java/ucar/nc2/filter/StandardScaler.java b/cdm/core/src/main/java/ucar/nc2/filter/StandardScaler.java index 45a34fa06c..c6c4eec945 100644 --- a/cdm/core/src/main/java/ucar/nc2/filter/StandardScaler.java +++ b/cdm/core/src/main/java/ucar/nc2/filter/StandardScaler.java @@ -1,13 +1,68 @@ package ucar.nc2.filter; -import java.io.IOException; +import java.util.HashMap; +import java.util.Map; +import org.apache.commons.math.stat.descriptive.moment.StandardDeviation; +import org.apache.commons.math.stat.descriptive.SummaryStatistics; public class StandardScaler extends Filter { + private final ScaleOffset scaleOffset; private static final String name = "Standard Scaler"; - private static final int id = -1; + private final double mean; + private final double stdDev; + + public StandardScaler(double[] arr){ + mean = calculateMean(arr); + stdDev = calculateStandardDeviation(arr); + Map props = new HashMap<>(); + props.put("offset", mean); + props.put("scale", (int) stdDev); + props.put("dtype", ">f8"); + scaleOffset = new ScaleOffset(props); + } + + public double[] flattenArray(double[][] arr) { + double[] result= new double[arr.length * arr[0].length]; + int index = 0; + for (int i = 0; i < arr.length; i++) { + for (int j = 0; j < arr[0].length; j++) { + result[index] = arr[i][j]; + index++; + } + } + return result; + } + private double calculateMean(double[] data) { + SummaryStatistics curr = new SummaryStatistics(); + for (int a = 0; a < data.length; a++) { + curr.addValue(data[a]); + } + return curr.getMean(); + } + + private double calculateStandardDeviation(double[] data) { + StandardDeviation stdDev = new StandardDeviation(); + return stdDev.evaluate(data); + } + + public double[] calculateStandardScaler(double[] data, double mean, double stdDev){ + double[] scaledData = new double[data.length]; + for (int i = 0; i < data.length; i++) { + // remove int conversion later + scaledData[i] = (data[i] - mean) / (int) stdDev; + } + return scaledData; + } + public double getMean(){ + return mean; + } + + public double getStdDev() { + return stdDev; + } @Override public String getName() { @@ -21,11 +76,12 @@ public int getId() { @Override public byte[] encode(byte[] dataIn) { - return dataIn; + return scaleOffset.encode(dataIn); } @Override public byte[] decode(byte[] dataIn) { - return dataIn; + return scaleOffset.decode(dataIn); } } + diff --git a/cdm/core/src/test/java/ucar/nc2/filter/TestStandardScaler.java b/cdm/core/src/test/java/ucar/nc2/filter/TestStandardScaler.java index 40611c7905..7235bf6299 100644 --- a/cdm/core/src/test/java/ucar/nc2/filter/TestStandardScaler.java +++ b/cdm/core/src/test/java/ucar/nc2/filter/TestStandardScaler.java @@ -1,16 +1,102 @@ package ucar.nc2.filter; import static com.google.common.truth.Truth.assertThat; +import static org.junit.Assert.assertEquals; + +import java.nio.ByteBuffer; +import java.nio.ByteOrder; +import org.apache.commons.math.stat.descriptive.DescriptiveStatistics; import org.junit.Test; public class TestStandardScaler { @Test - public void shouldEncodeDecode() { - StandardScaler filter = new StandardScaler(); + public void testEncodeDecode() { + double[] data = {1.0, 2.0, 3.0, 4.0, 5.0}; + StandardScaler filter = new StandardScaler(data); byte[] array = new byte[10]; byte[] encoded = filter.encode(array); byte[] decoded = filter.decode(encoded); assertThat(decoded).isEqualTo(array); } + + @Test + public void testEncode() { + double[] data = {1.0, 2.0, 3.0, 4.0, 5.0}; + StandardScaler filter = new StandardScaler(data); + double dataMean = filter.getMean(); + double dataStdDev = filter.getStdDev(); + double[] dataStandardScaler = filter.calculateStandardScaler(data, dataMean, dataStdDev); + byte[] barray = createByteArray(data); + byte[] encoded = filter.encode(barray); + double[] dencoded = createDoubleArray(encoded); + assertThat(dencoded).isEqualTo(dataStandardScaler); + } +// @Test +// public void testFlattenArray(){ +// double[][] data2D = {{1.0, 2.0, 3.0}, {4.0, 5.0, 6.0}}; +// double[] data1D = {1.0, 2.0, 3.0, 4.0, 5.0, 6.0}; +// StandardScaler filter = new StandardScaler(); +// double[] flatData = filter.flattenArray(data2D); +// assertThat(flatData).isEqualTo(data1D); +// } + + @Test + public void testCalculateMean(){ + double[] data = {1.0, 2.0, 3.0, 4.0, 5.0}; + StandardScaler filter = new StandardScaler(data); + double calcMean = filter.getMean(); + assertThat(calcMean).isEqualTo(3); + } + + @Test + public void testCalculateStandardDeviation(){ + double[] data = {1.0, 2.0, 3.0, 4.0, 5.0}; + StandardScaler filter = new StandardScaler(data); + double calcStd = filter.getStdDev(); + assertThat(calcStd).isEqualTo(Math.sqrt(2.5)); + } + + @Test + public void testCalculateStandardScaler(){ + double[] data = {1.0, 2.0, 3.0, 4.0, 5.0}; + StandardScaler filter = new StandardScaler(data); + double dataMean = filter.getMean(); + double dataStdDev = filter.getStdDev(); + double[] dataStandardScaler = filter.calculateStandardScaler(data, dataMean, dataStdDev); + double[] scaledData = normalize(data); + assertThat(dataStandardScaler).isEqualTo(scaledData); + } + + public byte[] createByteArray(double[] arr){ + ByteBuffer bb = ByteBuffer.allocate(arr.length * 8); + for (double d: arr) { + bb.putDouble(d); + } + return bb.array(); + } + + public double[] createDoubleArray(byte[] arr){ + ByteBuffer bb = ByteBuffer.wrap(arr); + double[] doubles = new double[arr.length / 8]; + for (int i = 0; i < doubles.length; i++) { + doubles[i] = bb.getDouble(); + } + return doubles; + } + + public double[] normalize(double[] sample) { + DescriptiveStatistics stats = new DescriptiveStatistics(); + for (int i = 0; i < sample.length; i++) { + stats.addValue(sample[i]); + } + double mean = stats.getMean(); + double standardDeviation = stats.getStandardDeviation(); + double[] standardizedSample = new double[sample.length]; + for (int i = 0; i < sample.length; i++) { + standardizedSample[i] = (sample[i] - mean) / standardDeviation; + } + return standardizedSample; + } + } From 776242413e0e5dd87a26f1e1817199af06139fce Mon Sep 17 00:00:00 2001 From: jessica Date: Mon, 26 Jun 2023 15:21:41 -0500 Subject: [PATCH 03/15] wip --- .../java/ucar/nc2/filter/StandardScaler.java | 22 +++++++++---------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/cdm/core/src/main/java/ucar/nc2/filter/StandardScaler.java b/cdm/core/src/main/java/ucar/nc2/filter/StandardScaler.java index c6c4eec945..9703e2f0cb 100644 --- a/cdm/core/src/main/java/ucar/nc2/filter/StandardScaler.java +++ b/cdm/core/src/main/java/ucar/nc2/filter/StandardScaler.java @@ -23,17 +23,17 @@ public StandardScaler(double[] arr){ scaleOffset = new ScaleOffset(props); } - public double[] flattenArray(double[][] arr) { - double[] result= new double[arr.length * arr[0].length]; - int index = 0; - for (int i = 0; i < arr.length; i++) { - for (int j = 0; j < arr[0].length; j++) { - result[index] = arr[i][j]; - index++; - } - } - return result; - } +// public double[] flattenArray(double[][] arr) { +// double[] result= new double[arr.length * arr[0].length]; +// int index = 0; +// for (int i = 0; i < arr.length; i++) { +// for (int j = 0; j < arr[0].length; j++) { +// result[index] = arr[i][j]; +// index++; +// } +// } +// return result; +// } private double calculateMean(double[] data) { SummaryStatistics curr = new SummaryStatistics(); for (int a = 0; a < data.length; a++) { From 76946e5fa523b5ad9eaa1d74364fa70b374c9601 Mon Sep 17 00:00:00 2001 From: jessica Date: Tue, 27 Jun 2023 12:28:54 -0500 Subject: [PATCH 04/15] updated testEncode --- .../main/java/ucar/nc2/filter/StandardScaler.java | 5 ++--- .../java/ucar/nc2/filter/TestStandardScaler.java | 12 +++++++++--- 2 files changed, 11 insertions(+), 6 deletions(-) diff --git a/cdm/core/src/main/java/ucar/nc2/filter/StandardScaler.java b/cdm/core/src/main/java/ucar/nc2/filter/StandardScaler.java index 9703e2f0cb..b85d663b7d 100644 --- a/cdm/core/src/main/java/ucar/nc2/filter/StandardScaler.java +++ b/cdm/core/src/main/java/ucar/nc2/filter/StandardScaler.java @@ -18,7 +18,7 @@ public StandardScaler(double[] arr){ stdDev = calculateStandardDeviation(arr); Map props = new HashMap<>(); props.put("offset", mean); - props.put("scale", (int) stdDev); + props.put("scale", 1/stdDev); props.put("dtype", ">f8"); scaleOffset = new ScaleOffset(props); } @@ -50,8 +50,7 @@ private double calculateStandardDeviation(double[] data) { public double[] calculateStandardScaler(double[] data, double mean, double stdDev){ double[] scaledData = new double[data.length]; for (int i = 0; i < data.length; i++) { - // remove int conversion later - scaledData[i] = (data[i] - mean) / (int) stdDev; + scaledData[i] = (data[i] - mean) / stdDev; } return scaledData; } diff --git a/cdm/core/src/test/java/ucar/nc2/filter/TestStandardScaler.java b/cdm/core/src/test/java/ucar/nc2/filter/TestStandardScaler.java index 7235bf6299..64a7287af9 100644 --- a/cdm/core/src/test/java/ucar/nc2/filter/TestStandardScaler.java +++ b/cdm/core/src/test/java/ucar/nc2/filter/TestStandardScaler.java @@ -5,8 +5,11 @@ import java.nio.ByteBuffer; import java.nio.ByteOrder; +import java.util.Arrays; import org.apache.commons.math.stat.descriptive.DescriptiveStatistics; import org.junit.Test; +import ucar.ma2.Array; +import ucar.ma2.DataType; public class TestStandardScaler { @@ -27,10 +30,13 @@ public void testEncode() { double dataMean = filter.getMean(); double dataStdDev = filter.getStdDev(); double[] dataStandardScaler = filter.calculateStandardScaler(data, dataMean, dataStdDev); - byte[] barray = createByteArray(data); +// FilterHelpers helper = new FilterHelpers(); + byte[] barray = FilterHelpers.arrayToBytes(Array.makeFromJavaArray(data), DataType.DOUBLE, ByteOrder.BIG_ENDIAN); +// byte[] barray = createByteArray(data); byte[] encoded = filter.encode(barray); - double[] dencoded = createDoubleArray(encoded); - assertThat(dencoded).isEqualTo(dataStandardScaler); + Array dencoded = FilterHelpers.bytesToArray(encoded, DataType.DOUBLE, ByteOrder.BIG_ENDIAN); +// double[] dencoded = createDoubleArray(encoded); + assertThat(dencoded.getStorage()).isEqualTo(dataStandardScaler); } // @Test // public void testFlattenArray(){ From e992d118eff01737bfc20a74a48caaeeb05a0b13 Mon Sep 17 00:00:00 2001 From: jessica Date: Mon, 26 Jun 2023 15:43:28 -0500 Subject: [PATCH 05/15] update dependencies --- cdm/core/build.gradle | 2 ++ netcdf-java-platform/build.gradle | 3 +++ 2 files changed, 5 insertions(+) diff --git a/cdm/core/build.gradle b/cdm/core/build.gradle index 6386298aad..369a4cc19e 100644 --- a/cdm/core/build.gradle +++ b/cdm/core/build.gradle @@ -10,6 +10,8 @@ apply plugin: 'groovy' // For Spock tests. dependencies { api enforcedPlatform(project(':netcdf-java-platform')) + implementation 'commons-math:commons-math' + testImplementation enforcedPlatform(project(':netcdf-java-testing-platform')) compile project(':udunits') diff --git a/netcdf-java-platform/build.gradle b/netcdf-java-platform/build.gradle index 1a56304631..a9279f6357 100644 --- a/netcdf-java-platform/build.gradle +++ b/netcdf-java-platform/build.gradle @@ -36,6 +36,9 @@ dependencies { // command line parser api 'com.beust:jcommander:1.78' + // cdm-core + api 'commons-math:commons-math:1.2' + // cdm-grib api 'edu.ucar:jj2000:5.4' api 'org.jsoup:jsoup:1.11.2' // HTML scraper used in GRIB From 5108fb69f0f2fbd5a8f5381119c0b1af2e986aa7 Mon Sep 17 00:00:00 2001 From: jessica Date: Wed, 28 Jun 2023 14:08:42 -0500 Subject: [PATCH 06/15] changes to array type --- .../java/ucar/nc2/filter/StandardScaler.java | 45 ++--- .../ucar/nc2/filter/TestStandardScaler.java | 170 ++++++++++-------- 2 files changed, 113 insertions(+), 102 deletions(-) diff --git a/cdm/core/src/main/java/ucar/nc2/filter/StandardScaler.java b/cdm/core/src/main/java/ucar/nc2/filter/StandardScaler.java index b85d663b7d..ee96baa6d9 100644 --- a/cdm/core/src/main/java/ucar/nc2/filter/StandardScaler.java +++ b/cdm/core/src/main/java/ucar/nc2/filter/StandardScaler.java @@ -2,8 +2,9 @@ import java.util.HashMap; import java.util.Map; -import org.apache.commons.math.stat.descriptive.moment.StandardDeviation; import org.apache.commons.math.stat.descriptive.SummaryStatistics; +import ucar.ma2.Array; +import ucar.ma2.IndexIterator; public class StandardScaler extends Filter { @@ -13,7 +14,7 @@ public class StandardScaler extends Filter { private final double mean; private final double stdDev; - public StandardScaler(double[] arr){ + public StandardScaler(Array arr){ mean = calculateMean(arr); stdDev = calculateStandardDeviation(arr); Map props = new HashMap<>(); @@ -23,36 +24,24 @@ public StandardScaler(double[] arr){ scaleOffset = new ScaleOffset(props); } -// public double[] flattenArray(double[][] arr) { -// double[] result= new double[arr.length * arr[0].length]; -// int index = 0; -// for (int i = 0; i < arr.length; i++) { -// for (int j = 0; j < arr[0].length; j++) { -// result[index] = arr[i][j]; -// index++; -// } -// } -// return result; -// } - private double calculateMean(double[] data) { - SummaryStatistics curr = new SummaryStatistics(); - for (int a = 0; a < data.length; a++) { - curr.addValue(data[a]); + private double calculateMean(Array arr) { + SummaryStatistics cur = new SummaryStatistics(); + IndexIterator iterArr = arr.getIndexIterator(); + while (iterArr.hasNext()) { + Number value = (Number) iterArr.getObjectNext(); + cur.addValue(value.doubleValue()); } - return curr.getMean(); + return cur.getMean(); } - private double calculateStandardDeviation(double[] data) { - StandardDeviation stdDev = new StandardDeviation(); - return stdDev.evaluate(data); - } - - public double[] calculateStandardScaler(double[] data, double mean, double stdDev){ - double[] scaledData = new double[data.length]; - for (int i = 0; i < data.length; i++) { - scaledData[i] = (data[i] - mean) / stdDev; + private double calculateStandardDeviation(Array arr) { + SummaryStatistics cur = new SummaryStatistics(); + IndexIterator iterArr = arr.getIndexIterator(); + while (iterArr.hasNext()) { + Number value = (Number) iterArr.getObjectNext(); + cur.addValue(value.doubleValue()); } - return scaledData; + return cur.getStandardDeviation(); } public double getMean(){ diff --git a/cdm/core/src/test/java/ucar/nc2/filter/TestStandardScaler.java b/cdm/core/src/test/java/ucar/nc2/filter/TestStandardScaler.java index 64a7287af9..17e39030a0 100644 --- a/cdm/core/src/test/java/ucar/nc2/filter/TestStandardScaler.java +++ b/cdm/core/src/test/java/ucar/nc2/filter/TestStandardScaler.java @@ -1,108 +1,130 @@ package ucar.nc2.filter; import static com.google.common.truth.Truth.assertThat; -import static org.junit.Assert.assertEquals; +import static ucar.ma2.MAMath.nearlyEquals; -import java.nio.ByteBuffer; import java.nio.ByteOrder; -import java.util.Arrays; -import org.apache.commons.math.stat.descriptive.DescriptiveStatistics; +import java.util.Random; import org.junit.Test; import ucar.ma2.Array; import ucar.ma2.DataType; +import ucar.ma2.IndexIterator; public class TestStandardScaler { + public static final double DATA_MEAN = 100.0; + public static final double DATA_STDDEV = 10.0; + public static final int DATA_LEN = 10; + public static double TOLERANCE = 1.0E-10; + public static double GAP = 1.0E-2; + + public static double[] createRandData(){ + double[] randData= new double[DATA_LEN]; + Random r = new Random(); + for (int i = 0; i < randData.length; i++) { + randData[i] = r.nextGaussian() * DATA_STDDEV + DATA_MEAN; + } + return randData; + } + + double[] randData = createRandData(); + + public double[] normalize(double[] sample) { + double[] standardizedSample = new double[sample.length]; + for (int i = 0; i < sample.length; i++) { + standardizedSample[i] = (sample[i] - DATA_MEAN) / DATA_STDDEV; + } + return standardizedSample; + } + + public double[] denormalize(double[] sample){ + double[] descaledData = new double[sample.length]; + for (int i = 0; i < sample.length; i++) { + descaledData[i] = sample[i] * DATA_STDDEV + DATA_MEAN; + } + return descaledData; + } + + public double[] createDoubleArray(Array arr) { + double[] out = new double[arr.getShape()[0]]; + IndexIterator iterArr = arr.getIndexIterator(); + int index = 0; + while (iterArr.hasNext()) { + Number value = (Number) iterArr.getObjectNext(); + out[index] = value.doubleValue(); + } + return out; + } + + public Array createArray(double[] arr) { + DataType outType = DataType.DOUBLE; + Array out = Array.factory(outType, new int[]{arr.length}); + IndexIterator iterOut = out.getIndexIterator(); + for (int i = 0; i < arr.length; i++) { + iterOut.setObjectNext(arr[i]); + } + return out; + } + @Test public void testEncodeDecode() { - double[] data = {1.0, 2.0, 3.0, 4.0, 5.0}; - StandardScaler filter = new StandardScaler(data); - byte[] array = new byte[10]; - byte[] encoded = filter.encode(array); + StandardScaler filter = new StandardScaler(createArray(randData)); + byte[] barray = FilterHelpers.arrayToBytes(Array.makeFromJavaArray(randData), DataType.DOUBLE, ByteOrder.BIG_ENDIAN); + byte[] encoded = filter.encode(barray); byte[] decoded = filter.decode(encoded); - assertThat(decoded).isEqualTo(array); + Array convertedDecoded = FilterHelpers.bytesToArray(decoded, DataType.DOUBLE, ByteOrder.BIG_ENDIAN); + assertThat(convertedDecoded.getStorage()).isEqualTo(randData); } @Test public void testEncode() { - double[] data = {1.0, 2.0, 3.0, 4.0, 5.0}; - StandardScaler filter = new StandardScaler(data); - double dataMean = filter.getMean(); - double dataStdDev = filter.getStdDev(); - double[] dataStandardScaler = filter.calculateStandardScaler(data, dataMean, dataStdDev); -// FilterHelpers helper = new FilterHelpers(); - byte[] barray = FilterHelpers.arrayToBytes(Array.makeFromJavaArray(data), DataType.DOUBLE, ByteOrder.BIG_ENDIAN); -// byte[] barray = createByteArray(data); + StandardScaler filter = new StandardScaler(createArray(randData)); + double[] dataStandardScaler = normalize(randData); + byte[] barray = FilterHelpers.arrayToBytes(Array.makeFromJavaArray(randData), DataType.DOUBLE, ByteOrder.BIG_ENDIAN); byte[] encoded = filter.encode(barray); Array dencoded = FilterHelpers.bytesToArray(encoded, DataType.DOUBLE, ByteOrder.BIG_ENDIAN); -// double[] dencoded = createDoubleArray(encoded); - assertThat(dencoded.getStorage()).isEqualTo(dataStandardScaler); +// assertThat(dencoded.getStorage()).isEqualTo(dataStandardScaler); + assertThat(nearlyEquals(dencoded, createArray(dataStandardScaler))).isTrue(); + + } + + @Test + public void testDecode() { + StandardScaler filter = new StandardScaler(createArray(randData)); + double DataMean = filter.getMean(); + double DataStdDev = filter.getStdDev(); + double[] dataStandardScaler = denormalize(randData); + byte[] barray = FilterHelpers.arrayToBytes(Array.makeFromJavaArray(randData), DataType.DOUBLE, ByteOrder.BIG_ENDIAN); + byte[] decoded = filter.decode(barray); + Array convertedDecoded = FilterHelpers.bytesToArray(decoded, DataType.DOUBLE, ByteOrder.BIG_ENDIAN); + assertThat(convertedDecoded.getStorage()).isEqualTo(dataStandardScaler); } -// @Test -// public void testFlattenArray(){ -// double[][] data2D = {{1.0, 2.0, 3.0}, {4.0, 5.0, 6.0}}; -// double[] data1D = {1.0, 2.0, 3.0, 4.0, 5.0, 6.0}; -// StandardScaler filter = new StandardScaler(); -// double[] flatData = filter.flattenArray(data2D); -// assertThat(flatData).isEqualTo(data1D); -// } @Test public void testCalculateMean(){ - double[] data = {1.0, 2.0, 3.0, 4.0, 5.0}; - StandardScaler filter = new StandardScaler(data); +// double[] data = {1.0, 2.0, 3.0, 4.0, 5.0}; + StandardScaler filter = new StandardScaler(createArray(randData)); double calcMean = filter.getMean(); - assertThat(calcMean).isEqualTo(3); + assertThat(calcMean).isWithin(GAP).of(DATA_MEAN); } @Test public void testCalculateStandardDeviation(){ - double[] data = {1.0, 2.0, 3.0, 4.0, 5.0}; - StandardScaler filter = new StandardScaler(data); +// double[] data = {1.0, 2.0, 3.0, 4.0, 5.0}; + StandardScaler filter = new StandardScaler(createArray(randData)); double calcStd = filter.getStdDev(); - assertThat(calcStd).isEqualTo(Math.sqrt(2.5)); - } - - @Test - public void testCalculateStandardScaler(){ - double[] data = {1.0, 2.0, 3.0, 4.0, 5.0}; - StandardScaler filter = new StandardScaler(data); - double dataMean = filter.getMean(); - double dataStdDev = filter.getStdDev(); - double[] dataStandardScaler = filter.calculateStandardScaler(data, dataMean, dataStdDev); - double[] scaledData = normalize(data); - assertThat(dataStandardScaler).isEqualTo(scaledData); - } - - public byte[] createByteArray(double[] arr){ - ByteBuffer bb = ByteBuffer.allocate(arr.length * 8); - for (double d: arr) { - bb.putDouble(d); - } - return bb.array(); - } - - public double[] createDoubleArray(byte[] arr){ - ByteBuffer bb = ByteBuffer.wrap(arr); - double[] doubles = new double[arr.length / 8]; - for (int i = 0; i < doubles.length; i++) { - doubles[i] = bb.getDouble(); - } - return doubles; + assertThat(calcStd).isWithin(GAP).of(DATA_STDDEV); } - public double[] normalize(double[] sample) { - DescriptiveStatistics stats = new DescriptiveStatistics(); - for (int i = 0; i < sample.length; i++) { - stats.addValue(sample[i]); - } - double mean = stats.getMean(); - double standardDeviation = stats.getStandardDeviation(); - double[] standardizedSample = new double[sample.length]; - for (int i = 0; i < sample.length; i++) { - standardizedSample[i] = (sample[i] - mean) / standardDeviation; - } - return standardizedSample; - } +// @Test +// public void testNormalize(){ +//// double[] data = {1.0, 2.0, 3.0, 4.0, 5.0}; +// StandardScaler filter = new StandardScaler(randData); +// double dataMean = filter.getMean(); +// double dataStdDev = filter.getStdDev(); +// double[] dataStandardScaler = filter.normalize(randData, dataMean, dataStdDev); +// double[] scaledData = normalize(randData); +// assertThat(dataStandardScaler).usingTolerance(GAP).equals(scaledData); +// } } From 3999898f6c48332c16c52fe4b659430741aab9b3 Mon Sep 17 00:00:00 2001 From: jessica Date: Wed, 28 Jun 2023 14:38:57 -0500 Subject: [PATCH 07/15] tests for array passing --- .../ucar/nc2/filter/TestStandardScaler.java | 103 +++++++++--------- 1 file changed, 50 insertions(+), 53 deletions(-) diff --git a/cdm/core/src/test/java/ucar/nc2/filter/TestStandardScaler.java b/cdm/core/src/test/java/ucar/nc2/filter/TestStandardScaler.java index 17e39030a0..3980c8ca13 100644 --- a/cdm/core/src/test/java/ucar/nc2/filter/TestStandardScaler.java +++ b/cdm/core/src/test/java/ucar/nc2/filter/TestStandardScaler.java @@ -4,7 +4,6 @@ import static ucar.ma2.MAMath.nearlyEquals; import java.nio.ByteOrder; -import java.util.Random; import org.junit.Test; import ucar.ma2.Array; import ucar.ma2.DataType; @@ -12,22 +11,25 @@ public class TestStandardScaler { - public static final double DATA_MEAN = 100.0; - public static final double DATA_STDDEV = 10.0; - public static final int DATA_LEN = 10; + public static final double DATA_MEAN = 10.0; + public static final double DATA_STDDEV = 3.3166247903554; +// public static final int DATA_LEN = 10; public static double TOLERANCE = 1.0E-10; - public static double GAP = 1.0E-2; - public static double[] createRandData(){ - double[] randData= new double[DATA_LEN]; - Random r = new Random(); - for (int i = 0; i < randData.length; i++) { - randData[i] = r.nextGaussian() * DATA_STDDEV + DATA_MEAN; - } - return randData; - } + public static final double[] DOUBLES = {5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0}; + + public static final Array DATA = Array.makeFromJavaArray(DOUBLES); - double[] randData = createRandData(); + +// public static double[] createRandData(){ +// double[] randData= new double[DATA_LEN]; +// Random r = new Random(); +// for (int i = 0; i < randData.length; i++) { +// randData[i] = r.nextGaussian() * DATA_STDDEV + DATA_MEAN; +// } +// return randData; +// } +// double[] randData = createRandData(); public double[] normalize(double[] sample) { double[] standardizedSample = new double[sample.length]; @@ -45,75 +47,70 @@ public double[] denormalize(double[] sample){ return descaledData; } - public double[] createDoubleArray(Array arr) { - double[] out = new double[arr.getShape()[0]]; - IndexIterator iterArr = arr.getIndexIterator(); - int index = 0; - while (iterArr.hasNext()) { - Number value = (Number) iterArr.getObjectNext(); - out[index] = value.doubleValue(); - } - return out; - } +// public double[] createDoubleArray(Array arr) { +// double[] out = new double[arr.getShape()[0]]; +// IndexIterator iterArr = arr.getIndexIterator(); +// int index = 0; +// while (iterArr.hasNext()) { +// Number value = (Number) iterArr.getObjectNext(); +// out[index] = value.doubleValue(); +// } +// return out; +// } - public Array createArray(double[] arr) { - DataType outType = DataType.DOUBLE; - Array out = Array.factory(outType, new int[]{arr.length}); - IndexIterator iterOut = out.getIndexIterator(); - for (int i = 0; i < arr.length; i++) { - iterOut.setObjectNext(arr[i]); - } - return out; - } +// public Array createArray(double[] arr) { +// DataType outType = DataType.DOUBLE; +// Array out = Array.factory(outType, new int[]{arr.length}); +// IndexIterator iterOut = out.getIndexIterator(); +// for (int i = 0; i < arr.length; i++) { +// iterOut.setObjectNext(arr[i]); +// } +// return out; +// } @Test public void testEncodeDecode() { - StandardScaler filter = new StandardScaler(createArray(randData)); - byte[] barray = FilterHelpers.arrayToBytes(Array.makeFromJavaArray(randData), DataType.DOUBLE, ByteOrder.BIG_ENDIAN); + StandardScaler filter = new StandardScaler(DATA); + byte[] barray = FilterHelpers.arrayToBytes(DATA, DataType.DOUBLE, ByteOrder.BIG_ENDIAN); byte[] encoded = filter.encode(barray); byte[] decoded = filter.decode(encoded); Array convertedDecoded = FilterHelpers.bytesToArray(decoded, DataType.DOUBLE, ByteOrder.BIG_ENDIAN); - assertThat(convertedDecoded.getStorage()).isEqualTo(randData); + assertThat(nearlyEquals(convertedDecoded, DATA)).isTrue(); } @Test public void testEncode() { - StandardScaler filter = new StandardScaler(createArray(randData)); - double[] dataStandardScaler = normalize(randData); - byte[] barray = FilterHelpers.arrayToBytes(Array.makeFromJavaArray(randData), DataType.DOUBLE, ByteOrder.BIG_ENDIAN); + StandardScaler filter = new StandardScaler(DATA); + double[] dataStandardScaler = normalize(DOUBLES); + byte[] barray = FilterHelpers.arrayToBytes(DATA, DataType.DOUBLE, ByteOrder.BIG_ENDIAN); byte[] encoded = filter.encode(barray); Array dencoded = FilterHelpers.bytesToArray(encoded, DataType.DOUBLE, ByteOrder.BIG_ENDIAN); -// assertThat(dencoded.getStorage()).isEqualTo(dataStandardScaler); - assertThat(nearlyEquals(dencoded, createArray(dataStandardScaler))).isTrue(); + assertThat(nearlyEquals(dencoded, Array.makeFromJavaArray(dataStandardScaler))).isTrue(); } @Test public void testDecode() { - StandardScaler filter = new StandardScaler(createArray(randData)); - double DataMean = filter.getMean(); - double DataStdDev = filter.getStdDev(); - double[] dataStandardScaler = denormalize(randData); - byte[] barray = FilterHelpers.arrayToBytes(Array.makeFromJavaArray(randData), DataType.DOUBLE, ByteOrder.BIG_ENDIAN); + StandardScaler filter = new StandardScaler(DATA); + double[] dataStandardScaler = denormalize(DOUBLES); + byte[] barray = FilterHelpers.arrayToBytes(DATA, DataType.DOUBLE, ByteOrder.BIG_ENDIAN); byte[] decoded = filter.decode(barray); Array convertedDecoded = FilterHelpers.bytesToArray(decoded, DataType.DOUBLE, ByteOrder.BIG_ENDIAN); - assertThat(convertedDecoded.getStorage()).isEqualTo(dataStandardScaler); + assertThat(nearlyEquals(convertedDecoded, Array.makeFromJavaArray(dataStandardScaler))).isTrue(); } @Test public void testCalculateMean(){ -// double[] data = {1.0, 2.0, 3.0, 4.0, 5.0}; - StandardScaler filter = new StandardScaler(createArray(randData)); + StandardScaler filter = new StandardScaler(DATA); double calcMean = filter.getMean(); - assertThat(calcMean).isWithin(GAP).of(DATA_MEAN); + assertThat(calcMean).isWithin(TOLERANCE).of(DATA_MEAN); } @Test public void testCalculateStandardDeviation(){ -// double[] data = {1.0, 2.0, 3.0, 4.0, 5.0}; - StandardScaler filter = new StandardScaler(createArray(randData)); + StandardScaler filter = new StandardScaler(DATA); double calcStd = filter.getStdDev(); - assertThat(calcStd).isWithin(GAP).of(DATA_STDDEV); + assertThat(calcStd).isWithin(TOLERANCE).of(DATA_STDDEV); } // @Test From 0ffbb82f3e2ff3554d039cde83d9326a7d2d6ac9 Mon Sep 17 00:00:00 2001 From: jessica Date: Wed, 28 Jun 2023 17:05:07 -0500 Subject: [PATCH 08/15] include tests for other data types --- .../java/ucar/nc2/filter/StandardScaler.java | 21 ++++- .../ucar/nc2/filter/TestStandardScaler.java | 94 ++++++++----------- 2 files changed, 55 insertions(+), 60 deletions(-) diff --git a/cdm/core/src/main/java/ucar/nc2/filter/StandardScaler.java b/cdm/core/src/main/java/ucar/nc2/filter/StandardScaler.java index ee96baa6d9..4ceead7515 100644 --- a/cdm/core/src/main/java/ucar/nc2/filter/StandardScaler.java +++ b/cdm/core/src/main/java/ucar/nc2/filter/StandardScaler.java @@ -1,10 +1,13 @@ package ucar.nc2.filter; +import java.io.IOException; import java.util.HashMap; import java.util.Map; import org.apache.commons.math.stat.descriptive.SummaryStatistics; import ucar.ma2.Array; +import ucar.ma2.DataType; import ucar.ma2.IndexIterator; +import ucar.nc2.dataset.VariableDS; public class StandardScaler extends Filter { @@ -14,13 +17,19 @@ public class StandardScaler extends Filter { private final double mean; private final double stdDev; - public StandardScaler(Array arr){ + public static StandardScaler createFromVariable(VariableDS var) throws IOException { + Array arr = var.read(); + DataType type = var.getDataType(); + return new StandardScaler(arr, type); + } + + public StandardScaler(Array arr, DataType type){ mean = calculateMean(arr); stdDev = calculateStandardDeviation(arr); Map props = new HashMap<>(); props.put("offset", mean); props.put("scale", 1/stdDev); - props.put("dtype", ">f8"); + props.put("dtype", type); scaleOffset = new ScaleOffset(props); } @@ -29,7 +38,9 @@ private double calculateMean(Array arr) { IndexIterator iterArr = arr.getIndexIterator(); while (iterArr.hasNext()) { Number value = (Number) iterArr.getObjectNext(); - cur.addValue(value.doubleValue()); + if (!Double.isNaN(value.doubleValue())){ + cur.addValue(value.doubleValue()); + } } return cur.getMean(); } @@ -39,7 +50,9 @@ private double calculateStandardDeviation(Array arr) { IndexIterator iterArr = arr.getIndexIterator(); while (iterArr.hasNext()) { Number value = (Number) iterArr.getObjectNext(); - cur.addValue(value.doubleValue()); + if (!Double.isNaN(value.doubleValue())) { + cur.addValue(value.doubleValue()); + } } return cur.getStandardDeviation(); } diff --git a/cdm/core/src/test/java/ucar/nc2/filter/TestStandardScaler.java b/cdm/core/src/test/java/ucar/nc2/filter/TestStandardScaler.java index 3980c8ca13..279294cefa 100644 --- a/cdm/core/src/test/java/ucar/nc2/filter/TestStandardScaler.java +++ b/cdm/core/src/test/java/ucar/nc2/filter/TestStandardScaler.java @@ -7,29 +7,22 @@ import org.junit.Test; import ucar.ma2.Array; import ucar.ma2.DataType; -import ucar.ma2.IndexIterator; public class TestStandardScaler { public static final double DATA_MEAN = 10.0; public static final double DATA_STDDEV = 3.3166247903554; -// public static final int DATA_LEN = 10; public static double TOLERANCE = 1.0E-10; - public static final double[] DOUBLES = {5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0}; + public static final double[] DOUBLES = {Double.NaN, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0}; public static final Array DATA = Array.makeFromJavaArray(DOUBLES); + public static final int[] INTS = {5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15}; + public static final Array DATA_INTS = Array.makeFromJavaArray(INTS); + public static final float[] FLOATS = {5.0F, 6.0F, 7.0F, 8.0F, 9.0F, 10.0F, 11.0F, 12.0F, 13.0F, 14.0F, 15.0F}; -// public static double[] createRandData(){ -// double[] randData= new double[DATA_LEN]; -// Random r = new Random(); -// for (int i = 0; i < randData.length; i++) { -// randData[i] = r.nextGaussian() * DATA_STDDEV + DATA_MEAN; -// } -// return randData; -// } -// double[] randData = createRandData(); + public static final Array DATA_FLOATS = Array.makeFromJavaArray(FLOATS); public double[] normalize(double[] sample) { double[] standardizedSample = new double[sample.length]; @@ -47,81 +40,70 @@ public double[] denormalize(double[] sample){ return descaledData; } -// public double[] createDoubleArray(Array arr) { -// double[] out = new double[arr.getShape()[0]]; -// IndexIterator iterArr = arr.getIndexIterator(); -// int index = 0; -// while (iterArr.hasNext()) { -// Number value = (Number) iterArr.getObjectNext(); -// out[index] = value.doubleValue(); -// } -// return out; -// } - -// public Array createArray(double[] arr) { -// DataType outType = DataType.DOUBLE; -// Array out = Array.factory(outType, new int[]{arr.length}); -// IndexIterator iterOut = out.getIndexIterator(); -// for (int i = 0; i < arr.length; i++) { -// iterOut.setObjectNext(arr[i]); -// } -// return out; -// } - @Test public void testEncodeDecode() { - StandardScaler filter = new StandardScaler(DATA); - byte[] barray = FilterHelpers.arrayToBytes(DATA, DataType.DOUBLE, ByteOrder.BIG_ENDIAN); + StandardScaler filter = new StandardScaler(DATA, DataType.DOUBLE); + byte[] barray = FilterHelpers.arrayToBytes(DATA, DataType.DOUBLE, ByteOrder.LITTLE_ENDIAN); byte[] encoded = filter.encode(barray); byte[] decoded = filter.decode(encoded); - Array convertedDecoded = FilterHelpers.bytesToArray(decoded, DataType.DOUBLE, ByteOrder.BIG_ENDIAN); + Array convertedDecoded = FilterHelpers.bytesToArray(decoded, DataType.DOUBLE, ByteOrder.LITTLE_ENDIAN); assertThat(nearlyEquals(convertedDecoded, DATA)).isTrue(); } + @Test + public void testEncodeDecodeFloat() { + StandardScaler filter = new StandardScaler(DATA_FLOATS, DataType.FLOAT); + byte[] barray = FilterHelpers.arrayToBytes(DATA_FLOATS, DataType.FLOAT, ByteOrder.LITTLE_ENDIAN); + byte[] encoded = filter.encode(barray); + byte[] decoded = filter.decode(encoded); + Array convertedDecoded = FilterHelpers.bytesToArray(decoded, DataType.FLOAT, ByteOrder.LITTLE_ENDIAN); + assertThat(nearlyEquals(convertedDecoded, DATA_FLOATS)).isTrue(); + } + @Test public void testEncode() { - StandardScaler filter = new StandardScaler(DATA); + StandardScaler filter = new StandardScaler(DATA, DataType.DOUBLE); double[] dataStandardScaler = normalize(DOUBLES); - byte[] barray = FilterHelpers.arrayToBytes(DATA, DataType.DOUBLE, ByteOrder.BIG_ENDIAN); + byte[] barray = FilterHelpers.arrayToBytes(DATA, DataType.DOUBLE, ByteOrder.LITTLE_ENDIAN); byte[] encoded = filter.encode(barray); - Array dencoded = FilterHelpers.bytesToArray(encoded, DataType.DOUBLE, ByteOrder.BIG_ENDIAN); + Array dencoded = FilterHelpers.bytesToArray(encoded, DataType.DOUBLE, ByteOrder.LITTLE_ENDIAN); assertThat(nearlyEquals(dencoded, Array.makeFromJavaArray(dataStandardScaler))).isTrue(); } @Test public void testDecode() { - StandardScaler filter = new StandardScaler(DATA); + StandardScaler filter = new StandardScaler(DATA, DataType.DOUBLE); double[] dataStandardScaler = denormalize(DOUBLES); - byte[] barray = FilterHelpers.arrayToBytes(DATA, DataType.DOUBLE, ByteOrder.BIG_ENDIAN); + byte[] barray = FilterHelpers.arrayToBytes(DATA, DataType.DOUBLE, ByteOrder.LITTLE_ENDIAN); byte[] decoded = filter.decode(barray); - Array convertedDecoded = FilterHelpers.bytesToArray(decoded, DataType.DOUBLE, ByteOrder.BIG_ENDIAN); + Array convertedDecoded = FilterHelpers.bytesToArray(decoded, DataType.DOUBLE, ByteOrder.LITTLE_ENDIAN); assertThat(nearlyEquals(convertedDecoded, Array.makeFromJavaArray(dataStandardScaler))).isTrue(); } @Test public void testCalculateMean(){ - StandardScaler filter = new StandardScaler(DATA); + StandardScaler filter = new StandardScaler(DATA, DataType.DOUBLE); + double calcMean = filter.getMean(); + assertThat(calcMean).isWithin(TOLERANCE).of(DATA_MEAN); + } + @Test + public void testCalculateMeanInt(){ + StandardScaler filter = new StandardScaler(DATA_INTS, DataType.INT); double calcMean = filter.getMean(); assertThat(calcMean).isWithin(TOLERANCE).of(DATA_MEAN); } @Test public void testCalculateStandardDeviation(){ - StandardScaler filter = new StandardScaler(DATA); + StandardScaler filter = new StandardScaler(DATA, DataType.DOUBLE); + double calcStd = filter.getStdDev(); + assertThat(calcStd).isWithin(TOLERANCE).of(DATA_STDDEV); + } + @Test + public void testCalculateStandardDeviationInt(){ + StandardScaler filter = new StandardScaler(DATA_INTS, DataType.INT); double calcStd = filter.getStdDev(); assertThat(calcStd).isWithin(TOLERANCE).of(DATA_STDDEV); } - -// @Test -// public void testNormalize(){ -//// double[] data = {1.0, 2.0, 3.0, 4.0, 5.0}; -// StandardScaler filter = new StandardScaler(randData); -// double dataMean = filter.getMean(); -// double dataStdDev = filter.getStdDev(); -// double[] dataStandardScaler = filter.normalize(randData, dataMean, dataStdDev); -// double[] scaledData = normalize(randData); -// assertThat(dataStandardScaler).usingTolerance(GAP).equals(scaledData); -// } - } From ac0fcee68e301a3cb3cd2ce0afca70e14b3894e0 Mon Sep 17 00:00:00 2001 From: jessica Date: Wed, 5 Jul 2023 09:39:11 -0500 Subject: [PATCH 09/15] change of class name and tests refactor --- ...{StandardScaler.java => Standardizer.java} | 12 ++-- ...ndardScaler.java => TestStandardizer.java} | 63 +++++++------------ 2 files changed, 30 insertions(+), 45 deletions(-) rename cdm/core/src/main/java/ucar/nc2/filter/{StandardScaler.java => Standardizer.java} (84%) rename cdm/core/src/test/java/ucar/nc2/filter/{TestStandardScaler.java => TestStandardizer.java} (52%) diff --git a/cdm/core/src/main/java/ucar/nc2/filter/StandardScaler.java b/cdm/core/src/main/java/ucar/nc2/filter/Standardizer.java similarity index 84% rename from cdm/core/src/main/java/ucar/nc2/filter/StandardScaler.java rename to cdm/core/src/main/java/ucar/nc2/filter/Standardizer.java index 4ceead7515..bd2eaae0eb 100644 --- a/cdm/core/src/main/java/ucar/nc2/filter/StandardScaler.java +++ b/cdm/core/src/main/java/ucar/nc2/filter/Standardizer.java @@ -9,26 +9,26 @@ import ucar.ma2.IndexIterator; import ucar.nc2.dataset.VariableDS; -public class StandardScaler extends Filter { +public class Standardizer extends Filter { private final ScaleOffset scaleOffset; - private static final String name = "Standard Scaler"; + private static final String name = "Standardizer"; private static final int id = -1; private final double mean; private final double stdDev; - public static StandardScaler createFromVariable(VariableDS var) throws IOException { + public static Standardizer createFromVariable(VariableDS var) throws IOException { Array arr = var.read(); DataType type = var.getDataType(); - return new StandardScaler(arr, type); + return new Standardizer(arr, type); } - public StandardScaler(Array arr, DataType type){ + public Standardizer(Array arr, DataType type){ mean = calculateMean(arr); stdDev = calculateStandardDeviation(arr); Map props = new HashMap<>(); props.put("offset", mean); - props.put("scale", 1/stdDev); + props.put("scale", 1/stdDev); // update to stdDev after changing scale offset props.put("dtype", type); scaleOffset = new ScaleOffset(props); } diff --git a/cdm/core/src/test/java/ucar/nc2/filter/TestStandardScaler.java b/cdm/core/src/test/java/ucar/nc2/filter/TestStandardizer.java similarity index 52% rename from cdm/core/src/test/java/ucar/nc2/filter/TestStandardScaler.java rename to cdm/core/src/test/java/ucar/nc2/filter/TestStandardizer.java index 279294cefa..0911b25e08 100644 --- a/cdm/core/src/test/java/ucar/nc2/filter/TestStandardScaler.java +++ b/cdm/core/src/test/java/ucar/nc2/filter/TestStandardizer.java @@ -8,23 +8,19 @@ import ucar.ma2.Array; import ucar.ma2.DataType; -public class TestStandardScaler { +public class TestStandardizer { public static final double DATA_MEAN = 10.0; public static final double DATA_STDDEV = 3.3166247903554; - public static double TOLERANCE = 1.0E-10; + public static double TOLERANCE = 1.0E-9; public static final double[] DOUBLES = {Double.NaN, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0}; - public static final Array DATA = Array.makeFromJavaArray(DOUBLES); - public static final int[] INTS = {5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15}; - public static final Array DATA_INTS = Array.makeFromJavaArray(INTS); public static final float[] FLOATS = {5.0F, 6.0F, 7.0F, 8.0F, 9.0F, 10.0F, 11.0F, 12.0F, 13.0F, 14.0F, 15.0F}; - public static final Array DATA_FLOATS = Array.makeFromJavaArray(FLOATS); - public double[] normalize(double[] sample) { + public double[] standardize(double[] sample) { double[] standardizedSample = new double[sample.length]; for (int i = 0; i < sample.length; i++) { standardizedSample[i] = (sample[i] - DATA_MEAN) / DATA_STDDEV; @@ -32,7 +28,7 @@ public double[] normalize(double[] sample) { return standardizedSample; } - public double[] denormalize(double[] sample){ + public double[] unstandardize(double[] sample){ double[] descaledData = new double[sample.length]; for (int i = 0; i < sample.length; i++) { descaledData[i] = sample[i] * DATA_STDDEV + DATA_MEAN; @@ -42,9 +38,9 @@ public double[] denormalize(double[] sample){ @Test public void testEncodeDecode() { - StandardScaler filter = new StandardScaler(DATA, DataType.DOUBLE); - byte[] barray = FilterHelpers.arrayToBytes(DATA, DataType.DOUBLE, ByteOrder.LITTLE_ENDIAN); - byte[] encoded = filter.encode(barray); + Standardizer filter = new Standardizer(DATA, DataType.DOUBLE); + byte[] byestArray = FilterHelpers.arrayToBytes(DATA, DataType.DOUBLE, ByteOrder.LITTLE_ENDIAN); + byte[] encoded = filter.encode(byestArray); byte[] decoded = filter.decode(encoded); Array convertedDecoded = FilterHelpers.bytesToArray(decoded, DataType.DOUBLE, ByteOrder.LITTLE_ENDIAN); assertThat(nearlyEquals(convertedDecoded, DATA)).isTrue(); @@ -52,9 +48,9 @@ public void testEncodeDecode() { @Test public void testEncodeDecodeFloat() { - StandardScaler filter = new StandardScaler(DATA_FLOATS, DataType.FLOAT); - byte[] barray = FilterHelpers.arrayToBytes(DATA_FLOATS, DataType.FLOAT, ByteOrder.LITTLE_ENDIAN); - byte[] encoded = filter.encode(barray); + Standardizer filter = new Standardizer(DATA_FLOATS, DataType.FLOAT); + byte[] bytesArray = FilterHelpers.arrayToBytes(DATA_FLOATS, DataType.FLOAT, ByteOrder.LITTLE_ENDIAN); + byte[] encoded = filter.encode(bytesArray); byte[] decoded = filter.decode(encoded); Array convertedDecoded = FilterHelpers.bytesToArray(decoded, DataType.FLOAT, ByteOrder.LITTLE_ENDIAN); assertThat(nearlyEquals(convertedDecoded, DATA_FLOATS)).isTrue(); @@ -62,47 +58,36 @@ public void testEncodeDecodeFloat() { @Test public void testEncode() { - StandardScaler filter = new StandardScaler(DATA, DataType.DOUBLE); - double[] dataStandardScaler = normalize(DOUBLES); - byte[] barray = FilterHelpers.arrayToBytes(DATA, DataType.DOUBLE, ByteOrder.LITTLE_ENDIAN); - byte[] encoded = filter.encode(barray); - Array dencoded = FilterHelpers.bytesToArray(encoded, DataType.DOUBLE, ByteOrder.LITTLE_ENDIAN); - assertThat(nearlyEquals(dencoded, Array.makeFromJavaArray(dataStandardScaler))).isTrue(); + Standardizer filter = new Standardizer(DATA, DataType.DOUBLE); + double[] dataStandardizer = standardize(DOUBLES); + byte[] bytesArray = FilterHelpers.arrayToBytes(DATA, DataType.DOUBLE, ByteOrder.LITTLE_ENDIAN); + byte[] encoded = filter.encode(bytesArray); + Array convertedDecoded = FilterHelpers.bytesToArray(encoded, DataType.DOUBLE, ByteOrder.LITTLE_ENDIAN); + assertThat(nearlyEquals(convertedDecoded, Array.makeFromJavaArray(dataStandardizer))).isTrue(); } @Test public void testDecode() { - StandardScaler filter = new StandardScaler(DATA, DataType.DOUBLE); - double[] dataStandardScaler = denormalize(DOUBLES); - byte[] barray = FilterHelpers.arrayToBytes(DATA, DataType.DOUBLE, ByteOrder.LITTLE_ENDIAN); - byte[] decoded = filter.decode(barray); + Standardizer filter = new Standardizer(DATA, DataType.DOUBLE); + double[] dataStandardizer = unstandardize(DOUBLES); + byte[] bytesArray = FilterHelpers.arrayToBytes(DATA, DataType.DOUBLE, ByteOrder.LITTLE_ENDIAN); + byte[] decoded = filter.decode(bytesArray); Array convertedDecoded = FilterHelpers.bytesToArray(decoded, DataType.DOUBLE, ByteOrder.LITTLE_ENDIAN); - assertThat(nearlyEquals(convertedDecoded, Array.makeFromJavaArray(dataStandardScaler))).isTrue(); + assertThat(nearlyEquals(convertedDecoded, Array.makeFromJavaArray(dataStandardizer))).isTrue(); } @Test public void testCalculateMean(){ - StandardScaler filter = new StandardScaler(DATA, DataType.DOUBLE); - double calcMean = filter.getMean(); - assertThat(calcMean).isWithin(TOLERANCE).of(DATA_MEAN); - } - @Test - public void testCalculateMeanInt(){ - StandardScaler filter = new StandardScaler(DATA_INTS, DataType.INT); + Standardizer filter = new Standardizer(DATA, DataType.DOUBLE); double calcMean = filter.getMean(); assertThat(calcMean).isWithin(TOLERANCE).of(DATA_MEAN); } + @Test public void testCalculateStandardDeviation(){ - StandardScaler filter = new StandardScaler(DATA, DataType.DOUBLE); - double calcStd = filter.getStdDev(); - assertThat(calcStd).isWithin(TOLERANCE).of(DATA_STDDEV); - } - @Test - public void testCalculateStandardDeviationInt(){ - StandardScaler filter = new StandardScaler(DATA_INTS, DataType.INT); + Standardizer filter = new Standardizer(DATA, DataType.DOUBLE); double calcStd = filter.getStdDev(); assertThat(calcStd).isWithin(TOLERANCE).of(DATA_STDDEV); } From 67fd927e3f3a6a85cfa740f0d95b03d70956d246 Mon Sep 17 00:00:00 2001 From: jessica Date: Wed, 5 Jul 2023 16:29:50 -0500 Subject: [PATCH 10/15] correct standardizer as enhancement and refactor --- .../java/ucar/nc2/filter/Standardizer.java | 52 ++++++------- .../ucar/nc2/filter/TestStandardizer.java | 73 +++++++------------ 2 files changed, 48 insertions(+), 77 deletions(-) diff --git a/cdm/core/src/main/java/ucar/nc2/filter/Standardizer.java b/cdm/core/src/main/java/ucar/nc2/filter/Standardizer.java index bd2eaae0eb..1c679e19cf 100644 --- a/cdm/core/src/main/java/ucar/nc2/filter/Standardizer.java +++ b/cdm/core/src/main/java/ucar/nc2/filter/Standardizer.java @@ -9,7 +9,7 @@ import ucar.ma2.IndexIterator; import ucar.nc2.dataset.VariableDS; -public class Standardizer extends Filter { +public class Standardizer { private final ScaleOffset scaleOffset; private static final String name = "Standardizer"; @@ -17,15 +17,19 @@ public class Standardizer extends Filter { private final double mean; private final double stdDev; - public static Standardizer createFromVariable(VariableDS var) throws IOException { - Array arr = var.read(); - DataType type = var.getDataType(); - return new Standardizer(arr, type); + public static Standardizer createFromVariable(VariableDS var) { + try { + Array arr = var.read(); + DataType type = var.getDataType(); + return new Standardizer(arr, type); + } catch(IOException e) { + return new Standardizer(0.0, 1.0, var.getDataType()); + } } - public Standardizer(Array arr, DataType type){ - mean = calculateMean(arr); - stdDev = calculateStandardDeviation(arr); + private Standardizer(double mean, double stdDev, DataType type){ + this.mean = mean; + this.stdDev = stdDev; Map props = new HashMap<>(); props.put("offset", mean); props.put("scale", 1/stdDev); // update to stdDev after changing scale offset @@ -33,7 +37,11 @@ public Standardizer(Array arr, DataType type){ scaleOffset = new ScaleOffset(props); } - private double calculateMean(Array arr) { + public Standardizer(Array arr, DataType type) { + this(calculateMean(arr), calculateStandardDeviation(arr), type); + } + + private static double calculateMean(Array arr) { SummaryStatistics cur = new SummaryStatistics(); IndexIterator iterArr = arr.getIndexIterator(); while (iterArr.hasNext()) { @@ -45,7 +53,7 @@ private double calculateMean(Array arr) { return cur.getMean(); } - private double calculateStandardDeviation(Array arr) { + private static double calculateStandardDeviation(Array arr) { SummaryStatistics cur = new SummaryStatistics(); IndexIterator iterArr = arr.getIndexIterator(); while (iterArr.hasNext()) { @@ -57,6 +65,10 @@ private double calculateStandardDeviation(Array arr) { return cur.getStandardDeviation(); } + public Array convert(Array arr){ + return scaleOffset.applyScaleOffset(arr); + } + public double getMean(){ return mean; } @@ -64,25 +76,5 @@ public double getMean(){ public double getStdDev() { return stdDev; } - - @Override - public String getName() { - return name; - } - - @Override - public int getId() { - return id; - } - - @Override - public byte[] encode(byte[] dataIn) { - return scaleOffset.encode(dataIn); - } - - @Override - public byte[] decode(byte[] dataIn) { - return scaleOffset.decode(dataIn); - } } diff --git a/cdm/core/src/test/java/ucar/nc2/filter/TestStandardizer.java b/cdm/core/src/test/java/ucar/nc2/filter/TestStandardizer.java index 0911b25e08..cc6d062f32 100644 --- a/cdm/core/src/test/java/ucar/nc2/filter/TestStandardizer.java +++ b/cdm/core/src/test/java/ucar/nc2/filter/TestStandardizer.java @@ -7,6 +7,7 @@ import org.junit.Test; import ucar.ma2.Array; import ucar.ma2.DataType; +import ucar.ma2.IndexIterator; public class TestStandardizer { @@ -17,78 +18,56 @@ public class TestStandardizer { public static final double[] DOUBLES = {Double.NaN, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0}; public static final Array DATA = Array.makeFromJavaArray(DOUBLES); - public static final float[] FLOATS = {5.0F, 6.0F, 7.0F, 8.0F, 9.0F, 10.0F, 11.0F, 12.0F, 13.0F, 14.0F, 15.0F}; + public static final float[] FLOATS = {Float.NaN, 5.0F, 6.0F, 7.0F, 8.0F, 9.0F, 10.0F, 11.0F, 12.0F, 13.0F, 14.0F, 15.0F}; public static final Array DATA_FLOATS = Array.makeFromJavaArray(FLOATS); - public double[] standardize(double[] sample) { - double[] standardizedSample = new double[sample.length]; - for (int i = 0; i < sample.length; i++) { - standardizedSample[i] = (sample[i] - DATA_MEAN) / DATA_STDDEV; + public double[] standardize(Array arr) { + double[] standardizedSample = new double[(int) arr.getSize()]; + IndexIterator iterArr = arr.getIndexIterator(); + for (int i = 0; i < (int) arr.getSize(); i++) { + Number value = (Number) iterArr.getObjectNext(); + standardizedSample[i] = (value.doubleValue() - DATA_MEAN) / DATA_STDDEV; } return standardizedSample; } - public double[] unstandardize(double[] sample){ - double[] descaledData = new double[sample.length]; - for (int i = 0; i < sample.length; i++) { - descaledData[i] = sample[i] * DATA_STDDEV + DATA_MEAN; - } - return descaledData; - } - @Test - public void testEncodeDecode() { + public void testConvertDouble(){ Standardizer filter = new Standardizer(DATA, DataType.DOUBLE); - byte[] byestArray = FilterHelpers.arrayToBytes(DATA, DataType.DOUBLE, ByteOrder.LITTLE_ENDIAN); - byte[] encoded = filter.encode(byestArray); - byte[] decoded = filter.decode(encoded); - Array convertedDecoded = FilterHelpers.bytesToArray(decoded, DataType.DOUBLE, ByteOrder.LITTLE_ENDIAN); - assertThat(nearlyEquals(convertedDecoded, DATA)).isTrue(); + double[] dataStandardizer = standardize(DATA); + Array convertedDecoded = filter.convert(DATA); + assertThat(nearlyEquals(convertedDecoded, Array.makeFromJavaArray(dataStandardizer))).isTrue(); } @Test - public void testEncodeDecodeFloat() { + public void testConvertFloat(){ Standardizer filter = new Standardizer(DATA_FLOATS, DataType.FLOAT); - byte[] bytesArray = FilterHelpers.arrayToBytes(DATA_FLOATS, DataType.FLOAT, ByteOrder.LITTLE_ENDIAN); - byte[] encoded = filter.encode(bytesArray); - byte[] decoded = filter.decode(encoded); - Array convertedDecoded = FilterHelpers.bytesToArray(decoded, DataType.FLOAT, ByteOrder.LITTLE_ENDIAN); - assertThat(nearlyEquals(convertedDecoded, DATA_FLOATS)).isTrue(); - } - - @Test - public void testEncode() { - Standardizer filter = new Standardizer(DATA, DataType.DOUBLE); - double[] dataStandardizer = standardize(DOUBLES); - byte[] bytesArray = FilterHelpers.arrayToBytes(DATA, DataType.DOUBLE, ByteOrder.LITTLE_ENDIAN); - byte[] encoded = filter.encode(bytesArray); - Array convertedDecoded = FilterHelpers.bytesToArray(encoded, DataType.DOUBLE, ByteOrder.LITTLE_ENDIAN); + double[] dataStandardizer = standardize(DATA_FLOATS); + Array convertedDecoded = filter.convert(DATA_FLOATS); assertThat(nearlyEquals(convertedDecoded, Array.makeFromJavaArray(dataStandardizer))).isTrue(); - } - @Test - public void testDecode() { + public void testCalculateMean(){ Standardizer filter = new Standardizer(DATA, DataType.DOUBLE); - double[] dataStandardizer = unstandardize(DOUBLES); - byte[] bytesArray = FilterHelpers.arrayToBytes(DATA, DataType.DOUBLE, ByteOrder.LITTLE_ENDIAN); - byte[] decoded = filter.decode(bytesArray); - Array convertedDecoded = FilterHelpers.bytesToArray(decoded, DataType.DOUBLE, ByteOrder.LITTLE_ENDIAN); - assertThat(nearlyEquals(convertedDecoded, Array.makeFromJavaArray(dataStandardizer))).isTrue(); + double calcMean = filter.getMean(); + assertThat(calcMean).isWithin(TOLERANCE).of(DATA_MEAN); } - @Test - public void testCalculateMean(){ - Standardizer filter = new Standardizer(DATA, DataType.DOUBLE); + public void testCalculateMeanFloat(){ + Standardizer filter = new Standardizer(DATA_FLOATS, DataType.FLOAT); double calcMean = filter.getMean(); assertThat(calcMean).isWithin(TOLERANCE).of(DATA_MEAN); } - - @Test public void testCalculateStandardDeviation(){ Standardizer filter = new Standardizer(DATA, DataType.DOUBLE); double calcStd = filter.getStdDev(); assertThat(calcStd).isWithin(TOLERANCE).of(DATA_STDDEV); } + @Test + public void testCalculateStandardDeviationFloat(){ + Standardizer filter = new Standardizer(DATA_FLOATS, DataType.FLOAT); + double calcStd = filter.getStdDev(); + assertThat(calcStd).isWithin(TOLERANCE).of(DATA_STDDEV); + } } From 9737c0e3123aca0a32ee3faf3ded12e8bd20f9ea Mon Sep 17 00:00:00 2001 From: jessica Date: Wed, 5 Jul 2023 18:06:05 -0500 Subject: [PATCH 11/15] create and add ncml tests --- .../data/ncml/enhance/testStandardizer.ncml | 20 ++++++++ .../nc2/ncml/TestEnhanceStandardizer.java | 51 +++++++++++++++++++ 2 files changed, 71 insertions(+) create mode 100644 cdm/core/src/test/data/ncml/enhance/testStandardizer.ncml create mode 100644 cdm/core/src/test/java/ucar/nc2/ncml/TestEnhanceStandardizer.java diff --git a/cdm/core/src/test/data/ncml/enhance/testStandardizer.ncml b/cdm/core/src/test/data/ncml/enhance/testStandardizer.ncml new file mode 100644 index 0000000000..7d565d6b89 --- /dev/null +++ b/cdm/core/src/test/data/ncml/enhance/testStandardizer.ncml @@ -0,0 +1,20 @@ + + + + + + + 1.0 2.0 3.0 4.0 5.0 + + + + 1.0 2.0 3.0 4.0 5.0 + + + + 1 2 3 4 5 + + diff --git a/cdm/core/src/test/java/ucar/nc2/ncml/TestEnhanceStandardizer.java b/cdm/core/src/test/java/ucar/nc2/ncml/TestEnhanceStandardizer.java new file mode 100644 index 0000000000..a21228b605 --- /dev/null +++ b/cdm/core/src/test/java/ucar/nc2/ncml/TestEnhanceStandardizer.java @@ -0,0 +1,51 @@ +package ucar.nc2.ncml; + +import static com.google.common.truth.Truth.assertThat; +import static ucar.ma2.MAMath.nearlyEquals; + +import java.io.IOException; +import org.junit.Test; +import ucar.ma2.Array; +import ucar.ma2.DataType; +import ucar.nc2.NetcdfFile; +import ucar.nc2.Variable; +import ucar.nc2.dataset.NetcdfDatasets; +import ucar.unidata.util.test.TestDir; + +public class TestEnhanceStandardizer { + + private static String dataDir = TestDir.cdmLocalTestDataDir + "ncml/enhance/"; + public static final double[] DOUBLES = {-1.26491106406735, -0.63245553203368, 0, 0.63245553203368, 1.26491106406735}; + public static final Array DATA_DOUBLES = Array.makeFromJavaArray(DOUBLES); + public static final float[] FLOATS = {-1.26491106406735F, -0.63245553203368F, 0, 0.63245553203368F, 1.26491106406735F}; + public static final Array DATA_FLOATS = Array.makeFromJavaArray(FLOATS); + public static final int[] INTS = {1, 2, 3, 4, 5}; + public static final Array DATA_INTS = Array.makeFromJavaArray(INTS); + + @Test + public void testEnhanceStandardizer() throws IOException { + try (NetcdfFile ncfile = NetcdfDatasets.openDataset(dataDir + "testStandardizer.ncml", true, null)) { + Variable doubleVar = ncfile.findVariable("doublevar"); + assertThat((Object) doubleVar).isNotNull(); + assertThat(doubleVar.getDataType()).isEqualTo(DataType.DOUBLE); + assertThat(doubleVar.attributes().hasAttribute("standardize")).isTrue(); + Array dataDoubles = doubleVar.read(); + assertThat(nearlyEquals(dataDoubles, DATA_DOUBLES)).isTrue(); + + Variable floatVar = ncfile.findVariable("floatvar"); + assertThat((Object) floatVar).isNotNull(); + assertThat(floatVar.getDataType()).isEqualTo(DataType.FLOAT); + assertThat(doubleVar.attributes().hasAttribute("standardize")).isTrue(); + Array dataFloats = doubleVar.read(); + assertThat(nearlyEquals(dataFloats, DATA_FLOATS)).isTrue(); + + Variable intVar = ncfile.findVariable("intvar"); + assertThat((Object) intVar).isNotNull(); + assertThat(intVar.getDataType()).isEqualTo(DataType.INT); + assertThat(intVar.attributes().hasAttribute("standardize")).isTrue(); + Array data = intVar.read(); + assertThat(nearlyEquals(data, DATA_INTS)).isTrue(); + } + } + +} From 512138608d90ae8d60e282c8f9bc606d7141f14c Mon Sep 17 00:00:00 2001 From: jessica Date: Thu, 6 Jul 2023 17:35:17 -0500 Subject: [PATCH 12/15] make standardize an enhancement --- cdm/core/src/main/java/ucar/nc2/constants/CDM.java | 1 + .../main/java/ucar/nc2/dataset/NetcdfDataset.java | 8 +++++++- .../src/main/java/ucar/nc2/dataset/VariableDS.java | 9 +++++++++ .../test/java/ucar/nc2/filter/TestStandardizer.java | 12 ++++++------ .../src/test/java/ucar/nc2/ncml/TestEnhance.java | 2 +- 5 files changed, 24 insertions(+), 8 deletions(-) diff --git a/cdm/core/src/main/java/ucar/nc2/constants/CDM.java b/cdm/core/src/main/java/ucar/nc2/constants/CDM.java index 2ad8f10e7f..a7123eb996 100644 --- a/cdm/core/src/main/java/ucar/nc2/constants/CDM.java +++ b/cdm/core/src/main/java/ucar/nc2/constants/CDM.java @@ -56,6 +56,7 @@ public class CDM { public static final String TIME_OFFSET = "time offset from runtime"; public static final String TIME_OFFSET_HOUR = "hoursFrom0z"; public static final String RUNTIME_COORDINATE = "runtimeCoordinate"; + public static final String STANDARDIZE = "standardize"; // Special attributes diff --git a/cdm/core/src/main/java/ucar/nc2/dataset/NetcdfDataset.java b/cdm/core/src/main/java/ucar/nc2/dataset/NetcdfDataset.java index 18209d83fc..2dff5d608a 100644 --- a/cdm/core/src/main/java/ucar/nc2/dataset/NetcdfDataset.java +++ b/cdm/core/src/main/java/ucar/nc2/dataset/NetcdfDataset.java @@ -115,10 +115,16 @@ public enum Enhance { * every dimension in a variable has a corresponding coordinate variable. */ IncompleteCoordSystems, + /** + * Calculate mean and standard deviation and apply to data: (z-mean)/standard_deviation. + * If the enhanced data type is not {@code FLOAT} or {@code DOUBLE}, this has no effect. + */ + ApplyStandardizer, } private static Set EnhanceAll = Collections.unmodifiableSet(EnumSet.of(Enhance.ConvertEnums, - Enhance.ConvertUnsigned, Enhance.ApplyScaleOffset, Enhance.ConvertMissing, Enhance.CoordSystems)); + Enhance.ConvertUnsigned, Enhance.ApplyScaleOffset, Enhance.ConvertMissing, Enhance.CoordSystems, + Enhance.ApplyStandardizer)); private static Set EnhanceNone = Collections.unmodifiableSet(EnumSet.noneOf(Enhance.class)); private static Set defaultEnhanceMode = EnhanceAll; diff --git a/cdm/core/src/main/java/ucar/nc2/dataset/VariableDS.java b/cdm/core/src/main/java/ucar/nc2/dataset/VariableDS.java index 2ee21bc47d..f187ff5946 100644 --- a/cdm/core/src/main/java/ucar/nc2/dataset/VariableDS.java +++ b/cdm/core/src/main/java/ucar/nc2/dataset/VariableDS.java @@ -14,6 +14,7 @@ import ucar.nc2.filter.ConvertMissing; import ucar.nc2.filter.FilterHelpers; import ucar.nc2.filter.ScaleOffset; +import ucar.nc2.filter.Standardizer; import ucar.nc2.filter.UnsignedConversion; import ucar.nc2.internal.dataset.CoordinatesHelper; import ucar.nc2.iosp.netcdf3.N3iosp; @@ -269,6 +270,9 @@ Array convert(Array data, Set enhancements) { && (dataType == DataType.FLOAT || dataType == DataType.DOUBLE)) { data = convertMissing.convertMissing(data); } + if (enhancements.contains(Enhance.ApplyStandardizer) && standardizer != null) { + data = standardizer.convert(data); + } return data; } } @@ -797,6 +801,7 @@ public Array convert(Array in, boolean convertUnsigned, boolean applyScaleOffset // TODO make immutable in version 6 private UnsignedConversion unsignedConversion; private ScaleOffset scaleOffset; + private Standardizer standardizer; private ConvertMissing convertMissing; private Set enhanceMode = EnumSet.noneOf(Enhance.class); // The set of enhancements that were made. @@ -851,6 +856,10 @@ private void createEnhancements() { this.scaleOffset = ScaleOffset.createFromVariable(this); this.dataType = scaleOffset != null ? scaleOffset.getScaledOffsetType() : this.dataType; } + Attribute standardizerAtt = findAttribute(CDM.STANDARDIZE); + if (standardizerAtt != null && this.enhanceMode.contains(Enhance.ApplyStandardizer) && (dataType == DataType.DOUBLE || dataType == DataType.FLOAT)){ + this.standardizer = Standardizer.createFromVariable(this); + } // need fill value info before convertMissing Attribute fillValueAtt = findAttribute(CDM.FILL_VALUE); diff --git a/cdm/core/src/test/java/ucar/nc2/filter/TestStandardizer.java b/cdm/core/src/test/java/ucar/nc2/filter/TestStandardizer.java index cc6d062f32..87baced490 100644 --- a/cdm/core/src/test/java/ucar/nc2/filter/TestStandardizer.java +++ b/cdm/core/src/test/java/ucar/nc2/filter/TestStandardizer.java @@ -16,7 +16,7 @@ public class TestStandardizer { public static double TOLERANCE = 1.0E-9; public static final double[] DOUBLES = {Double.NaN, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0}; - public static final Array DATA = Array.makeFromJavaArray(DOUBLES); + public static final Array DATA_DOUBLES = Array.makeFromJavaArray(DOUBLES); public static final float[] FLOATS = {Float.NaN, 5.0F, 6.0F, 7.0F, 8.0F, 9.0F, 10.0F, 11.0F, 12.0F, 13.0F, 14.0F, 15.0F}; public static final Array DATA_FLOATS = Array.makeFromJavaArray(FLOATS); @@ -33,9 +33,9 @@ public double[] standardize(Array arr) { @Test public void testConvertDouble(){ - Standardizer filter = new Standardizer(DATA, DataType.DOUBLE); - double[] dataStandardizer = standardize(DATA); - Array convertedDecoded = filter.convert(DATA); + Standardizer filter = new Standardizer(DATA_DOUBLES, DataType.DOUBLE); + double[] dataStandardizer = standardize(DATA_DOUBLES); + Array convertedDecoded = filter.convert(DATA_DOUBLES); assertThat(nearlyEquals(convertedDecoded, Array.makeFromJavaArray(dataStandardizer))).isTrue(); } @@ -48,7 +48,7 @@ public void testConvertFloat(){ } @Test public void testCalculateMean(){ - Standardizer filter = new Standardizer(DATA, DataType.DOUBLE); + Standardizer filter = new Standardizer(DATA_DOUBLES, DataType.DOUBLE); double calcMean = filter.getMean(); assertThat(calcMean).isWithin(TOLERANCE).of(DATA_MEAN); } @@ -60,7 +60,7 @@ public void testCalculateMeanFloat(){ } @Test public void testCalculateStandardDeviation(){ - Standardizer filter = new Standardizer(DATA, DataType.DOUBLE); + Standardizer filter = new Standardizer(DATA_DOUBLES, DataType.DOUBLE); double calcStd = filter.getStdDev(); assertThat(calcStd).isWithin(TOLERANCE).of(DATA_STDDEV); } diff --git a/cdm/core/src/test/java/ucar/nc2/ncml/TestEnhance.java b/cdm/core/src/test/java/ucar/nc2/ncml/TestEnhance.java index 4d60a7987d..40a3572ac5 100644 --- a/cdm/core/src/test/java/ucar/nc2/ncml/TestEnhance.java +++ b/cdm/core/src/test/java/ucar/nc2/ncml/TestEnhance.java @@ -76,7 +76,7 @@ public void testStandaloneEnhance() throws IOException { @Test public void testStandaloneEnhanceDataset() throws IOException { - try (NetcdfFile ncfile = NetcdfDatasets.openDataset(dataDir + "testStandaloneNoEnhance.ncml", true, null)) { + try (NetcdfFile ncfile = NetcdfDatasets.openDataset(dataDir + "testStandaloneEnhance.ncml", true, null)) { Variable unvar = ncfile.findVariable("unvar"); assertThat((Object) unvar).isNotNull(); assertThat(unvar.getDataType()).isEqualTo(DataType.UINT); From 1f78acd5cca7826c8ef7ad6bd960758fd79831f6 Mon Sep 17 00:00:00 2001 From: jessica Date: Fri, 7 Jul 2023 10:26:56 -0500 Subject: [PATCH 13/15] code style fixes --- .../java/ucar/nc2/dataset/NetcdfDataset.java | 6 +++--- .../java/ucar/nc2/dataset/VariableDS.java | 3 ++- .../java/ucar/nc2/filter/Standardizer.java | 12 ++++++------ .../ucar/nc2/filter/TestStandardizer.java | 19 ++++++++++++------- .../nc2/ncml/TestEnhanceStandardizer.java | 3 ++- 5 files changed, 25 insertions(+), 18 deletions(-) diff --git a/cdm/core/src/main/java/ucar/nc2/dataset/NetcdfDataset.java b/cdm/core/src/main/java/ucar/nc2/dataset/NetcdfDataset.java index 2dff5d608a..051eaeb034 100644 --- a/cdm/core/src/main/java/ucar/nc2/dataset/NetcdfDataset.java +++ b/cdm/core/src/main/java/ucar/nc2/dataset/NetcdfDataset.java @@ -122,9 +122,9 @@ public enum Enhance { ApplyStandardizer, } - private static Set EnhanceAll = Collections.unmodifiableSet(EnumSet.of(Enhance.ConvertEnums, - Enhance.ConvertUnsigned, Enhance.ApplyScaleOffset, Enhance.ConvertMissing, Enhance.CoordSystems, - Enhance.ApplyStandardizer)); + private static Set EnhanceAll = + Collections.unmodifiableSet(EnumSet.of(Enhance.ConvertEnums, Enhance.ConvertUnsigned, Enhance.ApplyScaleOffset, + Enhance.ConvertMissing, Enhance.CoordSystems, Enhance.ApplyStandardizer)); private static Set EnhanceNone = Collections.unmodifiableSet(EnumSet.noneOf(Enhance.class)); private static Set defaultEnhanceMode = EnhanceAll; diff --git a/cdm/core/src/main/java/ucar/nc2/dataset/VariableDS.java b/cdm/core/src/main/java/ucar/nc2/dataset/VariableDS.java index f187ff5946..474772fa53 100644 --- a/cdm/core/src/main/java/ucar/nc2/dataset/VariableDS.java +++ b/cdm/core/src/main/java/ucar/nc2/dataset/VariableDS.java @@ -857,7 +857,8 @@ private void createEnhancements() { this.dataType = scaleOffset != null ? scaleOffset.getScaledOffsetType() : this.dataType; } Attribute standardizerAtt = findAttribute(CDM.STANDARDIZE); - if (standardizerAtt != null && this.enhanceMode.contains(Enhance.ApplyStandardizer) && (dataType == DataType.DOUBLE || dataType == DataType.FLOAT)){ + if (standardizerAtt != null && this.enhanceMode.contains(Enhance.ApplyStandardizer) + && (dataType == DataType.DOUBLE || dataType == DataType.FLOAT)) { this.standardizer = Standardizer.createFromVariable(this); } diff --git a/cdm/core/src/main/java/ucar/nc2/filter/Standardizer.java b/cdm/core/src/main/java/ucar/nc2/filter/Standardizer.java index 1c679e19cf..e880b6cff9 100644 --- a/cdm/core/src/main/java/ucar/nc2/filter/Standardizer.java +++ b/cdm/core/src/main/java/ucar/nc2/filter/Standardizer.java @@ -22,17 +22,17 @@ public static Standardizer createFromVariable(VariableDS var) { Array arr = var.read(); DataType type = var.getDataType(); return new Standardizer(arr, type); - } catch(IOException e) { + } catch (IOException e) { return new Standardizer(0.0, 1.0, var.getDataType()); } } - private Standardizer(double mean, double stdDev, DataType type){ + private Standardizer(double mean, double stdDev, DataType type) { this.mean = mean; this.stdDev = stdDev; Map props = new HashMap<>(); props.put("offset", mean); - props.put("scale", 1/stdDev); // update to stdDev after changing scale offset + props.put("scale", 1 / stdDev); // update to stdDev after changing scale offset props.put("dtype", type); scaleOffset = new ScaleOffset(props); } @@ -46,7 +46,7 @@ private static double calculateMean(Array arr) { IndexIterator iterArr = arr.getIndexIterator(); while (iterArr.hasNext()) { Number value = (Number) iterArr.getObjectNext(); - if (!Double.isNaN(value.doubleValue())){ + if (!Double.isNaN(value.doubleValue())) { cur.addValue(value.doubleValue()); } } @@ -65,11 +65,11 @@ private static double calculateStandardDeviation(Array arr) { return cur.getStandardDeviation(); } - public Array convert(Array arr){ + public Array convert(Array arr) { return scaleOffset.applyScaleOffset(arr); } - public double getMean(){ + public double getMean() { return mean; } diff --git a/cdm/core/src/test/java/ucar/nc2/filter/TestStandardizer.java b/cdm/core/src/test/java/ucar/nc2/filter/TestStandardizer.java index 87baced490..61bf63c468 100644 --- a/cdm/core/src/test/java/ucar/nc2/filter/TestStandardizer.java +++ b/cdm/core/src/test/java/ucar/nc2/filter/TestStandardizer.java @@ -18,7 +18,8 @@ public class TestStandardizer { public static final double[] DOUBLES = {Double.NaN, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0}; public static final Array DATA_DOUBLES = Array.makeFromJavaArray(DOUBLES); - public static final float[] FLOATS = {Float.NaN, 5.0F, 6.0F, 7.0F, 8.0F, 9.0F, 10.0F, 11.0F, 12.0F, 13.0F, 14.0F, 15.0F}; + public static final float[] FLOATS = + {Float.NaN, 5.0F, 6.0F, 7.0F, 8.0F, 9.0F, 10.0F, 11.0F, 12.0F, 13.0F, 14.0F, 15.0F}; public static final Array DATA_FLOATS = Array.makeFromJavaArray(FLOATS); public double[] standardize(Array arr) { @@ -32,7 +33,7 @@ public double[] standardize(Array arr) { } @Test - public void testConvertDouble(){ + public void testConvertDouble() { Standardizer filter = new Standardizer(DATA_DOUBLES, DataType.DOUBLE); double[] dataStandardizer = standardize(DATA_DOUBLES); Array convertedDecoded = filter.convert(DATA_DOUBLES); @@ -40,32 +41,36 @@ public void testConvertDouble(){ } @Test - public void testConvertFloat(){ + public void testConvertFloat() { Standardizer filter = new Standardizer(DATA_FLOATS, DataType.FLOAT); double[] dataStandardizer = standardize(DATA_FLOATS); Array convertedDecoded = filter.convert(DATA_FLOATS); assertThat(nearlyEquals(convertedDecoded, Array.makeFromJavaArray(dataStandardizer))).isTrue(); } + @Test - public void testCalculateMean(){ + public void testCalculateMean() { Standardizer filter = new Standardizer(DATA_DOUBLES, DataType.DOUBLE); double calcMean = filter.getMean(); assertThat(calcMean).isWithin(TOLERANCE).of(DATA_MEAN); } + @Test - public void testCalculateMeanFloat(){ + public void testCalculateMeanFloat() { Standardizer filter = new Standardizer(DATA_FLOATS, DataType.FLOAT); double calcMean = filter.getMean(); assertThat(calcMean).isWithin(TOLERANCE).of(DATA_MEAN); } + @Test - public void testCalculateStandardDeviation(){ + public void testCalculateStandardDeviation() { Standardizer filter = new Standardizer(DATA_DOUBLES, DataType.DOUBLE); double calcStd = filter.getStdDev(); assertThat(calcStd).isWithin(TOLERANCE).of(DATA_STDDEV); } + @Test - public void testCalculateStandardDeviationFloat(){ + public void testCalculateStandardDeviationFloat() { Standardizer filter = new Standardizer(DATA_FLOATS, DataType.FLOAT); double calcStd = filter.getStdDev(); assertThat(calcStd).isWithin(TOLERANCE).of(DATA_STDDEV); diff --git a/cdm/core/src/test/java/ucar/nc2/ncml/TestEnhanceStandardizer.java b/cdm/core/src/test/java/ucar/nc2/ncml/TestEnhanceStandardizer.java index a21228b605..56659e0df3 100644 --- a/cdm/core/src/test/java/ucar/nc2/ncml/TestEnhanceStandardizer.java +++ b/cdm/core/src/test/java/ucar/nc2/ncml/TestEnhanceStandardizer.java @@ -17,7 +17,8 @@ public class TestEnhanceStandardizer { private static String dataDir = TestDir.cdmLocalTestDataDir + "ncml/enhance/"; public static final double[] DOUBLES = {-1.26491106406735, -0.63245553203368, 0, 0.63245553203368, 1.26491106406735}; public static final Array DATA_DOUBLES = Array.makeFromJavaArray(DOUBLES); - public static final float[] FLOATS = {-1.26491106406735F, -0.63245553203368F, 0, 0.63245553203368F, 1.26491106406735F}; + public static final float[] FLOATS = + {-1.26491106406735F, -0.63245553203368F, 0, 0.63245553203368F, 1.26491106406735F}; public static final Array DATA_FLOATS = Array.makeFromJavaArray(FLOATS); public static final int[] INTS = {1, 2, 3, 4, 5}; public static final Array DATA_INTS = Array.makeFromJavaArray(INTS); From 2e16087d93275c23bb50d3a567abbb809b27aa16 Mon Sep 17 00:00:00 2001 From: jessica Date: Fri, 7 Jul 2023 11:32:03 -0500 Subject: [PATCH 14/15] small fix to run standardizer --- cdm/core/src/main/java/ucar/nc2/dataset/VariableDS.java | 3 +-- cdm/core/src/main/java/ucar/nc2/filter/Standardizer.java | 2 +- 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/cdm/core/src/main/java/ucar/nc2/dataset/VariableDS.java b/cdm/core/src/main/java/ucar/nc2/dataset/VariableDS.java index 474772fa53..77f45eafed 100644 --- a/cdm/core/src/main/java/ucar/nc2/dataset/VariableDS.java +++ b/cdm/core/src/main/java/ucar/nc2/dataset/VariableDS.java @@ -857,8 +857,7 @@ private void createEnhancements() { this.dataType = scaleOffset != null ? scaleOffset.getScaledOffsetType() : this.dataType; } Attribute standardizerAtt = findAttribute(CDM.STANDARDIZE); - if (standardizerAtt != null && this.enhanceMode.contains(Enhance.ApplyStandardizer) - && (dataType == DataType.DOUBLE || dataType == DataType.FLOAT)) { + if (standardizerAtt != null && this.enhanceMode.contains(Enhance.ApplyStandardizer) && dataType.isFloatingPoint()) { this.standardizer = Standardizer.createFromVariable(this); } diff --git a/cdm/core/src/main/java/ucar/nc2/filter/Standardizer.java b/cdm/core/src/main/java/ucar/nc2/filter/Standardizer.java index e880b6cff9..9ef28d0790 100644 --- a/cdm/core/src/main/java/ucar/nc2/filter/Standardizer.java +++ b/cdm/core/src/main/java/ucar/nc2/filter/Standardizer.java @@ -32,7 +32,7 @@ private Standardizer(double mean, double stdDev, DataType type) { this.stdDev = stdDev; Map props = new HashMap<>(); props.put("offset", mean); - props.put("scale", 1 / stdDev); // update to stdDev after changing scale offset + props.put("scale", 1 / stdDev); props.put("dtype", type); scaleOffset = new ScaleOffset(props); } From 5d35feb4d660a0c0babbb18448c58cc52dca486a Mon Sep 17 00:00:00 2001 From: jessica Date: Mon, 10 Jul 2023 14:25:25 -0500 Subject: [PATCH 15/15] refactor standardizer --- .../java/ucar/nc2/filter/Standardizer.java | 33 ++++++------------- .../ucar/nc2/filter/TestStandardizer.java | 24 +++++++------- .../test/java/ucar/nc2/ncml/TestEnhance.java | 2 +- .../nc2/ncml/TestEnhanceStandardizer.java | 3 +- 4 files changed, 25 insertions(+), 37 deletions(-) diff --git a/cdm/core/src/main/java/ucar/nc2/filter/Standardizer.java b/cdm/core/src/main/java/ucar/nc2/filter/Standardizer.java index 9ef28d0790..a778d20574 100644 --- a/cdm/core/src/main/java/ucar/nc2/filter/Standardizer.java +++ b/cdm/core/src/main/java/ucar/nc2/filter/Standardizer.java @@ -12,8 +12,6 @@ public class Standardizer { private final ScaleOffset scaleOffset; - private static final String name = "Standardizer"; - private static final int id = -1; private final double mean; private final double stdDev; @@ -21,12 +19,17 @@ public static Standardizer createFromVariable(VariableDS var) { try { Array arr = var.read(); DataType type = var.getDataType(); - return new Standardizer(arr, type); + return createFromArray(arr, type); } catch (IOException e) { return new Standardizer(0.0, 1.0, var.getDataType()); } } + public static Standardizer createFromArray(Array arr, DataType type) { + SummaryStatistics statistics = calculationHelper(arr); + return new Standardizer(statistics.getMean(), statistics.getStandardDeviation(), type); + } + private Standardizer(double mean, double stdDev, DataType type) { this.mean = mean; this.stdDev = stdDev; @@ -37,32 +40,16 @@ private Standardizer(double mean, double stdDev, DataType type) { scaleOffset = new ScaleOffset(props); } - public Standardizer(Array arr, DataType type) { - this(calculateMean(arr), calculateStandardDeviation(arr), type); - } - - private static double calculateMean(Array arr) { - SummaryStatistics cur = new SummaryStatistics(); - IndexIterator iterArr = arr.getIndexIterator(); - while (iterArr.hasNext()) { - Number value = (Number) iterArr.getObjectNext(); - if (!Double.isNaN(value.doubleValue())) { - cur.addValue(value.doubleValue()); - } - } - return cur.getMean(); - } - - private static double calculateStandardDeviation(Array arr) { - SummaryStatistics cur = new SummaryStatistics(); + private static SummaryStatistics calculationHelper(Array arr) { + SummaryStatistics sumStat = new SummaryStatistics(); IndexIterator iterArr = arr.getIndexIterator(); while (iterArr.hasNext()) { Number value = (Number) iterArr.getObjectNext(); if (!Double.isNaN(value.doubleValue())) { - cur.addValue(value.doubleValue()); + sumStat.addValue(value.doubleValue()); } } - return cur.getStandardDeviation(); + return sumStat; } public Array convert(Array arr) { diff --git a/cdm/core/src/test/java/ucar/nc2/filter/TestStandardizer.java b/cdm/core/src/test/java/ucar/nc2/filter/TestStandardizer.java index 61bf63c468..d66409c9d8 100644 --- a/cdm/core/src/test/java/ucar/nc2/filter/TestStandardizer.java +++ b/cdm/core/src/test/java/ucar/nc2/filter/TestStandardizer.java @@ -34,44 +34,44 @@ public double[] standardize(Array arr) { @Test public void testConvertDouble() { - Standardizer filter = new Standardizer(DATA_DOUBLES, DataType.DOUBLE); - double[] dataStandardizer = standardize(DATA_DOUBLES); - Array convertedDecoded = filter.convert(DATA_DOUBLES); - assertThat(nearlyEquals(convertedDecoded, Array.makeFromJavaArray(dataStandardizer))).isTrue(); + Standardizer filter = Standardizer.createFromArray(DATA_DOUBLES, DataType.DOUBLE); + double[] dataStandardized = standardize(DATA_DOUBLES); + Array dataConverted = filter.convert(DATA_DOUBLES); + assertThat(nearlyEquals(dataConverted, Array.makeFromJavaArray(dataStandardized))).isTrue(); } @Test public void testConvertFloat() { - Standardizer filter = new Standardizer(DATA_FLOATS, DataType.FLOAT); - double[] dataStandardizer = standardize(DATA_FLOATS); - Array convertedDecoded = filter.convert(DATA_FLOATS); - assertThat(nearlyEquals(convertedDecoded, Array.makeFromJavaArray(dataStandardizer))).isTrue(); + Standardizer filter = Standardizer.createFromArray(DATA_FLOATS, DataType.FLOAT); + double[] dataStandardized = standardize(DATA_FLOATS); + Array dataConverted = filter.convert(DATA_FLOATS); + assertThat(nearlyEquals(dataConverted, Array.makeFromJavaArray(dataStandardized))).isTrue(); } @Test public void testCalculateMean() { - Standardizer filter = new Standardizer(DATA_DOUBLES, DataType.DOUBLE); + Standardizer filter = Standardizer.createFromArray(DATA_DOUBLES, DataType.DOUBLE); double calcMean = filter.getMean(); assertThat(calcMean).isWithin(TOLERANCE).of(DATA_MEAN); } @Test public void testCalculateMeanFloat() { - Standardizer filter = new Standardizer(DATA_FLOATS, DataType.FLOAT); + Standardizer filter = Standardizer.createFromArray(DATA_FLOATS, DataType.FLOAT); double calcMean = filter.getMean(); assertThat(calcMean).isWithin(TOLERANCE).of(DATA_MEAN); } @Test public void testCalculateStandardDeviation() { - Standardizer filter = new Standardizer(DATA_DOUBLES, DataType.DOUBLE); + Standardizer filter = Standardizer.createFromArray(DATA_DOUBLES, DataType.DOUBLE); double calcStd = filter.getStdDev(); assertThat(calcStd).isWithin(TOLERANCE).of(DATA_STDDEV); } @Test public void testCalculateStandardDeviationFloat() { - Standardizer filter = new Standardizer(DATA_FLOATS, DataType.FLOAT); + Standardizer filter = Standardizer.createFromArray(DATA_FLOATS, DataType.FLOAT); double calcStd = filter.getStdDev(); assertThat(calcStd).isWithin(TOLERANCE).of(DATA_STDDEV); } diff --git a/cdm/core/src/test/java/ucar/nc2/ncml/TestEnhance.java b/cdm/core/src/test/java/ucar/nc2/ncml/TestEnhance.java index 40a3572ac5..4d60a7987d 100644 --- a/cdm/core/src/test/java/ucar/nc2/ncml/TestEnhance.java +++ b/cdm/core/src/test/java/ucar/nc2/ncml/TestEnhance.java @@ -76,7 +76,7 @@ public void testStandaloneEnhance() throws IOException { @Test public void testStandaloneEnhanceDataset() throws IOException { - try (NetcdfFile ncfile = NetcdfDatasets.openDataset(dataDir + "testStandaloneEnhance.ncml", true, null)) { + try (NetcdfFile ncfile = NetcdfDatasets.openDataset(dataDir + "testStandaloneNoEnhance.ncml", true, null)) { Variable unvar = ncfile.findVariable("unvar"); assertThat((Object) unvar).isNotNull(); assertThat(unvar.getDataType()).isEqualTo(DataType.UINT); diff --git a/cdm/core/src/test/java/ucar/nc2/ncml/TestEnhanceStandardizer.java b/cdm/core/src/test/java/ucar/nc2/ncml/TestEnhanceStandardizer.java index 56659e0df3..6d5a380342 100644 --- a/cdm/core/src/test/java/ucar/nc2/ncml/TestEnhanceStandardizer.java +++ b/cdm/core/src/test/java/ucar/nc2/ncml/TestEnhanceStandardizer.java @@ -45,7 +45,8 @@ public void testEnhanceStandardizer() throws IOException { assertThat(intVar.getDataType()).isEqualTo(DataType.INT); assertThat(intVar.attributes().hasAttribute("standardize")).isTrue(); Array data = intVar.read(); - assertThat(nearlyEquals(data, DATA_INTS)).isTrue(); + assertThat(nearlyEquals(data, DATA_INTS)).isTrue(); // The enhancement doesn't apply to ints, so the data should + // be equal to the input array } }